In [30]:
import pandas as pd

# Load the two parquet inputs used by the rest of the notebook:
#   * markets_info_df - one table of market metadata (see dtypes output below)
#   * all_histories   - price history rows; its index is later unpacked into
#                       token_id / timestamp columns by the join cell
markets_info_df = pd.read_parquet(path="epl_markets.parquet")
all_histories = pd.read_parquet(path="first_token_histories.parquet")

# Show the market table's column dtypes as the cell output.
markets_info_df.dtypes

winner                               object
loser                                object
is_draw                                bool
condition_id                         object
question_id                          object
id                                   object
description                          object
end_date_iso                         object
uma_end_data                         object
volume                              float64
closed                                 bool
first_token_id                       object
first_token_outcome                  object
first_token_price                   float64
second_token_id                      object
second_token_outcome                 object
second_token_price                  float64
game_start_time         datetime64[ns, UTC]
dtype: object

In [31]:
# Cut-off applied before kick-off: price ticks at or after
# game_start_time - PRE_GAME_OFFSET are excluded by the downstream filter cell.
PRE_GAME_OFFSET = pd.Timedelta(minutes=5)

# Columns of the market table that are carried onto every price tick.
MARKET_COLUMNS = [
    "game_start_time",
    "first_token_id",
    "first_token_price",
    "closed",
    "volume",
]

# Attach market metadata to every price-history row.
#   1. reset_index turns the two index levels of all_histories into the
#      columns token_id and timestamp.
#   2. The inner merge keeps only ticks whose token_id matches a market's
#      first_token_id (ticks with no matching market are dropped).
#   3. timestamp is normalised to tz-aware UTC so it can be compared with
#      game_start_time, which is datetime64[ns, UTC].
#      `utc=True` localises naive values as UTC (same result as the former
#      tz_localize("UTC")) and, unlike tz_localize, does not raise when the
#      stored timestamps are already tz-aware.
#   4. pre_game_time is the per-row cut-off: game_start_time - PRE_GAME_OFFSET.
joined_gametime_prices = (
    all_histories.reset_index(names=["token_id", "timestamp"])
    .merge(
        markets_info_df[MARKET_COLUMNS],
        how="inner",
        left_on="token_id",
        right_on="first_token_id",
    )
    .assign(
        timestamp=lambda x: pd.to_datetime(x.timestamp, utc=True),
        pre_game_time=lambda x: x.game_start_time - PRE_GAME_OFFSET,
    )
)
joined_gametime_prices

Unnamed: 0,token_id,timestamp,price,game_start_time,first_token_id,first_token_price,closed,volume,pre_game_time
0,9490258977501443984976270266519961028070416829...,2023-02-13 15:16:53+00:00,0.50,2023-02-13 20:00:00+00:00,9490258977501443984976270266519961028070416829...,0.999811,True,2436.708073,2023-02-13 19:55:00+00:00
1,9490258977501443984976270266519961028070416829...,2023-02-13 15:19:00+00:00,0.50,2023-02-13 20:00:00+00:00,9490258977501443984976270266519961028070416829...,0.999811,True,2436.708073,2023-02-13 19:55:00+00:00
2,9490258977501443984976270266519961028070416829...,2023-02-13 15:20:33+00:00,0.50,2023-02-13 20:00:00+00:00,9490258977501443984976270266519961028070416829...,0.999811,True,2436.708073,2023-02-13 19:55:00+00:00
3,9490258977501443984976270266519961028070416829...,2023-02-13 15:21:36+00:00,0.78,2023-02-13 20:00:00+00:00,9490258977501443984976270266519961028070416829...,0.999811,True,2436.708073,2023-02-13 19:55:00+00:00
4,9490258977501443984976270266519961028070416829...,2023-02-13 15:22:26+00:00,0.58,2023-02-13 20:00:00+00:00,9490258977501443984976270266519961028070416829...,0.999811,True,2436.708073,2023-02-13 19:55:00+00:00
...,...,...,...,...,...,...,...,...,...
668602,4937274880027886183807479622344736653709925151...,2024-08-25 15:25:01+00:00,0.13,2024-08-25 15:30:00+00:00,4937274880027886183807479622344736653709925151...,0.000000,True,9199.510944,2024-08-25 15:25:00+00:00
668603,4937274880027886183807479622344736653709925151...,2024-08-25 15:26:01+00:00,0.13,2024-08-25 15:30:00+00:00,4937274880027886183807479622344736653709925151...,0.000000,True,9199.510944,2024-08-25 15:25:00+00:00
668604,4937274880027886183807479622344736653709925151...,2024-08-25 15:27:01+00:00,0.13,2024-08-25 15:30:00+00:00,4937274880027886183807479622344736653709925151...,0.000000,True,9199.510944,2024-08-25 15:25:00+00:00
668605,4937274880027886183807479622344736653709925151...,2024-08-25 15:28:01+00:00,0.13,2024-08-25 15:30:00+00:00,4937274880027886183807479622344736653709925151...,0.000000,True,9199.510944,2024-08-25 15:25:00+00:00


In [35]:
# Reduce the joined tick table to one row per token: the latest tick strictly
# before the pre-game cut-off, for markets whose volume exceeds 1.0.
#
# The explicit sort makes "latest" well defined. groupby(...).last() picks the
# last row in frame order, so without sorting the result silently depends on
# the row order of the parquet file. kind="stable" keeps the existing order
# among equal timestamps, so already-sorted data yields the same result as
# before.
#
# Note: GroupBy.last() returns the last non-null value per column, so a NaN
# in the final tick of a token is filled from an earlier tick of that token.
#
# NOTE(review): the `closed` column is merged in but not used as a filter
# here; if first_token_price is meant to be the resolved outcome, unresolved
# markets may need to be excluded - confirm.
procesed_df = (
    joined_gametime_prices.loc[
        (joined_gametime_prices.timestamp < joined_gametime_prices.pre_game_time)
        & (joined_gametime_prices.volume > 1.0)
    ]
    .sort_values("timestamp", kind="stable")
    .groupby("token_id")
    .last()
)

In [36]:
import plotly.express as px

# One point per token: last price seen before the pre-game cut-off (x) against
# the market table's first_token_price (y). Title and axis labels are set so
# the figure can be read on its own when the notebook is skimmed.
px.scatter(
    procesed_df,
    x="price",
    y="first_token_price",
    title="Last pre-game price vs. first_token_price (one point per token)",
    labels={
        "price": "Last price before pre-game cut-off",
        "first_token_price": "Market first_token_price",
    },
)

In [41]:
import numpy as np

# Share of tokens whose last pre-game price and first_token_price round to the
# same whole number (rounding to 0 decimals; exact halves round to the nearest
# even value, so 0.5 -> 0.0). The mean of the boolean match flags is the
# fraction of matching rows.
(
    procesed_df["price"]
    .round(0)
    .eq(procesed_df["first_token_price"].round(0))
    .mean()
)

np.float64(0.717948717948718)