In [8]:
import polars as pl
from functime.cross_validation import train_test_split
from functime.evaluation import rank_fva, rank_point_forecasts, rank_residuals
from functime.forecasting import lightgbm, snaive
from functime.plotting import plot_comet, plot_forecasts, plot_fva, plot_residuals
from functime.preprocessing import detrend

pl.Config.set_tbl_width_chars(256)
pl.Config.set_fmt_str_lengths(256)
pl.Config.set_tbl_rows(24)

# Location of the IPC generated files from above.
ipc_dir = "/home/seb/git/s2protocol-rs/ipcs"

# Filter out these:
# Beacon*: Similiar to above, default targets of hatcheries and buildings
#          points to Beacons, clicks on the map, or actions when teams play
#          together and send attack/defend points on the map.
#          For now we'll avoid them.
unit_born_df = pl.scan_ipc(f"{ipc_dir}/unit_born.ipc")
# .filter((~pl.col("unit_type_name").str.starts_with("Beacon")))
unit_died_df = pl.scan_ipc(f"{ipc_dir}/unit_died.ipc")
# .filter((~pl.col("unit_died_name").str.starts_with("Beacon")))
stats_df = pl.scan_ipc(f"{ipc_dir}/stats.ipc")
upgrades_df = pl.scan_ipc(f"{ipc_dir}/upgrades.ipc")
user_init_data_df = pl.scan_ipc(f"{ipc_dir}/user_init_data.ipc")
lobby_slot_init_data_df = pl.scan_ipc(f"{ipc_dir}/lobby_init_data.ipc")
details_df = pl.scan_ipc(f"{ipc_dir}/details.ipc")
lobby_slot_init_data_df.collect_schema()


Schema([('ext_fs_id', UInt64),
        ('ext_fs_sha256', String),
        ('ext_fs_file_name', String),
        ('control', Int64),
        ('user_id', Int64),
        ('team_id', Int64),
        ('observe', UInt8),
        ('working_set_slot_id', UInt8),
        ('map_size_x', UInt8),
        ('map_size_y', UInt8)])

In [7]:
upgrades_df.collect_schema()

Schema([('player_id', UInt8),
        ('name', String),
        ('count', Int32),
        ('ext_replay_loop', Int64),
        ('ext_replay_seconds', UInt32),
        ('ext_fs_id', UInt64)])

In [6]:

y = (
    upgrades_df.join(
        details_df,
        left_on=[
            pl.col("ext_fs_replay_sha256"),
            pl.col("player_id"),
        ],
        right_on=[
            pl.col("ext_fs_replay_sha256"),
            pl.col("working_set_slot_id"),
        ],
    )
    .filter((pl.col("ext_replay_seconds").gt(0)) & (pl.col("name") == "zerglingmovementspeed"))
    .select(
        [
            pl.col("name"),
            pl.col("ext_datetime").alias("time"),
            pl.col("ext_replay_seconds").cast(pl.Float64),
        ]
    )
    .sort("time")
    .collect()
)
fh = 12
entity_col = y.columns[0]
y_train, y_test = train_test_split(test_size=fh, eager=True)(y)
y.select(
    pl.all().exclude("ext_replay_seconds").n_unique(),
    pl.col("time").min().dt.date().alias("start"),
    pl.col("time").max().dt.date().alias("end"),
)
y_pred_bench = snaive(freq="1w", sp=24)(y=y_train, fh=fh)
y_pred_bench.head()

selected_entities = ranks.head(4).get_column(entity_col).unique()
figure = plot_forecasts(
    y_true=y.filter(pl.col(entity_col).is_in(selected_entities)),
    y_pred=y_pred_bench.filter(pl.col(entity_col).is_in(selected_entities)),
    n_cols=2,
    height=1000,
    width=1200,
)
figure.show(renderer="svg")
ranks = rank_point_forecasts(y_true=y_test, y_pred=y_pred_bench, descending=True)
ranks.head()
selected_entities = ranks.head(4).get_column(entity_col).unique()
figure = plot_forecasts(
    y_true=y.filter(pl.col(entity_col).is_in(selected_entities)),
    y_pred=y_pred_bench.filter(pl.col(entity_col).is_in(selected_entities)),
    n_cols=2,
    height=1000,
    width=1200,
)
figure.show(renderer="svg")
figure = plot_fva(
    y_true=y_test, y_pred=y_pred, y_pred_bench=y_pred_bench, height=900, width=900
)
figure.show(renderer="svg")

ColumnNotFoundError: ext_fs_replay_sha256

Resolved plan until failure:

	---> FAILED HERE RESOLVING 'sink' <---
Ipc SCAN [/home/seb/git/s2protocol-rs/ipcs/details.ipc] [id: 139890742623024]
PROJECT */22 COLUMNS