# In[11]:
# Matchstats helper for the features file: attaches MatchID and series_datetime to each matchstats row.

import pandas as pd

MATCHSTATS_PATH = "data/vlr_matchstats_demo.csv"
PATCHPOOL_PATH  = "data/vlr_patchpool_demo.csv"
OUTPUT_PATH     = "data/vlr_matchstats_with_matchid_demo.csv"


def clean_text_column(series):
    """Return *series* as whitespace-stripped strings, keeping missing values missing.

    A bare ``astype(str)`` would turn NaN/None into the literal text "nan"/"None",
    so the original null positions are masked back to missing afterwards.
    """
    return series.astype(str).str.strip().mask(series.isna())


def build_maps_long(patchpool, max_maps=5):
    """Reshape the wide ``Map{i}_ID`` columns of *patchpool* into long form.

    Args:
        patchpool: frame with ``MatchID``, ``player_team_abbrev``,
            ``series_datetime`` and ``Map1_ID`` .. ``Map{max_maps}_ID`` columns.
        max_maps: highest map slot to read (the data uses 5 slots).

    Returns:
        One row per (patchpool row, map slot) that has a game id, with columns
        ``MatchID, game_id, player_team_abbrev, series_datetime, map_num`` and
        an integer ``game_id``.

    Raises:
        KeyError: if any of the required columns is absent.
    """
    per_map = [
        patchpool[["MatchID", f"Map{i}_ID", "player_team_abbrev", "series_datetime"]]
        .rename(columns={f"Map{i}_ID": "game_id"})
        .assign(map_num=i)
        for i in range(1, max_maps + 1)
    ]
    stacked = pd.concat(per_map, ignore_index=True)
    # Map slots that were not played carry no id; drop them before the int cast.
    return stacked.dropna(subset=["game_id"]).astype({"game_id": int})


def build_game_lookup(maps_long):
    """Return a ``game_id -> (MatchID, series_datetime)`` lookup, one row per game.

    De-duplicating on ``game_id`` alone (first occurrence wins) guarantees the
    lookup is unique per game even when repeated patchpool rows for the same
    game disagree on ``series_datetime``; otherwise a left merge against it
    would multiply matchstats rows.
    """
    lookup = maps_long[["game_id", "MatchID", "series_datetime"]]
    return lookup.drop_duplicates(subset="game_id", keep="first")


def attach_match_ids(matchstats, game_to_match):
    """Left-join *game_to_match* onto *matchstats* by ``game_id``.

    Columns present on both sides (``MatchID`` and/or ``series_datetime``) are
    coalesced: the patchpool value is preferred and the matchstats value is the
    fallback. ``MatchID`` is returned as nullable ``Int64`` so unmatched games
    stay missing instead of forcing a float column.

    Raises:
        pandas.errors.MergeError: if *game_to_match* has repeated ``game_id``s.
    """
    merged = matchstats.merge(
        game_to_match,
        on="game_id",
        how="left",
        suffixes=("_orig", "_patchpool"),
        validate="many_to_one",  # a lookup must never fan out matchstats rows
    )

    # pandas only adds suffixes when BOTH frames have the column, so the two
    # suffixed names always appear together; otherwise the plain name is kept.
    for column in ("MatchID", "series_datetime"):
        from_stats = f"{column}_orig"
        from_pool = f"{column}_patchpool"
        if from_stats in merged.columns and from_pool in merged.columns:
            merged[column] = merged[from_pool].fillna(merged[from_stats])
            merged = merged.drop(columns=[from_stats, from_pool])

    merged["MatchID"] = merged["MatchID"].astype("Int64")
    return merged


# The guard is true both in a notebook kernel and when run as a script, so the
# same module-level names (matchstats, patchpool, maps_long, game_to_match,
# merged) are produced as before; it only prevents file I/O on import.
if __name__ == "__main__":
    matchstats = pd.read_csv(MATCHSTATS_PATH)
    patchpool  = pd.read_csv(PATCHPOOL_PATH)

    #clean team abbreviations
    matchstats["team1_name"] = clean_text_column(matchstats["team1_name"])
    matchstats["team2_name"] = clean_text_column(matchstats["team2_name"])
    patchpool["player_team_abbrev"] = clean_text_column(patchpool["player_team_abbrev"])

    #reshape patchpool to match-level map rows
    maps_long = build_maps_long(patchpool)

    #prepare matchstats for merge (key dtype must match the lookup's int game_id)
    matchstats["game_id"] = matchstats["game_id"].astype(int)

    #lookup for game_id -> matchid
    game_to_match = build_game_lookup(maps_long)

    #merge matchstats with matchid lookup
    merged = attach_match_ids(matchstats, game_to_match)

    merged.to_csv(OUTPUT_PATH, index=False)
    print(f"\nSaved to {OUTPUT_PATH}")
    print(f"File contains {len(merged)} rows with {merged['MatchID'].dropna().nunique()} unique MatchIDs")

# Output:
# \Saved to /Users/samharwood/Downloads/vlr_matchstats_with_matchid_demo.csv
# File contains 32366 rows with 1632 unique MatchIDs
