# Jungle Coach — Ingest & Train (v4)

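Description: Load the Riot API key from the environment, patch asyncio with `nest_asyncio` so `asyncio.run` works inside the notebook, and add the parent directory to `sys.path`.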


In [4]:
import os
import sys

import nest_asyncio

# Patch asyncio so the asyncio.run(...) calls in later cells work inside the
# notebook's already-running event loop.
nest_asyncio.apply()

# Read the key from the environment only. Surrounding whitespace (e.g. a stray
# newline from a copy/paste) is stripped; an empty value counts as "not set".
RIOT_API_KEY = (os.getenv("RIOT_API_KEY") or "").strip() or "REPLACE_ME_WITH_YOUR_KEY"

# Validate BEFORE exporting: a missing key must never leave the placeholder
# behind in os.environ where later code could pick it up as if it were real.
assert not RIOT_API_KEY.startswith("REPLACE_ME"), "Please set RIOT_API_KEY"
os.environ["RIOT_API_KEY"] = RIOT_API_KEY
print("Key loaded ✓")

# Alias kept in case other cells still reference `_os`.
_os = os

# Make the parent directory importable so the `util` and `training` packages
# imported by later cells resolve. Guarded so re-running this cell does not
# keep appending the same entry to sys.path.
_PROJECT_ROOT = os.path.abspath("..")
if _PROJECT_ROOT not in sys.path:
    sys.path.append(_PROJECT_ROOT)


Key loaded ✓


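Description: Run configuration — platform, queue, tier range, fetch limits, window size, cache paths, and the list of training modes — plus a helper to purge the windows cache.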


In [None]:
# --- Seeding / fetch parameters (forwarded to the util.riot_api helpers) -----
PLATFORM = "na1"
QUEUE = "RANKED_SOLO_5x5"
MIN_TIER, MAX_TIER = "CHALLENGER", "CHALLENGER"
DIVISIONS = ["I","II","III","IV"]
PAGES = 3
PUUID_LIMIT = 300
PER_PUUID = 10
CONCURRENCY = 6
QUEUE_ID = None
START_TIME = None
END_TIME = None

# --- Window building ----------------------------------------------------------
WINDOW_S = 5  # passed to the windows builder as window_s (presumably seconds)
CHAMP_FILTER = None  # e.g., 'Nunu' to only include matches with Nunu
USE_WINDOWS_CACHE = True  # set True to reuse previously built windows
USE_PARQUET = True  # also write/read Parquet for faster IO

# --- Training -----------------------------------------------------------------
SKIP_TRAIN_IF_MODEL_EXISTS = False  # set True to leave previous models intact

# --- Paths ----------------------------------------------------------------------
CACHE_DIR = "../data/cache"
import os as _os; _os.makedirs(CACHE_DIR, exist_ok=True)


def windows_cache_stem(platform, queue, window_s, champ_filter=None):
    """Return the base filename (no extension) of the windows cache.

    The stem encodes every setting that changes the cached rows so that a
    cache built with one configuration is never loaded for another.

    Args:
        platform: Platform id, e.g. ``"na1"``.
        queue: Queue name, e.g. ``"RANKED_SOLO_5x5"``.
        window_s: Window size passed to the windows builder.
        champ_filter: Optional champion filter. When falsy the stem is exactly
            the historical ``windows_{platform}_{queue}_ws{window_s}`` so
            existing unfiltered caches keep working.

    Returns:
        The stem as a ``str``.
    """
    stem = f"windows_{platform}_{queue}_ws{window_s}"
    if champ_filter:
        # Keep only filename-safe characters from the filter value.
        tag = "".join(ch for ch in str(champ_filter) if ch.isalnum())
        stem += f"_champ{tag or 'filtered'}"
    return stem


_WINDOWS_STEM = windows_cache_stem(PLATFORM, QUEUE, WINDOW_S, CHAMP_FILTER)
WINDOWS_CACHE = f"{CACHE_DIR}/{_WINDOWS_STEM}.csv"
WINDOWS_CACHE_PARQ = f"{CACHE_DIR}/{_WINDOWS_STEM}.parquet"
OUT_CSV = "../windows_v7.csv"  # CSV path handed to the training entry points
TRAIN_MODES = ["classification","regression_ev","rl_awr"]
def purge_windows_cache():
    """Delete the cached windows files (CSV and, if configured, Parquet).

    Reads the module-level ``WINDOWS_CACHE`` and, when it is defined,
    ``WINDOWS_CACHE_PARQ``. Each file is removed independently, so a failure
    on one (e.g. a permission error) does not prevent removal of the other.
    Files that do not exist are skipped silently. Errors are printed, not
    raised, and a summary of the removed paths is printed at the end.

    Returns:
        None.
    """
    import os

    targets = [WINDOWS_CACHE]
    parquet_path = globals().get('WINDOWS_CACHE_PARQ')
    if parquet_path:
        targets.append(parquet_path)

    removed = []
    for path in targets:
        try:
            # Attempt the removal directly instead of exists()+remove(), which
            # can race with another process deleting the file in between.
            os.remove(path)
        except FileNotFoundError:
            continue
        except OSError as e:
            print('Purge error:', e)
        else:
            removed.append(path)
    print('Purged:', removed if removed else 'nothing')
print("Config ready.")


Config ready.


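Description: Seed PUUIDs, collect match IDs, and fetch matches and timelines through `util.riot_api`, reusing the on-disk cache unless `REFETCH` is set.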


In [6]:
import asyncio
import json
import os

import joblib

from util.riot_api import (
    platform_to_routing,
    seed_puuids,
    fetch_many_ids,
    fetch_matches_and_timelines,
)

# Cache directory, resolved to an absolute path and created on demand.
CACHE_DIR = os.path.abspath(os.path.join('..','data','cache'))
os.makedirs(CACHE_DIR, exist_ok=True)
REFETCH = False  # set True to force network refetch

routing = platform_to_routing(PLATFORM)
print("Routing:", routing)

# One cache file per pipeline stage.
# NOTE(review): the matches/timelines file names are not keyed by platform or
# queue, so switching PLATFORM/QUEUE reuses them as-is — confirm this is intended.
puuids_path = os.path.join(CACHE_DIR, f'puuids_{PLATFORM}_{QUEUE}_{MIN_TIER}_{MAX_TIER}.json')
ids_path = os.path.join(CACHE_DIR, f'ids_{PLATFORM}_{QUEUE}.json')
matches_path = os.path.join(CACHE_DIR, 'matches.joblib')
timelines_path = os.path.join(CACHE_DIR, 'timelines.joblib')

# Defaults in case nothing can be loaded or fetched.
matches, timelines = {}, {}
puuids, ids = [], []

# The cache is all-or-nothing: every stage file must be present to use it.
have_cache = all(map(os.path.exists, (puuids_path, ids_path, matches_path, timelines_path)))

if not REFETCH and have_cache:
    try:
        with open(puuids_path) as f:
            puuids = json.load(f)
        with open(ids_path) as f:
            ids = json.load(f)
        # joblib.load unpickles: only point these paths at files you created yourself.
        matches = joblib.load(matches_path)
        timelines = joblib.load(timelines_path)
    except Exception as e:
        # Any unreadable stage invalidates the whole cache; fall through to a refetch.
        print("Cache load failed, refetching:", e)
        REFETCH = True

if REFETCH or not have_cache:
    # Stage 1: seed player ids.
    puuids = asyncio.run(
        seed_puuids(PLATFORM, QUEUE, MIN_TIER, MAX_TIER, DIVISIONS, PAGES, PUUID_LIMIT)
    )
    with open(puuids_path, 'w') as f:
        json.dump(puuids, f)

    # Stage 2: collect match ids for those players.
    ids = asyncio.run(
        fetch_many_ids(
            routing,
            puuids,
            per_puuid=PER_PUUID,
            concurrency=CONCURRENCY,
            queue_id=QUEUE_ID,
            start_time=START_TIME,
            end_time=END_TIME,
        )
    )
    with open(ids_path, 'w') as f:
        json.dump(ids, f)

    # Stage 3: download the matches and their timelines, then persist both.
    matches, timelines = asyncio.run(
        fetch_matches_and_timelines(routing, ids, concurrency=CONCURRENCY)
    )
    joblib.dump(matches, matches_path)
    joblib.dump(timelines, timelines_path)

print("PUUIDs:", len(puuids))
print("Collected IDs:", len(ids))
print("Fetched:", len(matches), "matches; timelines:", len(timelines))


Routing: americas
PUUIDs: 300
Collected IDs: 1960
Fetched: 1960 matches; timelines: 1960


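Description: Load the per-window feature table `windows_v7` from cache, or build it from the fetched matches and timelines, then print the distinct time steps between consecutive windows as a sanity check.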


In [7]:
import os

import numpy as _np
import pandas as pd

try:
    import importlib, util.windows_builder as wb
except ModuleNotFoundError:
    # The parent directory is not on sys.path yet; add it and retry.
    import sys, os as _os
    sys.path.append(_os.path.abspath(".."))
    import importlib, util.windows_builder as wb
# Reload so edits to the builder module are picked up without a kernel restart.
wb = importlib.reload(wb)
from util.windows_builder import build_windows_v7_for_match, RICH_COLUMNS

print("USE_WINDOWS_CACHE:", USE_WINDOWS_CACHE, "USE_PARQUET:", USE_PARQUET)
print("CSV cache:", WINDOWS_CACHE)
print("PARQ cache:", WINDOWS_CACHE_PARQ)
print("Cache exists?", os.path.exists(WINDOWS_CACHE), os.path.exists(WINDOWS_CACHE_PARQ))

# Tolerate running this cell without the fetch cell (e.g. cache-only runs).
matches = globals().get('matches', {})
timelines = globals().get('timelines', {})
ids = globals().get('ids', [])


def _load_cached_windows():
    """Load the windows table from cache.

    Tries Parquet first (when enabled and present), then CSV.

    Returns:
        ``(frame, source_path)`` on success, or ``(None, None)`` when caching
        is disabled or no readable cache file exists, in which case the caller
        should rebuild instead of continuing with an empty table.
    """
    if not USE_WINDOWS_CACHE:
        return None, None
    if USE_PARQUET and os.path.exists(WINDOWS_CACHE_PARQ):
        try:
            frame = pd.read_parquet(WINDOWS_CACHE_PARQ)
            print("Loaded cached windows (parquet):", WINDOWS_CACHE_PARQ, "shape:", frame.shape)
            return frame, WINDOWS_CACHE_PARQ
        except Exception as e:
            # Best effort: a missing parquet engine or a corrupt file should not
            # stop the run while a CSV cache is still available.
            print("Parquet read failed, falling back to CSV:", e)
    if os.path.exists(WINDOWS_CACHE):
        frame = pd.read_csv(WINDOWS_CACHE)
        print("Loaded cached windows (csv):", WINDOWS_CACHE, "shape:", frame.shape)
        return frame, WINDOWS_CACHE
    return None, None


def _build_windows():
    """Build the windows table from the in-memory ``matches`` / ``timelines``.

    Match ids with a missing match or timeline payload are skipped (and
    counted) rather than handed to the builder as ``None``.

    Returns:
        One concatenated DataFrame, or an empty frame with ``RICH_COLUMNS``
        when nothing could be built.
    """
    frames = []
    skipped = 0
    for mid in ids:
        m = matches.get(mid)
        t = timelines.get(mid)
        if m is None or t is None:
            skipped += 1
            continue
        d = build_windows_v7_for_match(m, t, window_s=WINDOW_S, champ_filter=CHAMP_FILTER)
        if not d.empty:
            frames.append(d)
    if skipped:
        print("Skipped", skipped, "match ids with no match/timeline data")
    if not frames:
        return pd.DataFrame(columns=RICH_COLUMNS)
    return pd.concat(frames, ignore_index=True)


windows_v7, _cache_src = _load_cached_windows()
if windows_v7 is not None:
    # The training entry points read OUT_CSV from disk, so keep it in step with
    # the cache: write it when it is missing or older than the cache file used.
    if (not os.path.exists(OUT_CSV)) or os.path.getmtime(OUT_CSV) < os.path.getmtime(_cache_src):
        windows_v7.to_csv(OUT_CSV, index=False)
        print("Refreshed", OUT_CSV, "from cache:", _cache_src)
else:
    windows_v7 = _build_windows()
    if windows_v7.empty:
        # Never cache an empty table: it would be silently reloaded on every
        # later run instead of triggering a rebuild.
        windows_v7.to_csv(OUT_CSV, index=False)
        print("No windows were built; cache not written. Saved empty", OUT_CSV)
    else:
        windows_v7.to_csv(WINDOWS_CACHE, index=False)
        parquet_written = False
        if USE_PARQUET:
            try:
                windows_v7.to_parquet(WINDOWS_CACHE_PARQ, index=False)
                parquet_written = True
            except Exception as e:
                print("Parquet write skipped:", e)
        # OUT_CSV is written last so it is never older than the cache files.
        windows_v7.to_csv(OUT_CSV, index=False)
        print("Saved", OUT_CSV, "and cache:", WINDOWS_CACHE, ("and parquet:" if parquet_written else ""), (WINDOWS_CACHE_PARQ if parquet_written else ""), "shape:", windows_v7.shape)

# Sanity check: distinct time steps between consecutive rows of each (game, team).
dt = windows_v7.sort_values(["game_id","team","time_s"]).groupby(["game_id","team"])["time_s"].diff().dropna()
print("Unique dt (first 10):", _np.unique(dt.values)[:10])
windows_v7.head(10)


USE_WINDOWS_CACHE: True USE_PARQUET: True
CSV cache: ../data/cache/windows_na1_RANKED_SOLO_5x5_ws5.csv
PARQ cache: ../data/cache/windows_na1_RANKED_SOLO_5x5_ws5.parquet
Cache exists? True False
Loaded cached windows (csv): ../data/cache/windows_na1_RANKED_SOLO_5x5_ws5.csv shape: (1227034, 60)
Unique dt (first 10): [0. 5.]


Unnamed: 0,game_id,time_s,team,win,role,window_type,action_space,action,reward,gold_xp_delta,...,roll_vision_delta_30s,roll_ward_kill_diff_30s,roll_tower_diff_30s,roll_plate_diff_30s,roll_dragon_diff_30s,roll_herald_diff_30s,roll_baron_diff_30s,team_engage_support,team_engage_count,team_disengage_count
0,NA1_5364003812,0,100,1,JUNGLE,team5s,,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,0,0
1,NA1_5364003812,0,200,0,JUNGLE,team5s,,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0
2,NA1_5364003812,5,100,1,JUNGLE,team5s,,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,0,0
3,NA1_5364003812,5,200,0,JUNGLE,team5s,,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0
4,NA1_5364003812,10,100,1,JUNGLE,team5s,,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,0,0
5,NA1_5364003812,10,200,0,JUNGLE,team5s,,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0
6,NA1_5364003812,15,100,1,JUNGLE,team5s,,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,0,0
7,NA1_5364003812,15,200,0,JUNGLE,team5s,,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0
8,NA1_5364003812,20,100,1,JUNGLE,team5s,,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,0,0
9,NA1_5364003812,20,200,0,JUNGLE,team5s,,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0


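Description: Train each model listed in `TRAIN_MODES` (classification, EV regression, AWR) on the exported windows CSV and save the results under `../models`.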


In [8]:
import os

# Every trainer below reads OUT_CSV from disk. If the file is missing, write it
# from the in-memory windows table when available; otherwise fail fast with an
# actionable message instead of an error from deep inside a trainer.
if not os.path.exists(OUT_CSV):
    _windows_frame = globals().get("windows_v7")
    if _windows_frame is None:
        raise FileNotFoundError(
            f"{OUT_CSV} not found and no in-memory windows_v7 is available; "
            "run the windows cell first"
        )
    _windows_frame.to_csv(OUT_CSV, index=False)
    print("Wrote", OUT_CSV, "from in-memory windows_v7")

# Trainer modules are imported lazily so only the selected modes are loaded.
for mode in TRAIN_MODES:
    if mode == "classification":
        out_path = "../models/jungle_bc.joblib"
        if SKIP_TRAIN_IF_MODEL_EXISTS and os.path.exists(out_path):
            print("Skip classification: model exists", out_path)
        else:
            from training.train_classification import main as train_main
            train_main(OUT_CSV, out_model=out_path)
    elif mode == "regression_ev":
        # NOTE(review): SKIP_TRAIN_IF_MODEL_EXISTS is not applied here or for
        # rl_awr; these trainers take an output directory and their file names
        # are not visible from this notebook, so they always retrain.
        from training.train_regression_ev import main as train_main
        train_main(OUT_CSV, out_model_dir="../models")
    elif mode == "rl_awr":
        from training.train_rl_awr import main as train_main
        train_main(OUT_CSV, out_dir="../models")
    else:
        print("Unknown mode:", mode)
print("Training complete.")


AUC: 0.8458
ACC: 0.7506
BRIER: 0.1637
Saved ../models/jungle_bc.joblib (XGBoost)
Saved ../models/feature_importance.csv
AUC: 0.5000
ACC: 0.5035
BRIER: 0.2500
AUC: 0.8470
ACC: 0.7552
BRIER: 0.1595
AUC: 0.8411
ACC: 0.6690
BRIER: 0.2618
Training complete.
