In [1]:
from pathlib import Path
import sys

# --- Resolve repo root (roundBall) ---
# Start at the working directory; if it is not the repo root itself, climb
# through its ancestors until a folder named "roundBall" is found.
REPO_ROOT = Path.cwd().resolve()
if REPO_ROOT.name != "roundBall":
    # `.parents` is evaluated once, before the loop rebinds REPO_ROOT.
    for REPO_ROOT in REPO_ROOT.parents:
        if REPO_ROOT.name == "roundBall":
            break
    else:
        # Ran out of ancestors without a match.
        raise RuntimeError("Could not locate repo root folder named 'roundBall' from current working directory.")

# --- Make utils importable ---
UTILS_DIR = REPO_ROOT.joinpath("utils")
if sys.path.count(str(UTILS_DIR)) == 0:
    sys.path.append(str(UTILS_DIR))

# --- L1 data directory ---
# Created on demand so later cells can read from / write to it.
L1_DATA_DIR = REPO_ROOT.joinpath("L1", "data")
L1_DATA_DIR.mkdir(parents=True, exist_ok=True)

In [2]:
# ============================================================
# Cell 1: Imports & paths
# ============================================================
import pandas as pd
import importlib.util
import sys

# Input locations, built from the directories resolved in the setup cell
UTILS_PATH = UTILS_DIR / "utils.py"
LIVE_PATH  = L1_DATA_DIR / "basketballReference_LIVE_L1.csv"

# Column names used when joining LIVE rows to the player index:
# JOIN_KEY is the merge column, PLAYER_KEY is the identifier pulled in by the merge.
JOIN_KEY, PLAYER_KEY = "PLAYER_NORM", "INDEX"

In [3]:
# ============================================================
# Cell 2: Load utils.py (normalize_name, load_player_index)
# ============================================================
# utils.py is loaded from an explicit file path and registered under the
# module name "utils_module".

# Fail with a clear message instead of an AttributeError on a None spec.
if not UTILS_PATH.is_file():
    raise FileNotFoundError(f"utils.py not found at: {UTILS_PATH}")

spec = importlib.util.spec_from_file_location("utils_module", UTILS_PATH)
if spec is None or spec.loader is None:
    raise ImportError(f"Could not create an import spec for: {UTILS_PATH}")

utils_module = importlib.util.module_from_spec(spec)
sys.modules["utils_module"] = utils_module
try:
    spec.loader.exec_module(utils_module)
except BaseException:
    # Do not leave a partially executed module registered after a failed load.
    sys.modules.pop("utils_module", None)
    raise

# Bind helpers
# normalize_name is required (missing -> AttributeError here);
# load_player_index is optional at this point and checked where it is used.
normalize_name    = utils_module.normalize_name
load_player_index = getattr(utils_module, "load_player_index", None)

print("✅ utils.py loaded from:", UTILS_PATH)

✅ utils.py loaded from: /Users/ryanbrowder/Documents/Fantasy/roundBall/utils/utils.py


In [4]:
# ============================================================
# Cell 3: Load player index (INDEX, PLAYER, PLAYER_NORM)
# ============================================================
if load_player_index is None:
    raise RuntimeError("utils.py must expose load_player_index().")

index_df = load_player_index(
    utils_dir=REPO_ROOT / "utils",
    index_filename="playerIndex.csv"
)

# Both key columns are required below; report exactly which one is absent.
missing_index_cols = [c for c in (PLAYER_KEY, JOIN_KEY) if c not in index_df.columns]
if missing_index_cols:
    raise KeyError(f"Player index is missing required column(s): {missing_index_cols}")

# Sanity check
display(index_df.head(10))
print("Rows:", len(index_df), "| Unique INDEX:", index_df[PLAYER_KEY].nunique())

# Slim view for merge
index_slim = index_df[[PLAYER_KEY, JOIN_KEY]].drop_duplicates()

# The merge key must be unique on the index side: a normalized name that maps
# to several INDEX values would duplicate that player's row in a left merge.
ambiguous_keys = index_slim[index_slim[JOIN_KEY].duplicated(keep=False)]
if not ambiguous_keys.empty:
    print(
        f"⚠️ {ambiguous_keys[JOIN_KEY].nunique()} {JOIN_KEY} value(s) map to more than one "
        f"{PLAYER_KEY}; keeping the first occurrence of each."
    )
    display(ambiguous_keys.sort_values(JOIN_KEY))
    index_slim = index_slim.drop_duplicates(subset=[JOIN_KEY], keep="first")

Unnamed: 0,INDEX,PLAYER,PLAYER_NORM
0,10001,Nikola Jokic,nikola jokic
1,10002,Shai Gilgeous-Alexander,shai gilgeous alexander
2,10003,Victor Wembanyama,victor wembanyama
3,10004,Luka Doncic,luka doncic
4,10005,Giannis Antetokounmpo,giannis antetokounmpo
5,10006,Anthony Davis,anthony davis
6,10007,Cade Cunningham,cade cunningham
7,10008,Anthony Edwards,anthony edwards
8,10009,Karl-Anthony Towns,karl anthony towns
9,10010,James Harden,james harden


Rows: 547 | Unique INDEX: 547


In [5]:
# ============================================================
# Cell 4: Load basketballReference LIVE data
# ============================================================
keep_cols = ["PLAYER","G","MPG","PTS","FGM","FGA","FG%","FTA","FTM","FT%","3PM","REB","AST","STL","BLK","TO"]

live_raw = pd.read_csv(LIVE_PATH)

# Standardize column casing
live_raw = live_raw.rename(columns={"Player": "PLAYER"})

# The player name drives the join; stop early with a readable message if absent.
if "PLAYER" not in live_raw.columns:
    raise KeyError(
        f"Expected a 'Player' or 'PLAYER' column in {LIVE_PATH}; found: {list(live_raw.columns)}"
    )

# Rows without a name can never be matched, and astype(str) would turn a
# missing value into the literal string "nan", so filter them out first.
has_name = live_raw["PLAYER"].notna() & live_raw["PLAYER"].astype(str).str.strip().ne("")
if not has_name.all():
    print(f"⚠️ Dropping {int((~has_name).sum())} row(s) with no player name.")

# Keep only relevant columns, in order (.copy() so the assignments below
# operate on an independent frame)
df_live = live_raw.loc[has_name, [c for c in keep_cols if c in live_raw.columns]].copy()

# Normalize player names for merging
df_live["PLAYER"] = df_live["PLAYER"].astype(str).str.strip()
df_live[JOIN_KEY] = df_live["PLAYER"].apply(normalize_name)

# Convert numeric columns to float safely (unparseable values become NaN).
# Every kept column except the name is numeric.
num_cols = [c for c in keep_cols if c != "PLAYER"]
for c in num_cols:
    if c in df_live.columns:
        df_live[c] = pd.to_numeric(df_live[c], errors="coerce")

display(df_live.head(10))

Unnamed: 0,PLAYER,G,MPG,PTS,FGM,FGA,FG%,FTA,FTM,FT%,3PM,REB,AST,STL,BLK,TO,PLAYER_NORM
0,Luka Dončić,18.0,37.2,35.0,10.8,23.0,0.471,12.2,9.8,0.804,3.6,9.0,9.0,1.5,0.6,4.3,luka doncic
1,Shai Gilgeous-Alexander,25.0,33.2,32.4,10.8,19.4,0.56,9.7,8.6,0.884,2.2,4.6,6.4,1.4,0.7,1.9,shai gilgeous alexander
2,Tyrese Maxey,23.0,39.9,31.5,10.7,22.9,0.467,7.3,6.4,0.881,3.7,4.7,7.2,1.7,0.9,2.7,tyrese maxey
3,Donovan Mitchell,24.0,34.4,31.3,10.9,21.5,0.505,6.3,5.3,0.842,4.2,4.8,5.4,1.4,0.3,3.4,donovan mitchell
4,Nikola Jokić,24.0,34.7,29.5,10.6,17.1,0.622,7.2,6.1,0.85,2.1,12.3,10.9,1.4,0.8,3.4,nikola jokic
5,Jaylen Brown,24.0,33.5,29.1,10.7,21.4,0.499,7.2,5.7,0.791,2.1,6.1,4.8,1.1,0.4,3.6,jaylen brown
6,Giannis Antetokounmpo,17.0,29.1,28.9,11.1,17.3,0.639,9.8,6.2,0.635,0.6,10.1,6.1,0.9,0.9,3.3,giannis antetokounmpo
7,Jalen Brunson,23.0,35.1,28.8,10.2,21.0,0.487,6.7,5.6,0.838,2.8,3.1,6.4,0.7,0.1,2.3,jalen brunson
8,Anthony Edwards,20.0,34.4,28.7,9.7,19.3,0.5,7.2,6.0,0.833,3.4,4.9,3.8,1.2,0.8,3.0,anthony edwards
9,Stephen Curry,17.0,31.1,28.5,9.4,19.9,0.473,5.5,4.9,0.903,4.8,3.8,4.1,1.2,0.5,3.1,stephen curry


In [6]:
# ============================================================
# Cell 5: Merge LIVE file with player index
# ============================================================
# Left merge: every LIVE row is kept; INDEX is NaN where no index entry matches.
df_live_idx = df_live.merge(index_slim, on=JOIN_KEY, how="left")

# A left merge only returns more rows than its left input when a key occurs
# more than once on the right side, i.e. a player would be counted twice.
extra_rows = len(df_live_idx) - len(df_live)
if extra_rows > 0:
    print(
        f"⚠️ Merge added {extra_rows} row(s): some {JOIN_KEY} values map to more than one {PLAYER_KEY}."
    )

# Merge results summary
total = len(df_live_idx)
matched = df_live_idx[PLAYER_KEY].notna().sum()
unmatched = total - matched
print(f"Rows: {total} | Matched INDEX: {matched} | Unmatched: {unmatched}")

# Show sample of unmatched players
if unmatched > 0:
    unmatched_df = df_live_idx[df_live_idx[PLAYER_KEY].isna()][["PLAYER", JOIN_KEY]]
    if unmatched > 20:
        print(f"Showing the first 20 of {unmatched} unmatched players.")
    display(unmatched_df.head(20))
else:
    print("✅ All players matched to INDEX.")

Rows: 502 | Matched INDEX: 441 | Unmatched: 61


Unnamed: 0,PLAYER,PLAYER_NORM
228,Daniss Jenkins,daniss jenkins
234,Egor Dёmin,egor dеmin
235,Caleb Love,caleb love
239,Will Richard,will richard
248,Pat Spencer,pat spencer
278,Kobe Sanders,kobe sanders
283,Mac McClung,mac mcclung
287,Ethan Thompson,ethan thompson
302,Sidy Cissoko,sidy cissoko
305,Tyrese Proctor,tyrese proctor


In [7]:
# ============================================================
# Cell 6: Assemble final output shape & save
# ============================================================
import numpy as np

# Build final ordered columns (the output file's schema)
final_cols = [
    "INDEX", "PLAYER_NORM", "G", "MPG", "PTS", "FG%", "FT%",
    "3PM", "REB", "AST", "STL", "BLK", "TO"
]

# Work on a copy so re-running this cell never alters the merged frame
final_df = df_live_idx.copy()

# Ensure G and MPG exist; if not, create as NaN
for col in ["G", "MPG"]:
    if col not in final_df.columns:
        final_df[col] = np.nan

# Surface any spec column that is absent instead of dropping it silently
missing_final = [c for c in final_cols if c not in final_df.columns]
if missing_final:
    print(f"⚠️ Columns missing from final output: {missing_final}")

# Reorder to the final spec; anything not listed (PLAYER, FGM, FGA, FTA, FTM, ...)
# is left out by this selection, so no separate drop step is needed.
final_df = final_df[[c for c in final_cols if c in final_df.columns]].copy()

# Coerce numeric columns to float
numeric_cols = ["INDEX", "G", "MPG", "PTS", "FG%", "FT%", "3PM", "REB", "AST", "STL", "BLK", "TO"]
for c in [col for col in numeric_cols if col in final_df.columns]:
    final_df[c] = pd.to_numeric(final_df[c], errors="coerce")

# Save (no unmatched file)
output_main = REPO_ROOT / "L2" / "data" / "live_L2.csv"
# Create the target folder if needed so the write cannot fail on a fresh checkout
output_main.parent.mkdir(parents=True, exist_ok=True)
final_df.to_csv(output_main, index=False)
print(f"✅ Final LIVE output saved to: {output_main}")

# Quick preview
display(final_df.head(10))

# Match summary (still useful context)
total = len(df_live_idx)
matched = df_live_idx["INDEX"].notna().sum()
unmatched = total - matched
print(f"Rows: {total} | Matched INDEX: {matched} | Unmatched: {unmatched}")

✅ Final LIVE output saved to: /Users/ryanbrowder/Documents/Fantasy/roundBall/L2/data/live_L2.csv


Unnamed: 0,INDEX,PLAYER_NORM,G,MPG,PTS,FG%,FT%,3PM,REB,AST,STL,BLK,TO
0,10004.0,luka doncic,18.0,37.2,35.0,0.471,0.804,3.6,9.0,9.0,1.5,0.6,4.3
1,10002.0,shai gilgeous alexander,25.0,33.2,32.4,0.56,0.884,2.2,4.6,6.4,1.4,0.7,1.9
2,10014.0,tyrese maxey,23.0,39.9,31.5,0.467,0.881,3.7,4.7,7.2,1.7,0.9,2.7
3,10018.0,donovan mitchell,24.0,34.4,31.3,0.505,0.842,4.2,4.8,5.4,1.4,0.3,3.4
4,10001.0,nikola jokic,24.0,34.7,29.5,0.622,0.85,2.1,12.3,10.9,1.4,0.8,3.4
5,10038.0,jaylen brown,24.0,33.5,29.1,0.499,0.791,2.1,6.1,4.8,1.1,0.4,3.6
6,10005.0,giannis antetokounmpo,17.0,29.1,28.9,0.639,0.635,0.6,10.1,6.1,0.9,0.9,3.3
7,10023.0,jalen brunson,23.0,35.1,28.8,0.487,0.838,2.8,3.1,6.4,0.7,0.1,2.3
8,10008.0,anthony edwards,20.0,34.4,28.7,0.5,0.833,3.4,4.9,3.8,1.2,0.8,3.0
9,10015.0,stephen curry,17.0,31.1,28.5,0.473,0.903,4.8,3.8,4.1,1.2,0.5,3.1


Rows: 502 | Matched INDEX: 441 | Unmatched: 61
