In [1]:
# Cell 1: Imports & display options
import os
import pandas as pd
import numpy as np

# Display options: show up to 200 rows/columns and render floats with two decimals.
# Only one float_format is set here: a previous "{v:,.4f}" setting was overwritten
# on the very next line and therefore never took effect.
pd.set_option("display.max_rows", 200)
pd.set_option("display.max_columns", 200)
pd.set_option("display.float_format", "{:.2f}".format)

print("✅ Imports ready.")

✅ Imports ready.


In [2]:
# Cell 2: Paths & configuration (projections + live)
# Every tunable value used by the ranking pipeline lives in this cell.

# Projections (cleaned): input CSV read by compute_rankings, and the ranked CSV it writes
PROJ_INPUT  = "data/projections_L2.csv"
PROJ_OUTPUT = "../L3/data/rankings_L3.csv"

# Live (cleaned + indexed to your playerIndex): same input/output pairing as above
LIVE_INPUT  = "data/live_L2.csv"
LIVE_OUTPUT = "../L3/data/rankings_LIVE_L3.csv"

# ID columns required in BOTH input files; compute_rankings raises ValueError if any is missing
ID_COLS = ["INDEX", "PLAYER_NORM"]

# Stats that are z-scored; build_rank adds every one except TO, which it subtracts
STAT_COLS = ["PTS","REB","AST","STL","BLK","3PM","FG%","FT%","TO"]

# Optional context columns (coerced to numeric and included in outputs if present)
CONTEXT_COLS = ["G","MPG"]

# When True, maybe_scale_percentages divides FG% / FT% by 100 if the column's
# median is above 1.5 (i.e. the column looks like 0–100 rather than 0–1)
AUTO_SCALE_PCT = True

# Use weights? If True, Cell 3 tries to load weights from utils.py and the
# z-scores are multiplied by them; if False the z-scores are summed unweighted
USE_WEIGHTS = False  # keep False to mirror “same z scoring” as requested

print("✅ Paths and config set.")

✅ Paths and config set.


In [3]:
# Cell 3: (Optional) Load STAT_WEIGHTS from utils.py if present and desired
# This cell is safe even if utils.py or STAT_WEIGHTS is missing: every failure is
# reported and STAT_WEIGHTS stays None, which the ranking code treats as "unweighted".

STAT_WEIGHTS = None
if USE_WEIGHTS:
    import importlib.util
    import sys

    UTILS_PATH = "../utils/utils.py"
    try:
        spec = importlib.util.spec_from_file_location("utils_module", UTILS_PATH)
        if spec is None or spec.loader is None:
            # spec_from_file_location returns None when no loader fits the path
            raise ImportError(f"cannot build an import spec for {UTILS_PATH}")

        utils_module = importlib.util.module_from_spec(spec)
        sys.modules["utils_module"] = utils_module
        try:
            spec.loader.exec_module(utils_module)
        except BaseException:
            # Do not leave a half-initialised module registered after a failed load
            sys.modules.pop("utils_module", None)
            raise

        # Prefer function get_stat_weights(), fallback to STAT_WEIGHTS dict
        get_stat_weights = getattr(utils_module, "get_stat_weights", None)
        if callable(get_stat_weights):
            STAT_WEIGHTS = get_stat_weights()
        elif hasattr(utils_module, "STAT_WEIGHTS"):
            STAT_WEIGHTS = getattr(utils_module, "STAT_WEIGHTS")

        if STAT_WEIGHTS is None:
            # Module loaded fine but exposes no weights: say so instead of "loaded: None"
            print("⚠️ utils.py provides no stat weights — proceeding with unweighted z-score sum.")
        elif not isinstance(STAT_WEIGHTS, dict):
            # build_rank only honours dict weights; anything else would be silently ignored
            print("⚠️ Ignoring stat weights of unsupported type:", type(STAT_WEIGHTS).__name__)
            STAT_WEIGHTS = None
        else:
            print("✅ Weights loaded:", STAT_WEIGHTS)
    except Exception as e:
        # Deliberate best-effort: weights are optional, so report and fall back to unweighted
        STAT_WEIGHTS = None
        print("⚠️ Could not load utils.py weights:", e)
else:
    print("ℹ️ USE_WEIGHTS=False — proceeding with unweighted z-score sum.")

ℹ️ USE_WEIGHTS=False — proceeding with unweighted z-score sum.


In [4]:
# Cell 4: Helper functions

def coerce_numeric(df: pd.DataFrame, cols):
    """
    Convert each listed column that exists in ``df`` to a numeric dtype.

    Values that cannot be parsed become NaN. Names in ``cols`` that are not
    columns of ``df`` are skipped. The frame is modified in place and the same
    object is returned.
    """
    present = (name for name in cols if name in df.columns)
    for name in present:
        df[name] = pd.to_numeric(df[name], errors="coerce")
    return df

def maybe_scale_percentages(df: pd.DataFrame, pct_cols=("FG%", "FT%")):
    """
    Rescale percentage columns from a 0–100 scale to 0–1 when they look unscaled.

    Does nothing when the module-level AUTO_SCALE_PCT flag is falsy. Otherwise,
    for every column in ``pct_cols`` that exists in ``df``, the column is divided
    by 100 if its median is greater than 1.5 (heuristic for a 0–100 scale).

    The median is taken over a numeric view of the column (unparseable values
    count as NaN), so a column that was read as text does not raise. A column
    that is rescaled is stored as numeric; a column that is not rescaled is left
    untouched.

    The frame is modified in place and the same object is returned.
    """
    if not AUTO_SCALE_PCT:
        return df
    for c in pct_cols:
        if c not in df.columns:
            continue
        # Work on a numeric view: median/division on raw text values would raise TypeError
        numeric = pd.to_numeric(df[c], errors="coerce")
        # Heuristic: if median > 1.5, likely 0–100 scale → convert to 0–1
        med = numeric.median(skipna=True)
        if pd.notna(med) and med > 1.5:
            df[c] = numeric / 100.0
    return df

def compute_zscores(df: pd.DataFrame, stat_cols):
    """
    Return a copy of ``df`` with a ``<stat>_z`` column for every stat present.

    Each z-score is (value - column mean) / column standard deviation, using the
    population standard deviation (ddof=0) and ignoring NaN when computing both.
    When the standard deviation is zero or NaN the whole z column is set to 0.0.
    Stats that are not columns of the frame are skipped; ``df`` is not modified.
    """
    scored = df.copy()
    for stat in stat_cols:
        if stat in scored.columns:
            values = scored[stat]
            center = values.mean(skipna=True)
            spread = values.std(ddof=0, skipna=True)
            # A flat or empty column carries no ranking signal
            usable = not (pd.isna(spread) or spread == 0)
            scored[f"{stat}_z"] = (values - center) / spread if usable else 0.0
    return scored

def build_rank(df: pd.DataFrame, stat_cols, weights=None):
    """
    Return a copy of ``df`` with composite RANK_SCORE and RANK columns.

    RANK_SCORE adds the z-scores of PTS, REB, AST, STL, BLK, 3PM, FG% and FT%
    and subtracts the z-score of TO, using only the ``<stat>_z`` columns that
    exist in ``df``. Missing z values count as 0.0. When ``weights`` is a
    non-empty dict, each z-score is multiplied by ``weights[stat]`` (1.0 when
    the stat has no entry); otherwise every stat has weight 1.0.

    RANK is the dense rank of RANK_SCORE, highest score first, as an integer.
    ``stat_cols`` is accepted for interface compatibility but is not consulted.
    """
    # (stat, contributes positively?) in the order the terms are accumulated
    signed_stats = [
        ("PTS", True), ("REB", True), ("AST", True), ("STL", True),
        ("BLK", True), ("3PM", True), ("FG%", True), ("FT%", True),
        ("TO", False),
    ]

    total = pd.Series(0.0, index=df.index)
    for stat, is_positive in signed_stats:
        z_name = f"{stat}_z"
        if z_name not in df.columns:
            continue
        if weights and isinstance(weights, dict):
            factor = float(weights.get(stat, 1.0))
        else:
            factor = 1.0
        contribution = df[z_name].fillna(0.0) * factor
        total = total + contribution if is_positive else total - contribution

    ranked = df.copy()
    ranked["RANK_SCORE"] = total
    dense = ranked["RANK_SCORE"].rank(method="dense", ascending=False)
    ranked["RANK"] = dense.astype(int)
    return ranked

def compute_rankings(input_csv, output_csv, id_cols, stat_cols, context_cols, weights=None):
    """
    Full pipeline for one CSV → ranked CSV.

    Steps: read ``input_csv``; check that every ID column is present; coerce the
    stat/context columns to numeric; optionally rescale FG% / FT% to 0–1;
    z-score the stats; build RANK_SCORE / RANK; write the result to
    ``output_csv`` (creating its directory if needed); display the first 15 rows.

    Parameters:
        input_csv: path of the CSV to rank.
        output_csv: path the ranked CSV is written to.
        id_cols: identifier columns that must exist in the input.
        stat_cols: stat columns to z-score and include in the output.
        context_cols: extra columns included in the output when present.
        weights: optional dict of per-stat weights passed to build_rank.

    Returns:
        The ranked DataFrame, sorted by RANK ascending then RANK_SCORE descending,
        holding the ID, context, stat, RANK_SCORE and RANK columns that exist.

    Raises:
        ValueError: if any column in ``id_cols`` is missing from the input CSV.
    """
    # Accept any sequence type (list/tuple) for the column arguments
    id_cols = list(id_cols)
    stat_cols = list(stat_cols)
    context_cols = list(context_cols)

    # Load
    df = pd.read_csv(input_csv)

    # Verify IDs
    missing_ids = [c for c in id_cols if c not in df.columns]
    if missing_ids:
        raise ValueError(f"Missing ID columns {missing_ids} in {input_csv}")

    # Coerce numerics for stats/context first, so the percent heuristic below
    # always sees numeric values (text columns would make median/division raise)
    df = coerce_numeric(df, stat_cols + context_cols)

    # Optional percent scaling guard
    df = maybe_scale_percentages(df, pct_cols=("FG%", "FT%"))

    # Z-scores
    zdf = compute_zscores(df, stat_cols)

    # Rank (with or without weights)
    rdf = build_rank(zdf, stat_cols, weights=weights)

    # Output columns: keep the requested order, drop repeats and absent columns
    wanted = id_cols + context_cols + stat_cols + ["RANK_SCORE", "RANK"]
    out_cols = [c for c in dict.fromkeys(wanted) if c in rdf.columns]
    out = rdf[out_cols].sort_values(["RANK","RANK_SCORE"], ascending=[True, False]).reset_index(drop=True)

    # Save; a bare filename has no directory part and makedirs("") would raise
    out_dir = os.path.dirname(output_csv)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    out.to_csv(output_csv, index=False)
    print(f"✅ Saved rankings to: {output_csv}")
    display(out.head(15))
    return out

In [5]:
# Cell 5: Compute & save PROJECTIONS rankings
# Runs the full pipeline on PROJ_INPUT and writes the ranked table to PROJ_OUTPUT.
# The returned frame is kept in proj_rankings for the LIVE-vs-PROJECTIONS comparison.
proj_rankings = compute_rankings(
    input_csv=PROJ_INPUT,
    output_csv=PROJ_OUTPUT,
    id_cols=ID_COLS,
    stat_cols=STAT_COLS,
    context_cols=CONTEXT_COLS,
    # Weights are only passed through when USE_WEIGHTS is enabled
    weights=(STAT_WEIGHTS if USE_WEIGHTS else None)
)

✅ Saved rankings to: ../L3/data/rankings_L3.csv


Unnamed: 0,INDEX,PLAYER_NORM,G,MPG,PTS,REB,AST,STL,BLK,3PM,FG%,FT%,TO,RANK_SCORE,RANK
0,10003.0,victor wembanyama,67.8,32.96,25.95,11.37,4.01,1.23,3.76,2.26,0.5,0.82,3.51,14.92,1
1,10001.0,nikola jokic,73.2,35.72,28.08,12.3,10.01,1.63,0.73,1.86,0.58,0.81,3.19,13.43,2
2,10004.0,luka doncic,70.8,35.84,30.47,8.42,8.59,1.64,0.49,3.6,0.48,0.78,3.77,11.21,3
3,10002.0,shai gilgeous alexander,74.0,33.84,32.34,5.24,6.4,1.74,0.98,1.98,0.52,0.89,2.38,11.16,4
4,10006.0,anthony davis,63.4,34.3,24.77,11.46,3.69,1.17,2.18,0.73,0.52,0.79,2.33,10.4,5
5,10005.0,giannis antetokounmpo,70.0,34.5,31.46,11.82,6.78,0.95,1.08,0.5,0.6,0.63,3.4,8.44,6
6,10021.0,derrick white,69.6,33.73,17.52,4.89,5.55,1.06,1.15,3.37,0.44,0.86,1.86,8.27,7
7,10014.0,tyrese maxey,67.4,37.82,26.09,3.51,6.09,1.46,0.41,3.16,0.44,0.87,2.18,8.11,8
8,10015.0,stephen curry,66.8,32.44,25.47,4.59,5.81,0.99,0.42,4.55,0.45,0.93,2.88,8.07,9
9,10008.0,anthony edwards,73.0,36.4,27.82,5.79,4.92,1.26,0.63,3.88,0.45,0.83,3.22,8.0,10


In [6]:
# Cell 6: Compute & save LIVE rankings
# Same pipeline and settings as the projections run, applied to LIVE_INPUT and
# written to LIVE_OUTPUT. The returned frame is kept in live_rankings for the
# LIVE-vs-PROJECTIONS comparison.
live_rankings = compute_rankings(
    input_csv=LIVE_INPUT,
    output_csv=LIVE_OUTPUT,
    id_cols=ID_COLS,
    stat_cols=STAT_COLS,
    context_cols=CONTEXT_COLS,
    # Weights are only passed through when USE_WEIGHTS is enabled
    weights=(STAT_WEIGHTS if USE_WEIGHTS else None)
)

✅ Saved rankings to: ../L3/data/rankings_LIVE_L3.csv


Unnamed: 0,INDEX,PLAYER_NORM,G,MPG,PTS,REB,AST,STL,BLK,3PM,FG%,FT%,TO,RANK_SCORE,RANK
0,10003.0,victor wembanyama,4.0,32.3,31.0,13.8,2.8,1.5,4.8,1.0,0.6,0.8,1.5,17.34,1
1,10004.0,luka doncic,2.0,38.0,46.0,11.5,8.5,1.0,0.5,3.5,0.62,0.79,3.0,12.9,2
2,10001.0,nikola jokic,4.0,34.3,20.3,14.5,11.3,2.0,0.5,0.8,0.57,0.93,3.0,11.6,3
3,10014.0,tyrese maxey,4.0,43.0,37.5,3.8,8.3,1.0,0.8,4.5,0.46,0.87,2.0,11.08,4
4,10015.0,stephen curry,5.0,30.4,27.0,3.8,5.0,1.8,0.8,4.4,0.51,1.0,2.2,10.05,5
5,10005.0,giannis antetokounmpo,4.0,32.8,36.3,14.0,7.0,0.8,1.3,1.0,0.69,0.64,3.3,10.04,6
6,10064.0,lauri markkanen,4.0,38.3,34.0,7.3,2.8,0.8,0.8,4.0,0.51,0.91,1.0,9.8,7
7,10017.0,lamelo ball,4.0,32.5,26.3,8.3,9.5,1.8,0.3,4.3,0.45,0.8,4.0,9.64,8
8,10040.0,austin reaves,5.0,38.2,34.2,5.6,10.0,1.8,0.0,3.0,0.53,0.9,3.8,9.16,9
9,10002.0,shai gilgeous alexander,5.0,38.6,34.8,6.2,5.4,1.4,1.4,1.4,0.52,0.85,2.4,8.86,10


In [7]:
# Cell 7: Compare by RANK_SCORE delta (LIVE vs PROJECTIONS)
# RANK_SCORE_DELTA = live score - projected score:
#   positive => outperforming projections, negative => underperforming.
# RANK_DELTA = live rank - projected rank, so a negative value means the player
# moved UP the rankings.

compare_cols = ["INDEX", "PLAYER_NORM", "RANK", "RANK_SCORE"]

# Same four columns from each run, with rank/score renamed per source
p = proj_rankings.loc[:, compare_cols].rename(
    columns={"RANK": "RANK_PROJ", "RANK_SCORE": "RANK_SCORE_PROJ"}
)
l = live_rankings.loc[:, compare_cols].rename(
    columns={"RANK": "RANK_LIVE", "RANK_SCORE": "RANK_SCORE_LIVE"}
)

# Inner join: only players present in both runs are compared
cmp_df = pd.merge(p, l, on=["INDEX", "PLAYER_NORM"], how="inner").assign(
    RANK_SCORE_DELTA=lambda m: m["RANK_SCORE_LIVE"] - m["RANK_SCORE_PROJ"],
    RANK_DELTA=lambda m: m["RANK_LIVE"] - m["RANK_PROJ"],
)

# Largest positive delta first; ties broken by better live rank
over_df = cmp_df.sort_values(
    by=["RANK_SCORE_DELTA", "RANK_LIVE"], ascending=[False, True]
).reset_index(drop=True)

# Most negative delta first, skipping players whose projected rank score is exactly zero
has_projection = cmp_df["RANK_SCORE_PROJ"].ne(0)
under_df = (
    cmp_df.loc[has_projection]
    .sort_values(by=["RANK_SCORE_DELTA", "RANK_LIVE"], ascending=[True, True])
    .reset_index(drop=True)
)
del has_projection  # temporary mask only; keep the notebook namespace unchanged

cols = [
    "INDEX", "PLAYER_NORM",
    "RANK_PROJ", "RANK_LIVE",
    "RANK_SCORE_PROJ", "RANK_SCORE_LIVE",
    "RANK_SCORE_DELTA", "RANK_DELTA"
]

print("✅ Positive RANK_SCORE_DELTA = outperforming projections; negative = underperforming.\n")

print("🏀 Top Overperformers (LIVE > PROJECTIONS):")
display(over_df.head(20)[cols])

print("😬 Top Underperformers (LIVE < PROJECTIONS):")
display(under_df.head(20)[cols])

✅ Positive RANK_SCORE_DELTA = outperforming projections; negative = underperforming.

🏀 Top Overperformers (LIVE > PROJECTIONS):


Unnamed: 0,INDEX,PLAYER_NORM,RANK_PROJ,RANK_LIVE,RANK_SCORE_PROJ,RANK_SCORE_LIVE,RANK_SCORE_DELTA,RANK_DELTA
0,10529.0,sion james,507,86,-2.85,3.31,6.15,-421
1,10361.0,nique clifford,521,107,-3.29,2.55,5.84,-414
2,10268.0,cedric coward,355,42,-0.34,5.15,5.49,-313
3,10189.0,dillon brooks,199,23,1.79,6.73,4.94,-176
4,10096.0,mikal bridges,120,14,3.61,8.39,4.77,-106
5,10113.0,kyshawn george,109,12,3.81,8.47,4.66,-97
6,10040.0,austin reaves,75,9,4.65,9.16,4.51,-66
7,10129.0,norman powell,138,17,3.08,7.54,4.47,-121
8,10321.0,steven adams,397,85,-1.03,3.35,4.38,-312
9,10451.0,svi mykhailiuk,440,101,-1.61,2.74,4.35,-339


😬 Top Underperformers (LIVE < PROJECTIONS):


Unnamed: 0,INDEX,PLAYER_NORM,RANK_PROJ,RANK_LIVE,RANK_SCORE_PROJ,RANK_SCORE_LIVE,RANK_SCORE_DELTA,RANK_DELTA
0,10347.0,johnny juzang,243,409,1.18,-8.59,-9.76,166
1,10070.0,jakob poeltl,93,374,4.21,-4.82,-9.03,281
2,10256.0,bogdan bogdanovic,258,407,0.95,-8.02,-8.97,149
3,10128.0,andrew nembhard,160,387,2.55,-5.56,-8.11,227
4,10239.0,justin champagnie,169,384,2.4,-5.4,-7.8,215
5,10275.0,jordan goodwin,270,400,0.83,-6.79,-7.62,130
6,10106.0,isaiah jackson,112,339,3.72,-3.81,-7.53,227
7,10224.0,jonathan isaac,154,360,2.62,-4.45,-7.08,206
8,10037.0,dyson daniels,23,195,6.79,0.13,-6.66,172
9,10255.0,tristan vukcevic,281,394,0.6,-6.01,-6.61,113
