In [1]:
# Cell 1: Imports & display options
import os
import pandas as pd
import numpy as np

# Show up to 200 rows/columns, and render floats with thousands separators
# and four decimal places.
pd.options.display.max_rows = 200
pd.options.display.max_columns = 200
pd.options.display.float_format = "{:,.4f}".format

print("✅ Imports ready.")

✅ Imports ready.


In [2]:
# Cell 2: Paths & configuration (projections + live)

# --- File locations ---------------------------------------------------------
# Projections (cleaned): input CSV -> rankings CSV
PROJ_INPUT, PROJ_OUTPUT = (
    "data/projections_L2.csv",
    "../L3/data/rankings_L3.csv",
)

# Live (cleaned + indexed to your playerIndex): input CSV -> rankings CSV
LIVE_INPUT, LIVE_OUTPUT = (
    "data/live_L2.csv",
    "../L3/data/rankings_LIVE_L3.csv",
)

# --- Column groups ----------------------------------------------------------
# Identifier columns that must exist in BOTH input files
ID_COLS = [
    "INDEX",
    "PLAYER_NORM",
]

# Stat columns that are z-scored and combined into the ranking
STAT_COLS = [
    "PTS", "REB", "AST",
    "STL", "BLK", "3PM",
    "FG%", "FT%", "TO",
]

# Context columns: carried into the outputs only when present
CONTEXT_COLS = [
    "G",
    "MPG",
]

# --- Behaviour switches -----------------------------------------------------
# True => FG% / FT% that look like 0–100 values are rescaled to 0–1
AUTO_SCALE_PCT = True

# True => try to load STAT_WEIGHTS from utils.py and weight the z-scores.
# Left False so the ranking is the plain (unweighted) z-score sum.
USE_WEIGHTS = False

print("✅ Paths and config set.")

✅ Paths and config set.


In [3]:
# Cell 3: (Optional) Load STAT_WEIGHTS from utils.py if present and desired
# This cell is safe even if utils.py or STAT_WEIGHTS is missing: every failure
# is reported and STAT_WEIGHTS is left as None (unweighted ranking).

STAT_WEIGHTS = None
if USE_WEIGHTS:
    import importlib.util
    import sys

    UTILS_PATH = "../utils/utils.py"
    try:
        spec = importlib.util.spec_from_file_location("utils_module", UTILS_PATH)
        # spec_from_file_location can return None (or a spec without a loader)
        # for paths it cannot handle; fail with a readable message instead of
        # an AttributeError further down.
        if spec is None or spec.loader is None:
            raise ImportError(f"cannot create an import spec for {UTILS_PATH}")

        utils_module = importlib.util.module_from_spec(spec)
        sys.modules["utils_module"] = utils_module
        try:
            spec.loader.exec_module(utils_module)
        except Exception:
            # Do not leave a half-initialised module registered.
            sys.modules.pop("utils_module", None)
            raise

        # Prefer function get_stat_weights(), fallback to STAT_WEIGHTS dict
        get_stat_weights = getattr(utils_module, "get_stat_weights", None)
        if callable(get_stat_weights):
            STAT_WEIGHTS = get_stat_weights()
        elif hasattr(utils_module, "STAT_WEIGHTS"):
            STAT_WEIGHTS = getattr(utils_module, "STAT_WEIGHTS")

        if STAT_WEIGHTS is None:
            # Nothing usable was found: say so instead of reporting success.
            print("⚠️ No weights found in utils.py (get_stat_weights() / STAT_WEIGHTS) — ranking will be unweighted.")
        elif not isinstance(STAT_WEIGHTS, dict):
            # build_rank only applies weights given as a dict; anything else
            # would be silently ignored there, so surface it here.
            print("⚠️ Ignoring weights of type", type(STAT_WEIGHTS).__name__, "— expected a dict.")
            STAT_WEIGHTS = None
        else:
            print("✅ Weights loaded:", STAT_WEIGHTS)
    except Exception as e:
        # Deliberate best-effort: weights are optional, so report and continue.
        print("⚠️ Could not load utils.py weights:", e)
else:
    print("ℹ️ USE_WEIGHTS=False — proceeding with unweighted z-score sum.")

ℹ️ USE_WEIGHTS=False — proceeding with unweighted z-score sum.


In [4]:
# Cell 4: Helper functions

def coerce_numeric(df: pd.DataFrame, cols):
    """
    Convert the named columns of ``df`` to numeric dtype, in place.

    Names in ``cols`` that are not columns of ``df`` are skipped. Values that
    cannot be parsed become NaN (``errors="coerce"``). The same ``df`` object
    is returned so calls can be chained.
    """
    present = [name for name in cols if name in df.columns]
    for name in present:
        df[name] = pd.to_numeric(df[name], errors="coerce")
    return df

def maybe_scale_percentages(df: pd.DataFrame, pct_cols=("FG%", "FT%"), auto_scale=None):
    """
    Rescale percentage columns that look like 0–100 values down to 0–1, in place.

    Parameters
    ----------
    df : pd.DataFrame
        Frame to adjust; modified in place and returned.
    pct_cols : iterable of str
        Candidate percentage columns; names missing from ``df`` are skipped.
    auto_scale : bool or None
        Explicit on/off switch. ``None`` (the default) falls back to the
        module-level ``AUTO_SCALE_PCT`` setting.

    Returns
    -------
    pd.DataFrame
        The same ``df`` object.

    Notes
    -----
    A column is rescaled when the median of its numeric values exceeds 1.5.
    Values are parsed with ``pd.to_numeric(errors="coerce")`` first, so the
    function also works on columns that are still stored as strings
    (unparseable entries become NaN in a rescaled column). Columns that are
    not rescaled are left exactly as they were.
    """
    if auto_scale is None:
        auto_scale = AUTO_SCALE_PCT
    if not auto_scale:
        return df
    for c in pct_cols:
        if c not in df.columns:
            continue
        # Parse first: the median/division below need numbers, and the column
        # may not have been coerced by the caller yet.
        values = pd.to_numeric(df[c], errors="coerce")
        # Heuristic: if median > 1.5, likely 0–100 scale → convert to 0–1
        med = values.median(skipna=True)
        if pd.notna(med) and med > 1.5:
            df[c] = values / 100.0
    return df

def compute_zscores(df: pd.DataFrame, stat_cols):
    """
    Return a copy of ``df`` with a ``<stat>_z`` column for each stat present.

    Each z-score is ``(value - mean) / std`` using the population standard
    deviation (``ddof=0``) and ignoring NaNs. When the standard deviation is
    zero or NaN, the whole ``<stat>_z`` column is set to 0.0. Stats that are
    not columns of the frame are skipped. The input frame is not modified.
    """
    scored = df.copy()
    for stat in stat_cols:
        if stat in scored.columns:
            series = scored[stat]
            center = series.mean(skipna=True)
            spread = series.std(ddof=0, skipna=True)  # population std
            degenerate = pd.isna(spread) or spread == 0
            scored[f"{stat}_z"] = 0.0 if degenerate else (series - center) / spread
    return scored

def build_rank(df: pd.DataFrame, stat_cols, weights=None):
    """
    Build the composite RANK_SCORE and RANK columns from ``<stat>_z`` columns.

    Scoring:
      - Add the z-scores of the positive stats
        (PTS, REB, AST, STL, BLK, 3PM, FG%, FT%).
      - Subtract the z-score of turnovers (TO).
      - Missing (NaN) z-scores contribute 0.

    Parameters
    ----------
    df : pd.DataFrame
        Frame containing ``<stat>_z`` columns; it is not modified.
    stat_cols : iterable of str or None
        Stats allowed to contribute. A stat is used only if it is listed here
        AND its ``<stat>_z`` column exists in ``df``. ``None`` means "every
        known stat that has a z-score column".
    weights : dict or None
        Optional per-stat multipliers keyed by stat name; stats missing from
        the dict get weight 1.0. Any non-dict (or empty) value means
        unweighted.

    Returns
    -------
    pd.DataFrame
        Copy of ``df`` with ``RANK_SCORE`` (float) and ``RANK`` (int, dense
        ranking where 1 is the highest score and ties share a rank).
    """
    pos = ["PTS","REB","AST","STL","BLK","3PM","FG%","FT%"]
    neg = ["TO"]

    # Honour the caller's stat selection instead of scoring every *_z column
    # that happens to be in the frame.
    requested = None if stat_cols is None else set(stat_cols)

    def is_active(c):
        return f"{c}_z" in df.columns and (requested is None or c in requested)

    pos = [c for c in pos if is_active(c)]
    neg = [c for c in neg if is_active(c)]

    score = pd.Series(0.0, index=df.index)

    # Helper to get weight (1.0 default when weights=None)
    def w(c):
        if weights and isinstance(weights, dict):
            return float(weights.get(c, 1.0))
        return 1.0

    for c in pos:
        score = score + df[f"{c}_z"].fillna(0.0) * w(c)
    for c in neg:
        score = score - df[f"{c}_z"].fillna(0.0) * w(c)

    out = df.copy()
    out["RANK_SCORE"] = score
    out["RANK"] = out["RANK_SCORE"].rank(method="dense", ascending=False).astype(int)
    return out

def compute_rankings(input_csv, output_csv, id_cols, stat_cols, context_cols, weights=None):
    """
    Full pipeline for one CSV → ranked CSV.

    Steps: read ``input_csv``, check the ID columns, coerce stat/context
    columns to numeric, optionally rescale FG%/FT%, z-score the stats, build
    RANK_SCORE/RANK, write the result to ``output_csv`` and display the first
    15 rows.

    Parameters
    ----------
    input_csv : str or path-like
        CSV to read.
    output_csv : str or path-like
        Destination CSV; its parent directory is created when needed.
    id_cols, stat_cols, context_cols : sequence of str
        Identifier columns (required), stat columns (z-scored) and context
        columns (kept in the output when present).
    weights : dict or None
        Passed through to ``build_rank``.

    Returns
    -------
    pd.DataFrame
        The ranked frame, sorted by RANK (ascending) then RANK_SCORE
        (descending), with a fresh 0..n-1 index.

    Raises
    ------
    ValueError
        If any of ``id_cols`` is missing from the input file.
    """
    # Accept lists or tuples interchangeably (list + tuple would raise).
    id_cols = list(id_cols)
    stat_cols = list(stat_cols)
    context_cols = list(context_cols)

    # Load
    df = pd.read_csv(input_csv)

    # Verify IDs
    missing_ids = [c for c in id_cols if c not in df.columns]
    if missing_ids:
        raise ValueError(f"Missing ID columns {missing_ids} in {input_csv}")

    # Coerce numerics for stats/context FIRST, so the percent-scaling
    # heuristic below sees numbers rather than raw strings.
    df = coerce_numeric(df, stat_cols + context_cols)

    # Optional percent scaling guard
    df = maybe_scale_percentages(df, pct_cols=("FG%", "FT%"))

    # Z-scores
    zdf = compute_zscores(df, stat_cols)

    # Rank (with or without weights)
    rdf = build_rank(zdf, stat_cols, weights=weights)

    # Output columns: IDs, context, stats, then the ranking columns. Keep only
    # those that exist, without duplicates (a name may be in several groups).
    wanted = id_cols + context_cols + stat_cols + ["RANK_SCORE", "RANK"]
    out_cols = [c for c in dict.fromkeys(wanted) if c in rdf.columns]
    out = rdf[out_cols].sort_values(["RANK","RANK_SCORE"], ascending=[True, False]).reset_index(drop=True)

    # Save. dirname() is "" for a bare filename, and os.makedirs("") raises,
    # so only create the directory when there is one.
    out_dir = os.path.dirname(output_csv)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    out.to_csv(output_csv, index=False)
    print(f"✅ Saved rankings to: {output_csv}")
    display(out.head(15))
    return out

In [5]:
# Cell 5: Compute & save PROJECTIONS rankings
# Weights are forwarded only when USE_WEIGHTS is switched on.
proj_rankings = compute_rankings(
    PROJ_INPUT,
    PROJ_OUTPUT,
    ID_COLS,
    STAT_COLS,
    CONTEXT_COLS,
    weights=STAT_WEIGHTS if USE_WEIGHTS else None,
)

✅ Saved rankings to: ../L3/data/rankings_L3.csv


Unnamed: 0,INDEX,PLAYER_NORM,G,MPG,PTS,REB,AST,STL,BLK,3PM,FG%,FT%,TO,RANK_SCORE,RANK
0,10003.0,victor wembanyama,67.8,32.9618,25.3318,11.3705,4.0114,1.2273,3.7636,2.9091,0.4769,0.8265,3.5068,15.3929,1
1,10001.0,nikola jokic,73.2,35.7176,28.0773,12.3045,10.0114,1.6273,0.7341,1.8568,0.5782,0.8078,3.1932,13.3923,2
2,10004.0,luka doncic,70.8,35.8412,30.4727,8.425,8.5886,1.6409,0.4886,3.5955,0.4758,0.783,3.7659,11.1527,3
3,10002.0,shai gilgeous alexander,74.0,33.8412,32.3432,5.2364,6.3955,1.7409,0.9795,1.9818,0.521,0.8918,2.3773,11.1291,4
4,10006.0,anthony davis,63.4,34.3029,24.775,11.4636,3.6864,1.1705,2.1773,0.7318,0.5219,0.7869,2.3295,10.4156,5
5,10005.0,giannis antetokounmpo,70.0,34.5029,31.325,11.825,6.7795,0.9477,1.0841,0.4023,0.5979,0.6331,3.4,8.2864,6
6,10030.0,chet holmgren,64.4,29.2,17.0432,8.8023,2.4818,0.7659,2.3659,1.625,0.5048,0.7734,1.8045,8.2388,7
7,10021.0,derrick white,69.6,33.7324,17.5227,4.8932,5.5523,1.0591,1.1545,3.3727,0.4415,0.8632,1.8591,8.2278,8
8,10015.0,stephen curry,66.8,32.4412,25.4659,4.5886,5.8068,0.9886,0.4227,4.55,0.4508,0.9285,2.8773,8.0229,9
9,10008.0,anthony edwards,79.0,36.4029,27.8227,5.7886,4.9227,1.2614,0.6341,3.8795,0.4522,0.8337,3.2205,7.9534,10


In [6]:
# Cell 6: Compute & save LIVE rankings
# Weights are forwarded only when USE_WEIGHTS is switched on.
live_rankings = compute_rankings(
    LIVE_INPUT,
    LIVE_OUTPUT,
    ID_COLS,
    STAT_COLS,
    CONTEXT_COLS,
    weights=STAT_WEIGHTS if USE_WEIGHTS else None,
)

✅ Saved rankings to: ../L3/data/rankings_LIVE_L3.csv


Unnamed: 0,INDEX,PLAYER_NORM,G,MPG,PTS,REB,AST,STL,BLK,3PM,FG%,FT%,TO,RANK_SCORE,RANK
0,10003.0,victor wembanyama,2.0,31.0,34.5,13.0,1.5,1.0,6.0,0.5,0.636,0.8,0.0,15.7998,1
1,10146.0,aaron gordon,1.0,39.0,50.0,8.0,2.0,0.0,1.0,10.0,0.81,1.0,2.0,14.4792,2
2,10014.0,tyrese maxey,1.0,41.0,40.0,2.0,6.0,2.0,1.0,7.0,0.542,0.875,1.0,12.6938,3
3,10004.0,luka doncic,2.0,38.0,46.0,11.5,8.5,1.0,0.5,3.5,0.62,0.793,3.0,11.2862,4
4,10015.0,stephen curry,3.0,32.0,33.3,4.3,4.7,2.3,1.3,5.3,0.525,1.0,2.7,10.6539,5
5,10113.0,kyshawn george,2.0,33.5,27.5,10.0,4.0,1.5,2.5,5.0,0.6,0.692,3.5,10.5731,6
6,10060.0,nikola vucevic,1.0,33.0,28.0,14.0,2.0,1.0,0.0,4.0,0.611,1.0,1.0,9.1219,7
7,10002.0,shai gilgeous alexander,2.0,46.5,45.0,6.5,5.0,2.0,1.5,1.5,0.474,0.825,2.5,9.0817,8
8,10001.0,nikola jokic,1.0,41.0,21.0,13.0,10.0,1.0,1.0,2.0,0.348,0.75,2.0,8.636,9
9,10031.0,alperen sengun,2.0,43.5,28.0,9.0,7.0,2.5,0.5,2.5,0.405,0.81,3.5,7.6381,10


In [7]:
# Cell 7: Compare by RANK_SCORE delta (LIVE vs PROJECTIONS)
# RANK_SCORE_DELTA = live score - projected score
#   positive => outperforming projections
#   negative => underperforming
# RANK_DELTA = live rank - projected rank

compare_cols = ["INDEX", "PLAYER_NORM", "RANK", "RANK_SCORE"]

# Tag the ranking columns with their source so both fit in one frame.
p = proj_rankings[compare_cols].rename(
    columns={name: f"{name}_PROJ" for name in ("RANK", "RANK_SCORE")}
)
l = live_rankings[compare_cols].rename(
    columns={name: f"{name}_LIVE" for name in ("RANK", "RANK_SCORE")}
)

# Inner join: only players present in both rankings are compared.
cmp_df = p.merge(l, how="inner", on=["INDEX", "PLAYER_NORM"])

# Compute deltas
cmp_df = cmp_df.assign(
    RANK_SCORE_DELTA=cmp_df["RANK_SCORE_LIVE"] - cmp_df["RANK_SCORE_PROJ"],
    RANK_DELTA=cmp_df["RANK_LIVE"] - cmp_df["RANK_PROJ"],
)

# Largest positive delta first; ties broken by better (lower) live rank.
over_df = cmp_df.sort_values(
    by=["RANK_SCORE_DELTA", "RANK_LIVE"], ascending=[False, True]
).reset_index(drop=True)
# Most negative delta first; same tie-break.
under_df = cmp_df.sort_values(
    by=["RANK_SCORE_DELTA", "RANK_LIVE"], ascending=True
).reset_index(drop=True)

cols = (
    ["INDEX", "PLAYER_NORM"]
    + ["RANK_PROJ", "RANK_LIVE"]
    + ["RANK_SCORE_PROJ", "RANK_SCORE_LIVE"]
    + ["RANK_SCORE_DELTA", "RANK_DELTA"]
)

print("✅ Positive RANK_SCORE_DELTA = outperforming projections; negative = underperforming.\n")

print("🏀 Top Overperformers (LIVE > PROJECTIONS):")
display(over_df[cols].head(20))

print("😬 Top Underperformers (LIVE < PROJECTIONS):")
display(under_df[cols].head(20))

✅ Positive RANK_SCORE_DELTA = outperforming projections; negative = underperforming.

🏀 Top Overperformers (LIVE > PROJECTIONS):


Unnamed: 0,INDEX,PLAYER_NORM,RANK_PROJ,RANK_LIVE,RANK_SCORE_PROJ,RANK_SCORE_LIVE,RANK_SCORE_DELTA,RANK_DELTA
0,10146.0,aaron gordon,170,2,2.3773,14.4792,12.1019,-168
1,10113.0,kyshawn george,135,6,3.036,10.5731,7.5371,-129
2,10288.0,ben sheppard,265,13,0.8142,7.4382,6.624,-252
3,10268.0,cedric coward,418,35,-1.3461,4.7876,6.1337,-383
4,10504.0,will riley,546,166,-5.7339,0.3237,6.0576,-380
5,10493.0,hugo gonzalez,543,158,-4.9316,0.5383,5.47,-385
6,10361.0,nique clifford,519,112,-3.2831,1.7411,5.0241,-407
7,10014.0,tyrese maxey,11,3,7.9338,12.6938,4.76,-8
8,10413.0,kris murray,505,129,-2.8372,1.3639,4.2011,-376
9,10096.0,mikal bridges,120,11,3.592,7.6284,4.0364,-109


😬 Top Underperformers (LIVE < PROJECTIONS):


Unnamed: 0,INDEX,PLAYER_NORM,RANK_PROJ,RANK_LIVE,RANK_SCORE_PROJ,RANK_SCORE_LIVE,RANK_SCORE_DELTA,RANK_DELTA
0,10106.0,isaiah jackson,88,353,4.3602,-8.2721,-12.6322,265
1,10024.0,amen thompson,15,254,7.299,-2.1406,-9.4396,239
2,10037.0,dyson daniels,20,263,6.9852,-2.359,-9.3442,243
3,10016.0,domantas sabonis,32,273,6.4481,-2.5353,-8.9834,241
4,10224.0,jonathan isaac,155,342,2.6185,-6.1238,-8.7424,187
5,10103.0,dangelo russell,110,332,3.7948,-4.8709,-8.6657,222
6,10347.0,johnny juzang,242,349,1.1626,-7.4505,-8.6131,107
7,10275.0,jordan goodwin,264,350,0.822,-7.7562,-8.5783,86
8,10255.0,tristan vukcevic,276,351,0.6005,-7.8285,-8.429,75
9,10114.0,tari eason,60,295,5.1356,-3.261,-8.3966,235
