In [1]:
# ============================================================
# Cell 1: Imports & global settings
# ============================================================
import sys
from pathlib import Path
import importlib.util

import numpy as np
import pandas as pd
from IPython.display import display

# Widen pandas' display limits so the merged, many-column frames render fully.
pd.options.display.max_columns = 200
pd.options.display.width = 200

# Data locations (edit if your layout differs).
# DATA_DIR stays relative to the working directory; UTILS_PATH is made absolute.
DATA_DIR = Path("data")
UTILS_PATH = (Path("..") / "utils" / "utils.py").resolve()

# Canonical keys
PLAYER_KEY = "PLAYER_NORM"   # normalized player name
JOIN_KEY = "INDEX"           # unique index id

# Stat columns looked for in every source, in output order.
STATS = [
    "G", "MPG",
    "PTS", "FGA", "FG%", "FTA", "FT%", "3PM",
    "REB", "AST", "STL", "BLK", "TO",
]

# Source name -> (csv path, column suffix appended to that source's stat columns).
# Insertion order matters: it is the order in which the frames are merged later.
SOURCES = dict(
    basketballReference=(DATA_DIR.joinpath("basketballReference_L1.csv"), "bref"),
    fantasyPros=(DATA_DIR.joinpath("fantasyPros_L1.csv"), "fantasyPros"),
    hashTag=(DATA_DIR.joinpath("hashTag_L1.csv"), "hashTag"),
    fanScout=(DATA_DIR.joinpath("fanScout_L1.csv"), "fanScout"),
)

# Default per-source weights (renormalized among present, matched sources per row).
SOURCE_WEIGHTS = dict(
    basketballReference=1.0,
    fantasyPros=0.9,
    hashTag=1.5,
    fanScout=1.0,
)

In [2]:
# ============================================================
# Cell 2: Load utils.py (normalize_name, load_player_index, etc.)
# ============================================================
# Import utils.py directly from its file path under the module name "utils_module".
spec = importlib.util.spec_from_file_location("utils_module", UTILS_PATH)
utils_module = importlib.util.module_from_spec(spec)
# Register the module in sys.modules before executing its body.
sys.modules["utils_module"] = utils_module
spec.loader.exec_module(utils_module)

# Bind the helpers this notebook uses.
# normalize_name is required: a missing attribute raises AttributeError right here.
normalize_name   = utils_module.normalize_name
# load_player_index falls back to None when utils.py does not define it.
load_player_index = getattr(utils_module, "load_player_index", None)

print("✅ utils.py loaded from:", UTILS_PATH)

✅ utils.py loaded from: /Users/ryanbrowder/Documents/Fantasy/roundBall/utils/utils.py


In [3]:
# ============================================================
# Cell 3: Load player index (INDEX, PLAYER, PLAYER_NORM)
# ============================================================
# Fail early with a clear message if utils.py did not provide the loader.
if load_player_index is None:
    raise RuntimeError("utils.py must expose load_player_index().")

index_df = load_player_index(utils_dir="../utils", index_filename="playerIndex.csv")

# Sanity checks
display(index_df.head(10))
print("Rows:", len(index_df), "| Unique INDEX:", index_df[JOIN_KEY].nunique())

# Slim view for merges: one row per distinct (INDEX, PLAYER_NORM) pair.
index_slim = index_df[[JOIN_KEY, PLAYER_KEY]].drop_duplicates()

# The per-source joins match on PLAYER_KEY, not on JOIN_KEY, so a normalized name
# that maps to more than one id would silently duplicate that player's rows in
# every source. Surface that case here instead of letting it pass unnoticed.
dup_names = index_slim.loc[index_slim[PLAYER_KEY].duplicated(keep=False), PLAYER_KEY].unique()
if len(dup_names):
    print(
        f"⚠️ {len(dup_names)} {PLAYER_KEY} value(s) map to more than one {JOIN_KEY}; "
        "joins on the name will duplicate rows. Examples:",
        list(dup_names[:10]),
    )

Unnamed: 0,INDEX,PLAYER,PLAYER_NORM
0,10001,Nikola Jokic,nikola jokic
1,10002,Shai Gilgeous-Alexander,shai gilgeous alexander
2,10003,Victor Wembanyama,victor wembanyama
3,10004,Luka Doncic,luka doncic
4,10005,Giannis Antetokounmpo,giannis antetokounmpo
5,10006,Anthony Davis,anthony davis
6,10007,Cade Cunningham,cade cunningham
7,10008,Anthony Edwards,anthony edwards
8,10009,Karl-Anthony Towns,karl anthony towns
9,10010,James Harden,james harden


Rows: 546 | Unique INDEX: 546


In [4]:
# ============================================================
# Cell 4: Cleaning helpers for % and numeric columns
# ============================================================
def clean_percent_cols(df: pd.DataFrame, pct_cols=('FG%','FT%')):
    """
    Normalise percentage columns to decimal fractions.

    Each listed column that exists in `df` is stripped of '%' signs, thousands
    commas and surrounding whitespace, then coerced to numeric (anything
    unparseable becomes NaN). If the column's median is above 1 the values are
    taken to be on a 0-100 scale and divided by 100; otherwise they are left
    alone, so both "49%" and 0.49 style inputs end up as 0.49.

    The frame is modified in place and also returned.
    """
    present = [name for name in pct_cols if name in df.columns]
    for name in present:
        # Work on a string view so symbols can be removed uniformly
        text = df[name].astype(str)
        for junk in ('%', ','):
            text = text.str.replace(junk, '', regex=False)
        text = text.str.strip().replace('', np.nan)
        df[name] = pd.to_numeric(text, errors='coerce')

        # Scale decision is made per column: medians like 49 or 78 mean 0-100 input
        looks_like_whole_percent = df[name].median(skipna=True) > 1
        if looks_like_whole_percent:
            df[name] = df[name] / 100.0
    return df

def clean_numeric_cols(df: pd.DataFrame, numeric_cols, skip_cols=('FG%','FT%')):
    """
    Coerce the listed columns to numeric, in place.

    Parameters
    ----------
    df : DataFrame to clean. It is modified in place and also returned.
    numeric_cols : iterable of column names to coerce. Names that are not
        present in `df` are ignored.
    skip_cols : column names to leave untouched even if they appear in
        `numeric_cols`. Defaults to the percentage columns ('FG%', 'FT%'),
        which are handled separately because they need scale detection.

    Thousands commas and surrounding whitespace are removed before coercion;
    blanks and anything unparseable become NaN.
    """
    skipped = set(skip_cols)
    for col in numeric_cols:
        if col not in df.columns or col in skipped:
            continue
        text = (
            df[col].astype(str)
            .str.replace(',','', regex=False)
            .str.strip()
            .replace('', np.nan)
        )
        df[col] = pd.to_numeric(text, errors='coerce')
    return df

def suffix_stats(df: pd.DataFrame, stats, suffix):
    """
    Return `df` with each stat column renamed to ``<stat>_<suffix>``.

    Only names from `stats` that actually exist in `df` are renamed; all other
    columns keep their names. The input frame is not modified.
    """
    renames = {}
    for stat in stats:
        if stat in df.columns:
            renames[stat] = f"{stat}_{suffix}"
    return df.rename(columns=renames)

In [5]:
# ============================================================
# Cell 5: Load each source, normalize names, join to index in memory
# ============================================================
def _load_source(path, suffix):
    """
    Read one source CSV and prepare it for merging.

    Returns a pair: (frame holding INDEX, PLAYER_NORM and the cleaned stat
    columns renamed with `suffix`, set of PLAYER_NORM values that found no
    INDEX in the player index).
    """
    raw = pd.read_csv(path)

    # Derive the normalized name only when the file does not already carry it
    if PLAYER_KEY not in raw.columns:
        raw[PLAYER_KEY] = raw["PLAYER"].astype(str).map(normalize_name)

    # Left join keeps every source row; rows without an index match get a NaN INDEX
    joined = raw.merge(index_slim, on=PLAYER_KEY, how='left')
    has_no_index = joined[JOIN_KEY].isna()
    unmatched = set(joined.loc[has_no_index, PLAYER_KEY].dropna().unique())

    # Keep the keys plus whichever stats this source actually provides
    wanted = [JOIN_KEY, PLAYER_KEY] + [c for c in STATS if c in joined.columns]
    stats_only = joined.loc[:, wanted].copy()

    stats_only = clean_percent_cols(stats_only, pct_cols=('FG%','FT%'))
    stats_only = clean_numeric_cols(stats_only, numeric_cols=STATS)

    # Suffix stat columns to preserve provenance
    return suffix_stats(stats_only, STATS, suffix), unmatched


inmem_frames   = {}           # source -> enriched df with suffixed stat cols
unmatched_sets = {}           # source -> set of PLAYER_NORM that failed to match INDEX

for src_name, (path, suffix) in SOURCES.items():
    inmem_frames[src_name], unmatched_sets[src_name] = _load_source(path, suffix)

print("Loaded sources:", list(inmem_frames.keys()))

Loaded sources: ['basketballReference', 'fantasyPros', 'hashTag', 'fanScout']


In [6]:
# ============================================================
# Cell 6: Outer merge all sources on INDEX + PLAYER_NORM
# ============================================================
frames = list(inmem_frames.values())
if not frames:
    raise RuntimeError("No sources loaded.")

# Start from a copy of the first source frame. Without the copy, a single-source
# run would leave projections_L1 as the very same object stored in inmem_frames,
# and columns added to projections_L1 later would leak back into that cache.
projections_L1 = frames[0].copy()

# Fold the remaining sources in with an outer join so a player present in any
# source gets a row. pandas matches null join keys against each other, so rows
# whose INDEX is NaN still line up across sources by PLAYER_NORM.
for f in frames[1:]:
    projections_L1 = projections_L1.merge(f, on=[JOIN_KEY, PLAYER_KEY], how='outer')

print("Shape after merge:", projections_L1.shape)
display(projections_L1.head(5))

Shape after merge: (758, 49)


Unnamed: 0,INDEX,PLAYER_NORM,PTS_bref,FGA_bref,FG%_bref,FTA_bref,FT%_bref,3PM_bref,REB_bref,AST_bref,STL_bref,BLK_bref,TO_bref,MPG_fantasyPros,PTS_fantasyPros,FG%_fantasyPros,FT%_fantasyPros,3PM_fantasyPros,REB_fantasyPros,AST_fantasyPros,STL_fantasyPros,BLK_fantasyPros,TO_fantasyPros,G_hashTag,MPG_hashTag,PTS_hashTag,FGA_hashTag,FG%_hashTag,FTA_hashTag,FT%_hashTag,3PM_hashTag,REB_hashTag,AST_hashTag,STL_hashTag,BLK_hashTag,TO_hashTag,G_fanScout,MPG_fanScout,PTS_fanScout,FGA_fanScout,FG%_fanScout,FTA_fanScout,FT%_fanScout,3PM_fanScout,REB_fanScout,AST_fanScout,STL_fanScout,BLK_fanScout,TO_fanScout
0,10001.0,nikola jokic,27.7,18.4,0.578,6.0,0.806,1.6,12.3,9.6,1.6,0.7,3.2,36.1,27.6,0.575,0.807,1.6,12.3,9.6,1.6,0.7,3.2,74.0,35.7,28.8,18.9,0.579,6.0,0.808,2.1,12.7,10.3,1.6,0.8,3.2,72.0,35.4,27.8,17.97,0.58,6.34,0.81,1.98,11.72,10.36,1.72,0.7,3.17
1,10002.0,shai gilgeous alexander,32.9,21.8,0.524,9.2,0.891,1.9,5.5,6.5,1.9,1.0,2.5,34.9,32.4,0.52,0.893,1.8,5.3,6.3,1.8,0.9,2.5,74.0,34.1,32.1,21.0,0.527,8.8,0.886,2.1,5.3,6.3,1.7,1.0,2.3,74.0,32.5,32.1,21.21,0.51,9.12,0.9,2.05,4.82,6.52,1.59,1.0,2.26
2,10003.0,victor wembanyama,26.2,19.8,0.483,5.2,0.822,2.9,12.3,4.3,1.4,4.1,3.7,33.3,23.9,0.467,0.827,2.4,10.5,3.8,1.2,3.4,3.4,65.0,32.4,24.3,18.4,0.47,5.0,0.827,2.9,11.0,3.7,1.1,3.7,3.7,72.0,33.5,27.3,19.52,0.49,5.93,0.83,3.39,11.78,4.38,1.27,3.85,3.12
3,10004.0,luka doncic,30.1,21.3,0.474,8.2,0.781,3.6,8.5,8.3,1.6,0.5,3.6,36.4,29.7,0.478,0.774,3.1,8.5,8.5,1.5,0.5,3.9,70.0,35.8,31.1,22.0,0.473,8.3,0.785,3.8,8.7,8.8,1.6,0.5,3.8,72.0,35.4,30.6,21.13,0.48,8.69,0.79,3.73,7.87,8.64,1.87,0.45,3.76
4,10005.0,giannis antetokounmpo,31.2,20.1,0.593,10.9,0.632,0.4,12.0,6.6,1.0,1.1,3.4,35.4,32.2,0.586,0.638,0.4,11.6,6.6,1.0,1.0,3.5,70.0,34.7,31.1,19.8,0.607,10.8,0.633,0.4,11.8,6.8,0.9,1.1,3.4,70.0,33.4,31.0,19.64,0.6,11.0,0.63,0.41,11.89,7.09,0.92,1.12,3.31


In [7]:
# ============================================================
# Cell 7: Weighted mean helper (excludes NaNs, renormalizes weights)
# ============================================================
def weighted_row_mean(values: dict, weights: dict) -> float:
    """
    Weighted mean of one row's per-source values.

    Parameters
    ----------
    values : {source_name: float or NaN}. Sources whose value is NaN/None are
        ignored entirely.
    weights : {source_name: float}. A source missing from this mapping gets
        weight 1.0. A non-finite weight (NaN/inf) is treated as 0, i.e. that
        source does not contribute.

    Returns
    -------
    float
        Mean of the remaining values with weights renormalized to sum to 1.
        np.nan when no source has a value. If the usable weights sum to
        (approximately) zero, falls back to an unweighted mean of the values.
    """
    present = [(src, val) for src, val in values.items() if pd.notna(val)]
    if not present:
        return np.nan

    v = np.array([val for _, val in present], dtype=float)
    w = np.array([weights.get(src, 1.0) for src, _ in present], dtype=float)

    # A single NaN/inf weight would turn the normalizing sum (and so the whole
    # result) into NaN; neutralize such weights instead of poisoning the row.
    w = np.where(np.isfinite(w), w, 0.0)

    total = w.sum()
    if np.isclose(total, 0.0):
        # Nothing usable to weight by: fall back to a plain average
        w = np.ones_like(v)
        total = w.sum()

    return float(np.sum((w / total) * v))

In [8]:
# ============================================================
# Cell 8: Compute *stat*_w columns (exclude unmatched sources per player)
# ============================================================
# Map source -> suffix so we can find columns like "PTS_bref"
# Map source -> suffix so we can find columns like "PTS_bref"
SOURCE_TOKENS = {src: suffix for src, (_, suffix) in SOURCES.items()}

n_rows = len(projections_L1)

# For each source, flag the rows whose player failed to match the index in that
# source. This depends only on the player, not on the stat, so build it once.
player_names = projections_L1[PLAYER_KEY]
excluded_mask = {
    src: player_names.isin(unmatched_sets.get(src, set())).to_numpy()
    for src in SOURCE_TOKENS
}

for stat in STATS:
    # Per-source value arrays for this stat (all-NaN when the source lacks the column).
    # Plain arrays are indexed by position, so the result does not depend on the
    # frame having a default 0..n-1 index.
    src_values = {}
    for src, suffix in SOURCE_TOKENS.items():
        col = f"{stat}_{suffix}"
        if col in projections_L1.columns:
            src_values[src] = projections_L1[col].to_numpy(dtype=float)
        else:
            src_values[src] = np.full(n_rows, np.nan)

    # Row-wise weighted mean over the sources not excluded for that row;
    # weighted_row_mean itself skips NaN values and renormalizes the weights.
    out_vals = [
        weighted_row_mean(
            {src: arr[pos] for src, arr in src_values.items() if not excluded_mask[src][pos]},
            SOURCE_WEIGHTS,
        )
        for pos in range(n_rows)
    ]

    projections_L1[f"{stat}_w"] = pd.Series(out_vals, index=projections_L1.index, dtype=float)

print("✅ Weighted columns added.")
display(projections_L1[[JOIN_KEY, PLAYER_KEY] + [f"{s}_w" for s in STATS]].head(10))

✅ Weighted columns added.


Unnamed: 0,INDEX,PLAYER_NORM,G_w,MPG_w,PTS_w,FGA_w,FG%_w,FTA_w,FT%_w,3PM_w,REB_w,AST_w,STL_w,BLK_w,TO_w
0,10001.0,nikola jokic,73.2,35.717647,28.077273,18.491429,0.578182,6.097143,0.807795,1.856818,12.304545,10.011364,1.627273,0.734091,3.193182
1,10002.0,shai gilgeous alexander,74.0,33.841176,32.343182,21.288571,0.521023,9.005714,0.89175,1.981818,5.236364,6.395455,1.740909,0.979545,2.377273
2,10003.0,victor wembanyama,67.8,32.961765,25.331818,19.12,0.476886,5.322857,0.826545,2.909091,11.370455,4.011364,1.227273,3.763636,3.506818
3,10004.0,luka doncic,70.8,35.841176,30.472727,21.551429,0.475841,8.382857,0.782977,3.595455,8.425,8.588636,1.640909,0.488636,3.765909
4,10005.0,giannis antetokounmpo,70.0,34.502941,31.325,19.84,0.597932,10.885714,0.633114,0.402273,11.825,6.779545,0.947727,1.084091,3.4
5,10006.0,anthony davis,63.4,34.302941,24.775,17.862857,0.521932,7.088571,0.786864,0.731818,11.463636,3.686364,1.170455,2.177273,2.329545
6,10007.0,cade cunningham,72.0,35.223529,26.354545,20.94,0.468682,5.451429,0.852727,2.211364,5.834091,9.031818,1.018182,0.720455,4.136364
7,10008.0,anthony edwards,79.0,36.402941,27.822727,20.382857,0.452159,6.611429,0.833705,3.879545,5.788636,4.922727,1.261364,0.634091,3.220455
8,10009.0,karl anthony towns,69.8,33.7,23.443182,16.454286,0.518136,5.377143,0.842227,2.056818,11.293182,3.090909,0.913636,0.677273,2.731818
9,10010.0,james harden,74.6,34.555882,20.743182,14.782857,0.420886,6.434286,0.873818,2.843182,5.511364,8.454545,1.331818,0.713636,3.709091


In [9]:
# ============================================================
# Cell 9: Tidy column order (keys, raw stats by source, then *_w)
# ============================================================
# Keys go first, weighted results go last, everything else stays in between
# in its current relative order.
first_cols = [JOIN_KEY, PLAYER_KEY]
weighted_cols = [name for name in (f"{s}_w" for s in STATS) if name in projections_L1.columns]

_pinned = set(first_cols) | set(weighted_cols)
other_cols = [c for c in projections_L1.columns if c not in _pinned]

projections_L1 = projections_L1[first_cols + other_cols + weighted_cols]
display(projections_L1.head(10))

Unnamed: 0,INDEX,PLAYER_NORM,PTS_bref,FGA_bref,FG%_bref,FTA_bref,FT%_bref,3PM_bref,REB_bref,AST_bref,STL_bref,BLK_bref,TO_bref,MPG_fantasyPros,PTS_fantasyPros,FG%_fantasyPros,FT%_fantasyPros,3PM_fantasyPros,REB_fantasyPros,AST_fantasyPros,STL_fantasyPros,BLK_fantasyPros,TO_fantasyPros,G_hashTag,MPG_hashTag,PTS_hashTag,FGA_hashTag,FG%_hashTag,FTA_hashTag,FT%_hashTag,3PM_hashTag,REB_hashTag,AST_hashTag,STL_hashTag,BLK_hashTag,TO_hashTag,G_fanScout,MPG_fanScout,PTS_fanScout,FGA_fanScout,FG%_fanScout,FTA_fanScout,FT%_fanScout,3PM_fanScout,REB_fanScout,AST_fanScout,STL_fanScout,BLK_fanScout,TO_fanScout,G_w,MPG_w,PTS_w,FGA_w,FG%_w,FTA_w,FT%_w,3PM_w,REB_w,AST_w,STL_w,BLK_w,TO_w
0,10001.0,nikola jokic,27.7,18.4,0.578,6.0,0.806,1.6,12.3,9.6,1.6,0.7,3.2,36.1,27.6,0.575,0.807,1.6,12.3,9.6,1.6,0.7,3.2,74.0,35.7,28.8,18.9,0.579,6.0,0.808,2.1,12.7,10.3,1.6,0.8,3.2,72.0,35.4,27.8,17.97,0.58,6.34,0.81,1.98,11.72,10.36,1.72,0.7,3.17,73.2,35.717647,28.077273,18.491429,0.578182,6.097143,0.807795,1.856818,12.304545,10.011364,1.627273,0.734091,3.193182
1,10002.0,shai gilgeous alexander,32.9,21.8,0.524,9.2,0.891,1.9,5.5,6.5,1.9,1.0,2.5,34.9,32.4,0.52,0.893,1.8,5.3,6.3,1.8,0.9,2.5,74.0,34.1,32.1,21.0,0.527,8.8,0.886,2.1,5.3,6.3,1.7,1.0,2.3,74.0,32.5,32.1,21.21,0.51,9.12,0.9,2.05,4.82,6.52,1.59,1.0,2.26,74.0,33.841176,32.343182,21.288571,0.521023,9.005714,0.89175,1.981818,5.236364,6.395455,1.740909,0.979545,2.377273
2,10003.0,victor wembanyama,26.2,19.8,0.483,5.2,0.822,2.9,12.3,4.3,1.4,4.1,3.7,33.3,23.9,0.467,0.827,2.4,10.5,3.8,1.2,3.4,3.4,65.0,32.4,24.3,18.4,0.47,5.0,0.827,2.9,11.0,3.7,1.1,3.7,3.7,72.0,33.5,27.3,19.52,0.49,5.93,0.83,3.39,11.78,4.38,1.27,3.85,3.12,67.8,32.961765,25.331818,19.12,0.476886,5.322857,0.826545,2.909091,11.370455,4.011364,1.227273,3.763636,3.506818
3,10004.0,luka doncic,30.1,21.3,0.474,8.2,0.781,3.6,8.5,8.3,1.6,0.5,3.6,36.4,29.7,0.478,0.774,3.1,8.5,8.5,1.5,0.5,3.9,70.0,35.8,31.1,22.0,0.473,8.3,0.785,3.8,8.7,8.8,1.6,0.5,3.8,72.0,35.4,30.6,21.13,0.48,8.69,0.79,3.73,7.87,8.64,1.87,0.45,3.76,70.8,35.841176,30.472727,21.551429,0.475841,8.382857,0.782977,3.595455,8.425,8.588636,1.640909,0.488636,3.765909
4,10005.0,giannis antetokounmpo,31.2,20.1,0.593,10.9,0.632,0.4,12.0,6.6,1.0,1.1,3.4,35.4,32.2,0.586,0.638,0.4,11.6,6.6,1.0,1.0,3.5,70.0,34.7,31.1,19.8,0.607,10.8,0.633,0.4,11.8,6.8,0.9,1.1,3.4,70.0,33.4,31.0,19.64,0.6,11.0,0.63,0.41,11.89,7.09,0.92,1.12,3.31,70.0,34.502941,31.325,19.84,0.597932,10.885714,0.633114,0.402273,11.825,6.779545,0.947727,1.084091,3.4
5,10006.0,anthony davis,25.4,18.0,0.529,7.2,0.788,0.6,12.3,3.6,1.2,2.2,2.3,35.2,24.4,0.54,0.793,0.5,11.9,3.4,1.2,2.2,2.1,63.0,34.5,25.1,18.0,0.521,7.1,0.787,0.8,11.8,3.5,1.2,2.1,2.2,64.0,33.2,24.0,17.52,0.5,6.96,0.78,0.97,9.73,4.31,1.07,2.25,2.76,63.4,34.302941,24.775,17.862857,0.521932,7.088571,0.786864,0.731818,11.463636,3.686364,1.170455,2.177273,2.329545
6,10007.0,cade cunningham,26.0,20.7,0.471,5.2,0.853,2.2,5.9,8.8,1.0,0.7,4.1,35.4,25.4,0.463,0.85,2.1,5.8,8.5,1.0,0.6,4.2,70.0,35.2,26.4,21.1,0.463,5.4,0.856,2.2,5.9,9.3,1.0,0.8,4.1,75.0,35.1,27.5,20.94,0.48,5.78,0.85,2.34,5.7,9.34,1.08,0.73,4.17,72.0,35.223529,26.354545,20.94,0.468682,5.451429,0.852727,2.211364,5.834091,9.031818,1.018182,0.720455,4.136364
7,10008.0,anthony edwards,26.9,19.9,0.461,6.2,0.833,3.4,5.8,4.8,1.3,0.6,3.1,36.8,27.3,0.45,0.822,3.6,5.9,5.1,1.3,0.7,3.2,79.0,36.1,28.3,20.6,0.449,6.7,0.837,4.1,5.9,5.1,1.3,0.6,3.3,79.0,36.5,28.5,20.54,0.45,6.89,0.84,4.28,5.51,4.62,1.13,0.66,3.24,79.0,36.402941,27.822727,20.382857,0.452159,6.611429,0.833705,3.879545,5.788636,4.922727,1.261364,0.634091,3.220455
8,10009.0,karl anthony towns,24.3,17.0,0.515,5.5,0.839,2.1,11.6,3.3,0.9,0.7,2.9,34.2,22.5,0.512,0.842,2.0,10.9,3.0,0.9,0.6,2.5,69.0,33.8,23.4,16.3,0.516,5.3,0.846,2.1,11.8,3.1,0.9,0.7,2.8,71.0,33.1,23.5,16.14,0.53,5.37,0.84,2.0,10.58,2.95,0.96,0.69,2.67,69.8,33.7,23.443182,16.454286,0.518136,5.377143,0.842227,2.056818,11.293182,3.090909,0.913636,0.677273,2.731818
9,10010.0,james harden,20.9,15.1,0.411,6.5,0.869,2.8,5.7,8.7,1.4,0.7,3.8,34.1,20.3,0.426,0.872,2.8,5.3,8.5,1.3,0.6,3.7,75.0,34.8,20.6,14.6,0.425,6.2,0.874,2.8,5.5,8.5,1.3,0.8,3.6,74.0,34.6,21.2,14.74,0.42,6.72,0.88,2.99,5.53,8.1,1.34,0.7,3.79,74.6,34.555882,20.743182,14.782857,0.420886,6.434286,0.873818,2.843182,5.511364,8.454545,1.331818,0.713636,3.709091


In [10]:
# ============================================================
# Cell 10: Coverage audit — count non-null sources contributing to each stat
# ============================================================
# For every stat, count how many source columns hold a non-null value per row.
# NOTE(review): this counts raw non-null source values; it does not apply the
# unmatched-player exclusion used when computing the *_w columns, so a row can
# show a positive count while its weighted value is NaN.
coverage = {}
for stat in STATS:
    cols = [f"{stat}_{suffix}" for _, suffix in SOURCES.values() if f"{stat}_{suffix}" in projections_L1.columns]
    if not cols:
        continue
    coverage[stat] = projections_L1[cols].notna().sum(axis=1)

# Building with an explicit index keeps this valid even when no stat had any
# source column (an empty dict would otherwise yield a zero-row frame).
coverage_df = pd.DataFrame(coverage, index=projections_L1.index).add_suffix("_count")

# Drop counts left by an earlier run of this cell so that re-executing it
# replaces them instead of appending duplicate *_count columns.
projections_L1 = pd.concat(
    [projections_L1.drop(columns=list(coverage_df.columns), errors="ignore"), coverage_df],
    axis=1,
)
display(projections_L1.head(10))

Unnamed: 0,INDEX,PLAYER_NORM,PTS_bref,FGA_bref,FG%_bref,FTA_bref,FT%_bref,3PM_bref,REB_bref,AST_bref,STL_bref,BLK_bref,TO_bref,MPG_fantasyPros,PTS_fantasyPros,FG%_fantasyPros,FT%_fantasyPros,3PM_fantasyPros,REB_fantasyPros,AST_fantasyPros,STL_fantasyPros,BLK_fantasyPros,TO_fantasyPros,G_hashTag,MPG_hashTag,PTS_hashTag,FGA_hashTag,FG%_hashTag,FTA_hashTag,FT%_hashTag,3PM_hashTag,REB_hashTag,AST_hashTag,STL_hashTag,BLK_hashTag,TO_hashTag,G_fanScout,MPG_fanScout,PTS_fanScout,FGA_fanScout,FG%_fanScout,FTA_fanScout,FT%_fanScout,3PM_fanScout,REB_fanScout,AST_fanScout,STL_fanScout,BLK_fanScout,TO_fanScout,G_w,MPG_w,PTS_w,FGA_w,FG%_w,FTA_w,FT%_w,3PM_w,REB_w,AST_w,STL_w,BLK_w,TO_w,G_count,MPG_count,PTS_count,FGA_count,FG%_count,FTA_count,FT%_count,3PM_count,REB_count,AST_count,STL_count,BLK_count,TO_count
0,10001.0,nikola jokic,27.7,18.4,0.578,6.0,0.806,1.6,12.3,9.6,1.6,0.7,3.2,36.1,27.6,0.575,0.807,1.6,12.3,9.6,1.6,0.7,3.2,74.0,35.7,28.8,18.9,0.579,6.0,0.808,2.1,12.7,10.3,1.6,0.8,3.2,72.0,35.4,27.8,17.97,0.58,6.34,0.81,1.98,11.72,10.36,1.72,0.7,3.17,73.2,35.717647,28.077273,18.491429,0.578182,6.097143,0.807795,1.856818,12.304545,10.011364,1.627273,0.734091,3.193182,2,3,4,3,4,3,4,4,4,4,4,4,4
1,10002.0,shai gilgeous alexander,32.9,21.8,0.524,9.2,0.891,1.9,5.5,6.5,1.9,1.0,2.5,34.9,32.4,0.52,0.893,1.8,5.3,6.3,1.8,0.9,2.5,74.0,34.1,32.1,21.0,0.527,8.8,0.886,2.1,5.3,6.3,1.7,1.0,2.3,74.0,32.5,32.1,21.21,0.51,9.12,0.9,2.05,4.82,6.52,1.59,1.0,2.26,74.0,33.841176,32.343182,21.288571,0.521023,9.005714,0.89175,1.981818,5.236364,6.395455,1.740909,0.979545,2.377273,2,3,4,3,4,3,4,4,4,4,4,4,4
2,10003.0,victor wembanyama,26.2,19.8,0.483,5.2,0.822,2.9,12.3,4.3,1.4,4.1,3.7,33.3,23.9,0.467,0.827,2.4,10.5,3.8,1.2,3.4,3.4,65.0,32.4,24.3,18.4,0.47,5.0,0.827,2.9,11.0,3.7,1.1,3.7,3.7,72.0,33.5,27.3,19.52,0.49,5.93,0.83,3.39,11.78,4.38,1.27,3.85,3.12,67.8,32.961765,25.331818,19.12,0.476886,5.322857,0.826545,2.909091,11.370455,4.011364,1.227273,3.763636,3.506818,2,3,4,3,4,3,4,4,4,4,4,4,4
3,10004.0,luka doncic,30.1,21.3,0.474,8.2,0.781,3.6,8.5,8.3,1.6,0.5,3.6,36.4,29.7,0.478,0.774,3.1,8.5,8.5,1.5,0.5,3.9,70.0,35.8,31.1,22.0,0.473,8.3,0.785,3.8,8.7,8.8,1.6,0.5,3.8,72.0,35.4,30.6,21.13,0.48,8.69,0.79,3.73,7.87,8.64,1.87,0.45,3.76,70.8,35.841176,30.472727,21.551429,0.475841,8.382857,0.782977,3.595455,8.425,8.588636,1.640909,0.488636,3.765909,2,3,4,3,4,3,4,4,4,4,4,4,4
4,10005.0,giannis antetokounmpo,31.2,20.1,0.593,10.9,0.632,0.4,12.0,6.6,1.0,1.1,3.4,35.4,32.2,0.586,0.638,0.4,11.6,6.6,1.0,1.0,3.5,70.0,34.7,31.1,19.8,0.607,10.8,0.633,0.4,11.8,6.8,0.9,1.1,3.4,70.0,33.4,31.0,19.64,0.6,11.0,0.63,0.41,11.89,7.09,0.92,1.12,3.31,70.0,34.502941,31.325,19.84,0.597932,10.885714,0.633114,0.402273,11.825,6.779545,0.947727,1.084091,3.4,2,3,4,3,4,3,4,4,4,4,4,4,4
5,10006.0,anthony davis,25.4,18.0,0.529,7.2,0.788,0.6,12.3,3.6,1.2,2.2,2.3,35.2,24.4,0.54,0.793,0.5,11.9,3.4,1.2,2.2,2.1,63.0,34.5,25.1,18.0,0.521,7.1,0.787,0.8,11.8,3.5,1.2,2.1,2.2,64.0,33.2,24.0,17.52,0.5,6.96,0.78,0.97,9.73,4.31,1.07,2.25,2.76,63.4,34.302941,24.775,17.862857,0.521932,7.088571,0.786864,0.731818,11.463636,3.686364,1.170455,2.177273,2.329545,2,3,4,3,4,3,4,4,4,4,4,4,4
6,10007.0,cade cunningham,26.0,20.7,0.471,5.2,0.853,2.2,5.9,8.8,1.0,0.7,4.1,35.4,25.4,0.463,0.85,2.1,5.8,8.5,1.0,0.6,4.2,70.0,35.2,26.4,21.1,0.463,5.4,0.856,2.2,5.9,9.3,1.0,0.8,4.1,75.0,35.1,27.5,20.94,0.48,5.78,0.85,2.34,5.7,9.34,1.08,0.73,4.17,72.0,35.223529,26.354545,20.94,0.468682,5.451429,0.852727,2.211364,5.834091,9.031818,1.018182,0.720455,4.136364,2,3,4,3,4,3,4,4,4,4,4,4,4
7,10008.0,anthony edwards,26.9,19.9,0.461,6.2,0.833,3.4,5.8,4.8,1.3,0.6,3.1,36.8,27.3,0.45,0.822,3.6,5.9,5.1,1.3,0.7,3.2,79.0,36.1,28.3,20.6,0.449,6.7,0.837,4.1,5.9,5.1,1.3,0.6,3.3,79.0,36.5,28.5,20.54,0.45,6.89,0.84,4.28,5.51,4.62,1.13,0.66,3.24,79.0,36.402941,27.822727,20.382857,0.452159,6.611429,0.833705,3.879545,5.788636,4.922727,1.261364,0.634091,3.220455,2,3,4,3,4,3,4,4,4,4,4,4,4
8,10009.0,karl anthony towns,24.3,17.0,0.515,5.5,0.839,2.1,11.6,3.3,0.9,0.7,2.9,34.2,22.5,0.512,0.842,2.0,10.9,3.0,0.9,0.6,2.5,69.0,33.8,23.4,16.3,0.516,5.3,0.846,2.1,11.8,3.1,0.9,0.7,2.8,71.0,33.1,23.5,16.14,0.53,5.37,0.84,2.0,10.58,2.95,0.96,0.69,2.67,69.8,33.7,23.443182,16.454286,0.518136,5.377143,0.842227,2.056818,11.293182,3.090909,0.913636,0.677273,2.731818,2,3,4,3,4,3,4,4,4,4,4,4,4
9,10010.0,james harden,20.9,15.1,0.411,6.5,0.869,2.8,5.7,8.7,1.4,0.7,3.8,34.1,20.3,0.426,0.872,2.8,5.3,8.5,1.3,0.6,3.7,75.0,34.8,20.6,14.6,0.425,6.2,0.874,2.8,5.5,8.5,1.3,0.8,3.6,74.0,34.6,21.2,14.74,0.42,6.72,0.88,2.99,5.53,8.1,1.34,0.7,3.79,74.6,34.555882,20.743182,14.782857,0.420886,6.434286,0.873818,2.843182,5.511364,8.454545,1.331818,0.713636,3.709091,2,3,4,3,4,3,4,4,4,4,4,4,4


In [11]:
# ============================================================
# Keep only weighted columns + keys, and drop "_w" suffix
# ============================================================

# Columns carried into the final table: the two keys plus the weighted stats.
# NOTE(review): FGA_w and FTA_w are computed earlier but are not listed here;
# presumably intentional — confirm.
keep_cols = [
    "INDEX",
    "PLAYER_NORM",
    "G_w",
    "MPG_w",
    "PTS_w",
    "FG%_w",
    "FT%_w",
    "3PM_w",
    "REB_w",
    "AST_w",
    "STL_w",
    "BLK_w",
    "TO_w",
]

# Filter to those columns (ignore any missing just in case)
df_proj = projections_L1[[c for c in keep_cols if c in projections_L1.columns]].copy()

# Rename by stripping only a trailing "_w". A plain str.replace would also
# remove "_w" from the middle of a column name.
_W_SUFFIX = "_w"
df_proj.columns = [
    col[: -len(_W_SUFFIX)] if col.endswith(_W_SUFFIX) else col
    for col in df_proj.columns
]

display(df_proj.head(10))
print("✅ Cleaned projection dataframe shape:", df_proj.shape)

Unnamed: 0,INDEX,PLAYER_NORM,G,MPG,PTS,FG%,FT%,3PM,REB,AST,STL,BLK,TO
0,10001.0,nikola jokic,73.2,35.717647,28.077273,0.578182,0.807795,1.856818,12.304545,10.011364,1.627273,0.734091,3.193182
1,10002.0,shai gilgeous alexander,74.0,33.841176,32.343182,0.521023,0.89175,1.981818,5.236364,6.395455,1.740909,0.979545,2.377273
2,10003.0,victor wembanyama,67.8,32.961765,25.331818,0.476886,0.826545,2.909091,11.370455,4.011364,1.227273,3.763636,3.506818
3,10004.0,luka doncic,70.8,35.841176,30.472727,0.475841,0.782977,3.595455,8.425,8.588636,1.640909,0.488636,3.765909
4,10005.0,giannis antetokounmpo,70.0,34.502941,31.325,0.597932,0.633114,0.402273,11.825,6.779545,0.947727,1.084091,3.4
5,10006.0,anthony davis,63.4,34.302941,24.775,0.521932,0.786864,0.731818,11.463636,3.686364,1.170455,2.177273,2.329545
6,10007.0,cade cunningham,72.0,35.223529,26.354545,0.468682,0.852727,2.211364,5.834091,9.031818,1.018182,0.720455,4.136364
7,10008.0,anthony edwards,79.0,36.402941,27.822727,0.452159,0.833705,3.879545,5.788636,4.922727,1.261364,0.634091,3.220455
8,10009.0,karl anthony towns,69.8,33.7,23.443182,0.518136,0.842227,2.056818,11.293182,3.090909,0.913636,0.677273,2.731818
9,10010.0,james harden,74.6,34.555882,20.743182,0.420886,0.873818,2.843182,5.511364,8.454545,1.331818,0.713636,3.709091


✅ Cleaned projection dataframe shape: (758, 13)


In [12]:
# ============================================================
# Cell 12: Save final projection table (optional)
# ============================================================
# Write the final table for the next stage.
OUT_PATH = Path("../L2/data/projections_L2.csv")
# to_csv does not create missing folders; make sure the target directory exists.
OUT_PATH.parent.mkdir(parents=True, exist_ok=True)
df_proj.to_csv(OUT_PATH, index=False)
print("Saved →", OUT_PATH.resolve())

Saved → /Users/ryanbrowder/Documents/Fantasy/roundBall/L2/data/projections_L2.csv
