# Week 2 Quick Check
Runs a few sanity checks to see whether Week 2 data is present and, if it is, lightly inspects the files.

**What it does:**
- Locates repo base (folder with `Makefile` and `data/`)
- Looks for:
  - `data/props/props_with_model_week2.csv`
  - `data/actuals/week2.csv`
- If present: prints row counts, a small preview, distinct weeks, and attempts a loose join to gauge match rate.

> Safe to run multiple times; it never writes anything.

In [1]:

from pathlib import Path
import pandas as pd

# --- Config ---
WEEK = 2  # week number substituted into both file names below

# --- Find repo base (folder containing Makefile and data/) ---
# Check the working directory first, then each ancestor in turn; when no
# directory qualifies, BASE simply stays at the working directory.
BASE = Path.cwd()
BASE = next(
    (
        candidate
        for candidate in (BASE, *BASE.parents)
        if (candidate / "Makefile").exists() and (candidate / "data").exists()
    ),
    BASE,
)
print("[BASE]", BASE)

# Input files expected for the configured week.
props_path = BASE / f"data/props/props_with_model_week{WEEK}.csv"
acts_path = BASE / f"data/actuals/week{WEEK}.csv"

# Availability flags; the later cells test these before reading anything.
have_props = props_path.exists()
have_acts = acts_path.exists()

print("[check] props:", props_path, "exists:", have_props)
print("[check] acts :", acts_path, "exists:", have_acts)

if not (have_props or have_acts):
    print("\n[NO DATA] Neither props nor actuals for Week", WEEK, "found yet.")


[BASE] /Users/pwitt/fourth-and-value
[check] props: /Users/pwitt/fourth-and-value/data/props/props_with_model_week2.csv exists: True
[check] acts : /Users/pwitt/fourth-and-value/data/actuals/week2.csv exists: False


In [2]:

import pandas as pd

# Inspect the props file only when the first cell ran and found it.
if globals().get("have_props"):
    dfp = pd.read_csv(props_path)
    rows, cols = dfp.shape
    print(f"\n[props] rows={rows}, cols={cols}")

    # Use the first of the known spellings of a week column, if any exists.
    week_col = next(
        (name for name in ("week", "Week", "WEEK") if name in dfp.columns),
        None,
    )
    if week_col is not None:
        distinct_weeks = sorted(dfp[week_col].dropna().unique().tolist())
        # Print at most 20 values and flag truncation with an ellipsis.
        overflow_marker = "..." if len(distinct_weeks) > 20 else ""
        print("[props] distinct weeks:", distinct_weeks[:20], overflow_marker)

    # Preview a compact set of columns when available, else the whole frame.
    wanted_cols = [
        "game", "player", "player_key", "market_std", "side", "point_key",
        "model_prob", "mkt_prob", "book", "odds_american", "kick_et",
    ]
    preview_cols = [name for name in wanted_cols if name in dfp.columns]
    preview_frame = dfp[preview_cols] if preview_cols else dfp
    display(preview_frame.head(10))



[props] rows=2673, cols=24
[props] distinct weeks: [2] 


Unnamed: 0,game,player,player_key,market_std,point_key,model_prob,mkt_prob
0,Tampa Bay Buccaneers @ Houston Texans,Bucky Irving,buckyirving,first_td,,,0.153846
1,Tampa Bay Buccaneers @ Houston Texans,Nick Chubb,nickchubb,first_td,,,0.142857
2,Tampa Bay Buccaneers @ Houston Texans,Nico Collins,nicocollins,first_td,,,0.125
3,Tampa Bay Buccaneers @ Houston Texans,Mike Evans,mikeevans,first_td,,,0.105263
4,Tampa Bay Buccaneers @ Houston Texans,Emeka Egbuka,emekaegbuka,first_td,,,0.095238
5,Tampa Bay Buccaneers @ Houston Texans,Dalton Schultz,daltonschultz,first_td,,,0.076923
6,Tampa Bay Buccaneers @ Houston Texans,Cade Otton,cadeotton,first_td,,,0.052632
7,Tampa Bay Buccaneers @ Houston Texans,Jayden Higgins,jaydenhiggins,first_td,,,0.05
8,Tampa Bay Buccaneers @ Houston Texans,Houston Texans D/ST,houstontexansdst,first_td,,,0.047619
9,Tampa Bay Buccaneers @ Houston Texans,Tampa Bay Buccaneers D/ST,tampabaybuccaneersdst,first_td,,,0.047619


In [3]:

import pandas as pd

# Inspect the actuals file only when the first cell ran and found it.
if globals().get("have_acts"):
    dfa = pd.read_csv(acts_path)
    rows_a, cols_a = dfa.shape
    print(f"\n[actuals] rows={rows_a}, cols={cols_a}")

    # Expected columns (best-effort): report which ones are absent.
    exp = ["player_key", "market_std", "side", "result"]
    missing = [name for name in exp if name not in dfa.columns]
    if not missing:
        print("[actuals] ok: has minimal columns:", exp)
    else:
        print("[actuals] missing expected columns:", missing)

    # Quick peek: a compact set of columns when available, else everything.
    wanted_cols_a = [
        "player", "player_key", "market_std", "side",
        "point_key", "result", "actual_value",
    ]
    preview_cols_a = [name for name in wanted_cols_a if name in dfa.columns]
    preview_frame_a = dfa[preview_cols_a] if preview_cols_a else dfa
    display(preview_frame_a.head(10))


In [4]:

# If both files exist, attempt a loose join to estimate match rate
import pandas as pd

def count_matched_props(props: "pd.DataFrame", acts: "pd.DataFrame", keys, result_col="result") -> int:
    """Count rows of ``props`` that have at least one resolved row in ``acts``.

    A props row is matched when some row of ``acts`` agrees with it on every
    column in ``keys`` and that row has a non-null ``result_col``. Each props
    row is counted at most once, so the return value never exceeds
    ``len(props)`` even when ``acts`` holds several rows per key combination.

    Parameters
    ----------
    props : pandas.DataFrame
        Left-hand frame; must contain every column in ``keys``.
    acts : pandas.DataFrame
        Right-hand frame; must contain every column in ``keys``.
    keys : list of str
        Columns to join on.
    result_col : str, default "result"
        Column of ``acts`` whose non-null values mark a resolved row.

    Returns
    -------
    int
        Number of matched props rows; 0 when ``keys`` is empty or ``acts``
        has no ``result_col`` column.
    """
    if not keys or result_col not in acts.columns:
        return 0
    # Keep only actuals that carry a result, one row per key combination, so
    # the left join below cannot fan out and count one prop several times.
    resolved_keys = acts.loc[acts[result_col].notna(), keys].drop_duplicates()
    joined = props[keys].merge(resolved_keys, on=keys, how="left", indicator=True)
    return int((joined["_merge"] == "both").sum())


if 'have_props' in globals() and have_props and 'have_acts' in globals() and have_acts:
    # Read both files here rather than relying on frames from earlier cells.
    dfp = pd.read_csv(props_path)
    dfa = pd.read_csv(acts_path)

    # Choose minimal join keys: only columns present in both files.
    keys = [k for k in ["player_key", "market_std", "side"] if k in dfp.columns and k in dfa.columns]
    if "point_key" in dfp.columns and "point_key" in dfa.columns:
        keys_with_point = keys + ["point_key"]
    else:
        keys_with_point = None

    matched = 0
    total = len(dfp)

    if keys_with_point:
        matched = count_matched_props(dfp, dfa, keys_with_point)
        rate = matched/total if total else 0
        print(f"\n[join] strict (incl point_key): matched {matched}/{total} = {rate:.1%}")

    # Fallback without point_key; skipped when the files share no key column,
    # which avoids calling merge with an empty key list.
    if keys:
        matched2 = count_matched_props(dfp, dfa, keys)
        rate2 = matched2/total if total else 0
        print(f"[join] loose  (no point_key)   : matched {matched2}/{total} = {rate2:.1%}")
    else:
        matched2 = 0
        rate2 = 0
        print("[join] loose  (no point_key)   : skipped, no shared key columns")
