In [1]:
import sqlite3

# Open the SQLite database (the file is created on first connect)
conn = sqlite3.connect("mlb_data.db")
cur = conn.cursor()

# Statement that discards any previous version of the table, so the schema
# below always takes effect (handy while the schema is still being redesigned).
DROP_WATCHABILITY_SQL = "DROP TABLE IF EXISTS watchability"

# Table definition: one row per (game_pk, game_date), with creation and
# last-update timestamps that both default to the insert time.
CREATE_WATCHABILITY_SQL = """
CREATE TABLE watchability (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    game_pk INTEGER NOT NULL,
    game_date TEXT NOT NULL,
    game_time TEXT,  -- NEW COLUMN
    away_team TEXT NOT NULL,
    home_team TEXT NOT NULL,
    playoff_pts INTEGER,
    quality_pts INTEGER,
    score INTEGER,
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, -- when row was first created
    last_updated TIMESTAMP DEFAULT CURRENT_TIMESTAMP, -- updated each run
    UNIQUE (game_pk, game_date) -- prevents duplicates
)
"""

# Drop first, then recreate, in that order
for statement in (DROP_WATCHABILITY_SQL, CREATE_WATCHABILITY_SQL):
    cur.execute(statement)

conn.commit()
conn.close()


In [2]:
import statsapi
import sqlite3
from datetime import date

# Today's date in ISO form (YYYY-MM-DD); str() of a date is its isoformat()
today = str(date.today())
print("Today's date:", today)

# Ask MLB StatsAPI for the schedule of a one-day window (today only)
games = statsapi.schedule(start_date=today, end_date=today)

print(f"Found {len(games)} games today")

# Preview at most the first three games: id, home team, away team
preview_games = games[:3]
for preview in preview_games:
    print(preview['game_id'], preview['home_name'], "vs", preview['away_name'])


Today's date: 2025-09-12
Found 15 games today
776352 Chicago Cubs vs Tampa Bay Rays
776351 Washington Nationals vs Pittsburgh Pirates
776354 Philadelphia Phillies vs Kansas City Royals


In [3]:
# Fetch standings for both leagues in one call (AL=103, NL=104)
standings = statsapi.standings_data(leagueId="103,104")

# The top-level keys are division IDs (e.g. 201, 202, ...)
division_ids = list(standings)
print(division_ids)


[201, 202, 200, 204, 205, 203]


In [4]:
def get_team_record(team_id, standings_data):
    """
    Look up a team in standings_data and summarise its record.

    Parameters:
      - team_id: value compared against each team's 'team_id' entry
      - standings_data: mapping of division id -> dict with a 'teams' list;
        each team dict must provide 'team_id', 'name', 'w', 'l' and 'gb'

    Returns a dict with:
      - 'name': team name
      - 'winPct': wins / (wins + losses), or 0.0 when no games have been played
      - 'gamesBack': games back in division as a float ("-" is treated as 0.0)
    or None when the team is not present in any division.
    """
    for division in standings_data.values():
        for team in division['teams']:
            if team['team_id'] != team_id:
                continue

            wins, losses = team['w'], team['l']
            games_played = wins + losses
            # A 0-0 record (e.g. opening day) would otherwise divide by zero
            win_pct = wins / games_played if games_played else 0.0

            gb = team['gb']
            # The division leader is reported as "-" rather than a number
            gb_val = 0.0 if gb == "-" else float(gb)

            return {
                'name': team['name'],
                'winPct': win_pct,
                'gamesBack': gb_val
            }
    return None


In [5]:
# Smoke-test the standings lookup using the first scheduled game
first_game = games[0]
home_id, away_id = first_game['home_id'], first_game['away_id']

home_team = get_team_record(home_id, standings)
away_team = get_team_record(away_id, standings)

# One record per line: home first, then away
print(home_team, away_team, sep="\n")


{'name': 'Chicago Cubs', 'winPct': 0.5684931506849316, 'gamesBack': 5.5}
{'name': 'Tampa Bay Rays', 'winPct': 0.4931506849315068, 'gamesBack': 12.0}


In [6]:
# --- Kernel 6.4 — Season-aware playoff logic ---

import sqlite3
from datetime import datetime, date
import pytz

# Database handle used by the scoring loop further down in this cell
conn = sqlite3.connect("mlb_data.db")
cur = conn.cursor()

# --- season-aware flags ---
# "Early season" means any date before July 1 of the current year
today = date.today()
is_early_season = today.month < 7

# --- parameters depending on season ---
# Before July: lower quality floor and a smaller synergy-bonus cap
if is_early_season:
    QUALITY_FLOOR, MAX_SYNERGY_BONUS = 0.520, 10
else:
    QUALITY_FLOOR, MAX_SYNERGY_BONUS = 0.530, 15

def _wc_gb_value(team_id, standings_data):
    """Return numeric Wild Card distance for a team (0.0 if tied/leading)."""
    for division in standings_data.values():
        for t in division['teams']:
            if t['team_id'] == team_id:
                raw = t.get('wc_gb', "0")
                if raw in ("-", "+0", "+0.0", "0", "0.0"):
                    return 0.0
                try:
                    return abs(float(str(raw).replace("+", "")))
                except Exception:
                    return 999.0
    return 999.0

def _sliding_playoff_points(distance):
    """Convert distance (games back) into playoff points (stricter 0–4 GB scale)."""
    if distance <= 0:
        return 30
    elif distance >= 4:
        return 0
    elif distance <= 2:
        return 30 - (distance / 2) * 15
    else:  # 2 < distance < 4
        return 15 - ((distance - 2) / 2) * 15

def _playoff_points_single(team_summary, team_id, standings_data):
    """Combine GB with win% floor for realistic playoff chances."""
    if team_summary['winPct'] < 0.50:
        return 0
    div_gb = team_summary['gamesBack']
    wc_gb = _wc_gb_value(team_id, standings_data)
    div_pts = _sliding_playoff_points(div_gb)
    wc_pts  = _sliding_playoff_points(wc_gb)
    return max(div_pts, wc_pts)

def _division_or_wc_head_to_head_bonus(home_id, away_id, standings_data):
    """Return a 10-point bonus for a direct playoff-race matchup, else 0.

    The bonus requires both teams to be found in the standings and to have
    nonzero playoff points. It is then awarded when the teams share a
    division, or when both are within 3 games in the Wild Card race.
    """
    home_rec = get_team_record(home_id, standings_data)
    away_rec = get_team_record(away_id, standings_data)
    if not (home_rec and away_rec):
        return 0

    # Both clubs must still be alive in some race
    home_alive_pts = _playoff_points_single(home_rec, home_id, standings_data)
    away_alive_pts = _playoff_points_single(away_rec, away_id, standings_data)
    if home_alive_pts == 0 or away_alive_pts == 0:
        return 0

    # Division rivals: both ids appear in the same division's team list
    division_rosters = (
        [member['team_id'] for member in div['teams']]
        for div in standings_data.values()
    )
    if any(home_id in roster and away_id in roster for roster in division_rosters):
        return 10

    # Wild Card proximity: each side within 3 games
    home_wc_gap = _wc_gb_value(home_id, standings_data)
    away_wc_gap = _wc_gb_value(away_id, standings_data)
    return 10 if (home_wc_gap <= 3 and away_wc_gap <= 3) else 0

# --- Main loop over games ---
# Scores every game in `games` and writes one row per (game_pk, game_date)
# into the watchability table. Games whose teams cannot be found in the
# standings are skipped instead of crashing the whole run.

# The display timezone does not change between games; resolve it once
eastern_tz = pytz.timezone("US/Eastern")

# UPSERT instead of INSERT OR REPLACE: REPLACE deletes the existing row and
# inserts a new one, which resets `id` and `created_at` on every run.
# ON CONFLICT ... DO UPDATE keeps the original row and only refreshes the
# scoring columns and `last_updated`. (Requires SQLite 3.24 or newer.)
UPSERT_WATCHABILITY_SQL = """
    INSERT INTO watchability
    (game_pk, game_date, game_time, home_team, away_team, playoff_pts, quality_pts, score, last_updated)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP)
    ON CONFLICT(game_pk, game_date) DO UPDATE SET
        game_time = excluded.game_time,
        home_team = excluded.home_team,
        away_team = excluded.away_team,
        playoff_pts = excluded.playoff_pts,
        quality_pts = excluded.quality_pts,
        score = excluded.score,
        last_updated = CURRENT_TIMESTAMP
"""

try:
    for g in games:
        home_id = g['home_id']
        away_id = g['away_id']
        game_pk = g['game_id']
        game_date = g['game_date']

        # Always parse game_datetime into clean ET string
        raw_iso = str(g.get('game_datetime'))
        utc_dt = datetime.fromisoformat(raw_iso.replace("Z", "+00:00"))
        local_dt = utc_dt.astimezone(eastern_tz)
        game_time = local_dt.strftime("%I:%M %p").lstrip("0") + " ET"

        home_team = get_team_record(home_id, standings)
        away_team = get_team_record(away_id, standings)

        # Without both records there is no win% or name to score or store
        if home_team is None or away_team is None:
            print(f"Skipping game {game_pk}: team not found in standings")
            continue

        playoff_pts_home = _playoff_points_single(home_team, home_id, standings)
        playoff_pts_away = _playoff_points_single(away_team, away_id, standings)
        playoff_pts = max(playoff_pts_home, playoff_pts_away)

        # Cap mismatches (one alive, one dead) at 30
        if (playoff_pts_home > 0) != (playoff_pts_away > 0):
            playoff_pts = min(playoff_pts, 30)

        # Both-alive synergy bonus: tiered by the weaker side's points,
        # never exceeding the season-dependent MAX_SYNERGY_BONUS
        both_alive_bonus = 0
        if playoff_pts_home > 0 and playoff_pts_away > 0:
            if playoff_pts_home >= 20 and playoff_pts_away >= 20:
                both_alive_bonus = min(15, MAX_SYNERGY_BONUS)
            elif playoff_pts_home >= 15 and playoff_pts_away >= 15:
                both_alive_bonus = min(10, MAX_SYNERGY_BONUS)
            elif playoff_pts_home >= 10 and playoff_pts_away >= 10:
                both_alive_bonus = min(5, MAX_SYNERGY_BONUS)
        playoff_pts += both_alive_bonus

        # Head-to-head bonus
        head2head_bonus = _division_or_wc_head_to_head_bonus(home_id, away_id, standings)
        playoff_pts += head2head_bonus

        # Quality points (both ≥ threshold), only for games with playoff stakes
        quality_pts = 0
        if playoff_pts > 0:
            if home_team['winPct'] >= QUALITY_FLOOR and away_team['winPct'] >= QUALITY_FLOOR:
                quality_pts = 20

        score = round(playoff_pts + quality_pts)

        cur.execute(
            UPSERT_WATCHABILITY_SQL,
            (game_pk, game_date, game_time, home_team['name'], away_team['name'],
             playoff_pts, quality_pts, score),
        )

    conn.commit()
finally:
    # Release the database handle even if scoring a game raised
    conn.close()


In [7]:
import pandas as pd

conn = sqlite3.connect("mlb_data.db")
cur = conn.cursor()

# Quick sanity check: total rows vs unique games (the two numbers should match)
row_check_sql = "SELECT COUNT(*) AS total_rows, COUNT(DISTINCT game_pk) AS unique_games FROM watchability"
row_counts = cur.execute(row_check_sql).fetchone()
print("Row count check:", row_counts)

# Load the whole table, highest score first, then release the connection
ranked_sql = "SELECT * FROM watchability ORDER BY score DESC"
df = pd.read_sql_query(ranked_sql, conn)
conn.close()

df


Row count check: (15, 15)


Unnamed: 0,id,game_pk,game_date,game_time,away_team,home_team,playoff_pts,quality_pts,score,created_at,last_updated
0,5,776359,2025-09-12,7:10 PM ET,Texas Rangers,New York Mets,50.0,0,50,2025-09-12 17:23:34,2025-09-12 17:23:34
1,8,776363,2025-09-12,7:10 PM ET,New York Yankees,Boston Red Sox,30.0,20,50,2025-09-12 17:23:34,2025-09-12 17:23:34
2,15,776350,2025-09-12,10:15 PM ET,Los Angeles Dodgers,San Francisco Giants,50.0,0,50,2025-09-12 17:23:34,2025-09-12 17:23:34
3,3,776354,2025-09-12,6:45 PM ET,Kansas City Royals,Philadelphia Phillies,30.0,0,30,2025-09-12 17:23:34,2025-09-12 17:23:34
4,4,776353,2025-09-12,7:07 PM ET,Baltimore Orioles,Toronto Blue Jays,30.0,0,30,2025-09-12 17:23:34,2025-09-12 17:23:34
5,7,776366,2025-09-12,7:10 PM ET,Detroit Tigers,Miami Marlins,30.0,0,30,2025-09-12 17:23:34,2025-09-12 17:23:34
6,9,776360,2025-09-12,7:15 PM ET,Houston Astros,Atlanta Braves,30.0,0,30,2025-09-12 17:23:34,2025-09-12 17:23:34
7,11,776356,2025-09-12,8:10 PM ET,St. Louis Cardinals,Milwaukee Brewers,30.0,0,30,2025-09-12 17:23:34,2025-09-12 17:23:34
8,14,776345,2025-09-12,10:10 PM ET,Los Angeles Angels,Seattle Mariners,30.0,0,30,2025-09-12 17:23:34,2025-09-12 17:23:34
9,13,776358,2025-09-12,10:05 PM ET,Cincinnati Reds,Athletics,18.75,0,19,2025-09-12 17:23:34,2025-09-12 17:23:34


In [8]:
# --- Diagnostic breakdown of scoring (Kernel 6.4 logic) ---

from datetime import date

# Season-aware flags (recomputed here so the breakdown uses current values)
today = date.today()
is_early_season = today < date(today.year, 7, 1)

QUALITY_FLOOR = 0.520 if is_early_season else 0.530
MAX_SYNERGY_BONUS = 10 if is_early_season else 15

# The display timezone does not change between games; resolve it once
eastern_tz = pytz.timezone("US/Eastern")

# Recompute each game's score step by step and print every component.
# Nothing is written to the database in this cell.
for g in games:
    home_id = g['home_id']
    away_id = g['away_id']
    game_pk = g['game_id']
    game_date = g['game_date']

    # Always parse clean game_time
    raw_iso = str(g.get('game_datetime'))
    utc_dt = datetime.fromisoformat(raw_iso.replace("Z", "+00:00"))
    local_dt = utc_dt.astimezone(eastern_tz)
    game_time = local_dt.strftime("%I:%M %p").lstrip("0") + " ET"

    home_team = get_team_record(home_id, standings)
    away_team = get_team_record(away_id, standings)

    # Without both records there is no win% or name to report; the lookups
    # below would otherwise fail with a TypeError on None
    if home_team is None or away_team is None:
        print(f"\nGame {game_pk}: skipped (team not found in standings)")
        continue

    playoff_pts_home = _playoff_points_single(home_team, home_id, standings)
    playoff_pts_away = _playoff_points_single(away_team, away_id, standings)
    playoff_pts = max(playoff_pts_home, playoff_pts_away)

    # Cap mismatches (one alive, one dead) at 30
    if (playoff_pts_home > 0) != (playoff_pts_away > 0):
        playoff_pts = min(playoff_pts, 30)

    # Both-alive synergy bonus: tiered by the weaker side's points,
    # never exceeding the season-dependent MAX_SYNERGY_BONUS
    both_alive_bonus = 0
    if playoff_pts_home > 0 and playoff_pts_away > 0:
        if playoff_pts_home >= 20 and playoff_pts_away >= 20:
            both_alive_bonus = min(15, MAX_SYNERGY_BONUS)
        elif playoff_pts_home >= 15 and playoff_pts_away >= 15:
            both_alive_bonus = min(10, MAX_SYNERGY_BONUS)
        elif playoff_pts_home >= 10 and playoff_pts_away >= 10:
            both_alive_bonus = min(5, MAX_SYNERGY_BONUS)
    playoff_pts += both_alive_bonus

    # Head-to-head bonus
    head2head_bonus = _division_or_wc_head_to_head_bonus(home_id, away_id, standings)
    playoff_pts += head2head_bonus

    # Quality points (both ≥ threshold), only for games with playoff stakes
    quality_pts = 0
    if playoff_pts > 0:
        if home_team['winPct'] >= QUALITY_FLOOR and away_team['winPct'] >= QUALITY_FLOOR:
            quality_pts = 20

    score = round(playoff_pts + quality_pts)

    # 🔍 PRINT DEBUG INFO
    print(f"\nGame: {away_team['name']} vs {home_team['name']} ({game_time})")
    print(f"  Home playoff_pts: {playoff_pts_home}")
    print(f"  Away playoff_pts: {playoff_pts_away}")
    print(f"  Head-to-head bonus: {head2head_bonus}")
    print(f"  Both-alive bonus: {both_alive_bonus}")
    print(f"  Quality_pts: {quality_pts}")
    print(f"  Final Score: {score}")



Game: Chicago Cubs vs Tampa Bay Rays (2:20 PM ET)
  Home playoff_pts: 0
  Away playoff_pts: 0
  Head-to-head bonus: 0
  Both-alive bonus: 0
  Quality_pts: 0
  Final Score: 0

Game: Washington Nationals vs Pittsburgh Pirates (6:45 PM ET)
  Home playoff_pts: 0
  Away playoff_pts: 0
  Head-to-head bonus: 0
  Both-alive bonus: 0
  Quality_pts: 0
  Final Score: 0

Game: Philadelphia Phillies vs Kansas City Royals (6:45 PM ET)
  Home playoff_pts: 30
  Away playoff_pts: 0
  Head-to-head bonus: 0
  Both-alive bonus: 0
  Quality_pts: 0
  Final Score: 30

Game: Toronto Blue Jays vs Baltimore Orioles (7:07 PM ET)
  Home playoff_pts: 30
  Away playoff_pts: 0
  Head-to-head bonus: 0
  Both-alive bonus: 0
  Quality_pts: 0
  Final Score: 30

Game: New York Mets vs Texas Rangers (7:10 PM ET)
  Home playoff_pts: 30
  Away playoff_pts: 15.0
  Head-to-head bonus: 10
  Both-alive bonus: 10
  Quality_pts: 0
  Final Score: 50

Game: Cleveland Guardians vs Chicago White Sox (7:10 PM ET)
  Home playoff_pts: 