In [1]:
import sqlite3

# Connect to SQLite (creates the db file if it doesn't exist)
conn = sqlite3.connect("mlb_data.db")
try:
    cur = conn.cursor()

    # Drop the old table if it exists (useful when redesigning the schema).
    # NOTE: this discards every previously stored row.
    cur.execute("DROP TABLE IF EXISTS watchability")

    # Recreate with a unique game/date pair and created/updated timestamps.
    # playoff_pts is REAL because the sliding-scale scoring produces
    # fractional values (e.g. 37.5); quality_pts and score are whole numbers.
    cur.execute("""
    CREATE TABLE watchability (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        game_pk INTEGER NOT NULL,
        game_date TEXT NOT NULL,
        game_time TEXT,  -- formatted start time, nullable
        home_team TEXT NOT NULL,
        away_team TEXT NOT NULL,
        playoff_pts REAL,
        quality_pts INTEGER,
        score INTEGER,
        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, -- when row was first created
        last_updated TIMESTAMP DEFAULT CURRENT_TIMESTAMP, -- updated each run
        UNIQUE (game_pk, game_date) -- prevents duplicates
    )
    """)

    conn.commit()
finally:
    # Always release the file handle, even if a statement above failed
    conn.close()


In [2]:
import statsapi
import sqlite3
from datetime import date

# ISO-formatted (YYYY-MM-DD) date for today, used as both ends of the schedule window
today = date.today().isoformat()
print("Today's date:", today)

# Single-day schedule request to MLB StatsAPI
schedule_window = {"start_date": today, "end_date": today}
games = statsapi.schedule(**schedule_window)
print(f"Found {len(games)} games today")

# Show at most the first three games as a quick preview
preview = games[:3]
for g in preview:
    print(g['game_id'], g['home_name'], "vs", g['away_name'])


Today's date: 2025-09-09
Found 15 games today
776401 Baltimore Orioles vs Pittsburgh Pirates
776403 Cleveland Guardians vs Kansas City Royals
776397 Miami Marlins vs Washington Nationals


In [3]:
# Fetch standings for both leagues in one call (AL=103, NL=104)
standings = statsapi.standings_data(leagueId="103,104")

# Iterating the dict yields its keys, which are division IDs like 201, 202, etc.
division_ids = list(standings)
print(division_ids)


[201, 202, 200, 204, 205, 203]


In [4]:
def get_team_record(team_id, standings_data):
    """
    Look up a team in standings_data and summarize its record.

    Parameters:
      - team_id: value compared against each team's 'team_id' entry
      - standings_data: mapping of division -> {'teams': [team dicts]}, where
        each team dict carries 'team_id', 'name', 'w', 'l' and 'gb'

    Returns a dict with:
      - 'name': team name
      - 'winPct': wins / (wins + losses), or 0.0 when no games have been played
      - 'gamesBack': games back in division as a float ("-" is treated as 0.0)
    or None when the team is not present in any division.
    """
    for division in standings_data.values():
        for team in division['teams']:
            if team['team_id'] != team_id:
                continue

            w, l = team['w'], team['l']
            games_played = w + l
            # Guard against 0-0 records (e.g. before a team's first game),
            # which would otherwise raise ZeroDivisionError.
            winPct = w / games_played if games_played else 0.0

            gb = team['gb']
            gb_val = 0.0 if gb == "-" else float(gb)

            return {
                'name': team['name'],
                'winPct': winPct,
                'gamesBack': gb_val
            }
    return None


In [5]:
# Test lookup on the first game.
# Guarded so an empty schedule (no games returned for today) prints a note
# instead of raising IndexError on games[0].
if games:
    home_id = games[0]['home_id']
    away_id = games[0]['away_id']

    home_team = get_team_record(home_id, standings)
    away_team = get_team_record(away_id, standings)

    print(home_team)
    print(away_team)
else:
    print("No games scheduled today; nothing to look up.")


{'name': 'Baltimore Orioles', 'winPct': 0.46153846153846156, 'gamesBack': 16.0}
{'name': 'Pittsburgh Pirates', 'winPct': 0.4444444444444444, 'gamesBack': 24.5}


In [6]:
# --- Cell 6 (rewritten) — Main loop with improved playoff logic and clean game_time ---

import sqlite3
from datetime import datetime
import pytz

# Re-open the database for this cell's writes; `cur` is the cursor the
# main loop below executes its INSERT statements against.
conn = sqlite3.connect("mlb_data.db")
cur = conn.cursor()

def _wc_gb_value(team_id, standings_data):
    """Return numeric Wild Card distance for a team (0.0 if tied/leading).

    Parameters:
      - team_id: value compared against each team's 'team_id' entry
      - standings_data: mapping of division -> {'teams': [team dicts]}; the
        optional 'wc_gb' entry of the matching team is read

    Returns:
      - 0.0 when 'wc_gb' is missing, "-", or carries a leading "+"
      - the absolute numeric value when 'wc_gb' parses as a number
      - 999.0 (an "out of range" sentinel) when the value cannot be parsed
        or the team is not found in any division
    """
    for division in standings_data.values():
        for t in division['teams']:
            if t['team_id'] != team_id:
                continue

            raw = str(t.get('wc_gb', "0")).strip()
            # A leading "+" is read as "ahead of the cut line", matching the
            # documented 0.0-when-leading contract. Taking abs() of such a
            # value would score a team that leads by 3 games as if it trailed
            # by 3. NOTE(review): confirm against the StatsAPI wc_gb format.
            if raw == "-" or raw.startswith("+"):
                return 0.0
            try:
                return abs(float(raw))
            except ValueError:
                # Non-numeric marker: treat as far out of contention
                return 999.0
    return 999.0

def _sliding_playoff_points(distance):
    """Map a games-back distance onto a 0-30 playoff-points scale.

    At or ahead of the line (distance <= 0) earns the full 30; six or more
    games back earns 0. In between, the value falls linearly: from 30 down
    to 15 over the first three games, then from 15 down to 0 over the next
    three.
    """
    # Clamp the two ends of the scale first
    if distance <= 0:
        return 30
    if distance >= 6:
        return 0

    # Far half of the window: 3 < distance < 6
    if distance > 3:
        games_past_midpoint = distance - 3
        return 15 - (games_past_midpoint / 3) * 15

    # Near half of the window: 0 < distance <= 3
    return 30 - (distance / 3) * 15

def _playoff_points_single(team_summary, team_id, standings_data):
    """Return a team's playoff points: the better of its division and Wild Card scores.

    team_summary supplies the division distance via its 'gamesBack' entry;
    the Wild Card distance is looked up from standings_data by team_id.
    """
    # Division distance first, Wild Card second (same evaluation order as a
    # pairwise max over the two converted values)
    distances = (
        team_summary['gamesBack'],
        _wc_gb_value(team_id, standings_data),
    )
    return max(_sliding_playoff_points(games_back) for games_back in distances)

def _division_or_wc_head_to_head_bonus(home_id, away_id, standings_data):
    """Return a 10-point bonus for a direct-rival matchup, else 0.

    The bonus applies when both teams appear in the same division, or when
    both teams' Wild Card distances are at most 3. If either team cannot be
    found in standings_data, no bonus is given.
    """
    # Both clubs must resolve to a standings record
    records = [get_team_record(club_id, standings_data) for club_id in (home_id, away_id)]
    if not all(records):
        return 0

    # Same division?
    for division in standings_data.values():
        roster_ids = [club['team_id'] for club in division['teams']]
        if home_id in roster_ids and away_id in roster_ids:
            return 10

    # Wild Card proximity (within 3 games)
    wc_distances = [_wc_gb_value(club_id, standings_data) for club_id in (home_id, away_id)]
    return 10 if all(gap <= 3 for gap in wc_distances) else 0

# --- Main loop over games ---
# For each scheduled game: format the start time, compute playoff/quality
# points, and upsert one row keyed on (game_pk, game_date).
eastern_tz = pytz.timezone("US/Eastern")  # loop-invariant, so built once

try:
    for g in games:
        home_id = g['home_id']
        away_id = g['away_id']
        game_pk = g['game_id']
        game_date = g['game_date']  # YYYY-MM-DD

        # Parse game_datetime into a clean ET string. A missing or malformed
        # value leaves game_time NULL (the column is nullable) instead of
        # aborting the whole run.
        raw_iso = g.get('game_datetime')
        try:
            utc_dt = datetime.fromisoformat(str(raw_iso).replace("Z", "+00:00"))
            local_dt = utc_dt.astimezone(eastern_tz)
            game_time = local_dt.strftime("%I:%M %p").lstrip("0") + " ET"
        except ValueError:
            game_time = None

        home_team = get_team_record(home_id, standings)
        away_team = get_team_record(away_id, standings)

        # A club missing from the standings contributes no points; its display
        # name falls back to the schedule entry so the row can still be stored.
        home_name = home_team['name'] if home_team else g['home_name']
        away_name = away_team['name'] if away_team else g['away_name']
        home_pct = home_team['winPct'] if home_team else 0.0
        away_pct = away_team['winPct'] if away_team else 0.0

        playoff_pts_home = _playoff_points_single(home_team, home_id, standings) if home_team else 0
        playoff_pts_away = _playoff_points_single(away_team, away_id, standings) if away_team else 0
        playoff_pts = max(playoff_pts_home, playoff_pts_away)

        # Extra points when both clubs are still in the race, tiered by how
        # close the weaker of the two is.
        if playoff_pts_home > 0 and playoff_pts_away > 0:
            if playoff_pts_home >= 20 and playoff_pts_away >= 20:
                playoff_pts += 15
            elif playoff_pts_home >= 10 and playoff_pts_away >= 10:
                playoff_pts += 10
            else:
                playoff_pts += 5

        playoff_pts += _division_or_wc_head_to_head_bonus(home_id, away_id, standings)

        # Quality points only count when the game has playoff relevance
        quality_pts = 0
        if playoff_pts > 0:
            if home_pct >= 0.55 and away_pct >= 0.55:
                quality_pts = 20
            elif home_pct >= 0.55 or away_pct >= 0.55:
                quality_pts = 10

        score = round(playoff_pts + quality_pts)

        # Upsert rather than INSERT OR REPLACE: REPLACE deletes the old row,
        # which resets both id and created_at on every run. ON CONFLICT keeps
        # the original row and only refreshes the mutable columns.
        # (UPSERT syntax requires SQLite 3.24 or newer.)
        cur.execute("""
            INSERT INTO watchability
            (game_pk, game_date, game_time, home_team, away_team, playoff_pts, quality_pts, score, last_updated)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP)
            ON CONFLICT(game_pk, game_date) DO UPDATE SET
                game_time = excluded.game_time,
                home_team = excluded.home_team,
                away_team = excluded.away_team,
                playoff_pts = excluded.playoff_pts,
                quality_pts = excluded.quality_pts,
                score = excluded.score,
                last_updated = CURRENT_TIMESTAMP
        """, (game_pk, game_date, game_time, home_name, away_name,
              playoff_pts, quality_pts, score))

    conn.commit()
finally:
    # Release the database even if a game failed part-way through the loop
    conn.close()


In [7]:
import pandas as pd

conn = sqlite3.connect("mlb_data.db")
cur = conn.cursor()

# Sanity check: the total row count should match the number of distinct games
count_sql = "SELECT COUNT(*) AS total_rows, COUNT(DISTINCT game_pk) AS unique_games FROM watchability"
row_counts = cur.execute(count_sql).fetchone()
print("Row count check:", row_counts)

# Load the whole table, highest score first, then release the connection
ranked_sql = "SELECT * FROM watchability ORDER BY score DESC"
df = pd.read_sql_query(ranked_sql, conn)
conn.close()

df


Row count check: (15, 15)


Unnamed: 0,id,game_pk,game_date,game_time,home_team,away_team,playoff_pts,quality_pts,score,created_at,last_updated
0,4,776404,2025-09-09,6:45 PM ET,Philadelphia Phillies,New York Mets,55.0,10,65,2025-09-09 14:15:59,2025-09-09 14:15:59
1,5,776398,2025-09-09,7:05 PM ET,New York Yankees,Detroit Tigers,45.0,20,65,2025-09-09 14:15:59,2025-09-09 14:15:59
2,6,776393,2025-09-09,7:07 PM ET,Toronto Blue Jays,Houston Astros,55.0,10,65,2025-09-09 14:15:59,2025-09-09 14:15:59
3,9,776395,2025-09-09,8:05 PM ET,Texas Rangers,Milwaukee Brewers,55.0,10,65,2025-09-09 14:15:59,2025-09-09 14:15:59
4,15,776389,2025-09-09,10:10 PM ET,Los Angeles Dodgers,Colorado Rockies,40.0,10,50,2025-09-09 14:15:59,2025-09-09 14:15:59
5,2,776403,2025-09-09,6:40 PM ET,Cleveland Guardians,Kansas City Royals,37.5,0,38,2025-09-09 14:15:59,2025-09-09 14:15:59
6,11,776392,2025-09-09,9:40 PM ET,San Diego Padres,Cincinnati Reds,35.0,0,35,2025-09-09 14:15:59,2025-09-09 14:15:59
7,12,776391,2025-09-09,9:40 PM ET,Seattle Mariners,St. Louis Cardinals,35.0,0,35,2025-09-09 14:15:59,2025-09-09 14:15:59
8,13,776394,2025-09-09,9:45 PM ET,San Francisco Giants,Arizona Diamondbacks,30.0,0,30,2025-09-09 14:15:59,2025-09-09 14:15:59
9,14,776396,2025-09-09,10:05 PM ET,Athletics,Boston Red Sox,15.0,10,25,2025-09-09 14:15:59,2025-09-09 14:15:59
