In [1]:
import sqlite3

# (Re)build the `watchability` table in the local SQLite file.
# sqlite3.connect creates mlb_data.db if it does not exist yet.
# WARNING: the DROP below discards every previously stored row each time this cell runs.
conn = sqlite3.connect("mlb_data.db")
try:
    cur = conn.cursor()

    # Drop old table if it exists (useful when redesigning schema)
    cur.execute("DROP TABLE IF EXISTS watchability")

    # Recreate with unique game/date and last_updated timestamp
    cur.execute("""
CREATE TABLE watchability (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    game_pk INTEGER NOT NULL,
    game_date TEXT NOT NULL,
    home_team TEXT NOT NULL,
    away_team TEXT NOT NULL,
    playoff_pts INTEGER,
    quality_pts INTEGER,
    score INTEGER,
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, -- when row was first created
    last_updated TIMESTAMP DEFAULT CURRENT_TIMESTAMP, -- updated each run
    UNIQUE (game_pk, game_date) -- prevents duplicates
)
""")

    conn.commit()
finally:
    # Always release the file handle, even if the DDL above fails.
    conn.close()


In [2]:
import statsapi
import sqlite3
from datetime import date

# Date to score, as a YYYY-MM-DD string.
# NOTE: this is pinned to a fixed day, not computed from the current date,
# even though the printed labels below say "today".
target_date = "2025-08-15"
print("Today's date:", target_date)

# Pull that day's games from MLB StatsAPI (start and end date are the same day)
games = statsapi.schedule(start_date=target_date, end_date=target_date)

print(f"Found {len(games)} games today")
for g in games[:3]:  # preview first 3 games
    print(g['game_id'], g['home_name'], "vs", g['away_name'])


Today's date: 2025-08-15
Found 15 games today
776737 Chicago Cubs vs Pittsburgh Pirates
776740 Cincinnati Reds vs Milwaukee Brewers
776739 Washington Nationals vs Philadelphia Phillies


In [3]:
# Fetch current standings for both leagues in one call (AL=103, NL=104).
standings = statsapi.standings_data(leagueId="103,104")
# Iterating the mapping yields its keys: the division IDs (201, 202, ...).
print([division_id for division_id in standings])


[201, 202, 200, 204, 205, 203]


In [4]:
def get_team_record(team_id, standings_data):
    """
    Look up a team in standings_data and summarize its record.

    Parameters:
      team_id        - value matched against each team's 'team_id' entry
      standings_data - mapping whose values each hold a 'teams' list of dicts
                       with 'team_id', 'name', 'w', 'l' and 'gb' entries

    Returns a dict with:
      - 'name'      : team name
      - 'winPct'    : w / (w + l), or 0.0 when the team has no decisions yet
      - 'gamesBack' : games back in division as a float ("-" means 0.0)
    or None when no team with that id is present.
    """
    for division in standings_data.values():
        for team in division['teams']:
            if team['team_id'] != team_id:
                continue

            wins, losses = team['w'], team['l']
            decisions = wins + losses
            # A 0-0 record would otherwise raise ZeroDivisionError.
            win_pct = wins / decisions if decisions else 0.0

            # The division leader's games-back is reported as "-".
            gb = team['gb']
            games_back = 0.0 if gb == "-" else float(gb)

            return {
                'name': team['name'],
                'winPct': win_pct,
                'gamesBack': games_back
            }
    return None


In [5]:
# Smoke-test the standings lookup using both clubs from the first scheduled game.
home_id, away_id = (games[0][key] for key in ('home_id', 'away_id'))

home_team, away_team = (
    get_team_record(club_id, standings) for club_id in (home_id, away_id)
)

# One summary dict per line: home first, then away.
print(home_team, away_team, sep="\n")


{'name': 'Chicago Cubs', 'winPct': 0.5625, 'gamesBack': 7.5}
{'name': 'Pittsburgh Pirates', 'winPct': 0.4444444444444444, 'gamesBack': 24.5}


In [6]:
# Kernel 6 — Main loop (daily insert/update) with SLIDING SCALE playoff logic

import sqlite3

# Open the database used by the scoring loop further down in this cell;
# that loop's trailing statements commit and close this connection.
conn = sqlite3.connect("mlb_data.db")
cur = conn.cursor()

def _wc_gb_value(team_id, standings_data):
    """Return numeric Wild Card distance for a team (0.0 if tied/leading)."""
    for division in standings.values():
        for t in division['teams']:
            if t['team_id'] == team_id:
                raw = t.get('wc_gb', "0")
                if raw in ("-", "+0", "+0.0", "0", "0.0"):
                    return 0.0
                try:
                    return abs(float(str(raw).replace("+", "")))
                except Exception:
                    return 999.0
    return 999.0

def _sliding_playoff_points(distance):
    """
    Convert distance (games back) into playoff points:
      0 GB   -> 30 pts
      3 GB   -> 15 pts
      6+ GB  -> 0 pts
      Linear interpolation in between.
    """
    if distance <= 0:
        return 30
    elif distance >= 6:
        return 0
    elif distance <= 3:
        # slide between 0–3
        return 30 - (distance / 3) * 15
    else:
        # slide between 3–6
        return 15 - ((distance - 3) / 3) * 15

def _playoff_points_single(team_summary, team_id):
    """
    Playoff urgency for a single team.

    Scores the team's division games-back (team_summary['gamesBack']) and its
    Wild Card distance (looked up by team_id in the notebook-level `standings`)
    on the sliding scale, and returns the larger of the two scores.
    """
    distances = (
        team_summary['gamesBack'],            # division race
        _wc_gb_value(team_id, standings),     # wild card race
    )
    return max(_sliding_playoff_points(d) for d in distances)

# Score every scheduled game and upsert one row per (game_pk, game_date).
# Uses the `conn` / `cur` opened earlier in this cell; the try/finally makes sure
# the connection is closed even if scoring or a write fails part-way through.
try:
    for g in games:
        home_id = g['home_id']
        away_id = g['away_id']
        game_pk = g['game_id']
        game_date = g['game_date']

        # Either lookup returns None when the club is not in the standings.
        home_team = get_team_record(home_id, standings)
        away_team = get_team_record(away_id, standings)

        # --- Playoff implications ---
        playoff_pts_home = _playoff_points_single(home_team, home_id) if home_team else 0
        playoff_pts_away = _playoff_points_single(away_team, away_id) if away_team else 0

        # take max urgency
        playoff_pts = max(playoff_pts_home, playoff_pts_away)

        # small synergy bonus if both are alive
        if playoff_pts_home > 0 and playoff_pts_away > 0:
            playoff_pts += 5  # bonus is smaller than before (5 instead of 10)

        # A club missing from the standings contributes no quality points and
        # falls back to the name carried on the schedule entry, instead of
        # raising TypeError on None.
        home_pct = home_team['winPct'] if home_team else 0.0
        away_pct = away_team['winPct'] if away_team else 0.0
        home_name = home_team['name'] if home_team else g['home_name']
        away_name = away_team['name'] if away_team else g['away_name']

        # --- Team quality (lighter weight; only if playoff-relevant overall) ---
        quality_pts = 0
        if playoff_pts > 0:
            if home_pct >= 0.55 and away_pct >= 0.55:
                quality_pts = 20
            elif home_pct >= 0.55 or away_pct >= 0.55:
                quality_pts = 10

        score = round(playoff_pts + quality_pts)

        # True upsert: INSERT OR REPLACE would delete and re-insert the row,
        # resetting `id` and `created_at` on every run. ON CONFLICT ... DO UPDATE
        # keeps the existing row and only refreshes the scored columns.
        # (Requires SQLite 3.24 or newer.)
        cur.execute("""
            INSERT INTO watchability
            (game_pk, game_date, home_team, away_team, playoff_pts, quality_pts, score)
            VALUES (?, ?, ?, ?, ?, ?, ?)
            ON CONFLICT (game_pk, game_date) DO UPDATE SET
                home_team    = excluded.home_team,
                away_team    = excluded.away_team,
                playoff_pts  = excluded.playoff_pts,
                quality_pts  = excluded.quality_pts,
                score        = excluded.score,
                last_updated = CURRENT_TIMESTAMP
        """, (game_pk, game_date, home_name, away_name, playoff_pts, quality_pts, score))

    conn.commit()
finally:
    conn.close()


In [7]:
import pandas as pd

# Read the scored games back for inspection. The connection is closed in
# `finally` so a failed query does not leave the database file open.
conn = sqlite3.connect("mlb_data.db")
try:
    cur = conn.cursor()

    # Quick sanity check: total rows vs unique games (the two counts should match)
    cur.execute("SELECT COUNT(*) AS total_rows, COUNT(DISTINCT game_pk) AS unique_games FROM watchability")
    print("Row count check:", cur.fetchone())

    # Pull table into DataFrame, highest watchability score first
    df = pd.read_sql_query("SELECT * FROM watchability ORDER BY score DESC", conn)
finally:
    conn.close()

df


Row count check: (15, 15)


Unnamed: 0,id,game_pk,game_date,home_team,away_team,playoff_pts,quality_pts,score,created_at,last_updated
0,2,776740,2025-08-15,Cincinnati Reds,Milwaukee Brewers,35.0,10,45,2025-09-09 13:05:20,2025-09-09 13:05:20
1,4,776735,2025-08-15,Toronto Blue Jays,Texas Rangers,35.0,10,45,2025-09-09 13:05:20,2025-09-09 13:05:20
2,14,776727,2025-08-15,Los Angeles Dodgers,San Diego Padres,35.0,10,45,2025-09-09 13:05:20,2025-09-09 13:05:20
3,3,776739,2025-08-15,Washington Nationals,Philadelphia Phillies,30.0,10,40,2025-09-09 13:05:20,2025-09-09 13:05:20
4,9,776732,2025-08-15,Minnesota Twins,Detroit Tigers,30.0,10,40,2025-09-09 13:05:20,2025-09-09 13:05:20
5,5,776736,2025-08-15,New York Mets,Seattle Mariners,35.0,0,35,2025-09-09 13:05:20,2025-09-09 13:05:20
6,11,776731,2025-08-15,St. Louis Cardinals,New York Yankees,25.0,10,35,2025-09-09 13:05:20,2025-09-09 13:05:20
7,8,776733,2025-08-15,Houston Astros,Baltimore Orioles,30.0,0,30,2025-09-09 13:05:20,2025-09-09 13:05:20
8,7,776741,2025-08-15,Boston Red Sox,Miami Marlins,15.0,10,25,2025-09-09 13:05:20,2025-09-09 13:05:20
9,15,776729,2025-08-15,San Francisco Giants,Tampa Bay Rays,20.0,0,20,2025-09-09 13:05:20,2025-09-09 13:05:20
