In [1]:
!pip install pandas requests beautifulsoup4 lxml



In [2]:
!pip install pandas requests




In [3]:
import pandas as pd

# Read the blank dashboard template; the path is relative to the notebook's working directory
file_path = "./data/mlb_betting_dashboard_template.csv"
dashboard = pd.read_csv(filepath_or_buffer=file_path)

# Preview the template (it is expected to hold only the column headers)
dashboard.head(n=5)

Unnamed: 0,Game,SP (ERA/WHIP),Bullpen ERA,Team RPG (L7),Team OPS (L7),Opp RPG (L7),Opp OPS (L7),Weather,Notes


In [4]:
import requests
import pandas as pd
from datetime import datetime
from zoneinfo import ZoneInfo


# Teams whose games should appear in the schedule table
teams_of_interest = ['NYM', 'LAD', 'NYY', 'DET', 'SEA', 'CHC']

# Today's date in Pacific time, formatted YYYY-MM-DD for the API's `date` parameter
today = datetime.now(ZoneInfo("America/Los_Angeles")).strftime('%Y-%m-%d')

# MLB API endpoint for today's schedule
url = f'https://statsapi.mlb.com/api/v1/schedule?sportId=1&date={today}&hydrate=team,linescore,probablePitcher'

# Fetch the schedule data. The timeout stops the cell from hanging forever on a
# stalled connection, and raise_for_status turns an HTTP error into a clear
# exception instead of a confusing KeyError further down.
response = requests.get(url, timeout=10)
response.raise_for_status()
data = response.json()

# One dict per game that involves a tracked team
games_list = []

# Iterate through the games (a payload without 'dates' is treated as "no games")
for date_info in data.get('dates', []):
    for game in date_info.get('games', []):
        home_team = game['teams']['home']['team']['abbreviation']
        away_team = game['teams']['away']['team']['abbreviation']
        # `or {}` also covers a probablePitcher that is present but null
        home_pitcher = (game['teams']['home'].get('probablePitcher') or {}).get('fullName', 'TBD')
        away_pitcher = (game['teams']['away'].get('probablePitcher') or {}).get('fullName', 'TBD')

        # Keep the game if either side is a tracked team
        if home_team in teams_of_interest or away_team in teams_of_interest:
            games_list.append({
                'Home Team': home_team,
                'Away Team': away_team,
                'Home Pitcher': home_pitcher,
                'Away Pitcher': away_pitcher
            })

# Explicit columns keep the table well-formed even when no tracked team plays today
df = pd.DataFrame(games_list, columns=['Home Team', 'Away Team', 'Home Pitcher', 'Away Pitcher'])

# Display the DataFrame
df

Unnamed: 0,Home Team,Away Team,Home Pitcher,Away Pitcher
0,MIA,CHC,Max Meyer,Cade Horton
1,STL,DET,Andre Pallante,Brant Hurter
2,CWS,SEA,Shane Smith,Logan Evans
3,BOS,NYM,Garrett Crochet,Tylor Megill
4,NYY,TEX,Ryan Yarbrough,Jacob deGrom
5,LAD,AZ,Dustin May,Corbin Burnes


In [None]:
import requests
import pandas as pd
from datetime import datetime
from zoneinfo import ZoneInfo

# Step 1: Get today's date (Pacific time, YYYY-MM-DD)
today = datetime.now(ZoneInfo("America/Los_Angeles")).strftime('%Y-%m-%d')

# Step 2: Get today's games from the MLB API.
# The timeout avoids an indefinite hang; raise_for_status reports HTTP errors explicitly.
schedule_url = f'https://statsapi.mlb.com/api/v1/schedule?sportId=1&date={today}&hydrate=team,linescore,probablePitcher'
schedule_response = requests.get(schedule_url, timeout=10)
schedule_response.raise_for_status()
schedule_data = schedule_response.json()

# Step 3: Define your teams
teams_of_interest = ['NYM', 'LAD', 'NYY', 'DET', 'SEA', 'CHC']

# Step 4: Collect every announced probable pitcher (tracked teams AND their
# opponents), home side first and then away side for each game.
pitchers = []

for date_info in schedule_data.get("dates", []):
    for game in date_info.get("games", []):
        for side in ("home", "away"):
            team_entry = game["teams"][side]
            probable = team_entry.get("probablePitcher")
            if not probable:
                # No probable pitcher announced for this side yet
                continue
            pitchers.append({
                "Team": team_entry["team"]["abbreviation"],
                "Pitcher Name": probable["fullName"],
                "Pitcher ID": probable["id"]
            })

# Step 5: For each pitcher, get the three most recent starts and calculate ERA and WHIP

# Placeholder shown whenever ERA/WHIP cannot be computed from three starts
NOT_ENOUGH_STARTS = "Fewer than 3 Starts"


def parse_ip(ip_str):
    """Convert an innings-pitched value such as "5.2" into a number of innings.

    The digit after the dot counts thirds of an inning, not tenths:
    "5.1" -> 5 + 1/3 and "5.2" -> 5 + 2/3. Any other fractional part is
    ignored and the whole-inning count is returned. Values without a dot
    are converted with float().
    """
    ip_str = str(ip_str)
    if '.' not in ip_str:
        return float(ip_str)
    whole, partial = ip_str.split('.')
    thirds = {'1': 1, '2': 2}.get(partial)
    if thirds is None:
        return int(whole)
    return int(whole) + thirds / 3


def _pitcher_row(pitcher, handedness, era=NOT_ENOUGH_STARTS, whip=NOT_ENOUGH_STARTS):
    """Build one output record; ERA and WHIP default to the placeholder text."""
    return {
        "Team": pitcher["Team"],
        "Pitcher": pitcher["Pitcher Name"],
        "ERA (Last 3 Starts)": era,
        "WHIP (Last 3 Starts)": whip,
        "Pitcher Handedness": (handedness or "").upper()
    }


pitcher_stats = []

for p in pitchers:
    player_id = p['Pitcher ID']

    # Get handedness (timeout: fail fast instead of hanging on a stalled request)
    info_url = f"https://statsapi.mlb.com/api/v1/people/{player_id}"
    info_response = requests.get(info_url, timeout=10)
    info_data = info_response.json()
    # `or [{}]` also covers an empty "people" list, which would otherwise raise IndexError
    people = info_data.get("people") or [{}]
    handedness = people[0].get("pitchHand", {}).get("code", "")

    # Get game logs
    logs_url = f"https://statsapi.mlb.com/api/v1/people/{player_id}/stats?stats=gameLog&group=pitching"
    logs_response = requests.get(logs_url, timeout=10)
    logs_data = logs_response.json()

    # A missing or empty game log section simply yields no starts
    stat_groups = logs_data.get('stats') or [{}]
    game_logs = stat_groups[0].get('splits') or []

    # Walk the log newest-first so the three starts collected are the latest ones.
    # Assumes each split carries an ISO 'date' string (TODO confirm against the API);
    # splits without one keep their original relative order because the sort is stable.
    recent_first = sorted(game_logs, key=lambda split: split.get('date', ''), reverse=True)

    starts = []
    for game in recent_first:
        stat = game['stat']
        if stat.get('gamesStarted', 0) > 0:
            starts.append({
                "ER": stat['earnedRuns'],
                "IP": stat['inningsPitched'],
                "BB": stat['baseOnBalls'],
                "H": stat['hits']
            })
        if len(starts) == 3:
            break

    if len(starts) < 3:
        pitcher_stats.append(_pitcher_row(p, handedness))
        continue

    total_er = sum(s['ER'] for s in starts)
    total_bb = sum(s['BB'] for s in starts)
    total_h = sum(s['H'] for s in starts)
    total_ip = sum(parse_ip(s['IP']) for s in starts)

    # Zero innings across the three starts would divide by zero below
    if total_ip == 0:
        pitcher_stats.append(_pitcher_row(p, handedness))
        continue

    era = round((total_er / total_ip) * 9, 2)
    whip = round((total_bb + total_h) / total_ip, 2)

    pitcher_stats.append(_pitcher_row(p, handedness, era=era, whip=whip))

# Step 6: Display the result (explicit columns keep later merges working when the list is empty)
pitcher_stats_df = pd.DataFrame(pitcher_stats, columns=[
    "Team", "Pitcher", "ERA (Last 3 Starts)", "WHIP (Last 3 Starts)", "Pitcher Handedness"
])
pitcher_stats_df


In [None]:
import requests
import pandas as pd
from datetime import datetime, timedelta
from zoneinfo import ZoneInfo

# Get today's date (Pacific time, YYYY-MM-DD)
today = datetime.now(ZoneInfo("America/Los_Angeles")).strftime('%Y-%m-%d')

# Get today's games from MLB API.
# The timeout avoids an indefinite hang; raise_for_status reports HTTP errors explicitly.
schedule_url = f'https://statsapi.mlb.com/api/v1/schedule?sportId=1&date={today}&hydrate=team,linescore,probablePitcher'
schedule_response = requests.get(schedule_url, timeout=10)
schedule_response.raise_for_status()
schedule_data = schedule_response.json()

# Step 1: Build matchups for your 6 teams
teams_of_interest = ['NYM', 'LAD', 'NYY', 'DET', 'SEA', 'CHC']
matchups = []

for date_info in schedule_data.get('dates', []):
    for game in date_info.get('games', []):
        home_team = game['teams']['home']['team']
        away_team = game['teams']['away']['team']
        home_abbr = home_team['abbreviation']
        away_abbr = away_team['abbreviation']

        # The home side wins the tie when both clubs are tracked, so such a game
        # is recorded once, from the home team's perspective.
        if home_abbr in teams_of_interest:
            tracked, other = home_team, away_team
        elif away_abbr in teams_of_interest:
            tracked, other = away_team, home_team
        else:
            continue

        my_team, my_team_id = tracked['abbreviation'], tracked['id']
        opponent, opponent_id = other['abbreviation'], other['id']

        matchups.append({
            "Team": my_team,
            "Opponent": opponent,
            "Team ID": my_team_id,
            "Opponent ID": opponent_id
        })

# Step 2: Get RPG and OPS for both teams in each matchup
def get_rpg_ops(team_id, num_games=7, window_days=14):
    """Return (runs per game, mean per-game OPS) over a team's most recent games.

    Requests the team's hitting game log for the last `window_days` days and
    averages the `num_games` most recent entries. OPS is the plain mean of the
    per-game OPS values, not a recomputed aggregate.

    Parameters:
        team_id: MLB team id used in the stats URL.
        num_games: how many of the most recent games to average (default 7).
        window_days: how far back the game log request reaches (default 14).

    Returns:
        (rpg, ops) rounded to 2 and 3 decimals, or (None, None) when the
        response contains no game log entries.
    """
    end_date = datetime.now()
    start_date = end_date - timedelta(days=window_days)
    start = start_date.strftime('%Y-%m-%d')
    end = end_date.strftime('%Y-%m-%d')

    url = f"https://statsapi.mlb.com/api/v1/teams/{team_id}/stats?stats=gameLog&startDate={start}&endDate={end}&group=hitting"
    # Timeout: fail fast instead of hanging the notebook on a stalled request
    response = requests.get(url, timeout=10)
    data = response.json()

    stat_groups = data.get("stats") or [{}]
    splits = stat_groups[0].get("splits") or []

    # Sort newest-first so the slice really is the *latest* games in the window.
    # Assumes each split carries an ISO 'date' string (TODO confirm against the API);
    # entries without one keep their original relative order because the sort is stable.
    recent = sorted(splits, key=lambda split: split.get("date", ""), reverse=True)[:num_games]
    if not recent:
        return None, None

    total_runs = sum(int(game["stat"].get("runs", 0)) for game in recent)
    total_ops = sum(float(game["stat"].get("ops", 0)) for game in recent)

    rpg = round(total_runs / len(recent), 2)
    ops = round(total_ops / len(recent), 3)
    return rpg, ops

# Collect recent offence for the tracked team and its opponent, one record per matchup
team_stats = []
for matchup in matchups:
    # Tracked team is queried first, then the opponent
    (team_rpg, team_ops), (opp_rpg, opp_ops) = (
        get_rpg_ops(matchup[id_key]) for id_key in ("Team ID", "Opponent ID")
    )

    record = dict(zip(
        ("Team", "Opponent", "Team RPG (L7)", "Team OPS (L7)", "Opp RPG (L7)", "Opp OPS (L7)"),
        (matchup["Team"], matchup["Opponent"], team_rpg, team_ops, opp_rpg, opp_ops),
    ))
    team_stats.append(record)

rpg_ops_df = pd.DataFrame(team_stats)

# Step 3: Build final dashboard (matchup-based)
matchups_df = pd.DataFrame(matchups).drop(["Team ID", "Opponent ID"], axis=1)

# Tracked team's pitcher: the stats frame already uses the desired column names,
# so an unmodified copy is merged in.
team_pitching = pitcher_stats_df.copy()
matchups_df = pd.merge(matchups_df, team_pitching, how="left", on="Team")

# Opponent's pitcher: same stats frame, relabelled so it joins on "Opponent"
opponent_labels = {
    "Team": "Opponent",
    "Pitcher": "Opponent Pitcher",
    "ERA (Last 3 Starts)": "Opponent ERA (Last 3 Starts)",
    "WHIP (Last 3 Starts)": "Opponent WHIP (Last 3 Starts)",
    "Pitcher Handedness": "Opponent Handedness",
}
opponent_pitching = pitcher_stats_df.rename(columns=opponent_labels)
matchups_df = pd.merge(matchups_df, opponent_pitching, how="left", on="Opponent")

# Attach the recent offensive numbers for both sides
final_dashboard = pd.merge(matchups_df, rpg_ops_df, how="left", on=["Team", "Opponent"])

# Translate the opposing pitcher's throwing hand into a risk tag (anything else maps to NaN)
handedness_risk = {"R": "rhh", "L": "lhh"}
final_dashboard["Opponent Handedness Risk"] = final_dashboard["Opponent Handedness"].map(handedness_risk)

# Presentation order of the columns
column_order = [
    "Team", "Opponent",
    "Pitcher", "Pitcher Handedness", "ERA (Last 3 Starts)", "WHIP (Last 3 Starts)",
    "Opponent Pitcher", "Opponent Handedness", "Opponent ERA (Last 3 Starts)", "Opponent WHIP (Last 3 Starts)",
    "Team RPG (L7)", "Team OPS (L7)", "Opp RPG (L7)", "Opp OPS (L7)",
    "Opponent Handedness Risk",
]
final_dashboard = final_dashboard[column_order]

# Step 4: Add Lean Call, Strength, and Reason
def generate_lean(row):
    """Return the lean call for one matchup row.

    A pitching edge means the opponent's ERA is at least 1.0 higher AND the
    opponent's WHIP is at least 0.15 higher; a batting edge means the team's
    OPS is at least 0.05 higher AND its RPG is at least 0.5 higher. The
    disadvantages are the mirror images.

    Returns:
        "Lean RL"   - both edges
        "Lean ML"   - exactly one edge
        "Fade ML"   - both disadvantages
        "Stay Away" - anything else, including rows whose stats are missing
                      or not numeric (e.g. "Fewer than 3 Starts")
    """
    try:
        era = float(row["ERA (Last 3 Starts)"])
        whip = float(row["WHIP (Last 3 Starts)"])
        opp_era = float(row["Opponent ERA (Last 3 Starts)"])
        opp_whip = float(row["Opponent WHIP (Last 3 Starts)"])
        ops = float(row["Team OPS (L7)"])
        opp_ops = float(row["Opp OPS (L7)"])
        rpg = float(row["Team RPG (L7)"])
        opp_rpg = float(row["Opp RPG (L7)"])
    except (KeyError, TypeError, ValueError):
        # Only data problems are treated as "no call"; interrupts and
        # programming errors are no longer swallowed.
        return "Stay Away"

    # NaN inputs make every comparison False, which falls through to "Stay Away"
    pitching_edge = (opp_era - era >= 1.0) and (opp_whip - whip >= 0.15)
    pitching_disadv = (era - opp_era >= 1.0) and (whip - opp_whip >= 0.15)
    batting_edge = (ops - opp_ops >= 0.05) and (rpg - opp_rpg >= 0.5)
    batting_disadv = (opp_ops - ops >= 0.05) and (opp_rpg - rpg >= 0.5)

    if pitching_edge and batting_edge:
        return "Lean RL"
    elif pitching_edge or batting_edge:
        return "Lean ML"
    elif pitching_disadv and batting_disadv:
        return "Fade ML"
    else:
        return "Stay Away"

def classify_strength(row):
    """Grade a lean call as "Strong", "Moderate" or "Normal" from the team's own pitcher.

    Reads the row's "Lean Call" plus the pitcher's ERA and WHIP over the last
    three starts. Rows whose ERA/WHIP are missing or not numeric are "Normal".
    """
    try:
        era = float(row["ERA (Last 3 Starts)"])
        whip = float(row["WHIP (Last 3 Starts)"])
    except (KeyError, TypeError, ValueError):
        # Only data problems fall back to "Normal"; other exceptions propagate
        return "Normal"

    lean_call = row["Lean Call"]
    if lean_call.startswith("Lean ML"):
        if era < 2.0 and whip < 1.00:
            return "Strong"
        elif era < 3.0:
            return "Moderate"
    elif lean_call.startswith("Lean RL"):
        if era < 2.5 and whip < 1.05:
            return "Strong"
        elif era < 3.5:
            return "Moderate"
    elif lean_call.startswith("Fade ML"):
        # For a fade, a *worse* pitcher makes the call stronger
        if era > 5.0 or whip > 1.45:
            return "Strong"
        elif era > 4.5:
            return "Moderate"
    return "Normal"

def explain_lean_reason(row):
    """Summarise the stat differences behind a row as a comma-separated string.

    Each available difference is rendered with an explicit sign: ERA and WHIP
    as opponent minus team, OPS and RPG as team minus opponent. A trailing
    label records which categories had usable data ("Pitching Advantage Only",
    "Offense Advantage Only" or "Mixed Advantage"); it does not depend on the
    sign of the differences.

    Comparisons whose inputs are missing, not numeric, or NaN are skipped, so
    the text never contains "+nan". Returns None when nothing can be compared.
    """
    import math

    def _difference(minuend_col, subtrahend_col):
        # Return the finite float difference of two columns, or None if unusable
        try:
            diff = float(row[minuend_col]) - float(row[subtrahend_col])
        except (KeyError, TypeError, ValueError):
            return None
        return diff if math.isfinite(diff) else None

    # (label, minuend column, subtrahend column, number format, category)
    comparisons = [
        ("ERA", "Opponent ERA (Last 3 Starts)", "ERA (Last 3 Starts)", "+.2f", "pitching"),
        ("WHIP", "Opponent WHIP (Last 3 Starts)", "WHIP (Last 3 Starts)", "+.2f", "pitching"),
        ("OPS", "Team OPS (L7)", "Opp OPS (L7)", "+.3f", "offense"),
        ("RPG", "Team RPG (L7)", "Opp RPG (L7)", "+.2f", "offense"),
    ]

    reasons = []
    types = set()
    for label, minuend_col, subtrahend_col, number_format, category in comparisons:
        diff = _difference(minuend_col, subtrahend_col)
        if diff is None:
            continue
        reasons.append(f"{label} {diff:{number_format}}")
        types.add(category)

    if not reasons:
        return None
    if types == {"pitching"}:
        reasons.append("Pitching Advantage Only")
    elif types == {"offense"}:
        reasons.append("Offense Advantage Only")
    elif types == {"pitching", "offense"}:
        reasons.append("Mixed Advantage")
    return ", ".join(reasons)

# Apply lean logic: the call first, then its strength (which reads "Lean Call"),
# then fold the strength into the call text and finally add the explanation.
lean_calls = final_dashboard.apply(generate_lean, axis=1)
final_dashboard["Lean Call"] = lean_calls

lean_strengths = final_dashboard.apply(classify_strength, axis=1)
final_dashboard["Lean Strength"] = lean_strengths

labelled_calls = final_dashboard["Lean Call"] + " (" + final_dashboard["Lean Strength"] + ")"
final_dashboard["Lean Call"] = labelled_calls

# The helper column is only needed to build the label
final_dashboard = final_dashboard.drop("Lean Strength", axis=1)

lean_reasons = final_dashboard.apply(explain_lean_reason, axis=1)
final_dashboard["Lean Reason"] = lean_reasons


In [None]:
# Keep only the probable pitchers who belong to a tracked team
your_teams = ['NYM', 'LAD', 'NYY', 'DET', 'SEA', 'CHC']
is_tracked_team = pitcher_stats_df['Team'].isin(your_teams)
filtered_pitchers = pitcher_stats_df[is_tracked_team]

filtered_pitchers.head(n=5)

In [None]:
# Step 1: Drop duplicates in filtered_pitchers to ensure one row per (team, pitcher)
filtered_pitchers = filtered_pitchers.drop_duplicates(subset=["Team", "Pitcher"])

# Step 1 (cont.): Merge your team's pitcher stats with RPG/OPS.
# filtered_pitchers already carries "Pitcher Handedness", so handedness is NOT
# merged in a second time: doing so would produce "Pitcher Handedness_x" /
# "Pitcher Handedness_y" suffix columns, and the plain "Pitcher Handedness"
# name would then be taken by the opponent's value in the next merge.
dashboard = pd.merge(
    filtered_pitchers,
    rpg_ops_df.drop_duplicates(subset=["Team", "Opponent"]),
    on="Team",
    how="left",
)

# Step 2: Merge opponent pitcher stats. Every column, handedness included, is
# renamed so that nothing collides with the tracked team's own columns.
opponent_pitcher_stats = pitcher_stats_df.rename(columns={
    "Team": "Opponent",
    "Pitcher": "Opponent Pitcher",
    "ERA (Last 3 Starts)": "Opponent ERA (Last 3 Starts)",
    "WHIP (Last 3 Starts)": "Opponent WHIP (Last 3 Starts)",
    "Pitcher Handedness": "Opponent Handedness"
}).drop_duplicates(subset=["Opponent", "Opponent Pitcher"])
dashboard = pd.merge(dashboard, opponent_pitcher_stats, on="Opponent", how="left")

# Step 2c: Map Opponent Handedness Risk (codes other than R/L become NaN)
dashboard["Opponent Handedness Risk"] = dashboard["Opponent Handedness"].map({
    "R": "rhh",
    "L": "lhh"
})

# Step 3: Reorder columns
dashboard = dashboard[[
    "Team", "Opponent",
    "Pitcher", "ERA (Last 3 Starts)", "WHIP (Last 3 Starts)",
    "Opponent Pitcher", "Opponent ERA (Last 3 Starts)", "Opponent WHIP (Last 3 Starts)",
    "Team RPG (L7)", "Team OPS (L7)",
    "Opp RPG (L7)", "Opp OPS (L7)",
    "Pitcher Handedness", "Opponent Handedness", "Opponent Handedness Risk"
]]

# Step 4: Final result
final_dashboard = dashboard

# Step 5: True matchup-based lean logic
def generate_lean(row):
    """Return the lean call for one matchup row.

    A pitching edge means the opponent's ERA is at least 1.0 higher AND the
    opponent's WHIP is at least 0.15 higher; a batting edge means the team's
    OPS is at least 0.05 higher AND its RPG is at least 0.5 higher. The
    disadvantages are the mirror images.

    Returns:
        "Lean RL"   - both edges
        "Lean ML"   - exactly one edge
        "Fade ML"   - both disadvantages
        "Stay Away" - anything else, including rows whose stats are missing
                      or not numeric (e.g. "Fewer than 3 Starts")
    """
    try:
        era = float(row["ERA (Last 3 Starts)"])
        whip = float(row["WHIP (Last 3 Starts)"])
        opp_era = float(row["Opponent ERA (Last 3 Starts)"])
        opp_whip = float(row["Opponent WHIP (Last 3 Starts)"])
        ops = float(row["Team OPS (L7)"])
        opp_ops = float(row["Opp OPS (L7)"])
        rpg = float(row["Team RPG (L7)"])
        opp_rpg = float(row["Opp RPG (L7)"])
    except (KeyError, TypeError, ValueError):
        # Only data problems are treated as "no call"; interrupts and
        # programming errors are no longer swallowed.
        return "Stay Away"

    # Pitching edge thresholds (NaN inputs make every comparison False)
    pitching_edge = (opp_era - era >= 1.0) and (opp_whip - whip >= 0.15)
    pitching_disadv = (era - opp_era >= 1.0) and (whip - opp_whip >= 0.15)

    # Offensive edge thresholds
    batting_edge = (ops - opp_ops >= 0.05) and (rpg - opp_rpg >= 0.5)
    batting_disadv = (opp_ops - ops >= 0.05) and (opp_rpg - rpg >= 0.5)

    # Decision logic
    if pitching_edge and batting_edge:
        return "Lean RL"
    elif pitching_edge or batting_edge:
        return "Lean ML"
    elif pitching_disadv and batting_disadv:
        return "Fade ML"
    else:
        return "Stay Away"

# Step 6: Strength classification based on your pitcher
def classify_strength(row):
    """Grade a lean call as "Strong", "Moderate" or "Normal" from the team's own pitcher.

    Reads the row's "Lean Call" plus the pitcher's ERA and WHIP over the last
    three starts. Rows whose ERA/WHIP are missing or not numeric are "Normal".
    """
    try:
        era = float(row["ERA (Last 3 Starts)"])
        whip = float(row["WHIP (Last 3 Starts)"])
    except (KeyError, TypeError, ValueError):
        # Only data problems fall back to "Normal"; other exceptions propagate
        return "Normal"

    lean_call = row["Lean Call"]
    if lean_call.startswith("Lean ML"):
        if era < 2.0 and whip < 1.00:
            return "Strong"
        elif era < 3.0:
            return "Moderate"
    elif lean_call.startswith("Lean RL"):
        if era < 2.5 and whip < 1.05:
            return "Strong"
        elif era < 3.5:
            return "Moderate"
    elif lean_call.startswith("Fade ML"):
        # For a fade, a *worse* pitcher makes the call stronger
        if era > 5.0 or whip > 1.45:
            return "Strong"
        elif era > 4.5:
            return "Moderate"
    return "Normal"

# Step 7: Add Lean + Strength
# The call goes in first because classify_strength reads the "Lean Call" column.
lean_calls = final_dashboard.apply(generate_lean, axis=1)
final_dashboard["Lean Call"] = lean_calls

lean_strengths = final_dashboard.apply(classify_strength, axis=1)
final_dashboard["Lean Strength"] = lean_strengths

labelled_calls = final_dashboard["Lean Call"] + " (" + final_dashboard["Lean Strength"] + ")"
final_dashboard["Lean Call"] = labelled_calls

# The helper column is only needed to build the label
final_dashboard = final_dashboard.drop("Lean Strength", axis=1)

# Step 8: Add Lean Reason column with labels
def explain_lean_reason(row):
    """Summarise the stat differences behind a row as a comma-separated string.

    Each available difference is rendered with an explicit sign: ERA and WHIP
    as opponent minus team, OPS and RPG as team minus opponent. A trailing
    label records which categories had usable data ("Pitching Advantage Only",
    "Offense Advantage Only" or "Mixed Advantage"); it does not depend on the
    sign of the differences.

    Comparisons whose inputs are missing, not numeric, or NaN are skipped, so
    the text never contains "+nan". Returns None when nothing can be compared.
    """
    import math

    def _difference(minuend_col, subtrahend_col):
        # Return the finite float difference of two columns, or None if unusable
        try:
            diff = float(row[minuend_col]) - float(row[subtrahend_col])
        except (KeyError, TypeError, ValueError):
            return None
        return diff if math.isfinite(diff) else None

    # (label, minuend column, subtrahend column, number format, category)
    comparisons = [
        ("ERA", "Opponent ERA (Last 3 Starts)", "ERA (Last 3 Starts)", "+.2f", "pitching"),
        ("WHIP", "Opponent WHIP (Last 3 Starts)", "WHIP (Last 3 Starts)", "+.2f", "pitching"),
        ("OPS", "Team OPS (L7)", "Opp OPS (L7)", "+.3f", "offense"),
        ("RPG", "Team RPG (L7)", "Opp RPG (L7)", "+.2f", "offense"),
    ]

    reasons = []
    types = set()
    for label, minuend_col, subtrahend_col, number_format, category in comparisons:
        diff = _difference(minuend_col, subtrahend_col)
        if diff is None:
            continue
        reasons.append(f"{label} {diff:{number_format}}")
        types.add(category)

    if not reasons:
        return None

    # Add contextual label
    if types == {"pitching"}:
        reasons.append("Pitching Advantage Only")
    elif types == {"offense"}:
        reasons.append("Offense Advantage Only")
    elif types == {"pitching", "offense"}:
        reasons.append("Mixed Advantage")

    return ", ".join(reasons)

# Explain each call, then keep only the rows that were matched to an opponent
lean_reasons = final_dashboard.apply(explain_lean_reason, axis=1)
final_dashboard["Lean Reason"] = lean_reasons

has_opponent = final_dashboard["Opponent"].notna()
final_dashboard = final_dashboard.loc[has_opponent]



In [None]:
# Preview the first rows of the assembled dashboard
final_dashboard.head(n=5)

In [None]:
# Preview the tracked teams' pitcher rows
filtered_pitchers.head(n=5)

In [None]:
# Final display/export structure: columns in the order they are written to disk
export_columns = [
    "Team", "Opponent", "Pitcher", "Pitcher Handedness",
    "ERA (Last 3 Starts)", "WHIP (Last 3 Starts)",
    "Opponent Pitcher", "Opponent Handedness",
    "Opponent ERA (Last 3 Starts)", "Opponent WHIP (Last 3 Starts)",
    "Team RPG (L7)", "Team OPS (L7)",
    "Opp RPG (L7)", "Opp OPS (L7)",
    "Lean Call", "Lean Reason",
]
final_dashboard = final_dashboard.loc[:, export_columns]

# Save updated dashboard under a date-stamped file name
export_path = f"data/mlb_dashboard_{today}.csv"
final_dashboard.to_csv(export_path, index=False)
final_dashboard.head(n=5)

In [None]:
# Append daily results to bet history log
from pathlib import Path

log_file = Path("data/bet_history.csv")
final_dashboard["Date"] = today

columns_to_log = [
    "Date", "Team", "Opponent", "Pitcher", "Opponent Pitcher",
    "ERA (Last 3 Starts)", "WHIP (Last 3 Starts)",
    "Team OPS (L7)", "Opp OPS (L7)",
    "Lean Call"
]
todays_rows = final_dashboard[columns_to_log]

if log_file.exists():
    history_df = pd.read_csv(log_file)
    updated_df = pd.concat([history_df, todays_rows], ignore_index=True)
    # Re-running this cell on the same day must not log the same matchup twice:
    # keep only the newest row for each date / matchup / pitcher pairing.
    updated_df = updated_df.drop_duplicates(
        subset=["Date", "Team", "Opponent", "Pitcher", "Opponent Pitcher"],
        keep="last",
        ignore_index=True,
    )
else:
    updated_df = todays_rows

# Make sure the target folder exists before writing
log_file.parent.mkdir(parents=True, exist_ok=True)
updated_df.to_csv(log_file, index=False)

cd C:\Users\baile\Projects\mlb-dashboard

streamlit run dashboard_app.py

If not updating: run `streamlit cache clear`, then `streamlit run dashboard_app.py` again.