<a href="https://colab.research.google.com/github/lonespear/MA206/blob/main/bball_review_sim.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [49]:
import random
import pandas as pd
import numpy as np

def roll_2d6():
    """Roll two six-sided dice and return their total (an int from 2 to 12)."""
    first_die = random.randint(1, 6)
    second_die = random.randint(1, 6)
    return first_die + second_die

def get_era_modifier(era):
    """Return the contact-roll adjustment contributed by a pitcher's ERA.

    A lower ERA yields a lower (more negative) modifier:
    ERA <= 2.50 -> -2, <= 3.00 -> -1, <= 3.75 -> 0, anything higher -> +1.
    A missing ERA (None/NaN) is treated as neutral and returns 0.
    """
    if pd.isna(era):
        return 0

    # (inclusive ERA ceiling, modifier), checked from best to worst ERA.
    for ceiling, modifier in ((2.50, -2), (3.00, -1), (3.75, 0)):
        if era <= ceiling:
            return modifier
    return 1

def get_obp_threshold(obp):
    """Return the minimum contact roll a hitter needs in order to avoid an out.

    A higher OBP yields a lower (easier) threshold:
    OBP >= .400 -> 5, >= .375 -> 6, >= .350 -> 7, >= .325 -> 8, otherwise 9.

    A missing OBP (None/NaN) returns the hardest threshold, 9. NaN already
    fell through to 9 implicitly; handling it explicitly mirrors the
    missing-value guards in get_era_modifier/get_hit_type and keeps None
    from raising a TypeError on comparison.
    """
    if pd.isna(obp):
        return 9
    if obp >= 0.400:
        return 5
    elif obp >= 0.375:
        return 6
    elif obp >= 0.350:
        return 7
    elif obp >= 0.325:
        return 8
    else:
        return 9

def get_hit_type(slg, roll):
    """Translate a dice total into a hit type using the hitter's SLG tier.

    Each SLG tier owns a 12-slot outcome table ordered 1B, 2B, 3B, HR, and
    the result is the slot at index ``roll - 2``. A 2d6 total (2-12) can
    therefore reach slots 0-10 only; the twelfth slot is never selected by
    such a roll. A missing SLG (None/NaN) always produces "OUT".
    """
    if pd.isna(slg):
        return "OUT"

    # (minimum SLG, how many slots of 1B / 2B / 3B / HR), strongest tier first.
    tiers = (
        (0.550, (3, 3, 3, 3)),
        (0.500, (4, 3, 3, 2)),
        (0.450, (6, 3, 3, 0)),
        (0.400, (7, 3, 2, 0)),
    )
    slot_counts = (8, 4, 0, 0)  # table for any SLG below .400
    for floor, counts in tiers:
        if slg >= floor:
            slot_counts = counts
            break

    outcomes = []
    for label, count in zip(("1B", "2B", "3B", "HR"), slot_counts):
        outcomes.extend([label] * count)
    return outcomes[roll - 2]

def simulate_at_bat(hitter, pitcher):
    """Simulate one plate appearance and return "OUT", "1B", "2B", "3B" or "HR".

    A 2d6 contact roll, shifted by the pitcher's ERA modifier, is compared
    with the threshold derived from the hitter's OBP. Falling short is an
    out; otherwise a second 2d6 roll picks the hit type from the hitter's
    SLG table. The second roll is only made when contact succeeds.

    ``hitter`` must provide "OBP" and "SLG"; ``pitcher`` must provide "ERA".
    """
    contact_roll = roll_2d6() + get_era_modifier(pitcher["ERA"])
    if contact_roll < get_obp_threshold(hitter["OBP"]):
        return "OUT"
    return get_hit_type(hitter["SLG"], roll_2d6())

def simulate_half_inning(batting_lineup, pitcher, start_index):
    """Play one half-inning (until three outs) and report the runs scored.

    Batters are taken from ``batting_lineup`` in order, starting at
    ``start_index`` and wrapping around the end of the lineup.

    Base-running rules:
      * 1B - every runner advances one base; a runner on third scores.
      * 2B - runners on second and third score; a runner on first moves to
             third; the batter stands on second.
      * 3B - every runner scores; the batter stands on third.
      * HR - every runner and the batter score.

    Returns a ``(runs, next_index)`` tuple, where ``next_index`` is the
    lineup position of the batter due up next.
    """
    lineup_size = len(batting_lineup)
    runs = 0
    out_count = 0
    on_first = on_second = on_third = False
    batter_pos = start_index

    while out_count < 3:
        batter = batting_lineup[batter_pos % lineup_size]
        outcome = simulate_at_bat(batter, pitcher)
        batter_pos += 1

        if outcome == "OUT":
            out_count += 1
        elif outcome == "1B":
            if on_third:
                runs += 1
            on_third, on_second, on_first = on_second, on_first, True
        elif outcome == "2B":
            runs += on_third + on_second
            on_third, on_second, on_first = on_first, True, False
        elif outcome == "3B":
            runs += on_first + on_second + on_third
            on_first, on_second, on_third = False, False, True
        elif outcome == "HR":
            runs += 1 + on_first + on_second + on_third
            on_first = on_second = on_third = False

    return runs, batter_pos % lineup_size

In [56]:
# Build your full dataset
# One entry per player, column by column. Every list has 56 entries:
# the first 40 players are hitters and the last 16 are pitchers.
data = {
    "Player": ["Grant Dinger", "Patton Powers", "Mac McKillops", "Farnsworth Lantern", "Pershing Popsicle", "Sherman Swing",
               "Custer Clutch", "Dansby Gronson", "Frank Thomas", "Sheridan Smash", "Crook Clout", "Morgan Mash",
               "Merritt Mash", "Chaffee Chomp", "Macomb Muscle", "Ruggles Rip", "Shaler Shot", "Gates Glide",
               "Gray Escobar", "Dean Drive", "Stockton Strike", "Whoop Phillips", "Henebry Klaus", "Dusty Klein",
               "Taylor Bigsby", "Thayer Thumper", "McClellan Mash", "Costello Kincaid", "Keyes Klout", "Jack Upton",
               "Burnside Blast", "Meade Mash", "Pope Pop", "Doubleday Drive", "Sedgewick Swathers", "Chamberlain Chop",
               "Warren Collins", "Terry Vick", "Gibbon Gilden", "Bliss Westleton",
               "Grant Ford", "Bobby Caslen", "MacArthur Mound", "Ike Isthmus", "Pershing Pitch", "Sherman Strike",
               "Chesty Puller", "Lee Leader", "Harold Hansen", "Sheridan Gonzalez", "Corn Chowder", "Morgan Move",
               "Burnside Bender", "David Meade", "Duke Gransby", "Bubba Tornado"],
    "Position": ["Hitter"] * 40 + ["Pitcher"] * 16,
    # OBP and SLG are given for the 40 hitters only; pitchers get None.
    "OBP": [0.389, 0.352, 0.375, 0.364, 0.35, 0.363, 0.323, 0.41, 0.379, 0.324, 0.393, 0.331, 0.409, 0.36, 0.333, 0.325,
            0.349, 0.394, 0.389, 0.374, 0.368, 0.326, 0.39, 0.349, 0.368, 0.352, 0.37, 0.368, 0.321, 0.339, 0.344, 0.362,
            0.385, 0.366, 0.348, 0.392, 0.331, 0.372, 0.355, 0.401] + [None]*16,
    "SLG": [0.56, 0.431, 0.453, 0.461, 0.584, 0.458, 0.538, 0.51, 0.548, 0.589, 0.559, 0.469, 0.51, 0.562, 0.42, 0.504,
            0.545, 0.427, 0.529, 0.488, 0.557, 0.557, 0.565, 0.424, 0.405, 0.444, 0.5, 0.566, 0.429, 0.498, 0.477, 0.522,
            0.496, 0.453, 0.506, 0.547, 0.421, 0.486, 0.445, 0.533] + [None]*16,
    # ERA is given for the 16 pitchers only; hitters get None.
    "ERA": [None]*40 + [3.07, 4.08, 2.55, 2.11, 2.48, 3.79, 2.84, 3.03, 3.73, 3.76, 3.8, 3.65, 2.95, 3.68, 3.12, 2.42]
}

# One row per player, with columns Player, Position, OBP, SLG and ERA.
df = pd.DataFrame(data)

# Map of player to team
# Keys are the names from the "Player" column (hitters and pitchers alike);
# values are team names. This is the mapping applied to df["Player"] to
# build the "Team" column.
g1 = {
    "Grant Dinger": "stat ballers", "Patton Powers": "OAKLAND ATHLETICS", "Mac McKillops": "stat ballers",
    "Farnsworth Lantern": "Staters", "Pershing Popsicle": "Strange and Littrell", "Sherman Swing": "Rush Hour",
    "Custer Clutch": "Staters", "Dansby Gronson": "OAKLAND ATHLETICS", "Frank Thomas": "Linear Regressors",
    "Sheridan Smash": "Linear Regressors", "Crook Clout": "stat ballers", "Morgan Mash": "Rush Hour",
    "Merritt Mash": "OAKLAND ATHLETICS", "Chaffee Chomp": "Staters", "Macomb Muscle": "Statically Proficient",
    "Ruggles Rip": "Statically Proficient", "Shaler Shot": "Staters", "Gates Glide": "Linear Regressors",
    "Gray Escobar": "Staters", "Dean Drive": "OAKLAND ATHLETICS", "Stockton Strike": "Linear Regressors",
    "Whoop Phillips": "Strange and Littrell", "Henebry Klaus": "Rush Hour", "Dusty Klein": "Statically Proficient",
    "Taylor Bigsby": "Rush Hour", "Thayer Thumper": "Statically Proficient", "McClellan Mash": "Strange and Littrell",
    "Costello Kincaid": "Linear Regressors", "Keyes Klout": "Strange and Littrell", "Jack Upton": "Rush Hour",
    "Burnside Blast": "Rush Hour", "Meade Mash": "Team Three", "Pope Pop": "Team Three", "Doubleday Drive": "Statically Proficient",
    "Sedgewick Swathers": "Team Three", "Chamberlain Chop": "Team Three", "Warren Collins": "stat ballers",
    "Terry Vick": "Statically Proficient", "Gibbon Gilden": "OAKLAND ATHLETICS", "Bliss Westleton": "Team Three",
    "Grant Ford": "Statically Proficient", "Bobby Caslen": "OAKLAND ATHLETICS", "MacArthur Mound": "Team Three",
    "Ike Isthmus": "stat ballers", "Pershing Pitch": "Strange and Littrell", "Sherman Strike": "Staters",
    "Chesty Puller": "Staters", "Lee Leader": "Linear Regressors", "Harold Hansen": "Linear Regressors",
    "Sheridan Gonzalez": "Statically Proficient", "Corn Chowder": "OAKLAND ATHLETICS", "Morgan Move": "Rush Hour",
    "Burnside Bender": "Team Three", "David Meade": "stat ballers", "Duke Gransby": "Rush Hour",
    "Bubba Tornado": "Strange and Littrell"
}

# Alternate player -> team mapping: eight teams of seven players each,
# grouped below by team.
# NOTE(review): nothing visible in this file reads `i1`; the "Team" column is
# built from `g1`. Presumably `i1` is swapped in for `g1` to simulate a second
# league (the final league table in this notebook lists these team names) —
# confirm.
i1 = {
    # Team 11
    "Merritt Mash":      "Team 11",
    "Meade Mash":        "Team 11",
    "Doubleday Drive":   "Team 11",
    "Sedgewick Swathers":"Team 11",
    "Gibbon Gilden":     "Team 11",
    "Duke Gransby":      "Team 11",
    "Burnside Bender":   "Team 11",

    # Sons of Pitches
    "Grant Dinger":      "Sons of Pitches",
    "Farnsworth Lantern":"Sons of Pitches",
    "Sherman Swing":     "Sons of Pitches",
    "Sheridan Smash":    "Sons of Pitches",
    "Costello Kincaid":  "Sons of Pitches",
    "MacArthur Mound":   "Sons of Pitches",
    "David Meade":       "Sons of Pitches",

    # Bronx Bombers
    "Henebry Klaus":     "Bronx Bombers",
    "Chaffee Chomp":     "Bronx Bombers",
    "Jack Upton":        "Bronx Bombers",
    "Pershing Popsicle": "Bronx Bombers",
    "Dusty Klein":       "Bronx Bombers",
    "Pershing Pitch":    "Bronx Bombers",
    "Bobby Caslen":      "Bronx Bombers",

    # The Statomatics
    "Terry Vick":        "The Statomatics",
    "Whoop Phillips":    "The Statomatics",
    "Dean Drive":        "The Statomatics",
    "Patton Powers":     "The Statomatics",
    "Macomb Muscle":     "The Statomatics",
    "Harold Hansen":     "The Statomatics",
    "Morgan Move":       "The Statomatics",

    # Choldin&Ginty
    "Crook Clout":       "Choldin&Ginty",
    "Gates Glide":       "Choldin&Ginty",
    "Chamberlain Chop":  "Choldin&Ginty",
    "McClellan Mash":    "Choldin&Ginty",
    "Custer Clutch":     "Choldin&Ginty",
    "Sheridan Gonzalez": "Choldin&Ginty",
    "Grant Ford":        "Choldin&Ginty",

    # Oakland A's
    "Gray Escobar":      "Oakland A's",
    "Shaler Shot":       "Oakland A's",
    "Taylor Bigsby":     "Oakland A's",
    "Thayer Thumper":    "Oakland A's",
    "Mac McKillops":     "Oakland A's",
    "Lee Leader":        "Oakland A's",
    "Sherman Strike":    "Oakland A's",

    # Quantum Syndicate
    "Dansby Gronson":    "Quantum Syndicate",
    "Frank Thomas":      "Quantum Syndicate",
    "Bliss Westleton":   "Quantum Syndicate",
    "Keyes Klout":       "Quantum Syndicate",
    "Warren Collins":    "Quantum Syndicate",
    "Ike Isthmus":       "Quantum Syndicate",
    "Bubba Tornado":     "Quantum Syndicate",

    # BaseLine Analysis
    "Morgan Mash":       "BaseLine Analysis",
    "Ruggles Rip":       "BaseLine Analysis",
    "Stockton Strike":   "BaseLine Analysis",
    "Burnside Blast":    "BaseLine Analysis",
    "Pope Pop":          "BaseLine Analysis",
    "Corn Chowder":      "BaseLine Analysis",
    "Chesty Puller":     "BaseLine Analysis",
}

# Attach each player's team. Players missing from the mapping are labelled
# "UNASSIGNED" rather than left as NaN.
df["Team"] = df["Player"].map(g1).fillna("UNASSIGNED")

GAMES_PER_MATCHUP = 500  # simulated games for every ordered (home, away) pairing
INNINGS_PER_GAME = 5     # each simulated game lasts this many innings

# Build every team's roster once instead of re-filtering df for each matchup.
# Only the first pitcher row of a team is used. A team without at least one
# hitter and one pitcher (e.g. the "UNASSIGNED" placeholder) cannot play and
# is skipped with a message instead of crashing the whole simulation.
rosters = {}
for team in df["Team"].unique():
    team_rows = df[df.Team == team]
    hitters = team_rows[team_rows.Position == "Hitter"].to_dict("records")
    pitchers = team_rows[team_rows.Position == "Pitcher"].to_dict("records")
    if not hitters or not pitchers:
        print(f"Skipping {team!r}: a team needs at least one hitter and one pitcher.")
        continue
    rosters[team] = (hitters, pitchers[0])

teams = list(rosters)  # first-appearance order in df, same as df["Team"].unique()
results = []           # one dict per simulated game

# Every team hosts every other team, so each pairing is played in both
# home/away orientations.
for home_team in teams:
    home_hitters, home_pitcher = rosters[home_team]
    for away_team in teams:
        if home_team == away_team:
            continue
        away_hitters, away_pitcher = rosters[away_team]

        for game_number in range(1, GAMES_PER_MATCHUP + 1):
            # Reset scores and batting order
            home_score = away_score = 0
            home_idx = away_idx = 0

            for inning in range(INNINGS_PER_GAME):
                # Away side bats first, facing the home pitcher; each lineup
                # resumes where it left off in the previous inning.
                away_run, away_idx = simulate_half_inning(away_hitters, home_pitcher, away_idx)
                home_run, home_idx = simulate_half_inning(home_hitters, away_pitcher, home_idx)
                away_score += away_run
                home_score += home_run

            # No extra innings: equal scores are recorded as a tie.
            outcome = (
                "Home Win" if home_score > away_score else
                "Away Win" if away_score > home_score else
                "Tie"
            )

            results.append({
                "Game": game_number,
                "Home Team": home_team,
                "Away Team": away_team,
                "Away Score": away_score,
                "Home Score": home_score,
                "Outcome": outcome
            })

# Turn the per-game records into a table.
results_df = pd.DataFrame(results)

# Count outcomes per (Home Team, Away Team) pairing: one row per pairing,
# one column per outcome, with 0 where an outcome never occurred.
outcome_counts = results_df.groupby(["Home Team", "Away Team"])["Outcome"].value_counts()
matchup_summary = outcome_counts.unstack().fillna(0)

# An outcome that never happened anywhere has no column yet; add it as zeros.
for col in ("Home Win", "Away Win", "Tie"):
    if col not in matchup_summary.columns:
        matchup_summary[col] = 0

# Home-team win rate per pairing; ties count as games played but not as wins.
matchup_summary["Total"] = matchup_summary.sum(axis=1)
matchup_summary["Win %"] = matchup_summary["Home Win"] / matchup_summary["Total"]

# Reshape to a matrix: rows are home teams, columns are away teams.
win_fraction_table = matchup_summary.reset_index().pivot(
    index="Home Team",
    columns="Away Team",
    values="Win %",
)
# A team never plays itself, so the diagonal is empty; show it as an em-dash.
win_matrix = win_fraction_table.round(3).fillna("—")

win_matrix

Away Team,Linear Regressors,OAKLAND ATHLETICS,Rush Hour,Staters,Statically Proficient,Strange and Littrell,Team Three,stat ballers
Home Team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Linear Regressors,—,0.614,0.728,0.956,0.806,0.092,0.028,0.002
OAKLAND ATHLETICS,0.34,—,0.566,0.97,0.674,0.03,0.0,0.0
Rush Hour,0.242,0.432,—,0.874,0.596,0.042,0.002,0.0
Staters,0.032,0.04,0.1,—,0.184,0.004,0.0,0.0
Statically Proficient,0.182,0.324,0.36,0.766,—,0.008,0.002,0.0
Strange and Littrell,0.88,0.964,0.954,0.998,0.97,—,0.364,0.134
Team Three,0.948,0.992,0.982,1.0,0.984,0.508,—,0.12
stat ballers,0.998,1.0,1.0,1.0,1.0,0.77,0.844,—


In [57]:
# --- 1) Determine the winner of every game ---
# A home win credits the home team, an away win credits the away team,
# and a tie credits nobody (None).
home_won = results_df["Outcome"] == "Home Win"
away_won = results_df["Outcome"] == "Away Win"
away_team_or_nobody = np.where(away_won, results_df["Away Team"], None)
results_df["Winner"] = np.where(home_won, results_df["Home Team"], away_team_or_nobody)

# --- 2) Tally wins and games played per team ---
# Games played = appearances as home team + appearances as away team.
win_counts = results_df["Winner"].value_counts()
home_games = results_df["Home Team"].value_counts()
away_games = results_df["Away Team"].value_counts()
total_games = home_games.add(away_games, fill_value=0)

# --- 3) Overall win rate, best team first ---
# A team with no wins is absent from win_counts, so its ratio is NaN -> 0.
win_pct = (win_counts / total_games).fillna(0)
ranked_win_pct = win_pct.sort_values(ascending=False).rename("Win %")
league_table = ranked_win_pct.reset_index().rename(columns={"index": "Team"})

print(league_table)

                    Team     Win %
0           stat ballers  0.943000
1             Team Three  0.793286
2   Strange and Littrell  0.748000
3      Linear Regressors  0.458429
4      OAKLAND ATHLETICS  0.365286
5              Rush Hour  0.309429
6  Statically Proficient  0.232857
7                Staters  0.051286


In [53]:
# --- 1) Determine the winner of every game ---
# A home win credits the home team, an away win credits the away team,
# and a tie credits nobody (None).
home_won = results_df["Outcome"] == "Home Win"
away_won = results_df["Outcome"] == "Away Win"
away_team_or_nobody = np.where(away_won, results_df["Away Team"], None)
results_df["Winner"] = np.where(home_won, results_df["Home Team"], away_team_or_nobody)

# --- 2) Tally wins and games played per team ---
# Games played = appearances as home team + appearances as away team.
win_counts = results_df["Winner"].value_counts()
home_games = results_df["Home Team"].value_counts()
away_games = results_df["Away Team"].value_counts()
total_games = home_games.add(away_games, fill_value=0)

# --- 3) Overall win rate, best team first ---
# A team with no wins is absent from win_counts, so its ratio is NaN -> 0.
win_pct = (win_counts / total_games).fillna(0)
ranked_win_pct = win_pct.sort_values(ascending=False).rename("Win %")
league_table = ranked_win_pct.reset_index().rename(columns={"index": "Team"})

print(league_table)

                Team     Win %
0  Quantum Syndicate  0.987286
1            Team 11  0.728571
2    Sons of Pitches  0.662714
3  BaseLine Analysis  0.553714
4      Choldin&Ginty  0.493000
5    The Statomatics  0.282000
6      Bronx Bombers  0.117571
7        Oakland A's  0.079143
