In [None]:
import pandas as pd
from itertools import combinations

# Locations of the two raw CSV files
player_url = "https://raw.githubusercontent.com/tracyhua2/SYS3034-BaseballCase/refs/heads/main/Data/player_data.csv"
team_url = "https://raw.githubusercontent.com/tracyhua2/SYS3034-BaseballCase/refs/heads/main/Data/team_performance.csv"

# Read both files into DataFrames
player_data = pd.read_csv(player_url)
team_data = pd.read_csv(team_url)

In [None]:
# Show the column labels of team_data
team_data.columns

In [None]:
# Preview the first rows of team_data
team_data.head()

In [None]:
# Work on a copy so player_data itself is left unmodified
player_df = player_data.copy()

# Remove commas from the 'Player' strings, then cast the column to float
player_without_commas = player_df['Player'].str.replace(',', '')
player_df['Player'] = player_without_commas.astype(float)

player_df.head()

Predicted Win % based on scaled weights (not using the Pythagorean expectation formula for win percentage)


In [None]:
def calculate_win_percentage(player, team_win_pct):
    """Return the team's base win percentage plus a player's weighted stat total.

    Parameters
    ----------
    player : pandas.Series
        One player's row; stats are looked up by label through ``player.index``.
        Weighted stats that are absent from the index are skipped.
    team_win_pct : float
        The team's base win percentage.

    Returns
    -------
    float
        ``team_win_pct`` plus the sum of ``player[stat] * weight`` over the
        weighted stats present in ``player``.

    NOTE(review): the weighted sum is added to ``team_win_pct`` without any
    rescaling; if the player stats are raw season totals the result will not
    be a fraction between 0 and 1 -- confirm the intended scaling.
    """
    # Weight applied to each batting statistic
    stat_weights = {
        'AB': 0.026580, 'R': 0.100512, 'H': 0.072376,
        '2B': 0.026418, '3B': 0.006667, 'HR': 0.073431,
        'TB': 0.093896, 'RBI': 0.064800, 'BA': 0.064209,
        'OBP': 0.105842, 'SLG': 0.114382, 'OPS': 0.128888,
        'BB': 0.037556, 'CS': 0.042222, 'SB': 0.042222,
    }

    # Sum the weighted value of every stat the player actually has
    weighted_total = sum(
        player[name] * value
        for name, value in stat_weights.items()
        if name in player.index
    )

    return team_win_pct + weighted_total

# Ten players with the highest 'final_score'
# (taken from player_df, which holds the converted 'Player' column, not player_data)
players_by_score = player_df.sort_values('final_score', ascending=False)
top_10 = players_by_score.head(10)

# Win percentage of the team whose team_rank is 26
is_team_26 = team_data['team_rank'] == 26
team_win = team_data.loc[is_team_26, 'win_percentage'].values[0]

# Print the weighted-stat prediction for each of the top players
for _, player in top_10.iterrows():
    predicted_win = calculate_win_percentage(player, team_win)
    print(f"Player {int(player['Player #'])}: Team Predicted Win = {predicted_win:.3f}")

Predicted Win % based on simulated runs scored and the team's base runs allowed (Pythagorean formula for calculating win percentage): https://www.samford.edu/sports-analytics/fans/2022/MLB-Winning-Percentage-Breakdown-Which-Statistics-Help-Teams-Win-More-Games


In [26]:
def simulate_team_runs(player_stats, base_team_runs):
    """Return the team's base runs plus a player's weighted offensive total.

    Parameters
    ----------
    player_stats : mapping or pandas.Series
        The player's stats keyed by stat label; weighted stats that are not
        present (``stat in player_stats`` is false) are skipped.
    base_team_runs : number
        Runs the team scores before the player's contribution is added.

    Returns
    -------
    number
        ``base_team_runs`` plus the sum of ``player_stats[stat] * weight``
        over the weighted stats present.
    """
    # Weights for the offensive stats that feed run production
    offensive_weights = {
        'R': 0.100512, 'H': 0.072376, 'HR': 0.073431,
        'RBI': 0.064800, 'OBP': 0.105842, 'SLG': 0.114382,
    }

    extra_runs = sum(
        player_stats[label] * factor
        for label, factor in offensive_weights.items()
        if label in player_stats
    )

    return base_team_runs + extra_runs

def predict_win_percentage(simulated_runs, runs_allowed, exponent=2):
    """Estimate a win percentage with the Pythagorean expectation formula.

    Computes ``RS**k / (RS**k + RA**k)`` where ``RS`` is runs scored,
    ``RA`` is runs allowed and ``k`` is ``exponent``.

    Parameters
    ----------
    simulated_runs : number
        Runs scored (RS).
    runs_allowed : number
        Runs allowed (RA).
    exponent : number, optional
        The power ``k`` applied to both run totals. Defaults to 2, which
        reproduces the previously hard-coded formula exactly.

    Returns
    -------
    float
        The estimated win fraction (0 to 1 for non-negative run totals).

    Raises
    ------
    ZeroDivisionError
        If both run totals are zero (plain Python numbers).
    """
    scored_term = simulated_runs ** exponent
    allowed_term = runs_allowed ** exponent
    return scored_term / (scored_term + allowed_term)


In [50]:
# Baseline figures for Team 26 (Seattle): games played, runs scored, runs allowed
games_played = 162
base_runs_scored = 671
base_runs_allowed = 811

# Echo the run totals used as the starting point of the simulation
print(f"Base Runs Scored: {base_runs_scored}\n")
print(f"Base Runs Allowed: {base_runs_allowed}\n")

Base Runs Scored: 671

Base Runs Allowed: 811



In [58]:
# Predicted team win percentage with each top-10 player added, keyed by 'Player #'
win_rate_dict = {}

# Win percentage of the team ranked 26. It is the same for every player, so it is
# looked up once here instead of on every loop iteration.
# NOTE(review): the gain printed below compares the Pythagorean prediction with this
# recorded win percentage, not with
# predict_win_percentage(base_runs_scored, base_runs_allowed); the difference between
# those two baselines is therefore included in every player's "gain" -- confirm this
# is the intended comparison.
team_win_pct = team_data.loc[team_data['team_rank']==26,'win_percentage'].values[0]

for _, player in top_10.iterrows():
    # Simulate the team's runs scored with this player added
    new_runs = simulate_team_runs(player, base_runs_scored)

    # Convert simulated runs scored and base runs allowed into a win percentage
    predicted_win_pct = predict_win_percentage(new_runs, base_runs_allowed)

    # Record the prediction for the summary computed afterwards
    win_rate_dict[player['Player #']] = predicted_win_pct

    print(f"Player {int(player['Player #'])}")
    print(f"Simulated Runs: {new_runs:.1f}")
    print(f"Predicted Win%: {(predicted_win_pct*100):.2f}%\n")
    print(f"Predicted %Gain For Player: {(predicted_win_pct-team_win_pct)*100:.2f}%\n")



Player 13
Simulated Runs: 706.8
Predicted Win%: 43.17%

Predicted %Gain For Player: 5.51%

Player 41
Simulated Runs: 703.2
Predicted Win%: 42.92%

Predicted %Gain For Player: 5.27%

Player 49
Simulated Runs: 704.4
Predicted Win%: 43.00%

Predicted %Gain For Player: 5.35%

Player 4
Simulated Runs: 705.0
Predicted Win%: 43.04%

Predicted %Gain For Player: 5.39%

Player 26
Simulated Runs: 704.4
Predicted Win%: 43.00%

Predicted %Gain For Player: 5.35%

Player 1
Simulated Runs: 702.8
Predicted Win%: 42.89%

Predicted %Gain For Player: 5.23%

Player 14
Simulated Runs: 705.5
Predicted Win%: 43.07%

Predicted %Gain For Player: 5.42%

Player 37
Simulated Runs: 702.0
Predicted Win%: 42.84%

Predicted %Gain For Player: 5.18%

Player 28
Simulated Runs: 704.4
Predicted Win%: 43.00%

Predicted %Gain For Player: 5.35%

Player 31
Simulated Runs: 703.3
Predicted Win%: 42.92%

Predicted %Gain For Player: 5.27%



In [64]:
# Mean of the per-player predicted win percentages stored in win_rate_dict
avg_rate = sum(win_rate_dict.values())/ len(win_rate_dict)
print(f"The predicted win percentage average:{avg_rate*100:.2f}% \n")
# Games won is formatted with no decimals, so the label says "nearest whole game"
print(f"The approximate (rounded to the nearest whole game) predicted games won: {avg_rate*games_played:.0f}\n")


The predicted win percentage average:42.99% 

The approximate (2 decimal places) predicted games won: 70

