<a href="https://colab.research.google.com/github/tracyhua2/SYS3034-BaseballCase/blob/main/Code/BaseballDraft_Top10.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [134]:
import pandas as pd
from itertools import combinations

player_url = "https://raw.githubusercontent.com/tracyhua2/SYS3034-BaseballCase/refs/heads/main/Data/player_data.csv"
player_data = pd.read_csv(player_url)

weights_url = "https://raw.githubusercontent.com/tracyhua2/SYS3034-BaseballCase/refs/heads/main/Data/metrics_weights.csv"
weights_data = pd.read_csv(weights_url)

team_url = "https://raw.githubusercontent.com/tracyhua2/SYS3034-BaseballCase/refs/heads/main/Data/team_performance.csv"
team_data = pd.read_csv(team_url)

In [135]:
# Drop NAs in weights_data
weights_data = weights_data.dropna()
weights_data.head()

Unnamed: 0,team_statistic,team_weights,ai_weights,research_weights,normalized_weights
0,AB,0.069507,0.04,0.0,0.036502
1,R,0.084823,0.15,0.1,0.111608
2,H,0.101106,0.08,0.05,0.077035
3,2B,0.054867,0.06,0.0,0.038289
4,3B,0.034097,0.03,0.0,0.021366


In [136]:
# Create copy of player_data
player_df = player_data.copy()

# Change Player column to float
player_df['Player'] = player_df['Player'].str.replace(',', '').astype(float)

player_df.head()

Unnamed: 0,Player,Player #,AB,R,H,2B,3B,HR,RBI,SB,CS,BB,BA,OBP,SLG
0,6125000.0,1,688,113,204,37,19,16,68,56,15,66,0.297,0.358,0.475
1,18000000.0,2,686,103,213,20,7,6,42,43,4,51,0.31,0.361,0.386
2,4000000.0,3,661,93,186,33,1,8,57,19,6,56,0.281,0.334,0.371
3,1750000.0,4,653,118,213,54,2,17,83,20,1,50,0.326,0.376,0.493
4,13054526.0,5,645,102,183,36,2,12,82,10,0,55,0.284,0.339,0.402


In [137]:
# View team_data
team_data.head()

Unnamed: 0,win_percentage,team_rank,SLG,OBP,RBI,OPS
0,0.364198,28,0.373,0.323,608,0.696
1,0.376543,26,0.389,0.318,631,0.707
2,0.388889,30,0.39,0.317,615,0.707
3,0.416149,19,0.403,0.32,705,0.723
4,0.417178,11,0.429,0.333,750,0.762


In [138]:
normalized_weights = weights_data.set_index('team_statistic')['normalized_weights']
normalized_weights

Unnamed: 0_level_0,normalized_weights
team_statistic,Unnamed: 1_level_1
AB,0.036502
R,0.111608
H,0.077035
2B,0.038289
3B,0.021366
HR,0.085576
TB,0.099597
RBI,0.072281
BA,0.066126
OBP,0.131591


In [139]:
# Calculate composite score for each player
def calculate_composite_scores(player_df, weights):
    adjusted_weights = weights.drop(['TB', 'OPS'], errors='ignore')
    player_df['composite_score'] = sum(player_df[stat] * weight for stat, weight in adjusted_weights.items())
    return player_df.sort_values('composite_score', ascending=False)

calculate_composite_scores(player_df, normalized_weights).head()

Unnamed: 0,Player,Player #,AB,R,H,2B,3B,HR,RBI,SB,CS,BB,BA,OBP,SLG,composite_score
12,7750000.0,13,626,115,189,42,2,33,124,15,5,94,0.302,0.39,0.534,68.712292
0,6125000.0,1,688,113,204,37,19,16,68,56,15,66,0.297,0.358,0.475,67.443823
3,1750000.0,4,653,118,213,54,2,17,83,20,1,50,0.326,0.376,0.493,66.314923
27,19243683.0,28,606,116,172,40,5,27,112,25,3,92,0.284,0.376,0.5,65.792445
13,555000.0,14,624,98,190,35,5,32,130,9,1,64,0.304,0.371,0.53,65.330629


In [141]:
# Find the 4 best-performing players within the budget of 30M
def select_top_players_budget(player_df, max_players = 4, budget=30000000):
  player_df = player_df.sort_values('composite_score', ascending=False).reset_index()
  for num_players in range(1, max_players + 1):
    for combo in combinations(player_df.index, num_players):
          selected_players = player_df.loc[list(combo)]
          total_salary = selected_players['Player'].sum()

          best_players = None
          best_score = 0

          if total_salary <= budget:
              total_score = selected_players['composite_score'].sum()
              if total_score > best_score:
                  best_players = selected_players
                  best_score = total_score
  return best_players

select_top_players_budget(player_df, max_players=4, budget=30000000)

Unnamed: 0,index,Player,Player #,AB,R,H,2B,3B,HR,RBI,SB,CS,BB,BA,OBP,SLG,composite_score
46,44,8000000.0,45,579,77,183,35,5,9,55,6,5,23,0.316,0.345,0.44,51.471297
47,34,6000000.0,35,597,70,162,35,3,14,72,2,4,26,0.271,0.305,0.41,51.256835
48,46,437500.0,47,577,79,149,24,7,7,59,33,11,25,0.258,0.296,0.36,50.191687
49,49,3575000.0,50,572,66,146,31,4,15,59,4,3,46,0.255,0.313,0.402,48.650235


In [144]:
# Weights with metrics from data frame into list
weights_list = weights_data.values.tolist()

# Simulate the effect on Seattle's win perecentage based on top players
def optimize_team_performance(player_df, team_data, team_id, budget=30000000):
    def win_contribution(player):
      return (player['stat'] * weight for stat, weight in weights.items() if stat in player)

    player_df = calculate_composite_scores(player_df, normalized_weights)
    optimized_players = select_top_players_budget(player_df, max_players=4, budget=budget)
    team = team_data.loc[team_data['team_rank'] == team_id].copy()
    team['RBI'] += optimized_players['RBI'].sum() * 0.1 # Interval increase for each player to measure improvement
    team['win_percentage'] += (optimized_players['HR'].sum() * 0.01)  # Using home runs as variable for performance boost

    return team, optimized_players

In [148]:
# Simulate team 26 (Seattle) performance with budget
team_performance, top_players = optimize_team_performance(player_df, team_data,
                                                             team_id=26, budget=30000000)

In [150]:
# Team performance based on selected players
print(team_performance)

   win_percentage  team_rank    SLG    OBP    RBI    OPS
1        0.826543         26  0.389  0.318  655.5  0.707


In [152]:
# Top 4 players chosen based on optimization
print(top_players)

    index     Player  Player #   AB   R    H  2B  3B  HR  RBI  SB  CS  BB  \
46     44  8000000.0        45  579  77  183  35   5   9   55   6   5  23   
47     34  6000000.0        35  597  70  162  35   3  14   72   2   4  26   
48     46   437500.0        47  577  79  149  24   7   7   59  33  11  25   
49     49  3575000.0        50  572  66  146  31   4  15   59   4   3  46   

       BA    OBP    SLG  composite_score  
46  0.316  0.345  0.440        51.471297  
47  0.271  0.305  0.410        51.256835  
48  0.258  0.296  0.360        50.191687  
49  0.255  0.313  0.402        48.650235  
