In [1]:
# Import our dependencies
import pandas as pd
import matplotlib.pyplot as plt
import sklearn as skl
import tensorflow as tf
from sklearn.model_selection import train_test_split

In [3]:
mlb_df = pd.read_csv("Resources/mlb_complete_8years.csv")
mlb_df.drop(columns=['Unnamed: 0','W','L','W-L%'], inplace=True)
mlb_df.head(30)

Unnamed: 0,Tm,BatAge,R/G,AB,R_x,H_x,2B,3B,HR_x,RBI,...,BF,LOB_y,DefEff,Ch,PO,A,E,DP,post_season,Year
0,Arizona Diamondbacks_2022,26.5,4.33,5351,702,1232,262,24,173,658,...,6065,1051,0.704,5746,4290,1370,86,134,False,2022
1,Atlanta Braves_2022,27.5,4.87,5509,789,1394,298,11,243,753,...,6031,1101,0.701,5803,4344,1382,77,110,True,2022
2,Baltimore Orioles_2022,27.0,4.16,5429,674,1281,275,25,171,639,...,6058,1092,0.69,5920,4300,1529,91,151,False,2022
3,Boston Red Sox_2022,28.8,4.54,5539,735,1427,352,12,155,704,...,6167,1109,0.683,5825,4293,1447,85,134,False,2022
4,Chicago Cubs_2022,27.9,4.06,5425,657,1293,265,31,159,620,...,6162,1130,0.697,5880,4331,1453,96,139,False,2022
5,Chicago White Sox_2022,29.3,4.23,5611,686,1435,272,9,149,654,...,6145,1110,0.688,5738,4343,1293,102,122,False,2022
6,Cincinnati Reds_2022,29.4,4.0,5380,648,1264,235,18,156,618,...,6220,1152,0.689,5647,4270,1296,81,115,False,2022
7,Cleveland Guardians_2022,25.9,4.31,5558,698,1410,273,31,127,662,...,5989,1018,0.709,5908,4368,1443,97,127,True,2022
8,Colorado Rockies_2022,29.1,4.31,5540,698,1408,280,34,149,669,...,6240,1110,0.673,5968,4276,1592,100,154,False,2022
9,Detroit Tigers_2022,27.9,3.44,5378,557,1240,235,27,110,530,...,6047,1086,0.701,5757,4259,1404,94,137,False,2022


In [4]:
features_df = mlb_df.drop(columns=['Tm','post_season','Year'])
target_df = mlb_df['post_season'].astype(int)

In [5]:
target_df

0      0
1      1
2      0
3      0
4      0
      ..
235    1
236    0
237    1
238    1
239    0
Name: post_season, Length: 240, dtype: int32

In [6]:
features_df.shape

(240, 36)

In [7]:
features_df

Unnamed: 0,BatAge,R/G,AB,R_x,H_x,2B,3B,HR_x,RBI,SB,...,SO_y,HBP_y,BF,LOB_y,DefEff,Ch,PO,A,E,DP
0,26.5,4.33,5351,702,1232,262,24,173,658,104,...,1216,59,6065,1051,0.704,5746,4290,1370,86,134
1,27.5,4.87,5509,789,1394,298,11,243,753,87,...,1554,62,6031,1101,0.701,5803,4344,1382,77,110
2,27.0,4.16,5429,674,1281,275,25,171,639,95,...,1214,64,6058,1092,0.690,5920,4300,1529,91,151
3,28.8,4.54,5539,735,1427,352,12,155,704,52,...,1346,72,6167,1109,0.683,5825,4293,1447,85,134
4,27.9,4.06,5425,657,1293,265,31,159,620,111,...,1383,73,6162,1130,0.697,5880,4331,1453,96,139
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
235,28.4,3.99,5484,647,1386,288,39,137,619,69,...,1329,45,6134,1215,0.688,6107,4394,1617,96,159
236,28.5,3.98,5485,644,1383,278,32,167,612,87,...,1355,46,6069,1067,0.701,5863,4360,1408,95,118
237,28.7,4.64,5511,751,1419,279,32,172,707,101,...,1095,63,6215,1154,0.687,6142,4328,1695,119,169
238,29.5,5.50,5509,891,1480,308,17,232,852,88,...,1117,59,5984,991,0.708,6003,4323,1592,88,145


In [8]:
X_train, X_test, y_train, y_test = train_test_split(features_df, target_df, random_state=78)
X_scaler = skl.preprocessing.StandardScaler()
X_scaler.fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [9]:
# Create a method that creates a new Sequential model with hyperparameter options
def create_model(hp):
    nn_model = tf.keras.models.Sequential()

    # Allow kerastuner to decide which activation function to use in hidden layers
    activation = hp.Choice('activation',['relu','tanh','sigmoid'])
    
    # Allow kerastuner to decide number of neurons in first layer
    nn_model.add(tf.keras.layers.Dense(units=hp.Int('first_units',
        min_value=1,
        max_value=10,
        step=2), activation=activation, input_dim=36))

    # Allow kerastuner to decide number of hidden layers and neurons in hidden layers
    for i in range(hp.Int('num_layers', 1, 6)):
        nn_model.add(tf.keras.layers.Dense(units=hp.Int('units_' + str(i),
            min_value=1,
            max_value=10,
            step=2),
            activation=activation))
    
    nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

    # Compile the model
    nn_model.compile(loss="binary_crossentropy", optimizer='adam', metrics=["accuracy"])
    
    return nn_model

In [12]:
# Import the kerastuner library
import keras_tuner as kt

tuner = kt.Hyperband(
    create_model,
    objective="val_accuracy",
    max_epochs=20,
    hyperband_iterations=2)

INFO:tensorflow:Reloading Tuner from .\untitled_project\tuner0.json


In [13]:
tuner.search(X_train_scaled,y_train,epochs=20,validation_data=(X_test_scaled,y_test))

INFO:tensorflow:Oracle triggered exit


In [14]:
# Get best model hyperparameters
best_hyper = tuner.get_best_hyperparameters(1)[0]
best_hyper.values

{'activation': 'relu',
 'first_units': 3,
 'num_layers': 2,
 'units_0': 9,
 'units_1': 7,
 'units_2': 5,
 'units_3': 7,
 'units_4': 1,
 'units_5': 3,
 'tuner/epochs': 20,
 'tuner/initial_epoch': 0,
 'tuner/bracket': 0,
 'tuner/round': 0}

In [15]:
# Evaluate best model against full test data
best_model = tuner.get_best_models(1)[0]
model_loss, model_accuracy = best_model.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

2/2 - 1s - loss: 0.4233 - accuracy: 0.8833 - 539ms/epoch - 270ms/step
Loss: 0.42331045866012573, Accuracy: 0.8833333253860474


In [16]:
best_model

<keras.src.engine.sequential.Sequential at 0x1c9f6f4c3a0>

In [17]:
from keras.models import load_model
best_model.save('mlb_network.h5')
mlb_model = load_model("mlb_network.h5")
loss, accuracy = mlb_model.evaluate(X_test, y_test)

  saving_api.save_model(




In [19]:
bat_23 = pd.read_csv("Resources/2023_Batting.csv")
bat_23.drop(columns=['#Bat','G','PA','BA','OBP','SLG', 'OPS', 'OPS+','TB', 'GDP','IBB'], inplace=True)
pit_23 = pd.read_csv("Resources/2023_Pitching.csv")
pit_23.drop(columns=['#P', 'ERA', 'G', 'GS', 'GF','CG', 'cSho', 'IP', 'ER', 'BK', 'WP','ERA+', 'FIP', 'WHIP', 'H9', 'HR9','BB9', 'SO9', 'SO/W'], inplace=True)
def_23 = pd.read_csv("Resources/2023_Fielding.csv")
def_23.drop(columns=['#Fld', 'RA/G', 'G', 'GS', 'CG', 'Inn', 'Fld%', 'Rtot', 'Rtot/yr', 'Rdrs', 'Rdrs/yr', 'Rgood'], inplace=True)
merged_23 = pd.merge(bat_23, pit_23, on='Tm')
merged_23 = pd.merge(merged_23, def_23, on = 'Tm')
merged_23.head(30)

Unnamed: 0,Tm,BatAge,R/G,AB,R_x,H_x,2B,3B,HR_x,RBI,...,SO_y,HBP_y,BF,LOB_y,DefEff,Ch,PO,A,E,DP
0,Arizona Diamondbacks,27.4,5.02,1864,276,487,107,12,64,262,...,463,13,2083,363,0.697,1985,1465,501,19,38
1,Atlanta Braves,27.9,5.05,1861,278,478,98,4,91,269,...,524,25,2064,385,0.673,1951,1461,454,36,48
2,Baltimore Orioles,27.2,4.96,1850,273,464,100,10,64,265,...,496,17,2099,377,0.683,1992,1490,478,24,61
3,Boston Red Sox,28.6,5.2,1851,281,489,120,4,61,271,...,450,26,2047,356,0.691,1922,1422,466,34,37
4,Chicago Cubs,28.7,4.54,1837,245,468,82,7,66,237,...,453,20,2006,339,0.69,1956,1434,497,25,55
5,Chicago White Sox,27.9,4.32,1939,246,466,101,7,59,241,...,529,33,2208,415,0.691,1959,1507,424,28,39
6,Cincinnati Reds,27.5,4.65,1822,251,469,101,10,42,242,...,489,25,2099,403,0.661,1877,1426,429,22,42
7,Cleveland Guardians,27.0,3.48,1788,188,409,92,10,30,172,...,409,17,2035,370,0.701,1986,1451,507,28,37
8,Colorado Rockies,29.4,4.57,1895,256,498,127,7,47,246,...,405,19,2145,376,0.678,2034,1458,545,31,58
9,Detroit Tigers,27.3,3.79,1801,201,412,85,3,46,195,...,419,16,2003,334,0.705,1919,1422,468,29,37


In [20]:
merged_23.to_csv('Resources/season2023.csv')

In [21]:
season_2023 = pd.read_csv('Resources/season2023.csv')
season_2023.drop(columns=['Unnamed: 0','W','L','W-L%'], inplace=True)
season_2023.set_index('Tm', inplace=True)

In [22]:
season_2023.head(30)

Unnamed: 0_level_0,BatAge,R/G,AB,R_x,H_x,2B,3B,HR_x,RBI,SB,...,SO_y,HBP_y,BF,LOB_y,DefEff,Ch,PO,A,E,DP
Tm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Arizona Diamondbacks,27.4,5.02,1864,276,487,107,12,64,262,51,...,463,13,2083,363,0.697,1985,1465,501,19,38
Atlanta Braves,27.9,5.05,1861,278,478,98,4,91,269,36,...,524,25,2064,385,0.673,1951,1461,454,36,48
Baltimore Orioles,27.2,4.96,1850,273,464,100,10,64,265,45,...,496,17,2099,377,0.683,1992,1490,478,24,61
Boston Red Sox,28.6,5.2,1851,281,489,120,4,61,271,31,...,450,26,2047,356,0.691,1922,1422,466,34,37
Chicago Cubs,28.7,4.54,1837,245,468,82,7,66,237,41,...,453,20,2006,339,0.69,1956,1434,497,25,55
Chicago White Sox,27.9,4.32,1939,246,466,101,7,59,241,31,...,529,33,2208,415,0.691,1959,1507,424,28,39
Cincinnati Reds,27.5,4.65,1822,251,469,101,10,42,242,43,...,489,25,2099,403,0.661,1877,1426,429,22,42
Cleveland Guardians,27.0,3.48,1788,188,409,92,10,30,172,46,...,409,17,2035,370,0.701,1986,1451,507,28,37
Colorado Rockies,29.4,4.57,1895,256,498,127,7,47,246,19,...,405,19,2145,376,0.678,2034,1458,545,31,58
Detroit Tigers,27.3,3.79,1801,201,412,85,3,46,195,33,...,419,16,2003,334,0.705,1919,1422,468,29,37


In [23]:
season_2023.shape

(30, 36)

In [24]:
scaled_2023 = X_scaler.transform(season_2023)

In [25]:
predictions = best_model.predict(scaled_2023)



In [26]:
predictions

array([[0.51943964],
       [0.48676503],
       [0.525259  ],
       [0.30435336],
       [0.5282531 ],
       [0.32827717],
       [0.3560971 ],
       [0.42987794],
       [0.3402744 ],
       [0.34697756],
       [0.5282531 ],
       [0.29210687],
       [0.46718132],
       [0.5282531 ],
       [0.4258569 ],
       [0.45116153],
       [0.5282531 ],
       [0.4038091 ],
       [0.5282531 ],
       [0.07422271],
       [0.29512104],
       [0.5015542 ],
       [0.4490483 ],
       [0.5282531 ],
       [0.39988825],
       [0.41407958],
       [0.5081903 ],
       [0.5282531 ],
       [0.51112866],
       [0.31901646]], dtype=float32)

In [27]:
season_2023['predictions'] = predictions
season_2023_predict = season_2023.copy()

In [28]:
season_2023_predict = season_2023_predict[['predictions']]

In [31]:
season_2023_predict.sort_values(by=['predictions'], ascending=False,inplace=True)
season_2023_predict.head(30)


Unnamed: 0_level_0,predictions
Tm,Unnamed: 1_level_1
Chicago Cubs,0.528253
Texas Rangers,0.528253
Seattle Mariners,0.528253
New York Yankees,0.528253
Houston Astros,0.528253
Minnesota Twins,0.528253
Los Angeles Dodgers,0.528253
Baltimore Orioles,0.525259
Arizona Diamondbacks,0.51944
Toronto Blue Jays,0.511129
