# Neural Network for Predicting World Series Champions
The procedure has been borrowed in part from the machine learning class examples

# Read in data and pre-process

In [1]:
# import dependencies
import pandas as pd

In [2]:
# Show every column when a frame is displayed, then load the raw MLB table.
pd.options.display.max_columns = None
mlb_df = pd.read_csv('resources/mlb_data.csv')
# Uncomment to preview the first rows:
# mlb_df.head()

In [3]:
# Encode the league column ('Lg') as a number instead of a string:
# 'AL' -> 0, 'NL' -> 1.
# A single column-wise map replaces the row-by-row iterrows()/.at loop.
# Values that are neither 'AL' nor 'NL' (including an already-encoded 0/1,
# so re-running this cell is harmless) are passed through unchanged.
league_codes = {'AL': 0, 'NL': 1}
mlb_df['Lg'] = mlb_df['Lg'].map(lambda league: league_codes.get(league, league))
    

In [4]:
# league is now a 1 or 0
# mlb_df.head()
# mlb_df.dtypes

# Split into training and testing set
Manually split the data by season: odd years are used for training, and even years are used for testing.

In [5]:
# Training set: every row whose season ('Year') is odd.
is_odd_season = mlb_df['Year'] % 2 == 1
train_df = mlb_df.loc[is_odd_season]
train_data = train_df.values
# Columns 3..21 are the model inputs; column 23 is the target
# (presumably the World Series winner flag -- confirm against the CSV header).
X_train, y_train = train_data[:, 3:22], train_data[:, 23]

In [6]:
# Testing set: every row whose season ('Year') is even.
is_even_season = mlb_df['Year'] % 2 == 0
test_df = mlb_df.loc[is_even_season]
test_data = test_df.values
# Same column layout as the training split: inputs in columns 3..21,
# target in column 23.
X_test, y_test = test_data[:, 3:22], test_data[:, 23]

# Scaling and One-hot encoding

In [7]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training features only, so the test set is later
# transformed with statistics it did not contribute to.
X_scaler = StandardScaler()
X_scaler.fit(X_train)

In [8]:
# Apply the train-fitted scaler to both splits.
X_train_scaled, X_test_scaled = (
    X_scaler.transform(features) for features in (X_train, X_test)
)

In [9]:
# Label-encode the winner column.
from sklearn.preprocessing import LabelEncoder

# Fit the encoder ONCE, on the training labels, and reuse that mapping for
# the test labels. Re-fitting on y_test could assign different integer codes
# to the same class (or hide a missing class), and it would also change the
# mapping used by any later label_encoder.inverse_transform(...) call.
label_encoder = LabelEncoder()
label_encoder.fit(y_train)
encoded_y_train = label_encoder.transform(y_train)
encoded_y_test = label_encoder.transform(y_test)

In [10]:
# One-hot encode the integer class labels.
# to_categorical is imported from tensorflow.keras (not the standalone keras
# package) so it comes from the same Keras implementation that builds and
# trains the model in the cells that follow.
from tensorflow.keras.utils import to_categorical

y_train_categorical = to_categorical(encoded_y_train)
y_test_categorical = to_categorical(encoded_y_test)
# y_train_categorical

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


# Create model

In [11]:
from tensorflow.keras.models import Sequential

# Start with an empty Sequential model; layers are added in the following cells.
model = Sequential()

In [12]:
# Show (rows, columns) of the training features; the column count is the
# number of inputs the first layer must accept.
X_train.shape

(300, 19)

In [13]:
from tensorflow.keras.layers import Dense

# Take the input width from the scaled training matrix rather than
# hard-coding it, so the layer still matches if the feature slice changes.
number_inputs = X_train_scaled.shape[1]
number_hidden_nodes = 4
# Single hidden layer with ReLU activation.
model.add(Dense(units=number_hidden_nodes, activation='relu', input_dim=number_inputs))

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


In [14]:
# One output unit per class. The count is read from the width of the one-hot
# training labels so the softmax layer always matches the label encoding.
number_classes = y_train_categorical.shape[1]
model.add(Dense(units=number_classes, activation='softmax'))

In [15]:
# Print the layer-by-layer architecture and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 4)                 80        
_________________________________________________________________
dense_1 (Dense)              (None, 2)                 10        
Total params: 90
Trainable params: 90
Non-trainable params: 0
_________________________________________________________________


In [16]:
# Compile the model: categorical cross-entropy pairs with the one-hot labels
# and the softmax output layer; accuracy is reported during fit/evaluate.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)


In [17]:
# Sanity check: features and one-hot labels must have the same number of rows.
for training_array in (X_train_scaled, y_train_categorical):
    print(training_array.shape)

(300, 19)
(300, 2)


In [18]:
# Train the network on the scaled training features and one-hot labels.
# verbose=2 prints one summary line per epoch.
model.fit(
    X_train_scaled,
    y_train_categorical,
    verbose=2,
    shuffle=True,
    epochs=1000,
)

Epoch 1/1000
300/300 - 0s - loss: 0.4507 - acc: 0.8233
Epoch 2/1000
300/300 - 0s - loss: 0.4223 - acc: 0.8600
Epoch 3/1000
300/300 - 0s - loss: 0.3977 - acc: 0.8800
Epoch 4/1000
300/300 - 0s - loss: 0.3769 - acc: 0.9000
Epoch 5/1000
300/300 - 0s - loss: 0.3595 - acc: 0.9167
Epoch 6/1000
300/300 - 0s - loss: 0.3435 - acc: 0.9333
Epoch 7/1000
300/300 - 0s - loss: 0.3298 - acc: 0.9433
Epoch 8/1000
300/300 - 0s - loss: 0.3174 - acc: 0.9500
Epoch 9/1000
300/300 - 0s - loss: 0.3066 - acc: 0.9600
Epoch 10/1000
300/300 - 0s - loss: 0.2968 - acc: 0.9600
Epoch 11/1000
300/300 - 0s - loss: 0.2879 - acc: 0.9600
Epoch 12/1000
300/300 - 0s - loss: 0.2799 - acc: 0.9600
Epoch 13/1000
300/300 - 0s - loss: 0.2724 - acc: 0.9600
Epoch 14/1000
300/300 - 0s - loss: 0.2659 - acc: 0.9600
Epoch 15/1000
300/300 - 0s - loss: 0.2598 - acc: 0.9667
Epoch 16/1000
300/300 - 0s - loss: 0.2539 - acc: 0.9667
Epoch 17/1000
300/300 - 0s - loss: 0.2485 - acc: 0.9667
Epoch 18/1000
300/300 - 0s - loss: 0.2437 - acc: 0.9667
E

Epoch 147/1000
300/300 - 0s - loss: 0.0838 - acc: 0.9667
Epoch 148/1000
300/300 - 0s - loss: 0.0834 - acc: 0.9667
Epoch 149/1000
300/300 - 0s - loss: 0.0832 - acc: 0.9667
Epoch 150/1000
300/300 - 0s - loss: 0.0829 - acc: 0.9667
Epoch 151/1000
300/300 - 0s - loss: 0.0826 - acc: 0.9667
Epoch 152/1000
300/300 - 0s - loss: 0.0824 - acc: 0.9667
Epoch 153/1000
300/300 - 0s - loss: 0.0822 - acc: 0.9667
Epoch 154/1000
300/300 - 0s - loss: 0.0820 - acc: 0.9667
Epoch 155/1000
300/300 - 0s - loss: 0.0817 - acc: 0.9667
Epoch 156/1000
300/300 - 0s - loss: 0.0815 - acc: 0.9667
Epoch 157/1000
300/300 - 0s - loss: 0.0814 - acc: 0.9667
Epoch 158/1000
300/300 - 0s - loss: 0.0811 - acc: 0.9667
Epoch 159/1000
300/300 - 0s - loss: 0.0809 - acc: 0.9667
Epoch 160/1000
300/300 - 0s - loss: 0.0808 - acc: 0.9667
Epoch 161/1000
300/300 - 0s - loss: 0.0805 - acc: 0.9667
Epoch 162/1000
300/300 - 0s - loss: 0.0806 - acc: 0.9667
Epoch 163/1000
300/300 - 0s - loss: 0.0803 - acc: 0.9667
Epoch 164/1000
300/300 - 0s - l

Epoch 291/1000
300/300 - 0s - loss: 0.0548 - acc: 0.9667
Epoch 292/1000
300/300 - 0s - loss: 0.0545 - acc: 0.9667
Epoch 293/1000
300/300 - 0s - loss: 0.0543 - acc: 0.9667
Epoch 294/1000
300/300 - 0s - loss: 0.0541 - acc: 0.9667
Epoch 295/1000
300/300 - 0s - loss: 0.0540 - acc: 0.9667
Epoch 296/1000
300/300 - 0s - loss: 0.0540 - acc: 0.9667
Epoch 297/1000
300/300 - 0s - loss: 0.0539 - acc: 0.9667
Epoch 298/1000
300/300 - 0s - loss: 0.0536 - acc: 0.9667
Epoch 299/1000
300/300 - 0s - loss: 0.0533 - acc: 0.9667
Epoch 300/1000
300/300 - 0s - loss: 0.0532 - acc: 0.9667
Epoch 301/1000
300/300 - 0s - loss: 0.0529 - acc: 0.9667
Epoch 302/1000
300/300 - 0s - loss: 0.0528 - acc: 0.9667
Epoch 303/1000
300/300 - 0s - loss: 0.0526 - acc: 0.9667
Epoch 304/1000
300/300 - 0s - loss: 0.0525 - acc: 0.9667
Epoch 305/1000
300/300 - 0s - loss: 0.0522 - acc: 0.9667
Epoch 306/1000
300/300 - 0s - loss: 0.0521 - acc: 0.9667
Epoch 307/1000
300/300 - 0s - loss: 0.0519 - acc: 0.9667
Epoch 308/1000
300/300 - 0s - l

Epoch 435/1000
300/300 - 0s - loss: 0.0341 - acc: 0.9933
Epoch 436/1000
300/300 - 0s - loss: 0.0337 - acc: 0.9933
Epoch 437/1000
300/300 - 0s - loss: 0.0338 - acc: 0.9933
Epoch 438/1000
300/300 - 0s - loss: 0.0336 - acc: 0.9933
Epoch 439/1000
300/300 - 0s - loss: 0.0335 - acc: 0.9933
Epoch 440/1000
300/300 - 0s - loss: 0.0333 - acc: 0.9933
Epoch 441/1000
300/300 - 0s - loss: 0.0332 - acc: 0.9933
Epoch 442/1000
300/300 - 0s - loss: 0.0330 - acc: 0.9933
Epoch 443/1000
300/300 - 0s - loss: 0.0330 - acc: 0.9933
Epoch 444/1000
300/300 - 0s - loss: 0.0328 - acc: 0.9933
Epoch 445/1000
300/300 - 0s - loss: 0.0327 - acc: 0.9933
Epoch 446/1000
300/300 - 0s - loss: 0.0328 - acc: 0.9933
Epoch 447/1000
300/300 - 0s - loss: 0.0326 - acc: 0.9933
Epoch 448/1000
300/300 - 0s - loss: 0.0325 - acc: 0.9933
Epoch 449/1000
300/300 - 0s - loss: 0.0325 - acc: 0.9933
Epoch 450/1000
300/300 - 0s - loss: 0.0323 - acc: 0.9933
Epoch 451/1000
300/300 - 0s - loss: 0.0322 - acc: 0.9933
Epoch 452/1000
300/300 - 0s - l

Epoch 579/1000
300/300 - 0s - loss: 0.0209 - acc: 0.9967
Epoch 580/1000
300/300 - 0s - loss: 0.0209 - acc: 0.9967
Epoch 581/1000
300/300 - 0s - loss: 0.0209 - acc: 0.9967
Epoch 582/1000
300/300 - 0s - loss: 0.0209 - acc: 0.9967
Epoch 583/1000
300/300 - 0s - loss: 0.0207 - acc: 0.9967
Epoch 584/1000
300/300 - 0s - loss: 0.0207 - acc: 0.9967
Epoch 585/1000
300/300 - 0s - loss: 0.0205 - acc: 0.9967
Epoch 586/1000
300/300 - 0s - loss: 0.0206 - acc: 0.9967
Epoch 587/1000
300/300 - 0s - loss: 0.0205 - acc: 0.9967
Epoch 588/1000
300/300 - 0s - loss: 0.0205 - acc: 0.9967
Epoch 589/1000
300/300 - 0s - loss: 0.0204 - acc: 0.9967
Epoch 590/1000
300/300 - 0s - loss: 0.0203 - acc: 0.9967
Epoch 591/1000
300/300 - 0s - loss: 0.0202 - acc: 0.9967
Epoch 592/1000
300/300 - 0s - loss: 0.0202 - acc: 0.9967
Epoch 593/1000
300/300 - 0s - loss: 0.0202 - acc: 0.9967
Epoch 594/1000
300/300 - 0s - loss: 0.0201 - acc: 0.9967
Epoch 595/1000
300/300 - 0s - loss: 0.0200 - acc: 0.9967
Epoch 596/1000
300/300 - 0s - l

Epoch 723/1000
300/300 - 0s - loss: 0.0147 - acc: 0.9967
Epoch 724/1000
300/300 - 0s - loss: 0.0147 - acc: 0.9967
Epoch 725/1000
300/300 - 0s - loss: 0.0147 - acc: 0.9967
Epoch 726/1000
300/300 - 0s - loss: 0.0147 - acc: 0.9967
Epoch 727/1000
300/300 - 0s - loss: 0.0146 - acc: 0.9967
Epoch 728/1000
300/300 - 0s - loss: 0.0146 - acc: 0.9967
Epoch 729/1000
300/300 - 0s - loss: 0.0146 - acc: 0.9967
Epoch 730/1000
300/300 - 0s - loss: 0.0146 - acc: 0.9967
Epoch 731/1000
300/300 - 0s - loss: 0.0146 - acc: 0.9967
Epoch 732/1000
300/300 - 0s - loss: 0.0146 - acc: 0.9967
Epoch 733/1000
300/300 - 0s - loss: 0.0145 - acc: 0.9967
Epoch 734/1000
300/300 - 0s - loss: 0.0145 - acc: 0.9967
Epoch 735/1000
300/300 - 0s - loss: 0.0145 - acc: 0.9967
Epoch 736/1000
300/300 - 0s - loss: 0.0144 - acc: 0.9967
Epoch 737/1000
300/300 - 0s - loss: 0.0144 - acc: 0.9967
Epoch 738/1000
300/300 - 0s - loss: 0.0144 - acc: 0.9967
Epoch 739/1000
300/300 - 0s - loss: 0.0143 - acc: 0.9967
Epoch 740/1000
300/300 - 0s - l

Epoch 867/1000
300/300 - 0s - loss: 0.0125 - acc: 0.9967
Epoch 868/1000
300/300 - 0s - loss: 0.0125 - acc: 0.9967
Epoch 869/1000
300/300 - 0s - loss: 0.0124 - acc: 0.9967
Epoch 870/1000
300/300 - 0s - loss: 0.0125 - acc: 0.9967
Epoch 871/1000
300/300 - 0s - loss: 0.0125 - acc: 0.9967
Epoch 872/1000
300/300 - 0s - loss: 0.0125 - acc: 0.9967
Epoch 873/1000
300/300 - 0s - loss: 0.0124 - acc: 0.9967
Epoch 874/1000
300/300 - 0s - loss: 0.0124 - acc: 0.9967
Epoch 875/1000
300/300 - 0s - loss: 0.0124 - acc: 0.9967
Epoch 876/1000
300/300 - 0s - loss: 0.0124 - acc: 0.9967
Epoch 877/1000
300/300 - 0s - loss: 0.0124 - acc: 0.9967
Epoch 878/1000
300/300 - 0s - loss: 0.0125 - acc: 0.9967
Epoch 879/1000
300/300 - 0s - loss: 0.0125 - acc: 0.9967
Epoch 880/1000
300/300 - 0s - loss: 0.0125 - acc: 0.9967
Epoch 881/1000
300/300 - 0s - loss: 0.0124 - acc: 0.9967
Epoch 882/1000
300/300 - 0s - loss: 0.0124 - acc: 0.9967
Epoch 883/1000
300/300 - 0s - loss: 0.0124 - acc: 0.9967
Epoch 884/1000
300/300 - 0s - l

<tensorflow.python.keras.callbacks.History at 0x1b0bfaeb748>

In [19]:
# Score the trained model on the held-out (even-year) data.
# evaluate() returns the loss followed by the compiled metrics (accuracy).
test_metrics = model.evaluate(X_test_scaled, y_test_categorical, verbose=2)
model_loss, model_accuracy = test_metrics
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

330/330 - 0s - loss: 0.5831 - acc: 0.9455
Loss: 0.5830996637993817, Accuracy: 0.9454545378684998


In [None]:
# Predict a class for every test row.
# Sequential.predict_classes() is deprecated and no longer available in newer
# TensorFlow releases; taking the argmax over the softmax output is the
# equivalent computation and works on old and new versions alike.
class_probabilities = model.predict(X_test_scaled)
encoded_predictions = class_probabilities.argmax(axis=-1)
# Map the integer class indices back to the original label values.
prediction_labels = label_encoder.inverse_transform(encoded_predictions)
# print(f"Predicted classes: {prediction_labels}")
# print(f"Actual Labels: {list(y_test)}")

In [32]:
# Lift the row display limit, then pair each prediction with its true label.
pd.options.display.max_rows = None
prediction_columns = {
    'Prediction': prediction_labels,
    'Actual': list(y_test),
}
predictions_df = pd.DataFrame(prediction_columns)
# predictions_df.head()

In [35]:
# Keep only the rows whose actual label is True, to see how many of them
# the model also predicted as True.
is_actual_winner = predictions_df['Actual']
winners_df = predictions_df.loc[is_actual_winner]
winners_df

Unnamed: 0,Prediction,Actual
23,False,True
39,False,True
48,False,True
54,False,True
69,False,True
103,False,True
173,False,True
210,False,True
257,False,True
285,True,True


In [None]:
# TODO: connect each prediction back to its team and season.
# TODO: can the model predict the single most likely winner per season? Maybe rank teams with some sort of "confidence" metric.
