In [1]:
# Initial imports
import pandas as pd
import numpy as np
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
import h5py
import tensorflow as tf

In [27]:
season_2021_2011= pd.read_csv("2021_2011_regular_season_super_bowl.csv", 
                          index_col="Years",
                          parse_dates = True, 
                          infer_datetime_format = True
)

In [28]:
# Review the data types associated with the columns
season_2021_2011.dtypes

Date            int64
Teams          object
W               int64
L               int64
T               int64
Super Bowl      int64
PCT           float64
PF              int64
PA              int64
Net Pts         int64
Home           object
Road           object
Div            object
Pct           float64
Conf           object
Pct.1         float64
Non-Conf       object
Strk           object
Last 5         object
dtype: object

In [29]:
season_2021_2011.columns

Index(['Date', 'Teams', 'W', 'L', 'T', 'Super Bowl', 'PCT', 'PF', 'PA',
       'Net Pts', 'Home', 'Road', 'Div', 'Pct', 'Conf', 'Pct.1', 'Non-Conf',
       'Strk', 'Last 5'],
      dtype='object')

In [31]:
# Drop the 'Date' 'W', 'L', 'T', 'PCT',  'NAME' columns from the DataFrame
season_2021_2011_obj = season_2021_2011.drop(
    columns = ["Super Bowl", "Date","W", "L", "T", "PCT", "PF", "PA", "Pct", "Pct.1", "Net Pts"]
)

# Review the DataFrame
season_2021_2011_obj

Unnamed: 0_level_0,Teams,Home,Road,Div,Conf,Non-Conf,Strk,Last 5
Years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2021-01-01,Bills,6 - 3 - 0,5 - 3 - 0,5 - 1 - 0,7 - 5 - 0,4 - 1 - 0,4W,4 - 1 - 0
2021-01-01,Dolphins,6 - 3 - 0,3 - 5 - 0,4 - 2 - 0,6 - 6 - 0,3 - 2 - 0,1W,4 - 1 - 0
2021-01-01,Patriots,4 - 5 - 0,6 - 2 - 0,3 - 3 - 0,8 - 4 - 0,2 - 3 - 0,1L,2 - 3 - 0
2021-01-01,Jets,3 - 6 - 0,1 - 7 - 0,0 - 6 - 0,4 - 8 - 0,0 - 5 - 0,2L,1 - 4 - 0
2021-01-01,Bengals,5 - 4 - 0,5 - 3 - 0,4 - 2 - 0,8 - 4 - 0,2 - 3 - 0,1L,3 - 2 - 0
...,...,...,...,...,...,...,...,...
2011-01-01,Buccaneers,3 - 5 - 0,1 - 7 - 0,2 - 4 - 0,3 - 9 - 0,1 - 3 - 0,10L,0 - 5 - 0
2011-01-01,49ers,7 - 1 - 0,6 - 2 - 0,5 - 1 - 0,10 - 2 - 0,3 - 1 - 0,3W,4 - 1 - 0
2011-01-01,Cardinals,6 - 2 - 0,2 - 6 - 0,4 - 2 - 0,7 - 5 - 0,1 - 3 - 0,1W,4 - 1 - 0
2011-01-01,Seahawks,4 - 4 - 0,3 - 5 - 0,3 - 3 - 0,6 - 6 - 0,1 - 3 - 0,2L,3 - 2 - 0


In [32]:
# Create a list of categorical variables 
teams_variables = list(season_2021_2011.dtypes[season_2021_2011.dtypes == "object"].index)


In [33]:
from sklearn.preprocessing import OneHotEncoder

In [34]:
# Create a OneHotEncoder instance (Takes non-numerical data into 0 or 1)
enc =  OneHotEncoder(sparse=False)

In [35]:
# Encode the categorcal variables using OneHotEncoder
encoded_data = enc.fit_transform(season_2021_2011[teams_variables])
encoded_data

array([[0., 0., 0., ..., 0., 1., 0.],
       [0., 0., 0., ..., 0., 1., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 1., 0.],
       [0., 0., 0., ..., 1., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [36]:
# Create a DataFrame with the encoded variables
encoded_df = pd.DataFrame(
    encoded_data,
    columns = enc.get_feature_names_out(teams_variables)
)
# # Review the DataFrame
# encoded_df

In [37]:
encoded_df.columns

Index(['Teams_49ers', 'Teams_Bears', 'Teams_Bengals', 'Teams_Bills',
       'Teams_Broncos', 'Teams_Browns', 'Teams_Buccaneers', 'Teams_Cardinals',
       'Teams_Chargers', 'Teams_Chiefs',
       ...
       'Strk_7W', 'Strk_8L', 'Strk_8W', 'Strk_9L', 'Last 5_0 - 5 - 0',
       'Last 5_1 - 4 - 0', 'Last 5_2 - 3 - 0', 'Last 5_3 - 2 - 0',
       'Last 5_4 - 1 - 0', 'Last 5_5 - 0 - 0'],
      dtype='object', length=151)

In [38]:
# Create a DataFrame with the columnns containing numerical variables from the original dataset
numerical_variables_df = season_2021_2011.drop(columns = teams_variables)


In [39]:
numerical_variables_df

Unnamed: 0_level_0,Date,W,L,T,Super Bowl,PCT,PF,PA,Net Pts,Pct,Pct.1
Years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2021-01-01,2021,11,6,0,0,0.647,483,289,194,0.833,0.583
2021-01-01,2021,9,8,0,0,0.529,341,373,-32,0.667,0.500
2021-01-01,2021,10,7,0,0,0.588,462,303,159,0.500,0.667
2021-01-01,2021,4,13,0,0,0.235,310,504,-194,0.000,0.333
2021-01-01,2021,10,7,0,0,0.588,460,376,84,0.667,0.667
...,...,...,...,...,...,...,...,...,...,...,...
2011-01-01,2011,4,12,0,0,0.250,287,494,-207,0.333,0.250
2011-01-01,2011,13,3,0,0,0.813,380,229,151,0.833,0.833
2011-01-01,2011,8,8,0,0,0.500,312,348,-36,0.667,0.583
2011-01-01,2011,7,9,0,0,0.438,321,315,6,0.500,0.500


In [40]:
x = season_2021_2011.loc[:,season_2021_2011.columns != "Super Bowl"]
X = pd.get_dummies(x)
y = season_2021_2011['Super Bowl'].astype("int")

In [41]:
# Split into testing and training sets using train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

In [42]:
# Create a list of the columns with categorical variables
Teams_code = season_2021_2011["Teams"]

# Use the fit_transform method from the OneHotEncoder to encode the data
encoded_data = enc.fit_transform([Teams_code])

In [65]:
test = season_2021_2011.copy()
test_for_dummies = test[['Date','Teams','W','L','T', 'PCT', 'PF', 'PA']]
test_for_dummies.head()

Unnamed: 0_level_0,Date,Teams,W,L,T,PCT,PF,PA
Years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2021-01-01,2021,Bills,11,6,0,0.647,483,289
2021-01-01,2021,Dolphins,9,8,0,0.529,341,373
2021-01-01,2021,Patriots,10,7,0,0.588,462,303
2021-01-01,2021,Jets,4,13,0,0.235,310,504
2021-01-01,2021,Bengals,10,7,0,0.588,460,376


In [66]:
# Create a StandardScaler instance
scaler = StandardScaler().fit(X_train)

# Fit the scaler to the features training dataset
X_scaler = scaler.fit(X_train)

# Fit the scaler to the features training dataset
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [67]:
from sklearn.linear_model import LogisticRegression

In [68]:
# Declare a logistic regression model.
# Apply a random_state of 9 to the model
logistic_regression_model = LogisticRegression(random_state=9)

# Fit and save the logistic regression model using the training data
lr_model = logistic_regression_model.fit(X_train, y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [69]:
# Define the the number of inputs (features) to the model
number_input_features = len(X_train.iloc[1])

# Review the number of features
number_input_features

161

In [70]:
# Define the number of neurons in the output layer
number_output_neurons = 1

In [71]:
# Define the number of hidden nodes for the first hidden layer
hidden_nodes_layer1 =  (number_input_features + 674) // 2 

# Review the number hidden nodes in the first layer
hidden_nodes_layer1

417

In [72]:
# Define the number of hidden nodes for the second hidden layer
hidden_nodes_layer2 =  (hidden_nodes_layer1 + 32) // 2

# Review the number hidden nodes in the second layer
hidden_nodes_layer2

224

In [73]:
# Create the Sequential model instance
nn = Sequential()

In [74]:
# Add the first hidden layer
nn.add(Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="relu"))

In [75]:
# Add the second hidden layer
nn.add(Dense(units=hidden_nodes_layer2, activation="relu"))

In [76]:
# Add the output layer to the model specifying the number of output neurons and activation function
nn.add(Dense(units=1, activation="sigmoid"))

In [77]:
# Display the Sequential model summary
nn.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_3 (Dense)             (None, 417)               67554     
                                                                 
 dense_4 (Dense)             (None, 224)               93632     
                                                                 
 dense_5 (Dense)             (None, 1)                 225       
                                                                 
Total params: 161,411
Trainable params: 161,411
Non-trainable params: 0
_________________________________________________________________


In [78]:
# Compile the Sequential model
nn.compile(loss="binary_crossentropy",optimizer="adam",metrics=["accuracy"])

In [79]:
# Fit the model using epochs and the training data
history=nn.fit(X_train_scaled, y_train, epochs=50, batch_size=128, validation_split=0.2)


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [81]:
# Evaluate the model loss and accuracy metrics using the evaluate method and the test data
model_loss, model_accuracy = nn.evaluate(X_test_scaled, y_test, verbose=2)

# Display the model loss and accuracy results
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

3/3 - 0s - loss: 0.0954 - accuracy: 0.9886 - 20ms/epoch - 7ms/step
Loss: 0.09538919478654861, Accuracy: 0.9886363744735718
