In [1]:
# Dependencies
import numpy as np
import pandas as pd

In [2]:
df = pd.read_csv("data/nfl.csv")

In [3]:
df["date"] = pd.to_datetime(df["date"])

In [4]:
df.head()

Unnamed: 0,third_per,third_per_allowed,TOP,date,first_downs,first_downs_allowed,ha,margin,opp,pass_yards,...,rush_yards,rush_yards_allowed,sacked,sacks,takeaways,team,total_points,total_yards,total_yards_allowed,turnovers
0,0.307692,0.285714,28.35,2009-09-10,18,19,away,-3,PIT,234,...,86,36,1,4,3,TEN,23,320,357,2
1,0.285714,0.307692,36.183333,2009-09-10,19,18,home,3,TEN,321,...,36,86,4,1,2,PIT,23,357,320,3
2,0.363636,0.4,29.116667,2009-09-13,16,19,away,-12,ATL,163,...,96,68,4,2,0,MIA,26,259,281,4
3,0.4,0.363636,30.883333,2009-09-13,19,16,home,12,MIA,213,...,68,96,2,4,4,ATL,26,281,259,0
4,0.25,0.333333,26.55,2009-09-13,10,16,away,5,CIN,227,...,75,86,3,3,2,DEN,19,302,307,0


In [5]:
df.columns

Index(['third_per', 'third_per_allowed', 'TOP', 'date', 'first_downs',
       'first_downs_allowed', 'ha', 'margin', 'opp', 'pass_yards',
       'pass_yards_allowed', 'penalty_yards', 'plays', 'points',
       'points_allowed', 'result', 'rush_yards', 'rush_yards_allowed',
       'sacked', 'sacks', 'takeaways', 'team', 'total_points', 'total_yards',
       'total_yards_allowed', 'turnovers'],
      dtype='object')

In [6]:
# Dummify ha where away is 0 and home is 1
df["ha"] = df["ha"].map(lambda x: 0 if x == "home" else 1)

In [7]:
df.head()

Unnamed: 0,third_per,third_per_allowed,TOP,date,first_downs,first_downs_allowed,ha,margin,opp,pass_yards,...,rush_yards,rush_yards_allowed,sacked,sacks,takeaways,team,total_points,total_yards,total_yards_allowed,turnovers
0,0.307692,0.285714,28.35,2009-09-10,18,19,1,-3,PIT,234,...,86,36,1,4,3,TEN,23,320,357,2
1,0.285714,0.307692,36.183333,2009-09-10,19,18,0,3,TEN,321,...,36,86,4,1,2,PIT,23,357,320,3
2,0.363636,0.4,29.116667,2009-09-13,16,19,1,-12,ATL,163,...,96,68,4,2,0,MIA,26,259,281,4
3,0.4,0.363636,30.883333,2009-09-13,19,16,0,12,MIA,213,...,68,96,2,4,4,ATL,26,281,259,0
4,0.25,0.333333,26.55,2009-09-13,10,16,1,5,CIN,227,...,75,86,3,3,2,DEN,19,302,307,0


In [8]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4806 entries, 0 to 4805
Data columns (total 26 columns):
third_per              4806 non-null float64
third_per_allowed      4806 non-null float64
TOP                    4806 non-null float64
date                   4806 non-null datetime64[ns]
first_downs            4806 non-null int64
first_downs_allowed    4806 non-null int64
ha                     4806 non-null int64
margin                 4806 non-null int64
opp                    4806 non-null object
pass_yards             4806 non-null int64
pass_yards_allowed     4806 non-null int64
penalty_yards          4806 non-null int64
plays                  4806 non-null int64
points                 4806 non-null int64
points_allowed         4806 non-null int64
result                 4806 non-null object
rush_yards             4806 non-null int64
rush_yards_allowed     4806 non-null int64
sacked                 4806 non-null int64
sacks                  4806 non-null int64
takeaways       

# Data Preprocessing

In [9]:
# Grab dummy variables
dummy_vars = pd.get_dummies(df[["opp", "team"]])
dummy_vars.head()

Unnamed: 0,opp_ARI,opp_ATL,opp_BAL,opp_BUF,opp_CAR,opp_CHI,opp_CIN,opp_CLE,opp_DAL,opp_DEN,...,team_NYG,team_NYJ,team_OAK,team_PHI,team_PIT,team_SEA,team_SFO,team_TBB,team_TEN,team_WAS
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
2,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [10]:
# Add our dummy columns into the features df
df[list(dummy_vars.columns)] = dummy_vars

In [11]:
df.head()

Unnamed: 0,third_per,third_per_allowed,TOP,date,first_downs,first_downs_allowed,ha,margin,opp,pass_yards,...,team_NYG,team_NYJ,team_OAK,team_PHI,team_PIT,team_SEA,team_SFO,team_TBB,team_TEN,team_WAS
0,0.307692,0.285714,28.35,2009-09-10,18,19,1,-3,PIT,234,...,0,0,0,0,0,0,0,0,1,0
1,0.285714,0.307692,36.183333,2009-09-10,19,18,0,3,TEN,321,...,0,0,0,0,1,0,0,0,0,0
2,0.363636,0.4,29.116667,2009-09-13,16,19,1,-12,ATL,163,...,0,0,0,0,0,0,0,0,0,0
3,0.4,0.363636,30.883333,2009-09-13,19,16,0,12,MIA,213,...,0,0,0,0,0,0,0,0,0,0
4,0.25,0.333333,26.55,2009-09-13,10,16,1,5,CIN,227,...,0,0,0,0,0,0,0,0,0,0


In [12]:
df.columns

Index(['third_per', 'third_per_allowed', 'TOP', 'date', 'first_downs',
       'first_downs_allowed', 'ha', 'margin', 'opp', 'pass_yards',
       'pass_yards_allowed', 'penalty_yards', 'plays', 'points',
       'points_allowed', 'result', 'rush_yards', 'rush_yards_allowed',
       'sacked', 'sacks', 'takeaways', 'team', 'total_points', 'total_yards',
       'total_yards_allowed', 'turnovers', 'opp_ARI', 'opp_ATL', 'opp_BAL',
       'opp_BUF', 'opp_CAR', 'opp_CHI', 'opp_CIN', 'opp_CLE', 'opp_DAL',
       'opp_DEN', 'opp_DET', 'opp_GBP', 'opp_HOU', 'opp_IND', 'opp_JAX',
       'opp_KCC', 'opp_LAC', 'opp_LAR', 'opp_MIA', 'opp_MIN', 'opp_NEP',
       'opp_NOS', 'opp_NYG', 'opp_NYJ', 'opp_OAK', 'opp_PHI', 'opp_PIT',
       'opp_SEA', 'opp_SFO', 'opp_TBB', 'opp_TEN', 'opp_WAS', 'team_ARI',
       'team_ATL', 'team_BAL', 'team_BUF', 'team_CAR', 'team_CHI', 'team_CIN',
       'team_CLE', 'team_DAL', 'team_DEN', 'team_DET', 'team_GBP', 'team_HOU',
       'team_IND', 'team_JAX', 'team_KCC', 'team

In [13]:
# define the features and label
X = df.drop(["result", "date", "opp", "team", "margin", "points", "points_allowed", "total_points"], axis=1)
y = df['result']
print(X.shape, y.shape)

(4806, 82) (4806,)


In [14]:
X.columns

Index(['third_per', 'third_per_allowed', 'TOP', 'first_downs',
       'first_downs_allowed', 'ha', 'pass_yards', 'pass_yards_allowed',
       'penalty_yards', 'plays', 'rush_yards', 'rush_yards_allowed', 'sacked',
       'sacks', 'takeaways', 'total_yards', 'total_yards_allowed', 'turnovers',
       'opp_ARI', 'opp_ATL', 'opp_BAL', 'opp_BUF', 'opp_CAR', 'opp_CHI',
       'opp_CIN', 'opp_CLE', 'opp_DAL', 'opp_DEN', 'opp_DET', 'opp_GBP',
       'opp_HOU', 'opp_IND', 'opp_JAX', 'opp_KCC', 'opp_LAC', 'opp_LAR',
       'opp_MIA', 'opp_MIN', 'opp_NEP', 'opp_NOS', 'opp_NYG', 'opp_NYJ',
       'opp_OAK', 'opp_PHI', 'opp_PIT', 'opp_SEA', 'opp_SFO', 'opp_TBB',
       'opp_TEN', 'opp_WAS', 'team_ARI', 'team_ATL', 'team_BAL', 'team_BUF',
       'team_CAR', 'team_CHI', 'team_CIN', 'team_CLE', 'team_DAL', 'team_DEN',
       'team_DET', 'team_GBP', 'team_HOU', 'team_IND', 'team_JAX', 'team_KCC',
       'team_LAC', 'team_LAR', 'team_MIA', 'team_MIN', 'team_NEP', 'team_NOS',
       'team_NYG', 'team_NY

In [15]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from keras.utils import to_categorical

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, stratify=y)
X_scaler = StandardScaler().fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

# Step 1: Label-encode data set
label_encoder = LabelEncoder()
label_encoder.fit(y_train)
encoded_y_train = label_encoder.transform(y_train)
encoded_y_test = label_encoder.transform(y_test)

# Step 2: Convert encoded labels to one-hot-encoding
y_train_categorical = to_categorical(encoded_y_train)
y_test_categorical = to_categorical(encoded_y_test)

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


# Create Deep Learning Model

In [16]:
from keras.models import Sequential
from keras.layers import Dense

# Create model and add layers
model = Sequential()
model.add(Dense(units=200, activation='relu', input_dim=82))
model.add(Dense(units=200, activation='relu'))
model.add(Dense(units=3, activation='softmax'))

In [17]:
# Compile and fit the model
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
model.fit(
    X_train_scaled,
    y_train_categorical,
    epochs=60,
    shuffle=True,
    verbose=2
)

Epoch 1/60
 - 0s - loss: 0.4308 - acc: 0.7974
Epoch 2/60
 - 0s - loss: 0.3000 - acc: 0.8618
Epoch 3/60
 - 0s - loss: 0.2468 - acc: 0.8965
Epoch 4/60
 - 0s - loss: 0.2089 - acc: 0.9070
Epoch 5/60
 - 0s - loss: 0.1753 - acc: 0.9295
Epoch 6/60
 - 0s - loss: 0.1393 - acc: 0.9448
Epoch 7/60
 - 0s - loss: 0.1138 - acc: 0.9578
Epoch 8/60
 - 0s - loss: 0.0927 - acc: 0.9678
Epoch 9/60
 - 0s - loss: 0.0735 - acc: 0.9761
Epoch 10/60
 - 0s - loss: 0.0567 - acc: 0.9814
Epoch 11/60
 - 0s - loss: 0.0489 - acc: 0.9842
Epoch 12/60
 - 0s - loss: 0.0318 - acc: 0.9911
Epoch 13/60
 - 0s - loss: 0.0261 - acc: 0.9920
Epoch 14/60
 - 0s - loss: 0.0245 - acc: 0.9928
Epoch 15/60
 - 0s - loss: 0.0265 - acc: 0.9933
Epoch 16/60
 - 0s - loss: 0.0154 - acc: 0.9967
Epoch 17/60
 - 0s - loss: 0.0166 - acc: 0.9958
Epoch 18/60
 - 0s - loss: 0.0084 - acc: 0.9989
Epoch 19/60
 - 0s - loss: 0.0118 - acc: 0.9967
Epoch 20/60
 - 0s - loss: 0.0138 - acc: 0.9961
Epoch 21/60
 - 0s - loss: 0.0453 - acc: 0.9839
Epoch 22/60
 - 0s - lo

<keras.callbacks.History at 0x1d4408d5c50>

# Quantify our Trained Model

In [18]:
# train_model_loss, train_model_accuracy = model.evaluate(X_train_scaled, y_train_categorical, verbose=2)
test_model_loss, test_model_accuracy = model.evaluate(X_test_scaled, y_test_categorical, verbose=2)

# print(f"Train Deep Neural Network - Loss: {train_model_loss}, Accuracy: {train_model_accuracy}")
print(f"Test Deep Neural Network - Loss: {test_model_loss}, Accuracy: {test_model_accuracy}")

Test Deep Neural Network - Loss: 1.351801462656665, Accuracy: 0.8227953410981698


# Save our neural network model

In [19]:
# Save the model
# model.save("models/deep_neural_model_trained.h5")

# Make Predictions

In [20]:
encoded_predictions = model.predict_classes(X_test_scaled[:10])
prediction_labels = label_encoder.inverse_transform(encoded_predictions)

  if diff:


In [21]:
encoded_predictions

array([2, 0, 0, 0, 0, 2, 0, 0, 2, 0], dtype=int64)

In [22]:
print(f"Predicted classes: {prediction_labels}")
print(f"Actual Labels: {list(y_test[:10])}")

Predicted classes: ['Win' 'Loss' 'Loss' 'Loss' 'Loss' 'Win' 'Loss' 'Loss' 'Win' 'Loss']
Actual Labels: ['Win', 'Loss', 'Loss', 'Loss', 'Loss', 'Win', 'Loss', 'Loss', 'Win', 'Loss']


# Get results of test data

In [23]:
predicted_encoded = model.predict_classes(X_test_scaled)
predicted_label_column = label_encoder.inverse_transform(predicted_encoded)

  if diff:


In [24]:
print(predicted_label_column, len(predicted_label_column))

['Win' 'Loss' 'Loss' ... 'Win' 'Loss' 'Win'] 1202


In [25]:
new_test_df = X_test
new_test_df["prediction"] = predicted_label_column

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [26]:
new_test_df.head()

Unnamed: 0,third_per,third_per_allowed,TOP,first_downs,first_downs_allowed,ha,pass_yards,pass_yards_allowed,penalty_yards,plays,...,team_NYJ,team_OAK,team_PHI,team_PIT,team_SEA,team_SFO,team_TBB,team_TEN,team_WAS,prediction
2285,0.363636,0.375,26.133333,17,17,0,107,182,17,52,...,0,0,0,0,0,1,0,0,0,Win
1233,0.1,0.375,21.866667,17,22,0,245,230,75,53,...,0,0,0,0,0,0,0,0,1,Loss
1396,0.2,0.411765,26.616667,15,18,1,186,150,30,60,...,0,0,0,0,0,0,1,0,0,Loss
2555,0.533333,0.384615,27.416667,22,25,0,195,168,80,72,...,0,0,0,0,0,0,0,0,0,Loss
3393,0.111111,0.357143,25.25,15,21,0,179,251,45,52,...,0,0,0,0,0,0,0,1,0,Loss


In [27]:
dummy_header_list = list(dummy_vars)
print(dummy_header_list)

['opp_ARI', 'opp_ATL', 'opp_BAL', 'opp_BUF', 'opp_CAR', 'opp_CHI', 'opp_CIN', 'opp_CLE', 'opp_DAL', 'opp_DEN', 'opp_DET', 'opp_GBP', 'opp_HOU', 'opp_IND', 'opp_JAX', 'opp_KCC', 'opp_LAC', 'opp_LAR', 'opp_MIA', 'opp_MIN', 'opp_NEP', 'opp_NOS', 'opp_NYG', 'opp_NYJ', 'opp_OAK', 'opp_PHI', 'opp_PIT', 'opp_SEA', 'opp_SFO', 'opp_TBB', 'opp_TEN', 'opp_WAS', 'team_ARI', 'team_ATL', 'team_BAL', 'team_BUF', 'team_CAR', 'team_CHI', 'team_CIN', 'team_CLE', 'team_DAL', 'team_DEN', 'team_DET', 'team_GBP', 'team_HOU', 'team_IND', 'team_JAX', 'team_KCC', 'team_LAC', 'team_LAR', 'team_MIA', 'team_MIN', 'team_NEP', 'team_NOS', 'team_NYG', 'team_NYJ', 'team_OAK', 'team_PHI', 'team_PIT', 'team_SEA', 'team_SFO', 'team_TBB', 'team_TEN', 'team_WAS']


In [28]:
opp_headers = []
team_headers = []

for item in dummy_header_list:
    
    if item[:4] == "opp_":
        opp_headers.append(item)
    else:
        team_headers.append(item)

print(opp_headers)
print(team_headers)

['opp_ARI', 'opp_ATL', 'opp_BAL', 'opp_BUF', 'opp_CAR', 'opp_CHI', 'opp_CIN', 'opp_CLE', 'opp_DAL', 'opp_DEN', 'opp_DET', 'opp_GBP', 'opp_HOU', 'opp_IND', 'opp_JAX', 'opp_KCC', 'opp_LAC', 'opp_LAR', 'opp_MIA', 'opp_MIN', 'opp_NEP', 'opp_NOS', 'opp_NYG', 'opp_NYJ', 'opp_OAK', 'opp_PHI', 'opp_PIT', 'opp_SEA', 'opp_SFO', 'opp_TBB', 'opp_TEN', 'opp_WAS']
['team_ARI', 'team_ATL', 'team_BAL', 'team_BUF', 'team_CAR', 'team_CHI', 'team_CIN', 'team_CLE', 'team_DAL', 'team_DEN', 'team_DET', 'team_GBP', 'team_HOU', 'team_IND', 'team_JAX', 'team_KCC', 'team_LAC', 'team_LAR', 'team_MIA', 'team_MIN', 'team_NEP', 'team_NOS', 'team_NYG', 'team_NYJ', 'team_OAK', 'team_PHI', 'team_PIT', 'team_SEA', 'team_SFO', 'team_TBB', 'team_TEN', 'team_WAS']


In [29]:
opp = new_test_df[opp_headers].stack()
team = new_test_df[team_headers].stack()

opp_column = pd.Series(pd.Categorical(opp[opp!=0].index.get_level_values(1)))
team_column = pd.Series(pd.Categorical(team[team!=0].index.get_level_values(1)))

In [30]:
new_opp_column = [opp[4:] for opp in opp_column]
new_team_column = [team[5:] for team in team_column]

In [31]:
len(new_opp_column)

1202

In [32]:
new_test_df = new_test_df.drop(dummy_vars, axis=1)
new_test_df.head()

Unnamed: 0,third_per,third_per_allowed,TOP,first_downs,first_downs_allowed,ha,pass_yards,pass_yards_allowed,penalty_yards,plays,rush_yards,rush_yards_allowed,sacked,sacks,takeaways,total_yards,total_yards_allowed,turnovers,prediction
2285,0.363636,0.375,26.133333,17,17,0,107,182,17,52,177,131,1,1,4,284,313,0,Win
1233,0.1,0.375,21.866667,17,22,0,245,230,75,53,42,192,2,2,2,287,422,4,Loss
1396,0.2,0.411765,26.616667,15,18,1,186,150,30,60,122,202,2,2,4,308,352,5,Loss
2555,0.533333,0.384615,27.416667,22,25,0,195,168,80,72,159,198,5,4,2,354,366,4,Loss
3393,0.111111,0.357143,25.25,15,21,0,179,251,45,52,77,127,1,0,2,256,378,2,Loss


In [33]:
new_test_df["team"] = new_team_column
new_test_df["opp"] = new_opp_column
new_test_df["result"] = y_test

In [34]:
new_test_df.head()

Unnamed: 0,third_per,third_per_allowed,TOP,first_downs,first_downs_allowed,ha,pass_yards,pass_yards_allowed,penalty_yards,plays,...,sacked,sacks,takeaways,total_yards,total_yards_allowed,turnovers,prediction,team,opp,result
2285,0.363636,0.375,26.133333,17,17,0,107,182,17,52,...,1,1,4,284,313,0,Win,SFO,HOU,Win
1233,0.1,0.375,21.866667,17,22,0,245,230,75,53,...,2,2,2,287,422,4,Loss,WAS,PHI,Loss
1396,0.2,0.411765,26.616667,15,18,1,186,150,30,60,...,2,2,4,308,352,5,Loss,TBB,TEN,Loss
2555,0.533333,0.384615,27.416667,22,25,0,195,168,80,72,...,5,4,2,354,366,4,Loss,JAX,BUF,Loss
3393,0.111111,0.357143,25.25,15,21,0,179,251,45,52,...,1,0,2,256,378,2,Loss,TEN,ATL,Loss


In [35]:
# new_test_df.to_csv("data/nfl_neural_prediction.csv", encoding="utf-8", index=False)