# 2021 NFL Neural Network Prediction Model 

In [1]:
import csv
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
import itertools





In [2]:
##Neural Network Machine Learning Model to Predict Outcome of NFL Games 2021

##Pre-processing of Data for model found online at https://www.pro-football-reference.com/

##Import 2021 NFL Schedule from .csv
schedule = []
path = '/Users/marli/Desktop/NFL_2021/NFL_Schedule.csv'
with open(path,'r',newline='') as csvfile:
        reader = csv.reader(csvfile,delimiter = ',')
        for row in reader:
            x = row[0].split()
            x  = list(map(lambda elem: elem.replace('@', ''), x))
            schedule.append(x)
            
##Import Winners of Games (Bool) from .csv
path = '/Users/marli/Desktop/NFL_2021/winner.csv'
train_y = pd.read_csv(path)
train_y = train_y['Winner']

##Import Team Offensive Stats from .csv

path = '/Users/marli/Desktop/NFL_2021/total_off.csv'
total_off = pd.read_csv(path,index_col=1,skiprows=[0,36,35,34])
total_off.pop('Rk')


##Import Total Scoring from .csv

path = '/Users/marli/Desktop/NFL_2021/Total_scoring.csv'

total_scoring = pd.read_csv(path,index_col=1,skiprows=[35,34,33])
total_scoring.pop('Rk')
total_scoring.pop('Pts')
total_scoring = total_scoring.dropna(axis=1, how='any')


##Import Total Def per game from .csv

path = '/Users/marli/Desktop/NFL_2021/team_def.csv'

total_def = pd.read_csv(path,index_col=1,skiprows=[0,36,35,34])
total_def.pop('Rk')

            
##Import Advanced Def from .csv

path = '/Users/marli/Desktop/NFL_2021/adv_team_def.csv'
adv_def = pd.read_csv(path, index_col='Tm')
col = ['Bltz%','Hrry%','QBKD%','Prss%']
for cat in col:
    adv_def[cat] = np.where(adv_def[cat].str.endswith('%'),adv_def[cat].str.replace('%',''),adv_def[cat].str)
adv_def = adv_def.rename(columns={'Bltz%': 'Bltzp', 'Hrry%': 'Hrryp','QBKD%':'QBKDp','Prss%':'Prssp'})




## Merge Dataframes to One File
df = pd.concat([total_off,total_scoring,total_def,adv_def],axis=1)

## Set Unique Column Names

# Columns to not rename
excluded = df.columns[~df.columns.duplicated(keep=False)]

# An incrementer
inc = 0
inc = itertools.count().__next__

# Rename Duplicate Columns with unique Number
def ren(name):
    return f"{name}{inc()}" if name not in excluded else name
df = df.rename(columns=ren)

## Removing Special Characters from column Names
col = df.columns.tolist()
col = list(map(lambda elem: elem.replace('/', ''), col))
col = list(map(lambda elem: elem.replace('%', ''), col))
col = list(map(lambda elem: elem.replace('.', ''), col))
df.columns = col


## Change Names so teams are consistent through dataframes            
teams_abbv = {
    'Arizona Cardinals':'ARI',
    'Atlanta Falcons':'ATL',
    'Baltimore Ravens':'BAL',
    'Buffalo Bills':'BUF',
    'Carolina Panthers': 'CAR',
    'Chicago Bears': 'CHI',
    'Cincinnati Bengals':'CIN',
    'Cleveland Browns': 'CLE',
    'Dallas Cowboys':'DAL',
    'Denver Broncos':'DEN',
    'Detroit Lions':'DET',
    'Green Bay Packers':'GB',
    'Houston Texans': 'HOU',
    'Indianapolis Colts': 'IND',
    'Jacksonville Jaguars':'JAX',
    'Kansas City Chiefs':'KC',
    'Las Vegas Raiders':'LV',
    'Los Angeles Rams': 'LAR',
    'Los Angeles Chargers': 'LAC',
    'Miami Dolphins': 'MIA',
    'Minnesota Vikings':'MIN',
    'New England Patriots':'NE',
    'New Orleans Saints': 'NO',
    'New York Giants':'NYG',
    'New York Jets': 'NYJ',
    'Philadelphia Eagles': 'PHI',
    'Pittsburgh Steelers':'PIT',
    'San Francisco 49ers': 'SF',
    'Seattle Seahawks':'SEA',
    'Tampa Bay Buccaneers':'TB',
    'Tennessee Titans': 'TEN',
    'Washington Football Team': 'WSH'      
     }

df = df.rename(index=teams_abbv)

In [3]:
df.head()

Unnamed: 0,G0,PF,Yds1,Ply2,YP3,TO4,FL5,1stD6,Cmp7,Att8,...,Bltz,Bltzp,Hrry,Hrryp,QBKD,QBKDp,Sk,Prss,Prssp,MTkl
ARI,17,26.4,373.6,66.2,5.6,0.88,0.24,21.6,24.4,34.8,...,210,33.6,61,9.8,60,10.7,41,162,25.9,110
ATL,17,18.4,303.8,59.2,5.1,1.53,0.65,18.0,22.2,33.7,...,154,24.5,49,7.8,39,6.8,18,106,16.9,120
BAL,17,22.8,378.8,69.7,5.4,1.53,0.47,23.2,23.3,35.9,...,209,31.1,58,8.6,62,10.0,34,154,23.0,115
BUF,17,28.4,381.9,67.2,5.7,1.29,0.35,23.4,24.4,38.5,...,157,26.0,93,15.4,51,9.6,42,186,30.8,118
CAR,17,17.9,298.9,65.1,4.6,1.71,0.47,18.9,20.5,35.2,...,192,33.7,64,11.2,48,9.3,39,151,26.5,108


In [4]:
df_team1 = df.copy().reset_index()
df_team1.pop('index')

df_team2  = df.copy().reset_index()
df_team2.pop('index')

games = pd.DataFrame(columns = ['Team 1','Team 2'])


In [5]:
##Arrange the data to align with the 2021 NFL Schedule
i=0
j=0
for team in df.index.values.tolist():
    
    i=i+1

    for game in range(1,19):
        if schedule[i][game] != 'BYE':
            df_team1.loc[j] = df.loc[team]
            df_team2.loc[j] = df.loc[schedule[i][game]]
            games = games.append({'Team 1': team, 'Team 2':schedule[i][game]},ignore_index=True)
            
            
            j=j+1
            
        else:
            pass
        
        

In [6]:
##Making Unique Column Names before joining
df_team1 = df_team1.rename(columns = str.upper)
df_team2 = df_team2.rename(columns = str.lower)

train_x = pd.concat([df_team1,df_team2],axis=1)
train_x = train_x.astype(float)
train_x.head()

Unnamed: 0,G0,PF,YDS1,PLY2,YP3,TO4,FL5,1STD6,CMP7,ATT8,...,bltz,bltzp,hrry,hrryp,qbkd,qbkdp,sk,prss,prssp,mtkl
0,17.0,26.4,373.6,66.2,5.6,0.88,0.24,21.6,24.4,34.8,...,140.0,19.9,79.0,11.2,47.0,7.5,43.0,169.0,24.0,94.0
1,17.0,26.4,373.6,66.2,5.6,0.88,0.24,21.6,24.4,34.8,...,186.0,26.1,91.0,12.7,45.0,7.2,51.0,187.0,26.2,109.0
2,17.0,26.4,373.6,66.2,5.6,0.88,0.24,21.6,24.4,34.8,...,190.0,31.5,65.0,10.8,49.0,9.0,32.0,146.0,24.2,131.0
3,17.0,26.4,373.6,66.2,5.6,0.88,0.24,21.6,24.4,34.8,...,188.0,26.6,74.0,10.5,37.0,5.9,50.0,161.0,22.8,119.0
4,17.0,26.4,373.6,66.2,5.6,0.88,0.24,21.6,24.4,34.8,...,124.0,19.8,51.0,8.1,52.0,9.5,48.0,151.0,24.1,99.0


In [7]:
##Standardizing Data by Normalizing it

train_x_norm = (train_x-train_x.min())/(train_x.max()-train_x.min())

train_x = train_x_norm.dropna(axis = 1, how='any')
train_x.head()

Unnamed: 0,PF,YDS1,PLY2,YP3,TO4,FL5,1STD6,CMP7,ATT8,YDS19,...,bltz,bltzp,hrry,hrryp,qbkd,qbkdp,sk,prss,prssp,mtkl
0,0.705521,0.740884,0.742647,0.666667,0.12,0.0,0.666667,0.615385,0.410072,0.533278,...,0.251082,0.271777,0.724138,0.571429,0.375,0.297297,0.675676,0.557522,0.510791,0.196078
1,0.705521,0.740884,0.742647,0.666667,0.12,0.0,0.666667,0.615385,0.410072,0.533278,...,0.450216,0.487805,0.931034,0.72449,0.333333,0.256757,0.891892,0.716814,0.669065,0.490196
2,0.705521,0.740884,0.742647,0.666667,0.12,0.0,0.666667,0.615385,0.410072,0.533278,...,0.467532,0.675958,0.482759,0.530612,0.416667,0.5,0.378378,0.353982,0.52518,0.921569
3,0.705521,0.740884,0.742647,0.666667,0.12,0.0,0.666667,0.615385,0.410072,0.533278,...,0.458874,0.505226,0.637931,0.5,0.166667,0.081081,0.864865,0.486726,0.42446,0.686275
4,0.705521,0.740884,0.742647,0.666667,0.12,0.0,0.666667,0.615385,0.410072,0.533278,...,0.181818,0.268293,0.241379,0.255102,0.479167,0.567568,0.810811,0.39823,0.517986,0.294118


In [69]:
##Defining the input function for NN classification Model

def input_fn(features, labels, training=True, batch_size=32):
    # Convert the inputs to a Dataset.
    dataset = tf.data.Dataset.from_tensor_slices((dict(features), labels))

    # Shuffle and repeat if you are in training mode.
    if training:
        dataset = dataset.shuffle(1000).repeat()
    
    return dataset.batch(batch_size)

feature_columns = []
for key in train_x.keys():
    feature_columns.append(tf.feature_column.numeric_column(key=key))


# Build a DNN with 2 hidden layers
classifier = tf.estimator.DNNClassifier(
    feature_columns=feature_columns,
    # Two hidden layers of 30 and 10 nodes respectively.
    hidden_units=[30, 10],
    # The model must choose winner between 2 classes, Team1 or Team2.
    n_classes=2)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'C:\\Users\\marli\\AppData\\Local\\Temp\\tmpwq5g_05e', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_checkpoint_save_graph_def': True, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [70]:
## Train Model with results from 2021 NFL Regular Season

classifier.train(
    input_fn=lambda: input_fn(train_x, train_y, training=True),
    steps=5000)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Calling checkpoint listeners before saving checkpoint 0...
INFO:tensorflow:Saving checkpoints for 0 into C:\Users\marli\AppData\Local\Temp\tmpwq5g_05e\model.ckpt.
INFO:tensorflow:Calling checkpoint listeners after saving checkpoint 0...
INFO:tensorflow:loss = 0.7224225, step = 0
INFO:tensorflow:global_step/sec: 151.34
INFO:tensorflow:loss = 0.7498652, step = 100 (0.663 sec)
INFO:tensorflow:global_step/sec: 226.483
INFO:tensorflow:loss = 0.6344001, step = 200 (0.442 sec)
INFO:tensorflow:global_step/sec: 199.736
INFO:tensorflow:loss = 0.66760445, step = 300 (0.502 sec)
INFO:tensorflow:global_step/sec: 192.071
INFO:tensorflow:loss = 0.61131996, step = 400 (0.520 sec)
INFO:tensorflow:global_step/sec: 231.842
INFO:tensorflow:loss = 0.6501684,

<tensorflow_estimator.python.estimator.canned.dnn.DNNClassifierV2 at 0x1c84bbcc518>

In [71]:
## Create test dataset with Results from first 2 rounds of 2021 NFL playoffs

playoffs = [
    ['BUF','NE'],
    ['KC','PIT'],
    ['CIN','LV'],
    ['TB','PHI'],
    ['LAR','ARI'],
    ['DAL','SF'],
    ['BUF','KC'],
    ['CIN', 'TEN'],
    ['TB','LAR'],
    ['GB','SF'],
]

df_team1 = df.head(10).reset_index()
df_team1.pop('index')

df_team2  = df.head(10).reset_index()
df_team2.pop('index')


j = 0
for game in playoffs:
    
    df_team1.loc[j] = df.loc[game[0]]
    df_team2.loc[j] = df.loc[game[1]]
    j =j+1
    
##Making Unique Column Names before joining
df_team1 = df_team1.rename(columns = str.upper)
df_team2 = df_team2.rename(columns = str.lower)

test_x = pd.concat([df_team1,df_team2],axis=1)
test_x = test_x.astype(float)  

##Standardizing Data by Normalizing it

test_x_norm = (test_x-test_x.min())/(test_x.max()-test_x.min())

test_x = test_x_norm.dropna(axis = 1, how='any')


outcomes = [1,1,1,1,1,0,0,1,0,0]
test_y = pd.Series(outcomes, name = 'Playoffs')
    

In [72]:
## Evaluate the Model with Playoff Results and Print Model Accuracy

eval_result = classifier.evaluate(
    input_fn=lambda: input_fn(test_x, test_y, training=False))

print('\nTest set accuracy: {accuracy:0.3f}\n'.format(**eval_result))

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2022-01-27T23:22:08
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from C:\Users\marli\AppData\Local\Temp\tmpwq5g_05e\model.ckpt-5000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Inference Time : 1.18886s
INFO:tensorflow:Finished evaluation at 2022-01-27-23:22:09
INFO:tensorflow:Saving dict for global step 5000: accuracy = 0.6, accuracy_baseline = 0.6, auc = 0.5, auc_precision_recall = 0.72621524, average_loss = 0.68631345, global_step = 5000, label/mean = 0.6, loss = 0.68631345, precision = 0.75, prediction/mean = 0.46549758, recall = 0.5
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 5000: C:\Users\marli\AppData\Local\Temp\tmpwq5g_05e\model.ckpt-5000

Test set accuracy: 0.600



In [76]:
##Predict the Outcome of the 2021 NFL Conference Championships by setting conf = "AFC" or "NFC"

conf = 'AFC'  #  'NFC'

def pred_input_fn(features, batch_size=32):
    # Convert the inputs to a Dataset without labels.
    return tf.data.Dataset.from_tensor_slices(dict(features)).batch(batch_size)

teams = [
    ['KC', 'CIN'],
    ['LAR','SF'],
]
predict = {}

df_team1 = df.head(1).reset_index()
df_team1.pop('index')

df_team2  = df.head(1).reset_index()
df_team2.pop('index')





if conf == 'AFC':
    conf = 0
else:
    conf = 1
df_team1.loc[0] = df.loc[teams[conf][0]]
df_team2.loc[0] = df.loc[teams[conf][1]]

    
##Making Unique Column Names before joining
df_team1 = df_team1.rename(columns = str.upper)
df_team2 = df_team2.rename(columns = str.lower)

#Combining team DataFrames for input into Prediction Model
df_pred = pd.concat([df_team1,df_team2],axis=1)
df_pred = df_pred.astype(float)  

##Normalizing data
pred_norm = (df_pred-df_pred.min().min())/(df_pred.max().max()-df_pred.min().min())

df_pred = pred_norm.dropna(axis = 1, how='any')

## Turn features into Dict for input into Prediction Model
features = df_pred.columns.tolist()
 
for feature in features:
    predict[feature] = [float(df_pred[feature])]



## Run Prediction Model and Print predicited Winner and probability 
predictions = classifier.predict(input_fn=lambda: pred_input_fn(predict))

for pred_dict in predictions:

    class_id = pred_dict['class_ids'][0]
    
    probability = pred_dict['probabilities'][class_id]

    print('Prediction is "{}" ({:.1f}%)'.format(
        teams[conf][class_id], 100 * probability))

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from C:\Users\marli\AppData\Local\Temp\tmpwq5g_05e\model.ckpt-5000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
Prediction is "CIN" (51.6%)
