# Predict Super Bowl Winner
This program attempts to create a model that can predict the winner of a Super Bowl game using machine learning algorithms.

In [1]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

### Load the data

In [2]:
df = pd.read_excel('NFL Superbowl Data v2.xlsx')

In [3]:
df.head(5)

Unnamed: 0,Season,Year,Super Bowl Team 1,Super Bowl Team 2,Result,Point Differential (Super Bowl),Point Differential (Team1 - Team2),Super Bowl Team 1 Offense Rank,Super Bowl Team 2 Offense Rank,Super Bowl Team 1 Defense Rank,Super Bowl Team 2 Defense Rank,Win - Loss Score,Super Bowl Team 1 win/loss %,Super Bowl Team 2 win/loss %,Super Bowl Team 1 Pts Scored,Super Bowl Team 2 Pts Scored,Outcome of Team 1
0,1999,2000,St. Louis Rams,Tennessee Titans,St. Louis Rams (W),7,134,1,7,4,15,23–16,0.813,0.813,526,392,Win
1,2000,2001,Baltimore Ravens,New York Giants,Baltimore Ravens (W),27,5,14,15,1,5,34–7,0.75,0.75,333,328,Win
2,2001,2002,New England Patriots,St. Louis Rams,New England Patriots (W),3,-132,6,1,6,7,20–17,0.688,0.875,371,503,Win
3,2002,2003,Oakland Raiders,Tampa Bay Buccaneers,Tampa Bay Buccaneers (W),27,104,2,18,6,1,48–21,0.688,0.75,450,346,Lose
4,2003,2004,New England Patriots,Carolina Panthers,New England Patriots (W),3,23,12,15,1,10,32–29,0.875,0.688,348,325,Win


In [4]:
# convert the team name to integer values
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()

# create new columns to store the team name integer values
df['Team1'] = le.fit_transform(df['Super Bowl Team 1'])
df['Team2'] = le.fit_transform(df['Super Bowl Team 2'])

df.head(5)

Unnamed: 0,Season,Year,Super Bowl Team 1,Super Bowl Team 2,Result,Point Differential (Super Bowl),Point Differential (Team1 - Team2),Super Bowl Team 1 Offense Rank,Super Bowl Team 2 Offense Rank,Super Bowl Team 1 Defense Rank,Super Bowl Team 2 Defense Rank,Win - Loss Score,Super Bowl Team 1 win/loss %,Super Bowl Team 2 win/loss %,Super Bowl Team 1 Pts Scored,Super Bowl Team 2 Pts Scored,Outcome of Team 1,Team1,Team2
0,1999,2000,St. Louis Rams,Tennessee Titans,St. Louis Rams (W),7,134,1,7,4,15,23–16,0.813,0.813,526,392,Win,14,16
1,2000,2001,Baltimore Ravens,New York Giants,Baltimore Ravens (W),27,5,14,15,1,5,34–7,0.75,0.75,333,328,Win,1,9
2,2001,2002,New England Patriots,St. Louis Rams,New England Patriots (W),3,-132,6,1,6,7,20–17,0.688,0.875,371,503,Win,7,14
3,2002,2003,Oakland Raiders,Tampa Bay Buccaneers,Tampa Bay Buccaneers (W),27,104,2,18,6,1,48–21,0.688,0.75,450,346,Lose,10,15
4,2003,2004,New England Patriots,Carolina Panthers,New England Patriots (W),3,23,12,15,1,10,32–29,0.875,0.688,348,325,Win,7,1


## Remove Last Row to clean up the current Season results
But in this exercise, we alreay know the 2022 season Super Bowl result. Update the Excel file and add the 2023 season Super Bowl teams. 

In [5]:
df.tail(5)

Unnamed: 0,Season,Year,Super Bowl Team 1,Super Bowl Team 2,Result,Point Differential (Super Bowl),Point Differential (Team1 - Team2),Super Bowl Team 1 Offense Rank,Super Bowl Team 2 Offense Rank,Super Bowl Team 1 Defense Rank,Super Bowl Team 2 Defense Rank,Win - Loss Score,Super Bowl Team 1 win/loss %,Super Bowl Team 2 win/loss %,Super Bowl Team 1 Pts Scored,Super Bowl Team 2 Pts Scored,Outcome of Team 1,Team1,Team2
20,2019,2020,Kansas City Chiefs,San Francisco 49ers,Kansas City Chiefs (W),11,-28,5,2,7,8,31–20,0.75,0.813,451,479,Win,5,12
21,2020,2021,Tampa Bay Buccaneers,Kansas City Chiefs,Tampa Bay Buccaneers (W),22,19,3,6,8,11,31–9,0.688,0.875,492,473,Win,15,6
22,2021,2022,Los Angeles Rams,Cincinnati Bengals,Los Angeles Rams (W),3,51,8,7,15,17,23–20,0.706,0.588,511,460,Win,6,3
23,2022,2023,Kansas City Chiefs,Philadelphia Eagles,Kansas City Chiefs,3,19,1,2,16,8,38-35,0.824,0.824,496,477,Win,5,10
24,2023,2024,Kansas City Chiefs,San Francisco 49ers,?,?,73,15,3,2,7,?-?,0.824,0.824,371,298,?,5,12


In [6]:
# remove the last row of data from the dataset
data = df.drop(df.tail(1).index)
data.tail(5)

Unnamed: 0,Season,Year,Super Bowl Team 1,Super Bowl Team 2,Result,Point Differential (Super Bowl),Point Differential (Team1 - Team2),Super Bowl Team 1 Offense Rank,Super Bowl Team 2 Offense Rank,Super Bowl Team 1 Defense Rank,Super Bowl Team 2 Defense Rank,Win - Loss Score,Super Bowl Team 1 win/loss %,Super Bowl Team 2 win/loss %,Super Bowl Team 1 Pts Scored,Super Bowl Team 2 Pts Scored,Outcome of Team 1,Team1,Team2
19,2018,2019,New England Patriots,Los Angeles Rams,New England Patriots (W),10,-91,4,2,7,20,13–3,0.688,0.813,436,527,Win,7,7
20,2019,2020,Kansas City Chiefs,San Francisco 49ers,Kansas City Chiefs (W),11,-28,5,2,7,8,31–20,0.75,0.813,451,479,Win,5,12
21,2020,2021,Tampa Bay Buccaneers,Kansas City Chiefs,Tampa Bay Buccaneers (W),22,19,3,6,8,11,31–9,0.688,0.875,492,473,Win,15,6
22,2021,2022,Los Angeles Rams,Cincinnati Bengals,Los Angeles Rams (W),3,51,8,7,15,17,23–20,0.706,0.588,511,460,Win,6,3
23,2022,2023,Kansas City Chiefs,Philadelphia Eagles,Kansas City Chiefs,3,19,1,2,16,8,38-35,0.824,0.824,496,477,Win,5,10


In [7]:
# get the relevant features for training the model
features = ['Super Bowl Team 1 Offense Rank', 'Super Bowl Team 2 Offense Rank', 'Super Bowl Team 1 Defense Rank', 'Super Bowl Team 2 Defense Rank', 
           'Super Bowl Team 1 win/loss %', 'Super Bowl Team 2 win/loss %', 'Super Bowl Team 1 Pts Scored', 'Super Bowl Team 2 Pts Scored',
           'Point Differential (Team1 - Team2)', 'Team1', 'Team2']

In [8]:
# split the data into features and target
X = data[features]
y = data["Outcome of Team 1"]

In [9]:
# continue splitting the data into 80% training and 20% testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state=0)

In [10]:
# train the random forest classifier model on the training dataset
model = RandomForestClassifier(n_estimators=100, random_state=0)
model.fit(X_train, y_train)

In [11]:
# test this model: make predictions on the test data
y_pred = model.predict(X_test)

In [12]:
# evaluate the model's accuracy
accuracy = accuracy_score(y_test, y_pred)
print('Test Data Accuracy:', accuracy)

Test Data Accuracy: 1.0


### Let's Predict the Outcome of the game for year 2024 the Chiefs and the Niners

In [13]:
last_row = df.tail(1)
last_row

Unnamed: 0,Season,Year,Super Bowl Team 1,Super Bowl Team 2,Result,Point Differential (Super Bowl),Point Differential (Team1 - Team2),Super Bowl Team 1 Offense Rank,Super Bowl Team 2 Offense Rank,Super Bowl Team 1 Defense Rank,Super Bowl Team 2 Defense Rank,Win - Loss Score,Super Bowl Team 1 win/loss %,Super Bowl Team 2 win/loss %,Super Bowl Team 1 Pts Scored,Super Bowl Team 2 Pts Scored,Outcome of Team 1,Team1,Team2
24,2023,2024,Kansas City Chiefs,San Francisco 49ers,?,?,73,15,3,2,7,?-?,0.824,0.824,371,298,?,5,12


In [14]:
# use the trained model to make predictions on this new game
new_game = last_row[features]
prediction = model.predict(new_game)
print(last_row['Super Bowl Team 1'][24]+' are predicted to', prediction[0])

Kansas City Chiefs are predicted to Win
