In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, roc_auc_score
import warnings

warnings.filterwarnings('ignore')

In [2]:
df = pd.read_csv('nfl_data_2000_2023_seasons.csv', parse_dates=['date'], date_format='%Y-%m-%d')

In [3]:
df.shape

(6175, 70)

In [4]:
df.head(3)

Unnamed: 0,game_id,home_team,away_team,date,season,month,week,day_of_week,overtime,home_pts,...,away_4datt,home_top,away_top,pts_diff_home,pass_yds_diff_home,rush_yds_diff_home,top_diff_home,home_total_yds,away_total_yds,home_results
0,2000_1_New York Giants_Arizona Cardinals,New York Giants,Arizona Cardinals,2000-09-03,2000,9,1,6,0,21,...,2,1890,1710,5,-140,180,180,395,355,1
1,2000_1_New England Patriots_Tampa Bay Buccaneers,New England Patriots,Tampa Bay Buccaneers,2000-09-03,2000,9,1,6,0,16,...,0,1705,1895,-5,34,-52,-190,278,296,0
2,2000_1_Pittsburgh Steelers_Baltimore Ravens,Pittsburgh Steelers,Baltimore Ravens,2000-09-03,2000,9,1,6,0,0,...,1,1493,2107,-16,-3,-110,-614,223,336,0


## Perform One-Hot Encoding

In [5]:
encoder = OneHotEncoder(sparse_output=False, drop='first')

# Combine home and away teams for encoding
teams = pd.concat([df['home_team'], df['away_team']]).unique()
encoder.fit(teams.reshape(-1, 1))

# Encode home teams
home_encoded = encoder.transform(df[['home_team']])
home_feature_names = encoder.get_feature_names_out(['home_team'])
home_encoded_df = pd.DataFrame(home_encoded, columns=home_feature_names)

# Encode away teams
away_encoded = encoder.transform(df[['away_team']])
away_feature_names = encoder.get_feature_names_out(['away_team'])
away_encoded_df = pd.DataFrame(away_encoded, columns=away_feature_names)

# Concatenate with original DataFrame
df_encoded = pd.concat([df, home_encoded_df, away_encoded_df], axis=1)

# Drop original 'home_team' and 'away_team' columns
df_encoded = df_encoded.drop(['home_team', 'away_team'], axis=1)

In [6]:
df_encoded.shape, df.shape

((6175, 130), (6175, 70))

## Prepare Your Features And Target

In [7]:
# Assuming 'home_results' is your target variable
X = df_encoded.drop(['game_id', 'date','home_results'], axis=1)
y = df_encoded['home_results']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


## Create And Train Logistic Regression Model

In [23]:
# Scale numerical features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Create and train the model
model = LogisticRegression(random_state=42, class_weight= "balanced")
model.fit(X_train_scaled, y_train)

## Make Predictions

In [24]:
y_pred = model.predict(X_test_scaled)
y_pred_proba = model.predict_proba(X_test_scaled)[:, 1]  # Probability of home team winning

## Evaluate The Model

In [25]:
print("Accuracy:", f'{accuracy_score(y_test, y_pred):.4%}')
print("\nClassification Report:\n", classification_report(y_test, y_pred))
print("ROC AUC Score:", f'{roc_auc_score(y_test, y_pred_proba):%}')

Accuracy: 97.9757%

Classification Report:
               precision    recall  f1-score   support

           0       0.97      0.98      0.98       542
           1       0.99      0.98      0.98       693

    accuracy                           0.98      1235
   macro avg       0.98      0.98      0.98      1235
weighted avg       0.98      0.98      0.98      1235

ROC AUC Score: 98.464348%


## Future Predictions

In [11]:
# make dummies or get the average of the last season to predict in the first week of games then be able to 
# make predictions, then take each week's stats and plug in to the week to best predict

In [49]:
y_predictions = np.round(y_pred_proba, 2)
y_predictions = pd.DataFrame(y_predictions, columns= ['home_win_prob'], index= X_test.index)
y_predictions # predictions are on X_test dataset subset

Unnamed: 0,home_win_prob
2402,0.00
2662,0.08
6102,1.00
346,1.00
4681,1.00
...,...
5908,0.00
3846,1.00
3008,1.00
2223,1.00


In [41]:
# Assuming df is your original dataset containing 'home_team' and 'away_team'
df_filtered = df.loc[X_test.index]

# Display the result
df_filtered


Unnamed: 0,game_id,home_team,away_team,date,season,month,week,day_of_week,overtime,home_pts,...,away_4datt,home_top,away_top,pts_diff_home,pass_yds_diff_home,rush_yds_diff_home,top_diff_home,home_total_yds,away_total_yds,home_results
2402,2009_8_Buffalo Bills_Houston Texans,Buffalo Bills,Houston Texans,2009-11-01,2009,11,8,6,0,10,...,0,1252,2348,-21,-150,-85,-1096,204,439,0
2662,2010_9_Detroit Lions_New York Jets,Detroit Lions,New York Jets,2010-11-07,2010,11,9,6,1,20,...,0,1966,1772,-3,-99,-32,194,306,437,0
6102,2023_14_Dallas Cowboys_Philadelphia Eagles,Dallas Cowboys,Philadelphia Eagles,2023-12-10,2023,12,14,6,0,33,...,2,2196,1404,20,38,32,792,394,324,1
346,2001_7_Pittsburgh Steelers_Tennessee Titans,Pittsburgh Steelers,Tennessee Titans,2001-10-29,2001,10,7,0,0,34,...,1,2057,1543,27,115,76,514,405,214,1
4681,2018_6_Oakland Raiders_Seattle Seahawks,Las Vegas Raiders,Seattle Seahawks,2018-10-14,2018,10,6,6,0,27,...,1,1886,1714,24,108,76,172,369,185,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5908,2023_1_Chicago Bears_Green Bay Packers,Chicago Bears,Green Bay Packers,2023-09-10,2023,9,1,6,0,20,...,2,1843,1757,-18,-48,30,86,311,329,0
3846,2015_2_Pittsburgh Steelers_San Francisco 49ers,Pittsburgh Steelers,San Francisco 49ers,2015-09-20,2015,9,2,6,0,43,...,5,1381,2219,25,71,-27,-838,453,409,1
3008,2011_15_Atlanta Falcons_Jacksonville Jaguars,Atlanta Falcons,Jacksonville Jaguars,2011-12-15,2011,12,15,3,0,41,...,0,2311,1289,27,185,-19,1022,373,207,1
2223,2008_13_Houston Texans_Jacksonville Jaguars,Houston Texans,Jacksonville Jaguars,2008-12-01,2008,12,13,0,0,30,...,2,1617,1983,13,-62,0,-366,326,388,1


In [50]:
df_example = pd.concat([df_filtered, y_predictions], axis = 1)
df_example

Unnamed: 0,game_id,home_team,away_team,date,season,month,week,day_of_week,overtime,home_pts,...,home_top,away_top,pts_diff_home,pass_yds_diff_home,rush_yds_diff_home,top_diff_home,home_total_yds,away_total_yds,home_results,home_win_prob
2402,2009_8_Buffalo Bills_Houston Texans,Buffalo Bills,Houston Texans,2009-11-01,2009,11,8,6,0,10,...,1252,2348,-21,-150,-85,-1096,204,439,0,0.00
2662,2010_9_Detroit Lions_New York Jets,Detroit Lions,New York Jets,2010-11-07,2010,11,9,6,1,20,...,1966,1772,-3,-99,-32,194,306,437,0,0.08
6102,2023_14_Dallas Cowboys_Philadelphia Eagles,Dallas Cowboys,Philadelphia Eagles,2023-12-10,2023,12,14,6,0,33,...,2196,1404,20,38,32,792,394,324,1,1.00
346,2001_7_Pittsburgh Steelers_Tennessee Titans,Pittsburgh Steelers,Tennessee Titans,2001-10-29,2001,10,7,0,0,34,...,2057,1543,27,115,76,514,405,214,1,1.00
4681,2018_6_Oakland Raiders_Seattle Seahawks,Las Vegas Raiders,Seattle Seahawks,2018-10-14,2018,10,6,6,0,27,...,1886,1714,24,108,76,172,369,185,0,1.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5908,2023_1_Chicago Bears_Green Bay Packers,Chicago Bears,Green Bay Packers,2023-09-10,2023,9,1,6,0,20,...,1843,1757,-18,-48,30,86,311,329,0,0.00
3846,2015_2_Pittsburgh Steelers_San Francisco 49ers,Pittsburgh Steelers,San Francisco 49ers,2015-09-20,2015,9,2,6,0,43,...,1381,2219,25,71,-27,-838,453,409,1,1.00
3008,2011_15_Atlanta Falcons_Jacksonville Jaguars,Atlanta Falcons,Jacksonville Jaguars,2011-12-15,2011,12,15,3,0,41,...,2311,1289,27,185,-19,1022,373,207,1,1.00
2223,2008_13_Houston Texans_Jacksonville Jaguars,Houston Texans,Jacksonville Jaguars,2008-12-01,2008,12,13,0,0,30,...,1617,1983,13,-62,0,-366,326,388,1,1.00


In [51]:
df_example['home_win_prob'].unique()

array([0.  , 0.08, 1.  , 0.03, 0.02, 0.89, 0.01, 0.07, 0.99, 0.74, 0.88,
       0.05, 0.95, 0.09, 0.9 , 0.06, 0.93, 0.65, 0.04, 0.97, 0.82, 0.44,
       0.86, 0.92, 0.61, 0.98, 0.87, 0.8 , 0.46, 0.23, 0.79, 0.67, 0.15,
       0.31, 0.12, 0.94, 0.29, 0.96, 0.21, 0.1 , 0.49, 0.83, 0.11, 0.13,
       0.18, 0.27, 0.57, 0.69, 0.81, 0.77, 0.17, 0.85, 0.24, 0.34, 0.2 ,
       0.64, 0.37, 0.66, 0.7 , 0.14, 0.78, 0.39, 0.84, 0.75, 0.58, 0.73,
       0.91, 0.19, 0.72, 0.62, 0.48, 0.41, 0.25, 0.43, 0.16, 0.28, 0.45,
       0.55, 0.47, 0.38, 0.4 ])

In [52]:
# Apply conditions to create the 'correct pick' column
df_example['correct pick'] = df_example.apply(
    lambda row: 'yes' if (row['home_results'] == 1 and row['home_win_prob'] >= 0.60) or
                (row['home_results'] == 0 and row['home_win_prob'] < 0.60) else 'no', axis=1
)

# Display the updated dataframe with the 'correct pick' column
df_example


Unnamed: 0,game_id,home_team,away_team,date,season,month,week,day_of_week,overtime,home_pts,...,away_top,pts_diff_home,pass_yds_diff_home,rush_yds_diff_home,top_diff_home,home_total_yds,away_total_yds,home_results,home_win_prob,correct pick
2402,2009_8_Buffalo Bills_Houston Texans,Buffalo Bills,Houston Texans,2009-11-01,2009,11,8,6,0,10,...,2348,-21,-150,-85,-1096,204,439,0,0.00,yes
2662,2010_9_Detroit Lions_New York Jets,Detroit Lions,New York Jets,2010-11-07,2010,11,9,6,1,20,...,1772,-3,-99,-32,194,306,437,0,0.08,yes
6102,2023_14_Dallas Cowboys_Philadelphia Eagles,Dallas Cowboys,Philadelphia Eagles,2023-12-10,2023,12,14,6,0,33,...,1404,20,38,32,792,394,324,1,1.00,yes
346,2001_7_Pittsburgh Steelers_Tennessee Titans,Pittsburgh Steelers,Tennessee Titans,2001-10-29,2001,10,7,0,0,34,...,1543,27,115,76,514,405,214,1,1.00,yes
4681,2018_6_Oakland Raiders_Seattle Seahawks,Las Vegas Raiders,Seattle Seahawks,2018-10-14,2018,10,6,6,0,27,...,1714,24,108,76,172,369,185,0,1.00,no
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5908,2023_1_Chicago Bears_Green Bay Packers,Chicago Bears,Green Bay Packers,2023-09-10,2023,9,1,6,0,20,...,1757,-18,-48,30,86,311,329,0,0.00,yes
3846,2015_2_Pittsburgh Steelers_San Francisco 49ers,Pittsburgh Steelers,San Francisco 49ers,2015-09-20,2015,9,2,6,0,43,...,2219,25,71,-27,-838,453,409,1,1.00,yes
3008,2011_15_Atlanta Falcons_Jacksonville Jaguars,Atlanta Falcons,Jacksonville Jaguars,2011-12-15,2011,12,15,3,0,41,...,1289,27,185,-19,1022,373,207,1,1.00,yes
2223,2008_13_Houston Texans_Jacksonville Jaguars,Houston Texans,Jacksonville Jaguars,2008-12-01,2008,12,13,0,0,30,...,1983,13,-62,0,-366,326,388,1,1.00,yes


In [61]:
base_model_correct_picks_percentage = df_example['correct pick'].value_counts()[0]/ df_example.shape[0]
base_model_correct_picks_percentage

0.9781376518218623

## Modeling on 2024 Season

In [89]:
season_24 = pd.read_csv('nfl_data_2024_season.csv', parse_dates=['date'], date_format='%Y-%m-%d')

In [90]:
season_24

Unnamed: 0,game_id,home_team,away_team,date,season,month,week,day_of_week,overtime,home_pts,...,away_4datt,home_top,away_top,pts_diff_home,pass_yds_diff_home,rush_yds_diff_home,top_diff_home,home_total_yds,away_total_yds,home_results
0,2024_1_Kansas City Chiefs_Baltimore Ravens,Kansas City Chiefs,Baltimore Ravens,2024-09-05,2024,9,1,3,0,27,...,2,1577,2023,7,14,-113,-446,353,452,1
1,2024_1_Philadelphia Eagles_Green Bay Packers,Philadelphia Eagles,Green Bay Packers,2024-09-06,2024,9,1,4,0,34,...,1,1967,1633,5,15,-19,334,410,414,1
2,2024_1_New Orleans Saints_Carolina Panthers,New Orleans Saints,Carolina Panthers,2024-09-08,2024,9,1,6,0,47,...,3,2199,1401,37,64,122,798,379,193,1
3,2024_1_Chicago Bears_Tennessee Titans,Chicago Bears,Tennessee Titans,2024-09-08,2024,9,1,6,0,24,...,2,1506,2094,7,-40,-56,-588,148,244,1
4,2024_1_Cincinnati Bengals_New England Patriots,Cincinnati Bengals,New England Patriots,2024-09-08,2024,9,1,6,0,10,...,0,1557,2043,-6,34,-100,-486,224,290,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
267,2024_18_New York Jets_Miami Dolphins,New York Jets,Miami Dolphins,2025-01-05,2024,1,18,4,0,32,...,3,1676,1924,12,58,-17,-248,375,334,1
268,2024_18_Tennessee Titans_Houston Texans,Tennessee Titans,Houston Texans,2025-01-05,2024,1,18,4,0,14,...,2,1799,1801,-9,62,-122,-2,314,374,0
269,2024_18_Arizona Cardinals_San Francisco 49ers,Arizona Cardinals,San Francisco 49ers,2025-01-05,2024,1,18,4,0,47,...,2,1689,1911,23,-56,13,-222,393,436,1
270,2024_18_Philadelphia Eagles_New York Giants,Philadelphia Eagles,New York Giants,2025-01-05,2024,1,18,4,0,20,...,4,1788,1812,7,114,-49,-24,303,238,1


## Perform One-Hot Encoding

In [91]:
encoder = OneHotEncoder(sparse_output=False, drop='first')

# Combine home and away teams for encoding
teams = pd.concat([season_24['home_team'], season_24['away_team']]).unique()
encoder.fit(teams.reshape(-1, 1))

# Encode home teams
home_encoded = encoder.transform(season_24[['home_team']])
home_feature_names = encoder.get_feature_names_out(['home_team'])
home_encoded_df = pd.DataFrame(home_encoded, columns=home_feature_names)

# Encode away teams
away_encoded = encoder.transform(season_24[['away_team']])
away_feature_names = encoder.get_feature_names_out(['away_team'])
away_encoded_df = pd.DataFrame(away_encoded, columns=away_feature_names)

# Concatenate with original DataFrame
df_encoded = pd.concat([season_24, home_encoded_df, away_encoded_df], axis=1)

# Drop original 'home_team' and 'away_team' columns
df_encoded = df_encoded.drop(['home_team', 'away_team'], axis=1)

## Preprocessing New Data

In [92]:
X_24 = df_encoded.drop(['game_id', 'date','home_results'], axis=1)
X_24_scaled = scaler.transform(X_24)


## Make Predictions

In [93]:
y_pred_24_season= model.predict(X_24_scaled)
y_pred_proba_24_season = model.predict_proba(X_24_scaled)[:, 1]  # Probability of home team winning

In [95]:
y_predictions_24_season = np.round(y_pred_proba_24_season, 2)
y_predictions_24_season = pd.DataFrame(y_predictions_24_season, columns= ['home_win_prob'])
y_predictions_24_season # predictions are on X_24 dataset subset

Unnamed: 0,home_win_prob
0,0.99
1,0.99
2,1.00
3,0.99
4,0.00
...,...
267,1.00
268,0.00
269,1.00
270,1.00


In [96]:
season_24_preds = pd.concat([season_24,y_predictions_24_season], axis = 1)

In [97]:
df

Unnamed: 0,game_id,home_team,away_team,date,season,month,week,day_of_week,overtime,home_pts,...,away_4datt,home_top,away_top,pts_diff_home,pass_yds_diff_home,rush_yds_diff_home,top_diff_home,home_total_yds,away_total_yds,home_results
0,2000_1_New York Giants_Arizona Cardinals,New York Giants,Arizona Cardinals,2000-09-03,2000,9,1,6,0,21,...,2,1890,1710,5,-140,180,180,395,355,1
1,2000_1_New England Patriots_Tampa Bay Buccaneers,New England Patriots,Tampa Bay Buccaneers,2000-09-03,2000,9,1,6,0,16,...,0,1705,1895,-5,34,-52,-190,278,296,0
2,2000_1_Pittsburgh Steelers_Baltimore Ravens,Pittsburgh Steelers,Baltimore Ravens,2000-09-03,2000,9,1,6,0,0,...,1,1493,2107,-16,-3,-110,-614,223,336,0
3,2000_1_Atlanta Falcons_San Francisco 49ers,Atlanta Falcons,San Francisco 49ers,2000-09-03,2000,9,1,6,0,36,...,1,1899,1701,8,17,3,198,359,339,1
4,2000_1_Oakland Raiders_San Diego Chargers,Las Vegas Raiders,Los Angeles Chargers,2000-09-03,2000,9,1,6,0,9,...,1,1890,1710,3,-13,-9,180,233,255,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6170,2023_18_Washington Commanders_Dallas Cowboys,Washington Commanders,Dallas Cowboys,2024-01-07,2023,1,18,5,0,10,...,1,1565,2035,-28,-179,-81,-470,180,440,0
6171,2023_18_Cincinnati Bengals_Cleveland Browns,Cincinnati Bengals,Cleveland Browns,2024-01-07,2023,1,18,5,0,31,...,1,2146,1454,17,5,79,692,328,244,1
6172,2023_18_Tennessee Titans_Jacksonville Jaguars,Tennessee Titans,Jacksonville Jaguars,2024-01-07,2023,1,18,5,0,28,...,3,1663,1937,8,-128,93,-274,327,362,1
6173,2023_18_Carolina Panthers_Tampa Bay Buccaneers,Carolina Panthers,Tampa Bay Buccaneers,2024-01-07,2023,1,18,5,0,0,...,0,1716,1884,-9,-47,18,-168,199,228,0


In [98]:
season_24_preds['home_win_prob'].nunique()

49

In [99]:
# Apply conditions to create the 'correct pick' column
season_24_preds['correct pick'] = season_24_preds.apply(
    lambda row: 'yes' if (row['home_results'] == 1 and row['home_win_prob'] >= 0.60) or
                (row['home_results'] == 0 and row['home_win_prob'] < 0.60) else 'no', axis=1
)

# Display the updated dataframe with the 'correct pick' column
season_24_preds


Unnamed: 0,game_id,home_team,away_team,date,season,month,week,day_of_week,overtime,home_pts,...,away_top,pts_diff_home,pass_yds_diff_home,rush_yds_diff_home,top_diff_home,home_total_yds,away_total_yds,home_results,home_win_prob,correct pick
0,2024_1_Kansas City Chiefs_Baltimore Ravens,Kansas City Chiefs,Baltimore Ravens,2024-09-05,2024,9,1,3,0,27,...,2023,7,14,-113,-446,353,452,1,0.99,yes
1,2024_1_Philadelphia Eagles_Green Bay Packers,Philadelphia Eagles,Green Bay Packers,2024-09-06,2024,9,1,4,0,34,...,1633,5,15,-19,334,410,414,1,0.99,yes
2,2024_1_New Orleans Saints_Carolina Panthers,New Orleans Saints,Carolina Panthers,2024-09-08,2024,9,1,6,0,47,...,1401,37,64,122,798,379,193,1,1.00,yes
3,2024_1_Chicago Bears_Tennessee Titans,Chicago Bears,Tennessee Titans,2024-09-08,2024,9,1,6,0,24,...,2094,7,-40,-56,-588,148,244,1,0.99,yes
4,2024_1_Cincinnati Bengals_New England Patriots,Cincinnati Bengals,New England Patriots,2024-09-08,2024,9,1,6,0,10,...,2043,-6,34,-100,-486,224,290,0,0.00,yes
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
267,2024_18_New York Jets_Miami Dolphins,New York Jets,Miami Dolphins,2025-01-05,2024,1,18,4,0,32,...,1924,12,58,-17,-248,375,334,1,1.00,yes
268,2024_18_Tennessee Titans_Houston Texans,Tennessee Titans,Houston Texans,2025-01-05,2024,1,18,4,0,14,...,1801,-9,62,-122,-2,314,374,0,0.00,yes
269,2024_18_Arizona Cardinals_San Francisco 49ers,Arizona Cardinals,San Francisco 49ers,2025-01-05,2024,1,18,4,0,47,...,1911,23,-56,13,-222,393,436,1,1.00,yes
270,2024_18_Philadelphia Eagles_New York Giants,Philadelphia Eagles,New York Giants,2025-01-05,2024,1,18,4,0,20,...,1812,7,114,-49,-24,303,238,1,1.00,yes


In [100]:
season_24_preds['correct pick'].value_counts()

correct pick
yes    269
no       3
Name: count, dtype: int64

In [101]:
base_model_season_2024_percentage = season_24_preds['correct pick'].value_counts()[0]/ season_24_preds.shape[0]
base_model_season_2024_percentage

0.9889705882352942

## Evaluting Model Predicitions

In [102]:
y_true = season_24['home_results'] 

# Accuracy
accuracy = accuracy_score(y_true, y_pred_24_season)
print("Accuracy:", f'{accuracy:.4%}')

# Classification Report
print("\nClassification Report:\n", classification_report(y_true, y_pred_24_season))

# ROC AUC Score
roc_auc = roc_auc_score(y_true, y_pred_proba_24_season)
print("ROC AUC Score:", f'{roc_auc:.4%}')


Accuracy: 98.8971%

Classification Report:
               precision    recall  f1-score   support

           0       0.98      1.00      0.99       127
           1       1.00      0.98      0.99       145

    accuracy                           0.99       272
   macro avg       0.99      0.99      0.99       272
weighted avg       0.99      0.99      0.99       272

ROC AUC Score: 99.9837%


In [103]:
'''Suggestions for Improvement:
You might want to balance the model's precision and recall for both classes. If the imbalance in the dataset 
is causing issues, you could try techniques like oversampling the minority class, undersampling the majority class, 
or using different class weights in your model.

Threshold Tuning: You can experiment with adjusting the decision threshold to see if you can improve the balance 
between precision and recall for both classes. For example, changing the threshold to consider a prediction as a 
"win" when the probability is higher than 0.6 (instead of the default 0.5) might improve your precision for home 
team wins.'''


'Suggestions for Improvement:\nYou might want to balance the model\'s precision and recall for both classes. If the imbalance in the dataset \nis causing issues, you could try techniques like oversampling the minority class, undersampling the majority class, \nor using different class weights in your model.\n\nThreshold Tuning: You can experiment with adjusting the decision threshold to see if you can improve the balance \nbetween precision and recall for both classes. For example, changing the threshold to consider a prediction as a \n"win" when the probability is higher than 0.6 (instead of the default 0.5) might improve your precision for home \nteam wins.'

## Super Bowl 59 Predictions

In [169]:
season_24.head()

Unnamed: 0,game_id,home_team,away_team,date,season,month,week,day_of_week,overtime,home_pts,away_pts,home_pass_cmp,away_pass_cmp,home_pass_att,away_pass_att,home_pass_yds,away_pass_yds,home_pass_td,away_pass_td,home_int,away_int,home_sk,away_sk,home_yds_lost_sks,away_yds_lost_sks,home_yds_pass_att,away_yds_pass_att,home_net_yds_pass_att,away_net_yds_pass_att,home_cmp%,away_cmp%,home_pass_rating,away_pass_rating,home_rush_att,away_rush_att,home_rush_yds,away_rush_yds,home_rush_yds_att,away_rush_yds_att,home_rush_td,away_rush_td,home_fgm,away_fgm,home_fga,away_fga,home_xpm,away_xpm,home_xpa,away_xpa,home_pnt,away_pnt,home_punt_yds,away_punt_yds,home_3dconv,away_3dconv,home_3datt,away_3datt,home_4dconv,away_4dconv,home_4datt,away_4datt,home_top,away_top,pts_diff_home,pass_yds_diff_home,rush_yds_diff_home,top_diff_home,home_total_yds,away_total_yds,home_results
0,2024_1_Kansas City Chiefs_Baltimore Ravens,Kansas City Chiefs,Baltimore Ravens,2024-09-05,2024,9,1,3,0,27,20,20,26,28,41,281,267,1,1,1,0,2,1,10,6,10.4,6.7,9.4,6.4,71.4,63.4,100.4,90.2,20,32,72,185,3.6,5.8,2,1,2,2,2,3,3,2,3,2,3,2,144,75,4,7,9,14,0,1,0,2,1577,2023,7,14,-113,-446,353,452,1
1,2024_1_Philadelphia Eagles_Green Bay Packers,Philadelphia Eagles,Green Bay Packers,2024-09-06,2024,9,1,4,0,34,29,20,17,34,35,266,251,2,2,2,1,2,2,12,9,8.2,7.4,7.4,6.8,58.8,48.6,78.8,79.6,38,21,144,163,3.8,7.8,2,1,2,3,2,4,4,2,4,2,2,2,101,74,4,3,14,11,1,1,1,1,1967,1633,5,15,-19,334,410,414,1
2,2024_1_New Orleans Saints_Carolina Panthers,New Orleans Saints,Carolina Panthers,2024-09-08,2024,9,1,6,0,47,10,19,13,24,31,199,135,3,0,0,2,1,4,1,26,8.3,5.2,8.0,3.9,79.2,41.9,140.8,28.3,37,20,180,58,4.9,2.9,2,1,4,1,4,1,5,1,5,1,2,5,73,191,7,1,13,10,0,1,0,3,2199,1401,37,64,122,798,379,193,1
3,2024_1_Chicago Bears_Tennessee Titans,Chicago Bears,Tennessee Titans,2024-09-08,2024,9,1,6,0,24,17,14,19,29,32,64,104,0,1,0,2,2,3,29,23,3.2,4.0,2.1,3.0,48.3,59.4,54.8,49.5,22,26,84,140,3.8,5.4,0,1,3,1,3,1,1,2,1,2,6,6,270,268,2,3,13,14,2,1,2,2,1506,2094,7,-40,-56,-588,148,244,1
4,2024_1_Cincinnati Bengals_New England Patriots,Cincinnati Bengals,New England Patriots,2024-09-08,2024,9,1,6,0,10,16,21,15,29,24,154,120,0,0,0,0,3,1,10,1,5.7,5.0,4.8,4.8,72.4,62.5,84.6,75.0,16,39,70,170,4.4,4.4,1,1,1,3,1,3,1,1,1,1,4,5,258,250,4,6,11,15,0,0,1,0,1557,2043,-6,34,-100,-486,224,290,0


In [112]:
# Assuming your dataframe is named 'df'
# Filter for Kansas City games in the 2024 season
kc_games = season_24[(season_24['home_team'] == 'Kansas City Chiefs') | (season_24['away_team'] == 'Kansas City Chiefs')]

# Further filter for the specific season (e.g., 2024)
#kc_games = kc_games[kc_games['season'] == 2024]

# Select the relevant columns for stats (e.g., points, yards, etc.)
stats_columns = [
    'home_pts', 'away_pts', 'home_pass_cmp',
   'away_pass_cmp', 'home_pass_att', 'away_pass_att', 'home_pass_yds',
   'away_pass_yds', 'home_pass_td', 'away_pass_td', 'home_int', 'away_int',
   'home_sk', 'away_sk', 'home_yds_lost_sks', 'away_yds_lost_sks',
   'home_yds_pass_att', 'away_yds_pass_att', 'home_net_yds_pass_att',
   'away_net_yds_pass_att', 'home_cmp%', 'away_cmp%', 'home_pass_rating',
   'away_pass_rating', 'home_rush_att', 'away_rush_att', 'home_rush_yds',
   'away_rush_yds', 'home_rush_yds_att', 'away_rush_yds_att',
   'home_rush_td', 'away_rush_td', 'home_fgm', 'away_fgm', 'home_fga',
   'away_fga', 'home_xpm', 'away_xpm', 'home_xpa', 'away_xpa', 'home_pnt',
   'away_pnt', 'home_punt_yds', 'away_punt_yds', 'home_3dconv',
   'away_3dconv', 'home_3datt', 'away_3datt', 'home_4dconv', 'away_4dconv',
   'home_4datt', 'away_4datt', 'home_top', 'away_top', 'pts_diff_home',
   'pass_yds_diff_home', 'rush_yds_diff_home', 'top_diff_home',
   'home_total_yds', 'away_total_yds', 'home_results']


# Filter the columns
kc_stats = kc_games[stats_columns]

# Calculate the averages
kc_avg_stats = kc_stats.mean()

# Show the average statistics for Kansas City in the 2024 season
kc_avg_stats


home_pts               21.588235
away_pts               20.235294
home_pass_cmp          23.529412
away_pass_cmp          22.294118
home_pass_att          34.941176
                         ...    
rush_yds_diff_home      2.647059
top_diff_home         136.470588
home_total_yds        333.352941
away_total_yds        314.941176
home_results            0.588235
Length: 61, dtype: float64

In [111]:
def get_avg_team_stats(df, season_year, team_name):

    # Further filter for the specific season (e.g., 2024)
    season_data = df[df['season'] == season_year]
    
    # Filter for team_name games in the season_year
    team_games = season_data[(season_data['home_team'] == team_name) | (season_data['away_team'] == team_name)]

    # Select the relevant columns for stats (e.g., points, yards, etc.)
    stats_columns = [
        'home_pts', 'away_pts', 'home_pass_cmp',
       'away_pass_cmp', 'home_pass_att', 'away_pass_att', 'home_pass_yds',
       'away_pass_yds', 'home_pass_td', 'away_pass_td', 'home_int', 'away_int',
       'home_sk', 'away_sk', 'home_yds_lost_sks', 'away_yds_lost_sks',
       'home_yds_pass_att', 'away_yds_pass_att', 'home_net_yds_pass_att',
       'away_net_yds_pass_att', 'home_cmp%', 'away_cmp%', 'home_pass_rating',
       'away_pass_rating', 'home_rush_att', 'away_rush_att', 'home_rush_yds',
       'away_rush_yds', 'home_rush_yds_att', 'away_rush_yds_att',
       'home_rush_td', 'away_rush_td', 'home_fgm', 'away_fgm', 'home_fga',
       'away_fga', 'home_xpm', 'away_xpm', 'home_xpa', 'away_xpa', 'home_pnt',
       'away_pnt', 'home_punt_yds', 'away_punt_yds', 'home_3dconv',
       'away_3dconv', 'home_3datt', 'away_3datt', 'home_4dconv', 'away_4dconv',
       'home_4datt', 'away_4datt', 'home_top', 'away_top', 'pts_diff_home',
       'pass_yds_diff_home', 'rush_yds_diff_home', 'top_diff_home',
       'home_total_yds', 'away_total_yds', 'home_results']

    # Filter the columns
    team_stats = team_games[stats_columns]
    
    # Calculate the averages
    team_avg_stats = team_stats.mean()
    
    # Show the average statistics for team_name in the season_year
    return team_avg_stats

In [117]:
eag_avg_stats = get_avg_team_stats(season_24, 2024, 'Philadelphia Eagles')

In [116]:
kc_avg_stats

home_pts               21.588235
away_pts               20.235294
home_pass_cmp          23.529412
away_pass_cmp          22.294118
home_pass_att          34.941176
                         ...    
rush_yds_diff_home      2.647059
top_diff_home         136.470588
home_total_yds        333.352941
away_total_yds        314.941176
home_results            0.588235
Length: 61, dtype: float64

In [118]:
eag_avg_stats

home_pts               22.647059
away_pts               22.411765
home_pass_cmp          20.529412
away_pass_cmp          17.117647
home_pass_att          31.294118
                         ...    
rush_yds_diff_home    -14.882353
top_diff_home          98.352941
home_total_yds        331.705882
away_total_yds        313.823529
home_results            0.588235
Length: 61, dtype: float64

In [134]:
pd.options.display.max_columns = None
pd.DataFrame(eag_avg_stats).T

Unnamed: 0,home_pts,away_pts,home_pass_cmp,away_pass_cmp,home_pass_att,away_pass_att,home_pass_yds,away_pass_yds,home_pass_td,away_pass_td,home_int,away_int,home_sk,away_sk,home_yds_lost_sks,away_yds_lost_sks,home_yds_pass_att,away_yds_pass_att,home_net_yds_pass_att,away_net_yds_pass_att,home_cmp%,away_cmp%,home_pass_rating,away_pass_rating,home_rush_att,away_rush_att,home_rush_yds,away_rush_yds,home_rush_yds_att,away_rush_yds_att,home_rush_td,away_rush_td,home_fgm,away_fgm,home_fga,away_fga,home_xpm,away_xpm,home_xpa,away_xpa,home_pnt,away_pnt,home_punt_yds,away_punt_yds,home_3dconv,away_3dconv,home_3datt,away_3datt,home_4dconv,away_4dconv,home_4datt,away_4datt,home_top,away_top,pts_diff_home,pass_yds_diff_home,rush_yds_diff_home,top_diff_home,home_total_yds,away_total_yds,home_results
0,22.647059,22.411765,20.529412,17.117647,31.294118,26.941176,197.411765,164.647059,1.705882,1.0,0.470588,0.647059,2.470588,2.588235,18.588235,18.352941,6.994118,6.935294,5.988235,5.682353,65.723529,64.064706,96.1,85.352941,31.0,30.0,134.294118,149.176471,4.270588,4.882353,0.882353,1.352941,1.470588,1.647059,2.058824,1.941176,2.0,2.058824,2.176471,2.117647,3.470588,3.705882,157.823529,176.058824,5.882353,4.294118,13.705882,12.529412,1.0,1.235294,1.470588,1.882353,1849.176471,1750.823529,0.235294,32.764706,-14.882353,98.352941,331.705882,313.823529,0.588235


In [128]:
kc_games.loc[:1]

Unnamed: 0,game_id,home_team,away_team,date,season,month,week,day_of_week,overtime,home_pts,...,away_4datt,home_top,away_top,pts_diff_home,pass_yds_diff_home,rush_yds_diff_home,top_diff_home,home_total_yds,away_total_yds,home_results
0,2024_1_Kansas City Chiefs_Baltimore Ravens,Kansas City Chiefs,Baltimore Ravens,2024-09-05,2024,9,1,3,0,27,...,2,1577,2023,7,14,-113,-446,353,452,1


In [131]:
kc_avg_stats

home_pts               21.588235
away_pts               20.235294
home_pass_cmp          23.529412
away_pass_cmp          22.294118
home_pass_att          34.941176
                         ...    
rush_yds_diff_home      2.647059
top_diff_home         136.470588
home_total_yds        333.352941
away_total_yds        314.941176
home_results            0.588235
Length: 61, dtype: float64

In [135]:
# Average stats for Kansas City Chiefs (home and away)
kc_stats = {
    'home_pts': 21.588235, 'away_pts': 20.235294,
    'home_pass_cmp': 23.529412, 'away_pass_cmp': 22.294118,
    'home_pass_att': 34.941176, 'away_pass_att': 33.823529,
    'home_pass_yds': 228.470588, 'away_pass_yds': 212.705882,
    'home_pass_td': 1.176471, 'away_pass_td': 1.764706,
    'home_int': 0.823529, 'away_int': 0.588235,
    'home_sk': 2.647059, 'away_sk': 2.058824,
    'home_yds_lost_sks': 15.705882, 'away_yds_lost_sks': 14.647059,
    'home_yds_pass_att': 7.076471, 'away_yds_pass_att': 6.729412,
    'home_net_yds_pass_att': 6.247059, 'away_net_yds_pass_att': 5.858824,
    'home_cmp%': 67.658824, 'away_cmp%': 65.852941,
    'home_pass_rating': 86.794118, 'away_pass_rating': 92.205882,
    'home_rush_att': 26.764706, 'away_rush_att': 24.294118,
    'home_rush_yds': 104.882353, 'away_rush_yds': 102.235294,
    'home_rush_yds_att': 3.888235, 'away_rush_yds_att': 4.123529,
    'home_rush_td': 1.0, 'away_rush_td': 0.647059,
    'home_fgm': 2.058824, 'away_fgm': 1.235294,
    'home_fga': 2.235294, 'away_fga': 1.764706,
    'home_xpm': 2.352941, 'away_xpm': 3.352941,
    'home_xpa': 3.823529, 'away_xpa': 3.705882,
    'home_pnt': 164.411765, 'away_pnt': 182.294118,
    'home_punt_yds': 6.0, 'away_punt_yds': 6.0,
    'home_3dconv': 13.235294, 'away_3dconv': 12.882353,
    'home_3datt': 0.882353, 'away_3datt': 0.529412,
    'home_4dconv': 1.352941, 'away_4dconv': 1.0,
    'home_4datt': 1.352941, 'away_4datt': 1.0,
    'home_top': 1878.588235, 'away_top': 1742.117647,
    'home_total_yds': 333.352941, 'away_total_yds': 314.941176,
    'home_results': 0.588235
}

# Average stats for Philadelphia Eagles (home and away)
philly_stats = {
    'home_pts': 22.647059, 'away_pts': 22.411765,
    'home_pass_cmp': 20.529412, 'away_pass_cmp': 17.117647,
    'home_pass_att': 31.294118, 'away_pass_att': 26.941176,
    'home_pass_yds': 197.411765, 'away_pass_yds': 164.647059,
    'home_pass_td': 1.705882, 'away_pass_td': 1.0,
    'home_int': 0.470588, 'away_int': 0.647059,
    'home_sk': 2.470588, 'away_sk': 2.588235,
    'home_yds_lost_sks': 18.588235, 'away_yds_lost_sks': 18.352941,
    'home_yds_pass_att': 6.994118, 'away_yds_pass_att': 6.935294,
    'home_net_yds_pass_att': 5.988235, 'away_net_yds_pass_att': 5.682353,
    'home_cmp%': 65.723529, 'away_cmp%': 64.064706,
    'home_pass_rating': 96.1, 'away_pass_rating': 85.352941,
    'home_rush_att': 31.0, 'away_rush_att': 30.0,
    'home_rush_yds': 134.294118, 'away_rush_yds': 149.176471,
    'home_rush_yds_att': 4.270588, 'away_rush_yds_att': 4.882353,
    'home_rush_td': 0.882353, 'away_rush_td': 1.352941,
    'home_fgm': 1.470588, 'away_fgm': 1.647059,
    'home_fga': 1.941176, 'away_fga': 1.764706,
    'home_xpm': 2.176471, 'away_xpm': 2.117647,
    'home_xpa': 3.470588, 'away_xpa': 3.705882,
    'home_pnt': 157.823529, 'away_pnt': 176.058824,
    'home_punt_yds': 5.882353, 'away_punt_yds': 4.294118,
    'home_3dconv': 5.882353, 'away_3dconv': 4.294118,
    'home_3datt': 13.705882, 'away_3datt': 12.529412,
    'home_4dconv': 1.0, 'away_4dconv': 1.235294,
    'home_4datt': 1.470588, 'away_4datt': 1.882353,
    'home_top': 1849.176471, 'away_top': 1750.823529,
    'home_total_yds': 331.705882, 'away_total_yds': 313.823529,
    'home_results': 0.588235
}

# Create a function to merge these stats
def get_super_bowl_stats(home_stats, away_stats):
    game_stats = {}
    
    # Home stats for Philly and Away stats for KC
    for stat in home_stats:
        if 'home' in stat:
            game_stats[stat] = home_stats[stat]  # i.e. Philly home stats
        elif 'away' in stat:
            game_stats[stat] = away_stats[stat]  # i.e. KC away stats
    
    # Create the DataFrame with the game statistics
    return pd.DataFrame(game_stats, index=[0])

In [137]:
# Call the function and display the stats
super_bowl_df = get_super_bowl_stats(philly_stats, kc_stats)
super_bowl_df

Unnamed: 0,home_pts,away_pts,home_pass_cmp,away_pass_cmp,home_pass_att,away_pass_att,home_pass_yds,away_pass_yds,home_pass_td,away_pass_td,home_int,away_int,home_sk,away_sk,home_yds_lost_sks,away_yds_lost_sks,home_yds_pass_att,away_yds_pass_att,home_net_yds_pass_att,away_net_yds_pass_att,home_cmp%,away_cmp%,home_pass_rating,away_pass_rating,home_rush_att,away_rush_att,home_rush_yds,away_rush_yds,home_rush_yds_att,away_rush_yds_att,home_rush_td,away_rush_td,home_fgm,away_fgm,home_fga,away_fga,home_xpm,away_xpm,home_xpa,away_xpa,home_pnt,away_pnt,home_punt_yds,away_punt_yds,home_3dconv,away_3dconv,home_3datt,away_3datt,home_4dconv,away_4dconv,home_4datt,away_4datt,home_top,away_top,home_total_yds,away_total_yds,home_results
0,22.647059,20.235294,20.529412,22.294118,31.294118,33.823529,197.411765,212.705882,1.705882,1.764706,0.470588,0.588235,2.470588,2.058824,18.588235,14.647059,6.994118,6.729412,5.988235,5.858824,65.723529,65.852941,96.1,92.205882,31.0,24.294118,134.294118,102.235294,4.270588,4.123529,0.882353,0.647059,1.470588,1.235294,1.941176,1.764706,2.176471,3.352941,3.470588,3.705882,157.823529,182.294118,5.882353,6.0,5.882353,12.882353,13.705882,0.529412,1.0,1.0,1.470588,1.0,1849.176471,1742.117647,331.705882,314.941176,0.588235


In [162]:
super_bowl_info = pd.DataFrame({
    'game_id': ['2024_23_Philadelphia Eagles_Kansas City Chiefs'],
    'home_team': ['Philadelphia Eagles'],
    'away_team': ['Kansas City Chiefs'],
    'date': ['2025-02-09'],
    'season': [2024],
    'month': [2],
    'week': [23],
    'day_of_week': [6],
    'overtime': [0]  # Assuming no overtime for now
})

# Calculating the new columns
super_bowl_df['pts_diff_home'] = super_bowl_df['home_pts'] - super_bowl_df['away_pts']
super_bowl_df['pass_yds_diff_home'] = super_bowl_df['home_pass_yds'] - super_bowl_df['away_pass_yds']
super_bowl_df['rush_yds_diff_home'] = super_bowl_df['home_rush_yds'] - super_bowl_df['away_rush_yds']
super_bowl_df['top_diff_home'] = super_bowl_df['home_top'] - super_bowl_df['away_top']


In [163]:
superbowl_eag_df = pd.concat([super_bowl_info,super_bowl_df], axis = 1)
cols = [col for col in superbowl_eag_df.columns if col != 'home_results'] + ['home_results']
superbowl_eag_df = superbowl_eag_df[cols]
superbowl_eag_df

Unnamed: 0,game_id,home_team,away_team,date,season,month,week,day_of_week,overtime,home_pts,away_pts,home_pass_cmp,away_pass_cmp,home_pass_att,away_pass_att,home_pass_yds,away_pass_yds,home_pass_td,away_pass_td,home_int,away_int,home_sk,away_sk,home_yds_lost_sks,away_yds_lost_sks,home_yds_pass_att,away_yds_pass_att,home_net_yds_pass_att,away_net_yds_pass_att,home_cmp%,away_cmp%,home_pass_rating,away_pass_rating,home_rush_att,away_rush_att,home_rush_yds,away_rush_yds,home_rush_yds_att,away_rush_yds_att,home_rush_td,away_rush_td,home_fgm,away_fgm,home_fga,away_fga,home_xpm,away_xpm,home_xpa,away_xpa,home_pnt,away_pnt,home_punt_yds,away_punt_yds,home_3dconv,away_3dconv,home_3datt,away_3datt,home_4dconv,away_4dconv,home_4datt,away_4datt,home_top,away_top,home_total_yds,away_total_yds,pts_diff_home,pass_yds_diff_home,rush_yds_diff_home,top_diff_home,home_results
0,2024_23_Philadelphia Eagles_Kansas City Chiefs,Philadelphia Eagles,Kansas City Chiefs,2025-02-09,2024,2,23,Sunday,0,22.647059,20.235294,20.529412,22.294118,31.294118,33.823529,197.411765,212.705882,1.705882,1.764706,0.470588,0.588235,2.470588,2.058824,18.588235,14.647059,6.994118,6.729412,5.988235,5.858824,65.723529,65.852941,96.1,92.205882,31.0,24.294118,134.294118,102.235294,4.270588,4.123529,0.882353,0.647059,1.470588,1.235294,1.941176,1.764706,2.176471,3.352941,3.470588,3.705882,157.823529,182.294118,5.882353,6.0,5.882353,12.882353,13.705882,0.529412,1.0,1.0,1.470588,1.0,1849.176471,1742.117647,331.705882,314.941176,2.411765,-15.294117,32.058824,107.058824,0.588235


In [158]:
superbowl_kc = get_super_bowl_stats(kc_stats,philly_stats)

In [159]:
super_bowl_info_kc = pd.DataFrame({
    'game_id': ['2024_23_Kansas City Chiefs_Philadelphia Eagles'],
    'home_team': ['Kansas City Chiefs'],
    'away_team': ['Philadelphia Eagles'],
    'date': ['2025-02-09'],
    'season': [2024],
    'month': [2],
    'week': [23],
    'day_of_week': [6],
    'overtime': [0]  # Assuming no overtime for now
})

# Calculating the new columns
superbowl_kc['pts_diff_home'] = superbowl_kc['home_pts'] - superbowl_kc['away_pts']
superbowl_kc['pass_yds_diff_home'] = superbowl_kc['home_pass_yds'] - superbowl_kc['away_pass_yds']
superbowl_kc['rush_yds_diff_home'] = superbowl_kc['home_rush_yds'] - superbowl_kc['away_rush_yds']
superbowl_kc['top_diff_home'] = superbowl_kc['home_top'] - superbowl_kc['away_top']


In [167]:
superbowl_kc_df = pd.concat([super_bowl_info_kc,superbowl_kc], axis = 1)
cols = [col for col in superbowl_kc_df.columns if col != 'home_results'] + ['home_results']
superbowl_kc_df = superbowl_kc_df[cols]
superbowl_59 = pd.concat([superbowl_kc_df,superbowl_eag_df], axis = 0, ignore_index=True)

In [168]:
superbowl_59

Unnamed: 0,game_id,home_team,away_team,date,season,month,week,day_of_week,overtime,home_pts,away_pts,home_pass_cmp,away_pass_cmp,home_pass_att,away_pass_att,home_pass_yds,away_pass_yds,home_pass_td,away_pass_td,home_int,away_int,home_sk,away_sk,home_yds_lost_sks,away_yds_lost_sks,home_yds_pass_att,away_yds_pass_att,home_net_yds_pass_att,away_net_yds_pass_att,home_cmp%,away_cmp%,home_pass_rating,away_pass_rating,home_rush_att,away_rush_att,home_rush_yds,away_rush_yds,home_rush_yds_att,away_rush_yds_att,home_rush_td,away_rush_td,home_fgm,away_fgm,home_fga,away_fga,home_xpm,away_xpm,home_xpa,away_xpa,home_pnt,away_pnt,home_punt_yds,away_punt_yds,home_3dconv,away_3dconv,home_3datt,away_3datt,home_4dconv,away_4dconv,home_4datt,away_4datt,home_top,away_top,home_total_yds,away_total_yds,pts_diff_home,pass_yds_diff_home,rush_yds_diff_home,top_diff_home,home_results
0,2024_23_Kansas City Chiefs_Philadelphia Eagles,Kansas City Chiefs,Philadelphia Eagles,2025-02-09,2024,2,23,Sunday,0,21.588235,22.411765,23.529412,17.117647,34.941176,26.941176,228.470588,164.647059,1.176471,1.0,0.823529,0.647059,2.647059,2.588235,15.705882,18.352941,7.076471,6.935294,6.247059,5.682353,67.658824,64.064706,86.794118,85.352941,26.764706,30.0,104.882353,149.176471,3.888235,4.882353,1.0,1.352941,2.058824,1.647059,2.235294,1.764706,2.352941,2.117647,3.823529,3.705882,164.411765,176.058824,6.0,4.294118,13.235294,4.294118,0.882353,12.529412,1.352941,1.235294,1.352941,1.882353,1878.588235,1750.823529,333.352941,313.823529,-0.82353,63.823529,-44.294118,127.764706,0.588235
1,2024_23_Philadelphia Eagles_Kansas City Chiefs,Philadelphia Eagles,Kansas City Chiefs,2025-02-09,2024,2,23,Sunday,0,22.647059,20.235294,20.529412,22.294118,31.294118,33.823529,197.411765,212.705882,1.705882,1.764706,0.470588,0.588235,2.470588,2.058824,18.588235,14.647059,6.994118,6.729412,5.988235,5.858824,65.723529,65.852941,96.1,92.205882,31.0,24.294118,134.294118,102.235294,4.270588,4.123529,0.882353,0.647059,1.470588,1.235294,1.941176,1.764706,2.176471,3.352941,3.470588,3.705882,157.823529,182.294118,5.882353,6.0,5.882353,12.882353,13.705882,0.529412,1.0,1.0,1.470588,1.0,1849.176471,1742.117647,331.705882,314.941176,2.411765,-15.294117,32.058824,107.058824,0.588235


## Encode Data

In [170]:
encoder = OneHotEncoder(sparse_output=False, drop='first')

# Combine home and away teams for encoding
teams = pd.concat([superbowl_59['home_team'], superbowl_59['away_team']]).unique()
encoder.fit(teams.reshape(-1, 1))

# Encode home teams
home_encoded = encoder.transform(superbowl_59[['home_team']])
home_feature_names = encoder.get_feature_names_out(['home_team'])
home_encoded_df = pd.DataFrame(home_encoded, columns=home_feature_names)

# Encode away teams
away_encoded = encoder.transform(superbowl_59[['away_team']])
away_feature_names = encoder.get_feature_names_out(['away_team'])
away_encoded_df = pd.DataFrame(away_encoded, columns=away_feature_names)

# Concatenate with original DataFrame
df_encoded = pd.concat([superbowl_59, home_encoded_df, away_encoded_df], axis=1)

# Drop original 'home_team' and 'away_team' columns
df_encoded = df_encoded.drop(['home_team', 'away_team'], axis=1)

In [174]:
df_encoded

The history saving thread hit an unexpected error (OperationalError('attempt to write a readonly database')).History will not be written to the database.


Unnamed: 0,game_id,date,season,month,week,day_of_week,overtime,home_pts,away_pts,home_pass_cmp,away_pass_cmp,home_pass_att,away_pass_att,home_pass_yds,away_pass_yds,home_pass_td,away_pass_td,home_int,away_int,home_sk,away_sk,home_yds_lost_sks,away_yds_lost_sks,home_yds_pass_att,away_yds_pass_att,home_net_yds_pass_att,away_net_yds_pass_att,home_cmp%,away_cmp%,home_pass_rating,away_pass_rating,home_rush_att,away_rush_att,home_rush_yds,away_rush_yds,home_rush_yds_att,away_rush_yds_att,home_rush_td,away_rush_td,home_fgm,away_fgm,home_fga,away_fga,home_xpm,away_xpm,home_xpa,away_xpa,home_pnt,away_pnt,home_punt_yds,away_punt_yds,home_3dconv,away_3dconv,home_3datt,away_3datt,home_4dconv,away_4dconv,home_4datt,away_4datt,home_top,away_top,home_total_yds,away_total_yds,pts_diff_home,pass_yds_diff_home,rush_yds_diff_home,top_diff_home,home_results,home_team_Philadelphia Eagles,away_team_Philadelphia Eagles
0,2024_23_Kansas City Chiefs_Philadelphia Eagles,2025-02-09,2024,2,23,Sunday,0,21.588235,22.411765,23.529412,17.117647,34.941176,26.941176,228.470588,164.647059,1.176471,1.0,0.823529,0.647059,2.647059,2.588235,15.705882,18.352941,7.076471,6.935294,6.247059,5.682353,67.658824,64.064706,86.794118,85.352941,26.764706,30.0,104.882353,149.176471,3.888235,4.882353,1.0,1.352941,2.058824,1.647059,2.235294,1.764706,2.352941,2.117647,3.823529,3.705882,164.411765,176.058824,6.0,4.294118,13.235294,4.294118,0.882353,12.529412,1.352941,1.235294,1.352941,1.882353,1878.588235,1750.823529,333.352941,313.823529,-0.82353,63.823529,-44.294118,127.764706,0.588235,0.0,1.0
1,2024_23_Philadelphia Eagles_Kansas City Chiefs,2025-02-09,2024,2,23,Sunday,0,22.647059,20.235294,20.529412,22.294118,31.294118,33.823529,197.411765,212.705882,1.705882,1.764706,0.470588,0.588235,2.470588,2.058824,18.588235,14.647059,6.994118,6.729412,5.988235,5.858824,65.723529,65.852941,96.1,92.205882,31.0,24.294118,134.294118,102.235294,4.270588,4.123529,0.882353,0.647059,1.470588,1.235294,1.941176,1.764706,2.176471,3.352941,3.470588,3.705882,157.823529,182.294118,5.882353,6.0,5.882353,12.882353,13.705882,0.529412,1.0,1.0,1.470588,1.0,1849.176471,1742.117647,331.705882,314.941176,2.411765,-15.294117,32.058824,107.058824,0.588235,1.0,0.0


In [175]:
# List of all home and away team columns in the dataset
team_columns = [
    'home_team_Atlanta Falcons', 'home_team_Baltimore Ravens', 'home_team_Buffalo Bills',
    'home_team_Carolina Panthers', 'home_team_Chicago Bears', 'home_team_Cincinnati Bengals',
    'home_team_Cleveland Browns', 'home_team_Dallas Cowboys', 'home_team_Denver Broncos',
    'home_team_Detroit Lions', 'home_team_Green Bay Packers', 'home_team_Houston Texans',
    'home_team_Indianapolis Colts', 'home_team_Jacksonville Jaguars', 'home_team_Kansas City Chiefs',
    'home_team_Las Vegas Raiders', 'home_team_Los Angeles Chargers', 'home_team_Los Angeles Rams',
    'home_team_Miami Dolphins', 'home_team_Minnesota Vikings', 'home_team_New England Patriots',
    'home_team_New Orleans Saints', 'home_team_New York Giants', 'home_team_New York Jets',
    'home_team_Philadelphia Eagles', 'home_team_Pittsburgh Steelers', 'home_team_San Francisco 49ers',
    'home_team_Seattle Seahawks', 'home_team_Tampa Bay Buccaneers', 'home_team_Tennessee Titans',
    'home_team_Washington Commanders',
    
    'away_team_Atlanta Falcons', 'away_team_Baltimore Ravens', 'away_team_Buffalo Bills',
    'away_team_Carolina Panthers', 'away_team_Chicago Bears', 'away_team_Cincinnati Bengals',
    'away_team_Cleveland Browns', 'away_team_Dallas Cowboys', 'away_team_Denver Broncos',
    'away_team_Detroit Lions', 'away_team_Green Bay Packers', 'away_team_Houston Texans',
    'away_team_Indianapolis Colts', 'away_team_Jacksonville Jaguars', 'away_team_Kansas City Chiefs',
    'away_team_Las Vegas Raiders', 'away_team_Los Angeles Chargers', 'away_team_Los Angeles Rams',
    'away_team_Miami Dolphins', 'away_team_Minnesota Vikings', 'away_team_New England Patriots',
    'away_team_New Orleans Saints', 'away_team_New York Giants', 'away_team_New York Jets',
    'away_team_Philadelphia Eagles', 'away_team_Pittsburgh Steelers', 'away_team_San Francisco 49ers',
    'away_team_Seattle Seahawks', 'away_team_Tampa Bay Buccaneers', 'away_team_Tennessee Titans',
    'away_team_Washington Commanders'
]

# Initialize all missing columns to zero in the Super Bowl dataframe
for col in team_columns:
    if col not in df_encoded.columns:
        df_encoded[col] = 0


In [181]:
X_59 = df_encoded.drop(['game_id', 'date','home_results'], axis=1)
X_59

Unnamed: 0,season,month,week,day_of_week,overtime,home_pts,away_pts,home_pass_cmp,away_pass_cmp,home_pass_att,away_pass_att,home_pass_yds,away_pass_yds,home_pass_td,away_pass_td,home_int,away_int,home_sk,away_sk,home_yds_lost_sks,away_yds_lost_sks,home_yds_pass_att,away_yds_pass_att,home_net_yds_pass_att,away_net_yds_pass_att,home_cmp%,away_cmp%,home_pass_rating,away_pass_rating,home_rush_att,away_rush_att,home_rush_yds,away_rush_yds,home_rush_yds_att,away_rush_yds_att,home_rush_td,away_rush_td,home_fgm,away_fgm,home_fga,away_fga,home_xpm,away_xpm,home_xpa,away_xpa,home_pnt,away_pnt,home_punt_yds,away_punt_yds,home_3dconv,away_3dconv,home_3datt,away_3datt,home_4dconv,away_4dconv,home_4datt,away_4datt,home_top,away_top,home_total_yds,away_total_yds,pts_diff_home,pass_yds_diff_home,rush_yds_diff_home,top_diff_home,home_team_Philadelphia Eagles,away_team_Philadelphia Eagles,home_team_Atlanta Falcons,home_team_Baltimore Ravens,home_team_Buffalo Bills,home_team_Carolina Panthers,home_team_Chicago Bears,home_team_Cincinnati Bengals,home_team_Cleveland Browns,home_team_Dallas Cowboys,home_team_Denver Broncos,home_team_Detroit Lions,home_team_Green Bay Packers,home_team_Houston Texans,home_team_Indianapolis Colts,home_team_Jacksonville Jaguars,home_team_Kansas City Chiefs,home_team_Las Vegas Raiders,home_team_Los Angeles Chargers,home_team_Los Angeles Rams,home_team_Miami Dolphins,home_team_Minnesota Vikings,home_team_New England Patriots,home_team_New Orleans Saints,home_team_New York Giants,home_team_New York Jets,home_team_Pittsburgh Steelers,home_team_San Francisco 49ers,home_team_Seattle Seahawks,home_team_Tampa Bay Buccaneers,home_team_Tennessee Titans,home_team_Washington Commanders,away_team_Atlanta Falcons,away_team_Baltimore Ravens,away_team_Buffalo Bills,away_team_Carolina Panthers,away_team_Chicago Bears,away_team_Cincinnati Bengals,away_team_Cleveland Browns,away_team_Dallas Cowboys,away_team_Denver Broncos,away_team_Detroit Lions,away_team_Green Bay Packers,away_team_Houston Texans,away_team_Indianapolis Colts,away_team_Jacksonville Jaguars,away_team_Kansas City Chiefs,away_team_Las Vegas Raiders,away_team_Los Angeles Chargers,away_team_Los Angeles Rams,away_team_Miami Dolphins,away_team_Minnesota Vikings,away_team_New England Patriots,away_team_New Orleans Saints,away_team_New York Giants,away_team_New York Jets,away_team_Pittsburgh Steelers,away_team_San Francisco 49ers,away_team_Seattle Seahawks,away_team_Tampa Bay Buccaneers,away_team_Tennessee Titans,away_team_Washington Commanders
0,2024,2,23,Sunday,0,21.588235,22.411765,23.529412,17.117647,34.941176,26.941176,228.470588,164.647059,1.176471,1.0,0.823529,0.647059,2.647059,2.588235,15.705882,18.352941,7.076471,6.935294,6.247059,5.682353,67.658824,64.064706,86.794118,85.352941,26.764706,30.0,104.882353,149.176471,3.888235,4.882353,1.0,1.352941,2.058824,1.647059,2.235294,1.764706,2.352941,2.117647,3.823529,3.705882,164.411765,176.058824,6.0,4.294118,13.235294,4.294118,0.882353,12.529412,1.352941,1.235294,1.352941,1.882353,1878.588235,1750.823529,333.352941,313.823529,-0.82353,63.823529,-44.294118,127.764706,0.0,1.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,2024,2,23,Sunday,0,22.647059,20.235294,20.529412,22.294118,31.294118,33.823529,197.411765,212.705882,1.705882,1.764706,0.470588,0.588235,2.470588,2.058824,18.588235,14.647059,6.994118,6.729412,5.988235,5.858824,65.723529,65.852941,96.1,92.205882,31.0,24.294118,134.294118,102.235294,4.270588,4.123529,0.882353,0.647059,1.470588,1.235294,1.941176,1.764706,2.176471,3.352941,3.470588,3.705882,157.823529,182.294118,5.882353,6.0,5.882353,12.882353,13.705882,0.529412,1.0,1.0,1.470588,1.0,1849.176471,1742.117647,331.705882,314.941176,2.411765,-15.294117,32.058824,107.058824,1.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [182]:
X_train_columns = X_train.columns  # The columns used during training

# Now reorder X_59 to match the column order of the training data
X_59 = X_59[X_train_columns]

# Add any missing columns to X_59, initializing them to 0
for col in X_train_columns:
    if col not in X_59.columns:
        X_59[col] = 0

In [188]:
X_59['day_of_week' ]= X_59['day_of_week'] = 6

## Preprocessing Superbowl Data

In [190]:

X_59_scaled = scaler.transform(X_59)


## Make Predicitions

In [191]:
y_pred_superbowl= model.predict(X_59_scaled)
y_pred_proba_superbowl = model.predict_proba(X_59_scaled)[:, 1]  # Probability of home team winning

In [196]:
y_predictions_superbowl = np.round(y_pred_proba_superbowl, 2)
y_predictions_superbowl = pd.DataFrame(y_predictions_superbowl, columns= ['home_win_prob'])
y_predictions_superbowl # predictions are on X_24 dataset subset

Unnamed: 0,home_win_prob
0,0.0
1,0.0


In [193]:
season_24_preds_sb = pd.concat([superbowl_59,y_predictions_superbowl], axis = 1)
# Apply conditions to create the 'correct pick' column
season_24_preds_sb['correct pick'] = season_24_preds_sb.apply(
    lambda row: 'yes' if (row['home_results'] == 1 and row['home_win_prob'] >= 0.60) or
                (row['home_results'] == 0 and row['home_win_prob'] < 0.60) else 'no', axis=1
)

# Display the updated dataframe with the 'correct pick' column
season_24_preds_sb


Unnamed: 0,game_id,home_team,away_team,date,season,month,week,day_of_week,overtime,home_pts,away_pts,home_pass_cmp,away_pass_cmp,home_pass_att,away_pass_att,home_pass_yds,away_pass_yds,home_pass_td,away_pass_td,home_int,away_int,home_sk,away_sk,home_yds_lost_sks,away_yds_lost_sks,home_yds_pass_att,away_yds_pass_att,home_net_yds_pass_att,away_net_yds_pass_att,home_cmp%,away_cmp%,home_pass_rating,away_pass_rating,home_rush_att,away_rush_att,home_rush_yds,away_rush_yds,home_rush_yds_att,away_rush_yds_att,home_rush_td,away_rush_td,home_fgm,away_fgm,home_fga,away_fga,home_xpm,away_xpm,home_xpa,away_xpa,home_pnt,away_pnt,home_punt_yds,away_punt_yds,home_3dconv,away_3dconv,home_3datt,away_3datt,home_4dconv,away_4dconv,home_4datt,away_4datt,home_top,away_top,home_total_yds,away_total_yds,pts_diff_home,pass_yds_diff_home,rush_yds_diff_home,top_diff_home,home_results,home_win_prob,correct pick
0,2024_23_Kansas City Chiefs_Philadelphia Eagles,Kansas City Chiefs,Philadelphia Eagles,2025-02-09,2024,2,23,Sunday,0,21.588235,22.411765,23.529412,17.117647,34.941176,26.941176,228.470588,164.647059,1.176471,1.0,0.823529,0.647059,2.647059,2.588235,15.705882,18.352941,7.076471,6.935294,6.247059,5.682353,67.658824,64.064706,86.794118,85.352941,26.764706,30.0,104.882353,149.176471,3.888235,4.882353,1.0,1.352941,2.058824,1.647059,2.235294,1.764706,2.352941,2.117647,3.823529,3.705882,164.411765,176.058824,6.0,4.294118,13.235294,4.294118,0.882353,12.529412,1.352941,1.235294,1.352941,1.882353,1878.588235,1750.823529,333.352941,313.823529,-0.82353,63.823529,-44.294118,127.764706,0.588235,0.0,no
1,2024_23_Philadelphia Eagles_Kansas City Chiefs,Philadelphia Eagles,Kansas City Chiefs,2025-02-09,2024,2,23,Sunday,0,22.647059,20.235294,20.529412,22.294118,31.294118,33.823529,197.411765,212.705882,1.705882,1.764706,0.470588,0.588235,2.470588,2.058824,18.588235,14.647059,6.994118,6.729412,5.988235,5.858824,65.723529,65.852941,96.1,92.205882,31.0,24.294118,134.294118,102.235294,4.270588,4.123529,0.882353,0.647059,1.470588,1.235294,1.941176,1.764706,2.176471,3.352941,3.470588,3.705882,157.823529,182.294118,5.882353,6.0,5.882353,12.882353,13.705882,0.529412,1.0,1.0,1.470588,1.0,1849.176471,1742.117647,331.705882,314.941176,2.411765,-15.294117,32.058824,107.058824,0.588235,0.0,no


In [197]:
y_pred_proba_superbowl

array([5.52722673e-24, 2.79286588e-25])

In [198]:
# Look over superbowl predicitions to build on different iterations for playoffs / superbowl models
# Also look over the avg stat functions created because future games wont have the stats but avg stats to predict games
# create an average to be able to run on wk 1 of the nfl seasons
# for better results in models I think chronologically for games to occur and data point / stats to show case 
#             the form of the team, aka winning streak)
# Build different models to truly establish a base model- (make sure data in chronological and rerun this model to 
#                     figure out true models precision, recal, F1-score, accuracy, and ROC-AUC score)
# appify this code to choose the seasons then be able to make predictions for the week of games
# look to add more data to build better models (weather, QB's, win streak, conference, etc.)