In [91]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.pipeline import make_pipeline
from sklearn.compose import make_column_transformer
from sklearn.metrics import accuracy_score

In [106]:
# Load the datasets
df_2021 = pd.read_csv('2021data.csv')
df_2022 = pd.read_csv('2022data.csv')
df_2023 = pd.read_csv('2023data.csv')

# Combine the datasets
data = pd.concat([df_2021, df_2022, df_2023])

# Save the combined dataset
data.to_csv('combined_dataset.csv', index=False)

In [100]:
data

Unnamed: 0,Team,Opp,Result,OT,H/A,Score_Tm,Score_Opp,Off_1stD,Off_TotYd,Off_PassYd,Off_RushYd,Off_TO,Def_1stD,Def_TotYd,Def_PassYd,Def_RushYD,Def_TO,Exp_Offense,Exp_Defense,Exp_SpecTms
0,49ers,Detroit Lions,1,0,0,41.0,33.0,21.0,442.0,311.0,131.0,2.0,31.0,430.0,314.0,116.0,1.0,15.48,-5.16,-1.73
1,49ers,Arizona Cardinals,0,0,0,10.0,17.0,19.0,338.0,186.0,152.0,1.0,20.0,304.0,210.0,94.0,1.0,-6.53,-1.74,-0.47
2,49ers,Indianapolis Colts,0,0,1,18.0,30.0,13.0,280.0,169.0,111.0,4.0,17.0,295.0,147.0,148.0,2.0,-15.3,-1.16,4.24
3,49ers,Minnesota Vikings,1,0,1,34.0,26.0,23.0,423.0,215.0,208.0,1.0,17.0,323.0,256.0,67.0,2.0,8.93,4.16,-6.1
4,49ers,Seattle Seahawks,0,0,0,23.0,30.0,17.0,365.0,294.0,71.0,3.0,21.0,327.0,181.0,146.0,3.0,-0.51,-2.27,-3.44


In [94]:
data['Result'] = data['Result'].apply(lambda x: 1 if x == 'W' else 0)  # Convert Win to 1 and anything else to 0

#1 if OT, 0 if no OT
data['OT'] = data['OT'].fillna(0).replace('OT', 1)

# 1 is home, 0 is away
data['H/A'] = data['H/A'].fillna(1).replace('@', 0)

data = data.drop(columns = ['Day', 'Date', 'Time', 'Link', 'Rec', 'Week'])

data = data.dropna()

data = data.reset_index()

data = data.drop(columns = ['index'])
data['Score_Tm'] = data['Score_Tm'].astype(float)
data = data[~(data['H/A'] == 'N')]
data['H/A'] = data['H/A'].astype(float)
data['OT'] = data['OT'].astype(float)
data['Result'] = data['Result'].astype(float)
column = data.pop('Opp')
data.insert(1, 'Opp', column)

  data['OT'] = data['OT'].fillna(0).replace('OT', 1)


In [96]:
data.head()

Unnamed: 0,Team,Opp,Result,OT,H/A,Score_Tm,Score_Opp,Off_1stD,Off_TotYd,Off_PassYd,Off_RushYd,Off_TO,Def_1stD,Def_TotYd,Def_PassYd,Def_RushYD,Def_TO,Exp_Offense,Exp_Defense,Exp_SpecTms
0,49ers,Detroit Lions,1,0,0,41.0,33.0,21.0,442.0,311.0,131.0,2.0,31.0,430.0,314.0,116.0,1.0,15.48,-5.16,-1.73
1,49ers,Arizona Cardinals,0,0,0,10.0,17.0,19.0,338.0,186.0,152.0,1.0,20.0,304.0,210.0,94.0,1.0,-6.53,-1.74,-0.47
2,49ers,Indianapolis Colts,0,0,1,18.0,30.0,13.0,280.0,169.0,111.0,4.0,17.0,295.0,147.0,148.0,2.0,-15.3,-1.16,4.24
3,49ers,Minnesota Vikings,1,0,1,34.0,26.0,23.0,423.0,215.0,208.0,1.0,17.0,323.0,256.0,67.0,2.0,8.93,4.16,-6.1
4,49ers,Seattle Seahawks,0,0,0,23.0,30.0,17.0,365.0,294.0,71.0,3.0,21.0,327.0,181.0,146.0,3.0,-0.51,-2.27,-3.44


In [101]:
df_2021

Unnamed: 0,Week,Team,Day,Date,Time,Link,Result,OT,Rec,H/A,...,Off_RushYd,Off_TO,Def_1stD,Def_TotYd,Def_PassYd,Def_RushYD,Def_TO,Exp_Offense,Exp_Defense,Exp_SpecTms
0,1,49ers,Sun,12-Sep,1:00PM ET,boxscore,W,,Jan-00,@,...,131.0,2.0,31.0,430.0,314.0,116.0,1.0,15.48,-5.16,-1.73
1,2,49ers,Sun,19-Sep,1:00PM ET,boxscore,W,,Feb-00,@,...,117.0,,18.0,328.0,177.0,151.0,,7.86,-5.30,2.68
2,3,49ers,Sun,26-Sep,8:20PM ET,boxscore,L,,1-Feb,,...,67.0,2.0,21.0,353.0,253.0,100.0,,12.19,-15.14,0.95
3,4,49ers,Sun,3-Oct,4:05PM ET,boxscore,L,,2-Feb,,...,143.0,2.0,14.0,234.0,129.0,105.0,,8.61,-2.20,-14.31
4,5,49ers,Sun,10-Oct,4:25PM ET,boxscore,L,,3-Feb,@,...,152.0,1.0,20.0,304.0,210.0,94.0,1.0,-6.53,-1.74,-0.47
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
611,14,vikings,Thu,9-Dec,8:20PM ET,boxscore,W,,7-Jun,,...,242.0,2.0,22.0,375.0,269.0,106.0,1.0,8.68,-6.41,1.31
612,15,vikings,Mon,20-Dec,8:15PM ET,boxscore,W,,7-Jul,@,...,132.0,1.0,24.0,370.0,255.0,115.0,3.0,-4.65,7.72,5.92
613,16,vikings,Sun,26-Dec,1:00PM ET,boxscore,L,,8-Jul,,...,66.0,1.0,22.0,356.0,197.0,159.0,3.0,-1.81,0.84,-7.80
614,17,vikings,Sun,2-Jan,8:20PM ET,boxscore,L,,9-Jul,@,...,27.0,,29.0,481.0,307.0,174.0,,-13.56,-21.96,7.80


# THE GOOD STUFF

In [305]:
df_2021 = pd.read_csv('2021data.csv')
df_2022 = pd.read_csv('2022data.csv')
df_2023 = pd.read_csv('2023data.csv')

In [306]:
# Convert Win to 1 and anything else to 0
df_2021['Result'] = df_2021['Result'].apply(lambda x: 1 if x == 'W' else 0) 

# 1 if OT, 0 if no OT
df_2021['OT'] = df_2021['OT'].fillna(0).replace('OT', 1)

# 1 is home, 0 is away
df_2021['H/A'] = df_2021['H/A'].fillna(1).replace('@', 0)

df_2021 = df_2021.drop(columns=['Day', 'Date', 'Time', 'Link', 'Rec', 'Week'])
df_2021 = df_2021.dropna()
df_2021 = df_2021.reset_index()
df_2021 = df_2021.drop(columns=['index'])
df_2021['Score_Tm'] = df_2021['Score_Tm'].astype(float)
df_2021 = df_2021[~(df_2021['H/A'] == 'N')]
df_2021['H/A'] = df_2021['H/A'].astype(float)
df_2021['OT'] = df_2021['OT'].astype(float)
df_2021['Result'] = df_2021['Result'].astype(float)
df_2021.pop('Opp')

df_2021.head()

  df_2021['OT'] = df_2021['OT'].fillna(0).replace('OT', 1)


Unnamed: 0,Team,Result,OT,H/A,Score_Tm,Score_Opp,Off_1stD,Off_TotYd,Off_PassYd,Off_RushYd,Off_TO,Def_1stD,Def_TotYd,Def_PassYd,Def_RushYD,Def_TO,Exp_Offense,Exp_Defense,Exp_SpecTms
0,49ers,1.0,0.0,0.0,41.0,33.0,21.0,442.0,311.0,131.0,2.0,31.0,430.0,314.0,116.0,1.0,15.48,-5.16,-1.73
1,49ers,0.0,0.0,0.0,10.0,17.0,19.0,338.0,186.0,152.0,1.0,20.0,304.0,210.0,94.0,1.0,-6.53,-1.74,-0.47
2,49ers,0.0,0.0,1.0,18.0,30.0,13.0,280.0,169.0,111.0,4.0,17.0,295.0,147.0,148.0,2.0,-15.3,-1.16,4.24
3,49ers,1.0,0.0,1.0,34.0,26.0,23.0,423.0,215.0,208.0,1.0,17.0,323.0,256.0,67.0,2.0,8.93,4.16,-6.1
4,49ers,0.0,0.0,0.0,23.0,30.0,17.0,365.0,294.0,71.0,3.0,21.0,327.0,181.0,146.0,3.0,-0.51,-2.27,-3.44


In [307]:
# Convert Win to 1 and anything else to 0
df_2022['Result'] = df_2022['Result'].apply(lambda x: 1 if x == 'W' else 0) 

# 1 if OT, 0 if no OT
df_2022['OT'] = df_2022['OT'].fillna(0).replace('OT', 1)

# 1 is home, 0 is away
df_2022['H/A'] = df_2022['H/A'].fillna(1).replace('@', 0)

df_2022 = df_2022.drop(columns=['Day', 'Date', 'Time', 'Link', 'Rec', 'Week'])
df_2022 = df_2022.dropna()
df_2022 = df_2022.reset_index()
df_2022 = df_2022.drop(columns=['index'])
df_2022['Score_Tm'] = df_2022['Score_Tm'].astype(float)
df_2022 = df_2022[~(df_2022['H/A'] == 'N')]
df_2022['H/A'] = df_2022['H/A'].astype(float)
df_2022['OT'] = df_2022['OT'].astype(float)
df_2022['Result'] = df_2022['Result'].astype(float)
df_2022.pop('Opp')

df_2022.head()

  df_2022['OT'] = df_2022['OT'].fillna(0).replace('OT', 1)


Unnamed: 0,Team,Result,OT,H/A,Score_Tm,Score_Opp,Off_1stD,Off_TotYd,Off_PassYd,Off_RushYd,Off_TO,Def_1stD,Def_TotYd,Def_PassYd,Def_RushYD,Def_TO,Exp_Offense,Exp_Defense,Exp_SpecTms
0,49ers,0.0,0.0,0.0,10.0,19.0,17.0,331.0,155.0,176.0,2.0,15.0,204.0,105.0,99.0,1.0,-11.26,-0.67,0.42
1,49ers,1.0,0.0,0.0,37.0,15.0,22.0,397.0,244.0,153.0,1.0,15.0,308.0,244.0,64.0,1.0,16.04,13.22,-7.82
2,49ers,0.0,0.0,1.0,23.0,44.0,25.0,444.0,343.0,101.0,3.0,24.0,529.0,417.0,112.0,2.0,3.59,-30.58,8.5
3,49ers,1.0,0.0,1.0,22.0,16.0,22.0,387.0,230.0,157.0,1.0,12.0,238.0,187.0,51.0,1.0,6.2,7.29,-5.38
4,49ers,1.0,0.0,1.0,33.0,17.0,24.0,351.0,230.0,121.0,1.0,14.0,308.0,275.0,33.0,4.0,1.78,13.36,1.0


In [308]:
# Convert Win to 1 and anything else to 0
df_2023['Result'] = df_2023['Result'].apply(lambda x: 1 if x == 'W' else 0) 

# 1 if OT, 0 if no OT
df_2023['OT'] = df_2023['OT'].fillna(0).replace('OT', 1)

# 1 is home, 0 is away
df_2023['H/A'] = df_2023['H/A'].fillna(1).replace('@', 0)

df_2023 = df_2023.drop(columns=['Day', 'Date', 'Time', 'Link', 'Rec', 'Week'])
df_2023 = df_2023.dropna()
df_2023 = df_2023.reset_index()
df_2023 = df_2023.drop(columns=['index'])
df_2023['Score_Tm'] = df_2023['Score_Tm'].astype(float)
df_2023 = df_2023[~(df_2023['H/A'] == 'N')]
df_2023['H/A'] = df_2023['H/A'].astype(float)
df_2023['OT'] = df_2023['OT'].astype(float)
df_2023['Result'] = df_2023['Result'].astype(float)
df_2023.pop('Opp')

df_2023.head()

  df_2023['OT'] = df_2023['OT'].fillna(0).replace('OT', 1)


Unnamed: 0,Team,Result,OT,H/A,Score_Tm,Score_Opp,Off_1stD,Off_TotYd,Off_PassYd,Off_RushYd,Off_TO,Def_1stD,Def_TotYd,Def_PassYd,Def_RushYD,Def_TO,Exp_Offense,Exp_Defense,Exp_SpecTms
0,49ers,1.0,0.0,0.0,30.0,7.0,22.0,391.0,203.0,188.0,1.0,15.0,239.0,198.0,41.0,2.0,9.73,13.55,-0.78
1,49ers,1.0,0.0,1.0,42.0,10.0,25.0,421.0,251.0,170.0,1.0,8.0,197.0,140.0,57.0,4.0,12.84,23.12,-5.21
2,49ers,0.0,0.0,0.0,17.0,19.0,15.0,215.0,107.0,108.0,1.0,18.0,334.0,174.0,160.0,2.0,-6.05,11.32,-7.34
3,49ers,0.0,0.0,0.0,17.0,22.0,19.0,325.0,260.0,65.0,3.0,24.0,452.0,378.0,74.0,1.0,5.43,-16.3,6.07
4,49ers,0.0,0.0,1.0,17.0,31.0,24.0,460.0,347.0,113.0,3.0,29.0,400.0,266.0,134.0,1.0,9.25,-18.3,2.48


In [309]:
agg_2021 = df_2021.groupby(df_2021.columns[0]).mean().reset_index()

agg_2022 = df_2021.groupby(df_2022.columns[0]).mean().reset_index()

agg_2023 = df_2021.groupby(df_2023.columns[0]).mean().reset_index()

In [310]:
agg_2021.head()

Unnamed: 0,Team,Result,OT,H/A,Score_Tm,Score_Opp,Off_1stD,Off_TotYd,Off_PassYd,Off_RushYd,Off_TO,Def_1stD,Def_TotYd,Def_PassYd,Def_RushYD,Def_TO,Exp_Offense,Exp_Defense,Exp_SpecTms
0,49ers,0.636364,0.090909,0.363636,23.636364,20.636364,18.818182,358.636364,225.0,133.636364,1.636364,19.272727,309.727273,219.090909,90.636364,1.454545,3.477273,0.251818,-1.626364
1,bears,0.5,0.0,0.625,17.625,18.5,18.625,331.75,212.75,119.0,2.0,18.5,271.625,154.375,117.25,1.875,-2.57375,3.12,-0.5675
2,bengals,0.6,0.2,0.3,26.9,20.4,18.9,331.7,223.2,108.5,1.7,19.2,342.3,250.8,91.5,2.1,2.364,2.279,2.332
3,bills,0.666667,0.0,0.444444,28.666667,12.111111,22.666667,358.333333,225.111111,133.222222,1.777778,14.777778,249.111111,146.111111,103.0,2.111111,8.024444,9.343333,-1.233333
4,broncos,0.571429,0.0,0.571429,19.857143,16.428571,20.428571,346.285714,221.857143,124.428571,1.285714,18.142857,317.0,211.428571,105.571429,1.428571,4.684286,1.81,-1.164286


In [336]:
df_2022 = pd.read_csv('2022data.csv')
df_2023 = pd.read_csv('2023data.csv')
#df_2024

mask = df_2022['Week'].isin(['Wild Card', 'Division', 'Conf. Champ.', 'SuperBowl'])
df_2022 = df_2022[~mask]
df_2022 = df_2022.dropna(subset = ['Opp'])
df_2022 = df_2022[df_2022['Opp'] != 'Bye Week'].reset_index()

mask = df_2023['Week'].isin(['Wild Card', 'Division', 'Conf. Champ.', 'SuperBowl'])
df_2023 = df_2023[~mask]
df_2023 = df_2023.dropna(subset = ['Opp'])
df_2023 = df_2023[df_2023['Opp'] != 'Bye Week'].reset_index()

In [337]:
wl_2022 = df_2022[['Result']]
wl_2023 = df_2023[['Result']]

df_2022 = df_2022[['Week', 'Team', 'Opp']]
df_2023 = df_2023[['Week', 'Team', 'Opp']]
#df_2024

In [338]:
# Create a mapping dictionary to convert team names
team_mapping = {
    'Chicago Bears': 'bears',
    'Seattle Seahawks': 'seahawks',
    'Denver Broncos': 'broncos',
    'Los Angeles Rams': 'rams',
    'Carolina Panthers': 'panthers',
    'Atlanta Falcons': 'falcons',
    'Kansas City Chiefs': 'chiefs',
    'Los Angeles Chargers': 'chargers',
    'Arizona Cardinals': 'cardinals',
    'New Orleans Saints': 'saints',
    'Miami Dolphins': 'dolphins',
    'Tampa Bay Buccaneers': 'buccaneers',
    'Washington Commanders': 'commanders',
    'Las Vegas Raiders': 'raiders',
    'Dallas Cowboys': 'cowboys',
    'Philadelphia Eagles': 'eagles',
    'San Francisco 49ers': '49ers',
    'Green Bay Packers': 'packers',
    'Houston Texans': 'texans',
    'New York Giants': 'giants',
    'Minnesota Vikings': 'vikings',
    'New England Patriots': 'patriots',
    'Detroit Lions': 'lions',
    'New York Jets': 'jets',
    'Buffalo Bills': 'bills',
    'Pittsburgh Steelers': 'steelers',
    'Baltimore Ravens': 'ravens',
    'Cleveland Browns': 'browns',
    'Tennessee Titans': 'titans',
    'Cincinnati Bengals': 'bengals',
    'Indianapolis Colts': 'colts',
    'Jacksonville Jaguars': 'jaguars'
}

# Map team names to their abbreviated versions in the 'Opp' column
df_2022['Opp'] = df_2022['Opp'].map(team_mapping)
df_2023['Opp'] = df_2023['Opp'].map(team_mapping)

In [339]:
df_2022_team = df_2022[['Week', 'Team']]
df_2022_opp = df_2022[['Week', 'Opp']]

df_2023_team = df_2023[['Week', 'Team']]
df_2023_opp = df_2023[['Week', 'Opp']]

In [340]:
df_2022_team = pd.merge(df_2022_team, agg_2021, how = 'left', on = 'Team')

for col in df_2022_team.columns:
    if col not in ['Team', 'Week']:
        df_2022_team.rename(columns={col: 'Team_' + col}, inplace=True)
     
     
df_2023_team = pd.merge(df_2023_team, agg_2022, how = 'left', on = 'Team')

for col in df_2023_team.columns:
    if col not in ['Team', 'Week']:
        df_2023_team.rename(columns={col: 'Team_' + col}, inplace=True)   

In [341]:
df_2022_opp = pd.merge(df_2022_opp, agg_2021, how = 'left', left_on = 'Opp', right_on = 'Team')
df_2022_opp = df_2022_opp.drop(columns = 'Team')

for col in df_2022_opp.columns:
    if col not in ['Opp', 'Week']:
        df_2022_opp.rename(columns={col: 'Opp_' + col}, inplace=True)
        
df_2022_opp = df_2022_opp.drop(columns = ['Week'])


df_2023_opp = pd.merge(df_2023_opp, agg_2022, how = 'left', left_on = 'Opp', right_on = 'Team')
df_2023_opp = df_2023_opp.drop(columns = 'Team')

for col in df_2023_opp.columns:
    if col not in ['Opp', 'Week']:
        df_2023_opp.rename(columns={col: 'Opp_' + col}, inplace=True)
        
df_2023_opp = df_2023_opp.drop(columns = ['Week'])

In [342]:
df_2022_agg = pd.concat([df_2022_team, df_2022_opp], axis = 1)
df_2022_agg = pd.concat([df_2022_agg, wl_2022], axis = 1)

df_2023_agg = pd.concat([df_2023_team, df_2023_opp], axis = 1)
df_2023_agg = pd.concat([df_2023_agg, wl_2023], axis = 1)

In [343]:
df_2022_agg['Match'] = df_2022_agg.apply(lambda row: '-'.join(sorted([row['Team'], row['Opp']])), axis=1)
df_2022_agg = df_2022_agg.drop_duplicates(subset=['Match', 'Week'])
df_2022_agg = df_2022_agg.drop(columns=['Match'])

df_2023_agg['Match'] = df_2023_agg.apply(lambda row: '-'.join(sorted([row['Team'], row['Opp']])), axis=1)
df_2023_agg = df_2023_agg.drop_duplicates(subset=['Match', 'Week'])
df_2023_agg = df_2023_agg.drop(columns=['Match'])

df_2022_agg

Unnamed: 0,Week,Team,Team_Result,Team_OT,Team_H/A,Team_Score_Tm,Team_Score_Opp,Team_Off_1stD,Team_Off_TotYd,Team_Off_PassYd,...,Opp_Off_TO,Opp_Def_1stD,Opp_Def_TotYd,Opp_Def_PassYd,Opp_Def_RushYD,Opp_Def_TO,Opp_Exp_Offense,Opp_Exp_Defense,Opp_Exp_SpecTms,Result
0,1,49ers,0.636364,0.090909,0.363636,23.636364,20.636364,18.818182,358.636364,225.000000,...,2.000000,18.500000,271.625000,154.375000,117.250000,1.875000,-2.573750,3.120000,-0.567500,L
1,2,49ers,0.636364,0.090909,0.363636,23.636364,20.636364,18.818182,358.636364,225.000000,...,1.625000,21.875000,365.375000,250.375000,115.000000,1.250000,1.265000,-4.305000,1.530000,W
2,3,49ers,0.636364,0.090909,0.363636,23.636364,20.636364,18.818182,358.636364,225.000000,...,1.285714,18.142857,317.000000,211.428571,105.571429,1.428571,4.684286,1.810000,-1.164286,L
3,4,49ers,0.636364,0.090909,0.363636,23.636364,20.636364,18.818182,358.636364,225.000000,...,2.181818,19.272727,323.090909,235.000000,88.090909,1.727273,2.621818,0.015455,0.578182,W
4,5,49ers,0.636364,0.090909,0.363636,23.636364,20.636364,18.818182,358.636364,225.000000,...,2.100000,17.100000,282.400000,178.600000,103.800000,1.500000,-6.894000,4.772000,-0.035000,W
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
445,4,saints,0.142857,0.142857,0.571429,16.571429,27.571429,14.571429,283.428571,182.714286,...,1.375000,22.000000,342.875000,218.375000,124.500000,2.000000,2.143750,0.177500,-1.373750,L
446,5,saints,0.142857,0.142857,0.571429,16.571429,27.571429,14.571429,283.428571,182.714286,...,1.625000,21.875000,365.375000,250.375000,115.000000,1.250000,1.265000,-4.305000,1.530000,W
451,10,saints,0.142857,0.142857,0.571429,16.571429,27.571429,14.571429,283.428571,182.714286,...,1.400000,19.800000,365.100000,215.000000,150.100000,1.600000,-1.306000,-2.900000,-0.072000,L
499,8,texans,0.000000,0.000000,0.375000,9.750000,27.375000,12.000000,221.000000,148.750000,...,1.833333,22.500000,361.000000,271.500000,89.500000,1.833333,3.473333,-0.823333,-0.740000,L


['49ers' 'bears' 'bengals' 'bills' 'broncos' 'browns' 'buccaneers'
 'cardinals' 'chargers' 'chiefs' 'colts' 'commanders' 'cowboys' 'dolphins'
 'eagles' 'falcons' 'giants' 'jaguars' 'jets' 'lions' 'packers' 'panthers'
 'patriots' 'raiders' 'rams' 'ravens' 'saints' 'seahawks' 'steelers'
 'texans' 'titans' 'vikings']

In [344]:
team_name_to_number = {
    '49ers': 0,
    'bears': 1,
    'bengals': 2,
    'bills': 3,
    'broncos': 4,
    'browns': 5,
    'buccaneers': 6,
    'cardinals': 7,
    'chargers': 8,
    'chiefs': 9,
    'colts': 10,
    'commanders': 11,
    'cowboys': 12,
    'dolphins': 13,
    'eagles': 14,
    'falcons': 15,
    'giants': 16,
    'jaguars': 17,
    'jets': 18,
    'lions': 19,
    'packers': 20,
    'panthers': 21,
    'patriots': 22,
    'raiders': 23,
    'rams': 24,
    'ravens': 25,
    'saints': 26,
    'seahawks': 27,
    'steelers': 28,
    'texans': 29,
    'titans': 30,
    'vikings': 31
}

df_2022_agg['Team'] = df_2022_agg['Team'].map(team_name_to_number)
df_2022_agg['Opp'] = df_2022_agg['Opp'].map(team_name_to_number)

result_mapping = {'W': 1, 'L': 0}
df_2022_agg['Result'] = df_2022_agg['Result'].map(result_mapping)


df_2023_agg['Team'] = df_2023_agg['Team'].map(team_name_to_number)
df_2023_agg['Opp'] = df_2023_agg['Opp'].map(team_name_to_number)


df_2023_agg['Result'] = df_2023_agg['Result'].map(result_mapping)

In [345]:
df_2022_agg.dropna(subset = ['Result'], inplace=True)
df_2022_agg['Result'] = df_2022_agg['Result'].astype(int)

df_2023_agg.dropna(subset = ['Result'], inplace=True)
df_2023_agg['Result'] = df_2023_agg['Result'].astype(int)

df_2022_agg

Unnamed: 0,Week,Team,Team_Result,Team_OT,Team_H/A,Team_Score_Tm,Team_Score_Opp,Team_Off_1stD,Team_Off_TotYd,Team_Off_PassYd,...,Opp_Off_TO,Opp_Def_1stD,Opp_Def_TotYd,Opp_Def_PassYd,Opp_Def_RushYD,Opp_Def_TO,Opp_Exp_Offense,Opp_Exp_Defense,Opp_Exp_SpecTms,Result
0,1,0,0.636364,0.090909,0.363636,23.636364,20.636364,18.818182,358.636364,225.000000,...,2.000000,18.500000,271.625000,154.375000,117.250000,1.875000,-2.573750,3.120000,-0.567500,0
1,2,0,0.636364,0.090909,0.363636,23.636364,20.636364,18.818182,358.636364,225.000000,...,1.625000,21.875000,365.375000,250.375000,115.000000,1.250000,1.265000,-4.305000,1.530000,1
2,3,0,0.636364,0.090909,0.363636,23.636364,20.636364,18.818182,358.636364,225.000000,...,1.285714,18.142857,317.000000,211.428571,105.571429,1.428571,4.684286,1.810000,-1.164286,0
3,4,0,0.636364,0.090909,0.363636,23.636364,20.636364,18.818182,358.636364,225.000000,...,2.181818,19.272727,323.090909,235.000000,88.090909,1.727273,2.621818,0.015455,0.578182,1
4,5,0,0.636364,0.090909,0.363636,23.636364,20.636364,18.818182,358.636364,225.000000,...,2.100000,17.100000,282.400000,178.600000,103.800000,1.500000,-6.894000,4.772000,-0.035000,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
445,4,26,0.142857,0.142857,0.571429,16.571429,27.571429,14.571429,283.428571,182.714286,...,1.375000,22.000000,342.875000,218.375000,124.500000,2.000000,2.143750,0.177500,-1.373750,0
446,5,26,0.142857,0.142857,0.571429,16.571429,27.571429,14.571429,283.428571,182.714286,...,1.625000,21.875000,365.375000,250.375000,115.000000,1.250000,1.265000,-4.305000,1.530000,1
451,10,26,0.142857,0.142857,0.571429,16.571429,27.571429,14.571429,283.428571,182.714286,...,1.400000,19.800000,365.100000,215.000000,150.100000,1.600000,-1.306000,-2.900000,-0.072000,0
499,8,29,0.000000,0.000000,0.375000,9.750000,27.375000,12.000000,221.000000,148.750000,...,1.833333,22.500000,361.000000,271.500000,89.500000,1.833333,3.473333,-0.823333,-0.740000,0


In [351]:
from sklearn.linear_model import LogisticRegression

X_train, X_test, y_train, y_test = train_test_split(df_2022_agg.iloc[:, 2:38], df_2022_agg.iloc[:, 39], test_size = 0.2, random_state = 1)

lr = LogisticRegression(max_iter = 10000)

lr.fit(X_train, y_train)

y_pred = lr.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)

print(accuracy)

0.6481481481481481


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [ ]:
i