 # NFL Logistic Regression Model

In [1]:
from path import Path
import pandas as pd

In [2]:
data = Path('../Resources/df2021.csv')
df = pd.read_csv(data)
df.head()

Unnamed: 0,Age,Year,BaseSalary,SigningBonus,Cap Hit,Salary%,Wins,Losses,Ties,Win %,...,Division_AFC South,Division_AFC West,Division_NFC East,Division_NFC North,Division_NFC South,Division_NFC West,Playoff_No,Playoff_Yes,Won Superbowl_No,Won Superbowl_Yes
0,33,2021,19000000,13000000,32000000,0.174,7,10,0,0.411765,...,0,0,0,0,0,1,1,0,1,0
1,33,2021,21000000,10000000,31000000,0.166,8,9,0,0.470588,...,0,0,0,1,0,0,1,0,1,0
2,38,2021,1100000,16321568,27073568,0.145,13,4,0,0.764706,...,0,0,0,1,0,0,0,1,1,0
3,36,2021,2000000,9300000,26912500,0.146,7,10,0,0.411765,...,0,0,0,0,1,0,1,0,1,0
4,30,2021,24100000,1400000,26400000,0.135,10,7,0,0.588235,...,0,0,0,0,0,1,0,1,1,0


In [4]:
# Playoff dataframe for 2021
playoffs_twentyone_df = df[["Year", "Cap Hit", "Salary%", "Playoff_Yes"]]
playoffs_twentyone_df.head(10)

Unnamed: 0,Year,Cap Hit,Salary%,Playoff_Yes
0,2021,32000000,0.174,0
1,2021,31000000,0.166,0
2,2021,27073568,0.145,1
3,2021,26912500,0.146,0
4,2021,26400000,0.135,1
5,2021,25910000,0.138,1
6,2021,25800000,0.137,1
7,2021,25000000,0.119,1
8,2021,22125000,0.116,1
9,2021,22000000,0.105,1


 ## Separate the Features (X) from the Target (y)

In [5]:
y = playoffs_twentyone_df["Playoff_Yes"]
X = playoffs_twentyone_df.drop(columns="Playoff_Yes")

 ## Split our data into training and testing

In [6]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, 
                                                    y, 
                                                    random_state=1, 
                                                    stratify=y)
X_train.shape

(1614, 3)

 ## Create a Logistic Regression Model

In [7]:
from sklearn.linear_model import LogisticRegression
classifier = LogisticRegression(solver='lbfgs',
                                max_iter=200,
                                random_state=1)

 ## Fit (train) or model using the training data

In [8]:
classifier.fit(X_train, y_train)

LogisticRegression(max_iter=200, random_state=1)

 ## Make predictions for 2021

In [11]:
y_pred = classifier.predict(X_test)
results = pd.DataFrame({"Prediction": y_pred, "Actual": y_test}).reset_index(drop=True)
results.head(20)

Unnamed: 0,Prediction,Actual
0,0,1
1,0,0
2,0,0
3,0,1
4,0,1
5,0,0
6,0,0
7,0,0
8,0,1
9,0,1


In [12]:
from sklearn.metrics import accuracy_score
print(accuracy_score(y_test, y_pred))

0.6096654275092936


# Make predictions for 2011

In [16]:
data = Path('../Resources/df2011.csv')
df = pd.read_csv(data)
df.head()

Unnamed: 0,Age,Year,BaseSalary,SigningBonus,Cap Hit,Salary%,Wins,Losses,Ties,Win %,...,Division_AFC South,Division_AFC West,Division_NFC East,Division_NFC North,Division_NFC South,Division_NFC West,Playoff_No,Playoff_Yes,Won Superbowl_No,Won Superbowl_Yes
0,25,2011,14225000,0,17228125,0.134,8,8,0,0.5,...,0,0,0,0,0,0,1,0,1,0
1,31,2011,11420000,2500000,16420000,0.134,2,14,0,0.125,...,1,0,0,0,0,0,1,0,1,0
2,28,2011,2000000,5000000,16250000,0.133,8,8,0,0.5,...,0,0,0,0,0,1,1,0,1,0
3,26,2011,13800000,0,16200000,0.135,10,6,0,0.625,...,1,0,0,0,0,0,0,1,1,0
4,35,2011,3400000,4000000,16000000,0.13,2,14,0,0.125,...,1,0,0,0,0,0,1,0,1,0


In [41]:
# Playoff dataframe for 2011
playoffs_eleven_df = df[["Year", "Cap Hit", "Salary%", "Playoff_Yes"]]
playoffs_eleven_df.head(10)

Unnamed: 0,Year,Cap Hit,Salary%,Playoff_Yes
0,2011,17228125,0.134,0
1,2011,16420000,0.134,0
2,2011,16250000,0.133,0
3,2011,16200000,0.135,1
4,2011,16000000,0.13,0
5,2011,15960000,0.123,0
6,2011,15623000,0.107,1
7,2011,14400000,0.11,0
8,2011,14175000,0.117,1
9,2011,14100000,0.116,1


In [42]:
y = playoffs_eleven_df["Playoff_Yes"]
X = playoffs_eleven_df.drop(columns="Playoff_Yes")

In [43]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, 
                                                    y, 
                                                    random_state=1, 
                                                    stratify=y)
X_train.shape

(1215, 3)

In [44]:
from sklearn.linear_model import LogisticRegression
classifier = LogisticRegression(solver='lbfgs',
                                max_iter=200,
                                random_state=1)

In [45]:
classifier.fit(X_train, y_train)

LogisticRegression(max_iter=200, random_state=1)

In [46]:
y_pred = classifier.predict(X_test)
results = pd.DataFrame({"Prediction": y_pred, "Actual": y_test}).reset_index(drop=True)
results.head(20)

Unnamed: 0,Prediction,Actual
0,0,1
1,0,0
2,0,1
3,0,0
4,0,0
5,0,1
6,0,1
7,0,0
8,0,0
9,0,0


In [47]:
from sklearn.metrics import accuracy_score
print(accuracy_score(y_test, y_pred))

0.6197530864197531
