In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

In [2]:
# Read the NHL season-level dataset from the CSV file that sits next to
# this notebook, then preview its first five rows.
nhl_df = pd.read_csv(filepath_or_buffer='nhl_analysis_79-11.csv')
nhl_df.head(n=5)

Unnamed: 0,year,lgID,tmID,playoff,made_playoff,made_CF,made_F,G,W,L,...,GA,PIM,PIM/gm,PPG,PPC,SHA,PKG,PKC,GF/gm,GA/gm
0,1979,NHL,ATF,PRE,1,0,0,80,35,32,...,269,1048,13.1,51,216,7,52,272,3.52,3.36
1,1979,NHL,BOS,QF,1,0,0,80,46,21,...,234,1460,18.3,60,246,4,53,312,3.88,2.92
2,1979,NHL,BUF,SF,1,1,0,80,47,17,...,201,967,12.1,67,275,4,43,252,3.98,2.51
3,1979,NHL,CHI,QF,1,0,0,80,34,27,...,250,1325,16.6,64,269,9,56,293,3.01,3.12
4,1979,NHL,COR,,0,0,0,80,19,48,...,308,1020,12.8,53,304,3,52,233,2.92,3.85


In [3]:
# Columns kept for modelling: the binary target ("made_CF") followed by
# the per-game features used as predictors.
selected_columns = ["made_CF", "Pts/gm","PIM/gm", "GF/gm", "GA/gm"]

# Label-based selection of just those columns (every row is kept).
nhl_playoff_predictor_df = nhl_df.loc[:, selected_columns]
nhl_playoff_predictor_df.head(n=5)

Unnamed: 0,made_CF,Pts/gm,PIM/gm,GF/gm,GA/gm
0,0,1.04,13.1,3.52,3.36
1,0,1.31,18.3,3.88,2.92
2,1,1.38,12.1,3.98,2.51
3,0,1.09,16.6,3.01,3.12
4,0,0.64,12.8,2.92,3.85


In [6]:
# Target: the "made_CF" column.
y = nhl_playoff_predictor_df["made_CF"]

# Features: every remaining column. drop() returns a new frame, so the
# source frame is left untouched and this cell is safe to re-run.
X = nhl_playoff_predictor_df.drop(columns="made_CF")

In [7]:
# Hold out part of the rows for evaluation. test_size=0.25 spells out the
# scikit-learn default that applies when no size is given; the fixed seed
# keeps the shuffle reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

In [8]:
# Instantiate the Logistic Regression model.
# random_state=1 is kept as originally specified. NOTE: per the
# scikit-learn documentation, random_state is only consulted by the
# 'sag', 'saga' and 'liblinear' solvers, so it has no effect on the
# 'lbfgs' fit below.
classifier = LogisticRegression(solver='lbfgs', max_iter=200, random_state=1)

# Fit the model using the training data. Being the last expression in the
# cell, the fitted estimator is what gets displayed; the stray bare
# `classifier` expression that used to sit mid-cell was a no-op and has
# been removed.
classifier.fit(X_train, y_train)

In [9]:
# Report the model's score (via classifier.score) on both the training
# split and the held-out test split, one per line.
print(
    f"Training Data Score: {classifier.score(X_train, y_train)}",
    f"Testing Data Score: {classifier.score(X_test, y_test)}",
    sep="\n",
)

Training Data Score: 0.8610169491525423
Testing Data Score: 0.7969543147208121


In [10]:
# Predict the class for every row of the test features.
# NOTE: the variable name is misspelled ("predicitons"), but it is kept
# as-is because later cells refer to it by this exact name.
predicitons = classifier.predict(X_test)

# Pair each prediction with its true label. Passing the labels as a plain
# array yields a fresh 0..n-1 index directly.
results = pd.DataFrame(
    {"Prediction": predicitons, "Actual": y_test.to_numpy()}
)
results.head(n=10)

Unnamed: 0,Prediction,Actual
0,0,0
1,0,0
2,0,0
3,0,0
4,0,0
5,0,1
6,0,0
7,0,0
8,0,0
9,0,0


In [11]:
# Confusion matrix of the test-set predictions against the true labels
# (scikit-learn convention: rows are true classes, columns are predicted).
confusion_matrix(y_true=y_test, y_pred=predicitons)

array([[154,   4],
       [ 36,   3]])

In [13]:
# Human-readable labels for the two classes, in class order (0, then 1).
target_names = [
    "0 (Did not make conference finals)",
    "1 (Made conference finals)",
]

# Build the per-class precision / recall / F1 report, then print it.
report = classification_report(
    y_true=y_test,
    y_pred=predicitons,
    target_names=target_names,
)
print(report)

                                    precision    recall  f1-score   support

0 (Did not make conference finals)       0.81      0.97      0.89       158
        1 (Made conference finals)       0.43      0.08      0.13        39

                          accuracy                           0.80       197
                         macro avg       0.62      0.53      0.51       197
                      weighted avg       0.73      0.80      0.74       197

