In [30]:
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
import numpy as np
from sklearn.model_selection import cross_validate, train_test_split, RandomizedSearchCV
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler 
from scipy.stats import randint, loguniform

from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.feature_selection import RFE, RFECV


In [31]:
# Notebook display configuration: lift pandas' limits on column count, total
# width and cell width so wide frames are rendered in full.
for option_name in ("display.max_columns", "display.width", "display.max_colwidth"):
    pd.set_option(option_name, None)

# NumPy arrays: 4 decimal places, no scientific notation for small values.
np.set_printoptions(precision=4, suppress=True)

In [32]:
# Load the raw dataset from the notebook's working directory and display it.
RAW_DATA_CSV = "raw_starting_data.csv"
df = pd.read_csv(RAW_DATA_CSV)
df

Unnamed: 0,team,season,corsiPercentage,fenwickPercentage,xGoalsPercentage,xOnGoalFor_PerGame,xGoalsFor_PerGame,xReboundsFor_PerGame,xFreezeFor_PerGame,xPlayStoppedFor_PerGame,xPlayContinuedInZoneFor_PerGame,xPlayContinuedOutsideZoneFor_PerGame,flurryAdjustedxGoalsFor_PerGame,scoreVenueAdjustedxGoalsFor_PerGame,flurryScoreVenueAdjustedxGoalsFor_PerGame,shotsOnGoalFor_PerGame,missedShotsFor_PerGame,blockedShotAttemptsFor_PerGame,shotAttemptsFor_PerGame,goalsFor_PerGame,reboundsFor_PerGame,reboundGoalsFor_PerGame,freezeFor_PerGame,playStoppedFor_PerGame,playContinuedInZoneFor_PerGame,playContinuedOutsideZoneFor_PerGame,savedShotsOnGoalFor_PerGame,savedUnblockedShotAttemptsFor_PerGame,penalityMinutesFor_PerGame,faceOffsWonFor_PerGame,hitsFor_PerGame,takeawaysFor_PerGame,giveawaysFor_PerGame,lowDangerShotsFor_PerGame,mediumDangerShotsFor_PerGame,highDangerShotsFor_PerGame,lowDangerxGoalsFor_PerGame,mediumDangerxGoalsFor_PerGame,highDangerxGoalsFor_PerGame,lowDangerGoalsFor_PerGame,mediumDangerGoalsFor_PerGame,highDangerGoalsFor_PerGame,scoreAdjustedShotsAttemptsFor_PerGame,unblockedShotAttemptsFor_PerGame,scoreAdjustedUnblockedShotAttemptsFor_PerGame,dZoneGiveawaysFor_PerGame,xGoalsFromxReboundsOfShotsFor_PerGame,xGoalsFromActualReboundsOfShotsFor_PerGame,reboundxGoalsFor_PerGame,totalShotCreditFor_PerGame,scoreAdjustedTotalShotCreditFor_PerGame,scoreFlurryAdjustedTotalShotCreditFor_PerGame,xOnGoalAgainst_PerGame,xGoalsAgainst_PerGame,xReboundsAgainst_PerGame,xFreezeAgainst_PerGame,xPlayStoppedAgainst_PerGame,xPlayContinuedInZoneAgainst_PerGame,xPlayContinuedOutsideZoneAgainst_PerGame,flurryAdjustedxGoalsAgainst_PerGame,scoreVenueAdjustedxGoalsAgainst_PerGame,flurryScoreVenueAdjustedxGoalsAgainst_PerGame,shotsOnGoalAgainst_PerGame,missedShotsAgainst_PerGame,blockedShotAttemptsAgainst_PerGame,shotAttemptsAgainst_PerGame,goalsAgainst_PerGame,reboundsAgainst_PerGame,reboundGoalsAgainst_PerGame,freezeAgainst_PerGame,playStoppedAgainst_PerGame,playContinuedInZoneAgainst_PerGa
me,playContinuedOutsideZoneAgainst_PerGame,savedShotsOnGoalAgainst_PerGame,savedUnblockedShotAttemptsAgainst_PerGame,penalityMinutesAgainst_PerGame,faceOffsWonAgainst_PerGame,hitsAgainst_PerGame,takeawaysAgainst_PerGame,giveawaysAgainst_PerGame,lowDangerShotsAgainst_PerGame,mediumDangerShotsAgainst_PerGame,highDangerShotsAgainst_PerGame,lowDangerxGoalsAgainst_PerGame,mediumDangerxGoalsAgainst_PerGame,highDangerxGoalsAgainst_PerGame,lowDangerGoalsAgainst_PerGame,mediumDangerGoalsAgainst_PerGame,highDangerGoalsAgainst_PerGame,scoreAdjustedShotsAttemptsAgainst_PerGame,unblockedShotAttemptsAgainst_PerGame,scoreAdjustedUnblockedShotAttemptsAgainst_PerGame,dZoneGiveawaysAgainst_PerGame,xGoalsFromxReboundsOfShotsAgainst_PerGame,xGoalsFromActualReboundsOfShotsAgainst_PerGame,reboundxGoalsAgainst_PerGame,totalShotCreditAgainst_PerGame,scoreAdjustedTotalShotCreditAgainst_PerGame,scoreFlurryAdjustedTotalShotCreditAgainst_PerGame,playoff
0,MIN,2008,0.48,0.48,0.48,26.951341,2.292927,1.809512,6.051585,0.879268,15.443049,11.653902,2.225122,2.298780,2.230976,27.536585,11.060976,11.975610,50.573171,2.609756,1.597561,0.304878,5.439024,0.512195,12.146341,16.292683,24.926829,35.987805,10.329268,27.109756,17.902439,5.365854,7.682927,30.353659,6.451220,1.792683,0.887561,0.757927,0.647439,0.902439,0.926829,0.780488,50.710732,38.597561,38.692317,3.743902,0.405244,0.291585,0.293902,2.373049,2.378537,2.328049,29.565854,2.467683,1.993902,6.798049,0.955854,16.666707,12.588902,2.387073,2.463171,2.382683,30.707317,11.573171,12.597561,54.878049,2.402439,1.780488,0.353659,7.548780,0.731707,13.560976,16.256098,28.304878,39.878049,11.207317,28.792683,17.597561,5.402439,7.097561,33.219512,7.158537,1.902439,0.992927,0.835854,0.638902,0.853659,0.853659,0.695122,54.759878,42.280488,42.191707,3.134146,0.442683,0.341341,0.360732,2.495610,2.492439,2.442073,0
1,BOS,2008,0.50,0.50,0.51,29.458889,2.760370,1.976667,6.665309,0.952222,16.346296,12.585556,2.665926,2.789012,2.693704,30.370370,11.123457,12.938272,54.432099,3.296296,1.666667,0.370370,6.518519,0.950617,13.024691,16.037037,27.074074,38.197531,11.370370,29.135802,22.123457,6.111111,5.901235,30.901235,8.000000,2.592593,0.864815,0.959630,0.935802,1.086420,1.185185,1.024691,55.458889,41.493827,42.092469,3.259259,0.438765,0.321111,0.333951,2.850864,2.876543,2.808519,29.280000,2.667037,1.929383,6.617037,0.953210,15.995185,12.556420,2.574444,2.644938,2.552716,30.802469,10.123457,12.962963,53.888889,2.320988,1.925926,0.456790,7.432099,0.740741,11.839506,16.666667,28.481481,38.604938,11.888889,27.975309,23.148148,6.320988,7.691358,30.913580,7.530864,2.481481,0.878395,0.898642,0.890000,0.691358,0.839506,0.790123,53.007284,40.925926,40.420988,3.617284,0.429630,0.439877,0.455679,2.628025,2.603951,2.547407,1
2,ARI,2008,0.46,0.47,0.47,27.009383,2.414321,1.849383,6.110370,0.876790,15.300123,11.586049,2.328148,2.415185,2.328889,28.012346,10.851852,12.456790,51.320988,2.469136,1.716049,0.382716,6.530864,0.740741,11.740741,15.666667,25.543210,36.395062,11.901235,25.506173,22.135802,4.703704,4.962963,30.222222,6.481481,2.160494,0.872346,0.771481,0.770494,0.790123,0.864198,0.814815,51.097654,38.864198,38.810494,2.246914,0.414444,0.335802,0.338765,2.442469,2.442346,2.379877,30.842346,2.762963,2.069630,6.950000,1.006914,17.462346,13.736173,2.676667,2.775802,2.689753,31.543210,13.160494,14.654321,59.358025,3.061728,2.000000,0.592593,6.135802,1.012346,14.469136,18.024691,28.481481,41.641975,13.358025,31.246914,21.864198,5.493827,6.148148,35.012346,7.444444,2.246914,1.011111,0.895185,0.856790,0.876543,1.148148,1.037037,59.741111,44.703704,45.002716,2.814815,0.455926,0.412716,0.420000,2.751235,2.762963,2.708395,0
3,L.A,2008,0.51,0.51,0.51,29.853659,2.747439,2.122439,6.678293,0.975610,17.133659,12.453171,2.635976,2.745366,2.633780,29.060976,13.585366,13.329268,55.975610,2.463415,2.292683,0.560976,5.926829,0.719512,16.621951,14.634146,26.597561,40.182927,13.548780,28.902439,25.951220,5.658537,11.317073,32.365854,7.963415,2.317073,0.964146,0.976707,0.806707,0.719512,0.792683,0.951220,55.825610,42.646341,42.585488,5.792683,0.479634,0.460244,0.468049,2.724146,2.720610,2.639878,28.422073,2.692317,1.969878,6.323171,0.910366,16.105000,11.953659,2.593537,2.695244,2.596951,28.073171,12.621951,12.682927,53.378049,2.756098,1.817073,0.365854,6.060976,0.756098,14.536585,14.780488,25.317073,37.939024,13.195122,26.695122,24.500000,5.780488,11.280488,31.109756,7.134146,2.451220,0.936951,0.864268,0.891220,0.865854,0.975610,0.914634,53.454024,40.695122,40.767317,5.743902,0.439878,0.356463,0.372073,2.711463,2.716341,2.648415,0
4,COL,2008,0.47,0.48,0.49,26.916951,2.483049,1.906220,6.045244,0.882195,14.914756,11.198171,2.389878,2.471707,2.378659,28.768293,10.097561,12.158537,51.024390,2.317073,2.195122,0.463415,6.707317,0.743902,11.256098,15.646341,26.451220,36.548780,11.634146,26.817073,17.829268,8.060976,8.512195,29.646341,7.121951,2.097561,0.936951,0.870976,0.675122,0.780488,0.829268,0.707317,50.368415,38.865854,38.447805,3.768293,0.423780,0.444390,0.444756,2.366585,2.351220,2.307073,28.449756,2.627805,1.920610,6.450244,0.941829,15.753659,12.303780,2.549512,2.653780,2.574512,28.975610,12.390244,16.060976,57.426829,3.085366,2.402439,0.597561,6.280488,0.963415,12.658537,15.975610,25.890244,38.280488,12.048780,29.280488,16.817073,8.158537,8.195122,32.390244,6.695122,2.280488,0.998293,0.812073,0.817439,1.073171,0.987805,1.024390,58.150122,41.365854,41.898049,3.548780,0.424756,0.450244,0.462561,2.500122,2.524268,2.476829,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
549,CBJ,2025,0.50,0.49,0.48,32.952683,3.139512,2.216098,7.940488,1.106829,17.289268,12.892927,2.952927,3.150244,2.964634,30.170732,14.414634,17.609756,62.195122,3.000000,3.512195,0.463415,6.829268,0.097561,16.073171,15.073171,27.170732,41.585366,7.829268,29.804878,19.048780,5.048780,16.439024,32.878049,8.341463,3.365854,0.988780,1.026341,1.124878,1.048780,1.170732,0.780488,62.807317,44.585366,44.996585,7.926829,0.496098,0.710000,0.710000,2.925610,2.943415,2.834878,34.327317,3.352683,2.330000,8.235610,1.146098,18.534390,13.328293,3.129756,3.345366,3.121951,31.000000,15.926829,16.000000,62.926829,3.317073,3.658537,0.292683,6.707317,0.170732,16.658537,16.414634,27.682927,43.609756,7.024390,28.951220,18.146341,4.731707,15.756098,35.341463,7.878049,3.707317,1.108293,0.966585,1.278049,1.585366,0.829268,0.902439,62.596098,46.926829,46.766098,7.097561,0.517317,0.832439,0.832439,3.037561,3.034390,2.949512,0
550,COL,2025,0.56,0.56,0.59,37.434146,3.819512,2.564634,9.163415,1.249512,20.076585,14.419024,3.621220,3.888049,3.687561,34.536585,16.756098,19.243902,70.536585,4.000000,4.024390,0.634146,7.170732,0.048780,19.219512,16.829268,30.536585,47.292683,6.756098,31.170732,17.390244,4.658537,15.634146,38.073171,9.073171,4.146341,1.166829,1.142439,1.510000,1.682927,1.048780,1.268293,72.659756,51.292683,52.509024,7.048780,0.570488,0.841951,0.841951,3.548293,3.610488,3.487561,29.997073,2.650000,2.093659,7.311707,1.005122,16.196098,11.499756,2.490244,2.600488,2.443415,26.731707,14.024390,15.121951,55.878049,2.170732,3.560976,0.317073,5.829268,0.048780,14.731707,14.414634,24.560976,38.585366,7.829268,29.902439,20.000000,4.317073,16.609756,30.926829,6.853659,2.975610,0.856098,0.876829,0.916829,0.878049,0.682927,0.609756,54.356829,40.756098,39.870244,8.292683,0.466341,0.712683,0.714634,2.401463,2.360000,2.291220,1
551,MTL,2025,0.48,0.49,0.49,29.636905,3.093095,1.983571,6.885714,1.009286,15.814048,11.666667,2.928571,3.107857,2.942619,25.738095,14.714286,14.904762,55.357143,3.309524,2.880952,0.333333,5.380952,0.142857,14.238095,14.500000,22.428571,37.142857,9.285714,26.928571,21.380952,4.309524,13.619048,28.952381,8.500000,3.000000,0.921667,1.018810,1.152619,1.119048,1.357143,0.833333,55.474762,40.452381,40.569762,6.833333,0.445476,0.612619,0.621429,2.916905,2.930714,2.837143,30.615000,3.160238,2.111905,7.302619,1.035714,16.274286,11.448810,2.981905,3.174286,2.995000,26.976190,14.357143,17.500000,58.833333,3.261905,3.666667,0.333333,5.214286,0.023810,14.952381,14.214286,23.714286,38.071429,8.666667,27.190476,21.952381,4.500000,15.261905,28.761905,9.142857,3.428571,0.848810,1.110476,1.201190,1.071429,1.380952,0.809524,59.085000,41.333333,41.483810,6.809524,0.475952,0.709286,0.710476,2.925714,2.937857,2.820714,1
552,NSH,2025,0.52,0.50,0.49,31.054146,3.026829,2.192683,7.392927,1.035854,16.776829,11.965122,2.879024,3.009268,2.862439,28.073171,14.317073,16.926829,59.317073,2.829268,3.731707,0.439024,6.000000,0.073171,15.243902,14.512195,25.243902,39.560976,8.536585,30.804878,19.512195,5.170732,15.317073,31.975610,6.902439,3.512195,0.928049,0.875122,1.223902,1.024390,0.829268,0.975610,58.498293,42.390244,41.941951,7.292683,0.491951,0.772927,0.772927,2.745854,2.733171,2.651463,30.845122,3.183659,2.184146,7.253659,1.034878,16.402439,11.819024,3.046341,3.227561,3.087805,28.097561,13.780488,13.268293,55.146341,3.317073,2.902439,0.536585,6.024390,0.024390,14.439024,15.170732,24.780488,38.560976,8.195122,28.268293,20.121951,4.317073,15.682927,30.219512,8.243902,3.414634,0.889756,1.020976,1.272927,1.146341,1.243902,0.926829,56.092927,41.878049,42.502195,7.414634,0.490244,0.656829,0.656829,3.017073,3.051463,2.984634,0


In [33]:
# Hold out the 2025 season as the test set; all other seasons are training data.
is_holdout = df["season"] == 2025
df_train, df_test = df[~is_holdout], df[is_holdout]
print(df_train.shape, df_test.shape)

(522, 100) (32, 100)


In [34]:
# Team labels of the hold-out rows, kept so results can be mapped back to teams.
# They are looked up on the untouched `df` through the row index that `df_test`
# preserves, rather than on `df_test` itself, so this cell can be re-run:
# previously a second execution raised KeyError because "team"/"season" had
# already been dropped from `df_train`/`df_test`.
team_key = df.loc[df_test.index, "team"]

# Identifier columns are not model inputs. errors="ignore" turns the drop into
# a no-op when the columns are already gone (i.e. on re-execution).
id_columns = ["team", "season"]
df_train = df_train.drop(columns=id_columns, errors="ignore")
df_test = df_test.drop(columns=id_columns, errors="ignore")

In [35]:
# Shapes of both splits after the identifier columns were removed, followed by
# the hold-out team labels as the cell's displayed value.
split_shapes = [frame.shape for frame in (df_train, df_test)]
print(*split_shapes)
team_key

(522, 98) (32, 98)


522    NYI
523    NYR
524    PIT
525    LAK
526    UTA
527    TOR
528    MIN
529    VAN
530    PHI
531    CAR
532    WSH
533    NJD
534    CGY
535    SEA
536    WPG
537    STL
538    OTT
539    ANA
540    DET
541    SJS
542    DAL
543    BUF
544    VGK
545    EDM
546    BOS
547    CHI
548    FLA
549    CBJ
550    COL
551    MTL
552    NSH
553    TBL
Name: team, dtype: object

In [36]:
# Separate the label column from the feature columns for both splits.
target = "playoff"
X_train, y_train = df_train.drop(columns=[target]), df_train[target]
X_test, y_test = df_test.drop(columns=[target]), df_test[target]

In [37]:
# Baseline model: standardize the features, then logistic regression.
pipe = make_pipeline(StandardScaler(), LogisticRegression(max_iter=1000))

# 10-fold cross-validation on the training seasons, keeping the train scores
# as well so they can be compared with the validation scores per fold.
cv_scores = cross_validate(pipe, X_train, y_train, cv=10, return_train_score=True)
result = pd.DataFrame(cv_scores)
result.sort_values(by="test_score", ascending=False)

Unnamed: 0,fit_time,score_time,test_score,train_score
4,0.062586,0.011476,0.961538,0.921277
2,0.03439,0.002824,0.923077,0.929787
5,0.035518,0.001926,0.903846,0.929787
9,0.016704,0.003183,0.903846,0.93617
3,0.037072,0.00414,0.884615,0.92766
8,0.039254,0.004081,0.884615,0.93617
7,0.032731,0.003818,0.865385,0.931915
6,0.032921,0.003332,0.865385,0.934043
1,0.028775,0.004013,0.830189,0.929638
0,0.018182,0.003845,0.811321,0.948827


In [38]:
# Mean validation score across the folds, as a plain Python float.
float(result["test_score"].mean())

0.8833817126269956

In [39]:
# Refit the baseline on all training seasons and score it on the hold-out
# season (fit returns the fitted pipeline, so the calls can be chained).
pipe.fit(X_train, y_train).score(X_test, y_test)

0.78125

In [40]:
# Last step of the pipeline, i.e. the LogisticRegression estimator.
lr = pipe[-1]

In [41]:
# Absolute coefficient per feature (the model was fit on standardized inputs
# inside `pipe`), ordered from largest to smallest magnitude.
abs_coefs = np.abs(lr.coef_.T)
coef_df = pd.DataFrame(abs_coefs, index=X_train.columns, columns=["Coefficients"])
coef_df = coef_df.sort_values("Coefficients", ascending=False)
coef_df

Unnamed: 0,Coefficients
goalsAgainst_PerGame,1.229585
giveawaysAgainst_PerGame,1.129046
highDangerGoalsAgainst_PerGame,1.078679
goalsFor_PerGame,0.965700
lowDangerGoalsAgainst_PerGame,0.949597
...,...
xFreezeFor_PerGame,0.038322
totalShotCreditFor_PerGame,0.036966
hitsFor_PerGame,0.016351
unblockedShotAttemptsAgainst_PerGame,0.007114


In [42]:
# NOTE(review): disabled experiment, kept for reference only.
# As written it would run RFE with `lr` on the unscaled `X_train` (the
# StandardScaler step of `pipe` is not applied here) and would score every
# feature count on `X_test`, so choosing a feature count from these numbers
# would tune on the hold-out season.
# features = []
# train_score = []
# test_score = []

# for f in range(97, 9, -1):
#     print(f)
#     rfe = RFE(lr, n_features_to_select=f)
#     rfe.fit(X_train, y_train)
    
#     features.append(f)
#     train_score.append(rfe.score(X_train, y_train))
#     test_score.append(rfe.score(X_test, y_test))
    


In [43]:
# results_df = pd.DataFrame({
#     "features": features,
#     "train_score": train_score,
#     "test_score": test_score
# })
# results_df

In [44]:
# fig, ax = plt.subplots(figsize=(15, 5))
# ax.plot(results_df.index, results_df['train_score'], marker='o', linewidth=2, markersize=8, label='Train score')
# ax.plot(results_df.index, results_df['test_score'], marker='o', linewidth=2, markersize=8, label='Test score')
# ax.set_xlabel('Model')
# ax.set_ylabel('Score')
# ax.set_title('Line Chart')
# ax.legend()
# ax.grid(alpha=0.3)

# plt.tight_layout()
# plt.show()

In [52]:
# Learn the scaling statistics from the training split only, then apply the
# same transformation to both splits.
ss = StandardScaler().fit(X_train)
X_train_scaled, X_test_scaled = ss.transform(X_train), ss.transform(X_test)

In [54]:
# Recursive feature elimination with cross-validation (RFECV defaults) on the
# already-standardized training matrix.
# NOTE(review): X_train_scaled was standardized with statistics from the whole
# training split before RFECV divides it into folds, so the validation folds
# are not fully independent of the scaler -- consider selecting inside a
# Pipeline if an unbiased CV estimate matters.
rfe_cv = RFECV(LogisticRegression(max_iter=1000))
rfe_cv.fit(X_train_scaled, y_train)

# Report "selected of total". The previous f-string interpolated a tuple and
# printed "(3, 97)" after a label that announces a single number. int() works
# whether n_features_ is a NumPy integer or a plain int.
n_selected = int(rfe_cv.n_features_)
n_total = X_train.shape[1]
print(f"Number of selected features: {n_selected} of {n_total}")

Number of selected features: (3, 97)


In [None]:
# Keep only the columns RFECV selected, for both standardized splits.
X_train_selected, X_test_selected = (
    rfe_cv.transform(scaled) for scaled in (X_train_scaled, X_test_scaled)
)

In [58]:
# Fit a fresh logistic regression on the selected features only and report its
# score on the hold-out season.
final_model = LogisticRegression(max_iter=1000).fit(X_train_selected, y_train)
final_model.score(X_test_selected, y_test)

0.8125