In [249]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
#from tensorflow.keras.models import Sequential
#from tensorflow.keras.layers import Dense
#from tensorflow.keras.optimizers import Adam

In [251]:
# Import data
df_previous = pd.read_csv('Resources/PlayerSeasonTotals_2122_2324.csv')
df_current = pd.read_csv('Resources/PlayerSeasonTotals_2425.csv')

In [253]:
# Create Points Per Game Metric
df_previous['PPG'] = df_previous['Total Points'] / df_previous['GP']

In [255]:
# Preview data for data from 2021/2022 through 2023/2024 season
df_previous[['Player', 'GP', 'Total Points', 'PPG']]

Unnamed: 0,Player,GP,Total Points,PPG
0,Connor McDavid,238,408,1.714286
1,Leon Draisaitl,241,344,1.427386
2,Nathan MacKinnon,218,339,1.555046
3,Nikita Kucherov,210,326,1.552381
4,Artemi Panarin,239,308,1.288703
...,...,...,...,...
1250,Gavin Brindley,1,0,0.000000
1251,Bradly Nadeau,1,0,0.000000
1252,Ondrej Pavel,2,0,0.000000
1253,Nikolas Matinpalo,4,0,0.000000


In [257]:
# Create Points Per Game Metric
df_current['PPG'] = df_current['Total Points'] / df_current['GP']

In [259]:
# Preview data for data for the 2024/2025 season (current season)
#df_current
df_current[['Player', 'GP', 'Total Points', 'PPG']]

Unnamed: 0,Player,GP,Total Points,PPG
0,Nathan MacKinnon,41,66,1.609756
1,Leon Draisaitl,39,59,1.512821
2,Mikko Rantanen,41,58,1.414634
3,Mitch Marner,41,56,1.365854
4,Nikita Kucherov,35,55,1.571429
...,...,...,...,...
802,Jett Luchanko,4,0,0.000000
803,Justin Hryckowian,2,0,0.000000
804,Juha Jaaska,2,0,0.000000
805,Jere Innala,7,0,0.000000


In [261]:
# Prepare features and target variable
features = ['TOI', 'Shots', 'ixG', 'iCF', 'iFF', 'iSCF', 'iHDCF', 'Rush Attempts', 'Rebounds Created', 'PIM', 
            'Total Penalties', 'Penalties Drawn', 'Giveaways', 'Takeaways', 'Hits', 'Hits Taken', 'Shots Blocked', 'Faceoffs Won', 'Faceoffs Lost']
target = 'PPG'

X = df_previous[features]
y = df_previous[target]

In [263]:
# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [265]:
# Apply StandardScaler
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

## Fit Random Forest Model

In [268]:
# Train Random Forest model
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train_scaled, y_train)

In [270]:
# Make predictions on test set
y_pred = rf_model.predict(X_test_scaled)

## Evaluate Model

In [277]:
#cm = confusion_matrix(y_test, y_pred)
#cm_df = pd.DataFrame(
 #   cm, index=["Actual 0", "Actual 1"], columns=["Predicted 0", "Predicted 1"]
#)

# Calculating the accuracy score
acc_score = accuracy_score(y_test, y_pred)

ValueError: continuous is not supported

In [206]:
# Evaluate model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f"Mean Squared Error: {mse}")
print(f"R-squared Score: {r2}")

Mean Squared Error: 0.01687300401566589
R-squared Score: 0.7432691289830688


## Apply Model to Current Season

In [208]:
# Prepare current season data
X_current = df_current[features]
X_current_scaled = scaler.transform(X_current)

In [210]:
# Predict points for current season
current_predictions = rf_model.predict(X_current_scaled)

In [212]:
# Add predictions to current season dataframe
df_current['RF_Predicted_PPG'] = current_predictions

# Calculate projected points based on remaining games
games_remaining = 82 - df_current['GP']
current_points = df_current['Total Points']
projected_remaining_points = df_current['RF_Predicted_PPG'] * games_remaining

# Total projected points = current points + projected remaining points
df_current['RF_Projected_Season_Points'] = (current_points + projected_remaining_points).round().astype(int)
#df_current['RF_Predicted_Points'] = (current_predictions *82).round().astype(int)

In [214]:
# Display results
#print(df_current[['Team', 'GP', 'Points', 'RF_Predicted_PPG', 'RF_Predicted_Points']])
#print(df_current[['Player', 'Team', 'GP', 'Goals', 'Total Assists', 'Total Points', 'RF_Predicted_PPG', 'RF_Predicted_Points']].sort_values(by='RF_Predicted_Points', ascending=False))

# Display results sorted by projected season points
print(df_current[['Player', 'Team', 'GP', 'Total Points', 'RF_Predicted_PPG', 'RF_Projected_Season_Points']]
      .sort_values(by='RF_Projected_Season_Points', ascending=False)
      .head(20))

              Player Team  GP  Total Points  RF_Predicted_PPG  \
0   Nathan MacKinnon  COL  41            66          0.568282   
1     Leon Draisaitl  EDM  39            59          0.541580   
2     Mikko Rantanen  COL  41            58          0.590120   
5     Connor McDavid  EDM  36            54          0.581533   
4    Nikita Kucherov  T.B  35            55          0.516830   
8    Kirill Kaprizov  MIN  34            50          0.597887   
3       Mitch Marner  TOR  41            56          0.502159   
6        Kyle Connor  WPG  41            52          0.584088   
7        Jack Eichel  VGK  39            52          0.515328   
11       Jack Hughes  N.J  43            48          0.587003   
19  William Nylander  TOR  41            43          0.605458   
13    Mark Scheifele  WPG  41            46          0.538213   
14      Sam Reinhart  FLA  41            46          0.539091   
20     Sebastian Aho  CAR  40            43          0.564003   
23     Sidney Crosby  PIT

In [None]:
# Train Neural Network model
nn_model = Sequential([
    Dense(64, activation='relu', input_shape=(len(features),)),
    Dense(32, activation='relu'),
    Dense(16, activation='relu'),
    Dense(1)
])
nn_model.compile(optimizer=Adam(learning_rate=0.001), loss='mean_squared_error')
nn_model.fit(X_train_scaled, y_train, epochs=100, batch_size=32, verbose=0)

In [None]:
# Predict points for current season using Neural Network
nn_predictions = nn_model.predict(X_current_scaled).flatten()

In [None]:
# Add Neural Network predictions to current season dataframe
#df_current['RF_Predicted_Points'] = rf_adjusted_predictions.round().astype(int)
df_current['NN_Predicted_Points'] = nn_predictions.round().astype(int)

In [None]:
# Display results
print(df_current[['Team', 'GP', 'Points', 'RF_Predicted_Points', 'NN_Predicted_Points']])