# NHL Player Analysis

## Notebook Setup

To import the required python libraries and setting notebook display parameters.

In [1]:
# Import libraries
import pandas as pd
import numpy as np
import os.path as path
from sklearn import preprocessing
import warnings
import math
import glob
import os
warnings.simplefilter(action = 'ignore', category = FutureWarning)
warnings.simplefilter(action = 'ignore', category = UserWarning)

In [2]:
# Settings to allow all columns in dataframe to display
desired_width = 320
display_columns = 222
pd.set_option('display.width', desired_width)
np.set_printoptions(linewidth = desired_width)
pd.set_option('display.max_columns', display_columns)

## Loading Data

Loading NHL player data for multiple years into one master dataset.<br>
Source: https://www.moneypuck.com

In [3]:
def load_csv(folder_path):
    """Load multiple .csv files from a specific folder path"""
    
    # Return a list of paths matching a pathname pattern.
    all_files = glob.glob(folder_path + "/*.csv")

    # Initiate an empty list to store the data
    li = []

    # Loop to append each files data to the list
    for filename in all_files:
        df = pd.read_csv(filename, index_col=None, header=0)
        li.append(df)

    # Join all the files into one DataFrame
    df = pd.concat(li, axis=0, ignore_index=True)

    df = df.sort_values(['name','season'], ascending=False)
    print('The shape of the data is:', df.shape)
    
    return df

In [4]:
# Local path to folder containing files for each year
path = '/Users/nathananderson/Documents/Data_Science/NHL_Player_Analysis/NHL_Player_Analysis/Datasets/nhl_player_data/'

df = load_csv(path)
df.head()

The shape of the data is: (47970, 154)


Unnamed: 0,playerId,season,name,team,position,situation,games_played,icetime,shifts,gameScore,onIce_xGoalsPercentage,offIce_xGoalsPercentage,onIce_corsiPercentage,offIce_corsiPercentage,onIce_fenwickPercentage,offIce_fenwickPercentage,iceTimeRank,I_F_xOnGoal,I_F_xGoals,I_F_xRebounds,I_F_xFreeze,I_F_xPlayStopped,I_F_xPlayContinuedInZone,I_F_xPlayContinuedOutsideZone,I_F_flurryAdjustedxGoals,I_F_scoreVenueAdjustedxGoals,I_F_flurryScoreVenueAdjustedxGoals,I_F_primaryAssists,I_F_secondaryAssists,I_F_shotsOnGoal,I_F_missedShots,I_F_blockedShotAttempts,I_F_shotAttempts,I_F_points,I_F_goals,I_F_rebounds,I_F_reboundGoals,I_F_freeze,I_F_playStopped,I_F_playContinuedInZone,I_F_playContinuedOutsideZone,I_F_savedShotsOnGoal,I_F_savedUnblockedShotAttempts,penalties,I_F_penalityMinutes,I_F_faceOffsWon,I_F_hits,I_F_takeaways,I_F_giveaways,I_F_lowDangerShots,I_F_mediumDangerShots,I_F_highDangerShots,I_F_lowDangerxGoals,I_F_mediumDangerxGoals,I_F_highDangerxGoals,I_F_lowDangerGoals,I_F_mediumDangerGoals,I_F_highDangerGoals,I_F_scoreAdjustedShotsAttempts,I_F_unblockedShotAttempts,I_F_scoreAdjustedUnblockedShotAttempts,I_F_dZoneGiveaways,I_F_xGoalsFromxReboundsOfShots,I_F_xGoalsFromActualReboundsOfShots,I_F_reboundxGoals,I_F_xGoals_with_earned_rebounds,I_F_xGoals_with_earned_rebounds_scoreAdjusted,I_F_xGoals_with_earned_rebounds_scoreFlurryAdjusted,I_F_shifts,I_F_oZoneShiftStarts,I_F_dZoneShiftStarts,I_F_neutralZoneShiftStarts,I_F_flyShiftStarts,I_F_oZoneShiftEnds,I_F_dZoneShiftEnds,I_F_neutralZoneShiftEnds,I_F_flyShiftEnds,faceoffsWon,faceoffsLost,timeOnBench,penalityMinutes,penalityMinutesDrawn,penaltiesDrawn,shotsBlockedByPlayer,OnIce_F_xOnGoal,OnIce_F_xGoals,OnIce_F_flurryAdjustedxGoals,OnIce_F_scoreVenueAdjustedxGoals,OnIce_F_flurryScoreVenueAdjustedxGoals,OnIce_F_shotsOnGoal,OnIce_F_missedShots,OnIce_F_blockedShotAttempts,OnIce_F_shotAttempts,OnIce_F_goals,OnIce_F_rebounds,OnIce_F_reboundGoals,OnIce_F_lowDangerShots,OnIce_F_mediumDangerShots,OnIce_F_highDangerShots,OnIce_F_lowDangerxGoals,OnIce_F_mediumDangerxGoals,OnIce_F_highDangerxGoals,OnIce_F_lowDangerGoals,OnIce_F_mediumDangerGoals,OnIce_F_highDangerGoals,OnIce_F_scoreAdjustedShotsAttempts,OnIce_F_unblockedShotAttempts,OnIce_F_scoreAdjustedUnblockedShotAttempts,OnIce_F_xGoalsFromxReboundsOfShots,OnIce_F_xGoalsFromActualReboundsOfShots,OnIce_F_reboundxGoals,OnIce_F_xGoals_with_earned_rebounds,OnIce_F_xGoals_with_earned_rebounds_scoreAdjusted,OnIce_F_xGoals_with_earned_rebounds_scoreFlurryAdjusted,OnIce_A_xOnGoal,OnIce_A_xGoals,OnIce_A_flurryAdjustedxGoals,OnIce_A_scoreVenueAdjustedxGoals,OnIce_A_flurryScoreVenueAdjustedxGoals,OnIce_A_shotsOnGoal,OnIce_A_missedShots,OnIce_A_blockedShotAttempts,OnIce_A_shotAttempts,OnIce_A_goals,OnIce_A_rebounds,OnIce_A_reboundGoals,OnIce_A_lowDangerShots,OnIce_A_mediumDangerShots,OnIce_A_highDangerShots,OnIce_A_lowDangerxGoals,OnIce_A_mediumDangerxGoals,OnIce_A_highDangerxGoals,OnIce_A_lowDangerGoals,OnIce_A_mediumDangerGoals,OnIce_A_highDangerGoals,OnIce_A_scoreAdjustedShotsAttempts,OnIce_A_unblockedShotAttempts,OnIce_A_scoreAdjustedUnblockedShotAttempts,OnIce_A_xGoalsFromxReboundsOfShots,OnIce_A_xGoalsFromActualReboundsOfShots,OnIce_A_reboundxGoals,OnIce_A_xGoals_with_earned_rebounds,OnIce_A_xGoals_with_earned_rebounds_scoreAdjusted,OnIce_A_xGoals_with_earned_rebounds_scoreFlurryAdjusted,OffIce_F_xGoals,OffIce_A_xGoals,OffIce_F_shotAttempts,OffIce_A_shotAttempts,xGoalsForAfterShifts,xGoalsAgainstAfterShifts,corsiForAfterShifts,corsiAgainstAfterShifts,fenwickForAfterShifts,fenwickAgainstAfterShifts
20255,8469820,2013,Zenon Konopka,BUF,C,other,59,307.0,9.0,-0.4,0.14,0.32,0.47,0.49,0.36,0.46,82.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9.0,2.0,6.0,0.0,1.0,0.0,0.0,1.0,8.0,6.0,6.0,1790.0,0.0,0.0,0.0,0.0,2.99,0.15,0.13,0.15,0.13,1.0,3.0,3.0,7.0,0.0,1.0,0.0,4.0,0.0,0.0,0.15,0.0,0.0,0.0,0.0,0.0,7.0,4.0,4.0,0.13,0.04,0.04,0.24,0.24,0.21,4.92,0.93,0.89,0.93,0.89,4.0,3.0,1.0,8.0,1.0,0.0,0.0,5.0,1.0,1.0,0.12,0.17,0.64,0.0,0.0,1.0,8.0,7.0,7.0,0.13,0.0,0.0,1.06,1.06,1.02,2.06,4.34,38.0,40.0,0.0,0.0,0.0,0.0,0.0,0.0
20256,8469820,2013,Zenon Konopka,BUF,C,all,59,27970.0,849.0,-1.0,0.3,0.49,0.37,0.49,0.39,0.48,651.0,21.29,1.24,1.28,5.03,0.68,12.66,10.11,1.24,1.22,1.22,1.0,1.0,21.0,10.0,5.0,36.0,3.0,1.0,1.0,0.0,5.0,0.0,9.0,15.0,20.0,30.0,25.0,88.0,345.0,46.0,7.0,1.0,28.0,3.0,0.0,0.82,0.42,0.0,1.0,0.0,0.0,35.13,31.0,30.32,0.0,0.26,0.04,0.0,1.5,1.48,1.47,849.0,74.0,283.0,89.0,403.0,97.0,97.0,113.0,542.0,345.0,222.0,188685.0,88.0,64.0,14.0,26.0,142.75,8.05,7.85,7.94,7.75,132.0,70.0,66.0,268.0,8.0,6.0,1.0,175.0,23.0,4.0,4.15,2.57,1.33,5.0,2.0,1.0,262.61,202.0,199.4,1.78,0.96,1.04,8.79,8.72,8.6,225.67,18.89,18.12,18.86,18.09,230.0,88.0,131.0,449.0,25.0,11.0,3.0,250.0,54.0,14.0,6.49,6.68,5.72,9.0,9.0,7.0,449.66,318.0,318.71,3.5,3.0,3.16,19.23,19.22,18.67,132.11,138.62,2728.0,2882.0,0.0,0.0,0.0,0.0,0.0,0.0
20257,8469820,2013,Zenon Konopka,BUF,C,5on5,59,23863.0,686.0,-1.0,0.4,0.5,0.43,0.48,0.45,0.48,655.0,20.57,1.22,1.26,4.9,0.66,12.44,9.52,1.22,1.2,1.2,1.0,1.0,20.0,10.0,5.0,35.0,3.0,1.0,1.0,0.0,5.0,0.0,9.0,14.0,19.0,29.0,25.0,88.0,261.0,46.0,7.0,1.0,27.0,3.0,0.0,0.8,0.42,0.0,1.0,0.0,0.0,34.13,30.0,29.32,0.0,0.26,0.04,0.0,1.48,1.45,1.45,686.0,65.0,169.0,83.0,369.0,95.0,86.0,88.0,417.0,261.0,157.0,146998.0,88.0,64.0,14.0,21.0,134.93,7.32,7.14,7.21,7.04,124.0,67.0,62.0,253.0,8.0,5.0,1.0,165.0,23.0,3.0,3.92,2.57,0.83,5.0,2.0,1.0,247.61,191.0,188.4,1.61,0.92,1.0,7.93,7.86,7.77,166.66,11.08,10.74,11.04,10.7,173.0,62.0,100.0,335.0,17.0,5.0,2.0,195.0,35.0,5.0,5.23,4.38,1.47,8.0,7.0,2.0,335.66,235.0,235.71,2.16,1.15,1.19,12.05,12.03,11.78,84.31,85.36,2042.0,2232.0,0.59,0.78,12.0,25.0,10.0,22.0
20258,8469820,2013,Zenon Konopka,BUF,C,4on5,59,3736.0,147.0,-2.01,0.01,0.07,0.05,0.11,0.07,0.13,216.0,0.72,0.02,0.02,0.13,0.02,0.22,0.59,0.02,0.02,0.02,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,73.0,0.0,0.0,0.0,1.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.01,0.0,0.0,0.02,0.02,0.02,147.0,1.0,108.0,6.0,32.0,1.0,11.0,21.0,114.0,73.0,4.0,12517.0,0.0,0.0,0.0,0.0,3.95,0.08,0.08,0.08,0.08,6.0,0.0,0.0,6.0,0.0,0.0,0.0,6.0,0.0,0.0,0.08,0.0,0.0,0.0,0.0,0.0,6.0,6.0,6.0,0.03,0.0,0.0,0.1,0.1,0.1,53.43,6.86,6.47,6.86,6.47,53.0,22.0,30.0,105.0,7.0,6.0,1.0,49.0,18.0,8.0,1.11,2.14,3.61,1.0,2.0,4.0,105.0,75.0,75.0,1.2,1.85,1.97,6.09,6.09,5.84,1.77,24.05,39.0,301.0,0.01,0.23,1.0,2.0,1.0,2.0
20259,8469820,2013,Zenon Konopka,BUF,C,5on4,59,64.0,7.0,-0.38,1.0,0.98,1.0,0.91,1.0,0.9,59.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.0,6.0,0.0,0.0,1.0,1.0,0.0,3.0,3.0,3.0,14.0,2683.0,0.0,0.0,0.0,0.0,0.88,0.5,0.5,0.5,0.5,1.0,0.0,1.0,2.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.5,0.0,0.0,0.0,2.0,1.0,1.0,0.01,0.0,0.0,0.51,0.51,0.51,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.02,0.12,73.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0


## Feature Engineering

To create and add new feature columns to the master DataFrame.

In [5]:
def new_features(df):
    """Add new feature columns to the DataFrame."""
    
    # New column for last season played
    df['Last_Season_Played'] = df.groupby(["playerId"])['season'].transform(np.max)
    
    df["% Games Played"] = df['games_played'] / 82
    df["Points per Game"] = df['I_F_points'] / df['games_played']
    df["Goals per Game"] = df['I_F_goals'] / df['games_played']
    df["Shots on Goal per Game"] = df['I_F_shotsOnGoal'] / df['games_played']
    #player_data["Primary Assists per Game"] = player_data['I_F_primaryAssists'] / player_data['games_played']
    #player_data["Secondary Assists per Game"] = player_data['I_F_secondaryAssists'] / player_data['games_played']
        
    return df

In [6]:
df = new_features(df)
df.head()

Unnamed: 0,playerId,season,name,team,position,situation,games_played,icetime,shifts,gameScore,onIce_xGoalsPercentage,offIce_xGoalsPercentage,onIce_corsiPercentage,offIce_corsiPercentage,onIce_fenwickPercentage,offIce_fenwickPercentage,iceTimeRank,I_F_xOnGoal,I_F_xGoals,I_F_xRebounds,I_F_xFreeze,I_F_xPlayStopped,I_F_xPlayContinuedInZone,I_F_xPlayContinuedOutsideZone,I_F_flurryAdjustedxGoals,I_F_scoreVenueAdjustedxGoals,I_F_flurryScoreVenueAdjustedxGoals,I_F_primaryAssists,I_F_secondaryAssists,I_F_shotsOnGoal,I_F_missedShots,I_F_blockedShotAttempts,I_F_shotAttempts,I_F_points,I_F_goals,I_F_rebounds,I_F_reboundGoals,I_F_freeze,I_F_playStopped,I_F_playContinuedInZone,I_F_playContinuedOutsideZone,I_F_savedShotsOnGoal,I_F_savedUnblockedShotAttempts,penalties,I_F_penalityMinutes,I_F_faceOffsWon,I_F_hits,I_F_takeaways,I_F_giveaways,I_F_lowDangerShots,I_F_mediumDangerShots,I_F_highDangerShots,I_F_lowDangerxGoals,I_F_mediumDangerxGoals,I_F_highDangerxGoals,I_F_lowDangerGoals,I_F_mediumDangerGoals,I_F_highDangerGoals,I_F_scoreAdjustedShotsAttempts,I_F_unblockedShotAttempts,I_F_scoreAdjustedUnblockedShotAttempts,I_F_dZoneGiveaways,I_F_xGoalsFromxReboundsOfShots,I_F_xGoalsFromActualReboundsOfShots,I_F_reboundxGoals,I_F_xGoals_with_earned_rebounds,I_F_xGoals_with_earned_rebounds_scoreAdjusted,I_F_xGoals_with_earned_rebounds_scoreFlurryAdjusted,I_F_shifts,I_F_oZoneShiftStarts,I_F_dZoneShiftStarts,I_F_neutralZoneShiftStarts,I_F_flyShiftStarts,I_F_oZoneShiftEnds,I_F_dZoneShiftEnds,I_F_neutralZoneShiftEnds,I_F_flyShiftEnds,faceoffsWon,faceoffsLost,timeOnBench,penalityMinutes,penalityMinutesDrawn,penaltiesDrawn,shotsBlockedByPlayer,OnIce_F_xOnGoal,OnIce_F_xGoals,OnIce_F_flurryAdjustedxGoals,OnIce_F_scoreVenueAdjustedxGoals,OnIce_F_flurryScoreVenueAdjustedxGoals,OnIce_F_shotsOnGoal,OnIce_F_missedShots,OnIce_F_blockedShotAttempts,OnIce_F_shotAttempts,OnIce_F_goals,OnIce_F_rebounds,OnIce_F_reboundGoals,OnIce_F_lowDangerShots,OnIce_F_mediumDangerShots,OnIce_F_highDangerShots,OnIce_F_lowDangerxGoals,OnIce_F_mediumDangerxGoals,OnIce_F_highDangerxGoals,OnIce_F_lowDangerGoals,OnIce_F_mediumDangerGoals,OnIce_F_highDangerGoals,OnIce_F_scoreAdjustedShotsAttempts,OnIce_F_unblockedShotAttempts,OnIce_F_scoreAdjustedUnblockedShotAttempts,OnIce_F_xGoalsFromxReboundsOfShots,OnIce_F_xGoalsFromActualReboundsOfShots,OnIce_F_reboundxGoals,OnIce_F_xGoals_with_earned_rebounds,OnIce_F_xGoals_with_earned_rebounds_scoreAdjusted,OnIce_F_xGoals_with_earned_rebounds_scoreFlurryAdjusted,OnIce_A_xOnGoal,OnIce_A_xGoals,OnIce_A_flurryAdjustedxGoals,OnIce_A_scoreVenueAdjustedxGoals,OnIce_A_flurryScoreVenueAdjustedxGoals,OnIce_A_shotsOnGoal,OnIce_A_missedShots,OnIce_A_blockedShotAttempts,OnIce_A_shotAttempts,OnIce_A_goals,OnIce_A_rebounds,OnIce_A_reboundGoals,OnIce_A_lowDangerShots,OnIce_A_mediumDangerShots,OnIce_A_highDangerShots,OnIce_A_lowDangerxGoals,OnIce_A_mediumDangerxGoals,OnIce_A_highDangerxGoals,OnIce_A_lowDangerGoals,OnIce_A_mediumDangerGoals,OnIce_A_highDangerGoals,OnIce_A_scoreAdjustedShotsAttempts,OnIce_A_unblockedShotAttempts,OnIce_A_scoreAdjustedUnblockedShotAttempts,OnIce_A_xGoalsFromxReboundsOfShots,OnIce_A_xGoalsFromActualReboundsOfShots,OnIce_A_reboundxGoals,OnIce_A_xGoals_with_earned_rebounds,OnIce_A_xGoals_with_earned_rebounds_scoreAdjusted,OnIce_A_xGoals_with_earned_rebounds_scoreFlurryAdjusted,OffIce_F_xGoals,OffIce_A_xGoals,OffIce_F_shotAttempts,OffIce_A_shotAttempts,xGoalsForAfterShifts,xGoalsAgainstAfterShifts,corsiForAfterShifts,corsiAgainstAfterShifts,fenwickForAfterShifts,fenwickAgainstAfterShifts,Last_Season_Played,% Games Played,Points per Game,Goals per Game,Shots on Goal per Game
20255,8469820,2013,Zenon Konopka,BUF,C,other,59,307.0,9.0,-0.4,0.14,0.32,0.47,0.49,0.36,0.46,82.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9.0,2.0,6.0,0.0,1.0,0.0,0.0,1.0,8.0,6.0,6.0,1790.0,0.0,0.0,0.0,0.0,2.99,0.15,0.13,0.15,0.13,1.0,3.0,3.0,7.0,0.0,1.0,0.0,4.0,0.0,0.0,0.15,0.0,0.0,0.0,0.0,0.0,7.0,4.0,4.0,0.13,0.04,0.04,0.24,0.24,0.21,4.92,0.93,0.89,0.93,0.89,4.0,3.0,1.0,8.0,1.0,0.0,0.0,5.0,1.0,1.0,0.12,0.17,0.64,0.0,0.0,1.0,8.0,7.0,7.0,0.13,0.0,0.0,1.06,1.06,1.02,2.06,4.34,38.0,40.0,0.0,0.0,0.0,0.0,0.0,0.0,2013,0.719512,0.0,0.0,0.0
20256,8469820,2013,Zenon Konopka,BUF,C,all,59,27970.0,849.0,-1.0,0.3,0.49,0.37,0.49,0.39,0.48,651.0,21.29,1.24,1.28,5.03,0.68,12.66,10.11,1.24,1.22,1.22,1.0,1.0,21.0,10.0,5.0,36.0,3.0,1.0,1.0,0.0,5.0,0.0,9.0,15.0,20.0,30.0,25.0,88.0,345.0,46.0,7.0,1.0,28.0,3.0,0.0,0.82,0.42,0.0,1.0,0.0,0.0,35.13,31.0,30.32,0.0,0.26,0.04,0.0,1.5,1.48,1.47,849.0,74.0,283.0,89.0,403.0,97.0,97.0,113.0,542.0,345.0,222.0,188685.0,88.0,64.0,14.0,26.0,142.75,8.05,7.85,7.94,7.75,132.0,70.0,66.0,268.0,8.0,6.0,1.0,175.0,23.0,4.0,4.15,2.57,1.33,5.0,2.0,1.0,262.61,202.0,199.4,1.78,0.96,1.04,8.79,8.72,8.6,225.67,18.89,18.12,18.86,18.09,230.0,88.0,131.0,449.0,25.0,11.0,3.0,250.0,54.0,14.0,6.49,6.68,5.72,9.0,9.0,7.0,449.66,318.0,318.71,3.5,3.0,3.16,19.23,19.22,18.67,132.11,138.62,2728.0,2882.0,0.0,0.0,0.0,0.0,0.0,0.0,2013,0.719512,0.050847,0.016949,0.355932
20257,8469820,2013,Zenon Konopka,BUF,C,5on5,59,23863.0,686.0,-1.0,0.4,0.5,0.43,0.48,0.45,0.48,655.0,20.57,1.22,1.26,4.9,0.66,12.44,9.52,1.22,1.2,1.2,1.0,1.0,20.0,10.0,5.0,35.0,3.0,1.0,1.0,0.0,5.0,0.0,9.0,14.0,19.0,29.0,25.0,88.0,261.0,46.0,7.0,1.0,27.0,3.0,0.0,0.8,0.42,0.0,1.0,0.0,0.0,34.13,30.0,29.32,0.0,0.26,0.04,0.0,1.48,1.45,1.45,686.0,65.0,169.0,83.0,369.0,95.0,86.0,88.0,417.0,261.0,157.0,146998.0,88.0,64.0,14.0,21.0,134.93,7.32,7.14,7.21,7.04,124.0,67.0,62.0,253.0,8.0,5.0,1.0,165.0,23.0,3.0,3.92,2.57,0.83,5.0,2.0,1.0,247.61,191.0,188.4,1.61,0.92,1.0,7.93,7.86,7.77,166.66,11.08,10.74,11.04,10.7,173.0,62.0,100.0,335.0,17.0,5.0,2.0,195.0,35.0,5.0,5.23,4.38,1.47,8.0,7.0,2.0,335.66,235.0,235.71,2.16,1.15,1.19,12.05,12.03,11.78,84.31,85.36,2042.0,2232.0,0.59,0.78,12.0,25.0,10.0,22.0,2013,0.719512,0.050847,0.016949,0.338983
20258,8469820,2013,Zenon Konopka,BUF,C,4on5,59,3736.0,147.0,-2.01,0.01,0.07,0.05,0.11,0.07,0.13,216.0,0.72,0.02,0.02,0.13,0.02,0.22,0.59,0.02,0.02,0.02,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,73.0,0.0,0.0,0.0,1.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.01,0.0,0.0,0.02,0.02,0.02,147.0,1.0,108.0,6.0,32.0,1.0,11.0,21.0,114.0,73.0,4.0,12517.0,0.0,0.0,0.0,0.0,3.95,0.08,0.08,0.08,0.08,6.0,0.0,0.0,6.0,0.0,0.0,0.0,6.0,0.0,0.0,0.08,0.0,0.0,0.0,0.0,0.0,6.0,6.0,6.0,0.03,0.0,0.0,0.1,0.1,0.1,53.43,6.86,6.47,6.86,6.47,53.0,22.0,30.0,105.0,7.0,6.0,1.0,49.0,18.0,8.0,1.11,2.14,3.61,1.0,2.0,4.0,105.0,75.0,75.0,1.2,1.85,1.97,6.09,6.09,5.84,1.77,24.05,39.0,301.0,0.01,0.23,1.0,2.0,1.0,2.0,2013,0.719512,0.0,0.0,0.016949
20259,8469820,2013,Zenon Konopka,BUF,C,5on4,59,64.0,7.0,-0.38,1.0,0.98,1.0,0.91,1.0,0.9,59.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.0,6.0,0.0,0.0,1.0,1.0,0.0,3.0,3.0,3.0,14.0,2683.0,0.0,0.0,0.0,0.0,0.88,0.5,0.5,0.5,0.5,1.0,0.0,1.0,2.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.5,0.0,0.0,0.0,2.0,1.0,1.0,0.01,0.0,0.0,0.51,0.51,0.51,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.02,0.12,73.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,2013,0.719512,0.0,0.0,0.0


## Slicing DataFrame

To slice the DataFrame based on selected criteria.

In [7]:
def slice_df(df):
    """Slice the DataFrame based on specific criteria."""

    df = df[df['situation'] == 'all']
    #df = df[df['I_F_points'] >= 10]
    df = df[df['Last_Season_Played'] ==  df['season'].max()]

    return df

In [8]:
df = slice_df(df)
df.head()

Unnamed: 0,playerId,season,name,team,position,situation,games_played,icetime,shifts,gameScore,onIce_xGoalsPercentage,offIce_xGoalsPercentage,onIce_corsiPercentage,offIce_corsiPercentage,onIce_fenwickPercentage,offIce_fenwickPercentage,iceTimeRank,I_F_xOnGoal,I_F_xGoals,I_F_xRebounds,I_F_xFreeze,I_F_xPlayStopped,I_F_xPlayContinuedInZone,I_F_xPlayContinuedOutsideZone,I_F_flurryAdjustedxGoals,I_F_scoreVenueAdjustedxGoals,I_F_flurryScoreVenueAdjustedxGoals,I_F_primaryAssists,I_F_secondaryAssists,I_F_shotsOnGoal,I_F_missedShots,I_F_blockedShotAttempts,I_F_shotAttempts,I_F_points,I_F_goals,I_F_rebounds,I_F_reboundGoals,I_F_freeze,I_F_playStopped,I_F_playContinuedInZone,I_F_playContinuedOutsideZone,I_F_savedShotsOnGoal,I_F_savedUnblockedShotAttempts,penalties,I_F_penalityMinutes,I_F_faceOffsWon,I_F_hits,I_F_takeaways,I_F_giveaways,I_F_lowDangerShots,I_F_mediumDangerShots,I_F_highDangerShots,I_F_lowDangerxGoals,I_F_mediumDangerxGoals,I_F_highDangerxGoals,I_F_lowDangerGoals,I_F_mediumDangerGoals,I_F_highDangerGoals,I_F_scoreAdjustedShotsAttempts,I_F_unblockedShotAttempts,I_F_scoreAdjustedUnblockedShotAttempts,I_F_dZoneGiveaways,I_F_xGoalsFromxReboundsOfShots,I_F_xGoalsFromActualReboundsOfShots,I_F_reboundxGoals,I_F_xGoals_with_earned_rebounds,I_F_xGoals_with_earned_rebounds_scoreAdjusted,I_F_xGoals_with_earned_rebounds_scoreFlurryAdjusted,I_F_shifts,I_F_oZoneShiftStarts,I_F_dZoneShiftStarts,I_F_neutralZoneShiftStarts,I_F_flyShiftStarts,I_F_oZoneShiftEnds,I_F_dZoneShiftEnds,I_F_neutralZoneShiftEnds,I_F_flyShiftEnds,faceoffsWon,faceoffsLost,timeOnBench,penalityMinutes,penalityMinutesDrawn,penaltiesDrawn,shotsBlockedByPlayer,OnIce_F_xOnGoal,OnIce_F_xGoals,OnIce_F_flurryAdjustedxGoals,OnIce_F_scoreVenueAdjustedxGoals,OnIce_F_flurryScoreVenueAdjustedxGoals,OnIce_F_shotsOnGoal,OnIce_F_missedShots,OnIce_F_blockedShotAttempts,OnIce_F_shotAttempts,OnIce_F_goals,OnIce_F_rebounds,OnIce_F_reboundGoals,OnIce_F_lowDangerShots,OnIce_F_mediumDangerShots,OnIce_F_highDangerShots,OnIce_F_lowDangerxGoals,OnIce_F_mediumDangerxGoals,OnIce_F_highDangerxGoals,OnIce_F_lowDangerGoals,OnIce_F_mediumDangerGoals,OnIce_F_highDangerGoals,OnIce_F_scoreAdjustedShotsAttempts,OnIce_F_unblockedShotAttempts,OnIce_F_scoreAdjustedUnblockedShotAttempts,OnIce_F_xGoalsFromxReboundsOfShots,OnIce_F_xGoalsFromActualReboundsOfShots,OnIce_F_reboundxGoals,OnIce_F_xGoals_with_earned_rebounds,OnIce_F_xGoals_with_earned_rebounds_scoreAdjusted,OnIce_F_xGoals_with_earned_rebounds_scoreFlurryAdjusted,OnIce_A_xOnGoal,OnIce_A_xGoals,OnIce_A_flurryAdjustedxGoals,OnIce_A_scoreVenueAdjustedxGoals,OnIce_A_flurryScoreVenueAdjustedxGoals,OnIce_A_shotsOnGoal,OnIce_A_missedShots,OnIce_A_blockedShotAttempts,OnIce_A_shotAttempts,OnIce_A_goals,OnIce_A_rebounds,OnIce_A_reboundGoals,OnIce_A_lowDangerShots,OnIce_A_mediumDangerShots,OnIce_A_highDangerShots,OnIce_A_lowDangerxGoals,OnIce_A_mediumDangerxGoals,OnIce_A_highDangerxGoals,OnIce_A_lowDangerGoals,OnIce_A_mediumDangerGoals,OnIce_A_highDangerGoals,OnIce_A_scoreAdjustedShotsAttempts,OnIce_A_unblockedShotAttempts,OnIce_A_scoreAdjustedUnblockedShotAttempts,OnIce_A_xGoalsFromxReboundsOfShots,OnIce_A_xGoalsFromActualReboundsOfShots,OnIce_A_reboundxGoals,OnIce_A_xGoals_with_earned_rebounds,OnIce_A_xGoals_with_earned_rebounds_scoreAdjusted,OnIce_A_xGoals_with_earned_rebounds_scoreFlurryAdjusted,OffIce_F_xGoals,OffIce_A_xGoals,OffIce_F_shotAttempts,OffIce_A_shotAttempts,xGoalsForAfterShifts,xGoalsAgainstAfterShifts,corsiForAfterShifts,corsiAgainstAfterShifts,fenwickForAfterShifts,fenwickAgainstAfterShifts,Last_Season_Played,% Games Played,Points per Game,Goals per Game,Shots on Goal per Game
5121,8476878,2022,Zemgus Girgensons,BUF,L,all,2,1477.0,37.0,1.07,0.41,0.46,0.42,0.46,0.45,0.45,19.0,5.53,0.44,0.34,1.09,0.18,3.2,2.75,0.44,0.43,0.43,0.0,0.0,4.0,4.0,1.0,9.0,1.0,1.0,2.0,0.0,0.0,0.0,3.0,2.0,3.0,7.0,1.0,2.0,3.0,1.0,0.0,0.0,5.0,3.0,0.0,0.13,0.31,0.0,1.0,0.0,0.0,8.93,8.0,7.97,0.0,0.07,0.35,0.0,0.51,0.5,0.5,37.0,1.0,6.0,10.0,20.0,4.0,5.0,7.0,21.0,3.0,1.0,5723.0,2.0,0.0,0.0,1.0,10.81,0.92,0.91,0.9,0.89,7.0,8.0,4.0,19.0,1.0,2.0,0.0,10.0,4.0,1.0,0.26,0.4,0.27,1.0,0.0,0.0,18.85,15.0,15.0,0.15,0.35,0.35,0.72,0.7,0.7,12.71,1.32,1.32,1.37,1.37,13.0,5.0,8.0,26.0,1.0,0.0,0.0,15.0,2.0,1.0,0.52,0.23,0.57,0.0,0.0,1.0,26.88,18.0,18.85,0.18,0.0,0.0,1.5,1.56,1.56,4.71,5.64,79.0,93.0,0.0,0.0,0.0,0.0,0.0,0.0,2022,0.02439,0.5,0.5,2.0
26111,8476878,2021,Zemgus Girgensons,BUF,L,all,56,49692.0,1148.0,18.99,0.42,0.47,0.46,0.5,0.46,0.48,387.0,85.32,8.89,6.22,18.38,2.85,47.59,36.07,8.57,8.81,8.49,7.0,1.0,92.0,28.0,25.0,145.0,18.0,10.0,6.0,1.0,16.0,6.0,38.0,44.0,82.0,110.0,7.0,17.0,143.0,81.0,24.0,10.0,84.0,28.0,8.0,2.94,3.53,2.42,4.0,4.0,2.0,143.14,120.0,118.98,2.0,1.36,1.17,1.81,8.44,8.35,8.17,1148.0,139.0,214.0,207.0,588.0,164.0,117.0,145.0,722.0,143.0,144.0,155001.0,17.0,18.0,9.0,25.0,346.15,29.75,28.9,29.68,28.83,365.0,121.0,123.0,609.0,29.0,21.0,4.0,369.0,92.0,25.0,10.8,11.4,7.55,7.0,14.0,8.0,602.78,486.0,482.74,5.15,4.19,4.19,30.71,30.58,30.09,410.8,41.47,39.57,41.52,39.63,442.0,130.0,153.0,725.0,42.0,32.0,7.0,416.0,116.0,40.0,12.24,14.15,15.08,11.0,15.0,16.0,732.8,572.0,575.18,6.45,8.41,8.12,39.8,39.89,38.89,109.96,125.92,2274.0,2308.0,0.0,0.0,0.0,0.0,0.0,0.0,2022,0.682927,0.321429,0.178571,1.642857
21346,8476878,2019,Zemgus Girgensons,BUF,C,all,69,57145.0,1320.0,19.37,0.41,0.48,0.42,0.52,0.42,0.51,531.0,80.15,10.56,5.59,15.85,2.49,43.59,33.91,10.14,10.62,10.2,4.0,3.0,84.0,28.0,31.0,143.0,19.0,12.0,5.0,1.0,17.0,4.0,26.0,48.0,72.0,100.0,5.0,10.0,30.0,110.0,13.0,11.0,68.0,33.0,11.0,2.29,3.88,4.4,3.0,4.0,5.0,144.59,112.0,112.65,6.0,1.17,0.88,0.95,10.78,10.78,10.38,1320.0,119.0,247.0,195.0,759.0,181.0,171.0,152.0,816.0,30.0,43.0,193923.0,10.0,12.0,6.0,20.0,356.84,31.68,31.12,31.71,31.16,368.0,137.0,139.0,644.0,27.0,20.0,2.0,384.0,94.0,27.0,10.89,11.37,9.43,9.0,9.0,9.0,647.0,505.0,507.28,4.74,3.0,2.96,33.47,33.49,33.06,498.39,45.0,43.33,45.17,43.48,534.0,171.0,197.0,902.0,53.0,34.0,5.0,551.0,112.0,42.0,16.22,13.62,15.17,21.0,16.0,16.0,906.78,705.0,707.77,8.31,7.78,7.78,45.53,45.7,44.56,143.43,152.59,2943.0,2767.0,0.0,0.0,0.0,0.0,0.0,0.0,2022,0.841463,0.275362,0.173913,1.217391
34081,8476878,2018,Zemgus Girgensons,BUF,C,all,72,58768.0,1304.0,16.72,0.38,0.51,0.42,0.53,0.41,0.53,558.0,78.39,8.61,5.5,16.92,2.55,42.7,35.72,8.44,8.71,8.53,6.0,7.0,78.0,34.0,20.0,132.0,18.0,5.0,1.0,0.0,20.0,3.0,44.0,39.0,73.0,107.0,8.0,19.0,28.0,144.0,21.0,11.0,72.0,32.0,8.0,2.22,4.11,2.29,2.0,2.0,1.0,132.88,112.0,112.61,4.0,1.16,0.3,1.49,8.28,8.34,8.21,1304.0,53.0,388.0,254.0,609.0,241.0,156.0,201.0,706.0,28.0,41.0,204069.0,19.0,27.0,12.0,29.0,387.04,32.38,31.58,32.52,31.72,397.0,153.0,162.0,712.0,26.0,18.0,3.0,436.0,85.0,29.0,11.97,10.23,10.18,13.0,6.0,7.0,712.66,550.0,550.15,4.97,4.28,4.28,33.06,33.19,32.59,558.11,52.83,49.5,52.96,49.62,575.0,208.0,218.0,1001.0,56.0,33.0,11.0,568.0,166.0,49.0,15.59,20.32,16.92,12.0,26.0,18.0,1006.21,783.0,786.61,9.6,8.79,8.79,53.64,53.74,51.45,166.0,158.8,3398.0,3064.0,0.0,0.0,0.0,0.0,0.0,0.0,2022,0.878049,0.25,0.069444,1.083333
191,8476878,2017,Zemgus Girgensons,BUF,C,all,71,58404.0,1331.0,16.75,0.35,0.48,0.43,0.5,0.42,0.5,526.0,101.48,9.5,7.13,21.16,3.21,54.39,46.61,9.32,9.42,9.24,4.0,4.0,113.0,29.0,26.0,168.0,15.0,7.0,3.0,2.0,22.0,2.0,45.0,63.0,106.0,135.0,13.0,26.0,23.0,134.0,18.0,12.0,99.0,34.0,9.0,3.04,4.01,2.45,2.0,2.0,3.0,164.88,142.0,139.48,7.0,1.51,0.57,1.25,9.76,9.61,9.53,1331.0,148.0,177.0,192.0,814.0,212.0,189.0,164.0,766.0,23.0,40.0,199964.0,26.0,34.0,17.0,32.0,398.56,28.9,28.33,28.76,28.2,425.0,138.0,174.0,737.0,25.0,16.0,3.0,443.0,102.0,18.0,11.98,11.98,4.94,9.0,10.0,6.0,724.86,563.0,555.95,5.47,3.02,3.22,31.15,30.89,30.53,547.77,53.66,50.53,54.05,50.92,564.0,206.0,199.0,969.0,55.0,45.0,11.0,569.0,149.0,52.0,16.01,18.25,19.41,19.0,17.0,19.0,985.88,770.0,780.97,8.64,12.28,12.28,50.02,50.4,48.82,149.74,161.51,3136.0,3124.0,0.0,0.0,0.0,0.0,0.0,0.0,2022,0.865854,0.211268,0.098592,1.591549


## PlayerID DataFrame

To get a DataFrame of players names and IDs to be used as a lookup table.

In [9]:
def playerid_df(df):
    """DataFrame for player names and IDs."""
    
    # Player Name & Player ID DataFrame
    players = df[['name', 'playerId','Last_Season_Played']].copy()
    
    # Removing duplicates
    players = players.drop_duplicates(subset=['playerId'], keep='first')
    players = players.sort_values('playerId').reset_index(drop = True)
    
    return players

In [10]:
players = playerid_df(df)
players.head()

Unnamed: 0,name,playerId,Last_Season_Played
0,Ryan Suter,8470600,2022
1,Jeff Carter,8470604,2022
2,Zach Parise,8470610,2022
3,Brent Burns,8470613,2022
4,Corey Perry,8470621,2022


## Machine Learning DataFrame

Create a DataFrame that is formatted to be used with various ML algorithms.

In [11]:
# DataFrame for ML
ml_df = df[['playerId', 'position', 'icetime',
            '% Games Played', 'Points per Game', 'Shots on Goal per Game',
            'Goals per Game', 'Shots on Goal per Game'
          ]].copy()

ml_df.head()

Unnamed: 0,playerId,position,icetime,% Games Played,Points per Game,Shots on Goal per Game,Goals per Game,Shots on Goal per Game.1
5121,8476878,L,1477.0,0.02439,0.5,2.0,0.5,2.0
26111,8476878,L,49692.0,0.682927,0.321429,1.642857,0.178571,1.642857
21346,8476878,C,57145.0,0.841463,0.275362,1.217391,0.173913,1.217391
34081,8476878,C,58768.0,0.878049,0.25,1.083333,0.069444,1.083333
191,8476878,C,58404.0,0.865854,0.211268,1.591549,0.098592,1.591549


In [12]:
# Instantiate MinMaxScaler
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()

# Normalizing specific column so the values are between 0 and 1
ml_df[['icetime']] = scaler.fit_transform(ml_df[['icetime']])
ml_df[['Shots on Goal per Game']] = scaler.fit_transform(ml_df[['Shots on Goal per Game']])
ml_df.head()

Unnamed: 0,playerId,position,icetime,% Games Played,Points per Game,Shots on Goal per Game,Goals per Game,Shots on Goal per Game.1
5121,8476878,L,0.009607,0.02439,0.5,0.285714,0.5,0.285714
26111,8476878,L,0.343095,0.682927,0.321429,0.234694,0.178571,0.234694
21346,8476878,C,0.394645,0.841463,0.275362,0.173913,0.173913,0.173913
34081,8476878,C,0.405871,0.878049,0.25,0.154762,0.069444,0.154762
191,8476878,C,0.403353,0.865854,0.211268,0.227364,0.098592,0.227364


In [13]:
"""
# OneHotEncode categorical columns
# OneHotEncode 'position'
onehot_position = pd.get_dummies(ml_df['position'])
ml_df = ml_df.join(onehot_position)
ml_df = ml_df.drop('position', axis=1)

# Rename onehot_position column names
pos_cols = {'C': 'Position - C',
            'D': 'Position - D',
            'L': 'Position - L',
            'R': 'Position - R'}

ml_df.rename(columns=pos_cols, inplace=True)

ml_df.head()
"""

"\n# OneHotEncode categorical columns\n# OneHotEncode 'position'\nonehot_position = pd.get_dummies(ml_df['position'])\nml_df = ml_df.join(onehot_position)\nml_df = ml_df.drop('position', axis=1)\n\n# Rename onehot_position column names\npos_cols = {'C': 'Position - C',\n            'D': 'Position - D',\n            'L': 'Position - L',\n            'R': 'Position - R'}\n\nml_df.rename(columns=pos_cols, inplace=True)\n\nml_df.head()\n"

In [14]:
# Instantiate LabelEncoder
from sklearn import preprocessing
le = preprocessing.LabelEncoder()

# Label Encode 'position'
ml_df[['position']] = le.fit_transform(ml_df[['position']])

In [15]:
"""
# OneHotEncode 'name'
onehot_name = pd.get_dummies(ml_df['name'])
ml_df = ml_df.join(onehot_name)
ml_df = ml_df.drop('name', axis=1)

ml_df.head()
"""

"\n# OneHotEncode 'name'\nonehot_name = pd.get_dummies(ml_df['name'])\nml_df = ml_df.join(onehot_name)\nml_df = ml_df.drop('name', axis=1)\n\nml_df.head()\n"

In [16]:
ml_df.shape

(4184, 8)

In [17]:
# Separating features the ML models
features = ml_df.drop(labels= "Points per Game", axis=1)
features.head()

Unnamed: 0,playerId,position,icetime,% Games Played,Shots on Goal per Game,Goals per Game,Shots on Goal per Game.1
5121,8476878,2,0.009607,0.02439,0.285714,0.5,0.285714
26111,8476878,2,0.343095,0.682927,0.234694,0.178571,0.234694
21346,8476878,0,0.394645,0.841463,0.173913,0.173913,0.173913
34081,8476878,0,0.405871,0.878049,0.154762,0.069444,0.154762
191,8476878,0,0.403353,0.865854,0.227364,0.098592,0.227364


In [18]:
# Separating labels the ML models
labels = ml_df["Points per Game"]
labels.head()

5121     0.500000
26111    0.321429
21346    0.275362
34081    0.250000
191      0.211268
Name: Points per Game, dtype: float64

In [19]:
# Splitting the data into training and testing datasets
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size = 0.3, random_state = 10)

## Machine Learning

Predicting points per game by player using multiple machine learning models.

### Linear Regression

In [23]:
# Train a LinearRegression model
from sklearn.metrics import log_loss
from sklearn.linear_model import LinearRegression

lm = LinearRegression()
#lm.fit(X_train.values,y_train)
lm.fit(X_train, y_train)
print("** Linear Regression model has been trained.")
    
# Get the train and test accuracy scores
print(f"** Training Score: {lm.score(X_train, y_train)}")
print(f"** Testing Score: {lm.score(X_test, y_test)}")

** Linear Regression model has been trained.
** Training Score: 0.6759299592433701
** Testing Score: 0.6550569096277677


### Neural Network

### Predictions