This notebook analyzes only those players who have been drafted by NHL teams.

In [2]:
# Dependencies
import pandas as pd
import plotly.express as px

In [3]:
# Load the drafted-players CSV into a DataFrame and preview its first rows
df = pd.read_csv('data_sets/drafted_data.csv')
df.head(n=5)

Unnamed: 0,LEAGUE,PLAYER_ID,POSITION,SEASON,AGE_SEPT_15,GP,G,A,TP,PPG,...,SEASON_>20GP,CUM_GP,ONLY_1_SEASON,MIN_3_SEASONS,DY_STATUS,SHOOTS,DRAFTED,NHL_PPG,NHL_GP,NHL_DV
0,WHL,100577/richard-nejezchleb,F,2015,21.369444,49,20,31,51,1.040816,...,3,150,0,1,DY+3,L,Yes,0.0,0,0
1,QMJHL,100867/jan-kostalek,D,2015,20.577778,57,7,36,43,0.754386,...,3,160,0,1,DY+2,R,Yes,0.0,0,0
2,OHL,101221/dominik-kubalik,F,2014,19.066667,59,18,11,29,0.491525,...,2,126,0,0,DY+1,L,Yes,0.0,0,0
3,WHL,10123/denis-rehak,D,2004,19.336111,25,0,3,3,0.12,...,1,25,1,0,DY+1,L,Yes,0.0,0,0
4,USHL,101430/nathan-walker,F,2013,19.605556,29,7,20,27,0.931034,...,1,29,1,0,DY+1,L,Yes,0.166667,12,1


In [13]:
# Top 25 players by NHL games played -- these names should be familiar to hockey fans.
# Columns carried into the NHL view: player id, NHL totals, and the GP/G/A/TP scoring line.
nhl_columns = ['PLAYER_ID', 'NHL_PPG', 'NHL_GP', 'GP', 'G', 'A', 'TP']
# Keep only rows whose NHL_GP is non-zero; .copy() makes nhl_df independent of df
nhl_df = df.loc[df['NHL_GP'] != 0, nhl_columns].copy()
nhl_df.sort_values(by='NHL_GP', ascending=False).head(25)

Unnamed: 0,PLAYER_ID,NHL_PPG,NHL_GP,GP,G,A,TP
2401,8764/scott-hartnell,0.566053,1249,62,27,55,82
1963,4506/justin-williams,0.631833,1244,68,37,46,83
1807,3659/jay-bouwmeester,0.350507,1184,61,11,50,61
1804,3656/eric-staal,0.829642,1174,66,39,59,98
2038,4723/brad-richards,0.827709,1126,63,71,115,186
2522,9099/dustin-brown,0.553268,1117,58,34,42,76
2507,9040/dan-hamhuis,0.319853,1088,59,10,50,60
2443,8879/brent-seabrook,0.425139,1082,63,12,42,54
2483,9012/duncan-keith,0.541318,1077,37,11,35,46
2395,8715/mike-ribeiro,0.738361,1074,23,18,31,49


In [14]:
# Rank NHL players by NHL points per game and show the 25 highest.
# The list is mostly predictable, apart from a few outliers who have only played a game or two.
# Ryan Poehling tops it: he scored a hat trick in his career debut, which happened to be
# the final game of the 2018-2019 season and also the last game recorded in our dataset.
ppg_ranked = nhl_df.sort_values(by='NHL_PPG', ascending=False)
ppg_ranked.head(25)

Unnamed: 0,PLAYER_ID,NHL_PPG,NHL_GP,GP,G,A,TP
1593,288045/ryan-poehling,3.0,1,9,2,2,4
990,183442/connor-mcdavid,1.296167,287,47,44,76,120
2156,6146/sidney-crosby,1.289502,943,62,66,102,168
2607,9326/patrick-kane,1.03876,903,58,62,83,145
2242,77237/nikita-kucherov,1.033557,447,33,29,34,63
168,11113/steven-stamkos,1.02681,746,61,58,47,105
2439,88699/tyler-lewington,1.0,2,69,9,36,45
2781,9654/ivan-baranka,1.0,1,64,7,16,23
744,14998/damian-surma,1.0,2,55,28,27,55
2666,94254/patrick-sieloff,1.0,2,45,3,8,11


In [6]:
# Build a stats-only frame by removing the identifier, categorical and flag columns
non_stat_columns = [
    'LEAGUE', 'PLAYER_ID', 'POSITION', 'SEASON', 'AGE_SEPT_15', 'SEASON_NO',
    'ONLY_1_SEASON', 'MIN_3_SEASONS', 'DY_STATUS', 'SHOOTS', 'DRAFTED', 'NHL_DV',
]
stats_df = df.drop(columns=non_stat_columns)
stats_df

Unnamed: 0,GP,G,A,TP,PPG,SEASON_>20GP,CUM_GP,NHL_PPG,NHL_GP
0,49,20,31,51,1.040816,3,150,0.000000,0
1,57,7,36,43,0.754386,3,160,0.000000,0
2,59,18,11,29,0.491525,2,126,0.000000,0
3,25,0,3,3,0.120000,1,25,0.000000,0
4,29,7,20,27,0.931034,1,29,0.166667,12
5,68,14,54,68,1.000000,1,68,0.223529,850
6,68,28,40,68,1.000000,2,134,0.571109,893
7,59,12,37,49,0.830508,4,249,0.000000,2
8,43,13,35,48,1.116279,4,253,0.430052,193
9,56,20,33,53,0.946429,5,312,0.000000,0


In [17]:
# Column-wise mean of every statistic across all drafted players
stats_df.mean(axis='index')

GP               54.110676
G                18.587544
A                29.658363
TP               48.245196
PPG               0.871936
SEASON_>20GP      2.888968
CUM_GP          171.701423
NHL_PPG           0.118336
NHL_GP          105.266548
dtype: float64

In [30]:
# Column-wise median for drafted players; the median is less sensitive than the mean
# to outliers such as Poehling
stats_median = stats_df.median(axis='index')
stats_median

GP               58.000000
G                16.000000
A                28.000000
TP               46.000000
PPG               0.842105
SEASON_>20GP      3.000000
CUM_GP          185.000000
NHL_PPG           0.000000
NHL_GP            0.000000
dtype: float64

In [39]:
# Determine what share of drafted players reach the NHL.
# Rows are grouped by the NHL_DV flag (0 / 1).
# NOTE(review): presumably 1 = played in the NHL and 0 = never did -- confirm against the data dictionary.
drafted_vs_nhl_df = df.groupby('NHL_DV')
# Total number of rows with a PLAYER_ID (count() skips missing values)
player_ct = df['PLAYER_ID'].count()
# PLAYER_ID count inside each NHL_DV group
split_player_ct = drafted_vs_nhl_df['PLAYER_ID'].count()
# nhl_ct is kept under this name for any later cell that reads it. It is the same
# per-group count as split_player_ct (it covers BOTH groups, not only NHL players),
# so the groupby count is reused here instead of being computed a second time.
nhl_ct = split_player_ct.copy()
# Fraction of all players that falls in each group
nhl_pct = split_player_ct / player_ct
# Summary table: one row per NHL_DV group
nhl_pct_df = pd.DataFrame({'Player Count': split_player_ct,
                           'Percent of Playerbase': nhl_pct})

# Render the fractions as percentage strings; the division above already yields floats,
# so no cast is needed before formatting
nhl_pct_df['Percent of Playerbase'] = nhl_pct_df['Percent of Playerbase'].map("{:.2%}".format)

nhl_pct_df.transpose()

NHL_DV,0,1
Player Count,1606,1204
Percent of Playerbase,57.15%,42.85%


In this dataset, only 42.85% of the players who were drafted by NHL teams ever made it into the NHL.

In [48]:
# Compare average juniors statistics for players who do / do not make it to the NHL level,
# alongside each group's size and share of the player base.

# Per-group means, in order: games played, goals, assists, total points, points per game
junior_stat_cols = ('GP', 'G', 'A', 'TP', 'PPG')
gp_avg, g_avg, a_avg, pt_avg, ppg_avg = (
    drafted_vs_nhl_df[stat_col].mean() for stat_col in junior_stat_cols
)

# One row per NHL_DV group; every input Series is indexed by NHL_DV
compared_stats_df = pd.DataFrame({
    'Player Count': split_player_ct,
    'Percent of Playerbase': nhl_pct,
    'Average Juniors Games Played': gp_avg,
    'Average Juniors Goals': g_avg,
    'Average Juniors Assists': a_avg,
    'Average Juniors Points': pt_avg,
    'Average Juniors Points Per Game': ppg_avg,
})

# Formatting: show the share as a percentage string
to_percent = "{:.2%}".format
compared_stats_df['Percent of Playerbase'] = (
    compared_stats_df['Percent of Playerbase'].astype(float).map(to_percent)
)
# Display floats with two decimals; this is a global pandas display option,
# so it also applies to outputs rendered after this cell
pd.set_option('display.float_format', '{:,.2f}'.format)
compared_stats_df.T

NHL_DV,0,1
Player Count,1606,1204
Percent of Playerbase,57.15%,42.85%
Average Juniors Games Played,54.22,53.96
Average Juniors Goals,15.82,22.28
Average Juniors Assists,25.74,34.88
Average Juniors Points,41.56,57.16
Average Juniors Points Per Game,0.74,1.05


In [None]:
# Graph junior PPG vs NHL GP