In [1]:
import os
from pathlib import Path

import numpy as np
import pandas as pd

from eliteprospect import eliteprospect_scraper as ep


In [2]:
# Print the scraper module's built-in documentation (function list and docstrings).
help(ep)

Help on module eliteprospect.eliteprospect_scraper in eliteprospect:

NAME
    eliteprospect.eliteprospect_scraper

DESCRIPTION
    Functions for collecting data from eliteprospects
    Can be used together to extract data for players, matches etc.

FUNCTIONS
    dataprep_players(playerstats)
        Takes data from players, clean it and return data frame that can be used for further analysis
    
    getPlayerMetadata(dfplayers)
        Create dataframe with metadata by players. 
        Input is dataframe created with function getPlayers
    
    getPlayerStats(playerlinks)
        Takes series of playerlinks to eliteprospect-profiles, 
        Return dataframe with stats by player and season
    
    getPlayers(league, year)
        Get all players for specific year and league; returns dataframe
        League input in format '2018-19'
    
    getSeasonStat(dfplayers)
        Create dataframe with aggregated statistics by season and position (forward/defenceman) 
        Input is d

In [3]:
# Get players for every SHL season from 2012-13 through 2019-20.
# The season list is the single place to edit when the range changes.
shl_seasons = ['2012-13', '2013-14', '2014-15', '2015-16',
               '2016-17', '2017-18', '2018-19', '2019-20']

# One getPlayers call per season, keyed by season so an individual season's
# frame stays reachable (e.g. shl_by_season['2012-13']).
shl_by_season = {season: ep.getPlayers('shl', season) for season in shl_seasons}

# Stack oldest season first. As before, the per-season row labels are kept
# (no ignore_index), so index labels may repeat across seasons.
players_shl = pd.concat(list(shl_by_season.values()))

players_shl.head()

Unnamed: 0,player,team,gp,g,a,tp,ppg,pim,+/-,link,season,league,playername,position,fw_def
0,Bud Holloway (RW),Skellefteå AIK,55,20,51,71,1.29,36,25,https://www.eliteprospects.com/player/11620/bu...,2012-13,shl,Bud Holloway,RW,FW
1,Carl Söderberg (C),Linköping HC,54,31,29,60,1.11,48,18,https://www.eliteprospects.com/player/665/carl...,2012-13,shl,Carl Söderberg,C,FW
2,Joakim Lindström (RW),Skellefteå AIK,53,18,36,54,1.02,56,11,https://www.eliteprospects.com/player/229/joak...,2012-13,shl,Joakim Lindström,RW,FW
3,Pär Arlbrandt (RW),Linköping HC,54,21,32,53,0.98,28,24,https://www.eliteprospects.com/player/825/par-...,2012-13,shl,Pär Arlbrandt,RW,FW
4,Broc Little (LW),AIK,55,16,30,46,0.84,24,-3,https://www.eliteprospects.com/player/34071/br...,2012-13,shl,Broc Little,LW,FW


In [4]:
# Get players for every Allsvenskan season from 2012-13 through 2019-20.
# The season list is the single place to edit when the range changes.
allsvenskan_seasons = ['2012-13', '2013-14', '2014-15', '2015-16',
                       '2016-17', '2017-18', '2018-19', '2019-20']

# One getPlayers call per season, keyed by season so an individual season's
# frame stays reachable (e.g. allsvenskan_by_season['2012-13']).
allsvenskan_by_season = {
    season: ep.getPlayers('allsvenskan', season) for season in allsvenskan_seasons
}

# Stack oldest season first. As before, the per-season row labels are kept
# (no ignore_index), so index labels may repeat across seasons.
players_allsvenskan = pd.concat(list(allsvenskan_by_season.values()))

players_allsvenskan.head()

Unnamed: 0,player,team,gp,g,a,tp,ppg,pim,+/-,link,season,league,playername,position,fw_def
0,Evan McGrath (C),IK Oskarshamn,52,22,28,50,0.96,38,6,https://www.eliteprospects.com/player/9491/eva...,2012-13,allsvenskan,Evan McGrath,C,FW
1,Jared Aulin (C/RW),Örebro HK,48,16,34,50,1.04,34,8,https://www.eliteprospects.com/player/9521/jar...,2012-13,allsvenskan,Jared Aulin,C/RW,FW
2,Matt Fornataro (C),VIK Västerås HK,48,16,32,48,1.0,40,8,https://www.eliteprospects.com/player/15568/ma...,2012-13,allsvenskan,Matt Fornataro,C,FW
3,Damien Fleury (RW/LW),Södertälje SK,51,29,18,47,0.92,50,19,https://www.eliteprospects.com/player/24848/da...,2012-13,allsvenskan,Damien Fleury,RW/LW,FW
4,Michael Raffl (LW/RW),Leksands IF,49,24,22,46,0.94,40,24,https://www.eliteprospects.com/player/15216/mi...,2012-13,allsvenskan,Michael Raffl,LW/RW,FW


In [5]:
# Team statistics are derived from the getPlayers output of both leagues,
# stacked into a single frame.
players_both_leagues = pd.concat([players_shl, players_allsvenskan])

# Suffix the player count with "_team" so it is recognisable as a team-level
# figure once it sits next to other counts.
teamstat = ep.getTeamStat(players_both_leagues).rename(
    columns={"nbr_players": "nbr_players_team"}
)

teamstat.head()

Unnamed: 0,team,season,fw_def,gp,g,a,tp,pim,avg_g_team,avg_a_team,avg_tp_team,avg_pim_team,avg_+/-_team,nbr_players_team
0,AIK,2012-13,DEF,383,19,58,77,126,0.049608,0.151436,0.201044,0.328982,-6.363636,11
1,AIK,2012-13,FW,681,104,135,239,290,0.152717,0.198238,0.350954,0.425844,-5.190476,21
2,AIK,2013-14,DEF,403,21,54,75,229,0.052109,0.133995,0.186104,0.568238,-5.0,15
3,AIK,2013-14,FW,679,103,126,229,368,0.151694,0.185567,0.337261,0.541973,-5.869565,23
4,AIK,2014-15,DEF,327,17,60,77,237,0.051988,0.183486,0.235474,0.724771,-2.916667,12


In [6]:
# Get season statistics per league and tag each frame with its league name.
leaguestat_shl = ep.getSeasonStat(players_shl)
leaguestat_shl['league'] = 'SHL'
leaguestat_allsvenskan = ep.getSeasonStat(players_allsvenskan)
leaguestat_allsvenskan['league'] = 'allsvenskan'

# Give the +/- average and the player count a "_season" suffix so they cannot
# be confused with the team-level columns. rename() ignores keys that are not
# present, so one mapping covers both renames.
season_column_names = {
    "avg_+/-_team": "avg_+/-_season",
    "nbr_players": "nbr_players_season",
}
leaguestat_shl = leaguestat_shl.rename(columns=season_column_names)
leaguestat_allsvenskan = leaguestat_allsvenskan.rename(columns=season_column_names)

# Merge both leagues into one frame with a fresh 0..n-1 index.
# DataFrame.append was removed in pandas 2.0; pd.concat with ignore_index=True
# is its direct replacement and gives the same rows in the same order.
leaguestat = pd.concat([leaguestat_shl, leaguestat_allsvenskan], ignore_index=True)
leaguestat.head()


Unnamed: 0,season,fw_def,gp,g,a,tp,pim,avg_g_season,avg_a_season,avg_tp_season,avg_pim_season,avg_+/-_season,nbr_players_season,league
0,2012-13,DEF,4636,289,802,1091,2479,0.062338,0.172994,0.235332,0.534728,0.136691,139,SHL
1,2012-13,FW,8325,1291,1793,3084,4125,0.155075,0.215375,0.37045,0.495495,-0.303419,234,SHL
2,2013-14,DEF,4712,286,770,1056,2643,0.060696,0.163413,0.224109,0.560908,0.191489,141,SHL
3,2013-14,FW,8380,1456,1851,3307,4659,0.173747,0.220883,0.39463,0.555967,-0.5,246,SHL
4,2014-15,DEF,4644,256,839,1095,2536,0.055125,0.180663,0.235788,0.546081,0.079137,139,SHL


In [7]:
# Build the per-player metadata table from the stacked player rows of both leagues.
players_for_metadata = pd.concat([players_shl, players_allsvenskan])
playersmeta = ep.getPlayerMetadata(players_for_metadata)
playersmeta.head()


Unnamed: 0,link,playername,fw_def
1163,https://www.eliteprospects.com/player/99865/au...,Austin Farley,FW
1906,https://www.eliteprospects.com/player/99862/er...,Erik Flood,DEF
551,https://www.eliteprospects.com/player/99859/fr...,Fredrik Forsberg,FW
33,https://www.eliteprospects.com/player/9985/den...,Dennis Rasmussen,FW
278,https://www.eliteprospects.com/player/9938/mik...,Mikael Eriksson,FW


In [13]:
# Get the player profile links and pass them to getPlayerStats.
playerlinks = playersmeta['link']

# Data directory: defaults to the original location, but can be redirected
# with the ICEHOCKEY_DATA_DIR environment variable so the notebook also runs
# on other machines.
playerstats_dir = Path(os.environ.get(
    "ICEHOCKEY_DATA_DIR", "C:/Users/marcu/Documents/projects/icehockey/data"))
playerstats_path = playerstats_dir / "playerstats.csv"

# getPlayerStats visits every player link, so only call it when there is no
# cached CSV yet (previously this was toggled by commenting lines in and out).
if not playerstats_path.exists():
    playerstats_dir.mkdir(parents=True, exist_ok=True)
    ep.getPlayerStats(playerlinks).to_csv(playerstats_path)

# Always load from the CSV, also right after scraping, so the frame has the
# same columns and dtypes whether or not the cache existed.
playerstats = pd.read_csv(playerstats_path)


In [14]:
# Preview the first rows of the player stats read from the CSV.
playerstats.head()

Unnamed: 0,season,Team,League,GP,G,A,TP,PIM,+/-,Unnamed: 9,POST,GP.1,G.1,A.1,TP.1,PIM.1,+/-.1,link
0,2006-07,Team Illinois Bantam Minor AAA,T1EMBHL,31,5,12,17,36,,|,,,,,,,,https://www.eliteprospects.com/player/99865/au...
1,2007-08,CYA Bantam Major AAA,T1EBHL,28,21,8,29,176,,|,,,,,,,,https://www.eliteprospects.com/player/99865/au...
2,2008-09,Team Illinois U16,T1EHL U16,29,7,18,25,4,,|,,,,,,,,https://www.eliteprospects.com/player/99865/au...
3,2009-10,Chicago Mission U16,T1EHL U16,38,19,19,38,44,,|,,,,,,,,https://www.eliteprospects.com/player/99865/au...
4,2010-11,Chicago Mission U18,T1EHL U18,16,10,7,17,35,,|,,,,,,,,https://www.eliteprospects.com/player/99865/au...


In [28]:
# Clean output from playerstats
df_players_clean = ep.dataprep_players(playerstats)

# Remove the “A” / “C” markers from team names (presumably alternate-captain /
# captain tags - TODO confirm). regex=False makes this a literal replacement
# regardless of the pandas version's default for `regex`.
for marker in ('“A”', '“C”'):
    df_players_clean['team'] = df_players_clean['team'].str.replace(marker, '', regex=False)

# Removing a marker can leave whitespace behind, which would stop the name
# from matching the same team in other tables when merging on 'team'.
df_players_clean['team'] = df_players_clean['team'].str.strip()

# League names are compared case-insensitively downstream: lower-case both sides.
df_players_clean['league'] = df_players_clean['league'].str.lower()
leaguestat['league'] = leaguestat['league'].str.lower()


In [29]:
# Preview the first rows of the cleaned player data.
df_players_clean.head()

Unnamed: 0,link,season,team,league,gp,g,a,tp,pim,+/-,primary_team,league_seasons,team_seasons,avg_g,avg_a,avg_tp,avg_pim
0,https://www.eliteprospects.com/player/10014/me...,2005-06,Lycksele SK,division 2,9.0,1.0,2.0,3.0,0.0,,True,1,1,0.111111,0.222222,0.333333,0.0
1,https://www.eliteprospects.com/player/10014/me...,2006-07,Skellefteå AIK J18,j18 allsvenskan,14.0,5.0,4.0,9.0,20.0,,True,1,1,0.357143,0.285714,0.642857,1.428571
2,https://www.eliteprospects.com/player/10014/me...,2007-08,Skellefteå AIK J18,j18 elit,16.0,10.0,8.0,18.0,4.0,,True,1,2,0.625,0.5,1.125,0.25
3,https://www.eliteprospects.com/player/10014/me...,2008-09,Skellefteå AIK J20,superelit,34.0,10.0,14.0,24.0,58.0,,True,1,1,0.294118,0.411765,0.705882,1.705882
4,https://www.eliteprospects.com/player/10014/me...,2009-10,Skellefteå AIK,shl,36.0,2.0,0.0,2.0,8.0,-2.0,True,1,1,0.055556,0.0,0.055556,0.222222


In [30]:

# Every merge below is a left join that should only add columns, so the row
# count must stay the same from start to finish.
expected_rows = len(df_players_clean)
print('Nbr players', expected_rows)

# Add metadata (explicit join key instead of relying on whichever columns the
# two frames happen to share)
df_players = pd.merge(df_players_clean, playersmeta,
                      how='left', on='link')

print('Nbr players', len(df_players))

# Add seasondata
season_columns = ['league', 'season', 'fw_def',
                  'avg_g_season', 'avg_a_season', 'avg_tp_season',
                  'avg_pim_season', 'avg_+/-_season',
                  'nbr_players_season']
df_players = pd.merge(df_players, leaguestat[season_columns],
                      how='left',
                      on=['league', 'fw_def', 'season'])

# Add teamdata
team_columns = ['team', 'season', 'fw_def',
                'avg_g_team', 'avg_a_team', 'avg_tp_team',
                'avg_pim_team', 'avg_+/-_team',
                'nbr_players_team']
df_players = pd.merge(df_players, teamstat[team_columns],
                      how='left',
                      on=['team', 'fw_def', 'season'])

print('Nbr players', len(df_players))

# Fail loudly if a join key was not unique on the right-hand side and rows
# were duplicated, instead of relying on someone comparing the printed counts.
assert len(df_players) == expected_rows, (
    f"left merges changed the row count: {expected_rows} -> {len(df_players)}")

# Calculate difference to averages: (value - baseline) / baseline.
# Each entry is (value column, baseline column, result column) with "{}" standing
# for the statistic. Comparison is the outer loop so the new columns are
# created in the same order as before.
# NOTE: a baseline of 0 produces inf/NaN rather than an error.
comparisons = [
    ('avg_{}', 'avg_{}_team', 'avg_{}_vs_team'),
    ('avg_{}', 'avg_{}_season', 'avg_{}_vs_season'),
    ('avg_{}_team', 'avg_{}_season', 'avg_{}_team_vs_season'),
]
for value_tpl, baseline_tpl, result_tpl in comparisons:
    for stat in ('g', 'a', 'tp'):
        value = df_players[value_tpl.format(stat)]
        baseline = df_players[baseline_tpl.format(stat)]
        df_players[result_tpl.format(stat)] = (value - baseline) / baseline


Nbr players 30992
Nbr players 30992
Nbr players 30992


In [31]:
# Add "<column>_prev" columns holding each player's value from the preceding
# row (used for analysis). Players are identified by 'link'; a player's first
# row gets NaN.
# NOTE(review): shift(1) takes the previous *row* per player, so this assumes
# rows are already in season order within each player - confirm.
lag_sources = [
    'avg_g', 'avg_a', 'avg_tp', '+/-',
    'avg_g_vs_team', 'avg_a_vs_team', 'avg_tp_vs_team',
    'avg_g_vs_season', 'avg_a_vs_season', 'avg_tp_vs_season',
    'avg_g_team_vs_season', 'avg_a_team_vs_season', 'avg_tp_team_vs_season',
    'league',
]

for source_col in lag_sources:
    lagged = df_players.groupby(['link'])[source_col].shift(1)
    df_players[source_col + '_prev'] = lagged

df_players.head()

Unnamed: 0,link,season,team,league,gp,g,a,tp,pim,+/-,...,avg_g_vs_team_prev,avg_a_vs_team_prev,avg_tp_vs_team_prev,avg_g_vs_season_prev,avg_a_vs_season_prev,avg_tp_vs_season_prev,avg_g_team_vs_season_prev,avg_a_team_vs_season_prev,avg_tp_team_vs_season_prev,league_prev
0,https://www.eliteprospects.com/player/10014/me...,2005-06,Lycksele SK,division 2,9.0,1.0,2.0,3.0,0.0,,...,,,,,,,,,,
1,https://www.eliteprospects.com/player/10014/me...,2006-07,Skellefteå AIK J18,j18 allsvenskan,14.0,5.0,4.0,9.0,20.0,,...,,,,,,,,,,division 2
2,https://www.eliteprospects.com/player/10014/me...,2007-08,Skellefteå AIK J18,j18 elit,16.0,10.0,8.0,18.0,4.0,,...,,,,,,,,,,j18 allsvenskan
3,https://www.eliteprospects.com/player/10014/me...,2008-09,Skellefteå AIK J20,superelit,34.0,10.0,14.0,24.0,58.0,,...,,,,,,,,,,j18 elit
4,https://www.eliteprospects.com/player/10014/me...,2009-10,Skellefteå AIK,shl,36.0,2.0,0.0,2.0,8.0,-2.0,...,,,,,,,,,,superelit


In [32]:
# Write the final player table to CSV without the row index.
# The target directory defaults to the original location and can be redirected
# with the ICEHOCKEY_DATA_DIR environment variable.
export_dir = Path(os.environ.get(
    "ICEHOCKEY_DATA_DIR", "C:/Users/marcu/Documents/projects/icehockey/data"))
df_players.to_csv(export_dir / "df_players.csv", index=False)