In [14]:
import eliteprospect_scraper as ep
import numpy as np
import pandas as pd

In [2]:
# Print the scraper module's built-in documentation (its description and function list)
help(ep)

Help on module eliteprospect_scraper:

NAME
    eliteprospect_scraper

DESCRIPTION
    Functions for collecting data from eliteprospects
    Can be used together to extract data for players, matches etc.

FUNCTIONS
    dataprep_players(playerstats)
        Takes data from players, clean it and return data frame that can be used for further analysis
    
    getPlayerMetadata(dfplayers)
        Create dataframe with metadata by players. 
        Input is dataframe created with function getPlayers
    
    getPlayerStats(playerlinks)
        Takes series of playerlinks to eliteprospect-profiles, 
        Return dataframe with stats by player and season
    
    getPlayers(league, year)
        Get all players for specific year and league; returns dataframe
        League input in format '2018-19'
    
    getSeasonStat(dfplayers)
        Create dataframe with aggregated statistics by season and position (forward/defenceman) 
        Input is dataframe created with function getPlayers
   

In [4]:
# Download the SHL player lists for 2012-13 and 2013-14, then stack them into one frame
shl_2012, shl_2013 = [ep.getPlayers('shl', season) for season in ('2012-13', '2013-14')]
players = pd.concat([shl_2012, shl_2013])

In [5]:
# Aggregate the combined getPlayers output per team and per season.
# NOTE(review): getTeamStat does not appear in the help(ep) listing earlier in
# this notebook -- confirm the installed module actually provides it.
teamstat = ep.getTeamStat(players)
seasonstat = ep.getSeasonStat(players)

# Print the whole season summary first, then only the first rows of the team summary
for summary in (seasonstat, teamstat.head()):
    print(summary)

In [10]:
# Build the per-player metadata frame from the combined getPlayers output
# and preview its first rows
playersmeta = ep.getPlayerMetadata(players)
playersmeta.head()

Unnamed: 0,link,playername,fw_def
551,https://www.eliteprospects.com/player/99859/fr...,Fredrik Forsberg,FW
33,https://www.eliteprospects.com/player/9985/den...,Dennis Rasmussen,FW
278,https://www.eliteprospects.com/player/9938/mik...,Mikael Eriksson,FW
220,https://www.eliteprospects.com/player/9915/mik...,Mikko Kousa,DEF
71,https://www.eliteprospects.com/player/9895/vil...,Ville Lajunen,DEF


In [28]:
# Take the profile links of the first 5 players and pass them to getPlayerStats
playerlinks = playersmeta['link'].head(n=5)
# Only the last expression of a cell is displayed automatically, so a bare
# `playerlinks` in the middle of the cell shows nothing; print it explicitly.
print(playerlinks)

playerstats = ep.getPlayerStats(playerlinks)
playerstats.head()

In [34]:
# Clean the per-season stats returned by getPlayerStats and preview the result
players_cleaned = ep.dataprep_players(playerstats)

players_cleaned.head()

Unnamed: 0,link,season,team,league,gp,g,a,tp,pim,+/-,primary_team,league_seasons,team_seasons,avg_g,avg_a,avg_tp,avg_pim
0,https://www.eliteprospects.com/player/9895/vil...,2004-05,Blues U18 Akatemia,Jr. B SM-sarja,18.0,1.0,2.0,3.0,8.0,,True,1,1,0.055556,0.111111,0.166667,0.444444
1,https://www.eliteprospects.com/player/9895/vil...,2005-06,Blues U20,Jr. A SM-liiga,22.0,2.0,2.0,4.0,10.0,9.0,True,1,1,0.090909,0.090909,0.181818,0.454545
2,https://www.eliteprospects.com/player/9895/vil...,2006-07,Blues U20,Jr. A SM-liiga,39.0,3.0,23.0,32.0,79.0,22.0,True,2,2,0.076923,0.589744,0.820513,2.025641
3,https://www.eliteprospects.com/player/9895/vil...,2007-08,KooKoo,Mestis,20.0,2.0,9.0,11.0,24.0,-1.0,True,1,1,0.1,0.45,0.55,1.2
4,https://www.eliteprospects.com/player/9895/vil...,2008-09,Blues,Liiga,45.0,8.0,11.0,19.0,20.0,0.0,True,1,1,0.177778,0.244444,0.422222,0.444444


In [None]:
# Add comparison columns (player vs team, player vs total, team vs total) and
# previous-season ("_prev") columns.
#
# The original cell was a function body pasted without its `def` line: every
# statement was indented (IndentationError when run as a cell) and it relied on
# `df_stats_primary`, which no cell in this notebook defines. It is wrapped in a
# function so the cell is valid on a fresh kernel and the input is explicit.
def add_comparison_features(df_stats_primary):
    """Return a copy of ``df_stats_primary`` with comparison and previous-season columns.

    Parameters
    ----------
    df_stats_primary : pandas.DataFrame
        Must contain ``link``, ``season``, ``+/-``, ``avg_+/-_team``,
        ``avg_+/-_season`` and, for each metric in ``g``, ``a``, ``tp``, ``pim``,
        the columns ``avg_<m>``, ``avg_<m>_team`` and ``avg_<m>_season``.

    Returns
    -------
    pandas.DataFrame
        A new frame sorted by ``link`` and ``season``; the input is not modified.
        Added columns, in this order:

        * ``<m>_vs_team``, ``<m>_vs_total``, ``<m>_team_vs_total``: relative
          difference ``(value - baseline) / baseline``. A zero baseline follows
          ordinary pandas division and is not special-cased.
        * ``+/-_vs_team``, ``+/-_vs_total``, ``+/-_team_vs_total``: plain
          differences. The player-level ones use the raw ``+/-`` column, not an
          ``avg_`` column.
        * ``<col>_prev`` for every column not in the exclusion list: the value
          from the preceding row of the same ``link``; the first row of each
          ``link`` gets NaN.

    Raises
    ------
    KeyError
        If one of the required columns is missing.

    Notes
    -----
    NOTE(review): the exclusion list contains ``'league_season'`` while the frame
    shown earlier in this notebook has a ``league_seasons`` column -- confirm
    which spelling is intended. The list is kept exactly as it was.
    """
    stats = df_stats_primary.copy()

    rate_metrics = ['g', 'a', 'tp', 'pim']
    # (suffix of the new column, suffix of the compared avg_ column,
    #  suffix of the baseline avg_ column, column holding the compared +/- value)
    comparisons = [
        ('_vs_team', '', '_team', '+/-'),
        ('_vs_total', '', '_season', '+/-'),
        ('_team_vs_total', '_team', '_season', 'avg_+/-_team'),
    ]

    for new_suffix, value_suffix, base_suffix, plus_minus_col in comparisons:
        for metric in rate_metrics:
            value = stats['avg_' + metric + value_suffix]
            baseline = stats['avg_' + metric + base_suffix]
            stats[metric + new_suffix] = (value - baseline) / baseline
        # +/- is compared as a plain difference, not a relative one
        stats['+/-' + new_suffix] = stats[plus_minus_col] - stats['avg_+/-' + base_suffix]

    # shift(1) below takes the preceding row within each link, so rows must be
    # in season order per player first
    stats = stats.sort_values(['link', 'season'])

    not_shifted = ['player', 'link', 'captain', 'season',
                   'league_season', 'primary_team', 'fw_def']
    cols_to_shift = [col for col in stats.columns if col not in not_shifted]
    prev_cols = [col + '_prev' for col in cols_to_shift]

    stats[prev_cols] = stats.groupby(['link'])[cols_to_shift].shift(1)
    return stats
    