# Prepare ice hockey data from swehockey

In [1]:
import pandas as pd
import numpy as np

import datetime

import swehockey.swehockey_scraper as swe 

In [2]:
# Load the schedule ids that the scraper is run for. All columns are read as
# strings (dtype=str) so the ids are kept exactly as written in the file.
#
# `on_bad_lines="warn"` replaces `error_bad_lines=False`: that keyword was
# deprecated in pandas 1.3 and removed in pandas 2.0, where it raises a
# TypeError. Malformed rows are still skipped with a warning, as before.
df_scheduleid = pd.read_csv(
    "https://raw.githubusercontent.com/msjoelin/swehockey_scraper/master/data/scheduleid.csv",
    on_bad_lines="warn",
    dtype=str,
)

df_scheduleid.head()

Unnamed: 0,schedule_id,league,season
0,10371,shl,2019/20
1,9171,shl,2018/19
2,8121,shl,2017/18
3,7132,shl,2016/17
4,6052,shl,2015/16


In [3]:
# Fetch the games for every schedule id in `df_scheduleid`
games = swe.getGames(
    df_scheduleid
)
games.head(n=5)

Unnamed: 0,date,game,score,periodscore,spectators,schedule_id,game_id,league,season
0,2019-09-14,Frölunda HC - Brynäs IF,2 - 5,"(0-2, 0-3, 2-0)",12044,10371,441805,shl,2019/20
1,2019-09-14,Skellefteå AIK - IK Oskarshamn,5 - 4,"(0-1, 3-3, 1-0, 1-0)",5137,10371,441808,shl,2019/20
2,2019-09-14,Leksands IF - IF Malmö Redhawks,5 - 2,"(1-2, 0-0, 4-0)",7650,10371,441809,shl,2019/20
3,2019-09-14,Djurgårdens IF - Linköping HC,4 - 2,"(0-1, 3-1, 1-0)",7018,10371,441803,shl,2019/20
4,2019-09-14,Färjestad BK - Örebro HK,4 - 6,"(2-2, 1-2, 1-2)",8250,10371,441804,shl,2019/20


In [4]:
# Run the scraper's cleanGames step on a copy, so the raw `games` frame
# is not the object handed to the cleaning function.
df_games_clean = swe.cleanGames(
    games.copy()
)
df_games_clean.head(n=5)

Unnamed: 0,date,game,score,periodscore,spectators,schedule_id,game_id,league,season,home,...,p4score_home,p4score_away,p5score_home,p5score_away,result,result_p1,result_p2,result_p3,result_p4,result_p5
0,2019-09-14,Frölunda HC - Brynäs IF,2 - 5,"0-2, 0-3, 2-0",12044,10371,441805,shl,2019/20,Frölunda HC,...,,,,,away,away,away,home,,
1,2019-09-14,Skellefteå AIK - IK Oskarshamn,5 - 4,"0-1, 3-3, 1-0, 1-0",5137,10371,441808,shl,2019/20,Skellefteå AIK,...,1.0,0.0,,,draw,away,draw,home,home,
2,2019-09-14,Leksands IF - IF Malmö Redhawks,5 - 2,"1-2, 0-0, 4-0",7650,10371,441809,shl,2019/20,Leksands IF,...,,,,,home,away,draw,home,,
3,2019-09-14,Djurgårdens IF - Linköping HC,4 - 2,"0-1, 3-1, 1-0",7018,10371,441803,shl,2019/20,Djurgårdens IF,...,,,,,home,away,home,home,,
4,2019-09-14,Färjestad BK - Örebro HK,4 - 6,"2-2, 1-2, 1-2",8250,10371,441804,shl,2019/20,Färjestad BK,...,,,,,away,draw,away,away,,


In [5]:
# Persist the cleaned game-level data; the DataFrame index is not written.
df_games_clean.to_csv(
    "data/df_games.csv",
    index=False,
)

In [18]:
# Build the team-level dataframe from the cleaned games
df_teams = swe.getTeamData(
    df_games_clean
)
df_teams.head(n=5)

Unnamed: 0,date,game,score,periodscore,spectators,schedule_id,game_id,league,season,team,...,H2H_W,H2H_D,H2H_L,points,points_cum,points_cum_prev,points_cum_prev_avg,points_cum_h_a,points_cum_h_a_prev,points_cum_h_a_prev_avg
0,2007-09-17,Leksands IF - AIK IF,7 - 3,"3-1, 2-1, 2-1",6095,9826,422474,allsvenskan,2007/08,AIK,...,0.0,0.0,0.0,0,0,0,,0,0,
1,2007-09-19,AIK IF - Bofors IK,6 - 4,"1-1, 3-1, 2-2",1554,9826,422481,allsvenskan,2007/08,AIK,...,0.0,0.0,0.0,3,3,0,0.0,3,0,
2,2007-09-21,Almtuna IS - AIK IF,2 - 3,"0-0, 1-1, 1-1, 0-1",1008,9826,422484,allsvenskan,2007/08,AIK,...,0.0,0.0,0.0,2,5,3,1.5,2,0,0.0
3,2007-09-26,AIK IF - IF Björklöven,6 - 2,"4-0, 1-2, 1-0",1602,9826,422494,allsvenskan,2007/08,AIK,...,0.0,0.0,0.0,3,8,5,1.666667,6,3,3.0
4,2007-09-28,Rögle BK - AIK IF,2 - 0,"1-0, 0-0, 1-0",2988,9826,422499,allsvenskan,2007/08,AIK,...,0.0,0.0,0.0,0,8,8,2.0,2,2,1.0


In [16]:
# Load the coach replacements and parse `change_date`, which is stored as
# day/month/year text in the CSV, into datetimes.
df_coachreplace = pd.read_csv(
    "https://raw.githubusercontent.com/msjoelin/icehockey-analysis/master/data/shl_coach_replacement.csv",
    encoding='utf-8',
).assign(
    change_date=lambda frame: pd.to_datetime(frame['change_date'], format='%d/%m/%Y')
)

df_coachreplace.head(30)

Unnamed: 0,season,coach,team,change_date
0,2012/13,Anders Forsberg,Skellefteå AIK,2013-02-04
1,2012/13,Dan Tangnes,Rögle BK,2012-10-24
2,2012/13,Per-Erik Johnsson,AIK,2012-12-03
3,2012/13,Janne Karlsson,Växjö Lakers HC,2012-10-14
4,2013/14,Bäcklin / Franzen,AIK,2014-03-04
5,2013/14,Ulf Dahlen,HV71,2013-12-15
6,2013/14,Andreas Johansson,Färjestad BK,2013-12-05
7,2013/14,Patrik Ross,Örebro HK,2013-11-21
8,2014/15,Anders Forsberg,MODO Hockey,2015-01-04
9,2014/15,Tommy Jonsson,Brynäs IF,2015-01-19


In [19]:
# Attach the coach-replacement columns to every team-game row, matching on
# season and team (left join: games without a replacement keep NaN/NaT).
#
# This cell reassigns `df_teams`, so columns added by a previous run are
# dropped first. Without that, re-running the cell merges a second time,
# produces suffixed duplicates (`coach_x`/`coach_y`, ...) and then fails with
# a KeyError on `change_date`. On a first run the drop is a no-op.
merge_keys = ['season', 'team']
coach_cols = [col for col in df_coachreplace.columns if col not in merge_keys]
df_teams = pd.merge(
    df_teams.drop(columns=coach_cols + ['bef_after_change'], errors='ignore'),
    df_coachreplace,
    on=merge_keys,
    how='left',
)
# NOTE(review): a team with more than one replacement in the same season would
# get its game rows duplicated by this left merge — confirm the CSV has at
# most one row per (season, team).

# Game dates must be datetimes to be comparable with `change_date`.
df_teams['date'] = pd.to_datetime(df_teams['date'])

# Label each game relative to the coach change. Comparisons against NaT are
# False, so rows without a replacement are left as NaN.
df_teams.loc[(df_teams['change_date'] > df_teams['date']), 'bef_after_change'] = 'before'
df_teams.loc[(df_teams['change_date'] <= df_teams['date']), 'bef_after_change'] = 'after'

df_teams.head()

Unnamed: 0,date,game,score,periodscore,spectators,schedule_id,game_id,league,season,team,...,points,points_cum,points_cum_prev,points_cum_prev_avg,points_cum_h_a,points_cum_h_a_prev,points_cum_h_a_prev_avg,coach,change_date,bef_after_change
0,2007-09-17,Leksands IF - AIK IF,7 - 3,"3-1, 2-1, 2-1",6095,9826,422474,allsvenskan,2007/08,AIK,...,0,0,0,,0,0,,,NaT,
1,2007-09-19,AIK IF - Bofors IK,6 - 4,"1-1, 3-1, 2-2",1554,9826,422481,allsvenskan,2007/08,AIK,...,3,3,0,0.0,3,0,,,NaT,
2,2007-09-21,Almtuna IS - AIK IF,2 - 3,"0-0, 1-1, 1-1, 0-1",1008,9826,422484,allsvenskan,2007/08,AIK,...,2,5,3,1.5,2,0,0.0,,NaT,
3,2007-09-26,AIK IF - IF Björklöven,6 - 2,"4-0, 1-2, 1-0",1602,9826,422494,allsvenskan,2007/08,AIK,...,3,8,5,1.666667,6,3,3.0,,NaT,
4,2007-09-28,Rögle BK - AIK IF,2 - 0,"1-0, 0-0, 1-0",2988,9826,422499,allsvenskan,2007/08,AIK,...,0,8,8,2.0,2,2,1.0,,NaT,


In [20]:
# Persist the team-level data; the DataFrame index is not written.
df_teams.to_csv(
    "data/df_teams.csv",
    index=False,
)