# Projections

In [1]:
import pandas as pd
import numpy as np

# Let pandas render up to 1,000 rows before truncating a displayed frame.
pd.set_option("display.max_rows", 1_000)

### Categories

In [2]:
# Stat columns kept for skaters, in the order they appear in the output.
SKATER_CATEGORIES = [
    "goals",
    "assists",
    "powerplay_points",
    "shots_on_goal",
    "hits",
    "blocks",
    "game_winning_goals",
]

# Stat columns kept for goalies, in the order they appear in the output.
GOALIE_CATEGORIES = [
    "wins",
    "save_percentage",
    "saves",
    "shutouts",
]

### Players

In [3]:
# Read the Yahoo ADP player list and keep only the identifying columns,
# in this fixed order.
df = pd.read_csv("../data/adp-yahoo_com.csv").loc[:, ["team", "name", "position"]]

df.head()

Unnamed: 0,team,name,position
0,Edm,Connor McDavid,C
1,Edm,Leon Draisaitl,"C,LW"
2,Col,Nathan MacKinnon,C
3,TB,Nikita Kucherov,RW
4,TB,Andrei Vasilevskiy,G


### Projection Data

In [4]:
# Stack the projection tables of every source on top of each other; players
# covered by more than one source therefore appear on several rows.
prj = pd.concat(
    [
        pd.read_csv(source_csv)
        for source_csv in (
            "../data/projections-cbs_com.csv",
            "../data/projections-dfo.csv",
        )
    ]
)

In [5]:
# Collapse the stacked projections into one row per player by averaging each
# category across sources, rounded to 3 decimals.
#
# * `assists` is coerced to numeric first; entries that cannot be parsed
#   become NaN, which the group mean skips.
# * The coercion goes through `.assign`, so the frame produced by the concat
#   cell is not modified column by column before the reduction.
# * The earlier sign flip of `goals_against_average` is gone: that column is in
#   neither SKATER_CATEGORIES nor GOALIE_CATEGORIES, so the selection below
#   dropped it and the flip never reached the output. It also made this cell
#   raise a KeyError when run a second time, because the reduced `prj` no
#   longer carries the column.
prj = (
    prj
    .assign(assists=lambda frame: pd.to_numeric(frame["assists"], errors="coerce"))
    [["name"] + SKATER_CATEGORIES + GOALIE_CATEGORIES]
    .groupby("name")
    .mean()
    .round(3)
    .reset_index()
)

prj.head()

Unnamed: 0,name,goals,assists,powerplay_points,shots_on_goal,hits,blocks,game_winning_goals,wins,save_percentage,saves,shutouts
0,Aaron Dell,,,,,,,,20.11,0.9,1581.43,2.52
1,Aaron Ekblad,14.375,36.975,12.75,213.425,76.6,88.6,2.6,,,,
2,Adam Boqvist,6.1,24.4,7.7,100.3,75.4,84.6,1.0,,,,
3,Adam Erne,10.1,11.9,3.6,101.3,170.2,31.5,1.3,,,,
4,Adam Fox,8.94,50.395,20.65,151.06,43.1,144.2,2.4,,,,


### Export

In [6]:
# Attach the averaged projections to the player list.
#
# Only the identifying columns of `df` take part in the merge. On the first
# run that is the whole frame; on a re-run it stops the already merged stat
# columns from colliding with `prj` and coming back as `_x` / `_y` duplicates.
#
# Inner join on `name`: a player missing from either frame is left out.
df = pd.merge(
    df[["team", "name", "position"]],
    prj,
    how="inner",
    on="name",
)
df.head()

Unnamed: 0,team,name,position,goals,assists,powerplay_points,shots_on_goal,hits,blocks,game_winning_goals,wins,save_percentage,saves,shutouts
0,Edm,Connor McDavid,C,46.655,87.38,34.37,272.72,59.7,32.6,8.2,,,,
1,Edm,Leon Draisaitl,"C,LW",48.095,73.52,32.24,247.775,48.8,21.7,7.4,,,,
2,Col,Nathan MacKinnon,C,40.615,67.605,29.43,381.13,56.0,28.8,5.5,,,,
3,TB,Nikita Kucherov,RW,37.235,69.135,29.135,249.99,44.6,24.9,6.7,,,,
4,TB,Andrei Vasilevskiy,G,,,,,,,,43.37,0.906,2018.83,4.65


In [7]:
# Write the merged player projections to disk, leaving out the row index.
df.to_csv(
    path_or_buf="../data/projections-generated.csv",
    index=False,
)