In [1]:
import pandas as pd
import numpy as np

pd.options.display.max_rows = 1_000

### Pool Settings

In [2]:
# Bitcoin/Thugz League

TEAMS = 12
ROSTER = {'C': 2, 'LW': 2, 'RW': 2, 'D': 4, 'G': 2}

SKATER_CATEGORIES = {
    'goals': 7/8, 
    'assists': 7/8, 
    'plus_minus': 3/4, 
    'powerplay_points': 3/4, 
    'shots_on_goal': 1, 
    'hits': 1, 
    'blocks': 1
}

GOALIE_CATEGORIES = {
    "wins" : 1, 
    "save_percentage": 7/8, 
    "saves": 3/4, 
    "shutouts": 1/2
}

In [3]:
# Yahoo Public Leagues

TEAMS = 12
ROSTER = {'C': 2, 'LW': 2, 'RW': 2, 'D': 4, 'G': 2}

SKATER_CATEGORIES = {
    'goals': 7/8, 
    'assists': 1, 
    'plus_minus': 3/4, 
    'powerplay_points': 3/4, 
    'shots_on_goal': 1, 
    'hits': 1
}

GOALIE_CATEGORIES = {
    "wins" : 1, 
    "goals_against_average": 7/8,
    "save_percentage": 7/8, 
    "shutouts": 1/2
}

### Master List

In [4]:
pic = pd.read_csv("../data/picks.csv")
cap = pd.read_csv("../data/capfriendly.csv")

# merge
master = pd.merge(pic, cap, how="left", on="name")

# extra features
master["apv"] = master["pick"] 
master["pick"] = master["pick"].rank(method='average', ascending=True)
master['round'] = (master['pick'] // TEAMS) + 1

# Organize
master = master[['team', 'age', 'name', 'position', 'apv', 'pick', 'round']]

In [5]:
master.head()

Unnamed: 0,team,age,name,position,apv,pick,round
0,Edm,23.0,Connor McDavid,C,1.6,1.0,1.0
1,Col,25.0,Nathan MacKinnon,C,2.5,2.0,1.0
2,Edm,25.0,Leon Draisaitl,"C,LW",3.6,3.0,1.0
3,NYR,29.0,Artemi Panarin,LW,5.2,4.0,1.0
4,Was,35.0,Alex Ovechkin,LW,5.5,5.0,1.0


### Multiple Positions

In [6]:
multi = (
    master
    [["name", "position"]]
    .set_index(['name']) 
    .apply(lambda col: col.str.split(',').explode())
    .reset_index()
)

### Projection Data

In [7]:
cbs = pd.read_csv("../data/cbs.csv")
dfo = pd.read_csv("../data/dailyfaceoff.csv")
yah = pd.read_csv("../data/yahoo.csv")
proj = pd.concat([cbs, dfo, yah])

# fix weird assists issue
proj["assists"] = proj["assists"].apply(pd.to_numeric, errors="coerce")

### Goalies

In [8]:
goalies = proj[proj["position"] == "G"].copy()

goalies = (
    goalies
    [["name"] + list(GOALIE_CATEGORIES.keys())]
    .groupby("name")
    .mean()
    .apply(lambda x: (x - x.min()) / (x.max() - x.min()))
)

for key, value in GOALIE_CATEGORIES.items():
    goalies[key] *= value
    
goalies["rollup"] = goalies.apply(lambda row: row.sum(), axis=1)
goalies["rollup"] /= sum(GOALIE_CATEGORIES.values())
goalies["rollup"] *= 100

goalies = goalies.reset_index()
goalies = goalies[["name", "rollup"]]

goalies.head()

Unnamed: 0,name,rollup
0,Alex Stalock,42.78223
1,Alexandar Georgiev,42.2309
2,Anders Nilsson,43.054779
3,Andrei Vasilevskiy,78.556005
4,Anthony Stolarz,32.29466


### Skaters

In [9]:
skaters = proj[proj["position"] != "G"].copy()
skaters = (
    skaters
    [["name"] + list(SKATER_CATEGORIES.keys())]
    .groupby("name")
    .mean()
    .apply(lambda x: (x - x.min()) / (x.max() - x.min()))
)
     
for key, value in SKATER_CATEGORIES.items():
    skaters[key] *= value
    
skaters["rollup"] = skaters.apply(lambda row: row.sum(), axis=1)
skaters["rollup"] /= sum(SKATER_CATEGORIES.values())
skaters["rollup"] *= 100

skaters = skaters.reset_index()
skaters = skaters[["name", "rollup"]]

skaters.head()

Unnamed: 0,name,rollup
0,Aaron Ekblad,36.269599
1,Adam Boqvist,26.254231
2,Adam Fox,38.833474
3,Adam Gaudette,27.400717
4,Adam Henrique,36.265858


### Merge

In [10]:
rollup = pd.concat([skaters, goalies])
rollup = pd.merge(multi, rollup, how="inner", on="name").sort_values("rollup", ascending=False)
rollup.head()

Unnamed: 0,name,position,rollup
8,Andrei Vasilevskiy,G,78.556005
1,Nathan MacKinnon,C,73.572641
98,Philipp Grubauer,G,70.305893
0,Connor McDavid,C,70.161122
5,Alex Ovechkin,LW,68.465597


### VORP

In [11]:
vorp = rollup.copy()
vorp["vorp"] = vorp["rollup"]

for position, slots in ROSTER.items():
    replacement = (
        vorp[vorp['position'] == position]
        .sort_values('vorp', ascending=False)
        .head(slots * TEAMS)
        ['vorp']
        .mean()
    )
    vorp.loc[vorp['position'] == position, 'vorp'] = vorp['vorp'] - replacement

vorp.head()

Unnamed: 0,name,position,rollup,vorp
8,Andrei Vasilevskiy,G,78.556005,16.429889
1,Nathan MacKinnon,C,73.572641,18.128495
98,Philipp Grubauer,G,70.305893,8.179776
0,Connor McDavid,C,70.161122,14.716976
5,Alex Ovechkin,LW,68.465597,14.370504


### Value Over Next Round

In [12]:
vnr = pd.merge(vorp, master[["name", "round"]], how="inner", on="name")

vnr = (
    vnr
    .sort_values(["rollup"], ascending=[False])
    .groupby(["round", "position"])
    .head(1)
    .sort_values(["position", "round"])
)

vnr["round"] = vnr["round"] - 1
vnr["next"] = vnr["rollup"]
vnr = vnr[["round", "position", "next"]]

### Draft

In [13]:
draft = pd.merge(master, vorp, how="left", on="name", suffixes=("_yahoo", ""))
draft = pd.merge(draft, vnr, how="left", on=["position", "round"])

draft = draft.sort_values("vorp", ascending=False)
draft = draft.groupby("name").head(1)

draft["vorn"] = draft["rollup"] - draft["next"]
draft['rank'] = draft['vorp'].rank(method='average', ascending=False)
draft["arbitrage"] = draft["pick"] - draft["rank"]

draft = draft[[
    'team', 'age', 'name', 'position_yahoo', 
    'rollup', 'vorp', 'vorn',
    'round', 'pick', 'rank', 'arbitrage'
]]

draft["rollup"] = draft["rollup"].round(1)
draft["vorp"] = draft["vorp"].round(1)
draft["vorn"] = draft["vorn"].round(1)

draft.head()

Unnamed: 0,team,age,name,position_yahoo,rollup,vorp,vorn,round,pick,rank,arbitrage
1,Col,25.0,Nathan MacKinnon,C,73.6,18.1,11.0,1.0,2.0,1.0,1.0
7,TB,26.0,Andrei Vasilevskiy,G,78.6,16.4,10.9,1.0,7.0,2.0,5.0
0,Edm,23.0,Connor McDavid,C,70.2,14.7,7.6,1.0,1.0,3.0,-2.0
5,Was,35.0,Alex Ovechkin,LW,68.5,14.4,8.7,1.0,5.0,4.0,1.0
35,Nsh,30.0,Roman Josi,D,53.6,13.2,1.9,3.0,29.0,5.0,24.0


### Targets

In [14]:
draft["target"] = (
    (draft["age"] <= 30) &
    (draft["vorp"] >= -1) &
    (draft["arbitrage"] >= -12) & 
    (draft["vorn"] >= 0)
)

In [15]:
draft.to_csv("../data/draft-250.csv", index=False)