In [1]:
import csv
import pandas as pd
import numpy as np

In [5]:
# Location of the game-level CSV export, relative to the notebook's working directory.
path = './data/game_data_public.MKM.PremierDraft.csv'
# Hand the path to pandas directly instead of a handle from a bare open():
# a text handle opened without `encoding=` is decoded with the platform's
# locale encoding, whereas read_csv on a path defaults to UTF-8, so the
# parsed text no longer depends on the machine the notebook runs on.
base_df = pd.read_csv(path)

In [6]:
# Working frame used by every later cell; `base_df` keeps the reference to
# what was loaded.
# NOTE(review): pd.DataFrame(frame) does not deep-copy by default — confirm
# whether an independent copy of `base_df` was intended here.
df = pd.DataFrame(base_df)

In [7]:
all_columns = list(df.columns)
# Anchor on the prefix rather than a substring match so that only genuine
# 'drawn_<card>' columns are picked up.
name_columns = [col for col in all_columns if col.startswith('drawn_')]
# Strip just the leading prefix; splitting on every '_' would truncate any
# card name that itself contains an underscore.
names = [col[len('drawn_'):] for col in name_columns]


def name_gen(x):
    """Return the column labels for every card under the column prefix `x`.

    Parameters
    ----------
    x : str
        Column-family prefix, e.g. 'drawn', 'opening_hand' or 'deck'.

    Returns
    -------
    list of str
        One f'{x}_{name}' label per entry of `names`, in the same order.
    """
    return [f'{x}_{name}' for name in names]

In [8]:
# Per-card opening-hand and drawn columns, each relabelled with the bare card
# names so the two frames line up column-for-column.
oh_df = df[name_gen('opening_hand')].set_axis(names, axis=1)
drawn_df = df[name_gen('drawn')].set_axis(names, axis=1)

# "In hand" = opening hand + drawn, per game and per card.
ih_df = oh_df.add(drawn_df)

In [9]:
# Total in-hand count per game (row-wise sum over all cards).
cards_seen = ih_df.sum(axis=1).rename('cards_seen')

# Complement against 40, floored at 1. Done as a vectorised subtract + clip
# rather than a Python-level lambda applied row by row.
# NOTE(review): 40 is presumably the deck size — confirm.
cards_not_seen = (40 - cards_seen).clip(lower=1).rename('cards_not_seen')

In [10]:
# Per-card in-hand totals: over all games, and over won games only
# (df['won'] is used as a boolean row mask).
gih_counts = ih_df.sum(axis=0)
gih_win_counts = ih_df.loc[df['won']].sum(axis=0)

# Share of each card's in-hand total that comes from won games.
gihwr = gih_win_counts.div(gih_counts).rename('gihwr')

In [11]:
# Per-card deck columns, relabelled with the bare card names.
deck_df = df[name_gen('deck')].set_axis(names, axis=1)

# Per-card deck totals: over all games, and over won games only.
deck_counts = deck_df.sum(axis=0)
deck_win_counts = deck_df.loc[df['won']].sum(axis=0)

# Share of each card's deck total that comes from won games.
gpwr = deck_win_counts.div(deck_counts).rename('gpwr')

In [12]:
# Per-card gap between the in-hand win rate and the in-deck win rate.
ihd = gihwr.sub(gpwr).rename('in-hand delta')

In [13]:
# Normalise each game's row by that game's cards_seen, then scale by 40.
gwih = ih_df.div(cards_seen, axis='index').mul(40)

In [14]:
# Same ratio as gihwr, but computed on the per-game normalised weights:
# weight accumulated in won games over weight accumulated in all games.
gwihwr = gwih.loc[df['won']].sum(axis=0).div(gwih.sum(axis=0)).rename('gwihwr')

In [15]:
# Per-card gap between the game-weighted in-hand win rate and the in-deck win rate.
gwihd = gwihwr.sub(gpwr).rename('game-weighted in-hand delta')

In [16]:
# Card-by-card co-occurrence matrix: entry (i, j) sums, over games, the
# product of the deck counts of cards i and j.
deck_adjacency = deck_df.T @ deck_df
# Scale every column by its own total so that each column sums to 1.
average_marginal_decklist = deck_adjacency.div(deck_adjacency.sum(axis=0), axis='columns')
# For each card, the average of gpwr over all cards, weighted by that card's
# normalised column.
d1gpwr = gpwr @ average_marginal_decklist

# Per-card gap between its own gpwr and that weighted average.
d1d = gpwr.sub(d1gpwr).rename('distance-one delta')

In [17]:
# Stack the three delta series as rows (row labels are the series names,
# columns are the card names) so one card's deltas can be read with ihd_df[card].
ihd_df = pd.DataFrame([ihd, gwihd, d1d])

In [18]:
# Summary statistics for each delta: the first two are printed, the last is
# left as the cell's final expression so the notebook renders it.
for delta in (d1d, ihd):
    print(delta.describe())
gwihd.describe()

count    326.000000
mean      -0.015658
std        0.032347
min       -0.187290
25%       -0.026344
50%       -0.009367
75%        0.005134
max        0.051000
Name: distance-one delta, dtype: float64
count    326.000000
mean       0.004061
std        0.023923
min       -0.118957
25%       -0.008126
50%        0.002711
75%        0.014222
max        0.080215
Name: in-hand delta, dtype: float64


count    326.000000
mean      -0.002154
std        0.026982
min       -0.146353
25%       -0.014706
50%       -0.003421
75%        0.010623
max        0.081036
Name: game-weighted in-hand delta, dtype: float64

In [20]:
# Game length and outcome, placed side by side with the per-card deck counts.
turn_info_df = df.loc[:, ['num_turns', 'won']]
speed_bias_df = pd.concat((deck_df, turn_info_df), axis='columns')

In [34]:
# For every card: keep only the games whose deck contains it, weight each
# game's win flag by the card's deck count, and correlate that with the
# number of turns the game lasted.
corr_map = {}
for name in names:
    # Select just the three columns that are read below. Boolean-indexing the
    # whole wide frame copied every card column on every iteration.
    filtered_df = speed_bias_df.loc[deck_df[name] > 0, ['won', 'num_turns', name]]
    weighted_wins = filtered_df['won'] * filtered_df[name]
    corr_map[name] = weighted_wins.corr(filtered_df['num_turns'])

In [35]:
# Per-card correlations as a Series, ordered like `names` so it aligns with
# the other per-card series.
corr = pd.Series(corr_map, index=names)

In [36]:
# Spot check: the wins-vs-turns correlation stored for a single card.
corr['On the Job']

-0.13422862598823776

In [42]:
# Spot check: the wins-vs-turns correlation stored for a single card.
corr['Novice Inspector']

-0.10842746291399476

In [43]:
# Across cards: correlation between the per-card wins-vs-turns correlation
# and the in-hand delta.
corr.corr(ihd)

0.09132755620549622

In [44]:
# Across cards: correlation between the per-card wins-vs-turns correlation
# and the game-weighted in-hand delta.
corr.corr(gwihd)

-0.23237711299218158

In [45]:
# Across cards: correlation between the per-card wins-vs-turns correlation
# and the distance-one delta.
corr.corr(d1d)

-0.428732219355698

In [41]:
# Spot check: the wins-vs-turns correlation stored for a single card.
corr['Gnaw to the Bone']

0.1762650232657035

In [46]:
# Baseline over all games: correlation between winning and game length.
speed_bias = speed_bias_df['won'].corr(speed_bias_df['num_turns'])

In [47]:
# Display the overall won-vs-num_turns correlation.
speed_bias

-0.046709522317764963

In [53]:
# The three deltas (one per row of ihd_df) for a single card.
ihd_df["Insidious Roots"]

in-hand delta                  0.014259
game-weighted in-hand delta   -0.003820
distance-one delta            -0.038463
Name: Insidious Roots, dtype: float64

In [56]:
# The three deltas (one per row of ihd_df) for a single card.
ihd_df['Aftermath Analyst']

in-hand delta                  0.002072
game-weighted in-hand delta   -0.005997
distance-one delta            -0.015631
Name: Aftermath Analyst, dtype: float64

In [57]:
# The three deltas (one per row of ihd_df) for a single card.
ihd_df['Tunnel Tipster']

in-hand delta                  0.001358
game-weighted in-hand delta    0.003357
distance-one delta             0.003302
Name: Tunnel Tipster, dtype: float64