In [2]:
import csv
import pandas as pd
import numpy as np
import sklearn

from sklearn import linear_model
from sklearn.metrics import mean_squared_error, r2_score
# Linear-regression estimator instance, created once here and fitted/scored
# later in the notebook.
reg = linear_model.LinearRegression()

In [3]:
# Location of the game-data CSV, relative to the notebook's working directory.
path = './data/game_data_public.MKM.PremierDraft.csv'

# Hand the path straight to pandas: it opens and closes the file itself and
# decodes it as UTF-8 by default, instead of inheriting whatever encoding the
# platform assigns to a bare open() call.
base_df = pd.read_csv(path)

In [96]:
# Keep only the rows whose draft_time compares greater than '2024-02-19'.
# The comparison is made on the column values as loaded (nothing is parsed to
# datetime here).
# NOTE(review): presumably draft_time holds ISO-formatted timestamp strings, so
# lexicographic order equals chronological order -- confirm.
df = base_df.loc[base_df['draft_time'] > '2024-02-19']

In [97]:
all_columns = list(df.columns)

# Card names are recovered from the 'drawn_<card name>' columns. Match on the
# prefix rather than on a substring, so a column that merely contains 'drawn_'
# somewhere in the middle is not mistaken for a card column.
name_columns = [col for col in all_columns if col.startswith('drawn_')]

# Strip only the leading prefix; splitting on every '_' would truncate any
# card name that itself contains an underscore.
names = [col[len('drawn_'):] for col in name_columns]


def name_gen(x):
    """Return the column labels '<x>_<card name>' for every entry of ``names``.

    ``names`` is looked up when the function is called, so the result follows
    whatever the module-level list holds at that moment.
    """
    return [f'{x}_{name}' for name in names]

In [98]:
# Pull out the opening_hand_* and drawn_* columns and relabel both by bare card
# name, so the two frames line up column-for-column.
oh_df = df[name_gen('opening_hand')].set_axis(names, axis=1)
drawn_df = df[name_gen('drawn')].set_axis(names, axis=1)

# "In hand" = opening hand + drawn, added element-wise per row and card.
ih_df = oh_df.add(drawn_df)

In [99]:
# Row-wise total of ih_df: one value per row, summed across all card columns.
cards_seen = ih_df.sum(axis=1).rename('cards_seen')

In [100]:
# Per-card column totals of ih_df: over every row, and over only the rows
# selected by df['won'].
gih_counts = ih_df.sum()
gih_win_counts = ih_df.loc[df['won']].sum()

# Share of each card's in-hand total that falls in the rows selected by 'won'.
gihwr = gih_win_counts.div(gih_counts).rename('gihwr')

In [101]:
# The deck_* columns, relabelled by bare card name.
deck_df = df[name_gen('deck')].set_axis(names, axis=1)

# Per-card column totals of deck_df: over every row, and over only the rows
# selected by df['won'].
deck_counts = deck_df.sum()
deck_win_counts = deck_df.loc[df['won']].sum()

# Share of each card's deck total that falls in the rows selected by 'won'.
gpwr = deck_win_counts.div(deck_counts).rename('gpwr')

In [102]:
# Per-card difference between the in-hand rate (gihwr) and the in-deck rate (gpwr).
ihd = gihwr.sub(gpwr).rename('in-hand delta')

In [103]:
# Divide every row of ih_df by that row's cards_seen value, then scale by 40.
# NOTE(review): 40 is presumably the deck size -- confirm. Any constant factor
# cancels wherever gwih is only used as a ratio of two of its own sums.
gwih = ih_df.div(cards_seen, axis='index').mul(40)

In [104]:
# Per-card share of the gwih column total that comes from the rows selected by
# df['won'].
gwih_win_totals = gwih.loc[df['won']].sum()
gwihwr = gwih_win_totals.div(gwih.sum()).rename('gwihwr')

In [105]:
# Per-card difference between the row-normalised in-hand rate (gwihwr) and the
# in-deck rate (gpwr).
gwihd = gwihwr.sub(gpwr).rename('game-weighted in-hand delta')

In [106]:
# Card-by-card matrix: entry (i, j) is the sum over rows of
# deck_df[i] * deck_df[j].
deck_adjacency = deck_df.transpose().dot(deck_df)

# Normalise each column to sum to 1. A card whose deck_df column is all zeros
# has a zero column total; dividing by 1 instead leaves that column at 0 rather
# than producing 0/0 = NaN.
column_totals = deck_adjacency.sum()
average_marginal_decklist = deck_adjacency / column_totals.where(column_totals != 0, 1)

# gpwr is NaN for a card with a zero deck total. Such a card carries zero
# weight in every other card's column, but NaN * 0 is still NaN, so a single
# such card would turn every entry of the dot product into NaN. Treat its
# contribution as 0 instead.
d1gpwr = gpwr.fillna(0).dot(average_marginal_decklist)

# Subtracting from the unfilled gpwr keeps d1d at NaN for a card with no deck
# total, while every other card gets a finite value.
d1d = gpwr - d1gpwr
d1d.name = 'distance-one delta'

In [107]:
# Stack the three per-card delta series into one frame: one row per series
# (labelled by the series name), one column per card.
ihd_df = pd.concat([ihd, gwihd, d1d], axis=1).transpose()

In [108]:
# The two non-card columns used by the turn-count analysis ...
turn_info_df = df.loc[:, ['num_turns', 'won']]
# ... placed side by side with the per-card deck columns (aligned on the index).
num_turn_bias_df = pd.concat((deck_df, turn_info_df), axis='columns')

In [109]:
# For every card: restrict to the rows where deck_df[card] > 0, weight 'won' by
# the card's deck_df value, and correlate that with 'num_turns'.
#
# The needed columns are read from deck_df and df directly (they share the same
# index) instead of from a combined wide frame. That avoids boolean-filtering
# every card column once per card, and keeps this cell re-runnable even after
# the name num_turn_bias_df has been reassigned to something else.
num_turn_bias_map = {}
for name in names:
    in_deck = deck_df[name] > 0
    weighted_wins = df.loc[in_deck, 'won'] * deck_df.loc[in_deck, name]
    num_turn_bias_map[name] = weighted_wins.corr(df.loc[in_deck, 'num_turns'])

In [110]:
# Per-card correlation values as a Series ordered like `names` ...
num_turn_bias = pd.Series(data=num_turn_bias_map, index=names)
# ... and as a single-column frame (column label 0, one row per card).
num_turn_bias_df = pd.DataFrame({0: num_turn_bias})

In [111]:
# Regress the gap between the two in-hand deltas on the per-card turn-count
# correlation, then display the fitted model's score on the same data.
delta_gap = ihd - gwihd
reg.fit(num_turn_bias_df, delta_gap).score(num_turn_bias_df, delta_gap)

0.6925023460850965