In [53]:
import os
from dataclasses import dataclass

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import plotly.express as px

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

import replay_dtypes

In [2]:
# Paths
# The data directory can be overridden through the MTGAI_DATA_DIR environment
# variable; the default is the original local location, so existing setups
# keep working unchanged.
data_dir = os.environ.get('MTGAI_DATA_DIR', '/mnt/x/dev/mtgai/')
# os.path.join works whether or not data_dir ends with a path separator,
# unlike plain string concatenation.
game_data_path = os.path.join(data_dir, 'game_data_public.LCI.PremierDraft.csv.gz')
replay_data_path = os.path.join(data_dir, 'replay_data_public.LCI.PremierDraft.csv.gz')
abilities_path = os.path.join(data_dir, 'abilities.csv')
cards_path = os.path.join(data_dir, 'cards.csv')

In [3]:
# Read only the first 200 rows of the game-level file to keep exploration fast.
df_game = pd.read_csv(
    game_data_path,
    nrows=200,
)

In [38]:
# Column dtypes are supplied by the project-local replay_dtypes helper.
dtypes = replay_dtypes.get_dtypes(filename=replay_data_path)
# Widen every float16 column to float32 before parsing.
# NOTE(review): presumably because the CSV parser cannot read float16 directly — confirm.
adjusted_dtypes = {
    col: ('float32' if dtype == 'float16' else dtype)
    for col, dtype in dtypes.items()
}
# on_bad_lines='warn' still skips malformed rows (as 'skip' did) but reports
# each one, so rows are no longer dropped silently.
df_replay = pd.read_csv(
    replay_data_path,
    dtype=adjusted_dtypes,
    nrows=10000,
    on_bad_lines='warn',
)

In [10]:
# Load the abilities and cards tables in full (no row limit).
df_abilities, df_cards = (
    pd.read_csv(csv_path) for csv_path in (abilities_path, cards_path)
)

In [11]:
# Preview the first two rows of the game-level sample.
df_game.head(2)

Unnamed: 0,expansion,event_type,draft_id,draft_time,game_time,build_index,match_number,game_number,rank,opp_rank,...,"tutored_Huatli, Poet of Unity","deck_Huatli, Poet of Unity","sideboard_Huatli, Poet of Unity","opening_hand_Ojer Axonil, Deepest Might","drawn_Ojer Axonil, Deepest Might","tutored_Ojer Axonil, Deepest Might","deck_Ojer Axonil, Deepest Might","sideboard_Ojer Axonil, Deepest Might",user_n_games_bucket,user_game_win_rate_bucket
0,LCI,PremierDraft,976d867638234d0087008f387809c325,2023-11-09 22:57:41,2023-11-09 23:43:35,0,1,1,bronze,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,50,0.56
1,LCI,PremierDraft,a5385ada75004c549e438b0066e3ac53,2023-11-15 22:55:37,2023-11-15 23:22:02,0,1,1,gold,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,50,0.56


In [12]:
# Preview the first two rows of the replay sample.
df_replay.head(2)

Unnamed: 0,expansion,event_type,draft_id,draft_time,build_index,match_number,game_number,game_time,rank,opp_rank,...,oppo_turn_22_cards_drawn_or_tutored,oppo_turn_23_cards_drawn_or_tutored,oppo_turn_24_cards_drawn_or_tutored,oppo_turn_25_cards_drawn_or_tutored,oppo_turn_26_cards_drawn_or_tutored,oppo_turn_27_cards_drawn_or_tutored,oppo_turn_28_cards_drawn_or_tutored,oppo_turn_29_cards_drawn_or_tutored,oppo_turn_30_cards_drawn_or_tutored,oppo_total_cards_drawn_or_tutored
0,LCI,PremierDraft,976d867638234d0087008f387809c325,2023-11-09 22:57:41,0,1,1,2023-11-09 23:43:35,bronze,,...,0,0,0,0,0,0,0,0,0,12
1,LCI,PremierDraft,c62d13367fa34787b36cb3292926632c,2023-11-09 21:48:59,0,1,1,2023-11-09 22:32:23,bronze,,...,0,0,0,0,0,0,0,0,0,16


In [28]:
# Preview the first two rows of the cards table.
df_cards.head(2)

Unnamed: 0,id,expansion,name,rarity,color_identity,mana_value,types,is_booster
0,65591,HOU,Ammit Eternal,rare,B,3,Creature - Zombie Crocodile Demon,True
1,65633,HOU,Torment of Scarabs,uncommon,B,4,Enchantment - Aura Curse,True


In [29]:
# Preview the first two rows of the abilities table.
df_abilities.head(2)

Unnamed: 0,id,text
0,1,Deathtouch
1,2,Defender


In [31]:
# Print every replay column name on its own line.
for column_name in df_replay.columns:
    print(column_name)

expansion
event_type
draft_id
draft_time
build_index
match_number
game_number
game_time
rank
opp_rank
main_colors
splash_colors
on_play
num_mulligans
opp_num_mulligans
opp_colors
num_turns
won
candidate_hand_1
candidate_hand_2
candidate_hand_3
candidate_hand_4
candidate_hand_5
candidate_hand_6
candidate_hand_7
opening_hand
user_turn_1_cards_drawn
user_turn_1_cards_tutored
user_turn_1_cards_discarded
user_turn_1_lands_played
user_turn_1_creatures_cast
user_turn_1_non_creatures_cast
user_turn_1_user_instants_sorceries_cast
user_turn_1_oppo_instants_sorceries_cast
user_turn_1_user_abilities
user_turn_1_oppo_abilities
user_turn_1_creatures_attacked
user_turn_1_creatures_blocked
user_turn_1_creatures_unblocked
user_turn_1_creatures_blocking
user_turn_1_oppo_combat_damage_taken
user_turn_1_user_combat_damage_taken
user_turn_1_user_creatures_killed_combat
user_turn_1_oppo_creatures_killed_combat
user_turn_1_user_creatures_killed_non_combat
user_turn_1_oppo_creatures_killed_non_combat
user_tur

In [56]:
@dataclass
class state:
    """Minimal game-state record: a turn number plus two life totals."""

    # Turn number this record refers to.
    turn: int
    # Life total on the user's side.
    user_life: int
    # Life total on the other side (presumably the opponent — confirm).
    oppo_life: int


Linear regression sanity check

In [39]:
# Eventually a for loop over all turns
turn = 5

df_lin = df_replay[[f'user_turn_{turn}_eot_user_life', f'user_turn_{turn}_eot_oppo_life', 'won']]
df_lin = df_lin.rename(columns={f'user_turn_{turn}_eot_user_life': 'user_life', f'user_turn_{turn}_eot_oppo_life': 'oppo_life'})
df_lin.head(5)

Unnamed: 0,user_life,oppo_life,won
0,13.0,20.0,False
1,14.0,19.0,True
2,18.0,20.0,True
3,21.0,9.0,True
4,20.0,18.0,False


In [44]:
# Shift both life totals by 20 so the fitted intercept is the prediction
# when both players are at 20 life.
# NOTE(review): 20 is presumably the starting life total — confirm.
X = df_lin[['user_life', 'oppo_life']] - 20
y = df_lin['won']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# fit() returns the estimator itself, so the fitted model is still the cell's displayed value.
model = LinearRegression().fit(X_train, y_train)
model

In [45]:
# Score the held-out split, then print the fitted parameters followed by the
# error metrics, one "label value" pair per line.
y_pred = model.predict(X_test)

report = [
    ("Coefficients:", model.coef_),
    ("Intercept:", model.intercept_),
    ("Mean Squared Error:", mean_squared_error(y_test, y_pred)),
    ("R^2 Score:", r2_score(y_test, y_pred)),
]
for label, value in report:
    print(label, value)

Coefficients: [ 0.02471788 -0.02911829]
Intercept: 0.5111028
Mean Squared Error: 0.22010873
R^2 Score: 0.10817736921392374


In [51]:
# Fraction of held-out games with won == True (base rate to compare the model against).
y_test.mean()

0.5565