In [1]:
import pandas as pd

In [2]:
deck_file = '../../data/OTJ/deck.feather'
card_file = '../../data/OTJ/card.feather'

In [3]:

df_d = pd.read_feather(deck_file)
df_d.shape

(1167709, 399)

In [10]:
df_d[:100].columns.to_list()[:20]

['expansion',
 'event_type',
 'build_index',
 'match_number',
 'game_number',
 'rank',
 'opp_rank',
 'main_colors',
 'splash_colors',
 'on_play',
 'num_mulligans',
 'opp_num_mulligans',
 'opp_colors',
 'num_turns',
 'won',
 'user_n_games_bucket',
 'user_game_win_rate_bucket',
 'draft_idx',
 'Abraded Bluffs',
 'Abrupt Decay']

In [9]:
df_d['opp_rank'].value_counts()

Series([], Name: count, dtype: int64)

In [16]:
df_d[['event_type', 'build_index', 'match_number', 'game_number', 'draft_idx', 'won']].head(30)

Unnamed: 0,event_type,build_index,match_number,game_number,draft_idx,won
0,PremierDraft,0,1,1,0,True
1,PremierDraft,0,2,1,0,True
2,PremierDraft,0,3,1,0,False
3,PremierDraft,0,4,1,0,False
4,PremierDraft,0,5,1,0,True
5,PremierDraft,0,6,1,0,True
6,PremierDraft,0,7,1,0,True
7,PremierDraft,0,8,1,0,False
8,PremierDraft,0,1,1,1,False
9,PremierDraft,0,2,1,1,True


In [37]:
df_d['game_number'].max()

2

In [None]:
# id

# {draft_idx}_{build_index}_{match_number}_{game_number}_{record}

In [35]:
def get_id(row):
    draft_idx = str(row['draft_idx']).zfill(6)
    build_index = str(row['build_index']).zfill(2)
    match_number = str(row['match_number']).zfill(2)
    return f"{draft_idx}_{build_index}_{match_number}_{row['game_number']}"

df_d['id'] = df_d.head(100).apply(get_id, axis=1)
df_d['id'].head(100)

0     000000_00_1_1
1     000000_00_2_1
2     000000_00_3_1
3     000000_00_4_1
4     000000_00_5_1
          ...      
95    000015_00_3_1
96    000015_00_4_1
97    000015_00_5_1
98    000015_00_6_1
99    000015_00_7_1
Name: id, Length: 100, dtype: object

In [26]:
col_mask = df_d.iloc[9] != df_d.iloc[10]
col_mask

df_d[df_d.columns[col_mask]].iloc[9:11]

Unnamed: 0,build_index,match_number,opp_rank,opp_colors,Plains,Vengeful Townsfolk
9,0,2,,WR,8,1
10,1,3,,WU,7,2


In [5]:
# sample rows

n_rows = 200_000  # ~1.1M rows originally

print(df_d.shape)
df_d = df_d.sample(n_rows, random_state=42)
print(df_d.shape)

(1167709, 399)
(200000, 399)


In [6]:
card_st_idx = df_d.columns.to_list().index('Abraded Bluffs')
card_cols = df_d.columns[card_st_idx:].to_list()
print(len(card_cols))

n_cards = 100
df_col = df_d[card_cols][:1].sample(n_cards, axis=1, random_state=42)
card_cols = df_col.columns
card_cols


381


Index(['Razzle-Dazzler', 'Magda, the Hoardmaster', 'Bucolic Ranch',
       'Clear Shot', 'Collector's Cage', 'Bridled Bighorn',
       'Iron-Fist Pulverizer', 'Vadmir, New Blood', 'Mountain',
       'Eriette's Lullaby', 'Spring Splasher', 'Obeka, Splitter of Seconds',
       'Rambling Possum', 'Intimidation Campaign', 'Daring Thunder-Thief',
       'Malcolm, the Eyes', 'Detention Sphere', 'The Key to the Vault',
       'Plains', 'Mindbreak Trap', 'Unfortunate Accident',
       'Geralf, the Fleshwright', 'Arid Archway', 'Return the Favor',
       'Trash the Town', 'Primal Might', 'Abraded Bluffs', 'Fell the Mighty',
       'Tyrant's Scorn', 'Bedevil', 'Boom Box', 'Annie Joins Up',
       'Demonic Ruckus', 'Scalestorm Summoner', 'Torpor Orb',
       'Djinn of Fool's Fall', 'Final Showdown', 'Outlaw Stitcher',
       'Bristlepack Sentry', 'Sterling Hound', 'Plan the Heist', 'Fling',
       'Skewer the Critics', 'Villainous Wealth', 'Bandit's Haul',
       'Boneyard Desecrator', 'Deepmuck 

In [7]:
df_d[card_cols] = df_d[card_cols] > 0

In [8]:
idx_cols = ['draft_idx', 'won']
cols = idx_cols + card_cols.to_list()
df_d = df_d[cols]
df_d.shape


(200000, 102)

In [9]:
df_d_melt = df_d[cols].melt(id_vars=idx_cols, var_name='card', value_name='in_deck')
df_d_melt.to_feather('../../data/OTJ/deck_melt.feather')
df_d_melt.shape

(20000000, 4)

In [10]:
df_d_melt.sample(5)

Unnamed: 0,draft_idx,won,card,in_deck
3859356,82266,True,Mindbreak Trap,False
10240456,183991,False,Ornery Tumblewagg,False
456753,111222,True,Bucolic Ranch,False
18089059,112833,False,Armored Armadillo,False
19905149,122919,False,Pitiless Carnage,False


In [11]:
df_d_melt['in_deck'].mean()

0.0728424

In [12]:
df_c = pd.read_feather(card_file)
df_c.shape


(371, 17)

In [13]:
df = pd.merge(df_d_melt, df_c, left_on='card', right_on='name', how='left')
df.shape

(20000000, 21)

In [14]:
na_mask = df['name'].isna()
print(na_mask.sum())
if na_mask.sum() > 0:
    df[na_mask].sample(10, random_state=42)

400000


In [15]:
df.head()

Unnamed: 0,draft_idx,won,card,in_deck,name,setCode,releaseDate,number,layout,availability,...,toughness,colorIdentity,colors,types,subtypes,supertypes,manaCost,manaValue,edhrecRank,edhrecSaltiness
0,96414,True,Razzle-Dazzler,False,Razzle-Dazzler,OTJ,2024-04-19,63,normal,"arena, mtgo, paper",...,2,U,U,Creature,"Human, Wizard",,{1}{U},2.0,17822.0,
1,170600,True,Razzle-Dazzler,False,Razzle-Dazzler,OTJ,2024-04-19,63,normal,"arena, mtgo, paper",...,2,U,U,Creature,"Human, Wizard",,{1}{U},2.0,17822.0,
2,157994,True,Razzle-Dazzler,False,Razzle-Dazzler,OTJ,2024-04-19,63,normal,"arena, mtgo, paper",...,2,U,U,Creature,"Human, Wizard",,{1}{U},2.0,17822.0,
3,35865,True,Razzle-Dazzler,False,Razzle-Dazzler,OTJ,2024-04-19,63,normal,"arena, mtgo, paper",...,2,U,U,Creature,"Human, Wizard",,{1}{U},2.0,17822.0,
4,1123,True,Razzle-Dazzler,False,Razzle-Dazzler,OTJ,2024-04-19,63,normal,"arena, mtgo, paper",...,2,U,U,Creature,"Human, Wizard",,{1}{U},2.0,17822.0,


In [16]:
df.to_feather('../../data/OTJ/deck_melt_card.feather')