# Create M19 dataset

This notebook creates a standardized M19 dataset.

In [9]:
import os
import pickle

from sklearn.model_selection import train_test_split

import draftsimtools as ds

### Initial dataset
Load data from original M19 draftsim data files. 

In [10]:
# Input locations of the original M19 draftsim files (relative paths).
# `dataset_path` keeps its trailing slash, so file names are appended directly.
dataset_path = "../../data/"
rating_path1 = f"{dataset_path}m19_rating.tsv"
rating_path2 = f"{dataset_path}m19_land_rating.tsv"
drafts_path = f"{dataset_path}m19_drafts.csv"

### Standardized Cardnames
1. Remove commas. 
2. Use underscores.
3. Group basic lands with different art into a single cardname.

In [11]:
# Build the set table from the two rating files configured above.
# The result is used later with column indexing (`cur_set["Name"]`) and
# `.to_csv`, so it is presumably a pandas DataFrame — confirm in draftsimtools.
cur_set = ds.create_set(rating_path1, rating_path2)

In [12]:
# Read the unprocessed draft records from the drafts CSV.
raw_drafts = ds.load_drafts(drafts_path)

In [13]:
# Apply the comma fix to both the set table and the raw drafts.
# NOTE(review): this rebinds its own inputs, so re-running the cell feeds
# already-fixed data back into ds.fix_commas — presumably harmless, but confirm.
cur_set, raw_drafts = ds.fix_commas(cur_set, raw_drafts)

In [14]:
# Build `le` from the array of card names in the set table.
# NOTE(review): `le` is presumably a label encoder mapping card names to
# integer ids — confirm against ds.create_le.
le = ds.create_le(cur_set["Name"].values)

### Process draft data

In [15]:
# Process the raw drafts and keep only those with exactly 45 entries;
# anything shorter or longer is treated as an incomplete draft and dropped.
drafts = [
    draft
    for draft in ds.process_drafts(raw_drafts)
    if len(draft) == 45
]

Processing draft: 0.
Processing draft: 10000.
Processing draft: 20000.
Processing draft: 30000.
Processing draft: 40000.


In [25]:
# Hold out 20% of the drafts for testing. The fixed random_state makes the
# shuffle, and therefore the split, reproducible across runs.
drafts_train, drafts_test = train_test_split(drafts, test_size=0.2, random_state=42)

In [29]:
# Convert the held-out drafts with ds.drafts_to_tensor, passing `le`
# (built from the card names above).
drafts_tensor_test = ds.drafts_to_tensor(drafts_test, le) # Runs for ~5 minutes.

In [33]:
# Same conversion for the training drafts; this is the slow step of the notebook.
drafts_tensor_train = ds.drafts_to_tensor(drafts_train, le) # Runs for ~15 minutes.

### Serialize draft data

In [62]:
# Destination for every file this notebook writes. The trailing slash is
# required because file names are appended with plain string concatenation.
output_folder = "./standardized_m19/"

# Create the folder up front: neither DataFrame.to_csv nor open(..., "wb")
# creates missing parent directories, so on a fresh checkout the writes below
# would fail. exist_ok=True keeps the cell safe to re-run.
os.makedirs(output_folder, exist_ok=True)

In [85]:
# Save the standardized set table as a tab-separated file, without the index column.
cur_set.to_csv(output_folder + "standardized_m19_rating.tsv", sep="\t", index=False)

In [64]:
def serialize_data(obj, path):
    """
    Write `obj` to `path` in Python's pickle format.

    The file is opened in binary write mode, so an existing file at `path`
    is overwritten. Returns None.
    """
    with open(path, mode="wb") as pickle_file:
        # Same as pickle.dump(obj, pickle_file) with the default protocol.
        pickle.Pickler(pickle_file).dump(obj)

In [66]:
# Pickle the training drafts (the list form, before tensor conversion).
serialize_data(drafts_train, output_folder + "drafts_train.pkl")

In [67]:
# Pickle the held-out test drafts (the list form, before tensor conversion).
serialize_data(drafts_test, output_folder + "drafts_test.pkl")

In [68]:
# Pickle the tensor form of the training drafts.
serialize_data(drafts_tensor_train, output_folder + "drafts_tensor_train.pkl")

In [69]:
# Pickle the tensor form of the held-out test drafts.
serialize_data(drafts_tensor_test, output_folder + "drafts_tensor_test.pkl")

For details on how to load this data, check out the "Load M19 Dataset" notebook.