# Summary

Code that combines the wrangled per-set data files into combined files spanning multiple sets.

# Introduction

This notebook takes the per-set files produced by [20-draft-data-wrangle](20-draft-data-wrangle.ipynb) and combines them into a single `All_Sets` file for each data type (summary, game, and draft).

# Initial Setup

In [1]:
# | output: false
# Bootstrap cell: run the shared notebook header script unless the notebook is
# already executing from the "mtg-modeling" folder.
path = "../../../../scripts/notebook_header.py"
import os

# Only the last component of the current working directory is compared, so the
# header is skipped on re-runs once the working directory is "mtg-modeling".
if os.path.basename(os.getcwd()) != "mtg-modeling":
    # The header script receives its own location through the --path option.
    args = f"--path {path}"
    # `%run -i` executes the script inside this notebook's namespace, so the
    # script can see `path` and `args`. Judging by this cell's recorded output,
    # the script changes the working directory to the project root.
    # `get_ipython` is injected by IPython at runtime, hence the type-checker
    # suppression.
    get_ipython().run_line_magic("run", f"-i {path} {args}")  # type: ignore

Changed working directory to: D:\mtg-modeling


In [2]:
from pathlib import Path
import polars as pl

Define Paths

In [3]:
# Folder that holds the per-set parquet files; the combined files are written
# to this same folder.
root = Path("data/processed/17lands/game_data/premier_draft")

# Set codes whose files are combined, in the order they are concatenated.
set_codes = ["MKM", "OTJ", "MH3", "BLB"]

# File-name suffix for each kind of data that gets combined.
file_suffixes = dict(
    summary="_Game_PD_Summary",
    game="_Game_PD_Games",
    draft="_Game_PD_Drafts",
    # card="_Game_PD_Cards",  # Card data has diff col widths.  Have to preprocess first
)


def build_filenames(root, set_codes, file_suffix):
    """Build the input and output parquet paths for one kind of data file.

    Args:
        root: Directory path (supporting the ``/`` operator) that holds the
            files.
        set_codes: Iterable of set codes; one input path is built per code.
        file_suffix: Text placed between the set code and ``.parquet``.

    Returns:
        A dict with ``"in_files"`` (list of ``<code><suffix>.parquet`` paths
        under ``root``, in ``set_codes`` order) and ``"out_file"`` (the
        ``All_Sets<suffix>.parquet`` path under ``root``).
    """
    per_set_paths = []
    for set_code in set_codes:
        per_set_paths.append(root / f"{set_code}{file_suffix}.parquet")

    combined_path = root / f"All_Sets{file_suffix}.parquet"
    return {"in_files": per_set_paths, "out_file": combined_path}


# Map each data kind ("summary", "game", "draft") to its per-set input paths
# and its combined output path.
all_files = {
    kind: build_filenames(root, set_codes, suffix)
    for kind, suffix in file_suffixes.items()
}

In [4]:
def read_files(files):
    """Read every parquet file in ``files`` with polars.

    Args:
        files: Iterable of parquet file paths.

    Returns:
        A list with the result of ``pl.read_parquet`` for each path, in the
        same order as ``files``.
    """
    frames = []
    for parquet_path in files:
        frames.append(pl.read_parquet(parquet_path))
    return frames


# For each data kind in `all_files`, read its per-set parquet files, stack them
# into one frame, and write the result to that kind's combined output path.
# Despite its name, `df_games` holds each kind's data in turn (not only games);
# after the loop it refers to the last kind processed.
for files in all_files.values():
    # NOTE(review): how="vertical" presumably requires all per-set files of a
    # kind to share the same columns — confirm against the polars docs.
    df_games = pl.concat(read_files(files["in_files"]), how="vertical")
    df_games.write_parquet(files["out_file"])