# Summary

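This notebook downloads the public 17Lands datasets for the selected sets and draft formats, then decompresses them into CSV files under `data/raw/17lands/` for use in later analysis.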

In [1]:
# Notebook bootstrap
import os

# If the kernel is not already sitting in a directory named "mtg-modeling",
# execute the shared header script inside this notebook's namespace (`-i`).
# NOTE(review): judging by the recorded output, the header changes the working
# directory to the repository root — confirm against notebook_header.py.
current_dir_name = os.path.split(os.getcwd())[1]
if current_dir_name != "mtg-modeling":
    get_ipython().run_line_magic("run", '-i "../../scripts/notebook_header.py"')  # type: ignore

Changed working directory to: /root/mtg-modeling


In [2]:
import gzip
import itertools
import shutil
from pathlib import Path

import requests

In [3]:
# Set codes of the sets whose data should be downloaded.
set_codes = [
    "OTJ",
    "MH3",
    "MKM",
]

# Draft formats to fetch; the commented-out entries are currently disabled.
draft_formats = [
    "PremierDraft",
    # "TradDraft",
    # "Sealed",
    # "TradSealed",
]

# Dataset kinds to fetch; 'replay_data' is omitted due to large size.
data_types = [
    "game_data",
    # "draft_data",
]

In [4]:
# Download every requested (set, draft format, data type) combination from the
# public 17Lands S3 bucket and unpack it to a CSV file under
# data/raw/17lands/<data_type>/<draft_format>/.
DOWNLOAD_CHUNK_SIZE = 1024 * 1024  # bytes written to disk per streamed chunk
REQUEST_TIMEOUT = (10, 120)  # (connect, read) seconds; avoids hanging forever

for set_code, draft_format, data_type in itertools.product(
    set_codes, draft_formats, data_types
):
    print(f"Downloading {set_code} {draft_format} {data_type} Data...")
    save_path = Path(f"data/raw/17lands/{data_type}/{draft_format}")
    gz_file = f"{data_type}_public.{set_code}.{draft_format}.csv.gz"
    csv_file = f"{data_type}_public.{set_code}.{draft_format}.csv"
    url = f"https://17lands-public.s3.amazonaws.com/analysis_data/{data_type}/{gz_file}"
    os.makedirs(save_path, exist_ok=True)

    # Download File
    # The response is used as a context manager so the connection is released
    # on every path, including the early `continue` for failed requests.
    with requests.get(url, stream=True, timeout=REQUEST_TIMEOUT) as response:
        if response.status_code != 200:
            print(
                f"Failed to download {set_code} {draft_format} {data_type} data from {url}."
            )
            continue
        # Write the archive chunk by chunk; reading `response.content` would
        # pull the whole file into memory despite `stream=True`.
        with open(save_path / gz_file, "wb") as file:
            for chunk in response.iter_content(chunk_size=DOWNLOAD_CHUNK_SIZE):
                file.write(chunk)

    # Unzip File
    with gzip.open(save_path / gz_file, "rb") as f_in:
        with open(save_path / csv_file, "wb") as f_out:
            shutil.copyfileobj(f_in, f_out)

    # The compressed archive is no longer needed once the CSV exists.
    os.remove(save_path / gz_file)
print("Done!")

Downloading OTJ PremierDraft game_data Data...
Downloading MH3 PremierDraft game_data Data...
Downloading MKM PremierDraft game_data Data...
Done!
