## Process Sealed Products
This notebook takes a folder of raw CSVs downloaded from MTGGoldfish and transforms the date/price data into a dataframe with other datapoints such as the set, type of product, and release date of the product.
It assumes that a "file_names.txt" file, listing one CSV file name per line, already exists in the same folder as the CSVs; it was generated beforehand by running a simple `ls` command in the terminal.

In [1]:
import pandas as pd

In [2]:
# Box types that can be recognised from a CSV file name.
# Files whose name contains none of these are labelled with a default
# box type by the loading cell.
box_list = ['Draft', "Collector", "Set"]

# Three-character set symbol -> release date (ISO "YYYY-MM-DD" string).
# Used to attach a release date to every price row of a given set.
# Values must not carry stray whitespace: they are copied verbatim into the
# output dataframe and the exported CSV.
release_dates = {'RNA':'2019-01-25',
                'WAR':'2019-05-03',
                'ELD':'2019-10-04',
                'THB':'2020-01-24',
                'IKO':'2020-05-15',
                'ZNR':'2020-09-25',
                'KHM':'2021-02-05',
                'STX':'2021-04-23',
                'AFR':'2021-07-23',
                'MID':'2021-09-24',
                'VOW':'2021-11-19',
                'NEO':'2022-02-18',
                'SNC':'2022-04-29',
                'DMU':'2022-09-09',
                'BRO':'2022-11-18',
                'MH1':'2019-06-14',
                'MH2':'2021-06-18',
                'TSR':'2021-03-19',
                '2XM':'2020-08-07',
                '2X2':'2022-07-08',
                'M20':'2019-07-12',
                'M21':'2020-07-03',
                'CMR':'2020-11-20',
                'CLB':'2022-06-10',
                'JMP':'2020-07-17',
                'UNF':'2022-10-07'}

In [3]:

# Folder holding the raw CSVs and the "file_names.txt" listing
# (relative to the analytics folder).
path = "../_data/mtg_sealed/"

# One dataframe per CSV is collected here and concatenated in a later cell.
output_list = []

# Walk through the list of file names, one name per line.
with open(path + "file_names.txt") as file:

    for line in file:

        # Remove only the trailing line break. Slicing off the last character
        # unconditionally would corrupt the final name when the listing does
        # not end with a newline.
        line = line.rstrip("\n")

        # Skip blank lines and anything that is not a CSV (for example the
        # listing file itself, if it appears in its own `ls` output).
        if not line.endswith(".csv"):
            continue

        # File names end in "[SYM].csv": the symbol is the three characters
        # sitting just before the closing "]" and the ".csv" extension.
        symbol = line[-8:-5]

        # The raw CSVs have no header row; name the two columns explicitly.
        temp_df = pd.read_csv(path + line, header=None, names=['date', 'price'])

        # Tag every row with the set symbol and its release date.
        # A symbol missing from release_dates yields None for release_date.
        temp_df['symbol'] = symbol
        temp_df['release_date'] = release_dates.get(symbol)

        # Default to "Booster", then refine from the file name. There is
        # deliberately no early exit: when several entries of box_list occur
        # in the name, the last matching one wins.
        temp_df['box_type'] = "Booster"
        for box_type in box_list:
            if box_type in line:
                temp_df['box_type'] = box_type

        # Keep the frame for the final concatenation.
        output_list.append(temp_df)

In [11]:
# Stack the per-file frames into a single table with a fresh 0..n-1 index.
output_df = pd.concat(objs=output_list, ignore_index=True)

# Preview five randomly chosen rows as a quick sanity check.
output_df.sample(n=5)

Unnamed: 0,date,price,symbol,release_date,box_type
12554,2023-01-07,124.99,VOW,2021-11-19,Set
24772,2022-10-01,249.99,STX,2021-04-23,Collector
31289,2023-02-07,110.25,UNF,2022-10-07,Draft
338,2022-04-16,211.99,AFR,2021-07-23,Collector
19485,2022-05-29,239.99,MH2,2021-06-18,Draft


In [12]:
# Write the combined table to disk, leaving out the dataframe index.
output_df.to_csv(path_or_buf="../_data/sealed_output.csv", index=False)