# Setup

In [1]:
import os
import pandas as pd

In [2]:
def create_dir(dir):
    """Create the directory ``dir``, including any missing parent directories.

    Calling this for a directory that already exists is a no-op.

    Args:
        dir: Path of the directory to create. The parameter name shadows the
            builtin ``dir`` but is kept so keyword callers keep working.

    Raises:
        FileExistsError: If ``dir`` already exists but is not a directory
            (for example a regular file). Previously this case was silently
            ignored, which only deferred the failure to the first write
            into the supposed directory.
    """
    # exist_ok=True tolerates an existing directory without a try/except.
    os.makedirs(dir, exist_ok=True)

In [3]:
@pd.api.extensions.register_dataframe_accessor("info")
class Info:
    """DataFrame accessor that adds a ``cut`` column to a variable-info table.

    The wrapped DataFrame must contain the columns ``id``, ``variable``,
    ``type``, ``min`` and ``max``.

    NOTE: the accessor name ``"info"`` collides with the built-in
    ``DataFrame.info`` method. pandas warns about the override when the
    accessor is registered, and ``df.info`` then resolves to this accessor.
    The name is kept because it is the accessor's public interface
    (``df.info.setcut(...)``).
    """

    def __init__(self, pandas_obj):
        """Validate ``pandas_obj`` and keep a reference to it.

        Raises:
            AttributeError: If any required column is missing.
        """
        self._validate(pandas_obj, ['id', 'variable', 'type', 'min', 'max'])
        self.dataset = pandas_obj

    @staticmethod
    def _validate(obj, cols):
        """Raise ``AttributeError`` unless every name in ``cols`` is a column of ``obj``."""
        missing = [x for x in cols if x not in obj.columns]
        if missing:
            # Name the offending columns so the caller can fix the input file.
            raise AttributeError(f"Some attributes are missing: {missing}")

    def setcut(self, pcont, pcatmax):
        """Add or overwrite the ``cut`` column of the wrapped DataFrame in place.

        Rows are filled according to their ``type`` value:

        * ``'Continuous'``  -> ``pcont``
        * ``'Categorical'`` -> ``min(max, pcatmax)`` using the row's ``max``
        * anything else     -> ``0``

        Args:
            pcont: Value assigned to every ``'Continuous'`` row.
            pcatmax: Upper bound applied to ``max`` for ``'Categorical'`` rows.
        """
        frame = self.dataset
        var_type = frame['type']

        # Default for rows that are neither Continuous nor Categorical.
        frame['cut'] = 0
        frame.loc[var_type == 'Continuous', 'cut'] = pcont
        # The right-hand Series is aligned on the index, so only the
        # Categorical rows pick up their capped 'max' value.
        frame.loc[var_type == 'Categorical', 'cut'] = frame['max'].map(lambda v: min(v, pcatmax))


# Given Information

In [4]:
# Value written to the 'cut' column for rows whose type is 'Continuous'
# (passed to Info.setcut below); also interpolated into output file names.
pcont = 2
# Upper bound applied to 'max' when computing 'cut' for 'Categorical' rows
# (passed to Info.setcut below); also interpolated into output file names.
pcatmax = 2

In [5]:
# Interpolated into the input and output file names (the "proc{year}" part).
year = 20
# Interpolated into the selected-variables file names (the "num{sel_num}" part).
sel_num = 8

# Created Directories

In [6]:
# Make sure every directory used by the cells below exists.
for _required_dir in ("../info", "../model", "../select/cuts"):
    create_dir(_required_dir)

# Implementation

In [7]:
# Load the variable-info table and add the 'cut' column to it in place.
df_proc = pd.read_csv(f"../info/proc{year}info.csv")
df_proc.info.setcut(pcont, pcatmax)

# Write the result twice under the same base name: once with a header row
# and once without (the "noh" file).
maincutname = f"proc{year}co{pcont}ca{pcatmax}cutinfo"
df_proc.to_csv(f"../model/{maincutname}.csv", index=False, header=True)
df_proc.to_csv(f"../model/{maincutname}noh.csv", index=False, header=False)

In [8]:
# Same processing as the previous cell, applied to the selected-variables table.
df_sel_proc = pd.read_csv(f"../select/proc/selproc{year}num{sel_num}info.csv")
df_sel_proc.info.setcut(pcont, pcatmax)

# Write the result with a header row and without one (the "noh" file).
selmaincutname = f"selproc{year}num{sel_num}co{pcont}ca{pcatmax}cutinfo"
df_sel_proc.to_csv(f"../select/cuts/{selmaincutname}.csv", index=False, header=True)
df_sel_proc.to_csv(f"../select/cuts/{selmaincutname}noh.csv", index=False, header=False)