# Imports

In [1]:
import pickle
from config import *

# Read + process Data

In [2]:
# Load the raw dataset dict from the pickle file located at BaselinePath.
# NOTE(review): BaselinePath is not defined in this notebook - presumably it is
# provided by `from config import *`; confirm.
# pickle.load can execute arbitrary code, so only point this at trusted files.
with open(BaselinePath, 'rb') as f:
    raw_dataset_dict = pickle.load(f)

In [3]:
def train_test_split(dic: dict, year: int) -> tuple:
    """Split every DataFrame in ``dic`` into train and test rows by ``Year``.

    Args:
        dic: Mapping of key -> DataFrame. Each frame must have a ``'Year'``
            column.
        year: Cutoff year. Rows with ``Year <= year`` go to the train split,
            rows with ``Year > year`` go to the test split.

    Returns:
        A ``(train, test)`` tuple of dicts, each holding the same keys as
        ``dic``. An empty ``dic`` yields ``({}, {})``.
    """
    train = {}
    test = {}
    for key, category_df in dic.items():
        years = category_df['Year']
        # Two explicit comparisons (rather than a mask and its complement) so
        # that rows whose Year is NaN end up in neither split.
        train[key] = category_df[years <= year]
        test[key] = category_df[years > year]
    return train, test

# Split each frame at the cutoff `Year`: rows with Year <= cutoff form the train
# dict, rows with Year > cutoff form the test dict.
# NOTE(review): `Year` is not defined in this notebook - presumably it is
# provided by `from config import *`; confirm.
raw_train_data_dict, raw_test_data_dict = train_test_split(raw_dataset_dict, year=Year)

In [4]:
def get_relevant_features(dic: dict, drop_columns: list) -> dict:
    """Strip unwanted columns and incomplete rows from every frame in ``dic``.

    Args:
        dic: Mapping of key -> DataFrame.
        drop_columns: Column labels to remove from each frame.

    Returns:
        A shallow copy of ``dic`` whose values are new DataFrames without
        ``drop_columns`` and without any row containing a missing value.
        The frames held by ``dic`` itself are not modified.
    """
    cleaned = dic.copy()
    for name, frame in dic.items():
        # drop() and dropna() both return new frames, so the input is untouched.
        cleaned[name] = frame.drop(columns=drop_columns).dropna()
    return cleaned

# Columns removed from every frame before the train/test dicts are saved.
drop_columns = ['Category_id', 'Category', 'Year', 'Date', 'Price', 'Indent', 'Weight', 'Parent', 'Parent_ID']

# Train split: listed columns removed, rows containing NaN dropped.
train_data_dict = get_relevant_features(raw_train_data_dict, drop_columns)

# Test split: same processing as the train split.
test_data_dict = get_relevant_features(raw_test_data_dict, drop_columns)

In [5]:
# Persist the processed train and test dicts, one pickle file per split
# (train first, then test).
for _target_path, _payload in (
    (train_dataset_dict_path, train_data_dict),
    (test_dataset_dict_path, test_data_dict),
):
    with open(_target_path, 'wb') as handle:
        pickle.dump(_payload, handle, protocol=pickle.HIGHEST_PROTOCOL)

------------

# Horizon Test Sets:

In [6]:
def get_relevant_features_horizon(dic: dict, drop_columns: list) -> dict:
    """Strip unwanted columns from every frame in ``dic``, keeping all rows.

    Unlike ``get_relevant_features``, rows containing missing values are
    NOT dropped here.

    Args:
        dic: Mapping of key -> DataFrame.
        drop_columns: Column labels to remove from each frame.

    Returns:
        A shallow copy of ``dic`` whose values are new DataFrames without
        ``drop_columns``. The frames held by ``dic`` itself are not modified.
    """
    trimmed = dic.copy()
    for name, frame in dic.items():
        trimmed[name] = frame.drop(columns=drop_columns)
    return trimmed

# Columns removed from the horizon test frames. This is the same list that is
# assigned to `drop_columns` in the train/test processing cell earlier in the notebook.
drop_columns = ['Category_id', 'Category', 'Year', 'Date', 'Price', 'Indent', 'Weight', 'Parent', 'Parent_ID']

In [7]:
# Horizon-1 test set: reload the pickle at BaselinePath (the same path read at
# the top of the notebook), split at `Year`, and remove the non-feature columns
# from the test part. Rows containing NaN are kept.
with open(BaselinePath, 'rb') as f:
    hor_1_dataset_dict = pickle.load(f)

hor1_train_data_dict, hor1_test_data_dict = train_test_split(hor_1_dataset_dict, year=Year)
hor1_test_data_dict = get_relevant_features_horizon(hor1_test_data_dict, drop_columns)

# Sanity check: show the remaining columns and the shape of the "All-items" frame.
print(f'columns: {hor1_test_data_dict["All-items"].columns}')
print(f'shape: {hor1_test_data_dict["All-items"].shape}')


columns: Index(['Inflation t-12', 'Inflation t-11', 'Inflation t-10', 'Inflation t-9',
       'Inflation t-8', 'Inflation t-7', 'Inflation t-6', 'Inflation t-5',
       'Inflation t-4', 'Inflation t-3', 'Inflation t-2', 'Inflation t-1',
       'Inflation t', 'Inflation t+1'],
      dtype='object')
shape: (29, 14)


In [8]:
# Horizon-2 test set: load the 2-period dataset pickle, split at `Year`, and
# remove the non-feature columns from the test part. Rows containing NaN are kept.
# NOTE(review): absolute, machine-specific path - this cell only runs on the
# author's machine; consider exposing the location through config instead.
with open('/Users/mvilenko/Library/CloudStorage/OneDrive-PayPal/CPI_HRNN - version 2.0/pickle files/bi_directional_norway_2_period_dataset_dict.pickle', 'rb') as f:
    hor_2_dataset_dict = pickle.load(f)

hor2_train_data_dict, hor2_test_data_dict = train_test_split(hor_2_dataset_dict, year=Year)
hor2_test_data_dict = get_relevant_features_horizon(hor2_test_data_dict, drop_columns)

# Sanity check: show the remaining columns and the shape of the "All-items" frame.
print(f'columns: {hor2_test_data_dict["All-items"].columns}')
print(f'shape: {hor2_test_data_dict["All-items"].shape}')


columns: Index(['Inflation t-12', 'Inflation t-11', 'Inflation t-10', 'Inflation t-9',
       'Inflation t-8', 'Inflation t-7', 'Inflation t-6', 'Inflation t-5',
       'Inflation t-4', 'Inflation t-3', 'Inflation t-2', 'Inflation t-1',
       'Inflation t', 'Inflation t+1', 'Inflation t+2'],
      dtype='object')
shape: (29, 15)


In [9]:
# Horizon-3 test set: load the 3-period dataset pickle, split at `Year`, and
# remove the non-feature columns from the test part. Rows containing NaN are kept.
# NOTE(review): absolute, machine-specific path - this cell only runs on the
# author's machine; consider exposing the location through config instead.
with open('/Users/mvilenko/Library/CloudStorage/OneDrive-PayPal/CPI_HRNN - version 2.0/pickle files/bi_directional_norway_3_period_dataset_dict.pickle', 'rb') as f:
    hor_3_dataset_dict = pickle.load(f)

hor3_train_data_dict, hor3_test_data_dict = train_test_split(hor_3_dataset_dict, year=Year)
hor3_test_data_dict = get_relevant_features_horizon(hor3_test_data_dict, drop_columns)

# Sanity check: show the remaining columns and the shape of the "All-items" frame.
print(f'columns: {hor3_test_data_dict["All-items"].columns}')
print(f'shape: {hor3_test_data_dict["All-items"].shape}')


columns: Index(['Inflation t-12', 'Inflation t-11', 'Inflation t-10', 'Inflation t-9',
       'Inflation t-8', 'Inflation t-7', 'Inflation t-6', 'Inflation t-5',
       'Inflation t-4', 'Inflation t-3', 'Inflation t-2', 'Inflation t-1',
       'Inflation t', 'Inflation t+1', 'Inflation t+2', 'Inflation t+3'],
      dtype='object')
shape: (29, 16)


In [10]:
# Horizon-4 test set: load the 4-period dataset pickle, split at `Year`, and
# remove the non-feature columns from the test part. Rows containing NaN are kept.
# NOTE(review): absolute, machine-specific path - this cell only runs on the
# author's machine; consider exposing the location through config instead.
with open('/Users/mvilenko/Library/CloudStorage/OneDrive-PayPal/CPI_HRNN - version 2.0/pickle files/bi_directional_norway_4_period_dataset_dict.pickle', 'rb') as f:
    hor_4_dataset_dict = pickle.load(f)

hor4_train_data_dict, hor4_test_data_dict = train_test_split(hor_4_dataset_dict, year=Year)
hor4_test_data_dict = get_relevant_features_horizon(hor4_test_data_dict, drop_columns)


# Sanity check: show the remaining columns and the shape of the "All-items" frame.
print(f'columns: {hor4_test_data_dict["All-items"].columns}')
print(f'shape: {hor4_test_data_dict["All-items"].shape}')


columns: Index(['Inflation t-12', 'Inflation t-11', 'Inflation t-10', 'Inflation t-9',
       'Inflation t-8', 'Inflation t-7', 'Inflation t-6', 'Inflation t-5',
       'Inflation t-4', 'Inflation t-3', 'Inflation t-2', 'Inflation t-1',
       'Inflation t', 'Inflation t+1', 'Inflation t+2', 'Inflation t+3',
       'Inflation t+4'],
      dtype='object')
shape: (29, 17)


In [11]:
# Horizon-8 test set: load the 8-period dataset pickle, split at `Year`, and
# remove the non-feature columns from the test part. Rows containing NaN are kept.
# NOTE(review): absolute, machine-specific path - this cell only runs on the
# author's machine; consider exposing the location through config instead.
with open('/Users/mvilenko/Library/CloudStorage/OneDrive-PayPal/CPI_HRNN - version 2.0/pickle files/bi_directional_norway_8_period_dataset_dict.pickle', 'rb') as f:
    hor_8_dataset_dict = pickle.load(f)

hor8_train_data_dict, hor8_test_data_dict = train_test_split(hor_8_dataset_dict, year=Year)
hor8_test_data_dict = get_relevant_features_horizon(hor8_test_data_dict, drop_columns)


# Sanity check: show the remaining columns and the shape of the "All-items" frame.
print(f'columns: {hor8_test_data_dict["All-items"].columns}')
print(f'shape: {hor8_test_data_dict["All-items"].shape}')


columns: Index(['Inflation t-12', 'Inflation t-11', 'Inflation t-10', 'Inflation t-9',
       'Inflation t-8', 'Inflation t-7', 'Inflation t-6', 'Inflation t-5',
       'Inflation t-4', 'Inflation t-3', 'Inflation t-2', 'Inflation t-1',
       'Inflation t', 'Inflation t+1', 'Inflation t+2', 'Inflation t+3',
       'Inflation t+4', 'Inflation t+5', 'Inflation t+6', 'Inflation t+7',
       'Inflation t+8'],
      dtype='object')
shape: (29, 21)


In [12]:
# Directory that receives the per-horizon test-set pickles. Declared once so
# the destination can be changed in a single place instead of five.
# NOTE(review): this is still an absolute, machine-specific path; consider
# exposing it through config like the other dataset paths.
HORIZON_TEST_OUTPUT_DIR = '/Users/mvilenko/Library/CloudStorage/OneDrive-PayPal/CPI_HRNN - version 2.0/mayas_project/basic_model_norway/data'

# Horizon length -> processed test dict built in the cells above.
HORIZON_TEST_DATA_DICTS = {
    1: hor1_test_data_dict,
    2: hor2_test_data_dict,
    3: hor3_test_data_dict,
    4: hor4_test_data_dict,
    8: hor8_test_data_dict,
}

# Write one file per horizon: <output dir>/hor<N>_test_data_dict.pickle
for _horizon, _horizon_test_dict in HORIZON_TEST_DATA_DICTS.items():
    with open(f'{HORIZON_TEST_OUTPUT_DIR}/hor{_horizon}_test_data_dict.pickle', 'wb') as handle:
        pickle.dump(_horizon_test_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)