# Tabular Playground Series (January 2022)
## Import Data and Gather Insights

In [100]:
from typing import Dict
import numpy as np
import pandas as pd
from IPython.display import display
from sklearn.neural_network import MLPRegressor

# Read the training set keyed by row_id and parse the ISO-formatted date strings.
training_df = pd.read_csv("data/train.csv", index_col='row_id').assign(
    date=lambda frame: pd.to_datetime(frame['date'], format='%Y-%m-%d')
)
display(training_df)

Unnamed: 0_level_0,date,country,store,product,num_sold
row_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,2015-01-01,Finland,KaggleMart,Kaggle Mug,329
1,2015-01-01,Finland,KaggleMart,Kaggle Hat,520
2,2015-01-01,Finland,KaggleMart,Kaggle Sticker,146
3,2015-01-01,Finland,KaggleRama,Kaggle Mug,572
4,2015-01-01,Finland,KaggleRama,Kaggle Hat,911
...,...,...,...,...,...
26293,2018-12-31,Sweden,KaggleMart,Kaggle Hat,823
26294,2018-12-31,Sweden,KaggleMart,Kaggle Sticker,250
26295,2018-12-31,Sweden,KaggleRama,Kaggle Mug,1004
26296,2018-12-31,Sweden,KaggleRama,Kaggle Hat,1441


## Format Data

In [101]:
# Reduce each date to its day-of-year expressed as a fraction of 365 days.
# The year itself is discarded by this step.
training_df['date'] = training_df['date'].dt.dayofyear.div(365)

# Distinct category labels, kept in order of first appearance.
countries = training_df['country'].drop_duplicates()
stores = training_df['store'].drop_duplicates()
products = training_df['product'].drop_duplicates()

In [102]:
def hash_str(series: pd.Series) -> Dict:
    """Map each item of ``series`` to an evenly spaced number in ``[0, 1)``.

    The item at 0-based position ``k`` is assigned ``k / len(series)``, so the
    first item maps to 0 and consecutive items are ``1 / len(series)`` apart.
    Despite the name, no hashing is involved: the code depends only on the
    item's position in ``series``.

    Args:
        series: Labels to encode. Callers are expected to pass unique labels;
            if a label repeats, the code of its last position wins.

    Returns:
        A dict from label to its numeric code; empty when ``series`` is empty.
    """
    count = len(series)
    # Dividing the position directly (instead of accumulating a running sum of
    # 1/count) keeps rounding error from building up, and the comprehension
    # never performs the division when the input is empty.
    return {item: position / count for position, item in enumerate(series)}

In [103]:
# Build one label -> number lookup per categorical column.
countries_map = hash_str(countries)
stores_map = hash_str(stores)
products_map = hash_str(products)

# Apply each lookup only to the column it was built from, so a label can never
# be rewritten in an unrelated column. The explicit float cast avoids relying
# on pandas' implicit object -> float downcasting inside replace().
# Re-running this cell is harmless: already-encoded values match no key.
for column, mapping in (('country', countries_map),
                        ('store', stores_map),
                        ('product', products_map)):
    training_df[column] = training_df[column].replace(mapping).astype(float)
display(training_df)

Unnamed: 0_level_0,date,country,store,product,num_sold
row_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,0.00274,0.000000,0.0,0.000000,329
1,0.00274,0.000000,0.0,0.333333,520
2,0.00274,0.000000,0.0,0.666667,146
3,0.00274,0.000000,0.5,0.000000,572
4,0.00274,0.000000,0.5,0.333333,911
...,...,...,...,...,...
26293,1.00000,0.666667,0.0,0.333333,823
26294,1.00000,0.666667,0.0,0.666667,250
26295,1.00000,0.666667,0.5,0.000000,1004
26296,1.00000,0.666667,0.5,0.333333,1441


## Train MLP Regression Model

In [104]:
# Feature matrix with the four encoded input columns in a fixed order.
training_data = training_df.loc[:, ['date', 'country', 'store', 'product']].to_numpy()
# Regression target as a flat 1-D array.
target_values = training_df['num_sold'].to_numpy()

In [105]:
# Small fully connected network: four hidden layers of four ReLU units each.
# random_state fixes the weight initialisation and batch sampling so that a
# Restart & Run All reproduces the same fitted model and predictions.
mlp_regressor = MLPRegressor(hidden_layer_sizes=(4, 4, 4, 4),
                             activation='relu',
                             max_iter=400,
                             random_state=42)
mlp_regressor = mlp_regressor.fit(training_data, target_values)

## Predict on the Test Set with the MLP Regression Model

In [106]:
# Read the test set keyed by row_id and parse the ISO-formatted date strings.
test_df = pd.read_csv("data/test.csv", index_col='row_id').assign(
    date=lambda frame: pd.to_datetime(frame['date'], format='%Y-%m-%d')
)
display(test_df)

Unnamed: 0_level_0,date,country,store,product
row_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
26298,2019-01-01,Finland,KaggleMart,Kaggle Mug
26299,2019-01-01,Finland,KaggleMart,Kaggle Hat
26300,2019-01-01,Finland,KaggleMart,Kaggle Sticker
26301,2019-01-01,Finland,KaggleRama,Kaggle Mug
26302,2019-01-01,Finland,KaggleRama,Kaggle Hat
...,...,...,...,...
32863,2019-12-31,Sweden,KaggleMart,Kaggle Hat
32864,2019-12-31,Sweden,KaggleMart,Kaggle Sticker
32865,2019-12-31,Sweden,KaggleRama,Kaggle Mug
32866,2019-12-31,Sweden,KaggleRama,Kaggle Hat


In [107]:
# Same day-of-year scaling that was applied to the training dates.
test_df['date'] = test_df['date'].dt.dayofyear / 365

# Encode each categorical column with the lookup built from the training data.
# Each mapping is applied only to its own column, and the explicit float cast
# avoids relying on pandas' implicit object -> float downcasting in replace().
# A label missing from the lookup stays a string, so the cast raises instead
# of letting it reach the model unnoticed.
for column, mapping in (('country', countries_map),
                        ('store', stores_map),
                        ('product', products_map)):
    test_df[column] = test_df[column].replace(mapping).astype(float)
display(test_df)

Unnamed: 0_level_0,date,country,store,product
row_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
26298,0.00274,0.000000,0.0,0.000000
26299,0.00274,0.000000,0.0,0.333333
26300,0.00274,0.000000,0.0,0.666667
26301,0.00274,0.000000,0.5,0.000000
26302,0.00274,0.000000,0.5,0.333333
...,...,...,...,...
32863,1.00000,0.666667,0.0,0.333333
32864,1.00000,0.666667,0.0,0.666667
32865,1.00000,0.666667,0.5,0.000000
32866,1.00000,0.666667,0.5,0.333333


In [108]:
# Select the feature columns explicitly, in the same order used for training.
# Converting the whole frame would break on a re-run of this cell, because the
# 'num_sold' column added below would be fed to the model as a fifth feature.
test_data = test_df[['date', 'country', 'store', 'product']].to_numpy()
# Round the predictions to the nearest integer and store them as int64.
predictions = np.round(mlp_regressor.predict(test_data))
test_df['num_sold'] = predictions.astype(np.int64)
display(test_df)

Unnamed: 0_level_0,date,country,store,product,num_sold
row_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
26298,0.00274,0.000000,0.0,0.000000,187
26299,0.00274,0.000000,0.0,0.333333,535
26300,0.00274,0.000000,0.0,0.666667,114
26301,0.00274,0.000000,0.5,0.000000,439
26302,0.00274,0.000000,0.5,0.333333,788
...,...,...,...,...,...
32863,1.00000,0.666667,0.0,0.333333,365
32864,1.00000,0.666667,0.0,0.666667,148
32865,1.00000,0.666667,0.5,0.000000,523
32866,1.00000,0.666667,0.5,0.333333,621


In [109]:
# Write the row_id index together with the predicted num_sold column.
test_df.to_csv('predictions.csv', columns=['num_sold'])