# Multilayer Perceptron Regression

[Tabular Playground Series - Jan 2022 | Kaggle](https://www.kaggle.com/datasets/gauravduttakiit/tabular-playground-series-jan-2022?datasetId=1839455&sortBy=voteCount&select=train.csv)
> For this challenge, you will be predicting a full year's worth of sales for three items at two stores located in three different countries. This dataset is completely fictional but contains many effects you see in real-world data, e.g., weekend and holiday effect, seasonality, etc. The dataset is small enough to allow you to try numerous different modeling approaches.


In [42]:
import pandas as pd

train_df = pd.read_csv('./train.csv',index_col='row_id')
test_df = pd.read_csv('./test.csv',index_col='row_id')

train_df.sample(5)

Unnamed: 0_level_0,date,country,store,product,num_sold
row_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
22312,2018-05-24,Norway,KaggleRama,Kaggle Hat,1554
25395,2018-11-11,Sweden,KaggleRama,Kaggle Mug,549
22242,2018-05-20,Sweden,KaggleMart,Kaggle Mug,254
24967,2018-10-19,Finland,KaggleMart,Kaggle Hat,304
4076,2015-08-15,Norway,KaggleMart,Kaggle Sticker,198


In [43]:
train = train_df.copy()
train['date'] = pd.to_datetime(train['date'])
train.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 26298 entries, 0 to 26297
Data columns (total 5 columns):
 #   Column    Non-Null Count  Dtype         
---  ------    --------------  -----         
 0   date      26298 non-null  datetime64[ns]
 1   country   26298 non-null  object        
 2   store     26298 non-null  object        
 3   product   26298 non-null  object        
 4   num_sold  26298 non-null  int64         
dtypes: datetime64[ns](1), int64(1), object(3)
memory usage: 1.2+ MB


In [44]:
## Demo ordinal encoder
from sklearn.preprocessing import OrdinalEncoder

ordinal_encoder = OrdinalEncoder().fit(train.country.unique().reshape(-1, 1))
train['country_n'] = ordinal_encoder.transform(train.country.values.reshape(-1,1))
train.sample(5)

Unnamed: 0_level_0,date,country,store,product,num_sold,country_n
row_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
244,2015-01-14,Norway,KaggleRama,Kaggle Hat,946,1.0
10249,2016-07-23,Norway,KaggleMart,Kaggle Hat,503,1.0
12196,2016-11-08,Norway,KaggleRama,Kaggle Hat,675,1.0
8978,2016-05-13,Sweden,KaggleMart,Kaggle Sticker,103,2.0
13445,2017-01-16,Sweden,KaggleRama,Kaggle Sticker,189,2.0


In [45]:
train['store_n'] = OrdinalEncoder().fit(train.store.unique().reshape(-1, 1)).transform(train.store.values.reshape(-1,1))
train.sample(5)

Unnamed: 0_level_0,date,country,store,product,num_sold,country_n,store_n
row_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
5040,2015-10-08,Finland,KaggleMart,Kaggle Mug,167,0.0,0.0
20478,2018-02-11,Sweden,KaggleMart,Kaggle Mug,307,2.0,0.0
241,2015-01-14,Norway,KaggleMart,Kaggle Hat,495,1.0,0.0
14429,2017-03-12,Norway,KaggleRama,Kaggle Sticker,342,1.0,1.0
5932,2015-11-26,Norway,KaggleRama,Kaggle Hat,687,1.0,1.0


In [46]:
train['product_n'] = OrdinalEncoder().fit(train['product'].unique().reshape(-1, 1)).transform(train['product'].values.reshape(-1,1))
train.sample(5)

Unnamed: 0_level_0,date,country,store,product,num_sold,country_n,store_n,product_n
row_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
12067,2016-11-01,Norway,KaggleMart,Kaggle Hat,418,1.0,0.0,0.0
26190,2018-12-26,Finland,KaggleMart,Kaggle Mug,331,0.0,0.0,1.0
22038,2018-05-09,Norway,KaggleMart,Kaggle Mug,334,1.0,0.0,1.0
3528,2015-07-16,Finland,KaggleMart,Kaggle Mug,153,0.0,0.0,1.0
14434,2017-03-12,Sweden,KaggleRama,Kaggle Hat,996,2.0,1.0,0.0


In [47]:
train = train.drop(['country','store','product'],axis='columns')
train.sample(5)

Unnamed: 0_level_0,date,num_sold,country_n,store_n,product_n
row_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
3878,2015-08-04,157,1.0,0.0,2.0
2202,2015-05-03,356,1.0,0.0,1.0
15190,2017-04-23,1273,2.0,1.0,0.0
8904,2016-05-09,213,2.0,0.0,1.0
24876,2018-10-14,269,0.0,0.0,1.0


In [48]:
# train['day'] = train.date.dt.strftime('%d')
train['month'] =train.date.dt.strftime('%m')
train['year'] =train.date.dt.strftime('%y')
train = train.drop(['date'],axis='columns')
train.sample(5)

Unnamed: 0_level_0,num_sold,country_n,store_n,product_n,month,year
row_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
23208,294,1.0,0.0,1.0,7,18
9390,224,2.0,0.0,1.0,6,16
10543,325,2.0,0.0,0.0,8,16
9578,83,0.0,0.0,2.0,6,16
1945,484,0.0,0.0,0.0,4,15


In [49]:
X = train[['country_n','store_n','product_n','month','year']].values.copy()
y = train['num_sold'].copy()

X.shape

(26298, 5)

In [50]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
print(X_train.shape)
print(y_train.shape)

(17619, 5)
(17619,)


In [51]:
%%time
from sklearn.neural_network import MLPRegressor

model = MLPRegressor(
    hidden_layer_sizes=100,
    activation="logistic",
    solver='adam', 
    batch_size='auto', 
    learning_rate="adaptive", 
    learning_rate_init=0.001, 
    max_iter=700, 
    verbose=True, 
    early_stopping=True, 
    random_state=42
    )
model.fit(X_train, y_train)

Iteration 1, loss = 108574.51290809
Validation score: -2.104280
Iteration 2, loss = 106066.27610218
Validation score: -2.023078
Iteration 3, loss = 103052.78205943
Validation score: -1.926545
Iteration 4, loss = 99594.14982128
Validation score: -1.819430
Iteration 5, loss = 95926.52362788
Validation score: -1.712717
Iteration 6, loss = 92385.86078287
Validation score: -1.611130
Iteration 7, loss = 89098.71487631
Validation score: -1.520431
Iteration 8, loss = 86199.70262416
Validation score: -1.439384
Iteration 9, loss = 83538.38627087
Validation score: -1.363481
Iteration 10, loss = 81035.92226625
Validation score: -1.292092
Iteration 11, loss = 78668.38989543
Validation score: -1.224444
Iteration 12, loss = 76415.77603908
Validation score: -1.159843
Iteration 13, loss = 74264.87265063
Validation score: -1.097927
Iteration 14, loss = 72208.75246750
Validation score: -1.039234
Iteration 15, loss = 70246.02567714
Validation score: -0.982665
Iteration 16, loss = 68364.77218488
Validation

MLPRegressor(activation='logistic', early_stopping=True, hidden_layer_sizes=100,
             learning_rate='adaptive', max_iter=700, random_state=42,
             verbose=True)

In [52]:
from sklearn.metrics import mean_absolute_error,mean_absolute_percentage_error,max_error
y_pred = model.predict(X_test)

print('Resultados: ')
print('MAE: ',str(mean_absolute_error(y_test,y_pred)))
print('MAE%: ',str(100*round(mean_absolute_percentage_error(y_test,y_pred),4)))
print('MAXE: ',str(max_error(y_test,y_pred)))

Resultados: 
MAE:  61.10839918948577
MAE%:  17.05
MAXE:  2043.587249784388


In [53]:
%%time
from sklearn.model_selection import cross_validate

mlpR = MLPRegressor(
    hidden_layer_sizes=(100,),
    activation="logistic",
    solver='adam',
    batch_size='auto', 
    learning_rate="adaptive", 
    learning_rate_init=0.001, 
    max_iter=1500, 
    verbose=False, 
    early_stopping=True, 
    random_state=42
    )


cv_dict = cross_validate(mlpR, X_train, y_train, 
    cv=5,
    scoring=('neg_mean_absolute_error','max_error','neg_mean_absolute_percentage_error','r2', 'neg_mean_squared_error'),
    return_train_score=True, 
    verbose = 100,
    return_estimator=True
    )


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[CV] START .....................................................................
[CV] END  max_error: (train=-2003.310, test=-1318.981) neg_mean_absolute_error: (train=-58.044, test=-58.335) neg_mean_absolute_percentage_error: (train=-0.167, test=-0.172) neg_mean_squared_error: (train=-9748.290, test=-9445.769) r2: (train=0.860, test=0.863) total time=  54.7s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   54.7s remaining:    0.0s
[CV] START .....................................................................
[CV] END  max_error: (train=-2015.275, test=-1348.555) neg_mean_absolute_error: (train=-59.375, test=-58.274) neg_mean_absolute_percentage_error: (train=-0.174, test=-0.174) neg_mean_squared_error: (train=-10305.463, test=-8779.207) r2: (train=0.854, test=0.869) total time=  47.4s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:  1.7min remaining:    0.0s
[CV] START .......................

In [54]:
cv_dict

{'fit_time': array([54.758569  , 47.47036195, 34.17483258, 41.26486707, 40.10021544]),
 'score_time': array([0.0079577 , 0.01200414, 0.01299524, 0.00736499, 0.00799966]),
 'estimator': [MLPRegressor(activation='logistic', early_stopping=True,
               learning_rate='adaptive', max_iter=1500, random_state=42),
  MLPRegressor(activation='logistic', early_stopping=True,
               learning_rate='adaptive', max_iter=1500, random_state=42),
  MLPRegressor(activation='logistic', early_stopping=True,
               learning_rate='adaptive', max_iter=1500, random_state=42),
  MLPRegressor(activation='logistic', early_stopping=True,
               learning_rate='adaptive', max_iter=1500, random_state=42),
  MLPRegressor(activation='logistic', early_stopping=True,
               learning_rate='adaptive', max_iter=1500, random_state=42)],
 'test_neg_mean_absolute_error': array([-58.33512683, -58.27380132, -60.96120081, -60.41126765,
        -60.25955   ]),
 'train_neg_mean_absolute_erro

In [55]:
import numpy as np
def get_best_estimator(dict):
    error =dict.get('train_neg_mean_absolute_error')
    return dict.get('estimator')[np.where(error==min(error))[0][0]]

estimator = get_best_estimator(cv_dict)
estimator

MLPRegressor(activation='logistic', early_stopping=True,
             learning_rate='adaptive', max_iter=1500, random_state=42)

In [56]:
y_pred = estimator.predict(X_test)

print('Resultados: ')
print('MAE: ',str(mean_absolute_error(y_test,y_pred)))
print('MAE%: ',str(100*round(mean_absolute_percentage_error(y_test,y_pred),4)))
print('MAXE: ',str(max_error(y_test,y_pred)))

Resultados: 
MAE:  63.39084037056458
MAE%:  18.2
MAXE:  2065.4036451140664


In [57]:
from catboost import CatBoostRegressor

cbR = CatBoostRegressor()
cbR.fit(X_train, y_train)

Learning rate set to 0.065041
0:	learn: 249.5897031	total: 163ms	remaining: 2m 43s
1:	learn: 236.1236553	total: 168ms	remaining: 1m 23s
2:	learn: 223.7542537	total: 173ms	remaining: 57.6s
3:	learn: 212.2531745	total: 178ms	remaining: 44.4s
4:	learn: 201.7274591	total: 182ms	remaining: 36.2s
5:	learn: 191.9610584	total: 185ms	remaining: 30.7s
6:	learn: 183.2923358	total: 193ms	remaining: 27.4s
7:	learn: 175.0837236	total: 197ms	remaining: 24.4s
8:	learn: 167.5721681	total: 200ms	remaining: 22s
9:	learn: 160.7122782	total: 208ms	remaining: 20.6s
10:	learn: 154.4045861	total: 212ms	remaining: 19.1s
11:	learn: 148.5644509	total: 216ms	remaining: 17.8s
12:	learn: 143.2600592	total: 223ms	remaining: 17s
13:	learn: 138.4035220	total: 228ms	remaining: 16s
14:	learn: 134.0733276	total: 231ms	remaining: 15.2s
15:	learn: 130.0517139	total: 239ms	remaining: 14.7s
16:	learn: 126.4463181	total: 244ms	remaining: 14.1s
17:	learn: 123.0320899	total: 247ms	remaining: 13.5s
18:	learn: 119.8598557	total: 

<catboost.core.CatBoostRegressor at 0x1c9ec283790>

In [58]:
y_pred = cbR.predict(X_test)

print('Resultados: ')
print('MAE: ',str(mean_absolute_error(y_test,y_pred)))
print('MAE%: ',str(100*round(mean_absolute_percentage_error(y_test,y_pred),4)))
print('MAXE: ',str(max_error(y_test,y_pred)))

Resultados: 
MAE:  51.258866962367584
MAE%:  12.11
MAXE:  1683.4043055480615
