# First-level models

Train diverse models for the first level of a stacking approach.

1. linear regression
1. k-nearest neighbours on PCA features
1. gradient boosting regression
1. neural network regression

In [1]:
import pandas as pd
import numpy as np
import pickle
import os
from sklearn import linear_model
from sklearn import decomposition
from sklearn import preprocessing
from sklearn import neighbors
from sklearn import metrics
from sklearn import neural_network
from catboost import CatBoostRegressor
from catboost import Pool
import itertools

In [2]:
# Folder holding the raw competition files, and folder holding pickled intermediate results.
DATA_FOLDER = '../Data/'
INTERM_RES_FOLDER = "../Intermediary results"

# "sets.p" unpickles to a pair: the feature table (Xall) and the target (yall).
# NOTE(review): pickle.load can execute arbitrary code; only load files produced by this project.
sets_path = os.path.join(INTERM_RES_FOLDER, "sets.p")
with open(sets_path, "rb") as sets_file:
    Xall, yall = pickle.load(sets_file)

In [3]:
# Month index of every row; all train/validation/test splits below are built from it.
months = Xall.date_block_num

# Blocks held out to tune the first-level models (September and October 2015).
level1_valid_blocks = [32, 33]

# Blocks whose first-level predictions become second-level features:
# 24..33 (10 months worth of meta-features) and 34 (test).
level2_blocks = list(range(24, 35))

in_level2 = months.isin(level2_blocks)
months_level2 = months.loc[in_level2]
y_level2 = yall[in_level2]

# One column per first-level model: 0 = ridge, 1 = kNN on PCA, 2 = CatBoost, 3 = MLP.
X_level2 = np.zeros((y_level2.shape[0], 4))

## 1. Linear regression

### Validate on last two months

We also tried fitting on a log-transformed response (`log(1 + y)`) to reduce the leverage of large values, clipping only the predictions, but fitting on the original response turned out to work much better.

In [5]:
# Ridge penalties to compare, and a table with one row per penalty: validation MSE on
# each held-out block plus the features with the smallest / largest absolute coefficient.
regul = np.linspace(0.1, 1, 10)
regr_mse_scores = pd.DataFrame({"block 32": np.zeros_like(regul),
                                "least important 32": np.empty_like(regul, dtype="str"),
                                "most important 32": np.empty_like(regul, dtype="str"),
                                "block 33": np.zeros_like(regul),
                                "least important 33": np.empty_like(regul, dtype="str"),
                                "most important 33": np.empty_like(regul, dtype="str")})

for i,valid in enumerate(level1_valid_blocks):
    print(i)
    # The split depends only on the validation block, so it is built once per block
    # rather than once per penalty value.
    Xtrain = Xall.loc[months < valid]
    # The training target is used unclipped and on its original scale; a fit on
    # log(1 + y) was tried and abandoned (see the note above this cell).
    ytrain = yall[months < valid]
    Xvalid = Xall.loc[months == valid]
    # Only the validation target and the predictions are clipped to [0, 20].
    yvalid = yall[months == valid].clip(0, 20)

    for j,alpha in enumerate(regul):
        # NOTE(review): `normalize` was deprecated in scikit-learn 1.0 and removed in 1.2;
        # this call needs an older scikit-learn or an explicit scaling step.
        regr = linear_model.Ridge(alpha=alpha, normalize=True)
        regr.fit(Xtrain, ytrain)
        regr_pred = regr.predict(Xvalid).clip(0,20)

        coef_magnitude = np.abs(regr.coef_)
        regr_mse_scores.at[j,"block "+str(valid)] = metrics.mean_squared_error(y_true=yvalid, y_pred=regr_pred)
        regr_mse_scores.at[j,"least important "+str(valid)] = Xtrain.columns[np.argmin(coef_magnitude)]
        regr_mse_scores.at[j, "most important "+str(valid)] = Xtrain.columns[np.argmax(coef_magnitude)]

del Xtrain,ytrain,Xvalid,yvalid,regr_pred,coef_magnitude

0
1


In [6]:
# Validation MSE per ridge penalty (one row per value of `regul`), with the features
# having the smallest / largest absolute coefficient for each validation block.
regr_mse_scores

Unnamed: 0,block 32,least important 32,most important 32,block 33,least important 33,most important 33
0,0.967272,cat_agg_lag_1,item_cnt_month_lag_1,0.991619,cat_agg_lag_1,item_cnt_month_lag_1
1,0.967424,item_id,item_cnt_month_lag_1,0.983112,item_id,item_cnt_month_lag_1
2,0.968527,item_id,item_cnt_month_lag_1,0.977815,item_id,item_cnt_month_lag_1
3,0.970038,item_id,item_cnt_month_lag_1,0.974171,item_id,item_cnt_month_lag_1
4,0.971416,item_id,item_cnt_month_lag_1,0.971743,item_id,item_cnt_month_lag_1
5,0.972626,item_id,item_cnt_month_lag_1,0.970026,item_id,item_cnt_month_lag_1
6,0.973805,item_id,item_cnt_month_lag_1,0.968895,item_id,item_cnt_month_lag_1
7,0.97503,item_id,item_cnt_month_lag_1,0.96822,item_id,item_cnt_month_lag_1
8,0.976294,item_id,item_cnt_month_lag_1,0.967833,item_id,item_cnt_month_lag_1
9,0.977422,item_id,item_cnt_month_lag_1,0.967701,item_id,item_cnt_month_lag_1


In [8]:
# Coefficients of the ridge model currently bound to `regr`, paired with the feature names.
list(zip(Xall.columns,regr.coef_))

[('shop_id', 0.0002748671112741864),
 ('item_id', 3.3970773415445556e-07),
 ('date_block_num', 0.00035772372195863947),
 ('month', 0.005889112108304929),
 ('item_category_id', -0.0004957882509574599),
 ('metacat', -0.0041148246091521215),
 ('subcat', 0.0002637139036240753),
 ('city', 0.0004927774648293156),
 ('since_first', -0.005697193368909096),
 ('item_cnt_month_lag_1', 0.16871769017239266),
 ('item_cnt_month_lag_2', 0.1064904949388695),
 ('item_cnt_month_lag_3', 0.10005820003025565),
 ('item_cnt_month_lag_12', 0.11694755942827306),
 ('shop_agg_lag_1', 1.4152784608327676e-05),
 ('item_agg_lag_1', 0.0018604115347876141),
 ('shop_agg_lag_2', -4.725738283873196e-06),
 ('item_agg_lag_2', 0.0008849268222470071),
 ('shop_agg_lag_3', -6.3499346089828955e-06),
 ('item_agg_lag_3', 0.0009642754027430102),
 ('shop_agg_lag_12', -3.646262530061297e-06),
 ('item_agg_lag_12', 0.0010483977805776686),
 ('shopcat_agg_lag_1', 0.00016211374111749734),
 ('cat_agg_lag_1', -2.478941314968042e-06),
 ('meta

In [5]:
# Ridge penalty used for the meta-feature and test fits below.
best_regul = 0.8# weighing block 33 more

### Predict train meta-features

In [6]:
# Expanding-window fits: for every level-2 block, train the ridge model on all earlier
# months and store its clipped predictions for that block in column 0 of X_level2.
for meta_block in level2_blocks:
    print(meta_block)
    Xtrain, ytrain = Xall.loc[months < meta_block], yall[months < meta_block]
    Xmeta = Xall.loc[months == meta_block]

    regr = linear_model.Ridge(alpha=best_regul, normalize=True).fit(Xtrain, ytrain)
    ymeta_regr = regr.predict(Xmeta).clip(0,20)

    X_level2[months_level2 == meta_block,0] = ymeta_regr

24
25
26
27
28
29
30
31
32
33
34


### Predict test meta-features

In [34]:
# Train the ridge model on every month before block 34 and predict block 34,
# clipping the predictions to [0, 20].
Xtest = Xall.loc[months == 34]
Xtrain, ytrain = Xall.loc[months < 34], yall[months < 34]

regr = linear_model.Ridge(alpha=best_regul, normalize=True).fit(Xtrain, ytrain)
regr_pred = regr.predict(Xtest).clip(0,20)

In [54]:
# validate on public leaderboard
# Build the frame with .assign() so the prediction column is added to a fresh DataFrame
# rather than to a slice of Xtest (avoids pandas' SettingWithCopyWarning).
submission = Xtest.loc[:, ["shop_id","item_id"]].assign(item_cnt_month=regr_pred)
# NOTE(review): shop 11 is relabelled as 10 before joining on test.csv — presumably
# undoing a shop-id merge done during preprocessing; confirm.
submission.loc[submission.shop_id == 11, "shop_id"] = 10
test = pd.read_csv(os.path.join(DATA_FOLDER, 'test.csv.gz'))
# Left join keeps the row order of test.csv; the join keys are dropped afterwards.
submission = (test.merge(submission, how="left", on=["shop_id","item_id"])
                  .drop(columns=["shop_id","item_id"]))
submission.to_csv("../Submissions/submission_regr.csv", index=False)# 1.0717

## 2. kNN on PCA

### Validate on last two months

In [8]:
# Hyper-parameter grid: number of PCA components x number of neighbours.
pca_comp = [4,5,6]
k_neighbours = [50,100,200,300]
knn_grid = list(itertools.product(pca_comp, k_neighbours))

# One row per grid point, one MSE column per validation block.
knn_mse_scores = pd.DataFrame({"PCAcomps": [n_c for n_c, _ in knn_grid],
                               "Neighs": [n_k for _, n_k in knn_grid],
                               "Block 32": np.zeros(len(knn_grid)),
                               "Block 33": np.zeros(len(knn_grid))})

for valid in level1_valid_blocks:
    Xtrain, Xvalid = Xall.loc[months < valid], Xall.loc[months == valid]
    ytrain = yall[months < valid].clip(0,20)
    yvalid = yall[months == valid].clip(0,20)
    for n_comp in pca_comp:
        print("Block {}, {} components".format(valid, n_comp))
        # The projection is fitted on the training months only and reused for all k.
        pca_decomp = decomposition.PCA(n_components=n_comp).fit(Xtrain)
        print("Proportion of variance explained:", sum(pca_decomp.explained_variance_ratio_))
        Xtrain_pca = pca_decomp.transform(Xtrain)
        Xvalid_pca = pca_decomp.transform(Xvalid)
        for k_neigh in k_neighbours:
            knn_regr = neighbors.KNeighborsRegressor(algorithm="ball_tree", n_neighbors=k_neigh, n_jobs=-1)
            knn_regr.fit(Xtrain_pca, ytrain)
            knn_pred = knn_regr.predict(Xvalid_pca).clip(0,20)
            grid_row = (knn_mse_scores.PCAcomps == n_comp) & (knn_mse_scores.Neighs == k_neigh)
            knn_mse_scores.loc[grid_row, "Block "+str(valid)] = \
                metrics.mean_squared_error(y_true=yvalid, y_pred=knn_pred)

del Xtrain,ytrain,Xvalid,yvalid,pca_decomp,Xtrain_pca,Xvalid_pca

Block 32, 4 components
Proportion of variance explained: 0.9799331556136279
Block 32, 5 components
Proportion of variance explained: 0.9890715041179821
Block 32, 6 components
Proportion of variance explained: 0.9934352438902337
Block 33, 4 components
Proportion of variance explained: 0.9800433503120071
Block 33, 5 components
Proportion of variance explained: 0.9891598489280494
Block 33, 6 components
Proportion of variance explained: 0.9934950155919271


In [9]:
# Validation MSE for every (PCA components, neighbours) pair, one column per validation block.
knn_mse_scores

Unnamed: 0,PCAcomps,Neighs,Block 32,Block 33
0,4,50,1.414683,1.629712
1,4,100,1.299691,1.505183
2,4,200,1.257588,1.391841
3,4,300,1.252552,1.341284
4,5,50,1.364527,1.491274
5,5,100,1.272086,1.414374
6,5,200,1.242066,1.355735
7,5,300,1.238837,1.319622
8,6,50,1.236372,1.488409
9,6,100,1.205907,1.408497


In [7]:
# Number of PCA components and number of neighbours used by the kNN meta-feature loop.
best_comp = 6
best_neigh = 300

### Predict train meta-features

In [8]:
# Expanding-window fits: for every level-2 block, fit PCA + kNN on all earlier months
# and store the clipped predictions for that block in column 1 of X_level2.
for meta_block in level2_blocks:
    print(meta_block)
    Xtrain, Xmeta = Xall.loc[months < meta_block], Xall.loc[months == meta_block]
    ytrain = yall[months < meta_block].clip(0,20)

    pca_decomp = decomposition.PCA(n_components=best_comp).fit(Xtrain)
    print("Proportion of variance explained:", sum(pca_decomp.explained_variance_ratio_))
    Xtrain_pca = pca_decomp.transform(Xtrain)
    Xmeta_pca = pca_decomp.transform(Xmeta)

    knn_regr = neighbors.KNeighborsRegressor(algorithm="ball_tree", n_neighbors=best_neigh, n_jobs=-1)
    ymeta_knn = knn_regr.fit(Xtrain_pca, ytrain).predict(Xmeta_pca).clip(0,20)

    X_level2[months_level2 == meta_block,1] = ymeta_knn

del Xtrain,ytrain,Xmeta,pca_decomp,Xtrain_pca,Xmeta_pca,ymeta_knn

24
Proportion of variance explained: 0.993707874481016
25
Proportion of variance explained: 0.9936296794659307
26
Proportion of variance explained: 0.9932615311456936
27
Proportion of variance explained: 0.9931750919570098
28
Proportion of variance explained: 0.9932089229586475
29
Proportion of variance explained: 0.9932662728727802
30
Proportion of variance explained: 0.9933159743122069
31
Proportion of variance explained: 0.9933709101635475
32
Proportion of variance explained: 0.9934352438902342
33
Proportion of variance explained: 0.993495015591923
34
Proportion of variance explained: 0.9935395618436456


### Predict test meta-features

In [None]:
# Fit PCA + kNN on every month before the test block (34) and predict the test rows,
# using the same split convention and hyper-parameters as the kNN meta-feature loop.
# This cell previously read Xtrain_level1 / ytrain_level1 / Xtest_level1, which are not
# defined in the visible notebook, and hard-coded 5 components / 20 neighbours.
Xtrain = Xall.loc[months < 34]
ytrain = yall[months < 34].clip(0,20)
Xtest = Xall.loc[months == 34]

pca_decomp = decomposition.PCA(n_components=best_comp)
pca_decomp.fit(Xtrain)
Xtrain_pca = pca_decomp.transform(Xtrain)
Xtest_pca = pca_decomp.transform(Xtest)
knn_regr = neighbors.KNeighborsRegressor(algorithm="ball_tree", n_neighbors=best_neigh, n_jobs=-1)
knn_regr.fit(Xtrain_pca, ytrain)
knn_pred = knn_regr.predict(Xtest_pca).clip(0,20)

# Release the large training copies, as the other test-prediction cells do.
del Xtrain,ytrain

## 3. Gradient boosting

### Validate on last two months

In [8]:
# l2_leaf_reg values to compare, and a table with one row per value.
# Despite the table's name, the "block NN" columns hold the best validation RMSE
# reported by CatBoost, not an MSE.
regul = np.logspace(-4, 4, 5)
cat_mse_scores = pd.DataFrame({"regul:": regul,
                               "block 32": np.zeros_like(regul),
                               "least important 32": np.empty_like(regul, dtype="str"),
                               "most important 32": np.empty_like(regul, dtype="str"),
                               "best nbr trees 32": np.zeros_like(regul),
                               "block 33": np.zeros_like(regul),
                               "least important 33": np.empty_like(regul, dtype="str"),
                               "most important 33": np.empty_like(regul, dtype="str"),
                               "best nbr trees 33": np.zeros_like(regul)})
# Positional indices of the columns CatBoost should treat as categorical.
cat_features = [Xall.columns.get_loc(c) for c in ["item_id","shop_id","item_category_id","month",
                                                  "metacat","subcat","city"]]

for i,valid in enumerate(level1_valid_blocks):
    print(valid)
    # The split and the evaluation Pool depend only on the validation block, so they
    # are built once per block rather than once per regularisation value.
    Xtrain = Xall.loc[months < valid]
    ytrain = yall[months < valid].clip(0,20)
    Xvalid = Xall.loc[months == valid]
    yvalid = yall[months == valid].clip(0,20)
    pool_test = Pool(Xvalid, yvalid, cat_features=cat_features)

    for j,alpha in enumerate(regul):
        # The validation block is also the eval_set used for overfitting detection and
        # best-model selection, so the recorded score is the best one seen on that block.
        cat = CatBoostRegressor(iterations=150,
                                learning_rate=0.03,
                                od_type="Iter",
                                od_wait=15,
                                use_best_model=True,
                                loss_function="RMSE",
                                eval_metric="RMSE",
                                one_hot_max_size=2,
                                l2_leaf_reg=alpha)
        cat.fit(Xtrain,
                ytrain,
                cat_features=cat_features,
                eval_set=pool_test,
                verbose=50)
        cat_mse_scores.at[j, "block "+str(valid)] = cat.get_best_score()["validation"]["RMSE"]
        cat_mse_scores.at[j, "best nbr trees "+str(valid)] = cat.get_best_iteration()
        feat_imp = cat.get_feature_importance(prettified=True)
        # Store the feature name itself rather than a one-element array; the first row
        # of the prettified table is read as "most important", the last as "least".
        cat_mse_scores.at[j, "least important "+str(valid)] = feat_imp["Feature Id"].iloc[-1]
        cat_mse_scores.at[j, "most important "+str(valid)] = feat_imp["Feature Id"].iloc[0]

del Xtrain,ytrain,Xvalid,yvalid,pool_test

32
0:	learn: 1.1741923	test: 1.1414913	best: 1.1414913 (0)	total: 1.96s	remaining: 4m 51s
50:	learn: 0.8906879	test: 0.9325013	best: 0.9325013 (50)	total: 1m 18s	remaining: 2m 31s
100:	learn: 0.8315430	test: 0.9273728	best: 0.9273728 (100)	total: 2m 32s	remaining: 1m 13s
149:	learn: 0.8161097	test: 0.9238920	best: 0.9234228 (145)	total: 3m 48s	remaining: 0us

bestTest = 0.9234227737
bestIteration = 145

Shrink model to first 146 iterations.
0:	learn: 1.1741923	test: 1.1414913	best: 1.1414913 (0)	total: 1.85s	remaining: 4m 35s
50:	learn: 0.8906884	test: 0.9325017	best: 0.9325017 (50)	total: 1m 14s	remaining: 2m 24s
100:	learn: 0.8315436	test: 0.9273729	best: 0.9273729 (100)	total: 2m 32s	remaining: 1m 14s
149:	learn: 0.8161107	test: 0.9238922	best: 0.9234230 (145)	total: 3m 49s	remaining: 0us

bestTest = 0.9234229951
bestIteration = 145

Shrink model to first 146 iterations.
0:	learn: 1.1741962	test: 1.1414942	best: 1.1414942 (0)	total: 1.75s	remaining: 4m 20s
50:	learn: 0.8908597	test:

In [9]:
# Best validation RMSE, best iteration and extreme-importance features per l2_leaf_reg value.
cat_mse_scores

Unnamed: 0,regul:,block 32,least important 32,most important 32,best nbr trees 32,block 33,least important 33,most important 33,best nbr trees 33
0,0.0001,0.923423,[item_price_rel_lag_1],[item_cnt_month_lag_1],145.0,0.954618,[item_price_rel_lag_1],[item_cnt_month_lag_1],59.0
1,0.01,0.923423,[item_price_rel_lag_1],[item_cnt_month_lag_1],145.0,0.954618,[item_price_rel_lag_1],[item_cnt_month_lag_1],59.0
2,1.0,0.926656,[item_price_rel_lag_1],[item_cnt_month_lag_1],73.0,0.954411,[item_price_rel_lag_1],[item_cnt_month_lag_1],57.0
3,100.0,0.926709,[item_price_rel_lag_1],[item_cnt_month_lag_1],72.0,0.953778,[item_price_rel_lag_1],[item_cnt_month_lag_1],60.0
4,10000.0,0.945348,[subcat_agg_lag_1],[item_cnt_month_lag_1],149.0,0.969398,[item_price_rel_lag_1],[item_cnt_month_lag_1],149.0


In [10]:
# Feature importances of the CatBoost model currently bound to `cat`.
cat.get_feature_importance(prettified=True)

Unnamed: 0,Feature Id,Importances
0,item_cnt_month_lag_1,42.346897
1,since_first,16.708508
2,item_agg_lag_1,8.923589
3,item_cnt_month_lag_2,7.006977
4,item_cnt_month_lag_3,6.448289
5,item_category_id,5.756472
6,itemcity_agg_lag_1,4.262539
7,shop_id,3.395184
8,subcat,2.215479
9,item_id,1.758387


In [9]:
# CatBoost settings used for the meta-feature and test fits below
# (l2_leaf_reg, learning rate, number of trees).
# NOTE(review): 1000 is not one of the l2_leaf_reg values tried in the validation grid
# (np.logspace(-4, 4, 5)) — confirm where it was validated.
best_alpha = 1000
best_lr = 0.03
best_iter = 95

### Predict train meta-features

In [11]:
# Positional indices of the columns CatBoost should treat as categorical.
categorical_columns = ["item_id","shop_id","item_category_id","month",
                       "metacat","subcat","city"]
cat_features = [Xall.columns.get_loc(name) for name in categorical_columns]

# Fixed settings for every fit; no eval_set is passed, so a fixed number of trees is grown.
cat_params = dict(iterations=best_iter,
                  learning_rate=best_lr,
                  use_best_model=False,
                  loss_function="RMSE",
                  eval_metric="RMSE",
                  one_hot_max_size=2,
                  l2_leaf_reg=best_alpha)

# Expanding-window fits: for every level-2 block, train on all earlier months and store
# the clipped predictions for that block in column 2 of X_level2.
for meta_block in level2_blocks:
    Xtrain, Xmeta = Xall.loc[months < meta_block], Xall.loc[months == meta_block]
    ytrain = yall[months < meta_block].clip(0,20)

    cat = CatBoostRegressor(**cat_params)
    cat.fit(Xtrain, ytrain, cat_features=cat_features, verbose=50)
    ymeta_cat = cat.predict(Xmeta).clip(0,20)

    X_level2[months_level2 == meta_block,2] = ymeta_cat

del Xtrain,ytrain,Xmeta,cat,ymeta_cat

0:	learn: 1.2242611	total: 1.29s	remaining: 2m 1s
50:	learn: 0.9792756	total: 51.8s	remaining: 44.7s
94:	learn: 0.8902057	total: 1m 35s	remaining: 0us
0:	learn: 1.2242507	total: 1.2s	remaining: 1m 52s
50:	learn: 0.9740695	total: 59.2s	remaining: 51.1s
94:	learn: 0.8935565	total: 1m 48s	remaining: 0us
0:	learn: 1.2148712	total: 1.31s	remaining: 2m 3s
50:	learn: 0.9660464	total: 57.1s	remaining: 49.2s
94:	learn: 0.8867692	total: 1m 46s	remaining: 0us
0:	learn: 1.2069370	total: 1.4s	remaining: 2m 11s
50:	learn: 0.9593243	total: 1m 1s	remaining: 53.1s
94:	learn: 0.8805034	total: 1m 54s	remaining: 0us
0:	learn: 1.2012330	total: 1.45s	remaining: 2m 16s
50:	learn: 0.9567137	total: 1m 2s	remaining: 54.3s
94:	learn: 0.8801021	total: 1m 58s	remaining: 0us
0:	learn: 1.1963612	total: 1.6s	remaining: 2m 30s
50:	learn: 0.9535608	total: 1m 5s	remaining: 56.5s
94:	learn: 0.8804154	total: 2m 1s	remaining: 0us
0:	learn: 1.1903493	total: 1.55s	remaining: 2m 25s
50:	learn: 0.9487649	total: 1m 7s	remaining

### Predict test meta-features

In [11]:
# Train CatBoost on every month before block 34 (target clipped to [0, 20]) and
# predict block 34, clipping the predictions to the same range.
Xtest = Xall.loc[months == 34]
Xtrain, ytrain = Xall.loc[months < 34], yall[months < 34].clip(0,20)

cat = CatBoostRegressor(
    iterations=best_iter,
    learning_rate=best_lr,
    use_best_model=False,
    loss_function="RMSE",
    eval_metric="RMSE",
    one_hot_max_size=2,
    l2_leaf_reg=best_alpha,
)
cat.fit(Xtrain, ytrain, cat_features=cat_features, verbose=50)
cat_pred = cat.predict(Xtest).clip(0,20)

# Xtest, cat and cat_pred are kept; only the training copies are released.
del Xtrain,ytrain

0:	learn: 1.1755851	total: 1.98s	remaining: 3m 6s
50:	learn: 0.9393215	total: 1m 21s	remaining: 1m 10s
94:	learn: 0.8766386	total: 2m 33s	remaining: 0us


In [10]:
# Inspection cell: look up the "random_seed" parameter of the model bound to `cat`.
# The recorded NameError shows it was last run while `cat` was not defined
# (e.g. after a `del cat`), i.e. out of notebook order.
cat.get_param("random_seed")

NameError: name 'cat' is not defined

In [12]:
# validate on public leaderboard
# Build the frame with .assign() so the prediction column is added to a fresh DataFrame
# rather than to a slice of Xtest (avoids pandas' SettingWithCopyWarning).
submission = Xtest.loc[:, ["shop_id","item_id"]].assign(item_cnt_month=cat_pred)
# NOTE(review): shop 11 is relabelled as 10 before joining on test.csv — presumably
# undoing a shop-id merge done during preprocessing; confirm.
submission.loc[submission.shop_id == 11, "shop_id"] = 10
test = pd.read_csv(os.path.join(DATA_FOLDER, 'test.csv.gz'))
# Left join keeps the row order of test.csv; the join keys are dropped afterwards.
submission = (test.merge(submission, how="left", on=["shop_id","item_id"])
                  .drop(columns=["shop_id","item_id"]))
submission.to_csv("../Submissions/submission_cat.csv", index=False)# 0.986

## 4. Neural network regression

### Validate on last two months

In [58]:
# Validation MSE of the MLP, one entry per validation block (in level1_valid_blocks order).
nn_mse_scores = []

for i,valid in enumerate(level1_valid_blocks):
    print(i)
    # Scaling statistics come from the training months only and are reused on the
    # validation block.
    scaler = preprocessing.StandardScaler()
    Xtrain = scaler.fit_transform(Xall.loc[months < valid])
    Xvalid = scaler.transform(Xall.loc[months == valid])
    # The training target is left unclipped; validation target and predictions are clipped.
    ytrain = yall[months < valid]
    yvalid = yall[months == valid].clip(0,20)

    # NOTE(review): no random_state is passed, so the scores can differ between runs.
    nn = neural_network.MLPRegressor(hidden_layer_sizes=(32,16), solver='adam', max_iter=10, verbose=True)
    nn.fit(Xtrain,ytrain)
    nn_pred = nn.predict(Xvalid).clip(0,20)
    block_mse = metrics.mean_squared_error(y_true=yvalid, y_pred=nn_pred)
    nn_mse_scores.append(block_mse)

del Xtrain,ytrain,Xvalid,yvalid,nn

0
Iteration 1, loss = 2.76755049
Iteration 2, loss = 2.66936138
Iteration 3, loss = 2.62013444
Iteration 4, loss = 2.56010249
Iteration 5, loss = 2.47909144
Iteration 6, loss = 2.45146428
Iteration 7, loss = 2.43063374
Iteration 8, loss = 2.38743215
Iteration 9, loss = 2.37173117
Iteration 10, loss = 2.34212261


1
Iteration 1, loss = 3.44962365
Iteration 2, loss = 3.31774433


In [59]:
# Validation MSE of the MLP, one entry per block in level1_valid_blocks.
nn_mse_scores

[0.9178123427760705, 0.901967080031361]

In [13]:
# Value passed as max_iter to the MLP in the meta-feature loop below.
best_iter_nn = 4

### Predict train meta-features

In [14]:
# Expanding-window fits: for every level-2 block, standardise with the statistics of the
# earlier months, train the MLP on them, and store the clipped predictions for that
# block in column 3 of X_level2.
for meta_block in level2_blocks:
    scaler = preprocessing.StandardScaler()
    Xtrain = scaler.fit_transform(Xall.loc[months < meta_block])
    Xmeta = scaler.transform(Xall.loc[months == meta_block])
    ytrain = yall[months < meta_block]

    nn = neural_network.MLPRegressor(
        hidden_layer_sizes=(32,16),
        solver='adam',
        max_iter=best_iter_nn,
        verbose=True,
    )
    nn.fit(Xtrain,ytrain)
    ymeta_nn = nn.predict(Xmeta).clip(0,20)

    X_level2[months_level2 == meta_block,3] = ymeta_nn

del Xtrain,ytrain,Xmeta,nn,ymeta_nn

Iteration 1, loss = 2.35996140
Iteration 2, loss = 2.19275192
Iteration 3, loss = 2.18869140
Iteration 4, loss = 2.13631903


Iteration 1, loss = 2.64120081
Iteration 2, loss = 2.39433605
Iteration 3, loss = 2.38574864
Iteration 4, loss = 2.34249175


Iteration 1, loss = 2.62126075
Iteration 2, loss = 2.46345494
Iteration 3, loss = 2.48329649
Iteration 4, loss = 2.38896935


Iteration 1, loss = 2.71350622
Iteration 2, loss = 2.47892450
Iteration 3, loss = 2.40572964
Iteration 4, loss = 2.37601864


Iteration 1, loss = 3.02946908
Iteration 2, loss = 2.77872481
Iteration 3, loss = 2.74822031
Iteration 4, loss = 2.78247670


Iteration 1, loss = 3.09840363
Iteration 2, loss = 2.79884279
Iteration 3, loss = 2.79022998
Iteration 4, loss = 2.75000151


Iteration 1, loss = 2.96334504
Iteration 2, loss = 2.83325838
Iteration 3, loss = 2.76544496
Iteration 4, loss = 2.65954197


Iteration 1, loss = 2.90040305
Iteration 2, loss = 2.72965326
Iteration 3, loss = 2.65967344
Iteration 4, loss = 2.62452289


Iteration 1, loss = 2.89463844
Iteration 2, loss = 2.66866759
Iteration 3, loss = 2.61215920
Iteration 4, loss = 2.55924768


Iteration 1, loss = 3.36985122
Iteration 2, loss = 3.23276157
Iteration 3, loss = 3.18073863
Iteration 4, loss = 3.08853389


Iteration 1, loss = 3.69426751
Iteration 2, loss = 3.58162144
Iteration 3, loss = 3.55099916
Iteration 4, loss = 3.46630155


### Predict test meta-features

In [61]:
# Standardise with the statistics of the months before block 34, train the MLP on them
# and predict block 34, clipping the predictions to [0, 20].
scaler = preprocessing.StandardScaler()
Xtrain = Xall.loc[months < 34]
Xtrain = scaler.fit_transform(Xtrain)
ytrain = yall[months < 34]
Xtest = Xall.loc[months == 34]
# From here on Xtest is the scaled array, no longer the DataFrame slice.
Xtest = scaler.transform(Xtest)

# Use the MLP iteration budget (best_iter_nn), as in the meta-feature loop;
# best_iter is the CatBoost tree count and would train for far more iterations.
nn = neural_network.MLPRegressor(hidden_layer_sizes=(32,16), solver='adam', max_iter=best_iter_nn, verbose=True)
nn.fit(Xtrain,ytrain)
nn_pred = nn.predict(Xtest).clip(0,20)

del Xtrain,ytrain

Iteration 1, loss = 3.75001017
Iteration 2, loss = 3.57598808
Iteration 3, loss = 3.49979393
Iteration 4, loss = 3.43213998


In [69]:
# validate on public leaderboard
# The ids are read from Xall directly. .assign() adds the prediction column to a fresh
# DataFrame rather than to a slice of Xall (avoids pandas' SettingWithCopyWarning).
submission = Xall.loc[months == 34, ["shop_id","item_id"]].assign(item_cnt_month=nn_pred)
# NOTE(review): shop 11 is relabelled as 10 before joining on test.csv — presumably
# undoing a shop-id merge done during preprocessing; confirm.
submission.loc[submission.shop_id == 11, "shop_id"] = 10
test = pd.read_csv(os.path.join(DATA_FOLDER, 'test.csv.gz'))
# Left join keeps the row order of test.csv; the join keys are dropped afterwards.
submission = (test.merge(submission, how="left", on=["shop_id","item_id"])
                  .drop(columns=["shop_id","item_id"]))
submission.to_csv("../Submissions/submission_nn.csv", index=False)# 0.995

In [15]:
# Persist the second-level feature matrix together with its targets and month labels.
level2_path = os.path.join(INTERM_RES_FOLDER, "sets_level2.p")
with open(level2_path, "wb") as level2_file:
    pickle.dump((X_level2, y_level2, months_level2), level2_file)