# Santander Product Recommendation

### Part 3. Evaluation

The important process to build the good model is evaluating the model to compare the performances.

### 3-1.  Prepare Data

First, we load data previously pre-processed and saved.

In [1]:
import numpy as np
import pandas as pd
import pickle

from sklearn.preprocessing import StandardScaler

from keras import models, layers
from keras.callbacks import EarlyStopping
import xgboost as xgb
import lightgbm as lgbm

Using TensorFlow backend.


In [2]:
with open('../input/meta_data.pkl', 'rb') as fin:
    meta = pickle.load(fin)

features = meta['features']
target = meta['target']
prods = meta['prods']

with open('../input/processed_data.pkl', 'rb') as finn:
    data = pickle.load(finn)

#validation data
tst_all = data['tst_all']

#training data
trn = data['trn_all']

del meta, data

In [None]:
with open('input/vlds.pkl', 'wb') as f:
    pickle.dump({
        'vld_all': trn
    }, f)

<br>

### 3-2. Prepare Evaluation  Method

Our evaluation method is MAP@7, which compare predictions and the actually purchased product list, and average them.

In [3]:
ncodpers_tst_vld = tst_vld['ncodpers'].values

def get_purchased_products():    

    # get newly bought products on the validation data
    for prod in prods:
        prev = prod + '_prev'
        padd = prod + '_add'
        tst_vld[padd] = tst_vld[prod] - tst_vld[prev]
    
    add_vld = tst_vld[[prod + '_add' for prod in prods]].values
    add_vld_list = [list() for i in range(len(ncodpers_tst_vld))]

    count_vld = 0
    for ncodper in range(len(ncodpers_tst_vld)):
        for prod in range(len(prods)):
            if add_vld[ncodper, prod] > 0:
                add_vld_list[ncodper].append(prod)
                count_vld += 1
                
    return add_vld_list

add_vld_list = get_purchased_products()

In [4]:
# Extract 7 products wigh high values based on the prediction
def predict_7_products(preds_vld):
    result_vld = []
    for ncodper, pred in zip(ncodpers_tst_vld, preds_vld):
        y_prods = [(y,p,ip) for y,p,ip in zip(pred, prods[target], target)]
        y_prods = sorted(y_prods, key=lambda a: a[0], reverse=True)[:7]
        result_vld.append([ip for y,p,ip in y_prods])
    
    return result_vld

In [5]:
def apk(actual, predicted, k=7, default=0.0):

    if len(predicted) > k:
        predicted = predicted[:k]

    score = 0.0
    num_hits = 0.0

    for i, p in enumerate(predicted):
        # When prediction is in actual products list and not duplicated, it scores
        if p in actual and p not in predicted[:i]:
            num_hits += 1.0
            score += num_hits / (i+1.0)

    if not actual:
        return default

    return score / min(len(actual), k)

def mapk(actual, predicted, k=7, default=0.0):
    return np.mean([apk(a, p, k, default) for a, p in zip(actual, predicted)]) 

In [6]:
# Get the highest score from validation data(0.042613)
print(mapk(add_vld_list, add_vld_list, 7, 0.0))

0.04266379915553903


<br>

### 3-3. Build Simple Model

For preprocessing all data, we assemble train data and test data.

In [8]:
vld_date = '2016-05-28'

trn_vld = trn[trn['fecha_dato'] < vld_date]
eval_vld = trn[trn['fecha_dato']==vld_date]

X_trn_vld = trn_vld[features].values
y_trn_vld = trn_vld['target'].values

X_eval_vld = eval_vld[features].values
y_eval_vld = eval_vld['target'].values

X_trn_vld.shape, y_trn_vld.shape, X_eval_vld.shape, y_eval_vld.shape

((10765757, 60), (10765757,), (689132, 60), (689132,))

#### XGBoost Model

In [70]:
# Set up the parameter for XGBoost model
param_xgb = {
    #'booster': 'gbtree',
    'max_depth': 8,
    'nthread': 4,
    'num_class': 17,
    'objective': 'multi:softprob',
    'silent': 1,
    'eval_metric': 'mlogloss',
    'eta': 0.1,
    'min_child_weight': 10,
    'colsample_bytree': 0.8,
    'colsample_bylevel': 0.9,
    'seed': 2018,
    }

# Convert train, validation data to fit in the XGBoost model
dtrn = xgb.DMatrix(X_trn_vld, label=y_trn_vld, feature_names=features)
dvld = xgb.DMatrix(X_eval_vld, label=y_eval_vld, feature_names=features)

# Train the XGBoost model
watch_list = [(dtrn, 'train'), (dvld, 'eval')]
model_xgb = xgb.train(param_xgb, dtrn, num_boost_round=100, evals=watch_list, early_stopping_rounds=10)
best_ntree_limit = model_xgb.best_ntree_limit


# Save the model
pickle.dump(model_xgb, open("../model/xgb.pkl", "wb"))

[0]	train-mlogloss:2.11779	eval-mlogloss:2.09076
Multiple eval metrics have been passed: 'eval-mlogloss' will be used for early stopping.

Will train until eval-mlogloss hasn't improved in 10 rounds.
[1]	train-mlogloss:1.82085	eval-mlogloss:1.75636
[2]	train-mlogloss:1.63405	eval-mlogloss:1.52409
[3]	train-mlogloss:1.44439	eval-mlogloss:1.34769
[4]	train-mlogloss:1.30098	eval-mlogloss:1.2078
[5]	train-mlogloss:1.21081	eval-mlogloss:1.08793
[6]	train-mlogloss:1.11172	eval-mlogloss:0.989476
[7]	train-mlogloss:1.02199	eval-mlogloss:0.903919
[8]	train-mlogloss:0.948211	eval-mlogloss:0.828923
[9]	train-mlogloss:0.892847	eval-mlogloss:0.763686
[10]	train-mlogloss:0.834503	eval-mlogloss:0.705944
[11]	train-mlogloss:0.791048	eval-mlogloss:0.655371
[12]	train-mlogloss:0.748521	eval-mlogloss:0.610169
[13]	train-mlogloss:0.713106	eval-mlogloss:0.569988
[14]	train-mlogloss:0.6793	eval-mlogloss:0.534392
[15]	train-mlogloss:0.642269	eval-mlogloss:0.501605
[16]	train-mlogloss:0.617519	eval-mlogloss:0

In [75]:
# Getting the prediction with the validation data
X_tst_vld = tst_vld[features].values
X_tst_vld = xgb.DMatrix(X_tst_vld, feature_names=features)
preds_vld_xgb = model_xgb.predict(X_tst_vld, ntree_limit=best_ntree_limit)

# Choose predictions only for top 16 classes
preds_vld_xgb_16 = np.delete(preds_vld_xgb, 16, axis=1)

# It is impossible to purchase products already did, so subtract 1 from the prediction
preds_vld_xgb_16 = preds_vld_xgb_16 - tst_vld[[prod+'_prev' for prod in prods[target]]]

# Get 7 products and calculate MAP@7 with the predictions (0.03609)
result_xgb = predict_7_products(preds_vld_xgb_16.values)
print(mapk(add_vld_list, result_xgb, 7, 0.0))

0.03609679275470783


### LightGBM Model

In [9]:
# Set up the parameter for the LightGBM model
params_lgb = {
    'task' : 'train',
    'boosting_type' : 'gbdt',
    'objective' : 'multiclass',
    'num_class': 17,
    'metric' : {'multi_logloss'},
    'is_training_metric': True,
    'max_bin': 255,
    'num_leaves' : 64,
    'learning_rate' : 0.1,
    'feature_fraction' : 0.8,
    'min_data_in_leaf': 10,
    'min_sum_hessian_in_leaf': 5,
}

# Convert train, validation data to fit in the LightGBM model
train = lgbm.Dataset(X_trn_vld, label=y_trn_vld, feature_name=features)
validate = lgbm.Dataset(X_eval_vld, label=y_eval_vld, feature_name=features, reference=train)


# Train the model
model_lgb = lgbm.train(params_lgb, train, num_boost_round=1000, valid_sets=validate, early_stopping_rounds=20)
best_iteration = model_lgb.best_iteration

# Save the trained model and best iteration detail
model_lgb.save_model("model/lgbm.model.txt")
pickle.dump(best_iteration, open("model/lgbm.model.meta", "wb"))

[1]	valid_0's multi_logloss: 0.367338
Training until validation scores don't improve for 20 rounds
[2]	valid_0's multi_logloss: 0.345466
[3]	valid_0's multi_logloss: 0.330157
[4]	valid_0's multi_logloss: 0.316865
[5]	valid_0's multi_logloss: 0.303929
[6]	valid_0's multi_logloss: 0.293882
[7]	valid_0's multi_logloss: 0.285786
[8]	valid_0's multi_logloss: 0.277765
[9]	valid_0's multi_logloss: 0.270836
[10]	valid_0's multi_logloss: 0.264364
[11]	valid_0's multi_logloss: 0.258532
[12]	valid_0's multi_logloss: 0.253337
[13]	valid_0's multi_logloss: 0.248596
[14]	valid_0's multi_logloss: 0.244373
[15]	valid_0's multi_logloss: 0.240536
[16]	valid_0's multi_logloss: 0.237038
[17]	valid_0's multi_logloss: 0.233878
[18]	valid_0's multi_logloss: 0.230963
[19]	valid_0's multi_logloss: 0.228342
[20]	valid_0's multi_logloss: 0.22607
[21]	valid_0's multi_logloss: 0.223855
[22]	valid_0's multi_logloss: 0.221916
[23]	valid_0's multi_logloss: 0.22005
[24]	valid_0's multi_logloss: 0.218437
[25]	valid_0's

[208]	valid_0's multi_logloss: 0.194018
[209]	valid_0's multi_logloss: 0.194009
[210]	valid_0's multi_logloss: 0.193993
[211]	valid_0's multi_logloss: 0.193983
[212]	valid_0's multi_logloss: 0.193968
[213]	valid_0's multi_logloss: 0.193955
[214]	valid_0's multi_logloss: 0.193947
[215]	valid_0's multi_logloss: 0.193938
[216]	valid_0's multi_logloss: 0.193927
[217]	valid_0's multi_logloss: 0.193915
[218]	valid_0's multi_logloss: 0.193901
[219]	valid_0's multi_logloss: 0.193892
[220]	valid_0's multi_logloss: 0.193878
[221]	valid_0's multi_logloss: 0.193866
[222]	valid_0's multi_logloss: 0.193855
[223]	valid_0's multi_logloss: 0.193841
[224]	valid_0's multi_logloss: 0.193835
[225]	valid_0's multi_logloss: 0.193826
[226]	valid_0's multi_logloss: 0.193816
[227]	valid_0's multi_logloss: 0.193806
[228]	valid_0's multi_logloss: 0.193794
[229]	valid_0's multi_logloss: 0.193785
[230]	valid_0's multi_logloss: 0.193776
[231]	valid_0's multi_logloss: 0.193765
[232]	valid_0's multi_logloss: 0.193752


[414]	valid_0's multi_logloss: 0.192209
[415]	valid_0's multi_logloss: 0.192202
[416]	valid_0's multi_logloss: 0.192192
[417]	valid_0's multi_logloss: 0.192183
[418]	valid_0's multi_logloss: 0.192177
[419]	valid_0's multi_logloss: 0.192168
[420]	valid_0's multi_logloss: 0.192161
[421]	valid_0's multi_logloss: 0.19215
[422]	valid_0's multi_logloss: 0.192144
[423]	valid_0's multi_logloss: 0.192135
[424]	valid_0's multi_logloss: 0.192127
[425]	valid_0's multi_logloss: 0.192122
[426]	valid_0's multi_logloss: 0.192109
[427]	valid_0's multi_logloss: 0.192101
[428]	valid_0's multi_logloss: 0.192094
[429]	valid_0's multi_logloss: 0.19209
[430]	valid_0's multi_logloss: 0.192082
[431]	valid_0's multi_logloss: 0.192073
[432]	valid_0's multi_logloss: 0.192067
[433]	valid_0's multi_logloss: 0.192061
[434]	valid_0's multi_logloss: 0.192053
[435]	valid_0's multi_logloss: 0.192034
[436]	valid_0's multi_logloss: 0.192026
[437]	valid_0's multi_logloss: 0.192019
[438]	valid_0's multi_logloss: 0.192012
[4

[620]	valid_0's multi_logloss: 0.190909
[621]	valid_0's multi_logloss: 0.190904
[622]	valid_0's multi_logloss: 0.190897
[623]	valid_0's multi_logloss: 0.19089
[624]	valid_0's multi_logloss: 0.190884
[625]	valid_0's multi_logloss: 0.190879
[626]	valid_0's multi_logloss: 0.190874
[627]	valid_0's multi_logloss: 0.190868
[628]	valid_0's multi_logloss: 0.190864
[629]	valid_0's multi_logloss: 0.190858
[630]	valid_0's multi_logloss: 0.190852
[631]	valid_0's multi_logloss: 0.190843
[632]	valid_0's multi_logloss: 0.190838
[633]	valid_0's multi_logloss: 0.190831
[634]	valid_0's multi_logloss: 0.190824
[635]	valid_0's multi_logloss: 0.190817
[636]	valid_0's multi_logloss: 0.190809
[637]	valid_0's multi_logloss: 0.190802
[638]	valid_0's multi_logloss: 0.190796
[639]	valid_0's multi_logloss: 0.190795
[640]	valid_0's multi_logloss: 0.190792
[641]	valid_0's multi_logloss: 0.190787
[642]	valid_0's multi_logloss: 0.190781
[643]	valid_0's multi_logloss: 0.190778
[644]	valid_0's multi_logloss: 0.190773
[

[826]	valid_0's multi_logloss: 0.189858
[827]	valid_0's multi_logloss: 0.189853
[828]	valid_0's multi_logloss: 0.189841
[829]	valid_0's multi_logloss: 0.189836
[830]	valid_0's multi_logloss: 0.189835
[831]	valid_0's multi_logloss: 0.189833
[832]	valid_0's multi_logloss: 0.189828
[833]	valid_0's multi_logloss: 0.189825
[834]	valid_0's multi_logloss: 0.189819
[835]	valid_0's multi_logloss: 0.189815
[836]	valid_0's multi_logloss: 0.189811
[837]	valid_0's multi_logloss: 0.189805
[838]	valid_0's multi_logloss: 0.189797
[839]	valid_0's multi_logloss: 0.189794
[840]	valid_0's multi_logloss: 0.189791
[841]	valid_0's multi_logloss: 0.189786
[842]	valid_0's multi_logloss: 0.189782
[843]	valid_0's multi_logloss: 0.189779
[844]	valid_0's multi_logloss: 0.189775
[845]	valid_0's multi_logloss: 0.189771
[846]	valid_0's multi_logloss: 0.189767
[847]	valid_0's multi_logloss: 0.18976
[848]	valid_0's multi_logloss: 0.189756
[849]	valid_0's multi_logloss: 0.189752
[850]	valid_0's multi_logloss: 0.189747
[

In [41]:
# Getting the prediction with the validation data
preds_vld_lgb = model_lgb.predict(tst_vld[features], ntree_limit=best_iteration)

# Choose predictions only for top 16 classes
preds_vld_lgb_16 = np.delete(preds_vld_lgb, 16, axis=1)

# It is impossible to purchase products already did, so subtract 1 from the prediction
preds_vld_lgb_16 = preds_vld_lgb_16 - tst_vld[[prod+'_prev' for prod in prods[target]]]
result_lgb = predict_7_products(preds_vld_lgb_16.values)

# Get 7 products and calculate MAP@7 with the predictions (0.03652)
print(mapk(add_vld_list, result_lgb, 7, 0.0))

0.036521553551041475


#### Neural Network Model

All data should be between -1 and 1 to be trained, so we scaled numeric features before training the model.

In [9]:
scaler = StandardScaler().fit(trn[features])

X_trn_vld_norm = scaler.transform(X_trn_vld)
X_eval_vld_norm = scaler.transform(X_eval_vld)

y_trn_vld_matrix = trn_vld[[prod for prod in prods[target]]].values
y_eval_vld_matrix = eval_vld[[prod for prod in prods[target]]].values

  return self.partial_fit(X, y)


In [12]:
model_nn = models.Sequential()
model_nn.add(layers.Dense(512, activation='relu', input_shape=(60,)))
model_nn.add(layers.Dropout(0.8))
model_nn.add(layers.Dense(512, activation='relu'))
model_nn.add(layers.Dropout(0.5))
model_nn.add(layers.Dense(16, activation='softmax'))

model_nn.compile(optimizer='rmsprop',
             loss='categorical_crossentropy',
             metrics=['accuracy'])

model_nn.summary()





Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 512)               31232     
_________________________________________________________________
dropout_1 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 512)               262656    
_________________________________________________________________
dropout_2 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 16)                8208      
Total params: 302,096
Trainable params: 302,096
Non-trainable params: 0
___________________________________________________________

In [14]:
# Train and save the model
callback_list = [EarlyStopping(monitor='val_acc', patience = 10)]
model_nn.fit(X_trn_vld_norm, y_trn_vld_matrix, epochs=100, batch_size=64, callbacks=callback_list, validation_data=(X_eval_vld_norm ,y_eval_vld_matrix))

pickle.dump(model_nn, open("../model/neuralnetwork.pkl", "wb"))

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Train on 10765757 samples, validate on 689132 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100


In [69]:
# Scaled the validation data and predict the model
X_tst_vld_norm = scaler.transform(tst_vld[features])
vld_preds_nn = model_nn.predict(X_tst_vld_norm, batch_size=512)

# Get products based on the predictions up to 7
result_nn = []
for ncodper, prds in zip(ncodpers_tst_vld, preds_prod):
    r = [(ip,p) for ip, p in zip(target,prds) if p > 0]
    r = sorted(r, key=lambda a:a[1], reverse=True)[:7]
    result_nn.append([ip for ip,p in r])

# Calculate MAP@7 with the predictions  (0.00884)
mapk(add_vld_list, result_nn, 7, 0.0)

0.00884893083733709

### Conclusion

#### Starategy
- XGBoost/LightGBM: I deisnged the model to target the 16 most popular products and 17th class indicating either no additions or an addition of one of the eight remaining products.
<br>
- NN: This model target presence of product in a given month regardless the products are newly purchased or not. It targeted a length 16 vector of the more more popular products and are trained on all customers.product. It has two hidden layers of 512 nodes and drop out layers, and the 16 node output layer.

#### Overview Performance
- Map@7 maximum : 0.04266
- XGBoost : 0.03609
- LightGBM : 0.03652
- NN : 0.00884