In [27]:
import numpy as np
import pandas as pd 
from collections import defaultdict
from pprint import pprint
import xgboost as xgb
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
 
matplotlib.rcParams['figure.figsize'] = (8, 6)

import keras
from keras.utils import np_utils
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.layers import Merge
from keras.layers import Convolution1D
from keras.layers import Reshape
from keras.layers import Flatten
seed = 0
np.random.seed(seed)

import time
import operator

In [2]:
# Names of the customer-attribute columns of the raw data set.
# NOTE(review): this list is not referenced again in the cells shown here.
demographic_cols = ['ncodpers','fecha_alta','ind_empleado','pais_residencia','sexo','age','ind_nuevo','antiguedad','indrel',
 'indrel_1mes','tiprel_1mes','indresi','indext','conyuemp','canal_entrada','indfall',
 'tipodom','cod_prov','ind_actividad_cliente','renta','segmento']

# NOTE(review): not referenced again in the cells shown here.
notuse = ["ult_fec_cli_1t","nomprov",'fecha_dato']

# The 24 product indicator names, in the order used when scores are zipped
# against products for the submission. The feature-engineering cells work on
# product_col[2:] only, and every model output (22 values) is left-padded with
# [0, 0] so that it lines up with this list again.
product_col = [
 'ind_ahor_fin_ult1','ind_aval_fin_ult1','ind_cco_fin_ult1','ind_cder_fin_ult1','ind_cno_fin_ult1','ind_ctju_fin_ult1',
 'ind_ctma_fin_ult1','ind_ctop_fin_ult1','ind_ctpp_fin_ult1','ind_deco_fin_ult1','ind_deme_fin_ult1',
 'ind_dela_fin_ult1','ind_ecue_fin_ult1','ind_fond_fin_ult1','ind_hip_fin_ult1','ind_plan_fin_ult1',
 'ind_pres_fin_ult1','ind_reca_fin_ult1','ind_tjcr_fin_ult1','ind_valo_fin_ult1','ind_viv_fin_ult1','ind_nomina_ult1',
 'ind_nom_pens_ult1','ind_recibo_ult1']

# Import Data

In [3]:
df_train = pd.read_csv('cleaned_data/DataMulticlass_6_withpast2.csv')

  interactivity=interactivity, compiler=compiler, result=result)


In [4]:
df_test = pd.read_csv('cleaned_data/TestSet_withpast3.csv')

  interactivity=interactivity, compiler=compiler, result=result)


In [5]:
pd.set_option('display.max_columns', None)

# Clean Data

### Drop unnecessary columns

In [6]:
# Attributes that are not used as model inputs; they are removed from the
# training and the test frame in place.
drop_column = [
    'ind_nuevo', 'indrel', 'indresi', 'indfall', 'tipodom', 'ind_empleado',
    'pais_residencia', 'indrel_1mes', 'indext', 'conyuemp', 'fecha_alta',
    'tiprel_1mes',
]

for frame in (df_train, df_test):
    frame.drop(labels=drop_column, axis=1, inplace=True)

### Add missing income

In [7]:
# Income statistics used for imputation. They are all taken from the test set
# and computed ONCE, before anything is filled in, so that the training and the
# test frame are imputed with exactly the same numbers.
df_test["renta"] = pd.to_numeric(df_test["renta"], errors="coerce")
unique_prov = df_test[df_test.cod_prov.notnull()].cod_prov.unique()
grouped = df_test.groupby("cod_prov")["renta"].median()   # median income per province code
renta_overall_median = df_test["renta"].median()          # fallback when no province median applies


def impute_renta(df):
    """Fill missing ``renta`` values of ``df`` in place.

    ``renta`` is first coerced to numeric (unparseable entries become NaN).
    Missing values are then replaced by the median income of the row's
    ``cod_prov`` (from ``grouped``); rows whose province is missing, is not
    present in ``grouped``, or has no median fall back to
    ``renta_overall_median``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``renta`` and ``cod_prov`` columns. Modified in place.

    Returns
    -------
    None
    """
    df["renta"] = pd.to_numeric(df["renta"], errors="coerce")
    # One vectorised lookup replaces a full-column pass per province.
    df["renta"] = df["renta"].fillna(df["cod_prov"].map(grouped))
    # Assign back instead of `df.renta.fillna(..., inplace=True)`: chained
    # in-place fills are not guaranteed to write through to the frame.
    df["renta"] = df["renta"].fillna(renta_overall_median)


impute_renta(df_train)
impute_renta(df_test)

In [8]:
def drop_na(df):
    """Remove, in place, every row of ``df`` whose ``ind_actividad_cliente`` is missing."""
    required = ['ind_actividad_cliente']
    df.dropna(subset=required, axis='index', how='any', inplace=True)
    
drop_na(df_train)

### Convert and make dummy

In [9]:
# Categorical columns; get_dummy() expands each of them into one-hot columns.
dummy_col = ['sexo','canal_entrada','cod_prov','segmento']
# Subset of the above whose values are filtered through use_dummy_col first:
# any value not listed there is blanked to NaN before the one-hot encoding.
dummy_col_select = ['canal_entrada','cod_prov']

In [10]:
# A category keeps its own dummy column only when it occurs in more than 5% of
# the training rows; `use_dummy_col` maps each filtered column to the list of
# categories that pass this threshold.
limit = int(0.05 * len(df_train.index))
use_dummy_col = {}

for col in dummy_col_select:
    # value_counts() is computed once per column (it used to be recomputed for
    # every single category inside the inner loop).
    trainlist = df_train[col].value_counts()
    use_dummy_col[col] = trainlist[trainlist > limit].index.tolist()

In [11]:
def get_dummy(df):
    """One-hot encode the categorical columns of ``df``.

    For every column in ``dummy_col_select`` the values that are not listed in
    ``use_dummy_col[col]`` are first overwritten with NaN on the frame that was
    passed in, so they do not get a dummy column of their own. All columns in
    ``dummy_col`` are then expanded with ``pd.get_dummies``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the columns named in ``dummy_col``. Its
        ``dummy_col_select`` columns are modified in place.

    Returns
    -------
    pandas.DataFrame
        New frame in which the ``dummy_col`` columns are replaced by their
        one-hot columns.

    NOTE(review): train and test are encoded by separate calls, so the two
    results only share the same dummy columns if both frames contain the same
    categories - confirm against the data.
    """
    for col in dummy_col_select:
        # Single vectorised mask instead of one full-column comparison per
        # distinct value.
        rare = ~df[col].isin(use_dummy_col[col])
        # Only assign when something has to be blanked, so a column without
        # rare values keeps its dtype (and thus its dummy column names).
        if rare.any():
            df.loc[rare, col] = np.nan
    return pd.get_dummies(df, prefix=dummy_col, columns=dummy_col)
    
df_train = get_dummy(df_train)
df_test = get_dummy(df_test)

In [12]:
def _add_scaled_features(df, col, cap):
    """Cap ``df[col]`` at ``cap`` and write its scaled, log and squared variants in place."""
    capped = pd.to_numeric(df[col], errors="coerce").clip(upper=cap)
    # Same logarithm in numerator and denominator, i.e. a base-`cap` log of
    # (x + 1). The previous code divided a base-10 log by a natural log.
    df["log_" + col] = (np.log(capped + 1) / np.log(cap)).round(6)
    df["square_" + col] = (np.square(capped) / np.square(cap)).round(6)
    df[col] = (capped / cap).round(6)


def clean_quantitative_param(df):
    """Normalise ``age``, ``renta`` and ``antiguedad`` and add derived features.

    Each of the three columns is coerced to numeric, capped at a fixed maximum
    (80, 1.0e6 and 256 respectively) and divided by that maximum. For each
    column ``log_<col>`` (log(x + 1) / log(max)) and ``square_<col>``
    (x**2 / max**2) are added. All results are rounded to 6 decimals.

    In ``antiguedad`` the value -999999 is treated as a missing-value sentinel
    and replaced by the median of the remaining values before scaling.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``age``, ``renta`` and ``antiguedad`` columns. Modified in
        place.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, for call-site convenience.
    """
    _add_scaled_features(df, "age", 80)
    _add_scaled_features(df, "renta", 1.0e6)

    tenure = pd.to_numeric(df["antiguedad"], errors="coerce").astype(float)
    is_sentinel = tenure == -999999
    # The replacement median is taken over genuine values only; including the
    # sentinel itself would drag the median down.
    df["antiguedad"] = tenure.mask(is_sentinel, tenure[~is_sentinel].median())
    _add_scaled_features(df, "antiguedad", 256)

    return df

In [13]:
df_train = clean_quantitative_param(df_train)
df_test = clean_quantitative_param(df_test)

In [14]:
# Group the training columns by their `_ult1_<n>` suffix (n = 5 ... 1).
# The column list is snapshotted first so the groups do not depend on columns
# that are added further down.
all_columns = list(df_train.columns)
product_col_5, product_col_4, product_col_3, product_col_2, product_col_1 = (
    [name for name in all_columns if ('_ult1_%d' % n) in name]
    for n in (5, 4, 3, 2, 1)
)

# tot5: per-row sum over all `_ult1_5` columns, added to both frames.
for frame in (df_train, df_test):
    frame['tot5'] = frame[product_col_5].sum(axis=1)

In [15]:
# <product>_past: mean of the product's five suffixed columns (_5 ... _1),
# computed for both frames. NaN in any of the five propagates to the result.
for col in product_col[2:]:
    lag_names = [col + suffix for suffix in ('_5', '_4', '_3', '_2', '_1')]
    for frame in (df_train, df_test):
        total = frame[lag_names[0]]
        for name in lag_names[1:]:
            total = total + frame[name]
        frame[col + '_past'] = total / 5

In [16]:
# Scale every <product>_past value by (1 - <product>_5), so the past score is
# zeroed wherever the `_5` column equals 1.
for pro in product_col[2:]:
    past_name, latest_name = pro + '_past', pro + '_5'
    for frame in (df_train, df_test):
        frame[past_name] = frame[past_name] * (1 - frame[latest_name])

# Model: Keras

In [30]:
# col1: the suffixed product columns, grouped `_1` ... `_5`; within each group
# the columns keep the order they have in df_train.
col1 = product_col_1 + product_col_2 + product_col_3 + product_col_4 + product_col_5
# col2: every remaining feature (label and customer id excluded).
col2 = list(df_train.drop(['target', 'ncodpers'] + col1, axis=1).columns.values)

# `.values` instead of the deprecated `.as_matrix()`, matching the rest of the
# notebook. Missing test values are zero-filled exactly as in the XGB cell; a
# single NaN input would otherwise turn that customer's whole prediction into NaN.
x_train1 = df_train[col1].values
x_test1 = df_test[col1].fillna(0).values

# (rows, 5 suffix groups, 22 products) - the shape the Conv1D branches expect.
x_train1 = np.reshape(x_train1, (len(x_train1), 5, 22))
x_test1 = np.reshape(x_test1, (len(x_test1), 5, 22))

x_train2 = df_train[col2].values
x_test2 = df_test[col2].fillna(0).values

# One-hot encoded label matrix.
y_train = pd.get_dummies(df_train['target'].astype(int)).values

In [31]:
# Train the three-branch Keras model on the full training set and store its
# class probabilities for every test customer in id_preds1.
id_preds1 = defaultdict(list)
ids = df_test['ncodpers'].values
  
### product-wise
# Branch 1: Conv1D over the (5, 22) input built by the reshape of col1;
# the printed summary shows its output as (None, 5, 60) before flattening.
model1 = Sequential()
model1.add(Convolution1D(60, 22, border_mode='same', input_shape=(5, 22),activation = 'relu'))
model1.add(Flatten())

# time-wise
# Branch 2: Conv1D over the transposed input, shape (22, 5);
# summary output (None, 22, 30) before flattening.
model2 = Sequential()
model2.add(Convolution1D(30, 5, border_mode='same', input_shape=(22, 5),activation = 'relu'))
model2.add(Flatten())

# demographic-wise
# Branch 3: one dense layer over the remaining features (col2).
model3 = Sequential()
model3.add(Dense(150, input_dim=len(col2), init='uniform', activation='relu'))

# Concatenate the three branch outputs and classify into 22 classes.
merged = Merge([model1,model2,model3], mode='concat')

final_model = Sequential()
final_model.add(merged)
final_model.add(Dense(22, init='uniform', activation='softmax'))
final_model.compile(loss='categorical_crossentropy', optimizer='adagrad', metrics=['categorical_accuracy'])
print(final_model.summary())

# Inputs are passed in branch order; the second one is x_train1 with its last
# two axes swapped. No validation split here: all rows are used for training.
history = final_model.fit([x_train1, x_train1.transpose((0, 2, 1)), x_train2], y_train, nb_epoch=100, batch_size=100, verbose = 2)
        
p_test = final_model.predict([x_test1, x_test1.transpose((0, 2, 1)), x_test2])
        
# Left-pad each 22-value prediction with two zeros so it has one entry per
# name in product_col (24 entries).
# NOTE(review): the loop variable `id` shadows the builtin of the same name.
for id, p in zip(ids, p_test):
    #id_preds[id] = list(p)
    id_preds1[id] = [0,0] + list(p)

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
convolution1d_1 (Convolution1D)  (None, 5, 60)         29100                                        
____________________________________________________________________________________________________
flatten_1 (Flatten)              (None, 300)           0                                            
____________________________________________________________________________________________________
convolution1d_2 (Convolution1D)  (None, 22, 30)        780                                          
____________________________________________________________________________________________________
flatten_2 (Flatten)              (None, 660)           0                                            
___________________________________________________________________________________________

# Model: XGB

In [24]:
def runXGB(train_X, train_y, reg =100, colsample_bytree=0.9, max_depth= 6, eta=0.1, min_child_weight=2, subsample=0.9, num_rounds=150):
    """Fit a 22-class ``multi:softprob`` gradient-boosted tree model.

    The training set doubles as the only watch-list entry, so the ``mlogloss``
    recorded per round is the training loss.

    Parameters
    ----------
    train_X, train_y
        Features and integer class labels, passed to ``xgb.DMatrix``.
    reg
        Value for ``reg_lambda``.
    colsample_bytree, max_depth, eta, min_child_weight, subsample
        Forwarded unchanged as the booster parameters of the same name.
    num_rounds
        Number of boosting rounds.

    Returns
    -------
    tuple
        ``(model, progress)`` - the trained booster and the dict that
        ``xgb.train`` fills through ``evals_result``.
    """
    booster_params = {
        'objective': 'multi:softprob',
        'seed': 0,
        'silent': 0,
        'eval_metric': "mlogloss",
        'booster': 'gbtree',
        'num_class': 22,
        'reg_lambda': reg,
        'colsample_bytree': colsample_bytree,
        'max_depth': max_depth,
        'eta': eta,
        'min_child_weight': min_child_weight,
        'subsample': subsample,
    }

    dtrain = xgb.DMatrix(train_X, label=train_y)
    progress = {}
    model = xgb.train(
        list(booster_params.items()),
        dtrain,
        num_rounds,
        [(dtrain, 'train')],
        evals_result=progress,
    )

    return (model, progress)

In [29]:
# Train the XGB model on the full training set and store its class
# probabilities for every test customer in id_preds2.

# Feature set: every column except the label and the customer id.
# `axis` is passed by keyword; the positional form is not accepted by newer pandas.
cols = list(df_train.drop(['target', 'ncodpers'], axis=1).columns.values)

id_preds2 = defaultdict(list)
ids = df_test['ncodpers'].values

# XGB takes integer class labels (the Keras cells use a one-hot matrix instead).
y_train = df_train['target']
x_train = df_train[cols]

(clf, progress) = runXGB(x_train, y_train, reg=50, eta=0.05, min_child_weight=10, num_rounds=194)

# NOTE(review): test NaNs are replaced by 0 while the training frame is passed
# as-is - confirm that this asymmetry is intended.
x_test = df_test[cols].fillna(0)

d_test = xgb.DMatrix(x_test)
p_test = clf.predict(d_test)

# Left-pad each 22-value prediction with two zeros so it has one entry per
# name in product_col. `uid` is used as the loop name so the builtin `id` is
# not shadowed for the rest of the session.
for uid, probs in zip(ids, p_test):
    id_preds2[uid] = [0, 0] + list(probs)

[0]	train-mlogloss:2.86693
[1]	train-mlogloss:2.70496
[2]	train-mlogloss:2.56824
[3]	train-mlogloss:2.45361
[4]	train-mlogloss:2.35473
[5]	train-mlogloss:2.2679
[6]	train-mlogloss:2.19099
[7]	train-mlogloss:2.12172
[8]	train-mlogloss:2.05876
[9]	train-mlogloss:2.00139
[10]	train-mlogloss:1.94895
[11]	train-mlogloss:1.90034
[12]	train-mlogloss:1.85543
[13]	train-mlogloss:1.81435
[14]	train-mlogloss:1.77524
[15]	train-mlogloss:1.73878
[16]	train-mlogloss:1.70462
[17]	train-mlogloss:1.67226
[18]	train-mlogloss:1.64269
[19]	train-mlogloss:1.61407
[20]	train-mlogloss:1.58713
[21]	train-mlogloss:1.56169
[22]	train-mlogloss:1.53759
[23]	train-mlogloss:1.51459
[24]	train-mlogloss:1.49293
[25]	train-mlogloss:1.47238
[26]	train-mlogloss:1.45292
[27]	train-mlogloss:1.43423
[28]	train-mlogloss:1.41649
[29]	train-mlogloss:1.39954
[30]	train-mlogloss:1.38347
[31]	train-mlogloss:1.36807
[32]	train-mlogloss:1.35332
[33]	train-mlogloss:1.33952
[34]	train-mlogloss:1.32621
[35]	train-mlogloss:1.31342
[36

In [39]:
#feature_imp = clf.get_fscore()

#sorted_feature = sorted(feature_imp.items(), key=operator.itemgetter(1), reverse=True)
#selected_col = [a for a,b in sorted_feature[:101]]

# Model Product Ranking: Recent 2016

In [32]:
# Popularity baseline: each product's share of the summed `_ult1_5` columns of
# the test set.
latest_totals = df_test[product_col_5].sum(axis=0)
product_list = latest_totals / latest_totals.sum()

# Every customer gets the same score vector, left-padded with two zeros so it
# has one entry per name in product_col. The ids are read from the `ncodpers`
# column by name (the source of the keys of the other prediction dicts)
# instead of materialising the whole frame with `.values` and relying on the id
# being the first column.
popularity_scores = [0, 0] + list(product_list)
id_preds5 = {uid: list(popularity_scores) for uid in df_test['ncodpers'].values}

In [33]:
# Blend weights for the three score sources. They are not normalised (they add
# up to 1.1); the blended scores are only used for ranking products.
fractionKeras, fractionXGB, fractionRanking = 0.3, 0.7, 0.1

id_preds_combined = {}
for uid in id_preds1:
    keras_part = fractionKeras * np.asarray(id_preds1[uid])
    xgb_part = fractionXGB * np.asarray(id_preds2[uid])
    ranking_part = fractionRanking * np.asarray(id_preds5[uid])
    id_preds_combined[uid] = keras_part + xgb_part + ranking_part

# The submission cell reads the blended scores under this name.
id_preds = id_preds_combined

# Make submission

In [34]:
df_recent =  pd.read_csv('cleaned_data/df_recent.csv')

In [35]:
sample = pd.read_csv('input/sample_submission.csv')

In [36]:
# check if customer already have each product or not. 
already_active = {}
for row in df_recent.values:
    row = list(row)
    id = row.pop(0)
    active = [c[0] for c in zip(tuple(product_col), row) if c[1] > 0]
    already_active[id] = active

# add 7 products(that user don't have yet), higher probability first -> train_pred   
train_preds = {}
for id, p in id_preds.items():
    preds = [i[0] for i in sorted([i for i in zip(tuple(product_col), p) if i[0] not in already_active[id]],
                                  key=lambda i:i [1], 
                                  reverse=True)[:7]]
    train_preds[id] = preds
    
test_preds = []
for row in sample.values:
    id = row[0]
    p = train_preds[id]
    test_preds.append(' '.join(p))


In [37]:
sample.shape

(929615, 2)

In [38]:
# Overwrite the sample file's `added_products` column with the predictions
# (test_preds is in sample row order) and write the submission CSV.
sample['added_products'] = test_preds
sample.to_csv('output/Ensemble12.csv', index=False)

# Validation part

### Keras

### dense

In [None]:
# Validation run: a single dense network on all features, with 20% of the
# training rows held out by Keras. Timestamps are printed before and after.
print(time.strftime('%a %H:%M:%S'))

# `axis` by keyword and `.values` below replace the positional axis argument
# and the deprecated `.as_matrix()`.
cols = list(df_train.drop(['target', 'ncodpers'], axis=1).columns.values)

# One-hot labels and the feature frame.
y_train = pd.get_dummies(df_train['target'].astype(int))
x_train = df_train[cols]

# 150-unit hidden layer with dropout, 22-way softmax output.
model = Sequential()
model.add(Dense(150, input_dim=len(cols), init='uniform', activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(22, init='uniform', activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adagrad', metrics=['accuracy'])

print(model.summary())
history = model.fit(x_train.values, y_train.values, validation_split=0.2, nb_epoch=170, batch_size=100, verbose = 2)

print(time.strftime('%a %H:%M:%S'))

In [None]:
plt.plot(history.history['val_loss'])

In [None]:
history.history['val_loss']

### lstm

In [None]:
# Inputs for the sequence models below.
# col1: the suffixed product columns, grouped `_1` ... `_5`; col2: every other
# feature (label and customer id excluded).
col1 = product_col_1 + product_col_2 + product_col_3 + product_col_4 + product_col_5
col2 = list(df_train.drop(['target', 'ncodpers'] + col1, axis=1).columns.values)

# `.values` replaces the deprecated `.as_matrix()`.
# Product history reshaped to (rows, 5 suffix groups, 22 products).
x_train1 = np.reshape(df_train[col1].values, (len(df_train), 5, 22))
x_test1 = np.reshape(df_test[col1].values, (len(df_test), 5, 22))

x_train2 = df_train[col2].values
x_test2 = df_test[col2].values

# One-hot encoded label matrix.
y_train = pd.get_dummies(df_train['target'].astype(int)).values

In [None]:
# Validation run: LSTM over the (5, 22) product-history input only, with 20%
# of the training rows held out. Timestamps are printed before and after.
print(time.strftime('%a %H:%M:%S'))

model = Sequential()
# dropout_W / dropout_U are the Keras 1 names for input and recurrent dropout.
model.add(LSTM(150, input_shape=(5, 22), dropout_W=0.0, dropout_U=0.4))
model.add(Dense(22, init='uniform', activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adagrad', metrics=['categorical_accuracy'])

print(model.summary())
# `history` is what the two following cells plot and print.
history = model.fit(x_train1, y_train, validation_split=0.2, nb_epoch=200, batch_size=100, verbose = 2)

print(time.strftime('%a %H:%M:%S'))

In [None]:
plt.plot(history.history['val_loss'])

In [None]:
print(history.history['val_loss'])

### lstm + dense

In [None]:

# Validation run: LSTM branch on the product history plus a dense branch on
# the remaining features (col2), concatenated and classified.

# Branch 1: LSTM over the (5, 22) product-history input.
model1 = Sequential()
model1.add(LSTM(150, input_shape=(5, 22), dropout_W=0.0, dropout_U=0.3))

# Branch 2: dense layer with dropout over the col2 features.
model2 = Sequential()
model2.add(Dense(150, input_dim=len(col2), init='uniform', activation='relu'))
model2.add(Dropout(0.3))

merged = Merge([model1,model2], mode='concat')

final_model = Sequential()
final_model.add(merged)
final_model.add(Dense(64, init='uniform', activation='relu'))
final_model.add(Dropout(0.3))
final_model.add(Dense(22, init='uniform', activation='softmax'))
final_model.compile(loss='categorical_crossentropy', optimizer='adagrad', metrics=['categorical_accuracy'])

print(final_model.summary())
# The fit result must be bound to `history`: the next two cells plot and print
# history.history['val_loss'] and would otherwise show the previous model's curve.
history = final_model.fit([x_train1, x_train2], y_train, validation_split=0.2, nb_epoch=170, batch_size=100, verbose = 2)

print(time.strftime('%a %H:%M:%S'))

In [None]:
plt.plot(history.history['val_loss'])

In [None]:
print(history.history['val_loss'])

### Convolution1D: time_wise

In [None]:
# Validation run: a single Conv1D layer over the (5, 22) product-history
# input, flattened and fed to a 22-way softmax; 20% of the rows are held out.
model = Sequential()
model.add(Convolution1D(60, 22, border_mode='same', input_shape=(5, 22),activation = 'relu'))
model.add(Flatten())
#model.add(Dense(64, init='uniform', activation='relu'))
#model.add(Dropout(0.1))
model.add(Dense(22, init='uniform', activation='softmax'))
# Unlike the other validation cells, this one uses the adam optimizer.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['categorical_accuracy'])

print(model.summary())
history = model.fit(x_train1, y_train, validation_split=0.2, nb_epoch=25, batch_size=100, verbose = 2)

### Convolution1d: product-wise and  time-wise

In [None]:
# Validation run: two Conv1D branches on the same product-history tensor,
# one on its original (5, 22) layout and one on the transposed (22, 5) layout.

# Branch 1: input shape (5, 22).
model1 = Sequential()
model1.add(Convolution1D(60, 22, border_mode='same', input_shape=(5, 22),activation = 'relu'))
model1.add(Flatten())

# Branch 2: input shape (22, 5), i.e. x_train1 with its last two axes swapped.
model2 = Sequential()
model2.add(Convolution1D(30, 5, border_mode='same', input_shape=(22, 5),activation = 'relu'))
model2.add(Flatten())

merged = Merge([model1,model2], mode='concat')

final_model = Sequential()
final_model.add(merged)
final_model.add(Dense(22, init='uniform', activation='softmax'))
final_model.compile(loss='categorical_crossentropy', optimizer='adagrad', metrics=['categorical_accuracy'])
print(final_model.summary())

# Inputs are given in branch order; 20% of the rows are held out for validation.
history = final_model.fit([x_train1, x_train1.transpose((0, 2, 1))], y_train, validation_split=0.2, nb_epoch=30, batch_size=100, verbose = 2)

### Convolution1d: demographic-wise + product-wise + time-wise

In [None]:
def _single_step(frame, columns):
    """Return ``frame[columns]`` as an array of shape (rows, 1, len(columns))."""
    block = frame[columns].values   # `.values` replaces the deprecated `.as_matrix()`
    return np.reshape(block, (len(block), 1, len(columns)))


# Split the features into four groups: suffixed product columns, scaled
# numeric columns, `_past` columns, and everything that is left.
col_product = product_col_1 + product_col_2 + product_col_3 + product_col_4 + product_col_5
col_quant = ['age','antiguedad','renta','log_age','square_age','log_renta','square_renta','log_antiguedad','square_antiguedad']
col_past = [col for col in df_train.columns if '_past' in col]
col_demo = list(df_train.drop(['target', 'ncodpers'] + col_product + col_quant + col_past, axis=1).columns.values)

# Product history: (rows, 5 suffix groups, 22 products).
x_train1 = np.reshape(df_train[col_product].values, (len(df_train), 5, 22))
x_test1 = np.reshape(df_test[col_product].values, (len(df_test), 5, 22))

# The other three groups become one-step sequences so that each can feed a
# Conv1D branch with input_shape=(1, n_features).
x_train_quant = _single_step(df_train, col_quant)
x_test_quant = _single_step(df_test, col_quant)

x_train_past = _single_step(df_train, col_past)
x_test_past = _single_step(df_test, col_past)

x_train_demo = _single_step(df_train, col_demo)
x_test_demo = _single_step(df_test, col_demo)

# One-hot encoded label matrix.
y_train = pd.get_dummies(df_train['target'].astype(int)).values

In [None]:
# Validation run: five Conv1D branches, one per input group prepared in the
# previous cell, concatenated and followed by a 150-unit dense layer.

### product-wise
# Input shape (5, 22): the reshaped product history.
model1 = Sequential()
model1.add(Convolution1D(60, 22, border_mode='same', input_shape=(5, 22),activation = 'relu'))
model1.add(Flatten())

# time-wise
# Input shape (22, 5): the product history with its last two axes swapped.
model2 = Sequential()
model2.add(Convolution1D(30, 5, border_mode='same', input_shape=(22, 5),activation = 'relu'))
model2.add(Flatten())

# quantitative-wise
# One-step sequence over the col_quant features.
model3 = Sequential()
model3.add(Convolution1D(30, len(col_quant), border_mode='same',input_shape=(1, len(col_quant)),activation = 'relu'))
model3.add(Flatten())

# past_product-wise
# One-step sequence over the col_past features.
model4 = Sequential()
model4.add(Convolution1D(30, len(col_past), border_mode='same',input_shape=(1, len(col_past)),activation = 'relu'))
model4.add(Flatten())

# demographic-wise
# One-step sequence over the col_demo features.
model5 = Sequential()
model5.add(Convolution1D(30, len(col_demo), border_mode='same',input_shape=(1, len(col_demo)),activation = 'relu'))
model5.add(Flatten())
#model3.add(Dense(150, input_dim=len(col2), init='uniform', activation='relu'))



merged = Merge([model1,model2,model3,model4,model5], mode='concat')


final_model = Sequential()
final_model.add(merged)
final_model.add(Dense(150, init='uniform', activation='relu'))
final_model.add(Dense(22, init='uniform', activation='softmax'))
final_model.compile(loss='categorical_crossentropy', optimizer='adagrad', metrics=['categorical_accuracy'])
print(final_model.summary())

# Inputs are given in branch order; 20% of the rows are held out for validation.
history = final_model.fit([x_train1, x_train1.transpose((0, 2, 1)), x_train_quant, x_train_past, x_train_demo], y_train, validation_split=0.2, nb_epoch=100, batch_size=100, verbose = 2)

### Convolution1d: time-wise + product-wise + demographic dense

In [None]:
# Validation run of the architecture used for the ensemble: two Conv1D
# branches on the product history plus a dense branch on the other features.
# NOTE(review): `col2` and `x_train2` are not defined by the preceding
# input-preparation cell; they come from an earlier cell that builds
# col1/col2, so this cell relies on that cell having been run.

### product-wise
# Input shape (5, 22): the reshaped product history.
model1 = Sequential()
model1.add(Convolution1D(60, 22, border_mode='same', input_shape=(5, 22),activation = 'relu'))
model1.add(Flatten())

# time-wise
# Input shape (22, 5): the product history with its last two axes swapped.
model2 = Sequential()
model2.add(Convolution1D(30, 5, border_mode='same', input_shape=(22, 5),activation = 'relu'))
model2.add(Flatten())

# demographic-wise
# Dense layer over the col2 features.
model3 = Sequential()
model3.add(Dense(150, input_dim=len(col2), init='uniform', activation='relu'))

merged = Merge([model1,model2,model3], mode='concat')

final_model = Sequential()
final_model.add(merged)
final_model.add(Dense(22, init='uniform', activation='softmax'))
final_model.compile(loss='categorical_crossentropy', optimizer='adagrad', metrics=['categorical_accuracy'])
print(final_model.summary())

# Inputs are given in branch order; 20% of the rows are held out for validation.
history = final_model.fit([x_train1, x_train1.transpose((0, 2, 1)), x_train2], y_train,validation_split=0.2, nb_epoch=100, batch_size=100, verbose = 2)


### XGB

In [None]:
# 3-fold cross-validation of the XGB configuration on the training set.
# This cell only evaluates: it no longer rebinds `id_preds` (which holds the
# blended predictions used for the submission) to an empty dict, and the unused
# `ids` / `plst` variables are gone.

# Feature set: every column except the label and the customer id.
cols = list(df_train.drop(['target', 'ncodpers'], axis=1).columns.values)

# XGB takes integer class labels.
y_train = df_train['target']
x_train = df_train[cols]

print("Validating...")

param = {
    'objective': 'multi:softprob',
    'seed': 0,
    'silent': 0,
    'eval_metric': "mlogloss",
    'booster': 'gbtree',
    'eta': 0.1,
    'num_class': 22,
    'colsample_bytree': 0.9,
    'subsample': 0.9,
    'max_depth': 6,
    'min_child_weight': 2,
    'reg_lambda': 100,
}
num_round = 170

xgtrain = xgb.DMatrix(x_train, label=y_train)

# The callback prints the per-round mean and standard deviation of mlogloss.
xgb.cv(param, xgtrain, num_round, nfold=3,
       metrics={'mlogloss'}, seed = 0,
       callbacks=[xgb.callback.print_evaluation(show_stdv=True)])