In [1]:
import pandas as pd
import numpy as np
import matplotlib.pylab as plt

In [2]:
def reduce_mem_usage(df, verbose=True):
    numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
    start_mem = df.memory_usage().sum() / 1024**2    
    for col in df.columns:
        col_type = df[col].dtypes
        if col_type in numerics:
            c_min = df[col].min()
            c_max = df[col].max(
            )
            if str(col_type)[:3] == 'int':
                if c_min > np.iinfo(np.int8).min and c_max < np.iinfo(np.int8).max:
                    df[col] = df[col].astype(np.int8)
                elif c_min > np.iinfo(np.int16).min and c_max < np.iinfo(np.int16).max:
                    df[col] = df[col].astype(np.int16)
                elif c_min > np.iinfo(np.int32).min and c_max < np.iinfo(np.int32).max:
                    df[col] = df[col].astype(np.int32)
                elif c_min > np.iinfo(np.int64).min and c_max < np.iinfo(np.int64).max:
                    df[col] = df[col].astype(np.int64)  
            else:
                if c_min > np.finfo(np.float16).min and c_max < np.finfo(np.float16).max:
                    df[col] = df[col].astype(np.float16)
                elif c_min > np.finfo(np.float32).min and c_max < np.finfo(np.float32).max:
                    df[col] = df[col].astype(np.float32)
                else:
                    df[col] = df[col].astype(np.float64)    
    end_mem = df.memory_usage().sum() / 1024**2
    if verbose: print('Mem. usage decreased to {:5.2f} Mb ({:.1f}% reduction)'.format(end_mem, 100 * (start_mem - end_mem) / start_mem))
    return df

In [3]:
df_train = pd.read_csv('C:/Users/user/Documents/Salamat/ELO/train.csv')
df_test = pd.read_csv('C:/Users/user/Documents/Salamat/ELO/test.csv')
df_hist_trans = pd.read_csv('C:/Users/user/Documents/Salamat/ELO/historical_transactions.csv')
df_new_merchant_trans = pd.read_csv('C:/Users/user/Documents/Salamat/ELO/new_merchant_transactions.csv')

In [4]:
df_train=reduce_mem_usage(df_train)
df_test=reduce_mem_usage(df_test)
df_hist_trans=reduce_mem_usage(df_hist_trans)
df_new_merchant_trans=reduce_mem_usage(df_new_merchant_trans)

Mem. usage decreased to  4.04 Mb (56.2% reduction)
Mem. usage decreased to  2.24 Mb (52.5% reduction)
Mem. usage decreased to 1749.11 Mb (43.7% reduction)
Mem. usage decreased to 114.20 Mb (45.5% reduction)


In [6]:
df_merchants=pd.read_csv('C:/Users/user/Documents/Salamat/ELO/merchants.csv')
df_merchants=reduce_mem_usage(df_merchants)

Mem. usage decreased to 30.32 Mb (46.0% reduction)


In [7]:
df_merchants.head()

Unnamed: 0,merchant_id,merchant_group_id,merchant_category_id,subsector_id,numerical_1,numerical_2,category_1,most_recent_sales_range,most_recent_purchases_range,avg_sales_lag3,...,avg_sales_lag6,avg_purchases_lag6,active_months_lag6,avg_sales_lag12,avg_purchases_lag12,active_months_lag12,category_4,city_id,state_id,category_2
0,M_ID_838061e48c,8353,792,9,-0.057465,-0.057465,N,E,E,-0.4,...,-2.25,18.666667,6,-2.32,13.916667,12,N,242,9,1.0
1,M_ID_9339d880ad,3184,840,20,-0.057465,-0.057465,N,E,E,-0.72,...,-0.74,1.291667,6,-0.57,1.6875,12,N,22,16,1.0
2,M_ID_e726bbae1e,447,690,1,-0.057465,-0.057465,N,E,E,-82.129997,...,-82.129997,260.0,2,-82.129997,260.0,2,N,-1,5,5.0
3,M_ID_a70e9c5f81,5026,792,9,-0.057465,-0.057465,Y,E,E,,...,,4.666667,6,,3.833333,12,Y,-1,-1,
4,M_ID_64456c37ce,2228,222,21,-0.057465,-0.057465,Y,E,E,,...,,0.361111,6,,0.347222,12,Y,-1,-1,


In [10]:
df_merchants.merchant_group_id.value_counts(dropna=False).head()

35      46026
434       482
419       365
713       333
3648      305
Name: merchant_group_id, dtype: int64

In [11]:
df_merchants.merchant_category_id.value_counts(dropna=False).head()

705    30666
278    17992
307    17528
367    15387
68     14341
Name: merchant_category_id, dtype: int64

### Before exploring merchants. Let's look at history of transactions. And see if there is any relation between average frequency of going to some merchant and target

In [23]:
df_hist_trans_group=df_hist_trans.groupby(['card_id'])

In [37]:
# Total number of purchases made
df=df_hist_trans_group.authorized_flag.count()
df=df.to_frame()

In [40]:
df.columns=['num_purchases']

In [43]:
df['num_unique_merchants']=df_hist_trans_group.merchant_id.nunique()

In [44]:
df['pur_freq_unique']=df['num_unique_merchants']/df['num_purchases']

In [47]:
df['max_month']=df_hist_trans_group.month_lag.min().abs()

In [49]:
df['pur_freq_unique_month']=df['pur_freq_unique']/df['pur_freq_unique']/df['max_month']

In [51]:
df_hist_trans.columns

Index(['authorized_flag', 'card_id', 'city_id', 'category_1', 'installments',
       'category_3', 'merchant_category_id', 'merchant_id', 'month_lag',
       'purchase_amount', 'purchase_date', 'category_2', 'state_id',
       'subsector_id'],
      dtype='object')

In [151]:
# Now let's check seperately authorized frequency and non authorized
df_hist_trans_group_auth=df_hist_trans.groupby(['card_id','authorized_flag'])

In [152]:
df2=df_hist_trans_group_auth.city_id.count().reset_index()

In [153]:
df2.columns=['card_id','authorized_flag','num_purchases']

In [154]:
df2['num_unique_merchants']=df_hist_trans_group_auth.merchant_id.nunique().reset_index().merchant_id
df2['pur_freq_unique']=df2['num_unique_merchants']/df2['num_purchases']

In [155]:
df2['month_diff']=df2.card_id.map(df_hist_trans.groupby('card_id').month_lag.min().abs())


In [156]:
df2['pur_freq_unique_month']=df2['pur_freq_unique']/df2['month_diff']

In [171]:
df2.isnull().any()

card_id                  False
authorized_flag          False
num_purchases            False
num_unique_merchants     False
pur_freq_unique          False
month_diff               False
pur_freq_unique_month    False
dtype: bool

In [172]:
dfT=df.merge(df2[df2.authorized_flag=='Y'],on='card_id', how='outer')

In [173]:
dfT=dfT.merge(df2[df2.authorized_flag=='N'],on='card_id', how='outer')

In [176]:
dfT.isnull().any()

index                      False
card_id                    False
num_purchases_x            False
num_unique_merchants_x     False
pur_freq_unique_x          False
max_month                  False
pur_freq_unique_month_x    False
authorized_flag_x          False
num_purchases_y            False
num_unique_merchants_y     False
pur_freq_unique_y          False
month_diff_x               False
pur_freq_unique_month_y    False
authorized_flag_y           True
num_purchases               True
num_unique_merchants        True
pur_freq_unique             True
month_diff_y                True
pur_freq_unique_month       True
dtype: bool

In [183]:
dfT.columns[-6:]

Index(['authorized_flag_y', 'num_purchases', 'num_unique_merchants',
       'pur_freq_unique', 'month_diff_y', 'pur_freq_unique_month'],
      dtype='object')

In [184]:
# will put zero who never did nonauthorized purchases
dfT.loc[dfT.authorized_flag_y.isnull(),dfT.columns[-6:]]=np.zeros(dfT[dfT.authorized_flag_y.isnull()].iloc[:,-6:].shape)

In [190]:
dfT.columns

Index(['index', 'card_id', 'num_purchases_x', 'num_unique_merchants_x',
       'pur_freq_unique_x', 'max_month', 'pur_freq_unique_month_x',
       'authorized_flag_x', 'num_purchases_y', 'num_unique_merchants_y',
       'pur_freq_unique_y', 'month_diff_x', 'pur_freq_unique_month_y',
       'authorized_flag_y', 'num_purchases', 'num_unique_merchants',
       'pur_freq_unique', 'month_diff_y', 'pur_freq_unique_month'],
      dtype='object')

In [198]:
columns=['card_id', 'pur_freq_unique_x', 'pur_freq_unique_month_x',
       'pur_freq_unique_y', 'pur_freq_unique_month_y',
        'pur_freq_unique',  'pur_freq_unique_month']

In [199]:
dfT.isnull().any()

index                      False
card_id                    False
num_purchases_x            False
num_unique_merchants_x     False
pur_freq_unique_x          False
max_month                  False
pur_freq_unique_month_x    False
authorized_flag_x          False
num_purchases_y            False
num_unique_merchants_y     False
pur_freq_unique_y          False
month_diff_x               False
pur_freq_unique_month_y    False
authorized_flag_y          False
num_purchases              False
num_unique_merchants       False
pur_freq_unique            False
month_diff_y               False
pur_freq_unique_month      False
dtype: bool

In [200]:
df_final=dfT.loc[:,columns]

In [201]:
df_final

Unnamed: 0,card_id,pur_freq_unique_x,pur_freq_unique_month_x,pur_freq_unique_y,pur_freq_unique_month_y,pur_freq_unique,pur_freq_unique_month
0,C_ID_00007093c1,0.194631,0.083333,0.245614,0.020468,0.314286,0.026190
1,C_ID_0001238066,0.528455,0.200000,0.541667,0.108333,0.666667,0.133333
2,C_ID_0001506ef0,0.424242,0.076923,0.451613,0.034739,1.000000,0.076923
3,C_ID_0001793786,0.550926,0.111111,0.603175,0.067019,0.629630,0.069959
4,C_ID_000183fdda,0.506944,0.166667,0.518248,0.086375,0.571429,0.095238
5,C_ID_00024e244b,0.271429,0.076923,0.339623,0.026125,0.411765,0.031674
6,C_ID_0002709b5a,0.479452,0.076923,0.477612,0.036739,0.833333,0.064103
7,C_ID_00027503e2,0.285714,0.125000,0.458333,0.057292,0.277778,0.034722
8,C_ID_000298032a,0.666667,0.090909,0.714286,0.064935,1.000000,0.090909
9,C_ID_0002ba3c2e,0.500000,0.111111,0.581818,0.064646,0.400000,0.044444


In [203]:
df_final.columns=['card_id','pur_freq_unique', 'pur_freq_unique_month', 'auth_pur_freq_unique',
       'auth_pur_freq_unique_month', 'nonauth_pur_freq_unique', 'nonauth_freq_unique_month']

In [204]:
df_final.isnull().any()


card_id                       False
pur_freq_unique               False
pur_freq_unique_month         False
auth_pur_freq_unique          False
auth_pur_freq_unique_month    False
nonauth_pur_freq_unique       False
nonauth_freq_unique_month     False
dtype: bool

In [206]:
df_final.to_csv('merchants_map.csv',index=False)

In [207]:
import numpy as np
import lightgbm as lgb
import pickle
import pandas as pd

import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, roc_curve, mean_squared_error 

In [208]:
# train = pd.read_pickle('df_train.pickle')
# test = pd.read_pickle('df_test.pickle')

train = pd.read_csv('df_train_2_extra.csv')
test = pd.read_csv('df_test_2_extra.csv')
#target = np.loadtxt('wang.target.txt')
target=train['target']
outlier=train['outliers']
#outlier = np.zeros(len(target), dtype=np.int)
#outlier[target < -33] = 1

In [210]:
data=pd.read_csv('merchants_map.csv')

In [218]:
data.shape

(325540, 7)

In [220]:
train=train.merge(data,on='card_id')
test=test.merge(data,on='card_id')

In [221]:
feats = [c for c in train.columns if c not in ['card_id', 'first_active_month','target','outliers']]

In [222]:
target_with_outlier = pd.DataFrame({
    'target': target,
    'outlier': outlier
})

In [223]:
X_tr, X_va, y_tr, y_va = train_test_split(train[feats].values, target_with_outlier, test_size=0.38, random_state=12)

In [224]:
def get_lb(pred_target):
    return mean_squared_error(y_va.target, pred_target) ** 0.5

# Outliers predictions

In [228]:
cl_param = {
         'num_leaves': 31,
         'min_data_in_leaf': 20, 
         'objective':'binary',
         'max_depth': -1,
         'learning_rate': 0.01,
         "boosting": "gbdt",
         "feature_fraction": 0.9, #
         "bagging_freq": 1,
         "bagging_fraction": 0.9, #
         "metric": 'auc',
         "lambda_l1": 0.1,
         "verbosity": -1,
         "scale_pos_weight": 15,
}

In [226]:
tr_data = lgb.Dataset(X_tr, label=y_tr.outlier)

In [229]:
cv_score = lgb.cv(cl_param, tr_data, 10000, early_stopping_rounds=600, verbose_eval=100)

[100]	cv_agg's auc: 0.902945 + 0.00734746
[200]	cv_agg's auc: 0.904339 + 0.00709242
[300]	cv_agg's auc: 0.904113 + 0.0072194
[400]	cv_agg's auc: 0.903562 + 0.00720682
[500]	cv_agg's auc: 0.902888 + 0.00729769
[600]	cv_agg's auc: 0.902122 + 0.00729537
[700]	cv_agg's auc: 0.901353 + 0.00731498
[800]	cv_agg's auc: 0.900676 + 0.00745622


In [230]:
print('best num: ', len(cv_score['auc-mean']), '\nbest score:', cv_score['auc-mean'][-1])

best num:  234 
best score: 0.9044947779733421


In [231]:
clf = lgb.train(cl_param, tr_data, 400, valid_sets=(tr_data), verbose_eval=100)

[100]	training's auc: 0.967096
[200]	training's auc: 0.974609
[300]	training's auc: 0.980398
[400]	training's auc: 0.984948


In [233]:
# Slightly better than previous results (0.902935029308729)
prob = clf.predict(X_va, num_iteration=clf.best_iteration)
roc_auc_score(y_va.outlier, prob)

0.903642710498444

## Regression

In [235]:
rg_param = {
         'num_leaves': 31,
         'min_data_in_leaf': 20,
         'objective':'regression',
         'max_depth': -1,
         'learning_rate': 0.01,
         "boosting": "gbdt",
         "feature_fraction": 0.9,
         "bagging_freq": 1,
         "bagging_fraction": 0.9,
         "metric": 'rmse',
         "lambda_l1": 0.1,
         "verbosity": -1,
         
}
tr_data = lgb.Dataset(X_tr, label=y_tr.target)
cv_score = lgb.cv(rg_param, tr_data, 10000, early_stopping_rounds=600, verbose_eval=200, stratified=False)

[200]	cv_agg's rmse: 3.65345 + 0.122583
[400]	cv_agg's rmse: 3.634 + 0.121164
[600]	cv_agg's rmse: 3.62711 + 0.121091
[800]	cv_agg's rmse: 3.62482 + 0.120194
[1000]	cv_agg's rmse: 3.6244 + 0.120639
[1200]	cv_agg's rmse: 3.6242 + 0.12108
[1400]	cv_agg's rmse: 3.62423 + 0.121415
[1600]	cv_agg's rmse: 3.62488 + 0.122439


In [236]:
print('best num: ', len(cv_score['rmse-mean']), '\nbest score:', cv_score['rmse-mean'][-1])

best num:  1172 
best score: 3.624118993720347


In [237]:
clf = lgb.train(rg_param, tr_data, 2000, valid_sets=(tr_data), verbose_eval=200)

[200]	training's rmse: 3.4861
[400]	training's rmse: 3.3683
[600]	training's rmse: 3.29015
[800]	training's rmse: 3.23059
[1000]	training's rmse: 3.1771
[1200]	training's rmse: 3.12849
[1400]	training's rmse: 3.083
[1600]	training's rmse: 3.03736
[1800]	training's rmse: 2.99701
[2000]	training's rmse: 2.95647


In [238]:
pred = clf.predict(X_va, num_iteration=clf.best_iteration)
get_lb(pred)
# small improvement than previous results 3.7026626002538454

3.6998631401240067

In [239]:
rg_param = {
         'num_leaves': 31,
         'min_data_in_leaf': 20,
         'objective':'regression',
         'max_depth': -1,
         'learning_rate': 0.01,
         "boosting": "gbdt",
         "feature_fraction": 0.9,
         "bagging_freq": 1,
         "bagging_fraction": 0.9,
         "metric": 'rmse',
         "lambda_l1": 0.1,
         "verbosity": -1,
         
}
tr_data = lgb.Dataset(train[feats], label=train.target)
cv_score = lgb.cv(rg_param, tr_data, 10000, early_stopping_rounds=600, verbose_eval=200, stratified=False)

[200]	cv_agg's rmse: 3.68231 + 0.0292198
[400]	cv_agg's rmse: 3.66166 + 0.0279358
[600]	cv_agg's rmse: 3.65358 + 0.0271903
[800]	cv_agg's rmse: 3.65077 + 0.0271296
[1000]	cv_agg's rmse: 3.65013 + 0.0271201
[1200]	cv_agg's rmse: 3.65004 + 0.0267076
[1400]	cv_agg's rmse: 3.6498 + 0.0262773
[1600]	cv_agg's rmse: 3.64973 + 0.0261065
[1800]	cv_agg's rmse: 3.64974 + 0.0258073
[2000]	cv_agg's rmse: 3.65008 + 0.025922


In [240]:
print('best num: ', len(cv_score['rmse-mean']), '\nbest score:', cv_score['rmse-mean'][-1])

best num:  1429 
best score: 3.6496132351691117


In [241]:
clf = lgb.train(rg_param, tr_data, 2000, valid_sets=(tr_data), verbose_eval=200)

[200]	training's rmse: 3.56656
[400]	training's rmse: 3.46941
[600]	training's rmse: 3.40747
[800]	training's rmse: 3.36017
[1000]	training's rmse: 3.3212
[1200]	training's rmse: 3.28551
[1400]	training's rmse: 3.252
[1600]	training's rmse: 3.21674
[1800]	training's rmse: 3.18612
[2000]	training's rmse: 3.15434


In [242]:
pred = clf.predict(test[feats], num_iteration=clf.best_iteration)

In [245]:
test['target']=pred

In [247]:
test[['card_id','target']].to_csv('submission_with_merchats.csv',index=False)

In [248]:
check=pd.read_csv('submission_with_merchats.csv')
check.head()

Unnamed: 0,card_id,target
0,C_ID_0ab67a22ab,-2.30472
1,C_ID_130fd0cbdd,-0.465607
2,C_ID_b709037bc5,-0.982526
3,C_ID_d27d835a9f,-0.095061
4,C_ID_2b5e3df5c2,-1.288827


In [250]:
check=pd.read_csv('submission.csv')

In [251]:
check.head()

Unnamed: 0,card_id,target
0,C_ID_0ab67a22ab,-2.730562
1,C_ID_130fd0cbdd,-0.245508
2,C_ID_b709037bc5,-0.89598
3,C_ID_d27d835a9f,-0.154691
4,C_ID_2b5e3df5c2,-1.264231


# Let's check one by one 

In [253]:
feats[-6:]

['pur_freq_unique',
 'pur_freq_unique_month',
 'auth_pur_freq_unique',
 'auth_pur_freq_unique_month',
 'nonauth_pur_freq_unique',
 'nonauth_freq_unique_month']

In [261]:
feats.remove('pur_freq_unique')

In [260]:
feats_to_remove=feats[-6:][:]

In [271]:
feats = [c for c in train.columns if c not in ['card_id', 'first_active_month','target','outliers']]
feats_to_remove=feats[-6:][:]

results=[]
predictions=pd.DataFrame()
predictions_test=pd.DataFrame()
for item in feats_to_remove:
    feats.remove(item)
    X_tr, X_va, y_tr, y_va = train_test_split(train[feats].values, target_with_outlier, test_size=0.10, random_state=12)

    rg_param = {
             'num_leaves': 31,
             'min_data_in_leaf': 20,
             'objective':'regression',
             'max_depth': -1,
             'learning_rate': 0.01,
             "boosting": "gbdt",
             "feature_fraction": 0.9,
             "bagging_freq": 1,
             "bagging_fraction": 0.9,
             "metric": 'rmse',
             "lambda_l1": 0.1,
             "verbosity": -1,

    }
    tr_data = lgb.Dataset(X_tr, label=y_tr.target)
    cv_score = lgb.cv(rg_param, tr_data, 10000, early_stopping_rounds=600, verbose_eval=200, stratified=False)

    print('best num: ', len(cv_score['rmse-mean']), '\nbest score:', cv_score['rmse-mean'][-1])

    clf = lgb.train(rg_param, tr_data, 2000, valid_sets=(tr_data), verbose_eval=200)
    pred = clf.predict(X_va, num_iteration=clf.best_iteration)
    pred_test = clf.predict(test[feats], num_iteration=clf.best_iteration)
    results.append(get_lb(pred))
    predictions[item]=pred
    predictions_test[item]=pred_test


[200]	cv_agg's rmse: 3.67446 + 0.0658473
[400]	cv_agg's rmse: 3.6543 + 0.0636794
[600]	cv_agg's rmse: 3.64709 + 0.0634064
[800]	cv_agg's rmse: 3.64434 + 0.0630859
[1000]	cv_agg's rmse: 3.64368 + 0.0633575
[1200]	cv_agg's rmse: 3.6434 + 0.0634779
[1400]	cv_agg's rmse: 3.64342 + 0.0627258
[1600]	cv_agg's rmse: 3.64389 + 0.0625041
[1800]	cv_agg's rmse: 3.64415 + 0.0626921
best num:  1225 
best score: 3.6432888323737664
[200]	training's rmse: 3.54864
[400]	training's rmse: 3.45079
[600]	training's rmse: 3.3844
[800]	training's rmse: 3.33506
[1000]	training's rmse: 3.29204
[1200]	training's rmse: 3.25451
[1400]	training's rmse: 3.21599
[1600]	training's rmse: 3.18052
[1800]	training's rmse: 3.14556
[2000]	training's rmse: 3.11245
[200]	cv_agg's rmse: 3.67385 + 0.0652736
[400]	cv_agg's rmse: 3.65408 + 0.063005
[600]	cv_agg's rmse: 3.64677 + 0.0625379
[800]	cv_agg's rmse: 3.64419 + 0.0623363
[1000]	cv_agg's rmse: 3.64328 + 0.0626654
[1200]	cv_agg's rmse: 3.64314 + 0.062608
[1400]	cv_agg's rms

In [272]:
results

[3.7153126450315446,
 3.7163047225306935,
 3.715715768050351,
 3.717204255728074,
 3.7172264715184706,
 3.7144518960693516]

In [273]:
feats = [c for c in train.columns if c not in ['card_id', 'first_active_month','target','outliers']]

X_tr, X_va, y_tr, y_va = train_test_split(train[feats].values, target_with_outlier, test_size=0.10, random_state=12)

rg_param = {
         'num_leaves': 31,
         'min_data_in_leaf': 20,
         'objective':'regression',
         'max_depth': -1,
         'learning_rate': 0.01,
         "boosting": "gbdt",
         "feature_fraction": 0.9,
         "bagging_freq": 1,
         "bagging_fraction": 0.9,
         "metric": 'rmse',
         "lambda_l1": 0.1,
         "verbosity": -1,

}
tr_data = lgb.Dataset(X_tr, label=y_tr.target)
cv_score = lgb.cv(rg_param, tr_data, 10000, early_stopping_rounds=600, verbose_eval=200, stratified=False)

print('best num: ', len(cv_score['rmse-mean']), '\nbest score:', cv_score['rmse-mean'][-1])

clf = lgb.train(rg_param, tr_data, 2000, valid_sets=(tr_data), verbose_eval=200)
pred = clf.predict(X_va, num_iteration=clf.best_iteration)
pred_test = clf.predict(test[feats], num_iteration=clf.best_iteration)
results.append(get_lb(pred))
predictions['all']=pred
predictions_test['all']=pred_test


[200]	cv_agg's rmse: 3.6742 + 0.0655648
[400]	cv_agg's rmse: 3.65491 + 0.0627464
[600]	cv_agg's rmse: 3.64762 + 0.0623429
[800]	cv_agg's rmse: 3.64522 + 0.0618497
[1000]	cv_agg's rmse: 3.64438 + 0.0621163
[1200]	cv_agg's rmse: 3.64388 + 0.0620205
[1400]	cv_agg's rmse: 3.64368 + 0.0616158
[1600]	cv_agg's rmse: 3.64415 + 0.0613986
[1800]	cv_agg's rmse: 3.64469 + 0.0610068
best num:  1327 
best score: 3.6435045058099695
[200]	training's rmse: 3.54755
[400]	training's rmse: 3.44802
[600]	training's rmse: 3.38134
[800]	training's rmse: 3.33297
[1000]	training's rmse: 3.29076
[1200]	training's rmse: 3.25225
[1400]	training's rmse: 3.21382
[1600]	training's rmse: 3.17801
[1800]	training's rmse: 3.14248
[2000]	training's rmse: 3.10848


In [274]:
results


[3.7153126450315446,
 3.7163047225306935,
 3.715715768050351,
 3.717204255728074,
 3.7172264715184706,
 3.7144518960693516,
 3.7128954654832427]