In [1]:
import pandas as pd
import numpy as np

pd.set_option("display.max_rows", 100)
pd.set_option('display.max_columns', 200)

import warnings
warnings.filterwarnings('ignore')
%config InlineBackend.figure_format = 'svg'

from tqdm import tqdm

from statsmodels.tsa.tsatools import lagmat

import matplotlib.pylab as plt
%matplotlib inline

import os
os.chdir('..')

In [2]:
transactions = pd.read_csv('transactions.csv')
transactions['day'] = transactions.tr_datetime.apply(lambda dt: dt.split()[0]).astype(int)

transactions['pos_amount'] = transactions.amount.apply(lambda x: 0 if x<0 else np.log(x + 1))
transactions['neg_amount'] = transactions.amount.apply(lambda x: 0 if x>0 else -x)

In [3]:
transactions.term_id.fillna(-1, inplace=True)
main_term_id = transactions.term_id.value_counts()[:200].index
transactions.term_id = transactions.term_id.apply(lambda x: x if x in main_term_id else -2)
transactions.term_id.head()

0    -1
1    -1
2    -1
3    -1
4    -1
Name: term_id, dtype: object

In [4]:
transactions = pd.get_dummies(transactions, columns=['term_id'])
term_cols = [col for col in transactions.columns if 'term_id_' in col]

In [8]:
transactions.drop(['amount', 'tr_datetime'], 1, inplace=True)

test_transactions = pd.DataFrame(columns=transactions.mcc_code.unique(), 
                                 index=np.arange(1, 31) + transactions.day.max())
test_transactions = test_transactions.unstack().reset_index().dropna(axis=1)
test_transactions.columns = ['mcc_code', 'day']


train_grid = pd.DataFrame(columns=transactions.mcc_code.unique(), 
                          index=transactions.day.unique())
train_grid = train_grid.unstack().reset_index().dropna(axis=1)
train_grid.columns = ['mcc_code', 'day']

for tr_table in tqdm([transactions, test_transactions, train_grid]):
    tr_table['week_num'] = (tr_table['day'] - 2) // 7
    tr_table['week_day'] = (tr_table['day'] - 2) % 7
    tr_table['month_num'] = tr_table['day'] // 28
    tr_table['month_day'] = tr_table['day'] % 28
    
merge_col_names = ['day', 'week_num', 'week_day', 'month_num', 'month_day', 'mcc_code']

train_transactions = pd.merge(
    train_grid,
    transactions.groupby(merge_col_names)[['neg_amount', 'pos_amount'] + term_cols].sum().reset_index(),
    how='left').fillna(0)

train_transactions = pd.merge(
    train_transactions,
    transactions.groupby(merge_col_names)[['customer_id']].count().reset_index(),
    how='left').fillna(0).astype(np.int32)
train_transactions.columns = np.hstack([train_transactions.columns[:-1], ['n_transactions']])

train_transactions['log_neg_amount'] = train_transactions.neg_amount.apply(lambda x: np.log(x + 1))
train_transactions['log_pos_amount'] = train_transactions.pos_amount.apply(lambda x: np.log(x + 1))
train_transactions.head()

100%|██████████| 3/3 [00:00<00:00,  1.94it/s]


Unnamed: 0,mcc_code,day,week_num,week_day,month_num,month_day,neg_amount,pos_amount,term_id_-2,term_id_-1,term_id_00000000,term_id_00000001,term_id_00000002,term_id_00000181,term_id_00002013,term_id_00003304,term_id_00003799,term_id_00006536,term_id_00010002,term_id_00301402,term_id_01170002,term_id_01170636,term_id_04170520,term_id_078367,term_id_10000001,term_id_10055145,term_id_10166202,term_id_10236498,term_id_10244892,term_id_10244893,term_id_10244895,term_id_121176,term_id_121295,term_id_121313,term_id_122133,term_id_122763,term_id_124955,term_id_20001004,term_id_20001020,term_id_20001033,term_id_20001471,term_id_20001472,term_id_20828301,term_id_20831963,term_id_23353601,term_id_24022201,term_id_24050333,term_id_290594,term_id_30000801,term_id_321498,term_id_333219,term_id_33333874,term_id_392684,term_id_461536,term_id_470156,term_id_50038,term_id_507714,term_id_547167,term_id_560500,term_id_713157,term_id_757918,term_id_77062921,term_id_77096196,term_id_80200001,term_id_807824,term_id_826510,term_id_826511,term_id_826512,term_id_872806,term_id_888901,term_id_888902,term_id_888903,term_id_888904,term_id_888905,term_id_888906,term_id_888907,term_id_888908,term_id_888909,term_id_888910,term_id_888911,term_id_888912,term_id_888913,term_id_888914,term_id_888915,term_id_888916,term_id_888917,term_id_888918,term_id_888919,term_id_888920,term_id_888921,term_id_888922,term_id_888923,term_id_888924,term_id_888925,term_id_888926,term_id_888927,term_id_888928,term_id_888929,term_id_888930,term_id_888984,...,term_id_888997,term_id_889000,term_id_889001,term_id_889002,term_id_889003,term_id_889065,term_id_902866,term_id_940308,term_id_940309,term_id_940310,term_id_940311,term_id_940312,term_id_940313,term_id_940314,term_id_940315,term_id_940316,term_id_940317,term_id_940318,term_id_940319,term_id_940576,term_id_940577,term_id_940578,term_id_940579,term_id_940580,term_id_940581,term_id_940582,term_id_940583,term_id_940584,term_id_940585,term_id_940586,term_id_940587,term_id_940588,term_id_940589,term_id_940590,term_id_940591,term_id_941122,term_id_941124,term_id_941125,term_id_941126,term_id_941127,term_id_941128,term_id_941129,term_id_941130,term_id_941131,term_id_941132,term_id_941133,term_id_941134,term_id_941135,term_id_941392,term_id_941393,term_id_941394,term_id_941395,term_id_941396,term_id_941397,term_id_941398,term_id_941399,term_id_941400,term_id_941401,term_id_941402,term_id_941403,term_id_941404,term_id_941405,term_id_941406,term_id_941407,term_id_945001,term_id_945002,term_id_945003,term_id_945004,term_id_945005,term_id_945006,term_id_945007,term_id_945008,term_id_945009,term_id_945010,term_id_945011,term_id_945012,term_id_945013,term_id_945014,term_id_945015,term_id_945016,term_id_945017,term_id_945018,term_id_945019,term_id_945020,term_id_945021,term_id_945022,term_id_945023,term_id_945033,term_id_945036,term_id_945039,term_id_945042,term_id_945045,term_id_953140,term_id_995026,term_id_CARDSVCS,term_id_I0MO0IRZ,term_id_WPGTID01,n_transactions,log_neg_amount,log_pos_amount
0,4814,0,-1,5,0,0,11098744,0,0,2365,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2365,16.222343,0.0
1,4814,1,-1,6,0,1,7881825,0,0,1697,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1697,15.88007,0.0
2,4814,2,0,0,0,2,6777480,0,0,1524,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1524,15.729116,0.0
3,4814,3,0,1,0,3,9277943,0,0,1937,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1937,16.043151,0.0
4,4814,4,0,2,0,4,9999757,0,0,1943,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1943,16.118071,0.0


In [9]:
train = train_transactions.copy()

helper = train.copy().set_index(['mcc_code', 'week_num'])
helper['week_mean'] = train_transactions.groupby(['mcc_code', 'week_num']).mean()['log_neg_amount']
train = helper.reset_index().copy()

helper = train.copy().set_index(['mcc_code', 'week_num'])
helper['week_std'] = train_transactions.groupby(['mcc_code', 'week_num']).std()['log_neg_amount']
train = helper.reset_index().copy()
train.head()

helper = train.copy().set_index(['mcc_code', 'week_num'])
helper['week_mean_transactions'] = train_transactions.groupby(['mcc_code', 'week_num']).mean()['n_transactions']
train = helper.reset_index().copy()

helper = train.copy().set_index(['mcc_code', 'week_num'])
helper['week_std_transactions'] = train_transactions.groupby(['mcc_code', 'week_num']).std()['n_transactions']
train = helper.reset_index().copy()
train.head()

Unnamed: 0,mcc_code,week_num,day,week_day,month_num,month_day,neg_amount,pos_amount,term_id_-2,term_id_-1,term_id_00000000,term_id_00000001,term_id_00000002,term_id_00000181,term_id_00002013,term_id_00003304,term_id_00003799,term_id_00006536,term_id_00010002,term_id_00301402,term_id_01170002,term_id_01170636,term_id_04170520,term_id_078367,term_id_10000001,term_id_10055145,term_id_10166202,term_id_10236498,term_id_10244892,term_id_10244893,term_id_10244895,term_id_121176,term_id_121295,term_id_121313,term_id_122133,term_id_122763,term_id_124955,term_id_20001004,term_id_20001020,term_id_20001033,term_id_20001471,term_id_20001472,term_id_20828301,term_id_20831963,term_id_23353601,term_id_24022201,term_id_24050333,term_id_290594,term_id_30000801,term_id_321498,term_id_333219,term_id_33333874,term_id_392684,term_id_461536,term_id_470156,term_id_50038,term_id_507714,term_id_547167,term_id_560500,term_id_713157,term_id_757918,term_id_77062921,term_id_77096196,term_id_80200001,term_id_807824,term_id_826510,term_id_826511,term_id_826512,term_id_872806,term_id_888901,term_id_888902,term_id_888903,term_id_888904,term_id_888905,term_id_888906,term_id_888907,term_id_888908,term_id_888909,term_id_888910,term_id_888911,term_id_888912,term_id_888913,term_id_888914,term_id_888915,term_id_888916,term_id_888917,term_id_888918,term_id_888919,term_id_888920,term_id_888921,term_id_888922,term_id_888923,term_id_888924,term_id_888925,term_id_888926,term_id_888927,term_id_888928,term_id_888929,term_id_888930,term_id_888984,...,term_id_889003,term_id_889065,term_id_902866,term_id_940308,term_id_940309,term_id_940310,term_id_940311,term_id_940312,term_id_940313,term_id_940314,term_id_940315,term_id_940316,term_id_940317,term_id_940318,term_id_940319,term_id_940576,term_id_940577,term_id_940578,term_id_940579,term_id_940580,term_id_940581,term_id_940582,term_id_940583,term_id_940584,term_id_940585,term_id_940586,term_id_940587,term_id_940588,term_id_940589,term_id_940590,term_id_940591,term_id_941122,term_id_941124,term_id_941125,term_id_941126,term_id_941127,term_id_941128,term_id_941129,term_id_941130,term_id_941131,term_id_941132,term_id_941133,term_id_941134,term_id_941135,term_id_941392,term_id_941393,term_id_941394,term_id_941395,term_id_941396,term_id_941397,term_id_941398,term_id_941399,term_id_941400,term_id_941401,term_id_941402,term_id_941403,term_id_941404,term_id_941405,term_id_941406,term_id_941407,term_id_945001,term_id_945002,term_id_945003,term_id_945004,term_id_945005,term_id_945006,term_id_945007,term_id_945008,term_id_945009,term_id_945010,term_id_945011,term_id_945012,term_id_945013,term_id_945014,term_id_945015,term_id_945016,term_id_945017,term_id_945018,term_id_945019,term_id_945020,term_id_945021,term_id_945022,term_id_945023,term_id_945033,term_id_945036,term_id_945039,term_id_945042,term_id_945045,term_id_953140,term_id_995026,term_id_CARDSVCS,term_id_I0MO0IRZ,term_id_WPGTID01,n_transactions,log_neg_amount,log_pos_amount,week_mean,week_std,week_mean_transactions,week_std_transactions
0,4814,-1,0,5,0,0,11098744,0,0,2365,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2365,16.222343,0.0,16.051206,0.242023,2031.0,472.34733
1,4814,-1,1,6,0,1,7881825,0,0,1697,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1697,15.88007,0.0,16.051206,0.242023,2031.0,472.34733
2,4814,0,2,0,0,2,6777480,0,0,1524,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1524,15.729116,0.0,15.985827,0.141873,1821.571429,186.298915
3,4814,0,3,1,0,3,9277943,0,0,1937,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1937,16.043151,0.0,15.985827,0.141873,1821.571429,186.298915
4,4814,0,4,2,0,4,9999757,0,0,1943,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1943,16.118071,0.0,15.985827,0.141873,1821.571429,186.298915


In [10]:
helper = train.copy().set_index(['mcc_code', 'month_num'])
for col in tqdm(term_cols):
    helper['month_sum_' + col] = train_transactions.groupby(['mcc_code', 'month_num']).sum()[col]
helper.head()

100%|██████████| 201/201 [01:19<00:00,  2.73it/s]


Unnamed: 0_level_0,Unnamed: 1_level_0,week_num,day,week_day,month_day,neg_amount,pos_amount,term_id_-2,term_id_-1,term_id_00000000,term_id_00000001,term_id_00000002,term_id_00000181,term_id_00002013,term_id_00003304,term_id_00003799,term_id_00006536,term_id_00010002,term_id_00301402,term_id_01170002,term_id_01170636,term_id_04170520,term_id_078367,term_id_10000001,term_id_10055145,term_id_10166202,term_id_10236498,term_id_10244892,term_id_10244893,term_id_10244895,term_id_121176,term_id_121295,term_id_121313,term_id_122133,term_id_122763,term_id_124955,term_id_20001004,term_id_20001020,term_id_20001033,term_id_20001471,term_id_20001472,term_id_20828301,term_id_20831963,term_id_23353601,term_id_24022201,term_id_24050333,term_id_290594,term_id_30000801,term_id_321498,term_id_333219,term_id_33333874,term_id_392684,term_id_461536,term_id_470156,term_id_50038,term_id_507714,term_id_547167,term_id_560500,term_id_713157,term_id_757918,term_id_77062921,term_id_77096196,term_id_80200001,term_id_807824,term_id_826510,term_id_826511,term_id_826512,term_id_872806,term_id_888901,term_id_888902,term_id_888903,term_id_888904,term_id_888905,term_id_888906,term_id_888907,term_id_888908,term_id_888909,term_id_888910,term_id_888911,term_id_888912,term_id_888913,term_id_888914,term_id_888915,term_id_888916,term_id_888917,term_id_888918,term_id_888919,term_id_888920,term_id_888921,term_id_888922,term_id_888923,term_id_888924,term_id_888925,term_id_888926,term_id_888927,term_id_888928,term_id_888929,term_id_888930,term_id_888984,term_id_888985,term_id_888986,...,month_sum_term_id_888994,month_sum_term_id_888995,month_sum_term_id_888996,month_sum_term_id_888997,month_sum_term_id_889000,month_sum_term_id_889001,month_sum_term_id_889002,month_sum_term_id_889003,month_sum_term_id_889065,month_sum_term_id_902866,month_sum_term_id_940308,month_sum_term_id_940309,month_sum_term_id_940310,month_sum_term_id_940311,month_sum_term_id_940312,month_sum_term_id_940313,month_sum_term_id_940314,month_sum_term_id_940315,month_sum_term_id_940316,month_sum_term_id_940317,month_sum_term_id_940318,month_sum_term_id_940319,month_sum_term_id_940576,month_sum_term_id_940577,month_sum_term_id_940578,month_sum_term_id_940579,month_sum_term_id_940580,month_sum_term_id_940581,month_sum_term_id_940582,month_sum_term_id_940583,month_sum_term_id_940584,month_sum_term_id_940585,month_sum_term_id_940586,month_sum_term_id_940587,month_sum_term_id_940588,month_sum_term_id_940589,month_sum_term_id_940590,month_sum_term_id_940591,month_sum_term_id_941122,month_sum_term_id_941124,month_sum_term_id_941125,month_sum_term_id_941126,month_sum_term_id_941127,month_sum_term_id_941128,month_sum_term_id_941129,month_sum_term_id_941130,month_sum_term_id_941131,month_sum_term_id_941132,month_sum_term_id_941133,month_sum_term_id_941134,month_sum_term_id_941135,month_sum_term_id_941392,month_sum_term_id_941393,month_sum_term_id_941394,month_sum_term_id_941395,month_sum_term_id_941396,month_sum_term_id_941397,month_sum_term_id_941398,month_sum_term_id_941399,month_sum_term_id_941400,month_sum_term_id_941401,month_sum_term_id_941402,month_sum_term_id_941403,month_sum_term_id_941404,month_sum_term_id_941405,month_sum_term_id_941406,month_sum_term_id_941407,month_sum_term_id_945001,month_sum_term_id_945002,month_sum_term_id_945003,month_sum_term_id_945004,month_sum_term_id_945005,month_sum_term_id_945006,month_sum_term_id_945007,month_sum_term_id_945008,month_sum_term_id_945009,month_sum_term_id_945010,month_sum_term_id_945011,month_sum_term_id_945012,month_sum_term_id_945013,month_sum_term_id_945014,month_sum_term_id_945015,month_sum_term_id_945016,month_sum_term_id_945017,month_sum_term_id_945018,month_sum_term_id_945019,month_sum_term_id_945020,month_sum_term_id_945021,month_sum_term_id_945022,month_sum_term_id_945023,month_sum_term_id_945033,month_sum_term_id_945036,month_sum_term_id_945039,month_sum_term_id_945042,month_sum_term_id_945045,month_sum_term_id_953140,month_sum_term_id_995026,month_sum_term_id_CARDSVCS,month_sum_term_id_I0MO0IRZ,month_sum_term_id_WPGTID01
mcc_code,month_num,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1,Unnamed: 122_level_1,Unnamed: 123_level_1,Unnamed: 124_level_1,Unnamed: 125_level_1,Unnamed: 126_level_1,Unnamed: 127_level_1,Unnamed: 128_level_1,Unnamed: 129_level_1,Unnamed: 130_level_1,Unnamed: 131_level_1,Unnamed: 132_level_1,Unnamed: 133_level_1,Unnamed: 134_level_1,Unnamed: 135_level_1,Unnamed: 136_level_1,Unnamed: 137_level_1,Unnamed: 138_level_1,Unnamed: 139_level_1,Unnamed: 140_level_1,Unnamed: 141_level_1,Unnamed: 142_level_1,Unnamed: 143_level_1,Unnamed: 144_level_1,Unnamed: 145_level_1,Unnamed: 146_level_1,Unnamed: 147_level_1,Unnamed: 148_level_1,Unnamed: 149_level_1,Unnamed: 150_level_1,Unnamed: 151_level_1,Unnamed: 152_level_1,Unnamed: 153_level_1,Unnamed: 154_level_1,Unnamed: 155_level_1,Unnamed: 156_level_1,Unnamed: 157_level_1,Unnamed: 158_level_1,Unnamed: 159_level_1,Unnamed: 160_level_1,Unnamed: 161_level_1,Unnamed: 162_level_1,Unnamed: 163_level_1,Unnamed: 164_level_1,Unnamed: 165_level_1,Unnamed: 166_level_1,Unnamed: 167_level_1,Unnamed: 168_level_1,Unnamed: 169_level_1,Unnamed: 170_level_1,Unnamed: 171_level_1,Unnamed: 172_level_1,Unnamed: 173_level_1,Unnamed: 174_level_1,Unnamed: 175_level_1,Unnamed: 176_level_1,Unnamed: 177_level_1,Unnamed: 178_level_1,Unnamed: 179_level_1,Unnamed: 180_level_1,Unnamed: 181_level_1,Unnamed: 182_level_1,Unnamed: 183_level_1,Unnamed: 184_level_1,Unnamed: 185_level_1,Unnamed: 186_level_1,Unnamed: 187_level_1,Unnamed: 188_level_1,Unnamed: 189_level_1,Unnamed: 190_level_1,Unnamed: 191_level_1,Unnamed: 192_level_1,Unnamed: 193_level_1,Unnamed: 194_level_1,Unnamed: 195_level_1,Unnamed: 196_level_1,Unnamed: 197_level_1,Unnamed: 198_level_1,Unnamed: 199_level_1,Unnamed: 200_level_1,Unnamed: 201_level_1,Unnamed: 202_level_1
4814,0,-1,0,5,0,11098744,0,0,2365,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4814,0,-1,1,6,1,7881825,0,0,1697,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4814,0,0,2,0,2,6777480,0,0,1524,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4814,0,0,3,1,3,9277943,0,0,1937,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4814,0,0,4,2,4,9999757,0,0,1943,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [21]:
train = helper.reset_index().copy()
from collections import Counter
Counter(train.columns)

In [12]:
helper = train.copy().set_index(['mcc_code', 'month_num'])
helper['month_mean'] = train_transactions.groupby(['mcc_code', 'month_num']).mean()['log_neg_amount']
train = helper.reset_index().copy()

helper = train.copy().set_index(['mcc_code', 'month_num'])
helper['month_std'] = train_transactions.groupby(['mcc_code', 'month_num']).std()['log_neg_amount']
train = helper.reset_index().copy()

helper = train.copy().set_index(['mcc_code', 'month_num'])
helper['month_median'] = train_transactions.groupby(['mcc_code', 'month_num']).median()['log_neg_amount']
train = helper.reset_index().copy()

helper = train.copy().set_index(['mcc_code', 'month_num'])
helper['month_min'] = train_transactions.groupby(['mcc_code', 'month_num']).min()['log_neg_amount']
train = helper.reset_index().copy()

helper = train.copy().set_index(['mcc_code', 'month_num'])
helper['month_max'] = train_transactions.groupby(['mcc_code', 'month_num']).max()['log_neg_amount']
train = helper.reset_index().copy()

#n_transactions
helper = train.copy().set_index(['mcc_code', 'month_num'])
helper['month_mean_transactions'] = train_transactions.groupby(['mcc_code', 'month_num']).mean()['n_transactions']
train = helper.reset_index().copy()

helper = train.copy().set_index(['mcc_code', 'month_num'])
helper['month_std_transactions'] = train_transactions.groupby(['mcc_code', 'month_num']).std()['n_transactions']
train = helper.reset_index().copy()

helper = train.copy().set_index(['mcc_code', 'month_num'])
helper['month_median_transactions'] = train_transactions.groupby(['mcc_code', 'month_num']).median()['n_transactions']
train = helper.reset_index().copy()

helper = train.copy().set_index(['mcc_code', 'month_num'])
helper['month_min_transactions'] = train_transactions.groupby(['mcc_code', 'month_num']).min()['n_transactions']
train = helper.reset_index().copy()

helper = train.copy().set_index(['mcc_code', 'month_num'])
helper['month_max_transactions'] = train_transactions.groupby(['mcc_code', 'month_num']).max()['n_transactions']
train = helper.reset_index().copy()

TIMESTAT_COLS = train.columns.difference(train_transactions.columns)

train.head()

Unnamed: 0,mcc_code,month_num,week_num,day,week_day,month_day,neg_amount,pos_amount,term_id_-2,term_id_-1,term_id_00000000,term_id_00000001,term_id_00000002,term_id_00000181,term_id_00002013,term_id_00003304,term_id_00003799,term_id_00006536,term_id_00010002,term_id_00301402,term_id_01170002,term_id_01170636,term_id_04170520,term_id_078367,term_id_10000001,term_id_10055145,term_id_10166202,term_id_10236498,term_id_10244892,term_id_10244893,term_id_10244895,term_id_121176,term_id_121295,term_id_121313,term_id_122133,term_id_122763,term_id_124955,term_id_20001004,term_id_20001020,term_id_20001033,term_id_20001471,term_id_20001472,term_id_20828301,term_id_20831963,term_id_23353601,term_id_24022201,term_id_24050333,term_id_290594,term_id_30000801,term_id_321498,term_id_333219,term_id_33333874,term_id_392684,term_id_461536,term_id_470156,term_id_50038,term_id_507714,term_id_547167,term_id_560500,term_id_713157,term_id_757918,term_id_77062921,term_id_77096196,term_id_80200001,term_id_807824,term_id_826510,term_id_826511,term_id_826512,term_id_872806,term_id_888901,term_id_888902,term_id_888903,term_id_888904,term_id_888905,term_id_888906,term_id_888907,term_id_888908,term_id_888909,term_id_888910,term_id_888911,term_id_888912,term_id_888913,term_id_888914,term_id_888915,term_id_888916,term_id_888917,term_id_888918,term_id_888919,term_id_888920,term_id_888921,term_id_888922,term_id_888923,term_id_888924,term_id_888925,term_id_888926,term_id_888927,term_id_888928,term_id_888929,term_id_888930,term_id_888984,...,month_sum_term_id_940308,month_sum_term_id_940309,month_sum_term_id_940310,month_sum_term_id_940311,month_sum_term_id_940312,month_sum_term_id_940313,month_sum_term_id_940314,month_sum_term_id_940315,month_sum_term_id_940316,month_sum_term_id_940317,month_sum_term_id_940318,month_sum_term_id_940319,month_sum_term_id_940576,month_sum_term_id_940577,month_sum_term_id_940578,month_sum_term_id_940579,month_sum_term_id_940580,month_sum_term_id_940581,month_sum_term_id_940582,month_sum_term_id_940583,month_sum_term_id_940584,month_sum_term_id_940585,month_sum_term_id_940586,month_sum_term_id_940587,month_sum_term_id_940588,month_sum_term_id_940589,month_sum_term_id_940590,month_sum_term_id_940591,month_sum_term_id_941122,month_sum_term_id_941124,month_sum_term_id_941125,month_sum_term_id_941126,month_sum_term_id_941127,month_sum_term_id_941128,month_sum_term_id_941129,month_sum_term_id_941130,month_sum_term_id_941131,month_sum_term_id_941132,month_sum_term_id_941133,month_sum_term_id_941134,month_sum_term_id_941135,month_sum_term_id_941392,month_sum_term_id_941393,month_sum_term_id_941394,month_sum_term_id_941395,month_sum_term_id_941396,month_sum_term_id_941397,month_sum_term_id_941398,month_sum_term_id_941399,month_sum_term_id_941400,month_sum_term_id_941401,month_sum_term_id_941402,month_sum_term_id_941403,month_sum_term_id_941404,month_sum_term_id_941405,month_sum_term_id_941406,month_sum_term_id_941407,month_sum_term_id_945001,month_sum_term_id_945002,month_sum_term_id_945003,month_sum_term_id_945004,month_sum_term_id_945005,month_sum_term_id_945006,month_sum_term_id_945007,month_sum_term_id_945008,month_sum_term_id_945009,month_sum_term_id_945010,month_sum_term_id_945011,month_sum_term_id_945012,month_sum_term_id_945013,month_sum_term_id_945014,month_sum_term_id_945015,month_sum_term_id_945016,month_sum_term_id_945017,month_sum_term_id_945018,month_sum_term_id_945019,month_sum_term_id_945020,month_sum_term_id_945021,month_sum_term_id_945022,month_sum_term_id_945023,month_sum_term_id_945033,month_sum_term_id_945036,month_sum_term_id_945039,month_sum_term_id_945042,month_sum_term_id_945045,month_sum_term_id_953140,month_sum_term_id_995026,month_sum_term_id_CARDSVCS,month_sum_term_id_I0MO0IRZ,month_sum_term_id_WPGTID01,month_mean,month_std,month_median,month_min,month_max,month_mean_transactions,month_std_transactions,month_median_transactions,month_min_transactions,month_max_transactions
0,4814,0,-1,0,5,0,11098744,0,0,2365,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,16.024313,0.164409,16.082561,15.694662,16.227089,1921.607143,230.296321,1986.5,1513,2365
1,4814,0,-1,1,6,1,7881825,0,0,1697,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,16.024313,0.164409,16.082561,15.694662,16.227089,1921.607143,230.296321,1986.5,1513,2365
2,4814,0,0,2,0,2,6777480,0,0,1524,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,16.024313,0.164409,16.082561,15.694662,16.227089,1921.607143,230.296321,1986.5,1513,2365
3,4814,0,0,3,1,3,9277943,0,0,1937,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,16.024313,0.164409,16.082561,15.694662,16.227089,1921.607143,230.296321,1986.5,1513,2365
4,4814,0,0,4,2,4,9999757,0,0,1943,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,16.024313,0.164409,16.082561,15.694662,16.227089,1921.607143,230.296321,1986.5,1513,2365


In [13]:
week_lag_max = 35
# week_pos_lag_max = 20
for week_shift in tqdm(np.arange(1, week_lag_max)):
    train_shift = train.copy()
    train_shift['week_num'] += week_shift
    train_shift['prev_week_{}_neg'.format(week_shift)] = train_shift.log_neg_amount
    train_shift['prev_week_{}_mean_neg'.format(week_shift)] = train_shift.week_mean
    train_shift['prev_week_{}_std_neg'.format(week_shift)] = train_shift.week_std
    
    #n_transactions
    train_shift['prev_week_{}_n_trans'.format(week_shift)] = train_shift.n_transactions
    train_shift['prev_week_{}_mean_n_trans'.format(week_shift)] = train_shift.week_mean_transactions
    train_shift['prev_week_{}_std_n_trans'.format(week_shift)] = train_shift.week_std_transactions
    
    train_shift = train_shift[[
        'week_num', 'week_day', 'mcc_code',
        'prev_week_{}_neg'.format(week_shift),
        'prev_week_{}_mean_neg'.format(week_shift),
        'prev_week_{}_std_neg'.format(week_shift),
        #added
        'prev_week_{}_n_trans'.format(week_shift),
        'prev_week_{}_mean_n_trans'.format(week_shift),
        'prev_week_{}_std_n_trans'.format(week_shift),
    ]]
    train_transactions = pd.merge(
        train_transactions, train_shift, 
        on=['week_num', 'week_day', 'mcc_code'],
        how='left').fillna(0)
    test_transactions = pd.merge(
        test_transactions, train_shift, 
        on=['week_num', 'week_day', 'mcc_code'],
        how='left').fillna(0)
train_transactions.head()

100%|██████████| 34/34 [00:19<00:00,  1.39it/s]


Unnamed: 0,mcc_code,day,week_num,week_day,month_num,month_day,neg_amount,pos_amount,term_id_-2,term_id_-1,term_id_00000000,term_id_00000001,term_id_00000002,term_id_00000181,term_id_00002013,term_id_00003304,term_id_00003799,term_id_00006536,term_id_00010002,term_id_00301402,term_id_01170002,term_id_01170636,term_id_04170520,term_id_078367,term_id_10000001,term_id_10055145,term_id_10166202,term_id_10236498,term_id_10244892,term_id_10244893,term_id_10244895,term_id_121176,term_id_121295,term_id_121313,term_id_122133,term_id_122763,term_id_124955,term_id_20001004,term_id_20001020,term_id_20001033,term_id_20001471,term_id_20001472,term_id_20828301,term_id_20831963,term_id_23353601,term_id_24022201,term_id_24050333,term_id_290594,term_id_30000801,term_id_321498,term_id_333219,term_id_33333874,term_id_392684,term_id_461536,term_id_470156,term_id_50038,term_id_507714,term_id_547167,term_id_560500,term_id_713157,term_id_757918,term_id_77062921,term_id_77096196,term_id_80200001,term_id_807824,term_id_826510,term_id_826511,term_id_826512,term_id_872806,term_id_888901,term_id_888902,term_id_888903,term_id_888904,term_id_888905,term_id_888906,term_id_888907,term_id_888908,term_id_888909,term_id_888910,term_id_888911,term_id_888912,term_id_888913,term_id_888914,term_id_888915,term_id_888916,term_id_888917,term_id_888918,term_id_888919,term_id_888920,term_id_888921,term_id_888922,term_id_888923,term_id_888924,term_id_888925,term_id_888926,term_id_888927,term_id_888928,term_id_888929,term_id_888930,term_id_888984,...,prev_week_18_std_neg,prev_week_18_n_trans,prev_week_18_mean_n_trans,prev_week_18_std_n_trans,prev_week_19_neg,prev_week_19_mean_neg,prev_week_19_std_neg,prev_week_19_n_trans,prev_week_19_mean_n_trans,prev_week_19_std_n_trans,prev_week_20_neg,prev_week_20_mean_neg,prev_week_20_std_neg,prev_week_20_n_trans,prev_week_20_mean_n_trans,prev_week_20_std_n_trans,prev_week_21_neg,prev_week_21_mean_neg,prev_week_21_std_neg,prev_week_21_n_trans,prev_week_21_mean_n_trans,prev_week_21_std_n_trans,prev_week_22_neg,prev_week_22_mean_neg,prev_week_22_std_neg,prev_week_22_n_trans,prev_week_22_mean_n_trans,prev_week_22_std_n_trans,prev_week_23_neg,prev_week_23_mean_neg,prev_week_23_std_neg,prev_week_23_n_trans,prev_week_23_mean_n_trans,prev_week_23_std_n_trans,prev_week_24_neg,prev_week_24_mean_neg,prev_week_24_std_neg,prev_week_24_n_trans,prev_week_24_mean_n_trans,prev_week_24_std_n_trans,prev_week_25_neg,prev_week_25_mean_neg,prev_week_25_std_neg,prev_week_25_n_trans,prev_week_25_mean_n_trans,prev_week_25_std_n_trans,prev_week_26_neg,prev_week_26_mean_neg,prev_week_26_std_neg,prev_week_26_n_trans,prev_week_26_mean_n_trans,prev_week_26_std_n_trans,prev_week_27_neg,prev_week_27_mean_neg,prev_week_27_std_neg,prev_week_27_n_trans,prev_week_27_mean_n_trans,prev_week_27_std_n_trans,prev_week_28_neg,prev_week_28_mean_neg,prev_week_28_std_neg,prev_week_28_n_trans,prev_week_28_mean_n_trans,prev_week_28_std_n_trans,prev_week_29_neg,prev_week_29_mean_neg,prev_week_29_std_neg,prev_week_29_n_trans,prev_week_29_mean_n_trans,prev_week_29_std_n_trans,prev_week_30_neg,prev_week_30_mean_neg,prev_week_30_std_neg,prev_week_30_n_trans,prev_week_30_mean_n_trans,prev_week_30_std_n_trans,prev_week_31_neg,prev_week_31_mean_neg,prev_week_31_std_neg,prev_week_31_n_trans,prev_week_31_mean_n_trans,prev_week_31_std_n_trans,prev_week_32_neg,prev_week_32_mean_neg,prev_week_32_std_neg,prev_week_32_n_trans,prev_week_32_mean_n_trans,prev_week_32_std_n_trans,prev_week_33_neg,prev_week_33_mean_neg,prev_week_33_std_neg,prev_week_33_n_trans,prev_week_33_mean_n_trans,prev_week_33_std_n_trans,prev_week_34_neg,prev_week_34_mean_neg,prev_week_34_std_neg,prev_week_34_n_trans,prev_week_34_mean_n_trans,prev_week_34_std_n_trans
0,4814,0,-1,5,0,0,11098744,0,0,2365,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,4814,1,-1,6,0,1,7881825,0,0,1697,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,4814,2,0,0,0,2,6777480,0,0,1524,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,4814,3,0,1,0,3,9277943,0,0,1937,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,4814,4,0,2,0,4,9999757,0,0,1943,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [14]:
month_lag_max = 10
for month_shift in tqdm(np.arange(1, month_lag_max)):
    train_shift = train.copy()
    train_shift['month_num'] += month_shift
    train_shift['prev_month_{}_mean_neg'.format(month_shift)] = train_shift.month_mean
    train_shift['prev_month_{}_std_neg'.format(month_shift)] = train_shift.month_std
    train_shift['prev_month_{}_median_neg'.format(month_shift)] = train_shift.month_median
    train_shift['prev_month_{}_max_neg'.format(month_shift)] = train_shift.month_max
    train_shift['prev_month_{}_min_neg'.format(month_shift)] = train_shift.month_min
    
    #added
    train_shift['prev_month_{}_mean_n_trans'.format(month_shift)] = train_shift.month_mean_transactions
    train_shift['prev_month_{}_std_n_trans'.format(month_shift)] = train_shift.month_std_transactions
    train_shift['prev_month_{}_median_n_trans'.format(month_shift)] = train_shift.month_median_transactions
    train_shift['prev_month_{}_max_n_trans'.format(month_shift)] = train_shift.month_max_transactions
    train_shift['prev_month_{}_min_n_trans'.format(month_shift)] = train_shift.month_min_transactions
    
    train_shift = train_shift[[
        'month_num', 'day', 'mcc_code',
        'prev_month_{}_mean_neg'.format(month_shift),
        'prev_month_{}_std_neg'.format(month_shift),
        'prev_month_{}_median_neg'.format(month_shift),
        'prev_month_{}_max_neg'.format(month_shift),
        'prev_month_{}_min_neg'.format(month_shift),
        'prev_month_{}_mean_n_trans'.format(month_shift),
        'prev_month_{}_std_n_trans'.format(month_shift),
        'prev_month_{}_median_n_trans'.format(month_shift),
        'prev_month_{}_max_n_trans'.format(month_shift),
        'prev_month_{}_min_n_trans'.format(month_shift),
    ]]
    train_transactions = pd.merge(
        train_transactions, train_shift, 
        on=['month_num', 'day', 'mcc_code'],
        how='left').fillna(0)
    test_transactions = pd.merge(
        test_transactions, train_shift, 
        on=['month_num', 'day', 'mcc_code'],
        how='left').fillna(0)
train_transactions.head()

100%|██████████| 9/9 [00:07<00:00,  1.13it/s]


Unnamed: 0,mcc_code,day,week_num,week_day,month_num,month_day,neg_amount,pos_amount,term_id_-2,term_id_-1,term_id_00000000,term_id_00000001,term_id_00000002,term_id_00000181,term_id_00002013,term_id_00003304,term_id_00003799,term_id_00006536,term_id_00010002,term_id_00301402,term_id_01170002,term_id_01170636,term_id_04170520,term_id_078367,term_id_10000001,term_id_10055145,term_id_10166202,term_id_10236498,term_id_10244892,term_id_10244893,term_id_10244895,term_id_121176,term_id_121295,term_id_121313,term_id_122133,term_id_122763,term_id_124955,term_id_20001004,term_id_20001020,term_id_20001033,term_id_20001471,term_id_20001472,term_id_20828301,term_id_20831963,term_id_23353601,term_id_24022201,term_id_24050333,term_id_290594,term_id_30000801,term_id_321498,term_id_333219,term_id_33333874,term_id_392684,term_id_461536,term_id_470156,term_id_50038,term_id_507714,term_id_547167,term_id_560500,term_id_713157,term_id_757918,term_id_77062921,term_id_77096196,term_id_80200001,term_id_807824,term_id_826510,term_id_826511,term_id_826512,term_id_872806,term_id_888901,term_id_888902,term_id_888903,term_id_888904,term_id_888905,term_id_888906,term_id_888907,term_id_888908,term_id_888909,term_id_888910,term_id_888911,term_id_888912,term_id_888913,term_id_888914,term_id_888915,term_id_888916,term_id_888917,term_id_888918,term_id_888919,term_id_888920,term_id_888921,term_id_888922,term_id_888923,term_id_888924,term_id_888925,term_id_888926,term_id_888927,term_id_888928,term_id_888929,term_id_888930,term_id_888984,...,prev_week_33_std_neg,prev_week_33_n_trans,prev_week_33_mean_n_trans,prev_week_33_std_n_trans,prev_week_34_neg,prev_week_34_mean_neg,prev_week_34_std_neg,prev_week_34_n_trans,prev_week_34_mean_n_trans,prev_week_34_std_n_trans,prev_month_1_mean_neg,prev_month_1_std_neg,prev_month_1_median_neg,prev_month_1_max_neg,prev_month_1_min_neg,prev_month_1_mean_n_trans,prev_month_1_std_n_trans,prev_month_1_median_n_trans,prev_month_1_max_n_trans,prev_month_1_min_n_trans,prev_month_2_mean_neg,prev_month_2_std_neg,prev_month_2_median_neg,prev_month_2_max_neg,prev_month_2_min_neg,prev_month_2_mean_n_trans,prev_month_2_std_n_trans,prev_month_2_median_n_trans,prev_month_2_max_n_trans,prev_month_2_min_n_trans,prev_month_3_mean_neg,prev_month_3_std_neg,prev_month_3_median_neg,prev_month_3_max_neg,prev_month_3_min_neg,prev_month_3_mean_n_trans,prev_month_3_std_n_trans,prev_month_3_median_n_trans,prev_month_3_max_n_trans,prev_month_3_min_n_trans,prev_month_4_mean_neg,prev_month_4_std_neg,prev_month_4_median_neg,prev_month_4_max_neg,prev_month_4_min_neg,prev_month_4_mean_n_trans,prev_month_4_std_n_trans,prev_month_4_median_n_trans,prev_month_4_max_n_trans,prev_month_4_min_n_trans,prev_month_5_mean_neg,prev_month_5_std_neg,prev_month_5_median_neg,prev_month_5_max_neg,prev_month_5_min_neg,prev_month_5_mean_n_trans,prev_month_5_std_n_trans,prev_month_5_median_n_trans,prev_month_5_max_n_trans,prev_month_5_min_n_trans,prev_month_6_mean_neg,prev_month_6_std_neg,prev_month_6_median_neg,prev_month_6_max_neg,prev_month_6_min_neg,prev_month_6_mean_n_trans,prev_month_6_std_n_trans,prev_month_6_median_n_trans,prev_month_6_max_n_trans,prev_month_6_min_n_trans,prev_month_7_mean_neg,prev_month_7_std_neg,prev_month_7_median_neg,prev_month_7_max_neg,prev_month_7_min_neg,prev_month_7_mean_n_trans,prev_month_7_std_n_trans,prev_month_7_median_n_trans,prev_month_7_max_n_trans,prev_month_7_min_n_trans,prev_month_8_mean_neg,prev_month_8_std_neg,prev_month_8_median_neg,prev_month_8_max_neg,prev_month_8_min_neg,prev_month_8_mean_n_trans,prev_month_8_std_n_trans,prev_month_8_median_n_trans,prev_month_8_max_n_trans,prev_month_8_min_n_trans,prev_month_9_mean_neg,prev_month_9_std_neg,prev_month_9_median_neg,prev_month_9_max_neg,prev_month_9_min_neg,prev_month_9_mean_n_trans,prev_month_9_std_n_trans,prev_month_9_median_n_trans,prev_month_9_max_n_trans,prev_month_9_min_n_trans
0,4814,0,-1,5,0,0,11098744,0,0,2365,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,4814,1,-1,6,0,1,7881825,0,0,1697,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,4814,2,0,0,0,2,6777480,0,0,1524,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,4814,3,0,1,0,3,9277943,0,0,1937,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,4814,4,0,2,0,4,9999757,0,0,1943,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [20]:
# train = helper.reset_index().copy()
# train_transactions.drop(term_cols, 1, inplace=True)
# test_transactions.drop(term_cols, 1, inplace=True)
from collections import Counter
Counter(test_transactions.columns)

Counter({'day': 1,
         'mcc_code': 1,
         'month_day': 1,
         'month_num': 1,
         'prev_month_1_max_n_trans': 1,
         'prev_month_1_max_neg': 1,
         'prev_month_1_mean_n_trans': 1,
         'prev_month_1_mean_neg': 1,
         'prev_month_1_median_n_trans': 1,
         'prev_month_1_median_neg': 1,
         'prev_month_1_min_n_trans': 1,
         'prev_month_1_min_neg': 1,
         'prev_month_1_std_n_trans': 1,
         'prev_month_1_std_neg': 1,
         'prev_month_2_max_n_trans': 1,
         'prev_month_2_max_neg': 1,
         'prev_month_2_mean_n_trans': 1,
         'prev_month_2_mean_neg': 1,
         'prev_month_2_median_n_trans': 1,
         'prev_month_2_median_neg': 1,
         'prev_month_2_min_n_trans': 1,
         'prev_month_2_min_neg': 1,
         'prev_month_2_std_n_trans': 1,
         'prev_month_2_std_neg': 1,
         'prev_month_3_max_n_trans': 1,
         'prev_month_3_max_neg': 1,
         'prev_month_3_mean_n_trans': 1,
         'prev

In [27]:
month_lag_max = 5
for month_shift in tqdm(np.arange(1, month_lag_max)):
    train_shift = train.copy()
    train_shift['month_num'] += month_shift
    for col in tqdm(term_cols):
        train_shift['month_sum_' + col] = train['month_sum_' + col]
     
    added_cols = [col for col in train_shift.columns if 'month_sum_term' in col]
    train_shift = train_shift[[
        'month_num', 'day', 'mcc_code'] + added_cols] 
    train_transactions = pd.merge(
        train_transactions, train_shift, 
        on=['month_num', 'day', 'mcc_code'],
        how='left').fillna(0)
    test_transactions = pd.merge(
        test_transactions, train_shift, 
        on=['month_num', 'day', 'mcc_code'],
        how='left').fillna(0)
train_transactions.head()

  0%|          | 0/4 [00:00<?, ?it/s]
  0%|          | 0/201 [00:00<?, ?it/s][A
 92%|█████████▏| 185/201 [00:00<00:00, 1848.26it/s][A
 25%|██▌       | 1/4 [00:01<00:03,  1.21s/it]2it/s][A
  0%|          | 0/201 [00:00<?, ?it/s][A
 50%|█████     | 2/4 [00:02<00:02,  1.30s/it]5it/s][A
  0%|          | 0/201 [00:00<?, ?it/s][A
 75%|███████▌  | 3/4 [00:04<00:01,  1.45s/it]5it/s][A
  0%|          | 0/201 [00:00<?, ?it/s][A
100%|██████████| 4/4 [00:06<00:00,  1.63s/it]2it/s][A


Unnamed: 0,mcc_code,day,week_num,week_day,month_num,month_day,neg_amount,pos_amount,term_id_-2,term_id_-1,term_id_00000000,term_id_00000001,term_id_00000002,term_id_00000181,term_id_00002013,term_id_00003304,term_id_00003799,term_id_00006536,term_id_00010002,term_id_00301402,term_id_01170002,term_id_01170636,term_id_04170520,term_id_078367,term_id_10000001,term_id_10055145,term_id_10166202,term_id_10236498,term_id_10244892,term_id_10244893,term_id_10244895,term_id_121176,term_id_121295,term_id_121313,term_id_122133,term_id_122763,term_id_124955,term_id_20001004,term_id_20001020,term_id_20001033,term_id_20001471,term_id_20001472,term_id_20828301,term_id_20831963,term_id_23353601,term_id_24022201,term_id_24050333,term_id_290594,term_id_30000801,term_id_321498,term_id_333219,term_id_33333874,term_id_392684,term_id_461536,term_id_470156,term_id_50038,term_id_507714,term_id_547167,term_id_560500,term_id_713157,term_id_757918,term_id_77062921,term_id_77096196,term_id_80200001,term_id_807824,term_id_826510,term_id_826511,term_id_826512,term_id_872806,term_id_888901,term_id_888902,term_id_888903,term_id_888904,term_id_888905,term_id_888906,term_id_888907,term_id_888908,term_id_888909,term_id_888910,term_id_888911,term_id_888912,term_id_888913,term_id_888914,term_id_888915,term_id_888916,term_id_888917,term_id_888918,term_id_888919,term_id_888920,term_id_888921,term_id_888922,term_id_888923,term_id_888924,term_id_888925,term_id_888926,term_id_888927,term_id_888928,term_id_888929,term_id_888930,term_id_888984,...,month_sum_term_id_888994_y,month_sum_term_id_888995_y,month_sum_term_id_888996_y,month_sum_term_id_888997_y,month_sum_term_id_889000_y,month_sum_term_id_889001_y,month_sum_term_id_889002_y,month_sum_term_id_889003_y,month_sum_term_id_889065_y,month_sum_term_id_902866_y,month_sum_term_id_940308_y,month_sum_term_id_940309_y,month_sum_term_id_940310_y,month_sum_term_id_940311_y,month_sum_term_id_940312_y,month_sum_term_id_940313_y,month_sum_term_id_940314_y,month_sum_term_id_940315_y,month_sum_term_id_940316_y,month_sum_term_id_940317_y,month_sum_term_id_940318_y,month_sum_term_id_940319_y,month_sum_term_id_940576_y,month_sum_term_id_940577_y,month_sum_term_id_940578_y,month_sum_term_id_940579_y,month_sum_term_id_940580_y,month_sum_term_id_940581_y,month_sum_term_id_940582_y,month_sum_term_id_940583_y,month_sum_term_id_940584_y,month_sum_term_id_940585_y,month_sum_term_id_940586_y,month_sum_term_id_940587_y,month_sum_term_id_940588_y,month_sum_term_id_940589_y,month_sum_term_id_940590_y,month_sum_term_id_940591_y,month_sum_term_id_941122_y,month_sum_term_id_941124_y,month_sum_term_id_941125_y,month_sum_term_id_941126_y,month_sum_term_id_941127_y,month_sum_term_id_941128_y,month_sum_term_id_941129_y,month_sum_term_id_941130_y,month_sum_term_id_941131_y,month_sum_term_id_941132_y,month_sum_term_id_941133_y,month_sum_term_id_941134_y,month_sum_term_id_941135_y,month_sum_term_id_941392_y,month_sum_term_id_941393_y,month_sum_term_id_941394_y,month_sum_term_id_941395_y,month_sum_term_id_941396_y,month_sum_term_id_941397_y,month_sum_term_id_941398_y,month_sum_term_id_941399_y,month_sum_term_id_941400_y,month_sum_term_id_941401_y,month_sum_term_id_941402_y,month_sum_term_id_941403_y,month_sum_term_id_941404_y,month_sum_term_id_941405_y,month_sum_term_id_941406_y,month_sum_term_id_941407_y,month_sum_term_id_945001_y,month_sum_term_id_945002_y,month_sum_term_id_945003_y,month_sum_term_id_945004_y,month_sum_term_id_945005_y,month_sum_term_id_945006_y,month_sum_term_id_945007_y,month_sum_term_id_945008_y,month_sum_term_id_945009_y,month_sum_term_id_945010_y,month_sum_term_id_945011_y,month_sum_term_id_945012_y,month_sum_term_id_945013_y,month_sum_term_id_945014_y,month_sum_term_id_945015_y,month_sum_term_id_945016_y,month_sum_term_id_945017_y,month_sum_term_id_945018_y,month_sum_term_id_945019_y,month_sum_term_id_945020_y,month_sum_term_id_945021_y,month_sum_term_id_945022_y,month_sum_term_id_945023_y,month_sum_term_id_945033_y,month_sum_term_id_945036_y,month_sum_term_id_945039_y,month_sum_term_id_945042_y,month_sum_term_id_945045_y,month_sum_term_id_953140_y,month_sum_term_id_995026_y,month_sum_term_id_CARDSVCS_y,month_sum_term_id_I0MO0IRZ_y,month_sum_term_id_WPGTID01_y
0,4814,0,-1,5,0,0,11098744,0,0,2365,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,4814,1,-1,6,0,1,7881825,0,0,1697,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,4814,2,0,0,0,2,6777480,0,0,1524,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,4814,3,0,1,0,3,9277943,0,0,1937,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,4814,4,0,2,0,4,9999757,0,0,1943,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [29]:
train_transactions.drop(term_cols, 1, inplace=True)
test_transactions.drop(term_cols, 1, inplace=True)

In [31]:
dummy_train = pd.get_dummies(train_transactions, columns=['mcc_code'])
dummy_test = pd.get_dummies(test_transactions, columns=['mcc_code'])

def rmsle(predicted, actual):
    assert(len(predicted) == len(actual))
    p = np.log(np.array(predicted) + 1)
    a = np.log(np.array(actual) + 1)
    return (((p - a)**2).sum() / len(predicted))**0.5

def rmsle_by_logs(predicted, actual):
    assert(len(predicted) == len(actual))
    return (((predicted - actual)**2).sum() / len(predicted))**0.5

def eval_model(labeled_data, target_col_name, clf, day_shifts=np.arange(90, 0, -15)):
    max_day = labeled_data.day.max()
    c = labeled_data.columns.difference([target_col_name])
    metric_by_shift = {}
    for day_shift in tqdm(day_shifts): 
        train_sample = labeled_data[labeled_data.day <= max_day - day_shift]
        test_sample = labeled_data[labeled_data.day > max_day - day_shift]
        clf.fit(train_sample[c], train_sample[target_col_name])
        predicted_volume = clf.predict(test_sample[c])
        metric_by_shift['RMSLE with {} days'.format(day_shift)] = rmsle_by_logs(
            predicted_volume, test_sample[target_col_name])
    return metric_by_shift

def test_weeks_preparation(X_test):
    X_test_by_weeks = []
    current_test_week = X_test[X_test.week_num == 65].copy()
    X_test_by_weeks.append(current_test_week)
    removing_month_cols = ['prev_month_1_mean_neg', 'prev_month_1_std_neg', 'prev_month_1_median_neg', 
                           'prev_month_1_max_neg', 'prev_month_1_min_neg',
#                            'prev_month_1_mean_pos', 'prev_month_1_std_pos', 'prev_month_1_median_pos'
                          ]
    removing_week_cols = []
    for prev_week_index in range(1, 5):
        current_test_week = X_test[X_test.week_num == 65 + prev_week_index].copy()
        current_test_week.drop(removing_month_cols, 1, inplace=True)
        removing_week_cols += [
            'prev_week_{}_neg'.format(prev_week_index),
            'prev_week_{}_mean_neg'.format(prev_week_index),
            'prev_week_{}_std_neg'.format(prev_week_index),
            'prev_week_{}_n_trans'.format(prev_week_index),
            'prev_week_{}_mean_n_trans'.format(prev_week_index),
            'prev_week_{}_std_n_trans'.format(prev_week_index),
            
        ]
        current_test_week.drop(removing_week_cols, 1, inplace=True)
        X_test_by_weeks.append(current_test_week)
    return X_test_by_weeks

def eval_nestimators(X, y, days_for_test, clf):
    max_day = X.day.max()
    X_train = X[X.day <= max_day - days_for_test]
    y_train = y[X.day <= max_day - days_for_test]
    X_test = X[X.day > max_day - days_for_test]
    y_test = y[X.day > max_day - days_for_test]
    
    eval_set  = [(X_train, y_train), (X_test, y_test)]
    clf.fit(X_train, y_train, early_stopping_rounds=100, eval_metric='rmse', eval_set=eval_set)
    return clf.best_iteration

def get_agile_prediction(X_train, y_train, X_test, clf, test_form):
    X_test_by_weeks = test_weeks_preparation(X_test)
    out_form = pd.DataFrame()
    for prev_week_index in tqdm(range(5)):
        clf.n_estimators = 3000
        current_test = X_test_by_weeks[prev_week_index] 
        drop_cols = X_train.columns.difference(current_test.columns)
        current_train = X_train.drop(drop_cols, 1)
    
        clf.n_estimators = eval_nestimators(current_train, y_train, 30, clf)
        print(clf.n_estimators)
        clf.fit(current_train, y_train)
        predicted_log_volume = clf.predict(current_test)
        
        current_out_form = test_form[test_form.week_num == 65 + prev_week_index].copy()
        current_out_form['id'] = current_out_form[['mcc_code', 'day']].apply(
            lambda x: '-'.join(map(str, x)), axis=1)
        current_out_form['volume'] = np.expm1(predicted_log_volume)
        out_form = out_form.append(current_out_form[['id', 'volume']])
    return out_form

In [38]:
from collections import Counter
Counter(dummy_train.columns)

Counter({'day': 1,
         'log_neg_amount': 1,
         'log_pos_amount': 1,
         'mcc_code_1711': 1,
         'mcc_code_1731': 1,
         'mcc_code_1799': 1,
         'mcc_code_2741': 1,
         'mcc_code_3000': 1,
         'mcc_code_3351': 1,
         'mcc_code_3501': 1,
         'mcc_code_4111': 1,
         'mcc_code_4112': 1,
         'mcc_code_4121': 1,
         'mcc_code_4131': 1,
         'mcc_code_4214': 1,
         'mcc_code_4215': 1,
         'mcc_code_4411': 1,
         'mcc_code_4511': 1,
         'mcc_code_4722': 1,
         'mcc_code_4784': 1,
         'mcc_code_4789': 1,
         'mcc_code_4812': 1,
         'mcc_code_4814': 1,
         'mcc_code_4816': 1,
         'mcc_code_4829': 1,
         'mcc_code_4899': 1,
         'mcc_code_4900': 1,
         'mcc_code_5013': 1,
         'mcc_code_5039': 1,
         'mcc_code_5044': 1,
         'mcc_code_5045': 1,
         'mcc_code_5047': 1,
         'mcc_code_5065': 1,
         'mcc_code_5072': 1,
         'mcc_code_507

In [32]:
from xgboost import XGBRegressor
clf = XGBRegressor(learning_rate=0.02, max_depth=6, n_estimators=350, nthread=-1, reg_alpha=1, reg_lambda=1)
drop_cols = [
    'log_pos_amount', 'n_transactions', 'neg_amount', 'pos_amount'
]
for prev_week_index in range(26,35):
    drop_cols += ['prev_week_{}_neg'.format(prev_week_index),
                'prev_week_{}_mean_neg'.format(prev_week_index),
                'prev_week_{}_std_neg'.format(prev_week_index)]

for prev_month_index in []:
    drop_cols += ['prev_month_1_mean_neg'.format(prev_month_index), 
                  'prev_month_1_std_neg'.format(prev_month_index),
                  'prev_month_1_median_neg'.format(prev_month_index),
                  'prev_month_1_max_neg'.format(prev_month_index),
                  'prev_month_1_min_neg'.format(prev_month_index),
                 ]
metric_by_shift = eval_model(
    dummy_train.drop(drop_cols, 1),
    'log_neg_amount', clf, [30])
for elem in metric_by_shift:
    print('{0}: {1}'.format(elem, metric_by_shift[elem]))

  0%|          | 0/1 [00:00<?, ?it/s]


ValueError: feature_names must be unique

In [30]:
train_transactions.columns.difference

Unnamed: 0,mcc_code,day,week_num,week_day,month_num,month_day,neg_amount,pos_amount,n_transactions,log_neg_amount,log_pos_amount,prev_week_1_neg,prev_week_1_mean_neg,prev_week_1_std_neg,prev_week_1_n_trans,prev_week_1_mean_n_trans,prev_week_1_std_n_trans,prev_week_2_neg,prev_week_2_mean_neg,prev_week_2_std_neg,prev_week_2_n_trans,prev_week_2_mean_n_trans,prev_week_2_std_n_trans,prev_week_3_neg,prev_week_3_mean_neg,prev_week_3_std_neg,prev_week_3_n_trans,prev_week_3_mean_n_trans,prev_week_3_std_n_trans,prev_week_4_neg,prev_week_4_mean_neg,prev_week_4_std_neg,prev_week_4_n_trans,prev_week_4_mean_n_trans,prev_week_4_std_n_trans,prev_week_5_neg,prev_week_5_mean_neg,prev_week_5_std_neg,prev_week_5_n_trans,prev_week_5_mean_n_trans,prev_week_5_std_n_trans,prev_week_6_neg,prev_week_6_mean_neg,prev_week_6_std_neg,prev_week_6_n_trans,prev_week_6_mean_n_trans,prev_week_6_std_n_trans,prev_week_7_neg,prev_week_7_mean_neg,prev_week_7_std_neg,prev_week_7_n_trans,prev_week_7_mean_n_trans,prev_week_7_std_n_trans,prev_week_8_neg,prev_week_8_mean_neg,prev_week_8_std_neg,prev_week_8_n_trans,prev_week_8_mean_n_trans,prev_week_8_std_n_trans,prev_week_9_neg,prev_week_9_mean_neg,prev_week_9_std_neg,prev_week_9_n_trans,prev_week_9_mean_n_trans,prev_week_9_std_n_trans,prev_week_10_neg,prev_week_10_mean_neg,prev_week_10_std_neg,prev_week_10_n_trans,prev_week_10_mean_n_trans,prev_week_10_std_n_trans,prev_week_11_neg,prev_week_11_mean_neg,prev_week_11_std_neg,prev_week_11_n_trans,prev_week_11_mean_n_trans,prev_week_11_std_n_trans,prev_week_12_neg,prev_week_12_mean_neg,prev_week_12_std_neg,prev_week_12_n_trans,prev_week_12_mean_n_trans,prev_week_12_std_n_trans,prev_week_13_neg,prev_week_13_mean_neg,prev_week_13_std_neg,prev_week_13_n_trans,prev_week_13_mean_n_trans,prev_week_13_std_n_trans,prev_week_14_neg,prev_week_14_mean_neg,prev_week_14_std_neg,prev_week_14_n_trans,prev_week_14_mean_n_trans,prev_week_14_std_n_trans,prev_week_15_neg,prev_week_15_mean_neg,prev_week_15_std_neg,prev_week_15_n_trans,prev_week_15_mean_n_trans,...,month_sum_term_id_888994_y,month_sum_term_id_888995_y,month_sum_term_id_888996_y,month_sum_term_id_888997_y,month_sum_term_id_889000_y,month_sum_term_id_889001_y,month_sum_term_id_889002_y,month_sum_term_id_889003_y,month_sum_term_id_889065_y,month_sum_term_id_902866_y,month_sum_term_id_940308_y,month_sum_term_id_940309_y,month_sum_term_id_940310_y,month_sum_term_id_940311_y,month_sum_term_id_940312_y,month_sum_term_id_940313_y,month_sum_term_id_940314_y,month_sum_term_id_940315_y,month_sum_term_id_940316_y,month_sum_term_id_940317_y,month_sum_term_id_940318_y,month_sum_term_id_940319_y,month_sum_term_id_940576_y,month_sum_term_id_940577_y,month_sum_term_id_940578_y,month_sum_term_id_940579_y,month_sum_term_id_940580_y,month_sum_term_id_940581_y,month_sum_term_id_940582_y,month_sum_term_id_940583_y,month_sum_term_id_940584_y,month_sum_term_id_940585_y,month_sum_term_id_940586_y,month_sum_term_id_940587_y,month_sum_term_id_940588_y,month_sum_term_id_940589_y,month_sum_term_id_940590_y,month_sum_term_id_940591_y,month_sum_term_id_941122_y,month_sum_term_id_941124_y,month_sum_term_id_941125_y,month_sum_term_id_941126_y,month_sum_term_id_941127_y,month_sum_term_id_941128_y,month_sum_term_id_941129_y,month_sum_term_id_941130_y,month_sum_term_id_941131_y,month_sum_term_id_941132_y,month_sum_term_id_941133_y,month_sum_term_id_941134_y,month_sum_term_id_941135_y,month_sum_term_id_941392_y,month_sum_term_id_941393_y,month_sum_term_id_941394_y,month_sum_term_id_941395_y,month_sum_term_id_941396_y,month_sum_term_id_941397_y,month_sum_term_id_941398_y,month_sum_term_id_941399_y,month_sum_term_id_941400_y,month_sum_term_id_941401_y,month_sum_term_id_941402_y,month_sum_term_id_941403_y,month_sum_term_id_941404_y,month_sum_term_id_941405_y,month_sum_term_id_941406_y,month_sum_term_id_941407_y,month_sum_term_id_945001_y,month_sum_term_id_945002_y,month_sum_term_id_945003_y,month_sum_term_id_945004_y,month_sum_term_id_945005_y,month_sum_term_id_945006_y,month_sum_term_id_945007_y,month_sum_term_id_945008_y,month_sum_term_id_945009_y,month_sum_term_id_945010_y,month_sum_term_id_945011_y,month_sum_term_id_945012_y,month_sum_term_id_945013_y,month_sum_term_id_945014_y,month_sum_term_id_945015_y,month_sum_term_id_945016_y,month_sum_term_id_945017_y,month_sum_term_id_945018_y,month_sum_term_id_945019_y,month_sum_term_id_945020_y,month_sum_term_id_945021_y,month_sum_term_id_945022_y,month_sum_term_id_945023_y,month_sum_term_id_945033_y,month_sum_term_id_945036_y,month_sum_term_id_945039_y,month_sum_term_id_945042_y,month_sum_term_id_945045_y,month_sum_term_id_953140_y,month_sum_term_id_995026_y,month_sum_term_id_CARDSVCS_y,month_sum_term_id_I0MO0IRZ_y,month_sum_term_id_WPGTID01_y
0,4814,0,-1,5,0,0,11098744,0,2365,16.222343,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,4814,1,-1,6,0,1,7881825,0,1697,15.880070,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,4814,2,0,0,0,2,6777480,0,1524,15.729116,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,4814,3,0,1,0,3,9277943,0,1937,16.043151,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,4814,4,0,2,0,4,9999757,0,1943,16.118071,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,4814,5,0,3,0,5,9501177,0,1800,16.066926,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,4814,6,0,4,0,6,9553618,0,1947,16.072431,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,4814,7,0,5,0,7,9084047,9,1993,16.022030,2.302585,16.222343,16.051206,0.242023,2365.0,2031.000000,472.347330,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,4814,8,0,6,0,8,7641198,0,1607,15.849065,0.000000,15.880070,16.051206,0.242023,1697.0,2031.000000,472.347330,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,4814,10,1,1,0,10,10398283,0,1989,16.157151,0.000000,16.043151,15.985827,0.141873,1937.0,1821.571429,186.298915,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [28]:
def split_train(labeled_data, target_col_name, day, drop_cols):
    drop_cols = [
        'log_pos_amount', 'n_transactions', 'neg_amount', 'pos_amount',
        'month_day', 'month_num',
    ]
    for prev_week_index in range(26,35):
        drop_cols += ['prev_week_{}_neg'.format(prev_week_index),
                    'prev_week_{}_mean_neg'.format(prev_week_index),
                    'prev_week_{}_std_neg'.format(prev_week_index)]
        
    max_day = labeled_data.day.max()
    c = labeled_data.drop(drop_cols, 1).columns.difference([target_col_name])
    train_sample = labeled_data[labeled_data.day <= max_day - day]
    test_sample = labeled_data[labeled_data.day > max_day - day]
    return train_sample[c], train_sample[target_col_name], test_sample[c], test_sample[target_col_name]

X_train, y_train, X_test, y_test = split_train(train_transactions, 'log_neg_amount', 30, drop_cols)

NameError: name 'drop_cols' is not defined

In [14]:
X_train.shape, y_train.shape

((78568, 273), (78568,))

In [51]:
from catboost import CatBoostRegressor, Pool
train_pool = Pool(data=X_train.values, label=y_train,
                  feature_names=X_train.columns.tolist(),
                  cat_features=[1, 269])
test_pool =  Pool(data=X_test.values, label=y_test,
                  feature_names=X_test.columns.tolist(),
                  cat_features=[1, 269])

# model = CatBoostRegressor(iterations=100, depth=6, learning_rate=0.05, loss_function='RMSE')

In [52]:
model = CatBoostRegressor(use_best_model=True, loss_function='RMSE')
model.fit(train_pool, eval_set=test_pool, verbose=True)

def rmsle_by_logs(predicted, actual):
    assert(len(predicted) == len(actual))
    return (((predicted - actual)**2).sum() / len(predicted))**0.5

predicted = model.predict(test_pool)
rmsle_by_logs(predicted, test_pool.get_label())

Borders for float features generated
0:	learn 10.11329289	test 10.45972023	bestTest 10.45972023		total: 191ms	remaining: 1m 35s
1:	learn 9.850727333	test 10.18899399	bestTest 10.18899399		total: 342ms	remaining: 1m 25s
2:	learn 9.595732255	test 9.924435667	bestTest 9.924435667		total: 486ms	remaining: 1m 20s
3:	learn 9.353273265	test 9.666036857	bestTest 9.666036857		total: 616ms	remaining: 1m 16s
4:	learn 9.119030135	test 9.422097457	bestTest 9.422097457		total: 692ms	remaining: 1m 8s
5:	learn 8.888422127	test 9.184339398	bestTest 9.184339398		total: 833ms	remaining: 1m 8s
6:	learn 8.668324691	test 8.953013406	bestTest 8.953013406		total: 959ms	remaining: 1m 7s
7:	learn 8.456881289	test 8.730312727	bestTest 8.730312727		total: 1.09s	remaining: 1m 7s
8:	learn 8.252567796	test 8.515305991	bestTest 8.515305991		total: 1.22s	remaining: 1m 6s
9:	learn 8.052613875	test 8.30973814	bestTest 8.30973814		total: 1.35s	remaining: 1m 6s
10:	learn 7.861768081	test 8.105978633	bestTest 8.105978633		

91:	learn 3.494243455	test 3.422551775	bestTest 3.422551775		total: 12.7s	remaining: 56.5s
92:	learn 3.491022288	test 3.418645843	bestTest 3.418645843		total: 12.8s	remaining: 56.1s
93:	learn 3.487187832	test 3.414050913	bestTest 3.414050913		total: 13s	remaining: 56s
94:	learn 3.483480866	test 3.409667324	bestTest 3.409667324		total: 13.1s	remaining: 56s
95:	learn 3.480331291	test 3.406084932	bestTest 3.406084932		total: 13.3s	remaining: 55.8s
96:	learn 3.476482469	test 3.402082305	bestTest 3.402082305		total: 13.4s	remaining: 55.7s
97:	learn 3.473663695	test 3.397694805	bestTest 3.397694805		total: 13.6s	remaining: 55.6s
98:	learn 3.470961603	test 3.393674036	bestTest 3.393674036		total: 13.7s	remaining: 55.5s
99:	learn 3.468371715	test 3.390569753	bestTest 3.390569753		total: 13.9s	remaining: 55.4s
100:	learn 3.465261473	test 3.38704214	bestTest 3.38704214		total: 14s	remaining: 55.4s
101:	learn 3.463044767	test 3.384014587	bestTest 3.384014587		total: 14.2s	remaining: 55.3s
102:	le

181:	learn 3.404389961	test 3.313314236	bestTest 3.313314236		total: 25.7s	remaining: 44.9s
182:	learn 3.404248951	test 3.313020631	bestTest 3.313020631		total: 25.9s	remaining: 44.8s
183:	learn 3.403979871	test 3.312814774	bestTest 3.312814774		total: 26s	remaining: 44.7s
184:	learn 3.403821771	test 3.312601212	bestTest 3.312601212		total: 26.2s	remaining: 44.7s
185:	learn 3.403778074	test 3.312552618	bestTest 3.312552618		total: 26.3s	remaining: 44.5s
186:	learn 3.403552223	test 3.312315138	bestTest 3.312315138		total: 26.5s	remaining: 44.4s
187:	learn 3.40323281	test 3.312221378	bestTest 3.312221378		total: 26.7s	remaining: 44.3s
188:	learn 3.40290127	test 3.312136278	bestTest 3.312136278		total: 26.8s	remaining: 44.2s
189:	learn 3.402689529	test 3.312187858	bestTest 3.312136278		total: 27s	remaining: 44s
190:	learn 3.402493038	test 3.312030915	bestTest 3.312030915		total: 27.1s	remaining: 43.9s
191:	learn 3.402392412	test 3.311973507	bestTest 3.311973507		total: 27.3s	remaining: 43

273:	learn 3.388601146	test 3.303750788	bestTest 3.303750788		total: 38.7s	remaining: 31.9s
274:	learn 3.388468664	test 3.303758116	bestTest 3.303750788		total: 38.9s	remaining: 31.8s
275:	learn 3.38824861	test 3.303741185	bestTest 3.303741185		total: 39s	remaining: 31.6s
276:	learn 3.388119752	test 3.303790413	bestTest 3.303741185		total: 39.2s	remaining: 31.5s
277:	learn 3.387935265	test 3.303642469	bestTest 3.303642469		total: 39.4s	remaining: 31.4s
278:	learn 3.387671068	test 3.303702994	bestTest 3.303642469		total: 39.5s	remaining: 31.3s
279:	learn 3.387450725	test 3.303906076	bestTest 3.303642469		total: 39.8s	remaining: 31.2s
280:	learn 3.387383561	test 3.303928215	bestTest 3.303642469		total: 39.9s	remaining: 31.1s
281:	learn 3.38738354	test 3.30392804	bestTest 3.303642469		total: 39.9s	remaining: 30.9s
282:	learn 3.387383519	test 3.303927871	bestTest 3.303642469		total: 40s	remaining: 30.7s
283:	learn 3.387383499	test 3.303927708	bestTest 3.303642469		total: 40s	remaining: 30.

363:	learn 3.37347953	test 3.301884641	bestTest 3.301653174		total: 50s	remaining: 18.7s
364:	learn 3.373230359	test 3.301961449	bestTest 3.301653174		total: 50.2s	remaining: 18.6s
365:	learn 3.373123989	test 3.301915773	bestTest 3.301653174		total: 50.3s	remaining: 18.4s
366:	learn 3.372946323	test 3.302012616	bestTest 3.301653174		total: 50.4s	remaining: 18.3s
367:	learn 3.372729181	test 3.301981275	bestTest 3.301653174		total: 50.6s	remaining: 18.1s
368:	learn 3.372555635	test 3.301954075	bestTest 3.301653174		total: 50.7s	remaining: 18s
369:	learn 3.372410878	test 3.301775122	bestTest 3.301653174		total: 50.9s	remaining: 17.9s
370:	learn 3.372192735	test 3.301551169	bestTest 3.301551169		total: 51s	remaining: 17.7s
371:	learn 3.372111596	test 3.301499225	bestTest 3.301499225		total: 51.1s	remaining: 17.6s
372:	learn 3.372004397	test 3.301558945	bestTest 3.301499225		total: 51.3s	remaining: 17.5s
373:	learn 3.371930378	test 3.301493582	bestTest 3.301493582		total: 51.4s	remaining: 1

454:	learn 3.35759624	test 3.300495633	bestTest 3.300258345		total: 1m 3s	remaining: 6.23s
455:	learn 3.357368506	test 3.300634933	bestTest 3.300258345		total: 1m 3s	remaining: 6.09s
456:	learn 3.357106159	test 3.300832893	bestTest 3.300258345		total: 1m 3s	remaining: 5.96s
457:	learn 3.357059627	test 3.300830142	bestTest 3.300258345		total: 1m 3s	remaining: 5.82s
458:	learn 3.356884161	test 3.300880705	bestTest 3.300258345		total: 1m 3s	remaining: 5.68s
459:	learn 3.356654645	test 3.30075058	bestTest 3.300258345		total: 1m 3s	remaining: 5.55s
460:	learn 3.356392567	test 3.300603337	bestTest 3.300258345		total: 1m 3s	remaining: 5.41s
461:	learn 3.356195176	test 3.300591733	bestTest 3.300258345		total: 1m 4s	remaining: 5.27s
462:	learn 3.356194028	test 3.300591629	bestTest 3.300258345		total: 1m 4s	remaining: 5.13s
463:	learn 3.355929456	test 3.300601071	bestTest 3.300258345		total: 1m 4s	remaining: 4.99s
464:	learn 3.355657492	test 3.300772911	bestTest 3.300258345		total: 1m 4s	remaini

3.3050215804039484

In [55]:
model.get_params()

{'depth': 6,
 'eval_metric': 'RMSE',
 'feature_border_type': 'MinEntropy',
 'gradient_iterations': 1,
 'has_time': False,
 'iterations': 500,
 'l2_leaf_reg': 3,
 'leaf_estimation_method': 'Gradient',
 'learning_rate': 0.03,
 'loss_function': 'RMSE',
 'name': 'experiment',
 'random_seed': 2597327671415215,
 'random_strength': 1,
 'rsm': 1,
 'store_all_simple_ctr': False,
 'thread_count': 8,
 'use_best_model': True,
 'verbose': True}

In [58]:
model = CatBoostRegressor(learning_rate=0.02, depth=5, iterations=700, use_best_model=True, loss_function='RMSE')
model.fit(train_pool, eval_set=test_pool, verbose=True)

def rmsle_by_logs(predicted, actual):
    assert(len(predicted) == len(actual))
    return (((predicted - actual)**2).sum() / len(predicted))**0.5

predicted = model.predict(test_pool)
rmsle_by_logs(predicted, test_pool.get_label())

Borders for float features generated
0:	learn 10.20208653	test 10.55593201	bestTest 10.55593201		total: 124ms	remaining: 1m 26s
1:	learn 10.02650468	test 10.37287783	bestTest 10.37287783		total: 247ms	remaining: 1m 26s
2:	learn 9.855170581	test 10.195783	bestTest 10.195783		total: 368ms	remaining: 1m 25s
3:	learn 9.686653378	test 10.02063933	bestTest 10.02063933		total: 480ms	remaining: 1m 23s
4:	learn 9.521708401	test 9.848647455	bestTest 9.848647455		total: 598ms	remaining: 1m 23s
5:	learn 9.360541042	test 9.68076017	bestTest 9.68076017		total: 718ms	remaining: 1m 23s
6:	learn 9.203280046	test 9.515898703	bestTest 9.515898703		total: 834ms	remaining: 1m 22s
7:	learn 9.048064309	test 9.355951233	bestTest 9.355951233		total: 954ms	remaining: 1m 22s
8:	learn 8.8977524	test 9.199549508	bestTest 9.199549508		total: 1.08s	remaining: 1m 22s
9:	learn 8.750861156	test 9.046082759	bestTest 9.046082759		total: 1.23s	remaining: 1m 24s
10:	learn 8.605620912	test 8.895516454	bestTest 8.895516454		

91:	learn 3.809818988	test 3.775304879	bestTest 3.775304879		total: 11.5s	remaining: 1m 16s
92:	learn 3.796311563	test 3.760308423	bestTest 3.760308423		total: 11.7s	remaining: 1m 16s
93:	learn 3.783265539	test 3.745523871	bestTest 3.745523871		total: 11.8s	remaining: 1m 15s
94:	learn 3.770640889	test 3.731704854	bestTest 3.731704854		total: 11.9s	remaining: 1m 15s
95:	learn 3.758894242	test 3.718320623	bestTest 3.718320623		total: 12s	remaining: 1m 15s
96:	learn 3.747351496	test 3.70523097	bestTest 3.70523097		total: 12.1s	remaining: 1m 15s
97:	learn 3.736072048	test 3.692308928	bestTest 3.692308928		total: 12.3s	remaining: 1m 15s
98:	learn 3.725180337	test 3.680126541	bestTest 3.680126541		total: 12.4s	remaining: 1m 15s
99:	learn 3.714397844	test 3.667997735	bestTest 3.667997735		total: 12.5s	remaining: 1m 15s
100:	learn 3.704132222	test 3.656574823	bestTest 3.656574823		total: 12.6s	remaining: 1m 14s
101:	learn 3.694217008	test 3.64515753	bestTest 3.64515753		total: 12.7s	remaining:

182:	learn 3.433995018	test 3.343960051	bestTest 3.343960051		total: 22.2s	remaining: 1m 2s
183:	learn 3.433366357	test 3.343304024	bestTest 3.343304024		total: 22.3s	remaining: 1m 2s
184:	learn 3.432809642	test 3.342624987	bestTest 3.342624987		total: 22.4s	remaining: 1m 2s
185:	learn 3.432212327	test 3.34210267	bestTest 3.34210267		total: 22.5s	remaining: 1m 2s
186:	learn 3.431631425	test 3.341538225	bestTest 3.341538225		total: 22.6s	remaining: 1m 1s
187:	learn 3.4310034	test 3.34102639	bestTest 3.34102639		total: 22.7s	remaining: 1m 1s
188:	learn 3.430427138	test 3.34017492	bestTest 3.34017492		total: 22.8s	remaining: 1m 1s
189:	learn 3.430016943	test 3.339470628	bestTest 3.339470628		total: 22.9s	remaining: 1m 1s
190:	learn 3.429499829	test 3.338842181	bestTest 3.338842181		total: 23s	remaining: 1m 1s
191:	learn 3.428945895	test 3.338200397	bestTest 3.338200397		total: 23.1s	remaining: 1m 1s
192:	learn 3.428443385	test 3.337579243	bestTest 3.337579243		total: 23.2s	remaining: 1m 1

273:	learn 3.408357715	test 3.314695176	bestTest 3.314695176		total: 33s	remaining: 51.2s
274:	learn 3.408260517	test 3.314628426	bestTest 3.314628426		total: 33.1s	remaining: 51.1s
275:	learn 3.408161828	test 3.314478141	bestTest 3.314478141		total: 33.2s	remaining: 51s
276:	learn 3.408080922	test 3.314419287	bestTest 3.314419287		total: 33.3s	remaining: 50.8s
277:	learn 3.40792605	test 3.31425355	bestTest 3.31425355		total: 33.4s	remaining: 50.7s
278:	learn 3.407743783	test 3.314053854	bestTest 3.314053854		total: 33.5s	remaining: 50.5s
279:	learn 3.407390803	test 3.31380868	bestTest 3.31380868		total: 33.6s	remaining: 50.4s
280:	learn 3.407161035	test 3.31358419	bestTest 3.31358419		total: 33.7s	remaining: 50.2s
281:	learn 3.407021973	test 3.313336437	bestTest 3.313336437		total: 33.8s	remaining: 50.1s
282:	learn 3.406855543	test 3.313264155	bestTest 3.313264155		total: 33.9s	remaining: 50s
283:	learn 3.406785691	test 3.313216168	bestTest 3.313216168		total: 34s	remaining: 49.8s
284

455:	learn 3.390258694	test 3.303102118	bestTest 3.303102118		total: 52.8s	remaining: 28.2s
456:	learn 3.390149338	test 3.303102202	bestTest 3.303102118		total: 52.9s	remaining: 28.1s
457:	learn 3.390012186	test 3.303087346	bestTest 3.303087346		total: 53s	remaining: 28s
458:	learn 3.389917928	test 3.303003374	bestTest 3.303003374		total: 53.1s	remaining: 27.9s
459:	learn 3.389917925	test 3.303003253	bestTest 3.303003253		total: 53.1s	remaining: 27.7s
460:	learn 3.389917923	test 3.303003135	bestTest 3.303003135		total: 53.2s	remaining: 27.6s
461:	learn 3.389883581	test 3.303007803	bestTest 3.303003135		total: 53.3s	remaining: 27.4s
462:	learn 3.389757225	test 3.302915614	bestTest 3.302915614		total: 53.4s	remaining: 27.3s
463:	learn 3.389704845	test 3.302861707	bestTest 3.302861707		total: 53.5s	remaining: 27.2s
464:	learn 3.389636554	test 3.302866126	bestTest 3.302861707		total: 53.6s	remaining: 27.1s
465:	learn 3.389581306	test 3.302931849	bestTest 3.302861707		total: 53.7s	remaining

546:	learn 3.381960344	test 3.300066857	bestTest 3.300065824		total: 1m 2s	remaining: 17.4s
547:	learn 3.381865224	test 3.300025142	bestTest 3.300025142		total: 1m 2s	remaining: 17.3s
548:	learn 3.381767938	test 3.299922469	bestTest 3.299922469		total: 1m 2s	remaining: 17.2s
549:	learn 3.381668241	test 3.300018885	bestTest 3.299922469		total: 1m 2s	remaining: 17.1s
550:	learn 3.381579374	test 3.300002333	bestTest 3.299922469		total: 1m 2s	remaining: 17s
551:	learn 3.381576371	test 3.30000231	bestTest 3.299922469		total: 1m 2s	remaining: 16.8s
552:	learn 3.381499633	test 3.299998586	bestTest 3.299922469		total: 1m 2s	remaining: 16.7s
553:	learn 3.381388988	test 3.299958077	bestTest 3.299922469		total: 1m 2s	remaining: 16.6s
554:	learn 3.381301344	test 3.2999011	bestTest 3.2999011		total: 1m 3s	remaining: 16.5s
555:	learn 3.381198852	test 3.29981025	bestTest 3.29981025		total: 1m 3s	remaining: 16.4s
556:	learn 3.381131564	test 3.299652857	bestTest 3.299652857		total: 1m 3s	remaining: 16.

637:	learn 3.373653241	test 3.297889049	bestTest 3.2978828		total: 1m 12s	remaining: 7.04s
638:	learn 3.373549356	test 3.297871621	bestTest 3.297871621		total: 1m 12s	remaining: 6.93s
639:	learn 3.373447408	test 3.29776587	bestTest 3.29776587		total: 1m 12s	remaining: 6.81s
640:	learn 3.373349031	test 3.297715053	bestTest 3.297715053		total: 1m 12s	remaining: 6.7s
641:	learn 3.373234369	test 3.297677398	bestTest 3.297677398		total: 1m 12s	remaining: 6.59s
642:	learn 3.373190437	test 3.297681455	bestTest 3.297677398		total: 1m 13s	remaining: 6.48s
643:	learn 3.37312643	test 3.297682334	bestTest 3.297677398		total: 1m 13s	remaining: 6.36s
644:	learn 3.373043041	test 3.297750166	bestTest 3.297677398		total: 1m 13s	remaining: 6.25s
645:	learn 3.372935199	test 3.297863731	bestTest 3.297677398		total: 1m 13s	remaining: 6.14s
646:	learn 3.372867451	test 3.297884486	bestTest 3.297677398		total: 1m 13s	remaining: 6.02s
647:	learn 3.372795736	test 3.297833366	bestTest 3.297677398		total: 1m 13s	

3.2999693783503807

In [59]:
model = CatBoostRegressor(learning_rate=0.02, depth=4, iterations=800, use_best_model=True, loss_function='RMSE')
model.fit(train_pool, eval_set=test_pool, verbose=True)

def rmsle_by_logs(predicted, actual):
    assert(len(predicted) == len(actual))
    return (((predicted - actual)**2).sum() / len(predicted))**0.5

predicted = model.predict(test_pool)
rmsle_by_logs(predicted, test_pool.get_label())

Borders for float features generated
0:	learn 10.2040481	test 10.55661855	bestTest 10.55661855		total: 94.5ms	remaining: 1m 15s
1:	learn 10.02780697	test 10.37334367	bestTest 10.37334367		total: 184ms	remaining: 1m 13s
2:	learn 9.855805373	test 10.19419934	bestTest 10.19419934		total: 265ms	remaining: 1m 10s
3:	learn 9.687561789	test 10.0196378	bestTest 10.0196378		total: 342ms	remaining: 1m 8s
4:	learn 9.52320908	test 9.84903141	bestTest 9.84903141		total: 424ms	remaining: 1m 7s
5:	learn 9.36198384	test 9.678267954	bestTest 9.678267954		total: 511ms	remaining: 1m 7s
6:	learn 9.207035337	test 9.520505495	bestTest 9.520505495		total: 595ms	remaining: 1m 7s
7:	learn 9.054622945	test 9.3587262	bestTest 9.3587262		total: 674ms	remaining: 1m 6s
8:	learn 8.904322617	test 9.202502104	bestTest 9.202502104		total: 750ms	remaining: 1m 5s
9:	learn 8.757652556	test 9.049657433	bestTest 9.049657433		total: 830ms	remaining: 1m 5s
10:	learn 8.613914532	test 8.899838642	bestTest 8.899838642		total: 91

90:	learn 3.837365924	test 3.803106939	bestTest 3.803106939		total: 8.41s	remaining: 1m 5s
91:	learn 3.823420954	test 3.787203801	bestTest 3.787203801		total: 8.49s	remaining: 1m 5s
92:	learn 3.809823331	test 3.772279138	bestTest 3.772279138		total: 8.58s	remaining: 1m 5s
93:	learn 3.796422968	test 3.7578799	bestTest 3.7578799		total: 8.67s	remaining: 1m 5s
94:	learn 3.784147941	test 3.744193602	bestTest 3.744193602		total: 8.76s	remaining: 1m 5s
95:	learn 3.772335349	test 3.730850576	bestTest 3.730850576		total: 8.86s	remaining: 1m 4s
96:	learn 3.760857811	test 3.718036721	bestTest 3.718036721		total: 8.95s	remaining: 1m 4s
97:	learn 3.749699039	test 3.705142227	bestTest 3.705142227		total: 9.03s	remaining: 1m 4s
98:	learn 3.738818454	test 3.692711145	bestTest 3.692711145		total: 9.11s	remaining: 1m 4s
99:	learn 3.728092577	test 3.680567388	bestTest 3.680567388		total: 9.2s	remaining: 1m 4s
100:	learn 3.717918125	test 3.668861644	bestTest 3.668861644		total: 9.29s	remaining: 1m 4s
101

183:	learn 3.442745045	test 3.352646013	bestTest 3.352646013		total: 16.7s	remaining: 56s
184:	learn 3.442102619	test 3.351904144	bestTest 3.351904144		total: 16.9s	remaining: 56.3s
185:	learn 3.441462747	test 3.351287931	bestTest 3.351287931		total: 17.1s	remaining: 56.4s
186:	learn 3.440856347	test 3.350608074	bestTest 3.350608074		total: 17.2s	remaining: 56.4s
187:	learn 3.440311927	test 3.350009862	bestTest 3.350009862		total: 17.3s	remaining: 56.4s
188:	learn 3.439427798	test 3.349199296	bestTest 3.349199296		total: 17.4s	remaining: 56.4s
189:	learn 3.43887849	test 3.348607383	bestTest 3.348607383		total: 17.6s	remaining: 56.6s
190:	learn 3.438339152	test 3.34795003	bestTest 3.34795003		total: 17.8s	remaining: 56.8s
191:	learn 3.437518991	test 3.346898454	bestTest 3.346898454		total: 17.9s	remaining: 56.8s
192:	learn 3.437038263	test 3.346380734	bestTest 3.346380734		total: 18.1s	remaining: 56.8s
193:	learn 3.43652666	test 3.34579109	bestTest 3.34579109		total: 18.1s	remaining: 56

274:	learn 3.416777134	test 3.322489688	bestTest 3.322489688		total: 25.7s	remaining: 49s
275:	learn 3.416428259	test 3.322308498	bestTest 3.322308498		total: 25.8s	remaining: 48.9s
276:	learn 3.416311447	test 3.321993919	bestTest 3.321993919		total: 25.8s	remaining: 48.8s
277:	learn 3.416234812	test 3.321958846	bestTest 3.321958846		total: 25.9s	remaining: 48.7s
278:	learn 3.41606574	test 3.321734087	bestTest 3.321734087		total: 26s	remaining: 48.6s
279:	learn 3.415810901	test 3.321517617	bestTest 3.321517617		total: 26.1s	remaining: 48.5s
280:	learn 3.415509973	test 3.321298633	bestTest 3.321298633		total: 26.2s	remaining: 48.4s
281:	learn 3.4154807	test 3.321255483	bestTest 3.321255483		total: 26.3s	remaining: 48.3s
282:	learn 3.415292874	test 3.321105529	bestTest 3.321105529		total: 26.4s	remaining: 48.2s
283:	learn 3.415212027	test 3.320933058	bestTest 3.320933058		total: 26.5s	remaining: 48.1s
284:	learn 3.415135503	test 3.320868018	bestTest 3.320868018		total: 26.5s	remaining: 4

365:	learn 3.408355465	test 3.313466093	bestTest 3.313466093		total: 34.1s	remaining: 40.4s
366:	learn 3.408254344	test 3.313170851	bestTest 3.313170851		total: 34.2s	remaining: 40.3s
367:	learn 3.408212078	test 3.312995176	bestTest 3.312995176		total: 34.3s	remaining: 40.2s
368:	learn 3.408107637	test 3.312937187	bestTest 3.312937187		total: 34.4s	remaining: 40.1s
369:	learn 3.408064285	test 3.312880061	bestTest 3.312880061		total: 34.5s	remaining: 40.1s
370:	learn 3.407983574	test 3.312778853	bestTest 3.312778853		total: 34.6s	remaining: 40s
371:	learn 3.407942368	test 3.312745704	bestTest 3.312745704		total: 34.6s	remaining: 39.9s
372:	learn 3.407883944	test 3.312707063	bestTest 3.312707063		total: 34.8s	remaining: 39.8s
373:	learn 3.407833089	test 3.312665804	bestTest 3.312665804		total: 34.8s	remaining: 39.7s
374:	learn 3.407756584	test 3.312670836	bestTest 3.312665804		total: 34.9s	remaining: 39.6s
375:	learn 3.407752755	test 3.312645475	bestTest 3.312645475		total: 35s	remaining

455:	learn 3.402987784	test 3.309335407	bestTest 3.309316202		total: 41.8s	remaining: 31.5s
456:	learn 3.402940702	test 3.309315984	bestTest 3.309315984		total: 41.9s	remaining: 31.4s
457:	learn 3.402940698	test 3.309315864	bestTest 3.309315864		total: 41.9s	remaining: 31.3s
458:	learn 3.402883685	test 3.309258361	bestTest 3.309258361		total: 42s	remaining: 31.2s
459:	learn 3.402847876	test 3.309205825	bestTest 3.309205825		total: 42.1s	remaining: 31.1s
460:	learn 3.402743461	test 3.309122977	bestTest 3.309122977		total: 42.1s	remaining: 31s
461:	learn 3.402743103	test 3.309122891	bestTest 3.309122891		total: 42.2s	remaining: 30.9s
462:	learn 3.402680973	test 3.309167803	bestTest 3.309122891		total: 42.3s	remaining: 30.8s
463:	learn 3.402618858	test 3.309158786	bestTest 3.309122891		total: 42.4s	remaining: 30.7s
464:	learn 3.402534273	test 3.309105442	bestTest 3.309105442		total: 42.5s	remaining: 30.6s
465:	learn 3.402481012	test 3.309090483	bestTest 3.309090483		total: 42.6s	remaining

546:	learn 3.397587969	test 3.306525899	bestTest 3.306506467		total: 49.4s	remaining: 22.9s
547:	learn 3.397548138	test 3.306521546	bestTest 3.306506467		total: 49.5s	remaining: 22.8s
548:	learn 3.397437223	test 3.306447668	bestTest 3.306447668		total: 49.6s	remaining: 22.7s
549:	learn 3.397391257	test 3.306413735	bestTest 3.306413735		total: 49.7s	remaining: 22.6s
550:	learn 3.397291237	test 3.306485942	bestTest 3.306413735		total: 49.8s	remaining: 22.5s
551:	learn 3.397279022	test 3.306471289	bestTest 3.306413735		total: 49.9s	remaining: 22.4s
552:	learn 3.397183073	test 3.306335833	bestTest 3.306335833		total: 50s	remaining: 22.3s
553:	learn 3.397113103	test 3.306244789	bestTest 3.306244789		total: 50.1s	remaining: 22.2s
554:	learn 3.397027152	test 3.306266299	bestTest 3.306244789		total: 50.1s	remaining: 22.1s
555:	learn 3.396972913	test 3.30617137	bestTest 3.30617137		total: 50.2s	remaining: 22s
556:	learn 3.396893828	test 3.306103738	bestTest 3.306103738		total: 50.3s	remaining: 

636:	learn 3.39200067	test 3.304193403	bestTest 3.304193403		total: 57.4s	remaining: 14.7s
637:	learn 3.391942134	test 3.304058482	bestTest 3.304058482		total: 57.5s	remaining: 14.6s
638:	learn 3.391894898	test 3.304028054	bestTest 3.304028054		total: 57.6s	remaining: 14.5s
639:	learn 3.39180219	test 3.303989366	bestTest 3.303989366		total: 57.7s	remaining: 14.4s
640:	learn 3.391749907	test 3.303948615	bestTest 3.303948615		total: 57.8s	remaining: 14.3s
641:	learn 3.391710725	test 3.303934658	bestTest 3.303934658		total: 57.9s	remaining: 14.3s
642:	learn 3.39164521	test 3.303932566	bestTest 3.303932566		total: 58s	remaining: 14.2s
643:	learn 3.391549064	test 3.304010948	bestTest 3.303932566		total: 58.1s	remaining: 14.1s
644:	learn 3.391479865	test 3.304189358	bestTest 3.303932566		total: 58.2s	remaining: 14s
645:	learn 3.39140501	test 3.304129265	bestTest 3.303932566		total: 58.3s	remaining: 13.9s
646:	learn 3.391318541	test 3.304125372	bestTest 3.303932566		total: 58.4s	remaining: 13

728:	learn 3.38616419	test 3.303493818	bestTest 3.303344452		total: 1m 6s	remaining: 6.45s
729:	learn 3.386132443	test 3.30349696	bestTest 3.303344452		total: 1m 6s	remaining: 6.35s
730:	learn 3.386104288	test 3.303487725	bestTest 3.303344452		total: 1m 6s	remaining: 6.26s
731:	learn 3.386045777	test 3.303471318	bestTest 3.303344452		total: 1m 6s	remaining: 6.17s
732:	learn 3.385986596	test 3.303490399	bestTest 3.303344452		total: 1m 6s	remaining: 6.08s
733:	learn 3.385928879	test 3.303453871	bestTest 3.303344452		total: 1m 6s	remaining: 5.99s
734:	learn 3.385888855	test 3.303420554	bestTest 3.303344452		total: 1m 6s	remaining: 5.9s
735:	learn 3.385837299	test 3.303387419	bestTest 3.303344452		total: 1m 6s	remaining: 5.81s
736:	learn 3.385813223	test 3.303361916	bestTest 3.303344452		total: 1m 6s	remaining: 5.72s
737:	learn 3.385797552	test 3.30335118	bestTest 3.303344452		total: 1m 7s	remaining: 5.63s
738:	learn 3.385720539	test 3.303284396	bestTest 3.303284396		total: 1m 7s	remaining

3.3056319243487331