In [1]:
import numpy as np
import pandas as pd
from sklearn import preprocessing
import lightgbm as lgb
import os
import shutil
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
from tqdm import tqdm
init_notebook_mode(connected=True)

pd.options.display.max_columns = 500
#import pandas_profiling

def display_importances(feature_importance_df_):
    cols = feature_importance_df_[["feature", "importance"]].groupby("feature").mean().sort_values(by="importance", ascending=False)[:50].index
    best_features = feature_importance_df_.loc[feature_importance_df_.feature.isin(cols)]
    plt.figure(figsize=(8, 10))
    sns.barplot(x="importance", y="feature", data=best_features.sort_values(by="importance", ascending=False))
    plt.title('LightGBM Features (avg over folds)')
    plt.tight_layout()
    #plt.savefig('lgbm_importances01.png')

In [2]:
train_df = pd.read_csv('../data/train.csv')
test_df = pd.read_csv('../data/test.csv')

var_list = [f for f in train_df.columns if 'var' in f]


In [3]:
freq_cols = [f +'_freq' for f in var_list]



In [5]:
test_df[freq_cols] = test_df[var_list].apply(lambda x: x.map(x.value_counts()))
train_df[freq_cols] = train_df[var_list].apply(lambda x: x.map(x.value_counts()))


test_df['min_freq'] = test_df[freq_cols].min(1)
train_df['min_freq'] = train_df[freq_cols].min(1)



real_test = test_df.loc[test_df.min_freq==1].copy()
fake_test = test_df.loc[test_df.min_freq!=1].copy()

In [5]:
from sklearn.model_selection import train_test_split
train_df_1,train_df_2 = train_test_split(train_df, test_size=0.5, random_state=42)


In [6]:
train_df_1[freq_cols] = train_df_1[var_list].apply(lambda x: x.map(x.value_counts()))
train_df_2[freq_cols] = train_df_2[var_list].apply(lambda x: x.map(x.value_counts()))
real_test[freq_cols] = real_test[var_list].apply(lambda x: x.map(x.value_counts()))
fake_test[freq_cols] = fake_test[var_list].apply(lambda x: x.map(x.value_counts()))




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy



In [7]:
train_df_1['freq_mean'] = train_df_1[freq_cols].mean(1)
train_df_1['freq_std'] = train_df_1[freq_cols].std(1)
train_df_1['freq_sum'] = train_df_1[freq_cols].sum(1)
train_df_1['freq_max'] = train_df_1[freq_cols].max(1)
train_df_1['unique_per_row'] = np.sum(np.where(train_df_1[freq_cols]==1,1,0),axis=1)

train_df_2['freq_mean'] = train_df_2[freq_cols].mean(1)
train_df_2['freq_std'] = train_df_2[freq_cols].std(1)
train_df_2['freq_sum'] = train_df_2[freq_cols].sum(1)
train_df_2['freq_max'] = train_df_2[freq_cols].max(1)
train_df_2['unique_per_row'] = np.sum(np.where(train_df_2[freq_cols]==1,1,0),axis=1)

real_test['freq_mean'] = real_test[freq_cols].mean(1)
real_test['freq_std'] = real_test[freq_cols].std(1)
real_test['freq_sum'] = real_test[freq_cols].sum(1)
real_test['freq_max'] = real_test[freq_cols].max(1)
real_test['unique_per_row'] = np.sum(np.where(real_test[freq_cols]==1,1,0),axis=1)

fake_test['freq_mean'] = fake_test[freq_cols].mean(1)
fake_test['freq_std'] = fake_test[freq_cols].std(1)
fake_test['freq_sum'] = fake_test[freq_cols].sum(1)
fake_test['freq_max'] = fake_test[freq_cols].max(1)
fake_test['unique_per_row'] = np.sum(np.where(fake_test[freq_cols]==1,1,0),axis=1)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy



A 

In [8]:
train_df = pd.concat([train_df_1, train_df_2],sort=False)
train_df.sort_index(inplace=True)

test_df = pd.concat([real_test, fake_test],sort=False)
test_df.sort_index(inplace=True)

In [9]:
test_df.head()

Unnamed: 0,ID_code,var_0,var_1,var_2,var_3,var_4,var_5,var_6,var_7,var_8,var_9,var_10,var_11,var_12,var_13,var_14,var_15,var_16,var_17,var_18,var_19,var_20,var_21,var_22,var_23,var_24,var_25,var_26,var_27,var_28,var_29,var_30,var_31,var_32,var_33,var_34,var_35,var_36,var_37,var_38,var_39,var_40,var_41,var_42,var_43,var_44,var_45,var_46,var_47,var_48,var_49,var_50,var_51,var_52,var_53,var_54,var_55,var_56,var_57,var_58,var_59,var_60,var_61,var_62,var_63,var_64,var_65,var_66,var_67,var_68,var_69,var_70,var_71,var_72,var_73,var_74,var_75,var_76,var_77,var_78,var_79,var_80,var_81,var_82,var_83,var_84,var_85,var_86,var_87,var_88,var_89,var_90,var_91,var_92,var_93,var_94,var_95,var_96,var_97,var_98,var_99,var_100,var_101,var_102,var_103,var_104,var_105,var_106,var_107,var_108,var_109,var_110,var_111,var_112,var_113,var_114,var_115,var_116,var_117,var_118,var_119,var_120,var_121,var_122,var_123,var_124,var_125,var_126,var_127,var_128,var_129,var_130,var_131,var_132,var_133,var_134,var_135,var_136,var_137,var_138,var_139,var_140,var_141,var_142,var_143,var_144,var_145,var_146,var_147,var_148,var_149,var_150,var_151,var_152,var_153,var_154,var_155,var_156,var_157,var_158,var_159,var_160,var_161,var_162,var_163,var_164,var_165,var_166,var_167,var_168,var_169,var_170,var_171,var_172,var_173,var_174,var_175,var_176,var_177,var_178,var_179,var_180,var_181,var_182,var_183,var_184,var_185,var_186,var_187,var_188,var_189,var_190,var_191,var_192,var_193,var_194,var_195,var_196,var_197,var_198,var_199,var_0_freq,var_1_freq,var_2_freq,var_3_freq,var_4_freq,var_5_freq,var_6_freq,var_7_freq,var_8_freq,var_9_freq,var_10_freq,var_11_freq,var_12_freq,var_13_freq,var_14_freq,var_15_freq,var_16_freq,var_17_freq,var_18_freq,var_19_freq,var_20_freq,var_21_freq,var_22_freq,var_23_freq,var_24_freq,var_25_freq,var_26_freq,var_27_freq,var_28_freq,var_29_freq,var_30_freq,var_31_freq,var_32_freq,var_33_freq,var_34_freq,var_35_freq,var_36_freq,var_37_freq,var_38_freq,var_39_freq,var_40_freq,var_41_freq,var_42_freq,var_43_freq,var_44_freq,var_45_freq,var_46_freq,var_47_freq,var_48_freq,var_49_freq,var_50_freq,var_51_freq,var_52_freq,var_53_freq,var_54_freq,var_55_freq,var_56_freq,var_57_freq,var_58_freq,var_59_freq,var_60_freq,var_61_freq,var_62_freq,var_63_freq,var_64_freq,var_65_freq,var_66_freq,var_67_freq,var_68_freq,var_69_freq,var_70_freq,var_71_freq,var_72_freq,var_73_freq,var_74_freq,var_75_freq,var_76_freq,var_77_freq,var_78_freq,var_79_freq,var_80_freq,var_81_freq,var_82_freq,var_83_freq,var_84_freq,var_85_freq,var_86_freq,var_87_freq,var_88_freq,var_89_freq,var_90_freq,var_91_freq,var_92_freq,var_93_freq,var_94_freq,var_95_freq,var_96_freq,var_97_freq,var_98_freq,var_99_freq,var_100_freq,var_101_freq,var_102_freq,var_103_freq,var_104_freq,var_105_freq,var_106_freq,var_107_freq,var_108_freq,var_109_freq,var_110_freq,var_111_freq,var_112_freq,var_113_freq,var_114_freq,var_115_freq,var_116_freq,var_117_freq,var_118_freq,var_119_freq,var_120_freq,var_121_freq,var_122_freq,var_123_freq,var_124_freq,var_125_freq,var_126_freq,var_127_freq,var_128_freq,var_129_freq,var_130_freq,var_131_freq,var_132_freq,var_133_freq,var_134_freq,var_135_freq,var_136_freq,var_137_freq,var_138_freq,var_139_freq,var_140_freq,var_141_freq,var_142_freq,var_143_freq,var_144_freq,var_145_freq,var_146_freq,var_147_freq,var_148_freq,var_149_freq,var_150_freq,var_151_freq,var_152_freq,var_153_freq,var_154_freq,var_155_freq,var_156_freq,var_157_freq,var_158_freq,var_159_freq,var_160_freq,var_161_freq,var_162_freq,var_163_freq,var_164_freq,var_165_freq,var_166_freq,var_167_freq,var_168_freq,var_169_freq,var_170_freq,var_171_freq,var_172_freq,var_173_freq,var_174_freq,var_175_freq,var_176_freq,var_177_freq,var_178_freq,var_179_freq,var_180_freq,var_181_freq,var_182_freq,var_183_freq,var_184_freq,var_185_freq,var_186_freq,var_187_freq,var_188_freq,var_189_freq,var_190_freq,var_191_freq,var_192_freq,var_193_freq,var_194_freq,var_195_freq,var_196_freq,var_197_freq,var_198_freq,var_199_freq,min_freq,freq_mean,freq_std,freq_sum,freq_max,unique_per_row
0,test_0,11.0656,7.7798,12.9536,9.4292,11.4327,-2.3805,5.8493,18.2675,2.1337,8.81,-2.0248,-4.3554,13.9696,0.3458,7.5408,14.5001,7.7028,-19.0919,15.5806,16.1763,3.7088,18.8064,1.5899,3.0654,6.4509,14.1192,-9.4902,-2.1917,5.7107,3.7864,-1.7981,9.2645,2.0657,12.7753,11.3334,8.1462,-0.061,3.5331,9.7804,8.7625,-15.6305,18.8766,11.2864,11.8362,13.368,-31.9891,12.1776,8.7714,17.2011,16.8508,13.0534,14.4069,-4.8525,7.3213,-0.5259,16.6365,19.3036,6.4129,-5.3948,9.3269,11.9314,-3.575,-0.7706,0.8705,6.9282,2.8914,5.9744,17.4851,5.0125,-1.423,33.3401,0.8018,-4.7906,30.2708,26.8339,21.7205,7.3075,14.081,3.1192,17.4265,9.4883,16.906,14.5117,10.0276,-0.9706,20.4588,4.7945,20.416,13.1633,7.9307,-7.6509,7.0834,15.2324,10.1416,5.9156,-0.5775,5.76,30.3238,2.1251,1.8585,-9.2198,17.3089,30.9548,1.4918,12.8721,3.4902,8.2856,11.9794,14.0176,15.0763,3.7662,6.0426,4.4243,14.1799,2.0921,1.5493,3.2206,0.0172,-6.6602,8.4785,42.0248,11.4164,0.4564,9.4006,0.9685,12.4929,14.124,4.0388,-4.4442,16.6684,12.538,0.9205,10.5998,7.5147,-4.1748,-0.4824,10.5267,17.7547,-6.5226,-2.5502,-5.1547,-2.1246,19.8319,13.0752,9.2275,3.0213,11.6793,-11.6827,4.1017,5.2954,18.7741,9.8892,7.5219,14.9745,18.988,1.0842,11.9125,-4.5103,16.1361,11.0067,5.9232,5.4113,3.8302,5.738,-8.6105,22.953,2.5531,-0.2836,4.3416,5.1855,4.2603,1.6779,29.0849,8.4685,18.1317,12.2818,-0.6912,10.2226,-5.5579,2.2926,-4.5358,10.3903,-15.4937,3.9697,31.3521,-1.1651,9.2874,-23.5705,13.2643,1.6591,-2.1556,11.8495,-1.43,2.4508,13.7112,2.4669,4.3654,10.72,15.4722,-8.7197,4,2,4,5,4,1,3,1,2,6,2,6,11,1,4,7,4,2,2,3,3,2,3,14,3,7,6,6,6,1,5,1,5,6,5,2,4,4,3,4,3,2,4,16,2,3,5,2,2,1,6,2,2,3,2,1,3,5,4,2,6,4,2,3,3,1,4,4,387,2,3,17,2,3,1,1,4,3,1,2,2,3,3,1,3,2,3,6,3,2,2,33,3,7,2,5,3,2,10,1,2,2,1,18,1,4,8,2,7,1,1,11,7,4,5,4,5,2,3,4,1,4,1,2,3,12,2,1,5,4,13,11,2,2,2,2,4,2,1,4,3,1,4,3,9,2,7,1,23,1,1,3,1,8,1,2,3,3,1,4,2,18,1,3,2,2,19,1,2,6,7,3,3,3,3,2,2,2,1,7,1,8,4,2,2,2,8,1,1,7,3,1,1,5,2,2,3,3,3,1,2,5.915,27.391408,1183,387,37
1,test_1,8.5304,1.2543,11.3047,5.1858,9.1974,-4.0117,6.0196,18.6316,-4.4131,5.9739,-1.3809,-0.331,14.1129,2.5667,5.4988,14.1853,7.0196,4.6564,29.1609,0.091,12.1469,3.1389,5.2578,2.4228,16.2064,13.5023,-5.2341,-3.6648,5.708,2.9965,-10.472,11.4938,-0.966,15.3445,10.6361,0.8966,6.7428,2.3421,12.8678,-1.5536,10.0309,3.1337,10.5742,11.7664,2.1782,-41.1924,13.5322,-17.3834,6.3806,12.5589,11.6887,25.393,1.5776,6.8481,8.7348,16.4239,21.7056,6.9345,1.6678,9.5249,5.3383,-18.7083,1.3382,-1.7401,5.8398,3.1051,4.4307,16.0005,5.0306,-7.3365,12.2806,0.6992,-0.7772,21.5123,6.7803,18.1896,6.9388,22.1336,6.3755,13.1525,1.9772,14.0406,6.6904,9.9732,-11.5679,20.4525,9.4951,9.6343,8.1252,2.6059,-17.4201,7.1848,15.3484,10.6522,5.9897,0.3392,10.3516,29.8204,1.9998,-1.4166,-1.7257,15.4712,35.602,1.657,13.0783,2.7752,6.4986,4.6835,13.7963,17.7261,1.7375,5.5689,3.6609,8.9725,4.1159,1.0693,2.0234,8.276,-6.861,0.278,17.0488,11.6704,3.1215,8.5093,5.6367,12.0099,14.2372,-6.16,-5.669,8.9094,11.0605,0.4583,9.7974,7.0891,2.6849,8.497,15.7774,4.8775,3.6129,6.753,11.1003,15.3593,2.2105,8.228,9.0717,-5.0947,8.7644,-2.2873,4.124,-13.3006,18.7454,9.3783,1.5284,16.0407,7.7732,1.4316,14.8679,3.3619,11.5799,14.2058,30.9641,5.6723,3.6873,13.0429,-10.6572,15.5134,3.2185,9.0535,7.0535,5.3924,-0.772,-8.1783,29.9227,-5.6274,10.5018,9.6083,-0.4935,8.1696,-4.3605,5.211,0.4087,12.003,-10.3812,5.8496,25.1958,-8.8468,11.8263,-8.7112,15.9072,0.9812,10.6165,8.8349,0.9403,10.1282,15.5765,0.4773,-1.4852,9.8714,19.1293,-20.976,3,3,6,2,2,1,4,5,1,2,3,2,23,4,4,13,7,2,6,4,3,1,11,5,1,10,5,3,7,2,4,6,3,1,7,5,3,2,1,2,1,1,4,12,3,3,2,3,1,2,4,2,2,2,2,2,3,9,3,5,3,2,1,7,2,3,1,3,159,3,2,21,2,1,1,4,2,3,4,2,3,1,2,2,2,1,1,5,2,4,2,18,3,8,1,7,1,3,6,12,3,4,2,23,7,11,2,1,6,3,3,3,1,1,2,7,4,4,2,5,1,3,2,2,2,11,4,3,2,2,2,3,3,5,3,1,2,2,2,3,1,2,1,2,3,5,2,1,16,3,6,2,1,4,2,3,2,1,2,3,1,15,3,3,3,1,5,2,8,5,4,2,2,1,3,2,3,2,1,2,1,4,1,2,1,1,4,4,2,8,2,2,9,3,1,8,2,4,2,4,2,4.51,11.573228,902,159,37
2,test_2,5.4827,-10.3581,10.1407,7.0479,10.2628,9.8052,4.895,20.2537,1.5233,8.3442,-4.7057,-3.0422,13.6751,3.8183,10.8535,14.2126,9.8837,2.6541,21.2181,20.8163,12.4666,12.3696,4.7473,2.7936,5.2189,13.567,-15.4246,-0.1655,7.2633,3.431,-9.1508,9.732,3.1062,22.3076,11.9593,9.9255,4.0702,4.9934,8.0667,0.8804,-19.0841,5.2272,9.5977,12.1801,8.3565,15.117,10.0921,-20.8504,8.6758,8.1292,11.8932,10.6869,-0.6434,5.651,9.3742,25.8831,19.8701,5.4834,-4.0304,8.516,8.9776,-5.6619,2.8117,2.5996,9.0986,7.1167,4.9466,13.8268,5.0093,4.7782,19.2081,0.434,0.8459,34.8598,20.7048,16.4953,-9.7077,19.6357,7.6587,15.5744,16.1691,14.3299,1.336,-0.4412,-0.283,14.9105,-3.9016,14.6881,7.322,-5.1443,-34.3488,7.0194,12.4785,9.6665,13.2595,-0.5624,5.6347,9.5853,1.4515,1.7818,-3.5065,14.1663,28.0256,1.3935,10.8257,4.2954,8.2125,26.2595,14.0232,19.4604,8.6896,8.1036,1.2057,8.9156,0.9777,2.3797,3.1638,37.8664,-3.3864,-2.409,29.7978,12.2056,4.7688,7.9344,2.2102,12.6482,14.3377,2.3268,2.393,13.7005,12.7047,0.7507,7.7726,6.595,0.299,12.9154,29.9162,6.8031,10.5031,-6.0452,-4.5298,1.3903,5.0469,12.974,9.3878,-0.1113,11.6749,16.8588,4.26,14.6476,14.4431,14.1649,9.4875,16.5769,7.2638,-2.2008,12.5953,7.4487,23.1407,10.4597,39.3654,5.5228,3.3159,4.3324,-0.5382,13.3009,3.1243,-4.1731,1.233,6.1513,-0.0391,1.495,16.8874,-2.9787,27.4035,15.8819,-10.966,15.6415,-9.4056,4.4611,-3.0835,8.5549,-2.8517,13.477,24.4721,-3.4824,4.9178,-2.072,11.539,1.1821,-0.7484,10.9935,1.9803,2.18,12.9813,2.1281,-7.1086,7.0618,19.8956,-23.1794,2,2,3,5,5,2,6,1,4,4,2,3,7,2,1,12,4,3,2,1,2,2,4,10,5,14,1,7,4,5,1,9,3,2,4,1,2,6,1,1,4,2,4,8,2,1,3,1,6,3,7,1,4,3,1,2,2,4,3,6,2,1,2,3,3,5,5,2,194,1,8,13,3,2,1,2,1,1,2,3,1,2,1,2,1,1,1,4,3,4,1,15,11,6,3,1,2,1,4,2,1,2,9,15,2,10,4,2,25,2,2,1,3,6,4,6,3,1,1,1,4,5,2,2,4,5,4,4,8,1,8,11,2,15,3,1,2,2,2,2,1,2,1,3,6,1,2,3,6,3,3,1,5,2,2,1,4,3,4,2,1,13,6,2,1,1,15,1,3,8,6,3,2,1,1,3,2,5,3,1,2,2,1,3,4,1,2,1,1,5,3,3,4,3,1,4,2,4,4,1,2,4.565,13.88053,913,194,50
3,test_3,8.5374,-1.3222,12.022,6.5749,8.8458,3.1744,4.9397,20.566,3.3755,7.4578,0.0095,-5.0659,14.0526,13.501,8.766,14.7352,10.0383,-15.3508,2.1273,21.4797,14.5372,12.5527,2.9707,4.2398,13.7796,14.1408,1.0061,-1.3479,5.257,6.5911,6.2161,9.554,2.3628,10.2124,10.8047,-2.5588,6.072,3.2613,16.5632,8.8336,-4.8327,0.9554,12.3754,11.4241,6.6917,-12.9761,13.7343,5.015,31.3923,5.8555,12.6082,1.4182,-4.1185,6.2536,1.4257,13.5426,15.409,6.8761,1.7476,10.0413,15.2857,-4.1378,0.7928,2.5301,8.1458,2.5738,5.9876,13.0758,5.0087,-9.7824,8.9289,0.4205,-2.5463,2.9428,10.7087,12.2008,12.5465,19.4201,5.506,14.1586,17.5941,15.4375,-13.2668,14.0885,4.0357,22.3119,1.8571,16.521,10.8149,0.3256,-21.4797,6.9174,9.9483,10.3696,11.0362,0.1892,19.4321,40.3383,1.4105,2.6165,1.7021,2.5363,3.8763,1.5173,13.4083,2.8965,7.0919,21.6304,14.2,23.0368,10.3445,6.0369,5.0227,12.66,2.1278,4.0592,1.9084,11.6095,7.5397,8.1972,20.0844,10.444,8.4676,5.035,4.3103,12.0067,13.7149,1.6143,-1.2328,22.7248,12.6609,0.8039,4.7666,6.7888,5.8537,-4.5434,19.0111,12.6907,-2.9322,12.7898,12.0466,13.1646,7.7063,11.6549,9.8274,1.8061,8.6963,1.8057,3.8265,-16.3027,13.7106,9.7908,5.8497,15.4378,5.0372,-8.7673,13.6035,-3.5002,13.9785,14.6118,19.7251,5.3882,3.6775,7.4753,-11.078,24.8712,2.6415,2.2673,7.2788,5.6406,7.2048,3.4504,2.413,11.1674,14.5499,10.6151,-5.7922,13.9407,7.1078,1.1019,9.459,9.8243,5.9917,5.1634,8.1154,3.6638,3.3102,-19.7819,13.4499,1.3104,9.5702,9.0766,1.658,3.5813,15.1874,3.1656,3.9567,9.2295,13.0168,-4.2108,1,2,4,5,4,2,7,2,2,3,1,1,23,1,4,3,1,1,1,1,1,1,2,2,2,8,2,2,3,3,2,3,1,3,3,1,1,4,2,1,1,1,2,16,1,1,2,1,1,1,8,1,3,7,3,1,2,7,2,3,4,1,2,1,1,2,3,1,162,2,1,15,2,1,2,1,2,2,2,2,3,3,1,1,1,2,1,1,3,2,1,25,2,7,3,6,1,1,4,2,2,1,1,25,2,2,2,1,123,1,2,3,6,1,5,3,1,2,1,1,1,3,1,1,2,8,3,3,4,2,7,8,1,10,1,1,1,1,3,2,2,2,1,4,1,2,1,3,22,1,1,1,3,2,2,2,6,1,1,1,3,14,4,1,2,1,8,2,3,4,1,2,1,2,1,3,3,2,1,4,1,8,1,1,2,1,1,1,2,3,2,2,2,2,2,1,1,1,4,1,1,4.335,14.559625,867,162,79
4,test_4,11.7058,-0.1327,14.1295,7.7506,9.1035,-8.5848,6.8595,10.6048,2.989,7.1437,5.1025,-3.2827,14.1013,8.9672,4.7276,14.5811,11.8615,3.148,18.0126,13.8006,1.6026,16.3059,6.7954,3.6015,13.6569,13.8807,8.6228,-2.2654,5.2255,7.0165,-15.6961,10.6239,-4.7674,17.5447,11.8668,3.0154,4.2546,6.7601,5.9613,0.3695,-14.4364,5.1392,11.6336,12.0338,18.967,12.0144,16.2096,-2.1966,1.1174,13.4532,12.7925,4.3775,-0.1543,5.6794,0.821,19.1358,12.6589,6.4394,4.3425,8.7003,12.0586,-10.4753,-0.0337,5.6603,6.2529,1.5238,4.5356,20.1344,5.0267,-1.8628,39.8219,1.0498,-0.9113,38.5076,2.2201,9.5235,8.1522,14.9224,6.1573,15.5221,11.8133,16.7661,-14.6524,-0.4469,0.0306,22.5276,6.9774,2.2563,3.5779,1.4268,9.068,7.0197,19.7765,10.0499,11.4803,0.2548,16.7029,45.551,1.5795,0.1148,-14.3858,17.863,23.2274,1.4375,14.4838,4.3806,10.6976,18.4023,14.2212,16.0638,6.3933,6.8699,2.7253,12.6458,3.2376,3.4218,-0.5658,-5.684,4.7753,10.332,39.7127,11.2319,-1.2978,12.4827,6.5034,12.7157,13.3054,-1.9678,-1.2363,11.5686,12.6428,0.4792,7.1984,7.1434,-0.2056,-16.3908,27.1589,23.5997,-4.6175,11.7989,12.5683,-3.6145,22.1069,9.5539,9.2721,-1.6214,12.9327,6.808,4.2135,22.1044,20.0502,6.9953,9.3823,20.5534,3.4368,-15.2208,13.0974,-14.0888,11.7586,14.5259,22.87,5.6688,6.1159,13.2433,-11.9785,26.204,3.2348,-5.5775,5.7036,6.1717,-1.6039,-2.4866,17.2728,2.364,14.0037,12.9165,-12.0311,10.1161,-8.7562,6.0889,-1.362,10.3559,-7.4915,9.4588,3.9829,5.858,8.3635,-24.8254,11.4928,1.6321,4.2259,9.1723,1.2835,3.3778,19.5542,-0.286,-5.1612,7.2882,13.926,-9.1846,2,5,3,5,2,2,2,1,1,6,3,2,14,3,1,6,4,3,2,4,2,2,4,3,6,9,2,4,6,1,1,3,1,3,9,3,3,4,2,2,2,3,3,2,6,2,2,1,2,2,11,3,5,9,4,2,4,12,3,7,2,2,5,2,2,2,5,2,391,4,4,7,2,3,3,3,2,6,6,2,2,1,3,3,2,6,2,2,1,2,2,30,3,3,2,10,4,2,9,3,3,6,3,22,1,8,3,2,19,2,1,1,2,2,5,2,2,3,3,1,2,9,1,2,1,4,11,6,5,1,12,11,7,10,4,3,3,1,1,6,3,3,2,4,4,2,5,2,9,2,3,6,1,3,4,2,6,2,2,3,3,13,3,2,2,3,14,5,1,5,6,1,2,2,1,5,2,4,2,1,5,3,3,1,4,2,1,2,3,9,3,2,3,3,1,10,1,4,4,3,2,5.88,27.61537,1176,391,27


In [20]:
train_df.tail(10)

Unnamed: 0,ID_code,target,var_0,var_1,var_2,var_3,var_4,var_5,var_6,var_7,var_8,var_9,var_10,var_11,var_12,var_13,var_14,var_15,var_16,var_17,var_18,var_19,var_20,var_21,var_22,var_23,var_24,var_25,var_26,var_27,var_28,var_29,var_30,var_31,var_32,var_33,var_34,var_35,var_36,var_37,var_38,var_39,var_40,var_41,var_42,var_43,var_44,var_45,var_46,var_47,var_48,var_49,var_50,var_51,var_52,var_53,var_54,var_55,var_56,var_57,var_58,var_59,var_60,var_61,var_62,var_63,var_64,var_65,var_66,var_67,var_68,var_69,var_70,var_71,var_72,var_73,var_74,var_75,var_76,var_77,var_78,var_79,var_80,var_81,var_82,var_83,var_84,var_85,var_86,var_87,var_88,var_89,var_90,var_91,var_92,var_93,var_94,var_95,var_96,var_97,var_98,var_99,var_100,var_101,var_102,var_103,var_104,var_105,var_106,var_107,var_108,var_109,var_110,var_111,var_112,var_113,var_114,var_115,var_116,var_117,var_118,var_119,var_120,var_121,var_122,var_123,var_124,var_125,var_126,var_127,var_128,var_129,var_130,var_131,var_132,var_133,var_134,var_135,var_136,var_137,var_138,var_139,var_140,var_141,var_142,var_143,var_144,var_145,var_146,var_147,var_148,var_149,var_150,var_151,var_152,var_153,var_154,var_155,var_156,var_157,var_158,var_159,var_160,var_161,var_162,var_163,var_164,var_165,var_166,var_167,var_168,var_169,var_170,var_171,var_172,var_173,var_174,var_175,var_176,var_177,var_178,var_179,var_180,var_181,var_182,var_183,var_184,var_185,var_186,var_187,var_188,var_189,var_190,var_191,var_192,var_193,var_194,var_195,var_196,var_197,var_198,var_199,var_0_freq,var_1_freq,var_2_freq,var_3_freq,var_4_freq,var_5_freq,var_6_freq,var_7_freq,var_8_freq,var_9_freq,var_10_freq,var_11_freq,var_12_freq,var_13_freq,var_14_freq,var_15_freq,var_16_freq,var_17_freq,var_18_freq,var_19_freq,var_20_freq,var_21_freq,var_22_freq,var_23_freq,var_24_freq,var_25_freq,var_26_freq,var_27_freq,var_28_freq,var_29_freq,var_30_freq,var_31_freq,var_32_freq,var_33_freq,var_34_freq,var_35_freq,var_36_freq,var_37_freq,var_38_freq,var_39_freq,var_40_freq,var_41_freq,var_42_freq,var_43_freq,var_44_freq,var_45_freq,var_46_freq,var_47_freq,var_48_freq,var_49_freq,var_50_freq,var_51_freq,var_52_freq,var_53_freq,var_54_freq,var_55_freq,var_56_freq,var_57_freq,var_58_freq,var_59_freq,var_60_freq,var_61_freq,var_62_freq,var_63_freq,var_64_freq,var_65_freq,var_66_freq,var_67_freq,var_68_freq,var_69_freq,var_70_freq,var_71_freq,var_72_freq,var_73_freq,var_74_freq,var_75_freq,var_76_freq,var_77_freq,var_78_freq,var_79_freq,var_80_freq,var_81_freq,var_82_freq,var_83_freq,var_84_freq,var_85_freq,var_86_freq,var_87_freq,var_88_freq,var_89_freq,var_90_freq,var_91_freq,var_92_freq,var_93_freq,var_94_freq,var_95_freq,var_96_freq,var_97_freq,var_98_freq,var_99_freq,var_100_freq,var_101_freq,var_102_freq,var_103_freq,var_104_freq,var_105_freq,var_106_freq,var_107_freq,var_108_freq,var_109_freq,var_110_freq,var_111_freq,var_112_freq,var_113_freq,var_114_freq,var_115_freq,var_116_freq,var_117_freq,var_118_freq,var_119_freq,var_120_freq,var_121_freq,var_122_freq,var_123_freq,var_124_freq,var_125_freq,var_126_freq,var_127_freq,var_128_freq,var_129_freq,var_130_freq,var_131_freq,var_132_freq,var_133_freq,var_134_freq,var_135_freq,var_136_freq,var_137_freq,var_138_freq,var_139_freq,var_140_freq,var_141_freq,var_142_freq,var_143_freq,var_144_freq,var_145_freq,var_146_freq,var_147_freq,var_148_freq,var_149_freq,var_150_freq,var_151_freq,var_152_freq,var_153_freq,var_154_freq,var_155_freq,var_156_freq,var_157_freq,var_158_freq,var_159_freq,var_160_freq,var_161_freq,var_162_freq,var_163_freq,var_164_freq,var_165_freq,var_166_freq,var_167_freq,var_168_freq,var_169_freq,var_170_freq,var_171_freq,var_172_freq,var_173_freq,var_174_freq,var_175_freq,var_176_freq,var_177_freq,var_178_freq,var_179_freq,var_180_freq,var_181_freq,var_182_freq,var_183_freq,var_184_freq,var_185_freq,var_186_freq,var_187_freq,var_188_freq,var_189_freq,var_190_freq,var_191_freq,var_192_freq,var_193_freq,var_194_freq,var_195_freq,var_196_freq,var_197_freq,var_198_freq,var_199_freq,min_freq,freq_mean,freq_std,freq_sum,freq_max,unique_per_row
199990,train_199990,1,14.1475,1.8568,11.0066,3.6779,12.1944,-16.5936,5.3217,14.8508,3.3377,6.165,6.1414,0.0827,14.04,1.615,7.5366,14.4789,6.2584,2.1186,26.7359,0.5702,4.0029,15.5389,5.4881,2.4573,18.2939,13.883,-16.193,-1.8986,4.897,7.5907,0.8798,8.5026,4.3286,12.8019,11.9591,2.962,-4.4218,5.8075,6.2756,-1.8136,-24.1308,24.6378,11.9017,11.6704,11.5972,35.0815,9.515,-5.1348,37.5782,13.2321,12.67,29.2393,-6.1634,6.5313,0.4095,7.6697,17.3582,4.8764,-5.3656,7.796,8.2764,-22.6021,1.6359,-5.1907,6.5715,1.6882,6.2847,3.3086,5.0167,-7.6718,23.4462,0.7742,1.1682,31.0228,32.3939,16.7054,10.3803,15.6047,4.7141,13.5369,-2.3319,11.8592,0.995,17.4239,-0.873,13.8346,10.0802,10.3809,7.0715,12.7042,-25.1787,7.1149,12.27,10.931,7.7426,0.4302,13.6463,38.963,2.3068,-0.3915,-8.5204,20.1458,19.0598,1.4362,11.6263,4.1638,7.5614,19.4758,14.2716,16.1327,3.1721,9.3863,4.1246,4.7095,2.6171,4.9913,2.108,31.6905,-1.7796,0.4008,22.8108,9.054,0.5427,-6.9109,6.0711,12.766,12.7326,3.8427,1.2784,19.1283,12.6645,0.4973,8.1403,6.4378,-8.2097,1.5164,34.2795,14.7229,5.3334,-9.051,-3.449,-6.8524,17.0059,11.4479,7.6751,3.8062,9.9805,-11.8864,3.8402,21.7955,17.6339,6.8544,10.987,16.7374,4.9328,-8.8372,15.221,-9.6098,25.5304,7.5533,12.4722,5.7143,4.3119,11.5818,-1.0976,15.9085,3.311,-0.8404,5.9175,4.7656,-7.7289,6.2246,26.4941,3.7733,31.7617,8.5447,-2.1253,9.6392,-2.7775,-2.293,-0.4321,8.2347,-16.8808,6.8203,15.0222,3.3871,9.8074,-36.3302,16.0983,0.8156,-6.4708,4.7287,1.9034,7.2324,20.6047,1.717,-4.0032,9.1627,13.8077,-1.9646,2,1,3,1,4,1,7,1,2,2,1,2,17,1,2,4,1,3,1,1,1,3,2,5,2,15,1,4,9,1,2,1,2,3,11,1,2,1,1,1,1,1,5,12,1,1,1,1,1,3,5,1,1,10,1,1,2,4,2,4,3,2,5,2,2,3,3,1,468,1,1,15,2,1,2,1,2,3,1,2,1,4,1,1,1,1,2,1,1,1,2,15,3,5,1,4,1,1,5,4,2,1,1,18,4,7,4,1,19,1,3,1,3,1,4,1,3,1,2,2,2,3,3,1,1,12,3,1,3,1,7,4,4,3,1,1,1,1,2,2,2,2,1,3,4,1,1,1,15,1,2,1,2,1,1,2,1,1,2,1,2,27,5,2,1,3,9,1,1,1,1,2,1,1,1,3,1,4,1,1,2,3,1,1,1,1,2,1,3,7,2,4,2,2,3,1,1,6,4,1,1,5.27,33.089905,1054,468,91
199991,train_199991,0,9.9909,2.5523,11.9653,6.3958,13.5497,-9.5293,6.0864,14.1789,-2.1245,9.8862,-3.1886,-0.2491,13.9248,13.8947,9.6829,13.9033,10.2258,5.9237,17.3947,1.9836,9.2717,28.8883,5.69,3.1928,8.912,13.765,-11.6627,-1.4927,5.4689,1.306,2.4111,12.4307,3.1743,14.9818,11.8855,1.8232,9.2384,5.5283,9.1446,5.1439,-3.5793,1.0812,10.7423,11.6788,1.9277,-19.955,6.409,-23.0383,-5.0783,16.4067,13.449,3.3566,7.4365,5.1999,4.9993,6.6391,15.4445,6.4535,2.7636,9.3584,22.6203,1.7745,1.2965,-1.7242,5.1655,-3.2149,6.561,16.0365,5.0258,-6.1367,48.5375,0.8261,3.0558,16.2346,39.7911,19.4477,10.8329,15.5222,4.4408,13.5912,1.9444,18.0211,-23.233,5.7972,-11.095,13.2983,4.5397,7.8958,3.0405,-4.4533,-25.4294,6.8083,21.1329,10.9765,11.9918,0.361,21.2432,-2.4605,2.3231,-3.0889,2.6465,2.2822,28.641,1.8103,12.2662,4.9371,9.1332,5.7253,14.1759,14.4442,9.2293,7.8426,1.8774,8.9777,1.8659,4.1717,4.7503,35.5995,-10.8612,5.7606,22.9226,12.3712,4.2567,9.2771,-1.0729,12.4798,13.7258,5.4244,-1.4757,14.5808,11.6918,1.355,7.7013,7.0071,2.2016,-6.7182,14.2702,22.0202,2.1429,7.8805,5.5708,5.273,5.7377,16.2787,7.6531,3.8759,6.8635,5.6207,4.0785,19.2299,15.6735,10.6984,11.5623,18.9767,3.5617,-12.6183,14.5029,-12.2975,28.5627,15.2163,27.8691,5.8872,3.4342,14.4756,6.9225,17.3888,3.1142,-12.9806,8.9525,6.2386,0.4161,-0.1863,17.295,0.5174,33.1748,14.2377,7.851,13.3826,-14.5881,3.344,-7.7214,8.4275,1.0574,3.1567,3.976,-14.273,8.5374,-16.8167,19.2717,0.634,12.0737,5.2139,0.8094,-0.6585,17.0548,0.5328,-5.3444,8.5414,13.2895,-6.7896,1,1,1,3,1,1,5,2,1,2,3,1,19,1,3,6,3,2,1,3,1,1,3,9,2,22,2,2,4,2,1,3,2,3,8,2,1,2,2,1,1,1,2,7,1,1,2,1,1,2,6,1,1,3,2,1,4,3,1,2,1,1,2,3,3,1,4,1,412,3,1,10,2,2,1,2,1,1,3,4,1,1,1,1,1,2,1,2,1,1,1,16,1,3,2,7,2,1,3,1,2,2,1,13,2,5,3,1,18,1,2,2,2,3,1,3,2,1,1,1,1,2,2,1,1,8,4,1,4,1,3,2,2,7,1,1,1,1,1,1,2,2,1,2,3,3,1,1,30,2,2,1,2,5,1,1,2,1,1,1,1,10,2,1,1,1,11,1,1,5,3,2,1,2,1,2,1,2,1,3,3,3,1,1,1,1,3,1,1,4,2,2,2,1,4,4,1,4,1,2,1,4.76,29.16359,952,412,90
199992,train_199992,0,12.2825,2.6918,15.4684,6.4262,10.9863,9.9659,4.503,9.9232,3.1029,7.5225,3.1223,-13.7093,14.2801,6.1486,5.3874,15.0709,6.0153,-9.9398,24.4941,3.6063,17.656,28.8568,3.6906,3.1487,18.1332,13.2658,-11.6506,-2.1987,5.1128,3.9595,-6.3685,9.0262,-3.1848,17.3417,11.6295,12.0087,4.5044,1.7071,12.5764,-6.8294,-21.846,10.385,11.7957,11.6997,11.9089,6.2536,13.7222,-15.2214,5.9381,20.7557,13.3224,16.4928,1.383,6.0306,-4.9506,4.0726,14.4348,6.1442,1.9035,7.6811,17.3005,-26.789,2.4176,4.4711,5.7548,5.5313,5.7094,17.547,5.0183,-5.7827,26.6706,1.1526,-1.6384,27.5379,6.1085,24.1712,-4.0751,11.9874,3.5505,13.9143,-2.153,11.7073,1.8242,-1.5304,-8.7127,18.8457,4.7867,5.2633,10.0537,6.2077,-35.78,6.8679,9.2937,10.4741,11.2761,-0.4809,6.9307,25.9494,1.8895,-0.0953,-15.6344,11.7245,13.468,1.6864,10.9487,4.099,8.1261,13.3942,14.2155,18.3975,8.0833,5.16,4.7246,12.6282,2.5129,-0.9412,3.9081,4.3108,0.4932,11.4456,9.9452,9.9463,-2.8029,9.3765,4.8875,12.7903,13.4951,1.2178,2.0796,10.7976,10.9763,0.9816,8.0911,6.8055,-9.4673,-17.2125,30.7602,23.2429,-4.6024,4.1782,1.5362,1.7552,10.3786,14.3794,7.3445,0.7976,12.3726,11.4228,4.1227,-3.9174,15.6006,9.1087,5.3879,13.3449,2.4793,6.5923,14.9919,-1.345,11.8287,6.158,18.7862,5.7688,7.5789,8.9248,-8.548,24.6075,3.4078,-7.252,-0.1797,6.3919,-9.0748,-5.4637,20.4366,-7.7285,25.3722,8.8294,0.6597,8.8527,4.2092,4.0341,-9.1868,9.4319,-7.668,-0.3547,26.6488,-12.5284,8.4394,-0.8307,12.5382,-0.8819,3.9514,10.904,1.3472,5.6184,20.5498,-0.2854,7.6252,10.1758,17.4066,-11.5244,1,1,1,1,1,1,7,1,3,6,1,1,8,1,3,8,3,1,3,2,2,1,1,14,1,9,3,4,2,3,1,4,3,2,9,1,1,2,1,1,1,1,7,8,2,1,1,1,1,1,6,1,2,5,1,1,3,6,1,3,1,1,2,4,2,3,8,1,509,2,2,3,1,2,1,1,1,1,2,3,1,1,2,1,1,2,2,1,2,2,1,19,2,7,1,7,1,1,4,6,1,3,1,16,4,5,3,2,18,1,2,2,3,1,5,2,1,1,2,2,2,2,6,1,2,12,3,3,2,2,3,9,3,6,2,1,3,1,1,2,1,1,2,3,3,2,3,1,19,1,2,1,3,2,1,2,2,2,1,3,1,26,1,2,2,2,11,2,2,3,1,1,1,1,1,2,1,2,3,1,2,3,2,1,1,1,2,1,1,3,2,1,6,2,2,2,1,3,3,4,1,5.455,35.954857,1091,509,80
199993,train_199993,0,13.2152,-5.8006,9.726,6.591,12.4612,-7.1652,6.0666,12.9887,4.8121,8.6043,6.4829,-1.3537,14.3334,10.7962,8.5172,14.3719,5.9248,-12.4834,8.7746,22.9885,15.9505,17.4389,3.7304,2.4216,7.1634,13.4715,-10.7914,1.085,5.3917,4.2952,-20.6622,14.6348,0.1073,18.8359,11.9586,12.0073,6.1539,3.9873,10.1482,1.2709,-1.311,20.2178,10.791,11.3869,15.9616,-45.9839,13.6187,-3.9876,17.2117,12.5571,12.3434,8.0588,-0.2585,6.3296,-2.3278,15.1085,14.2894,5.5024,6.1108,9.768,9.5012,0.8483,-3.1655,-0.628,5.6858,1.68,5.649,24.8661,5.0271,-6.8852,12.045,0.7663,3.0445,23.8149,23.9861,23.4601,14.8463,19.2061,4.753,16.9419,9.7403,15.682,9.3077,-0.1978,-3.6381,18.3102,0.5708,11.7697,8.2822,6.8328,-24.4734,7.026,14.1888,9.7887,7.7601,-0.2584,16.6808,-0.6143,2.4305,-4.6367,-0.7972,9.9091,15.1369,1.6657,11.534,2.1669,9.6433,21.6079,14.1651,24.1963,1.6248,4.3398,4.7125,1.5256,2.5172,0.17,1.5434,5.2143,-13.4846,5.1493,9.0818,13.7828,-0.699,5.2003,10.4911,13.2568,14.1692,0.4961,-4.5207,13.5398,12.7264,0.3502,8.8219,6.8117,-0.8046,-5.3284,27.1929,27.8706,2.5958,6.9456,6.4535,7.5931,22.6172,16.2174,7.7792,5.6255,8.3867,-18.7719,3.9678,-3.7586,17.2302,8.5153,8.005,17.8464,-3.151,-10.1633,12.3233,-11.6241,19.5502,16.1874,23.4368,5.7838,5.8966,16.1423,-8.2583,22.262,2.9672,-12.4852,5.8613,5.0818,-1.6543,5.7111,16.1043,-2.8244,21.6734,12.3769,-6.8937,10.5019,-12.9228,-3.0719,-2.8878,9.2866,3.6058,10.5332,28.4711,-8.2904,9.1823,-22.6879,21.6183,1.0458,3.9357,8.8442,1.8096,-4.8314,22.005,0.3916,6.7302,8.9709,14.5405,6.1149,1,1,3,2,1,1,3,1,3,4,1,2,5,2,2,9,1,2,1,1,1,2,2,6,3,15,1,1,4,1,1,1,4,2,5,1,2,3,1,1,1,1,8,19,1,1,2,1,1,1,9,1,1,6,1,2,1,6,2,8,3,1,1,2,1,2,8,2,351,1,2,19,4,1,1,1,1,2,2,1,1,3,1,2,2,2,1,1,3,1,1,21,1,6,3,2,1,1,7,2,1,2,3,15,3,1,3,1,18,1,3,2,1,1,7,3,4,2,1,1,1,3,1,2,1,1,8,5,1,1,4,5,4,11,4,1,2,1,1,3,1,2,1,1,4,1,3,1,26,1,2,4,2,1,1,1,6,1,2,1,2,22,4,2,1,2,7,1,1,5,2,1,2,1,2,3,1,1,1,2,1,1,1,3,1,1,5,1,1,5,2,2,3,1,3,4,3,9,2,3,1,4.77,24.91467,954,351,91
199994,train_199994,0,12.3925,-5.8821,11.2323,3.9237,10.4509,10.7262,7.0503,18.6968,-0.4976,6.0916,3.1941,0.018,13.6864,10.184,9.8641,14.831,6.2333,-10.0176,14.7197,4.6983,2.2122,2.9573,7.4937,2.5721,12.4556,13.7752,-7.5963,-0.3729,5.7767,3.3295,-6.8513,8.7369,-0.3362,9.2855,10.6468,4.4759,6.5457,6.5461,8.9143,1.5065,-6.7259,15.9743,10.6579,11.7607,16.8149,-2.6518,12.4926,-12.3501,-8.2629,21.8112,12.5453,27.2368,-7.7296,6.7084,-2.4154,11.1885,19.3433,7.6822,8.809,8.1849,20.2374,7.3224,3.4762,-2.0093,8.1242,-2.9186,6.0892,13.5714,5.0171,-8.5192,11.2478,0.5575,2.4149,33.2018,39.044,17.7897,22.6119,12.9335,3.2667,14.1478,-13.0591,10.2997,-1.3952,11.1085,2.6704,19.9182,6.4169,9.0312,7.4642,4.7096,-1.9112,6.9523,4.2984,11.044,6.9833,-0.3825,22.3924,17.0644,1.1949,-1.4553,-6.7977,10.2816,34.9114,1.522,11.2309,5.1064,10.6914,17.024,14.6088,17.1654,2.8452,7.5341,5.4049,6.5559,2.3091,3.3182,0.9047,34.2445,-5.3557,0.9427,22.2059,10.8204,4.1033,2.0618,0.2992,12.6356,12.9105,-1.8686,-9.636,14.5168,13.0762,0.3982,6.6118,6.8521,-7.8369,-5.4932,31.5765,7.7356,1.4871,5.8412,9.7074,-13.6967,4.4195,6.4588,8.9844,9.7638,9.9719,-12.1858,4.4432,-7.9207,12.2697,9.6069,8.8146,16.1697,0.9595,3.9896,12.8511,-9.8704,21.7685,13.5411,7.8425,5.764,2.2666,5.3453,-0.9698,26.8913,2.2833,-8.792,2.5469,5.1518,8.4931,-13.8864,17.0457,8.4709,15.7375,13.063,-10.0201,13.193,7.9901,-1.4754,9.9827,9.1572,-6.0677,14.4162,13.2788,-4.1262,5.9494,-22.8233,13.3396,1.011,1.3911,6.8687,3.7788,6.3378,14.4656,-1.4843,-3.9299,9.1164,16.317,-7.5048,2,1,5,1,3,1,1,2,1,6,2,1,5,1,2,10,3,1,1,1,2,1,2,7,1,10,2,4,3,3,1,3,2,3,2,1,1,1,2,2,1,1,7,11,1,1,2,1,1,3,7,2,3,3,1,2,1,2,1,2,2,1,2,1,5,1,2,2,511,4,1,13,2,1,1,2,1,1,2,2,1,1,2,2,1,1,1,2,2,3,2,28,1,4,1,9,1,1,4,5,1,1,1,22,3,3,2,1,2,2,2,3,3,1,4,1,2,1,1,3,1,3,1,2,2,5,5,2,1,1,6,10,4,13,1,1,1,1,1,1,1,1,1,1,3,1,2,1,2,1,2,2,3,3,1,1,10,1,1,1,1,13,1,2,3,1,3,2,2,6,1,1,3,2,1,1,1,3,1,2,1,4,1,1,1,1,1,1,1,5,1,2,3,1,1,10,2,5,4,2,1,5.215,36.093524,1043,511,92
199995,train_199995,0,11.488,-0.4956,8.2622,3.5142,10.3404,11.6081,5.6709,15.1516,-0.6209,5.6669,3.7574,-9.5348,13.986,5.2982,8.2705,14.1527,7.454,-5.0105,12.0465,8.6349,9.9137,25.1376,1.0914,3.2326,7.7802,13.9939,2.9085,0.1005,4.2369,7.5665,-9.2149,9.5746,1.4012,7.4211,11.0075,7.808,4.5567,4.9861,9.7471,0.0722,5.9053,8.1743,10.88,11.1665,4.26,-2.1296,8.7833,-15.5727,-8.4916,22.1905,12.411,15.1168,1.6041,6.1868,10.9576,18.7371,15.2986,5.7322,5.1244,9.8225,14.0315,-23.6064,-1.3403,-2.5577,6.3582,-5.4557,5.6063,7.0054,5.0171,-5.0055,28.9502,1.2297,4.4918,19.5568,20.8357,19.2136,17.6422,17.9836,4.0395,14.0761,-5.7878,16.387,-14.1721,-13.0269,-2.5955,21.4526,15.6163,0.9845,8.211,-0.8553,-12.1682,6.7779,7.3895,10.5084,15.5057,-0.6812,5.8999,6.1825,3.1038,-1.693,-18.8473,9.9358,25.3359,1.3647,11.8509,5.0357,6.463,18.4008,14.3787,19.0369,-0.6364,6.9155,3.6763,3.146,4.9442,-1.8289,1.3521,34.6265,-0.6869,-5.3781,20.503,10.9614,4.9677,6.1408,2.2575,12.8757,14.2253,-1.2868,0.2212,16.8661,12.7663,1.2414,7.1304,7.4108,-6.3369,3.076,24.9796,20.341,5.3312,23.7116,2.4745,11.2013,17.8165,13.0057,9.5506,5.3589,13.2491,-3.3068,3.6998,2.5927,14.3025,8.1596,7.9609,18.3343,4.3086,1.3546,12.4158,-5.3985,16.3683,10.4522,35.4923,5.5477,7.4244,12.5459,-6.784,31.1895,2.6529,-11.1867,9.8865,5.473,-5.388,-0.4698,24.4025,-5.4493,11.3529,7.7075,-5.0491,13.0756,15.8271,3.358,-14.3371,10.4421,7.653,9.4585,22.7783,-4.0305,4.2233,-6.3906,13.5058,-0.4594,6.1415,13.2305,3.9901,0.9388,18.0249,-1.7939,2.1661,8.5326,16.666,-17.8661,2,1,4,1,3,1,4,1,2,4,2,1,17,1,2,8,2,2,1,1,2,2,1,3,3,8,1,1,1,4,1,1,1,1,4,3,3,3,1,1,1,1,6,4,2,1,4,1,1,1,7,1,2,7,1,4,4,5,3,4,1,2,1,1,3,1,2,2,472,2,1,4,2,1,1,1,1,1,3,2,1,4,1,2,2,2,1,1,1,2,2,10,1,8,1,6,1,1,2,4,1,3,1,11,3,7,2,1,21,1,1,2,2,2,1,1,3,1,1,1,1,1,3,2,3,8,6,2,2,2,3,2,2,5,2,2,2,2,2,1,1,2,1,1,2,1,2,1,7,2,3,1,1,1,1,1,10,2,1,1,1,17,2,1,1,1,9,1,1,8,2,2,1,1,1,2,1,2,1,3,1,4,1,1,2,2,3,1,3,2,3,1,2,2,2,2,1,6,3,1,1,4.92,33.309174,984,472,88
199996,train_199996,0,4.9149,-2.4484,16.7052,6.6345,8.3096,-10.5628,5.8802,21.594,-3.6797,6.0019,6.5576,-11.8776,14.4131,3.3087,3.58,14.1597,7.5191,-8.8715,17.9467,17.0237,6.6459,18.2345,0.8982,2.2532,15.4977,13.3282,5.2281,-3.7424,5.5144,5.7148,-13.747,7.4369,1.3041,12.7552,12.5362,-1.1002,2.437,6.2631,14.8565,-2.9862,-7.882,7.132,11.8869,11.4218,8.9282,-27.2007,14.5962,-19.8502,26.0775,24.3915,12.691,10.2453,6.8173,4.5666,-9.5685,18.4685,16.9534,7.366,4.7038,9.4559,6.0037,-10.8728,0.7859,4.7,7.8077,-1.7926,6.1534,12.9087,5.0398,-0.4247,22.6256,0.7166,0.6533,13.5821,20.3267,25.538,14.0155,17.3326,4.2046,14.0195,11.4812,17.9954,-18.3549,-3.4537,1.1233,22.3135,1.9795,16.0239,4.7492,0.2446,-39.6406,6.9473,9.9392,11.1977,14.1006,-0.8012,18.8214,32.9827,1.7989,-0.2476,-15.5294,9.5501,11.8548,1.5127,11.3998,4.2304,6.6777,11.3434,14.2993,13.1205,13.3224,7.3143,3.6817,9.778,4.0491,2.7221,4.4344,3.7648,2.1927,-2.9197,23.0679,12.2112,3.7517,6.7907,6.5622,13.0283,12.2389,4.0627,-1.2406,13.9757,12.6133,0.6524,8.3929,6.9125,-6.0942,-6.3209,38.8105,17.6153,-2.907,0.827,2.0615,0.9315,6.5953,17.2099,9.396,9.9801,3.7881,2.9866,3.8695,17.8068,18.7807,9.4546,4.4657,17.8085,13.3077,-1.3209,12.7288,-12.3625,15.35,11.1798,35.1445,5.5375,5.6397,17.0598,-9.7142,15.5117,3.3696,-17.1855,2.8292,5.2606,2.6836,5.8767,25.1262,7.3478,27.1264,11.8542,9.7999,11.1395,-3.287,0.4285,2.5058,10.0339,9.161,9.4318,13.4913,4.6247,6.2906,-17.8522,18.6751,-0.1162,4.9611,4.6549,0.6998,1.8341,22.2717,1.7337,-2.1651,6.7419,15.9054,0.3388,2,3,1,1,1,1,6,2,2,4,2,1,2,1,2,7,3,1,1,1,1,2,1,7,4,8,2,1,7,3,1,2,1,1,1,1,3,3,2,2,1,1,2,16,2,1,1,1,1,1,9,1,1,3,1,2,1,5,1,4,1,1,1,1,3,1,2,1,2,1,1,17,1,1,2,2,1,1,8,3,1,1,2,1,1,2,1,1,1,1,2,19,2,3,1,9,1,1,6,3,1,1,2,15,2,1,1,2,28,1,1,3,4,2,9,3,4,1,2,1,1,5,2,2,3,9,3,1,3,1,7,9,5,6,1,1,1,1,3,1,2,1,1,1,6,1,1,1,11,2,1,1,2,2,1,2,4,2,1,4,1,14,6,1,1,1,6,1,3,8,2,1,1,1,1,1,1,4,1,2,3,2,1,1,1,1,3,1,1,3,1,2,5,1,2,2,2,1,5,1,1,2.77,3.478159,554,28,100
199997,train_199997,0,11.2232,-5.0518,10.5127,5.6456,9.341,-5.4086,4.5555,21.5571,0.1202,6.1629,4.4004,-0.4651,13.8775,9.7414,10.9044,14.5597,9.6214,-1.6429,23.1127,12.1517,16.2577,3.1453,3.1008,2.1497,10.2715,13.5637,4.9473,-0.9905,6.2801,9.4902,-12.8549,11.0403,1.4306,13.8533,11.7484,6.8969,6.4162,3.4246,12.117,3.4096,-8.8763,9.523,11.2566,11.4025,11.8492,-49.5007,7.4376,-21.2946,16.5701,15.9192,11.4688,16.38,-5.7152,6.0771,7.5194,9.6364,15.3166,5.483,0.6006,9.5466,22.096,-6.7813,3.687,-4.0387,5.8101,3.7793,5.7782,14.573,5.0075,-1.0104,25.605,0.2655,3.3822,13.4685,10.8834,9.2657,-4.1948,12.1229,7.5949,11.9158,11.9537,16.9399,-2.2643,-3.3658,6.402,18.2095,17.471,6.3349,7.474,4.8024,-0.3345,7.0295,16.5425,10.5645,12.733,-0.9946,23.721,11.239,1.0012,-1.1083,-8.0574,10.0606,25.2535,1.8019,10.4973,4.2183,9.1158,10.1525,14.0837,15.2503,3.4797,8.7901,2.9,0.6471,2.3316,1.5084,0.2888,43.0307,-4.4543,3.2765,28.2664,12.1189,3.1526,14.2214,3.3878,13.241,12.9788,4.5766,-4.8512,16.6344,12.3827,0.5293,8.0588,7.1081,-9.2317,-11.9277,20.5706,22.5568,3.0665,1.0527,7.4011,4.3367,1.4242,11.3654,9.1812,2.7627,12.2434,-0.242,4.1575,4.7996,20.6307,10.289,5.689,13.4601,-0.9774,2.3728,11.7245,-9.6385,17.3101,14.0422,19.9293,5.3427,5.4776,13.1202,5.35,31.7346,3.1693,-19.4779,6.8053,5.6281,-0.8774,-8.9508,17.4931,-1.653,32.0032,12.5749,5.8756,8.8059,-10.6367,5.4401,-12.7967,8.799,0.7021,14.9744,18.9211,0.3016,11.2869,-6.3741,12.9726,2.3425,4.0651,5.4414,3.1032,4.8793,23.5311,-1.5736,1.2832,8.7155,13.8329,4.1995,1,1,2,2,2,1,6,2,1,4,2,1,16,3,1,8,2,2,1,1,1,2,2,4,2,13,1,4,7,1,1,4,3,1,9,2,1,2,1,1,1,1,7,18,2,1,1,2,1,1,5,1,3,11,1,2,1,4,3,6,1,1,1,2,1,2,3,1,146,3,1,5,2,1,2,2,1,1,6,1,1,3,1,2,1,1,1,1,4,3,2,19,1,8,1,6,1,3,5,4,2,1,1,8,3,8,4,1,11,2,2,1,4,1,8,2,3,1,1,2,1,2,1,1,5,1,6,3,3,3,7,10,4,4,2,2,2,1,1,1,1,2,1,1,4,1,1,1,20,1,4,2,2,2,1,1,2,1,1,2,1,13,2,2,1,1,4,1,1,9,2,1,1,1,1,1,1,2,1,1,1,4,1,2,1,1,2,2,1,2,2,1,3,1,1,3,1,5,3,1,1,3.53,10.620881,706,146,92
199998,train_199998,0,9.7148,-8.6098,13.6104,5.793,12.5173,0.5339,6.0479,17.0152,-2.1926,8.7542,1.4245,0.7086,14.211,6.5641,7.6177,13.8771,9.0479,-11.8164,14.0831,-2.0345,18.3863,3.0911,5.5803,3.7091,12.8219,13.8866,-3.3859,-0.444,5.4817,4.0902,-7.7085,10.3952,2.5739,17.8529,11.3433,5.0534,-3.0055,3.9433,11.0759,1.2173,-11.7669,11.8626,10.7766,11.69,12.9929,-42.9704,12.7881,4.4044,27.088,14.0471,13.4318,9.4325,1.0213,6.2404,-8.1836,4.1057,10.7941,5.9704,-4.6315,9.9272,14.4322,-13.8557,-1.8803,1.8243,4.8059,-1.6255,5.1595,-2.8395,5.0116,2.4464,24.0896,0.8953,-2.6184,27.704,43.5092,16.4079,14.4559,27.7355,5.536,16.7484,9.6956,21.4391,-5.1839,6.8296,-9.0318,24.2122,-7.5779,5.6786,13.1278,7.0086,-32.3247,7.0141,6.9451,10.0272,10.0716,-0.3385,19.4605,26.948,1.7079,-4.8882,-2.3891,24.6626,19.7783,1.578,14.3962,4.8206,12.2354,33.9267,14.2625,26.2407,2.9091,6.454,5.329,10.6131,3.4212,-1.8915,2.1376,46.4915,1.0591,3.3543,18.125,10.0102,9.3483,11.0467,2.3866,12.2352,13.5462,3.0043,5.3751,17.1567,11.6873,0.6677,8.3511,6.5834,1.6146,4.8462,15.2331,3.839,0.6656,2.2357,14.8203,5.8648,8.719,15.1468,9.993,10.4543,10.9535,-10.3405,3.6463,-16.8622,18.858,8.2192,-0.4073,16.7224,8.8882,-3.2567,12.9142,-8.5421,15.9319,5.8348,40.3378,5.5357,4.6151,8.591,-12.6998,25.8578,2.2346,-6.4988,2.6702,5.3868,-7.1875,8.1477,22.4362,-2.5914,8.8704,11.6621,7.4904,8.1808,-11.4177,2.8379,3.8748,8.741,8.9998,16.4058,11.3244,-2.1751,12.4735,-18.3932,12.6337,0.3243,2.684,8.6587,2.7337,11.1178,20.4158,-0.0786,6.798,10.0342,15.5289,-13.9001,2,1,1,1,2,1,2,2,2,6,2,1,10,1,3,7,6,1,1,1,2,1,1,2,2,13,3,2,7,1,1,2,4,4,8,2,1,1,2,2,1,1,2,13,1,1,1,1,1,2,3,1,1,2,1,2,3,3,1,1,1,1,1,3,2,2,4,1,326,3,1,13,1,1,1,1,2,1,2,3,1,1,2,2,1,2,1,1,2,1,2,20,1,7,2,6,2,2,4,1,3,1,2,17,1,1,2,1,9,1,1,2,2,1,5,1,7,1,1,2,2,3,1,1,3,11,8,1,1,1,5,12,5,7,2,1,3,2,2,4,1,1,1,4,5,1,2,1,9,1,1,2,1,1,3,1,2,1,1,1,1,19,8,2,1,2,5,1,2,6,1,1,1,1,1,2,1,1,1,3,1,4,1,1,1,2,1,1,1,2,3,2,1,2,2,3,1,5,2,1,1,4.3,23.074274,860,326,95
199999,train_199999,0,10.8762,-5.7105,12.1183,8.0328,11.5577,0.3488,5.2839,15.2058,-0.4541,9.3688,-7.3826,-8.7049,14.2486,15.0849,5.2313,14.3572,12.5523,-6.5066,11.3592,11.4779,15.4997,3.8474,2.4381,2.8295,10.6681,13.7167,-7.7771,-2.7798,6.2885,6.0089,2.1547,10.8181,-0.2712,12.5254,11.6304,-1.4949,7.9509,2.248,8.1459,0.7928,-7.9028,7.4223,11.4249,11.9103,8.7002,-6.6883,10.5219,-25.9933,11.6241,13.467,12.3563,3.4031,-12.9247,6.2607,11.8525,8.8581,20.6438,6.5641,0.5322,10.074,11.2477,-19.5169,-1.6499,5.3036,5.6244,1.2976,5.468,10.3979,5.0209,-6.9248,32.4865,0.8271,4.388,16.1819,11.508,11.9092,6.3494,23.0598,2.4466,15.6721,9.3809,14.7593,-12.8156,3.4928,-3.1634,21.5742,9.7015,22.4258,7.1213,2.405,-3.1107,7.1529,16.2315,11.5051,16.5967,0.6444,21.0773,25.727,2.4916,-3.0062,0.9636,13.4966,31.3629,1.5517,12.1898,3.5216,6.8915,9.0475,14.126,15.9937,6.7106,8.1964,6.552,6.69,2.4643,4.5841,0.7221,23.402,-10.3157,6.4857,33.43,12.2439,4.3416,10.3869,1.3913,12.7127,13.853,4.6685,-1.6082,10.1564,11.7936,0.2316,8.176,7.6166,-18.3865,-7.3542,32.6663,15.5464,-0.9083,1.0662,5.3922,9.7708,11.4687,8.698,8.0106,13.1911,12.3484,0.2655,3.8811,-9.4762,11.6612,13.1571,8.5043,17.0369,7.1124,-13.1967,13.9404,-8.3303,29.014,9.6174,15.9041,5.3187,6.2987,13.0729,-4.2045,19.2141,3.2902,-1.2175,4.1583,5.7675,5.7719,-1.2139,21.8496,-3.5368,25.9094,11.7673,1.9765,15.9218,3.935,4.3993,-10.3268,10.52,9.9587,11.9242,7.0626,-6.5429,10.5947,-3.8827,16.3552,1.7535,8.9842,1.6893,0.1276,0.3766,15.2101,-2.4907,-2.2342,8.1857,12.1284,0.1385,2,2,2,3,5,4,6,2,1,4,2,1,7,4,1,9,4,1,1,2,1,1,3,8,1,17,2,3,3,1,2,6,1,1,5,2,1,1,2,2,2,1,6,11,3,1,2,2,1,1,5,1,2,9,1,1,1,5,2,2,2,2,4,1,2,4,5,1,528,2,2,14,1,1,1,1,1,3,1,4,2,1,1,2,2,1,1,1,2,3,1,31,2,3,1,8,1,1,2,4,1,2,2,15,3,5,4,1,22,1,2,3,1,1,3,3,5,1,3,2,3,3,2,3,3,7,3,1,2,1,1,9,3,2,1,1,2,2,1,1,2,3,1,3,2,1,3,2,17,1,1,1,2,3,2,1,7,1,2,1,1,9,2,1,2,1,6,2,1,14,1,2,1,1,3,4,1,1,1,1,1,3,1,1,1,1,3,1,2,3,4,2,2,2,3,2,2,5,1,1,1,5.59,37.30604,1118,528,78


In [3]:



# train_len = train_df.shape[0]
# merged_df = pd.concat([train_df, test_df],sort=False)



In [4]:

# for var in tqdm(var_list):
    
#     merged_df['random_{}'.format(var)] = np.random.normal(merged_df[var].mean(), merged_df[var].std(), 400000).round(4)
#     merged_df['frequency_{}'.format(var)] = merged_df[var].map(merged_df[var].value_counts())
#     merged_df['true_frequency_{}'.format(var)] = merged_df[var].map(merged_df['random_{}'.format(var)].value_counts())
#     merged_df['true_frequency_{}'.format(var)].fillna(0,inplace=True)
#     merged_df['freq_diff_{}'.format(var)] = abs(merged_df['frequency_{}'.format(var)] - merged_df['true_frequency_{}'.format(var)]) 
    
    
    
    
    

In [5]:

# for var in tqdm(var_list):
    
#     random = pd.DataFrame(np.random.normal(merged_df[var].mean(), merged_df[var].std(), 4000000).round(4))
#     random.columns = ['random']

#     #merged_df['frequency_{}'.format(var)] = merged_df[var].map(random['random'].value_counts())
#     merged_df['true_frequency_{}'.format(var)] = merged_df[var].map(random['random'].value_counts())
#     merged_df['true_frequency_{}'.format(var)].fillna(0,inplace=True)
# #    merged_df['freq_diff_{}'.format(var)] = abs(merged_df['frequency_{}'.format(var)] - merged_df['true_frequency_{}'.format(var)]) 
    
    
    
    

In [10]:
# for var in tqdm(var_list):
    
#     random = pd.DataFrame(np.random.normal(merged_df[var].mean(), merged_df[var].std(), merged_df.shape[0]).round(4))
#     random.columns = ['random']
#     #random['random'].value_counts
#     #random['true_prob'] = random['random']/4000000
#     merged_df['frequency_{}'.format(var)] = merged_df[var].map(merged_df[var].value_counts())
#     merged_df['prob_{}'.format(var)] = merged_df['frequency_{}'.format(var)]/merged_df.shape[0]
#     merged_df['true_prob_{}'.format(var)] = merged_df[var].map(random['random'].value_counts())/4000000
#     merged_df['true_prob_{}'.format(var)].fillna(0,inplace=True)
#     merged_df['prob_diff_{}'.format(var)] = abs(merged_df['prob_{}'.format(var)] - merged_df['true_prob_{}'.format(var)]) 

In [60]:

# train_df = merged_df.iloc[:train_len]
# test_df = merged_df.iloc[train_len:]

In [11]:
from sklearn.model_selection import StratifiedKFold
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score

n_folds = 5
random_seed = 26
model = 'stats_of_freq'


model_name = "{0}_{1}_folds".format(model, n_folds)
print("Model: {}".format(model_name))

Model: stats_of_freq_5_folds


In [12]:
test_df.columns

Index(['ID_code', 'var_0', 'var_1', 'var_2', 'var_3', 'var_4', 'var_5',
       'var_6', 'var_7', 'var_8',
       ...
       'var_196_freq', 'var_197_freq', 'var_198_freq', 'var_199_freq',
       'min_freq', 'freq_mean', 'freq_std', 'freq_sum', 'freq_max',
       'unique_per_row'],
      dtype='object', length=407)

In [16]:

exclusion = ['ID_code', 'target'] + freq_cols

# for var in tqdm(var_list):
#     exclusion.append('frequency_{}'.format(var))
#     exclusion.append('prob_{}'.format(var))
#     exclusion.append('true_prob_{}'.format(var))
    
feats = [c for c in train_df.columns if c not in exclusion]



In [17]:
feats
    

['var_0',
 'var_1',
 'var_2',
 'var_3',
 'var_4',
 'var_5',
 'var_6',
 'var_7',
 'var_8',
 'var_9',
 'var_10',
 'var_11',
 'var_12',
 'var_13',
 'var_14',
 'var_15',
 'var_16',
 'var_17',
 'var_18',
 'var_19',
 'var_20',
 'var_21',
 'var_22',
 'var_23',
 'var_24',
 'var_25',
 'var_26',
 'var_27',
 'var_28',
 'var_29',
 'var_30',
 'var_31',
 'var_32',
 'var_33',
 'var_34',
 'var_35',
 'var_36',
 'var_37',
 'var_38',
 'var_39',
 'var_40',
 'var_41',
 'var_42',
 'var_43',
 'var_44',
 'var_45',
 'var_46',
 'var_47',
 'var_48',
 'var_49',
 'var_50',
 'var_51',
 'var_52',
 'var_53',
 'var_54',
 'var_55',
 'var_56',
 'var_57',
 'var_58',
 'var_59',
 'var_60',
 'var_61',
 'var_62',
 'var_63',
 'var_64',
 'var_65',
 'var_66',
 'var_67',
 'var_68',
 'var_69',
 'var_70',
 'var_71',
 'var_72',
 'var_73',
 'var_74',
 'var_75',
 'var_76',
 'var_77',
 'var_78',
 'var_79',
 'var_80',
 'var_81',
 'var_82',
 'var_83',
 'var_84',
 'var_85',
 'var_86',
 'var_87',
 'var_88',
 'var_89',
 'var_90',
 'var_91'

In [18]:
clfs = []
folds = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=random_seed)
oof_preds = np.zeros((len(train_df), 1))
test_preds = np.zeros((len(test_df), 1))


X = train_df[feats]
y = train_df['target']
X_test = test_df[feats]
test_ids = test_df.ID_code.values


parameters = {
    'objective': 'binary',
    'metric': 'auc',
    'is_unbalance': 'true',
    #'scale_pos_weight': 400,
    #'device' : 'gpu' ,
    'boosting': 'gbdt',
    'num_leaves': 5, #31
    'feature_fraction': 0.5,
    'bagging_fraction': 0.7,
    'bagging_freq': 10,
    'learning_rate': 0.05, #0.05
    'verbose': 30
    #'min_data_in_leaf': 200
}

feature_importance_df = pd.DataFrame()
for fold_, (trn_, val_) in enumerate(folds.split(X, y)):
    print("Current Fold: {}".format(fold_+1))
    trn_x, trn_y = X.iloc[trn_, :], y[trn_]
    val_x, val_y = X.iloc[val_, :], y[val_]
    
    
    for var in tqdm(var_list):
        trn_x[new_col_name] = trn_x.iloc[:,[col_index]].apply(lambda x: x.map(trn_x.iloc[:,col_index].value_counts()))
        X_test[new_col_name] = X_test.iloc[:,[col_index]].apply(lambda x: x.map(trn_x.iloc[:,col_index].value_counts()))
        val_x[new_col_name] = val_x.iloc[:,[col_index]].apply(lambda x: x.map(trn_x.iloc[:,col_index].value_counts()))
        
    trn_lgb = lgb.Dataset(trn_x, trn_y)
    val_lgb = lgb.Dataset(val_x, val_y)
    clf = lgb.train(parameters,
                     train_set=trn_lgb,
                     #valid_sets=[valid_data_lgb,holdout_data_lgb],
                     valid_sets=[trn_lgb, val_lgb],
                     num_boost_round=30000,
                     early_stopping_rounds=50,
                     verbose_eval=500)
    


    val_pred = clf.predict(val_x)
    test_fold_pred = clf.predict(X_test)

    print("AUC = {}".format(roc_auc_score(val_y, val_pred)))
    oof_preds[val_, :] = val_pred.reshape((-1, 1))
    test_preds += test_fold_pred.reshape((-1, 1))
    
   # print('getting feature importance')
    
    fold_importance_df = pd.DataFrame()
    fold_importance_df["feature"] = feats
    fold_importance_df["importance"] = clf.feature_importance()
    fold_importance_df["fold"] = fold_ + 1
    feature_importance_df = pd.concat([feature_importance_df, fold_importance_df], axis=0)
    
    
test_preds /= n_folds
roc_score = roc_auc_score(y, oof_preds.ravel())
print("Overall AUC = {}".format(roc_score))


print("Saving submission file")
sample = pd.read_csv('../data/sample_submission.csv')
sample.target = test_preds.astype(float)
sample.ID_code = test_ids
sample.to_csv('../submissions/{}_{}.csv'.format(model_name,str(roc_score)), index=False)

display_importances(feature_importance_df)


Current Fold: 1
Training until validation scores don't improve for 50 rounds.
[500]	training's auc: 0.893164	valid_1's auc: 0.870414
[1000]	training's auc: 0.913498	valid_1's auc: 0.886788
[1500]	training's auc: 0.922292	valid_1's auc: 0.89263
Early stopping, best iteration is:
[1901]	training's auc: 0.927685	valid_1's auc: 0.894751
AUC = 0.8947514726193938
Current Fold: 2
Training until validation scores don't improve for 50 rounds.
[500]	training's auc: 0.892845	valid_1's auc: 0.870889
[1000]	training's auc: 0.913484	valid_1's auc: 0.886865
[1500]	training's auc: 0.922436	valid_1's auc: 0.892256
[2000]	training's auc: 0.92889	valid_1's auc: 0.894608
Early stopping, best iteration is:
[2039]	training's auc: 0.929413	valid_1's auc: 0.894776
AUC = 0.8947757253309894
Current Fold: 3
Training until validation scores don't improve for 50 rounds.
[500]	training's auc: 0.89259	valid_1's auc: 0.877731
[1000]	training's auc: 0.912758	valid_1's auc: 0.891666
[1500]	training's auc: 0.921868	vali

KeyboardInterrupt: 

In [72]:
%%javascript
var nb = IPython.notebook;
var kernel = IPython.notebook.kernel;
var command = "NOTEBOOK_FULL_PATH = '" + nb.base_url + nb.notebook_path + "'";
kernel.execute(command);

<IPython.core.display.Javascript object>

In [73]:


shutil.copyfile(os.path.basename(NOTEBOOK_FULL_PATH), 
                             '../models/{}_{}.ipynb'.format(model_name, str(roc_score)))


'../models/random_prob_diff_10_times_5_folds_0.9027353449061455.ipynb'