In [1]:
%config Completer.use_jedi = False
import pandas as pd
import numpy as np
pd.options.display.max_rows=1000
pd.options.display.max_columns=1000
import math
import re
import datetime as dt
from bureau_fc import get_stats,get_bureau_feats_2
from multiprocessing import Pool
import warnings
warnings.filterwarnings("ignore")
import copy
from functools import partial

In [2]:
import os

# Root folder holding the competition CSVs and the bureau pickle. The location
# can be overridden with the AV_LTFS3_DATA_DIR environment variable; the
# original hard-coded path is kept as the fallback. os.path.join(..., '')
# guarantees a trailing separator, which matters because the loading cell
# builds file paths by plain string concatenation (DATA_DIR + 'file').
DATA_DIR = os.path.join(
    os.environ.get('AV_LTFS3_DATA_DIR') or '/home/sahil/data/av/AV_LTFS3/data/',
    '',
)

In [3]:
# Read the three raw inputs from DATA_DIR: the train/test CSVs and the pickled
# bureau table. Unpickling can execute arbitrary code, so bureau_data.pkl must
# come from a trusted source.
train, test = [
    pd.read_csv(DATA_DIR + csv_name)
    for csv_name in ('train_Data.csv', 'test_Data.csv')
]
bur_df = pd.read_pickle(DATA_DIR + 'bureau_data.pkl')

In [4]:
# Display the (rows, columns) shape of each loaded table.
tuple(frame.shape for frame in (train, test, bur_df))

((128655, 26), (14745, 25), (591000, 32))

In [5]:
# Parse the loan date columns of both application tables into datetimes.
for loans in (train, test):
    for date_col in ('DisbursalDate', 'MaturityDAte'):
        loans[date_col] = pd.to_datetime(loans[date_col])

bur_df['DATE-REPORTED'] = pd.to_datetime(bur_df['DATE-REPORTED'])


def _split_dpd_string(dpd):
    """Cut a DPD string into consecutive 3-character chunks.

    Any string containing the letter 'E' is replaced by the single chunk
    ['000']. An empty string yields an empty list.
    """
    if 'E' in dpd:
        return ['000']
    return [dpd[end - 3:end] for end in range(3, len(dpd) + 3, 3)]


# Missing DPD strings are treated as empty before chunking.
bur_df['dpd_strin_var'] = bur_df['dpd_string'].fillna('').apply(_split_dpd_string)

In [6]:
# Stack the train and test rows into one frame so later joins cover both.
df = pd.concat([train, test], axis='index')
df.shape

(143400, 26)

In [7]:
# Attach each loan's disbursal and maturity dates to its bureau records
# (default inner join on ID), then order the records by ID and by the bureau
# account's own disbursal date.
loan_dates = df[['ID', 'DisbursalDate', 'MaturityDAte']]
bur_df = bur_df.merge(loan_dates, on='ID').sort_values(['ID', 'DISBURSED-DT'])

In [8]:
# Space-join each record's list of DPD chunks into one token string.
bur_df['dpd_seq'] = bur_df['dpd_strin_var'].map(' '.join)

#### Use the following combination of columns as a single "product" token
#### The same approach can be applied to other sequences as well

In [9]:
from sklearn.decomposition import TruncatedSVD
from sklearn.feature_extraction.text import TfidfVectorizer

In [10]:
def get_tfidf_svd_feats(tfidf_fun, data, apply_svd=True, n_components=100, random_state=100):
    """Vectorize ``data`` and optionally compress the result with TruncatedSVD.

    Parameters
    ----------
    tfidf_fun : object with a ``fit_transform`` method (e.g. a TfidfVectorizer)
        It is fitted on ``data`` as a side effect of this call.
    data : iterable of documents
        Passed unchanged to ``tfidf_fun.fit_transform``.
    apply_svd : bool, default True
        When True, reduce the vectorized matrix with TruncatedSVD.
    n_components : int, default 100
        Number of SVD components to keep (previously hard-coded).
    random_state : int, default 100
        Seed forwarded to TruncatedSVD (previously hard-coded).

    Returns
    -------
    (DataFrame, TruncatedSVD) when ``apply_svd`` is True
        The frame has one ``tfidf_svd_feat_<i>`` column per component and a
        default integer index; the second item is the fitted SVD object.
    matrix when ``apply_svd`` is False
        The raw output of ``tfidf_fun.fit_transform``. Note that this is a
        single value, not a tuple.
    """
    term_matrix = tfidf_fun.fit_transform(data)
    if not apply_svd:
        return term_matrix

    svd = TruncatedSVD(n_components=n_components, random_state=random_state)
    reduced = pd.DataFrame(svd.fit_transform(term_matrix))
    reduced.columns = ['tfidf_svd_feat_{}'.format(i) for i in range(reduced.shape[1])]
    return reduced, svd

In [11]:
# Build one "product" token per bureau record by joining five categorical
# columns (cast to str) with underscores.
product_cols = ['SELF-INDICATOR', 'MATCH-TYPE', 'ACCT-TYPE', 'CONTRIBUTOR-TYPE', 'OWNERSHIP-IND']
product_token = bur_df[product_cols[0]].astype('str')
for part in product_cols[1:]:
    product_token = product_token + '_' + bur_df[part].astype('str')
bur_df['products'] = product_token

# Collapse each ID's tokens, in current row order, into one lower-cased,
# space-separated "document".
seq_df = bur_df.groupby('ID')['products'].apply(list).reset_index(name='seq')
seq_df['seq'] = seq_df['seq'].map(' '.join).str.lower()

In [12]:
%%time
# Word-level TF-IDF over the per-ID product sequences: a token is any run of
# non-whitespace characters, uni- to tri-grams are used, and a term is kept
# only if it occurs in at least 10 documents and at most 99% of them. Every
# other TfidfVectorizer option is left at its library default (the original
# call spelled those defaults out explicitly).
vectorizer = TfidfVectorizer(
    lowercase=False,       # the sequences were lower-cased when they were built
    token_pattern=r'\S+',  # raw string: '\S' is an invalid escape in a plain literal
    ngram_range=(1, 3),
    max_df=0.99,
    min_df=10,
)
# `data` holds the SVD-reduced features (one row per sequence); `svd` is the
# fitted TruncatedSVD.
data, svd = get_tfidf_svd_feats(vectorizer, seq_df.seq)

CPU times: user 19.3 s, sys: 1.05 s, total: 20.4 s
Wall time: 6.57 s


In [13]:
# Label each feature row with its ID; alignment is by index between `data`
# and `seq_df`.
data = data.assign(ID=seq_df['ID'])

In [14]:
# Preview the first five rows of the full-history feature table.
data.head(n=5)

Unnamed: 0,tfidf_svd_feat_0,tfidf_svd_feat_1,tfidf_svd_feat_2,tfidf_svd_feat_3,tfidf_svd_feat_4,tfidf_svd_feat_5,tfidf_svd_feat_6,tfidf_svd_feat_7,tfidf_svd_feat_8,tfidf_svd_feat_9,tfidf_svd_feat_10,tfidf_svd_feat_11,tfidf_svd_feat_12,tfidf_svd_feat_13,tfidf_svd_feat_14,tfidf_svd_feat_15,tfidf_svd_feat_16,tfidf_svd_feat_17,tfidf_svd_feat_18,tfidf_svd_feat_19,tfidf_svd_feat_20,tfidf_svd_feat_21,tfidf_svd_feat_22,tfidf_svd_feat_23,tfidf_svd_feat_24,tfidf_svd_feat_25,tfidf_svd_feat_26,tfidf_svd_feat_27,tfidf_svd_feat_28,tfidf_svd_feat_29,tfidf_svd_feat_30,tfidf_svd_feat_31,tfidf_svd_feat_32,tfidf_svd_feat_33,tfidf_svd_feat_34,tfidf_svd_feat_35,tfidf_svd_feat_36,tfidf_svd_feat_37,tfidf_svd_feat_38,tfidf_svd_feat_39,tfidf_svd_feat_40,tfidf_svd_feat_41,tfidf_svd_feat_42,tfidf_svd_feat_43,tfidf_svd_feat_44,tfidf_svd_feat_45,tfidf_svd_feat_46,tfidf_svd_feat_47,tfidf_svd_feat_48,tfidf_svd_feat_49,tfidf_svd_feat_50,tfidf_svd_feat_51,tfidf_svd_feat_52,tfidf_svd_feat_53,tfidf_svd_feat_54,tfidf_svd_feat_55,tfidf_svd_feat_56,tfidf_svd_feat_57,tfidf_svd_feat_58,tfidf_svd_feat_59,tfidf_svd_feat_60,tfidf_svd_feat_61,tfidf_svd_feat_62,tfidf_svd_feat_63,tfidf_svd_feat_64,tfidf_svd_feat_65,tfidf_svd_feat_66,tfidf_svd_feat_67,tfidf_svd_feat_68,tfidf_svd_feat_69,tfidf_svd_feat_70,tfidf_svd_feat_71,tfidf_svd_feat_72,tfidf_svd_feat_73,tfidf_svd_feat_74,tfidf_svd_feat_75,tfidf_svd_feat_76,tfidf_svd_feat_77,tfidf_svd_feat_78,tfidf_svd_feat_79,tfidf_svd_feat_80,tfidf_svd_feat_81,tfidf_svd_feat_82,tfidf_svd_feat_83,tfidf_svd_feat_84,tfidf_svd_feat_85,tfidf_svd_feat_86,tfidf_svd_feat_87,tfidf_svd_feat_88,tfidf_svd_feat_89,tfidf_svd_feat_90,tfidf_svd_feat_91,tfidf_svd_feat_92,tfidf_svd_feat_93,tfidf_svd_feat_94,tfidf_svd_feat_95,tfidf_svd_feat_96,tfidf_svd_feat_97,tfidf_svd_feat_98,tfidf_svd_feat_99,ID
0,0.060962,0.061691,0.007819,-0.005386,0.010163,0.122989,-0.031966,0.169891,-0.052629,-0.041681,8.7e-05,0.000114,0.003125,0.018338,-0.007044,0.002105,-0.000638,-0.011309,-5.4e-05,0.010194,-0.024599,0.005771,0.060013,0.082291,0.014366,-0.002614,-0.000838,-0.007339,0.001453,-0.024649,0.013288,0.015152,0.00963,-0.000824,0.006813,0.002714,-0.001167,-0.048817,0.044592,-0.023015,0.014851,0.01762,0.045387,-0.030906,0.007173,0.003776,0.015211,-0.020953,0.001952,0.002525,-0.012142,-0.039369,-0.036521,0.006348,-0.004644,0.035691,-0.024049,-0.003855,-0.033096,-0.037986,-0.013133,0.001466,0.024612,0.029046,0.05214,-0.109363,0.061481,0.204481,0.011506,0.108139,-0.091808,0.022072,-0.016145,0.002995,-0.015557,-0.028934,0.015169,-0.062787,-0.069708,0.03964,-0.042153,0.005731,0.013151,-0.000734,0.010738,-0.00706,0.011914,-0.01746,-0.007601,-0.006578,-0.01149,0.038085,0.003787,-0.012301,-0.004903,-0.011139,-0.008844,0.004524,-0.008519,0.011668,1
1,0.061386,0.106169,0.039712,-0.022115,-0.001442,0.25622,-0.133783,0.057254,0.07011,0.368548,-0.032032,-0.02902,-0.009688,-0.002736,-0.010054,0.009549,-0.010733,-0.00452,-0.00468,0.056203,-0.036254,-0.031328,-0.058391,-0.076084,0.004778,0.000173,-0.008377,0.005955,-0.005303,-0.04595,0.040073,0.075139,-0.018612,-0.00345,-0.01091,0.069516,-0.014071,-0.08287,-0.023071,0.010588,0.019175,0.093382,0.085442,0.164878,0.047891,-0.010951,0.004498,0.028886,-0.00566,-0.012144,-0.003479,0.044457,-0.03638,-0.010161,-0.001523,0.089393,-0.010338,0.078346,0.110565,-0.116,0.080699,0.157722,-0.040778,0.007456,-0.012392,0.074226,-0.026915,0.011818,-0.026203,-0.004931,0.016195,-0.004656,-0.007018,0.022842,0.029457,-0.001146,0.012281,-0.01895,-0.012118,-0.00679,0.009417,-0.013138,0.027137,-0.026554,-0.002295,-0.060466,-0.01176,0.007211,0.002758,-0.026364,-0.036195,-0.002549,-0.120221,-0.004409,0.039388,0.012507,0.021483,0.008412,-0.028538,0.027954,2
2,0.085394,0.080778,0.187741,0.096003,-0.056783,0.185343,-0.092121,0.017749,0.354819,0.051304,0.546795,0.106911,-0.065122,0.042808,-0.002319,-0.034398,0.006515,0.007039,-0.005046,0.126782,0.083252,-0.015792,0.070587,-0.038367,-0.000258,0.038814,-0.001622,-0.045462,-0.046426,-0.000749,-0.007771,-0.051177,-0.034677,0.03,0.02875,-0.035737,0.001802,-0.022585,-0.007161,0.000114,-0.022794,0.012452,0.008845,-0.020313,0.036642,-0.015894,-0.108759,-0.006246,0.001708,0.006674,0.007521,-0.012477,-0.011467,0.003313,0.002281,0.01708,-0.013459,0.014161,0.022507,-0.050819,-0.019345,0.044401,0.000509,-0.01457,0.011857,0.012091,0.010073,0.045209,-0.013167,-0.073434,-0.001989,0.017422,0.044464,-0.003512,0.007467,-0.007361,-0.008367,-0.087822,0.007927,-0.012307,0.115061,0.017562,0.005589,-0.013595,-0.082689,0.063649,-0.00719,0.070586,-0.057214,0.024234,0.057142,0.006521,0.044223,-0.013236,-0.031193,0.002898,-0.011746,0.001596,-0.018319,-0.022193,3
3,0.065943,0.003518,0.002292,0.004772,0.000859,0.013724,-0.005891,0.010576,0.013124,0.00073,0.039471,0.029992,0.03954,0.050844,-0.002245,0.00648,-0.00136,0.001858,0.001124,-0.003984,4.5e-05,0.001779,-0.00011,-0.001377,0.028214,0.025489,-0.0078,-0.00545,0.007739,-0.001245,0.01649,0.016,0.021962,-0.00552,0.005282,0.016232,0.003902,-0.023093,0.019198,0.004068,-0.002802,0.02151,0.104149,-0.120228,0.299261,0.006753,0.078253,0.073201,-0.000257,-0.147866,-0.14277,0.063666,-0.013431,-0.020292,0.038206,-0.010222,0.018246,-0.005087,-0.00145,-0.001958,0.002247,-0.015871,-0.002024,-0.006565,0.007519,0.004419,0.003121,0.013731,-0.0138,-0.046812,0.015801,0.008353,0.02983,-0.009662,0.011692,-0.012283,-0.019239,-0.070503,0.002441,-0.010813,0.073758,-0.028762,0.018652,-0.007678,-0.089336,0.04953,0.036433,0.150812,0.118271,-0.016834,-0.02304,0.007082,-0.004794,0.015024,0.033526,0.022352,-0.005259,0.054683,-0.007977,-0.0203,4
4,0.084158,0.055242,0.128703,0.121288,-0.053909,0.08898,-0.043985,-0.022355,0.15933,-0.03648,0.349268,-0.157263,-0.143386,-0.076917,0.020187,-0.044569,0.017891,0.006057,-0.001914,0.142199,0.115685,-0.002385,0.054068,-0.026289,0.039297,0.073491,-0.005021,-0.111425,-0.023931,0.001102,-0.001901,-0.031237,0.008405,0.004268,0.002261,-0.001451,0.005269,-0.022212,0.007788,0.005823,-0.003994,0.003012,0.05399,-0.063023,0.161336,-0.010324,0.031453,0.066795,0.000615,-0.13832,-0.130195,0.04948,-0.008541,-0.022808,0.03353,-0.010723,0.020226,-0.006271,-0.00172,0.014261,0.011536,-0.012269,-0.032024,-0.000264,-0.010689,0.007725,-0.002767,-0.01693,-0.027398,-0.00163,-0.006268,0.000786,-0.003166,-0.003716,0.003938,-0.001242,-0.00938,-0.024807,0.014103,0.000217,0.019762,-0.002233,0.00853,0.005114,-0.012223,-0.026074,0.022001,-0.029778,0.030757,-0.03758,-0.021316,-0.014337,0.00981,-0.01676,-0.011608,0.085863,0.002514,-0.022417,0.06161,0.029805,5


In [15]:
# %%time
# vectorizer = TfidfVectorizer(input='content', encoding='utf-8', decode_error='strict', strip_accents=None, 
#          lowercase=False, preprocessor=None, tokenizer=None, analyzer='word', stop_words=None, 
#          token_pattern='\S+', ngram_range=(1, 3), max_df=0.99, min_df=100, max_features=None, 
#          vocabulary=None, binary=False, norm='l2', use_idf=True, 
#          smooth_idf=True, sublinear_tf=False)
# data3,svd = get_tfidf_svd_feats(vectorizer,bur_df['dpd_seq'])
# data3['ID'] = bur_df['ID']

In [16]:
# svd.explained_variance_ratio_

In [17]:
# svd.explained_variance_ratio_.sum()

In [18]:
# data3.head()

In [19]:
# Keep only the bureau accounts whose own disbursal date is on or after the
# loan's DisbursalDate. Filtering first and copying only the surviving rows
# avoids deep-copying the whole bureau frame, and gives `temp` its own data so
# the column assignment below does not write into a slice of bur_df.
cond1 = bur_df['DISBURSED-DT'] >= bur_df['DisbursalDate']
temp = bur_df.loc[cond1].copy()
print(temp.shape)

# Rebuild the underscore-joined "product" token for the filtered records.
product_cols = ['SELF-INDICATOR', 'MATCH-TYPE', 'ACCT-TYPE', 'CONTRIBUTOR-TYPE', 'OWNERSHIP-IND']
product_token = temp[product_cols[0]].astype('str')
for part in product_cols[1:]:
    product_token = product_token + '_' + temp[part].astype('str')
temp['products'] = product_token

# One lower-cased, space-separated token sequence per ID. This rebinds
# `seq_df`, replacing the full-history sequences.
seq_df = temp.groupby('ID')['products'].apply(list).reset_index(name='seq')
seq_df['seq'] = seq_df['seq'].map(' '.join).str.lower()

(415318, 37)


In [20]:
%%time
# Same TF-IDF configuration as for the full-history sequences, fitted afresh
# on the current `seq_df`: whitespace-delimited tokens, uni- to tri-grams,
# terms kept if present in at least 10 documents and at most 99% of them.
# Every other TfidfVectorizer option is left at its library default (the
# original call spelled those defaults out explicitly).
vectorizer = TfidfVectorizer(
    lowercase=False,       # the sequences were lower-cased when they were built
    token_pattern=r'\S+',  # raw string: '\S' is an invalid escape in a plain literal
    ngram_range=(1, 3),
    max_df=0.99,
    min_df=10,
)
# `data2` holds the SVD-reduced features; `svd` is rebound to the newly fitted
# TruncatedSVD.
data2, svd = get_tfidf_svd_feats(vectorizer, seq_df.seq)
# Label each feature row with its ID; alignment is by index.
data2['ID'] = seq_df['ID']

CPU times: user 17 s, sys: 910 ms, total: 18 s
Wall time: 5.36 s


In [21]:
# %%time
# vectorizer = TfidfVectorizer(input='content', encoding='utf-8', decode_error='strict', strip_accents=None, 
#          lowercase=False, preprocessor=None, tokenizer=None, analyzer='word', stop_words=None, 
#          token_pattern='\S+', ngram_range=(1, 3), max_df=0.99, min_df=100, max_features=None, 
#          vocabulary=None, binary=False, norm='l2', use_idf=True, 
#          smooth_idf=True, sublinear_tf=False)
# data4,svd = get_tfidf_svd_feats(vectorizer,temp['dpd_seq'])
# data4['ID'] = temp['ID']

In [22]:
# Compare the shapes of the post-disbursal and full-history feature tables.
tuple(frame.shape for frame in (data2, data))
# ,data3.shape,data4.shape

((139994, 101), (143400, 101))

In [23]:
# def get_avg_tfidf(x):
#     _id,m = x
#     _dict = dict(m.iloc[:,:-1].mean())
#     _dict['ID'] = _id
#     return _dict

In [24]:
# %%time
# temp = data4.groupby('ID')
# try:
#     pool = Pool(8) 
#     data_outputs = pool.map(get_avg_tfidf, temp)
# finally: # To make sure processes are closed in the end, even if errors happen
#     pool.close()
#     pool.join()
# data4 = pd.DataFrame(data_outputs)

In [25]:
# data4.shape

In [26]:
# data4.head()

In [27]:
# %%time
# temp = data3.groupby('ID')
# try:
#     pool = Pool(8) 
#     data_outputs = pool.map(get_avg_tfidf, temp)
# finally: # To make sure processes are closed in the end, even if errors happen
#     pool.close()
#     pool.join()
# data3 = pd.DataFrame(data_outputs)

In [28]:
# data3.shape

In [29]:
# data3.head()

In [30]:
# Prefix every data2 feature column (everything except ID) so the names stay
# distinct from the identically named columns in `data` when the two tables
# are merged on ID. Columns that already carry the prefix are left untouched,
# so re-running this cell does not stack the prefix a second time.
POST_DISB_PREFIX = 'post_disb_tfidf_'
data2.columns = [
    name if name == 'ID' or name.startswith(POST_DISB_PREFIX) else POST_DISB_PREFIX + name
    for name in data2.columns
]
# data3.columns = ['dpd_str_tfidf_'+x if x!='ID' else x for x in data3.columns]
# data4.columns = ['post_disb_dpd_str_tfidf_'+x if x!='ID' else x for x in data4.columns]

In [31]:
# data = data.merge(data2,on='ID').merge(data3,on='ID').merge(data4,on='ID')
# Join the full-history and post-disbursal feature tables on ID. The join is
# an inner join (the pandas default, spelled out here), so only IDs present in
# both tables survive.
# NOTE(review): in the recorded run data2 has 139,994 rows against 143,400 in
# data, so IDs with no post-disbursal bureau rows are dropped here. Confirm
# this is intended; how='left' would keep them with NaN post-disbursal features.
data = pd.merge(data, data2, on='ID', how='inner')

In [32]:
# Preview the first five rows of the merged feature table.
data.head(n=5)

Unnamed: 0,tfidf_svd_feat_0,tfidf_svd_feat_1,tfidf_svd_feat_2,tfidf_svd_feat_3,tfidf_svd_feat_4,tfidf_svd_feat_5,tfidf_svd_feat_6,tfidf_svd_feat_7,tfidf_svd_feat_8,tfidf_svd_feat_9,tfidf_svd_feat_10,tfidf_svd_feat_11,tfidf_svd_feat_12,tfidf_svd_feat_13,tfidf_svd_feat_14,tfidf_svd_feat_15,tfidf_svd_feat_16,tfidf_svd_feat_17,tfidf_svd_feat_18,tfidf_svd_feat_19,tfidf_svd_feat_20,tfidf_svd_feat_21,tfidf_svd_feat_22,tfidf_svd_feat_23,tfidf_svd_feat_24,tfidf_svd_feat_25,tfidf_svd_feat_26,tfidf_svd_feat_27,tfidf_svd_feat_28,tfidf_svd_feat_29,tfidf_svd_feat_30,tfidf_svd_feat_31,tfidf_svd_feat_32,tfidf_svd_feat_33,tfidf_svd_feat_34,tfidf_svd_feat_35,tfidf_svd_feat_36,tfidf_svd_feat_37,tfidf_svd_feat_38,tfidf_svd_feat_39,tfidf_svd_feat_40,tfidf_svd_feat_41,tfidf_svd_feat_42,tfidf_svd_feat_43,tfidf_svd_feat_44,tfidf_svd_feat_45,tfidf_svd_feat_46,tfidf_svd_feat_47,tfidf_svd_feat_48,tfidf_svd_feat_49,tfidf_svd_feat_50,tfidf_svd_feat_51,tfidf_svd_feat_52,tfidf_svd_feat_53,tfidf_svd_feat_54,tfidf_svd_feat_55,tfidf_svd_feat_56,tfidf_svd_feat_57,tfidf_svd_feat_58,tfidf_svd_feat_59,tfidf_svd_feat_60,tfidf_svd_feat_61,tfidf_svd_feat_62,tfidf_svd_feat_63,tfidf_svd_feat_64,tfidf_svd_feat_65,tfidf_svd_feat_66,tfidf_svd_feat_67,tfidf_svd_feat_68,tfidf_svd_feat_69,tfidf_svd_feat_70,tfidf_svd_feat_71,tfidf_svd_feat_72,tfidf_svd_feat_73,tfidf_svd_feat_74,tfidf_svd_feat_75,tfidf_svd_feat_76,tfidf_svd_feat_77,tfidf_svd_feat_78,tfidf_svd_feat_79,tfidf_svd_feat_80,tfidf_svd_feat_81,tfidf_svd_feat_82,tfidf_svd_feat_83,tfidf_svd_feat_84,tfidf_svd_feat_85,tfidf_svd_feat_86,tfidf_svd_feat_87,tfidf_svd_feat_88,tfidf_svd_feat_89,tfidf_svd_feat_90,tfidf_svd_feat_91,tfidf_svd_feat_92,tfidf_svd_feat_93,tfidf_svd_feat_94,tfidf_svd_feat_95,tfidf_svd_feat_96,tfidf_svd_feat_97,tfidf_svd_feat_98,tfidf_svd_feat_99,ID,post_disb_tfidf_tfidf_svd_feat_0,post_disb_tfidf_tfidf_svd_feat_1,post_disb_tfidf_tfidf_svd_feat_2,post_disb_tfidf_tfidf_svd_feat_3,post_disb_tfidf_tfidf_svd_feat_4,post_disb_tfidf_tfidf_svd_feat_
5,post_disb_tfidf_tfidf_svd_feat_6,post_disb_tfidf_tfidf_svd_feat_7,post_disb_tfidf_tfidf_svd_feat_8,post_disb_tfidf_tfidf_svd_feat_9,post_disb_tfidf_tfidf_svd_feat_10,post_disb_tfidf_tfidf_svd_feat_11,post_disb_tfidf_tfidf_svd_feat_12,post_disb_tfidf_tfidf_svd_feat_13,post_disb_tfidf_tfidf_svd_feat_14,post_disb_tfidf_tfidf_svd_feat_15,post_disb_tfidf_tfidf_svd_feat_16,post_disb_tfidf_tfidf_svd_feat_17,post_disb_tfidf_tfidf_svd_feat_18,post_disb_tfidf_tfidf_svd_feat_19,post_disb_tfidf_tfidf_svd_feat_20,post_disb_tfidf_tfidf_svd_feat_21,post_disb_tfidf_tfidf_svd_feat_22,post_disb_tfidf_tfidf_svd_feat_23,post_disb_tfidf_tfidf_svd_feat_24,post_disb_tfidf_tfidf_svd_feat_25,post_disb_tfidf_tfidf_svd_feat_26,post_disb_tfidf_tfidf_svd_feat_27,post_disb_tfidf_tfidf_svd_feat_28,post_disb_tfidf_tfidf_svd_feat_29,post_disb_tfidf_tfidf_svd_feat_30,post_disb_tfidf_tfidf_svd_feat_31,post_disb_tfidf_tfidf_svd_feat_32,post_disb_tfidf_tfidf_svd_feat_33,post_disb_tfidf_tfidf_svd_feat_34,post_disb_tfidf_tfidf_svd_feat_35,post_disb_tfidf_tfidf_svd_feat_36,post_disb_tfidf_tfidf_svd_feat_37,post_disb_tfidf_tfidf_svd_feat_38,post_disb_tfidf_tfidf_svd_feat_39,post_disb_tfidf_tfidf_svd_feat_40,post_disb_tfidf_tfidf_svd_feat_41,post_disb_tfidf_tfidf_svd_feat_42,post_disb_tfidf_tfidf_svd_feat_43,post_disb_tfidf_tfidf_svd_feat_44,post_disb_tfidf_tfidf_svd_feat_45,post_disb_tfidf_tfidf_svd_feat_46,post_disb_tfidf_tfidf_svd_feat_47,post_disb_tfidf_tfidf_svd_feat_48,post_disb_tfidf_tfidf_svd_feat_49,post_disb_tfidf_tfidf_svd_feat_50,post_disb_tfidf_tfidf_svd_feat_51,post_disb_tfidf_tfidf_svd_feat_52,post_disb_tfidf_tfidf_svd_feat_53,post_disb_tfidf_tfidf_svd_feat_54,post_disb_tfidf_tfidf_svd_feat_55,post_disb_tfidf_tfidf_svd_feat_56,post_disb_tfidf_tfidf_svd_feat_57,post_disb_tfidf_tfidf_svd_feat_58,post_disb_tfidf_tfidf_svd_feat_59,post_disb_tfidf_tfidf_svd_feat_60,post_disb_tfidf_tfidf_svd_feat_61,post_disb_tfidf_tfidf_svd_feat_62,post_disb_tfidf_tfidf_svd_feat_63,post_disb_tfidf_tfidf_svd_feat
_64,post_disb_tfidf_tfidf_svd_feat_65,post_disb_tfidf_tfidf_svd_feat_66,post_disb_tfidf_tfidf_svd_feat_67,post_disb_tfidf_tfidf_svd_feat_68,post_disb_tfidf_tfidf_svd_feat_69,post_disb_tfidf_tfidf_svd_feat_70,post_disb_tfidf_tfidf_svd_feat_71,post_disb_tfidf_tfidf_svd_feat_72,post_disb_tfidf_tfidf_svd_feat_73,post_disb_tfidf_tfidf_svd_feat_74,post_disb_tfidf_tfidf_svd_feat_75,post_disb_tfidf_tfidf_svd_feat_76,post_disb_tfidf_tfidf_svd_feat_77,post_disb_tfidf_tfidf_svd_feat_78,post_disb_tfidf_tfidf_svd_feat_79,post_disb_tfidf_tfidf_svd_feat_80,post_disb_tfidf_tfidf_svd_feat_81,post_disb_tfidf_tfidf_svd_feat_82,post_disb_tfidf_tfidf_svd_feat_83,post_disb_tfidf_tfidf_svd_feat_84,post_disb_tfidf_tfidf_svd_feat_85,post_disb_tfidf_tfidf_svd_feat_86,post_disb_tfidf_tfidf_svd_feat_87,post_disb_tfidf_tfidf_svd_feat_88,post_disb_tfidf_tfidf_svd_feat_89,post_disb_tfidf_tfidf_svd_feat_90,post_disb_tfidf_tfidf_svd_feat_91,post_disb_tfidf_tfidf_svd_feat_92,post_disb_tfidf_tfidf_svd_feat_93,post_disb_tfidf_tfidf_svd_feat_94,post_disb_tfidf_tfidf_svd_feat_95,post_disb_tfidf_tfidf_svd_feat_96,post_disb_tfidf_tfidf_svd_feat_97,post_disb_tfidf_tfidf_svd_feat_98,post_disb_tfidf_tfidf_svd_feat_99
0,0.060962,0.061691,0.007819,-0.005386,0.010163,0.122989,-0.031966,0.169891,-0.052629,-0.041681,8.7e-05,0.000114,0.003125,0.018338,-0.007044,0.002105,-0.000638,-0.011309,-5.4e-05,0.010194,-0.024599,0.005771,0.060013,0.082291,0.014366,-0.002614,-0.000838,-0.007339,0.001453,-0.024649,0.013288,0.015152,0.00963,-0.000824,0.006813,0.002714,-0.001167,-0.048817,0.044592,-0.023015,0.014851,0.01762,0.045387,-0.030906,0.007173,0.003776,0.015211,-0.020953,0.001952,0.002525,-0.012142,-0.039369,-0.036521,0.006348,-0.004644,0.035691,-0.024049,-0.003855,-0.033096,-0.037986,-0.013133,0.001466,0.024612,0.029046,0.05214,-0.109363,0.061481,0.204481,0.011506,0.108139,-0.091808,0.022072,-0.016145,0.002995,-0.015557,-0.028934,0.015169,-0.062787,-0.069708,0.03964,-0.042153,0.005731,0.013151,-0.000734,0.010738,-0.00706,0.011914,-0.01746,-0.007601,-0.006578,-0.01149,0.038085,0.003787,-0.012301,-0.004903,-0.011139,-0.008844,0.004524,-0.008519,0.011668,1,0.064131,0.083285,-0.007225,-0.002341,0.037705,0.077547,0.061693,0.218878,-0.05536,-0.024182,-0.003413,0.019007,-0.010311,-0.005655,-0.00056,0.001646,-0.002676,-0.003372,-0.000853,-0.002451,0.013765,0.147729,-0.002456,-0.023496,0.014881,-0.000688,0.014512,-0.012152,0.016592,0.000324,-0.003694,0.009987,0.009035,0.000492,0.028048,-0.032956,-0.015247,0.017078,0.002273,0.021226,0.004263,0.002758,0.017279,-0.045105,0.101041,0.012067,0.013965,0.057461,-0.01643,0.024719,0.015322,0.015364,0.059942,0.00602,-0.033143,0.095649,0.239087,-0.098142,0.033347,0.011513,-0.016978,-0.12574,-0.023412,0.032868,-0.042118,0.049618,-0.110613,0.007598,0.009761,-0.006243,-0.018391,0.007166,-0.003868,-0.014171,0.006676,-0.005818,-0.003126,0.006893,-0.001992,0.000212,0.016476,0.010848,-0.000719,0.003157,-0.015997,-0.005344,-0.002455,0.002321,-0.003111,-0.014271,0.009977,-0.003418,0.003937,-0.006548,0.005039,0.001767,-0.001195,0.020522,-0.012854,0.026207
1,0.061386,0.106169,0.039712,-0.022115,-0.001442,0.25622,-0.133783,0.057254,0.07011,0.368548,-0.032032,-0.02902,-0.009688,-0.002736,-0.010054,0.009549,-0.010733,-0.00452,-0.00468,0.056203,-0.036254,-0.031328,-0.058391,-0.076084,0.004778,0.000173,-0.008377,0.005955,-0.005303,-0.04595,0.040073,0.075139,-0.018612,-0.00345,-0.01091,0.069516,-0.014071,-0.08287,-0.023071,0.010588,0.019175,0.093382,0.085442,0.164878,0.047891,-0.010951,0.004498,0.028886,-0.00566,-0.012144,-0.003479,0.044457,-0.03638,-0.010161,-0.001523,0.089393,-0.010338,0.078346,0.110565,-0.116,0.080699,0.157722,-0.040778,0.007456,-0.012392,0.074226,-0.026915,0.011818,-0.026203,-0.004931,0.016195,-0.004656,-0.007018,0.022842,0.029457,-0.001146,0.012281,-0.01895,-0.012118,-0.00679,0.009417,-0.013138,0.027137,-0.026554,-0.002295,-0.060466,-0.01176,0.007211,0.002758,-0.026364,-0.036195,-0.002549,-0.120221,-0.004409,0.039388,0.012507,0.021483,0.008412,-0.028538,0.027954,2,0.05449,0.123497,0.005819,0.012323,0.060342,0.234049,-0.008504,0.136982,0.181547,0.332597,-0.046735,-0.006891,0.017518,-0.009591,0.000547,-0.004467,-0.066492,0.033466,0.003361,-0.03483,0.001122,-0.084536,-0.005273,-0.005797,0.006553,-0.00031,0.042755,-0.070924,0.100625,-0.022162,-0.032521,0.086107,0.096562,-0.040163,0.055208,-0.111675,-0.055545,0.118132,-0.049402,-0.013217,-0.010953,-0.023677,0.045662,0.112783,0.022669,0.000311,0.000816,-0.004062,-0.008572,0.022782,0.002942,0.012465,0.007262,0.145992,0.012335,0.183559,-0.083484,0.005271,-0.017552,0.012517,0.055812,0.059758,0.000917,0.022878,-0.018318,0.012237,-0.009575,-0.043748,-0.008869,0.015958,0.001182,0.036075,-0.015525,-0.084302,-0.000644,-5.1e-05,-0.004684,0.002867,0.008032,0.02206,-0.020456,-0.010215,0.017534,0.062186,0.081131,0.094971,0.008468,0.030984,0.016115,-0.000712,-0.001109,-0.00325,-0.046941,-0.034074,0.074221,-0.066376,-0.091969,0.010639,-0.069766,-0.071809
2,0.085394,0.080778,0.187741,0.096003,-0.056783,0.185343,-0.092121,0.017749,0.354819,0.051304,0.546795,0.106911,-0.065122,0.042808,-0.002319,-0.034398,0.006515,0.007039,-0.005046,0.126782,0.083252,-0.015792,0.070587,-0.038367,-0.000258,0.038814,-0.001622,-0.045462,-0.046426,-0.000749,-0.007771,-0.051177,-0.034677,0.03,0.02875,-0.035737,0.001802,-0.022585,-0.007161,0.000114,-0.022794,0.012452,0.008845,-0.020313,0.036642,-0.015894,-0.108759,-0.006246,0.001708,0.006674,0.007521,-0.012477,-0.011467,0.003313,0.002281,0.01708,-0.013459,0.014161,0.022507,-0.050819,-0.019345,0.044401,0.000509,-0.01457,0.011857,0.012091,0.010073,0.045209,-0.013167,-0.073434,-0.001989,0.017422,0.044464,-0.003512,0.007467,-0.007361,-0.008367,-0.087822,0.007927,-0.012307,0.115061,0.017562,0.005589,-0.013595,-0.082689,0.063649,-0.00719,0.070586,-0.057214,0.024234,0.057142,0.006521,0.044223,-0.013236,-0.031193,0.002898,-0.011746,0.001596,-0.018319,-0.022193,3,0.080031,0.046312,0.006669,0.106649,0.005621,0.066932,0.00592,0.051393,0.184272,-0.074315,0.453361,0.278804,-0.006263,-0.007809,0.053751,-0.000852,0.000322,-0.009668,0.013843,0.005699,-0.040802,0.001319,0.036687,-0.012093,-0.008894,0.007124,-2e-06,0.000122,-0.010865,0.006906,-0.000317,0.000242,-0.00613,0.002393,0.015913,0.002526,-0.003617,-0.005551,-0.00737,0.009355,0.001529,-0.018319,-0.003978,-0.005741,0.019271,0.002625,-0.006481,0.002468,-0.003191,0.085471,0.042504,-0.023831,0.078824,0.01608,-0.026644,0.017499,0.116738,0.184933,-0.037223,0.019593,-0.042384,0.065553,-0.144726,0.092352,-0.10341,-0.009464,0.127514,0.032087,-0.019126,-0.001365,-0.158587,-0.032244,0.003444,0.026099,-0.005711,-0.008612,-0.02303,0.002778,0.022355,-0.039336,-0.075431,-0.024943,0.006756,-0.051429,-0.031282,-0.017053,0.000742,-0.005955,-0.025086,0.003621,-0.007413,0.003173,0.006584,0.010087,-0.01471,0.018515,-0.016587,0.033405,-0.014236,-0.036534
3,0.065943,0.003518,0.002292,0.004772,0.000859,0.013724,-0.005891,0.010576,0.013124,0.00073,0.039471,0.029992,0.03954,0.050844,-0.002245,0.00648,-0.00136,0.001858,0.001124,-0.003984,4.5e-05,0.001779,-0.00011,-0.001377,0.028214,0.025489,-0.0078,-0.00545,0.007739,-0.001245,0.01649,0.016,0.021962,-0.00552,0.005282,0.016232,0.003902,-0.023093,0.019198,0.004068,-0.002802,0.02151,0.104149,-0.120228,0.299261,0.006753,0.078253,0.073201,-0.000257,-0.147866,-0.14277,0.063666,-0.013431,-0.020292,0.038206,-0.010222,0.018246,-0.005087,-0.00145,-0.001958,0.002247,-0.015871,-0.002024,-0.006565,0.007519,0.004419,0.003121,0.013731,-0.0138,-0.046812,0.015801,0.008353,0.02983,-0.009662,0.011692,-0.012283,-0.019239,-0.070503,0.002441,-0.010813,0.073758,-0.028762,0.018652,-0.007678,-0.089336,0.04953,0.036433,0.150812,0.118271,-0.016834,-0.02304,0.007082,-0.004794,0.015024,0.033526,0.022352,-0.005259,0.054683,-0.007977,-0.0203,4,0.063757,0.005128,-0.00301,0.004661,0.003859,0.007914,0.001403,0.010739,0.013031,-0.001047,0.028295,0.072942,-0.026316,0.00215,-0.002032,0.001645,0.002356,0.001141,0.0017,0.003661,0.024347,-0.004677,0.00135,-0.006502,0.019216,0.003726,0.012463,-0.004193,0.027378,-0.004047,0.005095,0.01243,0.020232,0.040122,0.339422,-0.111749,-0.043352,-0.215594,-0.026852,-0.027716,-0.006373,-0.060726,-0.031858,0.01128,-0.018147,0.009752,0.028014,-0.016038,-0.016854,0.112485,0.061246,0.009818,0.011467,0.003834,-0.024607,-0.002267,0.075501,0.163091,-0.039142,0.010827,-0.020286,0.042188,-0.044898,0.022316,-0.03369,-0.006654,0.045348,0.02715,-0.036104,-0.00504,-0.041216,-0.018337,0.005066,-0.005961,-0.000203,-0.002896,-0.004301,0.005392,0.01492,-0.013924,-0.04461,-0.013606,-0.003218,-0.017281,-0.074919,0.033174,-0.022215,0.158261,-0.191191,0.2876,-0.202623,0.145727,0.111458,-0.025915,-0.101048,0.030881,0.014436,0.00303,0.020866,-0.11069
4,0.084158,0.055242,0.128703,0.121288,-0.053909,0.08898,-0.043985,-0.022355,0.15933,-0.03648,0.349268,-0.157263,-0.143386,-0.076917,0.020187,-0.044569,0.017891,0.006057,-0.001914,0.142199,0.115685,-0.002385,0.054068,-0.026289,0.039297,0.073491,-0.005021,-0.111425,-0.023931,0.001102,-0.001901,-0.031237,0.008405,0.004268,0.002261,-0.001451,0.005269,-0.022212,0.007788,0.005823,-0.003994,0.003012,0.05399,-0.063023,0.161336,-0.010324,0.031453,0.066795,0.000615,-0.13832,-0.130195,0.04948,-0.008541,-0.022808,0.03353,-0.010723,0.020226,-0.006271,-0.00172,0.014261,0.011536,-0.012269,-0.032024,-0.000264,-0.010689,0.007725,-0.002767,-0.01693,-0.027398,-0.00163,-0.006268,0.000786,-0.003166,-0.003716,0.003938,-0.001242,-0.00938,-0.024807,0.014103,0.000217,0.019762,-0.002233,0.00853,0.005114,-0.012223,-0.026074,0.022001,-0.029778,0.030757,-0.03758,-0.021316,-0.014337,0.00981,-0.01676,-0.011608,0.085863,0.002514,-0.022417,0.06161,0.029805,5,0.082212,0.069038,0.00725,0.200777,-0.020447,0.082596,-0.000484,0.005725,0.115339,-0.056718,0.149524,0.097139,0.420825,-0.002986,0.020074,-0.001272,-0.111091,0.041765,0.205561,0.052636,0.0897,-0.006634,-0.002739,-0.003782,-0.015956,0.035426,-0.007514,-0.001965,-0.052764,0.014301,0.00118,0.023889,-0.005234,0.020855,0.210933,-0.075143,-0.025636,-0.142468,-0.022055,-0.041777,-0.004774,-0.025169,-0.032845,0.027916,-0.030769,0.006847,0.074017,-0.022711,-0.03592,0.004774,-0.002423,0.027584,-0.023832,-0.003821,-0.008696,0.001753,0.010359,0.007408,-0.002949,0.002062,-0.00476,0.007768,-0.022104,0.027491,-0.013228,-0.005398,0.021415,-0.007518,0.032806,0.017288,0.102621,0.020181,-0.006442,0.02798,-0.001286,-0.033708,-0.001633,-2.6e-05,0.001859,0.012743,-0.007251,-0.003244,-0.010028,-0.022241,-0.008151,0.053213,-0.011148,0.056524,-0.091992,0.115079,-0.095257,0.069671,0.066849,-0.002277,-0.067812,0.033779,0.022523,-0.017414,0.006721,-0.005138


In [33]:
# Downcast every feature column to float32; ID keeps its dtype.
float32_targets = {name: 'float32' for name in data.columns if name != 'ID'}
data = data.astype(float32_targets)

In [34]:
# Preview the first five rows after the float32 cast.
data.head(n=5)

Unnamed: 0,tfidf_svd_feat_0,tfidf_svd_feat_1,tfidf_svd_feat_2,tfidf_svd_feat_3,tfidf_svd_feat_4,tfidf_svd_feat_5,tfidf_svd_feat_6,tfidf_svd_feat_7,tfidf_svd_feat_8,tfidf_svd_feat_9,tfidf_svd_feat_10,tfidf_svd_feat_11,tfidf_svd_feat_12,tfidf_svd_feat_13,tfidf_svd_feat_14,tfidf_svd_feat_15,tfidf_svd_feat_16,tfidf_svd_feat_17,tfidf_svd_feat_18,tfidf_svd_feat_19,tfidf_svd_feat_20,tfidf_svd_feat_21,tfidf_svd_feat_22,tfidf_svd_feat_23,tfidf_svd_feat_24,tfidf_svd_feat_25,tfidf_svd_feat_26,tfidf_svd_feat_27,tfidf_svd_feat_28,tfidf_svd_feat_29,tfidf_svd_feat_30,tfidf_svd_feat_31,tfidf_svd_feat_32,tfidf_svd_feat_33,tfidf_svd_feat_34,tfidf_svd_feat_35,tfidf_svd_feat_36,tfidf_svd_feat_37,tfidf_svd_feat_38,tfidf_svd_feat_39,tfidf_svd_feat_40,tfidf_svd_feat_41,tfidf_svd_feat_42,tfidf_svd_feat_43,tfidf_svd_feat_44,tfidf_svd_feat_45,tfidf_svd_feat_46,tfidf_svd_feat_47,tfidf_svd_feat_48,tfidf_svd_feat_49,tfidf_svd_feat_50,tfidf_svd_feat_51,tfidf_svd_feat_52,tfidf_svd_feat_53,tfidf_svd_feat_54,tfidf_svd_feat_55,tfidf_svd_feat_56,tfidf_svd_feat_57,tfidf_svd_feat_58,tfidf_svd_feat_59,tfidf_svd_feat_60,tfidf_svd_feat_61,tfidf_svd_feat_62,tfidf_svd_feat_63,tfidf_svd_feat_64,tfidf_svd_feat_65,tfidf_svd_feat_66,tfidf_svd_feat_67,tfidf_svd_feat_68,tfidf_svd_feat_69,tfidf_svd_feat_70,tfidf_svd_feat_71,tfidf_svd_feat_72,tfidf_svd_feat_73,tfidf_svd_feat_74,tfidf_svd_feat_75,tfidf_svd_feat_76,tfidf_svd_feat_77,tfidf_svd_feat_78,tfidf_svd_feat_79,tfidf_svd_feat_80,tfidf_svd_feat_81,tfidf_svd_feat_82,tfidf_svd_feat_83,tfidf_svd_feat_84,tfidf_svd_feat_85,tfidf_svd_feat_86,tfidf_svd_feat_87,tfidf_svd_feat_88,tfidf_svd_feat_89,tfidf_svd_feat_90,tfidf_svd_feat_91,tfidf_svd_feat_92,tfidf_svd_feat_93,tfidf_svd_feat_94,tfidf_svd_feat_95,tfidf_svd_feat_96,tfidf_svd_feat_97,tfidf_svd_feat_98,tfidf_svd_feat_99,ID,post_disb_tfidf_tfidf_svd_feat_0,post_disb_tfidf_tfidf_svd_feat_1,post_disb_tfidf_tfidf_svd_feat_2,post_disb_tfidf_tfidf_svd_feat_3,post_disb_tfidf_tfidf_svd_feat_4,post_disb_tfidf_tfidf_svd_feat_
5,post_disb_tfidf_tfidf_svd_feat_6,post_disb_tfidf_tfidf_svd_feat_7,post_disb_tfidf_tfidf_svd_feat_8,post_disb_tfidf_tfidf_svd_feat_9,post_disb_tfidf_tfidf_svd_feat_10,post_disb_tfidf_tfidf_svd_feat_11,post_disb_tfidf_tfidf_svd_feat_12,post_disb_tfidf_tfidf_svd_feat_13,post_disb_tfidf_tfidf_svd_feat_14,post_disb_tfidf_tfidf_svd_feat_15,post_disb_tfidf_tfidf_svd_feat_16,post_disb_tfidf_tfidf_svd_feat_17,post_disb_tfidf_tfidf_svd_feat_18,post_disb_tfidf_tfidf_svd_feat_19,post_disb_tfidf_tfidf_svd_feat_20,post_disb_tfidf_tfidf_svd_feat_21,post_disb_tfidf_tfidf_svd_feat_22,post_disb_tfidf_tfidf_svd_feat_23,post_disb_tfidf_tfidf_svd_feat_24,post_disb_tfidf_tfidf_svd_feat_25,post_disb_tfidf_tfidf_svd_feat_26,post_disb_tfidf_tfidf_svd_feat_27,post_disb_tfidf_tfidf_svd_feat_28,post_disb_tfidf_tfidf_svd_feat_29,post_disb_tfidf_tfidf_svd_feat_30,post_disb_tfidf_tfidf_svd_feat_31,post_disb_tfidf_tfidf_svd_feat_32,post_disb_tfidf_tfidf_svd_feat_33,post_disb_tfidf_tfidf_svd_feat_34,post_disb_tfidf_tfidf_svd_feat_35,post_disb_tfidf_tfidf_svd_feat_36,post_disb_tfidf_tfidf_svd_feat_37,post_disb_tfidf_tfidf_svd_feat_38,post_disb_tfidf_tfidf_svd_feat_39,post_disb_tfidf_tfidf_svd_feat_40,post_disb_tfidf_tfidf_svd_feat_41,post_disb_tfidf_tfidf_svd_feat_42,post_disb_tfidf_tfidf_svd_feat_43,post_disb_tfidf_tfidf_svd_feat_44,post_disb_tfidf_tfidf_svd_feat_45,post_disb_tfidf_tfidf_svd_feat_46,post_disb_tfidf_tfidf_svd_feat_47,post_disb_tfidf_tfidf_svd_feat_48,post_disb_tfidf_tfidf_svd_feat_49,post_disb_tfidf_tfidf_svd_feat_50,post_disb_tfidf_tfidf_svd_feat_51,post_disb_tfidf_tfidf_svd_feat_52,post_disb_tfidf_tfidf_svd_feat_53,post_disb_tfidf_tfidf_svd_feat_54,post_disb_tfidf_tfidf_svd_feat_55,post_disb_tfidf_tfidf_svd_feat_56,post_disb_tfidf_tfidf_svd_feat_57,post_disb_tfidf_tfidf_svd_feat_58,post_disb_tfidf_tfidf_svd_feat_59,post_disb_tfidf_tfidf_svd_feat_60,post_disb_tfidf_tfidf_svd_feat_61,post_disb_tfidf_tfidf_svd_feat_62,post_disb_tfidf_tfidf_svd_feat_63,post_disb_tfidf_tfidf_svd_feat
_64,post_disb_tfidf_tfidf_svd_feat_65,post_disb_tfidf_tfidf_svd_feat_66,post_disb_tfidf_tfidf_svd_feat_67,post_disb_tfidf_tfidf_svd_feat_68,post_disb_tfidf_tfidf_svd_feat_69,post_disb_tfidf_tfidf_svd_feat_70,post_disb_tfidf_tfidf_svd_feat_71,post_disb_tfidf_tfidf_svd_feat_72,post_disb_tfidf_tfidf_svd_feat_73,post_disb_tfidf_tfidf_svd_feat_74,post_disb_tfidf_tfidf_svd_feat_75,post_disb_tfidf_tfidf_svd_feat_76,post_disb_tfidf_tfidf_svd_feat_77,post_disb_tfidf_tfidf_svd_feat_78,post_disb_tfidf_tfidf_svd_feat_79,post_disb_tfidf_tfidf_svd_feat_80,post_disb_tfidf_tfidf_svd_feat_81,post_disb_tfidf_tfidf_svd_feat_82,post_disb_tfidf_tfidf_svd_feat_83,post_disb_tfidf_tfidf_svd_feat_84,post_disb_tfidf_tfidf_svd_feat_85,post_disb_tfidf_tfidf_svd_feat_86,post_disb_tfidf_tfidf_svd_feat_87,post_disb_tfidf_tfidf_svd_feat_88,post_disb_tfidf_tfidf_svd_feat_89,post_disb_tfidf_tfidf_svd_feat_90,post_disb_tfidf_tfidf_svd_feat_91,post_disb_tfidf_tfidf_svd_feat_92,post_disb_tfidf_tfidf_svd_feat_93,post_disb_tfidf_tfidf_svd_feat_94,post_disb_tfidf_tfidf_svd_feat_95,post_disb_tfidf_tfidf_svd_feat_96,post_disb_tfidf_tfidf_svd_feat_97,post_disb_tfidf_tfidf_svd_feat_98,post_disb_tfidf_tfidf_svd_feat_99
0,0.060962,0.061691,0.007819,-0.005386,0.010163,0.122989,-0.031966,0.169891,-0.052629,-0.041681,8.7e-05,0.000114,0.003125,0.018338,-0.007044,0.002105,-0.000638,-0.011309,-5.4e-05,0.010194,-0.024599,0.005771,0.060013,0.082291,0.014366,-0.002614,-0.000838,-0.007339,0.001453,-0.024649,0.013288,0.015152,0.00963,-0.000824,0.006813,0.002714,-0.001167,-0.048817,0.044592,-0.023015,0.014851,0.01762,0.045387,-0.030906,0.007173,0.003776,0.015211,-0.020953,0.001952,0.002525,-0.012142,-0.039369,-0.036521,0.006348,-0.004644,0.035691,-0.024049,-0.003855,-0.033096,-0.037986,-0.013133,0.001466,0.024612,0.029046,0.05214,-0.109363,0.061481,0.204481,0.011506,0.108139,-0.091808,0.022072,-0.016145,0.002995,-0.015557,-0.028934,0.015169,-0.062787,-0.069708,0.03964,-0.042153,0.005731,0.013151,-0.000734,0.010738,-0.00706,0.011914,-0.01746,-0.007601,-0.006578,-0.01149,0.038085,0.003787,-0.012301,-0.004903,-0.011139,-0.008844,0.004524,-0.008519,0.011668,1,0.064131,0.083285,-0.007225,-0.002341,0.037705,0.077547,0.061693,0.218878,-0.05536,-0.024182,-0.003413,0.019007,-0.010311,-0.005655,-0.00056,0.001646,-0.002676,-0.003372,-0.000853,-0.002451,0.013765,0.147729,-0.002456,-0.023496,0.014881,-0.000688,0.014512,-0.012152,0.016592,0.000324,-0.003694,0.009987,0.009035,0.000492,0.028048,-0.032956,-0.015247,0.017078,0.002273,0.021226,0.004263,0.002758,0.017279,-0.045105,0.101041,0.012067,0.013965,0.057461,-0.01643,0.024719,0.015322,0.015364,0.059942,0.00602,-0.033143,0.095649,0.239087,-0.098142,0.033347,0.011513,-0.016978,-0.12574,-0.023412,0.032868,-0.042118,0.049618,-0.110613,0.007598,0.009761,-0.006243,-0.018391,0.007166,-0.003868,-0.014171,0.006676,-0.005818,-0.003126,0.006893,-0.001992,0.000212,0.016476,0.010848,-0.000719,0.003157,-0.015997,-0.005344,-0.002455,0.002321,-0.003111,-0.014271,0.009977,-0.003418,0.003937,-0.006548,0.005039,0.001767,-0.001195,0.020522,-0.012854,0.026207
1,0.061386,0.106169,0.039712,-0.022115,-0.001442,0.25622,-0.133783,0.057254,0.07011,0.368548,-0.032032,-0.02902,-0.009688,-0.002736,-0.010054,0.009549,-0.010733,-0.00452,-0.00468,0.056203,-0.036254,-0.031328,-0.058391,-0.076084,0.004778,0.000173,-0.008377,0.005955,-0.005303,-0.04595,0.040073,0.075139,-0.018612,-0.00345,-0.01091,0.069516,-0.014071,-0.08287,-0.023071,0.010588,0.019175,0.093382,0.085442,0.164878,0.047891,-0.010951,0.004498,0.028886,-0.00566,-0.012144,-0.003479,0.044457,-0.03638,-0.010161,-0.001523,0.089393,-0.010338,0.078346,0.110565,-0.116,0.080699,0.157722,-0.040778,0.007456,-0.012392,0.074226,-0.026915,0.011818,-0.026203,-0.004931,0.016195,-0.004656,-0.007018,0.022842,0.029457,-0.001146,0.012281,-0.01895,-0.012118,-0.00679,0.009417,-0.013138,0.027137,-0.026554,-0.002295,-0.060466,-0.01176,0.007211,0.002758,-0.026364,-0.036195,-0.002549,-0.120221,-0.004409,0.039388,0.012507,0.021483,0.008412,-0.028538,0.027954,2,0.05449,0.123497,0.005819,0.012323,0.060342,0.234049,-0.008504,0.136982,0.181547,0.332597,-0.046735,-0.006891,0.017518,-0.009591,0.000547,-0.004467,-0.066492,0.033466,0.003361,-0.03483,0.001122,-0.084536,-0.005273,-0.005797,0.006553,-0.00031,0.042755,-0.070924,0.100625,-0.022162,-0.032521,0.086107,0.096562,-0.040163,0.055208,-0.111675,-0.055545,0.118132,-0.049402,-0.013217,-0.010953,-0.023677,0.045662,0.112783,0.022669,0.000311,0.000816,-0.004062,-0.008572,0.022782,0.002942,0.012465,0.007262,0.145992,0.012335,0.183559,-0.083484,0.005271,-0.017552,0.012517,0.055812,0.059758,0.000917,0.022878,-0.018318,0.012237,-0.009575,-0.043748,-0.008869,0.015958,0.001182,0.036075,-0.015525,-0.084302,-0.000644,-5.1e-05,-0.004684,0.002867,0.008032,0.02206,-0.020456,-0.010215,0.017534,0.062186,0.081131,0.094971,0.008468,0.030984,0.016115,-0.000712,-0.001109,-0.00325,-0.046941,-0.034074,0.074221,-0.066376,-0.091969,0.010639,-0.069766,-0.071809
2,0.085394,0.080778,0.187741,0.096003,-0.056783,0.185343,-0.092121,0.017749,0.354819,0.051304,0.546795,0.106911,-0.065122,0.042808,-0.002319,-0.034398,0.006515,0.007039,-0.005046,0.126782,0.083252,-0.015792,0.070587,-0.038367,-0.000258,0.038814,-0.001622,-0.045462,-0.046426,-0.000749,-0.007771,-0.051177,-0.034677,0.03,0.02875,-0.035737,0.001802,-0.022585,-0.007161,0.000114,-0.022794,0.012452,0.008845,-0.020313,0.036642,-0.015894,-0.108759,-0.006246,0.001708,0.006674,0.007521,-0.012477,-0.011467,0.003313,0.002281,0.01708,-0.013459,0.014161,0.022507,-0.050819,-0.019345,0.044401,0.000509,-0.01457,0.011857,0.012091,0.010073,0.045209,-0.013167,-0.073434,-0.001989,0.017422,0.044464,-0.003512,0.007467,-0.007361,-0.008367,-0.087822,0.007927,-0.012307,0.115061,0.017562,0.005589,-0.013595,-0.082689,0.063649,-0.00719,0.070586,-0.057214,0.024234,0.057142,0.006521,0.044223,-0.013236,-0.031193,0.002898,-0.011746,0.001596,-0.018319,-0.022193,3,0.080031,0.046312,0.006669,0.106649,0.005621,0.066932,0.00592,0.051393,0.184272,-0.074315,0.453361,0.278804,-0.006263,-0.007809,0.053751,-0.000852,0.000322,-0.009668,0.013843,0.005699,-0.040802,0.001319,0.036687,-0.012093,-0.008894,0.007124,-2e-06,0.000122,-0.010865,0.006906,-0.000317,0.000242,-0.00613,0.002393,0.015913,0.002526,-0.003617,-0.005551,-0.00737,0.009355,0.001529,-0.018319,-0.003978,-0.005741,0.019271,0.002625,-0.006481,0.002468,-0.003191,0.085471,0.042504,-0.023831,0.078824,0.01608,-0.026644,0.017499,0.116738,0.184933,-0.037223,0.019593,-0.042384,0.065553,-0.144726,0.092352,-0.10341,-0.009464,0.127514,0.032087,-0.019126,-0.001365,-0.158587,-0.032244,0.003444,0.026099,-0.005711,-0.008612,-0.02303,0.002778,0.022355,-0.039336,-0.075431,-0.024943,0.006756,-0.051429,-0.031282,-0.017053,0.000742,-0.005955,-0.025086,0.003621,-0.007413,0.003173,0.006584,0.010087,-0.01471,0.018515,-0.016587,0.033405,-0.014236,-0.036534
3,0.065943,0.003518,0.002292,0.004772,0.000859,0.013724,-0.005891,0.010576,0.013124,0.00073,0.039471,0.029992,0.03954,0.050844,-0.002245,0.00648,-0.00136,0.001858,0.001124,-0.003984,4.5e-05,0.001779,-0.00011,-0.001377,0.028214,0.025489,-0.0078,-0.00545,0.007739,-0.001245,0.01649,0.016,0.021962,-0.00552,0.005282,0.016232,0.003902,-0.023093,0.019198,0.004068,-0.002802,0.02151,0.104149,-0.120228,0.299261,0.006753,0.078253,0.073201,-0.000257,-0.147866,-0.14277,0.063666,-0.013431,-0.020292,0.038206,-0.010222,0.018246,-0.005087,-0.00145,-0.001958,0.002247,-0.015871,-0.002024,-0.006565,0.007519,0.004419,0.003121,0.013731,-0.0138,-0.046812,0.015801,0.008353,0.02983,-0.009662,0.011692,-0.012283,-0.019239,-0.070503,0.002441,-0.010813,0.073758,-0.028762,0.018652,-0.007678,-0.089336,0.04953,0.036433,0.150812,0.118271,-0.016834,-0.02304,0.007082,-0.004794,0.015024,0.033526,0.022352,-0.005259,0.054683,-0.007977,-0.0203,4,0.063757,0.005128,-0.00301,0.004661,0.003859,0.007914,0.001403,0.010739,0.013031,-0.001047,0.028295,0.072942,-0.026316,0.00215,-0.002032,0.001645,0.002356,0.001141,0.0017,0.003661,0.024347,-0.004677,0.00135,-0.006502,0.019216,0.003726,0.012463,-0.004193,0.027378,-0.004047,0.005095,0.01243,0.020232,0.040122,0.339422,-0.111749,-0.043352,-0.215594,-0.026852,-0.027716,-0.006373,-0.060726,-0.031858,0.01128,-0.018147,0.009752,0.028014,-0.016038,-0.016854,0.112485,0.061246,0.009818,0.011467,0.003834,-0.024607,-0.002267,0.075501,0.163091,-0.039142,0.010827,-0.020286,0.042188,-0.044898,0.022316,-0.03369,-0.006654,0.045348,0.02715,-0.036104,-0.00504,-0.041216,-0.018337,0.005066,-0.005961,-0.000203,-0.002896,-0.004301,0.005392,0.01492,-0.013924,-0.04461,-0.013606,-0.003218,-0.017281,-0.074919,0.033174,-0.022215,0.158261,-0.191191,0.2876,-0.202623,0.145727,0.111458,-0.025915,-0.101048,0.030881,0.014436,0.00303,0.020866,-0.11069
4,0.084158,0.055242,0.128703,0.121288,-0.053909,0.08898,-0.043985,-0.022355,0.15933,-0.03648,0.349268,-0.157263,-0.143386,-0.076917,0.020187,-0.044569,0.017891,0.006057,-0.001914,0.142199,0.115685,-0.002385,0.054068,-0.026289,0.039297,0.073491,-0.005021,-0.111425,-0.023931,0.001102,-0.001901,-0.031237,0.008405,0.004268,0.002261,-0.001451,0.005269,-0.022212,0.007788,0.005823,-0.003994,0.003012,0.05399,-0.063023,0.161336,-0.010324,0.031453,0.066795,0.000615,-0.13832,-0.130195,0.04948,-0.008541,-0.022808,0.03353,-0.010723,0.020226,-0.006271,-0.00172,0.014261,0.011536,-0.012269,-0.032024,-0.000264,-0.010689,0.007725,-0.002767,-0.01693,-0.027398,-0.00163,-0.006268,0.000786,-0.003166,-0.003716,0.003938,-0.001242,-0.00938,-0.024807,0.014103,0.000217,0.019762,-0.002233,0.00853,0.005114,-0.012223,-0.026074,0.022001,-0.029778,0.030757,-0.03758,-0.021316,-0.014337,0.00981,-0.01676,-0.011608,0.085863,0.002514,-0.022417,0.06161,0.029805,5,0.082212,0.069038,0.00725,0.200777,-0.020447,0.082596,-0.000484,0.005725,0.115339,-0.056718,0.149524,0.097139,0.420825,-0.002986,0.020074,-0.001272,-0.111091,0.041765,0.205561,0.052636,0.0897,-0.006634,-0.002739,-0.003782,-0.015956,0.035426,-0.007514,-0.001965,-0.052764,0.014301,0.00118,0.023889,-0.005234,0.020855,0.210933,-0.075143,-0.025636,-0.142468,-0.022055,-0.041777,-0.004774,-0.025169,-0.032845,0.027916,-0.030769,0.006847,0.074017,-0.022711,-0.03592,0.004774,-0.002423,0.027584,-0.023832,-0.003821,-0.008696,0.001753,0.010359,0.007408,-0.002949,0.002062,-0.00476,0.007768,-0.022104,0.027491,-0.013228,-0.005398,0.021415,-0.007518,0.032806,0.017288,0.102621,0.020181,-0.006442,0.02798,-0.001286,-0.033708,-0.001633,-2.6e-05,0.001859,0.012743,-0.007251,-0.003244,-0.010028,-0.022241,-0.008151,0.053213,-0.011148,0.056524,-0.091992,0.115079,-0.095257,0.069671,0.066849,-0.002277,-0.067812,0.033779,0.022523,-0.017414,0.006721,-0.005138


In [35]:
# Serialize the `data` frame to a pickle file; the relative path means it is
# written into the kernel's current working directory.
pd.to_pickle(data, 'tfidf_feats1.pkl')