In [1]:
import gzip
import math
import pickle
import zlib
import io
from collections import defaultdict
import pandas as pd
import numpy as np

# import scipy.stats

from sklearn.preprocessing import LabelEncoder

import engines
from utils import *

# Pin NumPy's global RNG so any np.random draws repeat from run to run.
np.random.seed(seed=2016)
# Starts empty; nothing in the visible cells fills it — presumably populated later (verify).
transformers = dict()

In [2]:
# Columns to read from products.csv (passed to read_csv via usecols).
train_cols = [
    'customerID',
    'Gender',
    'State',
    'transactionDate',
    'product_code',
]

In [3]:
# Load the transaction table, restricted to the columns listed in train_cols.
df_train = pd.read_csv(
    'products.csv',
    usecols=train_cols,
)

In [4]:
# Show every column when a DataFrame is rendered (no column truncation).
pd.options.display.max_columns = None

In [5]:
## fill missing values
# Missing Gender / State get explicit sentinel labels so those rows can later be
# selected as their own groups ('no_gender' / 'no_state').
#
# The filled column is assigned back rather than calling
# `df_train[col].fillna(..., inplace=True)`: that form mutates an intermediate
# Series, emits a chained-assignment warning on recent pandas, and leaves
# df_train unchanged when Copy-on-Write is enabled.
df_train['Gender'] = df_train['Gender'].fillna('no_gender')
df_train['State'] = df_train['State'].fillna('no_state')

In [6]:
# Preview the first five rows after filling the missing Gender / State values.
df_train.head(n=5)

Unnamed: 0,customerID,Gender,State,transactionDate,product_code
0,BBID_2041,male,no_state,2016-01-19,300785100.0
1,BBID_2041,male,no_state,2016-06-21,1000316000.0
2,BBID_2041,male,no_state,2016-04-07,1000315000.0
3,BBID_2041,male,no_state,2016-03-19,300930100.0
4,BBID_2041,male,no_state,2016-01-19,1000010000.0


In [7]:
# Keep only rows whose product_code is a finite number (np.isfinite is False for NaN and +/-inf).
df_train = df_train.loc[np.isfinite(df_train['product_code'])]

In [8]:
# Five most frequent State values with their row counts, most frequent first.
# The columns are labelled by name so the result does not depend on what
# value_counts().reset_index() calls them by default.
cnt_srs = (
    df_train['State']
    .value_counts()
    .rename_axis('State')
    .reset_index(name='frequency_count')
    .head(5)
)
cnt_srs


Unnamed: 0,State,frequency_count
0,MADHYA PRADESH,2043584
1,KARNATAKA,1592215
2,no_state,1047578
3,JHARKHAND,1037743
4,TAMIL NADU,850109


In [9]:
# Maps the spelling / casing variants found in the State column to one canonical
# upper-case label per state.
# NOTE(review): 'Bhopal' and 'Chennai' are only upper-cased here, whereas 'HUBLI'
# and 'east singhbhum' are folded into a state — confirm this is intended.
state_dict = {
    'MADHY PRADESH': 'MADHYA PRADESH',
    'TAMILNADU': 'TAMIL NADU',
    'MADHYA  PRADESH': 'MADHYA PRADESH',
    'HARAYANA': 'HARYANA',
    'Jharkhand': 'JHARKHAND',
    'Tamilnadu': 'TAMIL NADU',
    'Tamil Nadu': 'TAMIL NADU',
    'Madhya Pradesh': 'MADHYA PRADESH',
    'REST OF WEST BENGAL': 'WEST BENGAL',
    'west bengal': 'WEST BENGAL',
    'Uttar Pradesh': 'UTTAR PRADESH',
    'Delhi': 'DELHI',
    'Bhopal': 'BHOPAL',
    'CHHATISGARH': 'CHHATTISGARH',
    'CHATTISGARH': 'CHHATTISGARH',
    'jharkhand': 'JHARKHAND',
    'Chandigarh': 'CHANDIGARH',
    'UTTAR PRADESH WEST': 'UTTAR PRADESH',
    'ODISHA': 'ORISSA',
    'MAHARASTRA': 'MAHARASHTRA',
    'madhya pradesh': 'MADHYA PRADESH',
    'KARNATAK': 'KARNATAKA',
    'JAMMU and KASHMIR': 'JAMMU AND KASHMIR',
    'JAMMU KASHMIR': 'JAMMU AND KASHMIR',
    'Rajasthan': 'RAJASTHAN',
    'east singhbhum': 'JHARKHAND',
    'ORRISA': 'ORISSA',
    'Andhra Pradesh': 'ANDHRA PRADESH',
    'UTTARANCHAL': 'UTTARAKHAND',
    'Uttar pradesh': 'UTTAR PRADESH',
    'Maharashtra': 'MAHARASHTRA',
    'MP': 'MADHYA PRADESH',
    'UTTAR PRADESH EAST': 'UTTAR PRADESH',
    'Punjab': 'PUNJAB',
    'maharashtra': 'MAHARASHTRA',
    'Karnataka': 'KARNATAKA',
    'M.P.': 'MADHYA PRADESH',
    'DAMAN': 'DAMAN AND DIU',
    'HUBLI': 'KARNATAKA',
    'Tamil nadu': 'TAMIL NADU',
    'GUJRAT': 'GUJARAT',
    'Mp': 'MADHYA PRADESH',
    'Madhya pradesh': 'MADHYA PRADESH',
    'West Bengal': 'WEST BENGAL',
    'Gujarat': 'GUJARAT',
    'UP': 'UTTAR PRADESH',
    'Chennai': 'CHENNAI',
    'm.p.': 'MADHYA PRADESH',
    'kerala': 'KERALA',
}

# Rewrite only the State column, in place; values absent from state_dict are kept as-is.
df_train.replace(to_replace={"State": state_dict}, inplace=True)

In [10]:
# One sub-frame per State value analysed below (rows are selected by exact label match).
df_train_MP = df_train.loc[df_train['State'].eq('MADHYA PRADESH')]
df_train_KAR = df_train.loc[df_train['State'].eq('KARNATAKA')]
df_train_no_state = df_train.loc[df_train['State'].eq('no_state')]
df_train_JH = df_train.loc[df_train['State'].eq('JHARKHAND')]
df_train_TN = df_train.loc[df_train['State'].eq('TAMIL NADU')]

In [11]:
# Up to 500 most frequent product codes (cast to int64) among the 'male' rows of
# df_train_MP, with their occurrence counts, most frequent first.
cnt_srs_MP_male = (
    df_train_MP.loc[df_train_MP['Gender'] == 'male', 'product_code']
    .astype('int64')
    .value_counts()
    .rename_axis('product_code')
    .reset_index(name='frequency_count')
    .head(500)
)
cnt_srs_MP_male


Unnamed: 0,product_code,frequency_count
0,300776411,14449
1,300776410,14078
2,108100382,12984
3,300111517,10205
4,108100362,9687
5,108037568,9401
6,300785150,8154
7,108100308,7877
8,108100306,7328
9,300785148,6912


In [12]:
# Up to 500 most frequent product codes (cast to int64) among the 'female' rows of
# df_train_MP, with their occurrence counts, most frequent first.
cnt_srs_MP_female = (
    df_train_MP.loc[df_train_MP['Gender'] == 'female', 'product_code']
    .astype('int64')
    .value_counts()
    .rename_axis('product_code')
    .reset_index(name='frequency_count')
    .head(500)
)
cnt_srs_MP_female


Unnamed: 0,product_code,frequency_count
0,300776410,6792
1,108100382,6478
2,300776411,6026
3,300111517,4774
4,108100362,4493
5,108100308,4366
6,108100306,3798
7,300785150,3440
8,300785148,3362
9,108100294,3257


In [13]:
# Up to 500 most frequent product codes (cast to int64) among the 'no_gender' rows of
# df_train_MP, with their occurrence counts, most frequent first.
cnt_srs_MP_no_gender = (
    df_train_MP.loc[df_train_MP['Gender'] == 'no_gender', 'product_code']
    .astype('int64')
    .value_counts()
    .rename_axis('product_code')
    .reset_index(name='frequency_count')
    .head(500)
)
cnt_srs_MP_no_gender


Unnamed: 0,product_code,frequency_count
0,300781593,1503
1,300776410,1500
2,300776411,1214
3,108100382,994
4,108100362,757
5,108037568,756
6,300776409,747
7,300111517,736
8,300785148,718
9,300785150,598


In [14]:
# Up to 500 most frequent product codes (cast to int64) among the 'male' rows of
# df_train_KAR, with their occurrence counts, most frequent first.
cnt_srs_KAR_male = (
    df_train_KAR.loc[df_train_KAR['Gender'] == 'male', 'product_code']
    .astype('int64')
    .value_counts()
    .rename_axis('product_code')
    .reset_index(name='frequency_count')
    .head(500)
)
cnt_srs_KAR_male


Unnamed: 0,product_code,frequency_count
0,108037568,9441
1,300785148,8132
2,108000707,7517
3,300785150,5846
4,300676075,4897
5,108037504,4732
6,108005676,4628
7,300177372,4558
8,300616595,3965
9,108001125,3911


In [15]:
# Up to 500 most frequent product codes (cast to int64) among the 'female' rows of
# df_train_KAR, with their occurrence counts, most frequent first.
cnt_srs_KAR_female = (
    df_train_KAR.loc[df_train_KAR['Gender'] == 'female', 'product_code']
    .astype('int64')
    .value_counts()
    .rename_axis('product_code')
    .reset_index(name='frequency_count')
    .head(500)
)
cnt_srs_KAR_female


Unnamed: 0,product_code,frequency_count
0,300785148,2574
1,108037568,2554
2,108000707,1931
3,300785150,1856
4,108005676,1425
5,300676075,1414
6,300177372,1248
7,108037504,1203
8,108001125,1117
9,108004880,1003


In [16]:
# Up to 500 most frequent product codes (cast to int64) among the 'no_gender' rows of
# df_train_KAR, with their occurrence counts, most frequent first.
cnt_srs_KAR_no_gender = (
    df_train_KAR.loc[df_train_KAR['Gender'] == 'no_gender', 'product_code']
    .astype('int64')
    .value_counts()
    .rename_axis('product_code')
    .reset_index(name='frequency_count')
    .head(500)
)
cnt_srs_KAR_no_gender


Unnamed: 0,product_code,frequency_count
0,108037568,1620
1,108000707,1376
2,300676075,905
3,108005676,884
4,300177372,854
5,108037504,823
6,300616595,802
7,108004880,725
8,108037506,711
9,108037499,692


In [17]:
# Up to 500 most frequent product codes (cast to int64) among the 'male' rows of
# df_train_no_state, with their occurrence counts, most frequent first.
cnt_srs_no_state_male = (
    df_train_no_state.loc[df_train_no_state['Gender'] == 'male', 'product_code']
    .astype('int64')
    .value_counts()
    .rename_axis('product_code')
    .reset_index(name='frequency_count')
    .head(500)
)
cnt_srs_no_state_male


Unnamed: 0,product_code,frequency_count
0,300776411,5075
1,300776410,4304
2,108037568,3521
3,300785148,3180
4,300785150,2716
5,108100382,1872
6,108005676,1817
7,108000707,1775
8,300776409,1762
9,108100362,1439


In [18]:
# Up to 500 most frequent product codes (cast to int64) among the 'female' rows of
# df_train_no_state, with their occurrence counts, most frequent first.
cnt_srs_no_state_female = (
    df_train_no_state.loc[df_train_no_state['Gender'] == 'female', 'product_code']
    .astype('int64')
    .value_counts()
    .rename_axis('product_code')
    .reset_index(name='frequency_count')
    .head(500)
)
cnt_srs_no_state_female

Unnamed: 0,product_code,frequency_count
0,300776411,1905
1,300776410,1897
2,108100382,1413
3,300785148,1314
4,108037568,1287
5,300785150,1122
6,108100306,1060
7,300111517,990
8,108100308,950
9,108100362,938


In [19]:
# Up to 500 most frequent product codes (cast to int64) among the 'no_gender' rows of
# df_train_no_state, with their occurrence counts, most frequent first.
cnt_srs_no_state_no_gender = (
    df_train_no_state.loc[df_train_no_state['Gender'] == 'no_gender', 'product_code']
    .astype('int64')
    .value_counts()
    .rename_axis('product_code')
    .reset_index(name='frequency_count')
    .head(500)
)
cnt_srs_no_state_no_gender

Unnamed: 0,product_code,frequency_count
0,1000336252,3510
1,300785148,3458
2,300785150,3285
3,300785147,1691
4,108037568,1547
5,108100382,1216
6,500096181,1032
7,108100362,975
8,6000017313,962
9,108100306,898


In [20]:
# Up to 500 most frequent product codes (cast to int64) among the 'male' rows of
# df_train_JH, with their occurrence counts, most frequent first.
cnt_srs_JH_male = (
    df_train_JH.loc[df_train_JH['Gender'] == 'male', 'product_code']
    .astype('int64')
    .value_counts()
    .rename_axis('product_code')
    .reset_index(name='frequency_count')
    .head(500)
)
cnt_srs_JH_male

Unnamed: 0,product_code,frequency_count
0,300776411,15234
1,300776410,10427
2,108037568,5874
3,300776409,5390
4,300481673,2771
5,108000707,2440
6,108037500,2250
7,300343549,2073
8,108005676,2056
9,108037495,2029


In [21]:
# Up to 500 most frequent product codes (cast to int64) among the 'female' rows of
# df_train_JH, with their occurrence counts, most frequent first.
cnt_srs_JH_female = (
    df_train_JH.loc[df_train_JH['Gender'] == 'female', 'product_code']
    .astype('int64')
    .value_counts()
    .rename_axis('product_code')
    .reset_index(name='frequency_count')
    .head(500)
)
cnt_srs_JH_female

Unnamed: 0,product_code,frequency_count
0,300776411,3734
1,300776410,2514
2,108037568,1218
3,300776409,1211
4,300481673,680
5,108000707,514
6,300343549,481
7,108018925,476
8,108037500,474
9,108017242,412


In [22]:
# Up to 500 most frequent product codes (cast to int64) among the 'no_gender' rows of
# df_train_JH, with their occurrence counts, most frequent first.
cnt_srs_JH_no_gender = (
    df_train_JH.loc[df_train_JH['Gender'] == 'no_gender', 'product_code']
    .astype('int64')
    .value_counts()
    .rename_axis('product_code')
    .reset_index(name='frequency_count')
    .head(500)
)
cnt_srs_JH_no_gender

Unnamed: 0,product_code,frequency_count
0,300776411,1195
1,300776410,744
2,108037568,562
3,300776409,336
4,300481673,291
5,108037500,230
6,108000707,227
7,300101297,197
8,108000568,190
9,300781593,183


In [23]:
# Up to 500 most frequent product codes (cast to int64) among the 'male' rows of
# df_train_TN, with their occurrence counts, most frequent first.
cnt_srs_TN_male = (
    df_train_TN.loc[df_train_TN['Gender'] == 'male', 'product_code']
    .astype('int64')
    .value_counts()
    .rename_axis('product_code')
    .reset_index(name='frequency_count')
    .head(500)
)
cnt_srs_TN_male

Unnamed: 0,product_code,frequency_count
0,300776410,6699
1,100105505,6390
2,300785148,5349
3,300144423,4109
4,300776411,3663
5,108100294,3264
6,300785147,3133
7,108100253,2934
8,108100306,2895
9,108000707,2580


In [24]:
# Up to 500 most frequent product codes (cast to int64) among the 'female' rows of
# df_train_TN, with their occurrence counts, most frequent first.
cnt_srs_TN_female = (
    df_train_TN.loc[df_train_TN['Gender'] == 'female', 'product_code']
    .astype('int64')
    .value_counts()
    .rename_axis('product_code')
    .reset_index(name='frequency_count')
    .head(500)
)
cnt_srs_TN_female

Unnamed: 0,product_code,frequency_count
0,100105505,2958
1,300776410,2861
2,300785148,2306
3,300144423,1816
4,300776411,1604
5,108100294,1550
6,108100306,1520
7,300785147,1210
8,300356444,1170
9,108100253,1144


In [25]:
# Up to 500 most frequent product codes (cast to int64) among the 'no_gender' rows of
# df_train_TN, with their occurrence counts, most frequent first.
cnt_srs_TN_no_gender = (
    df_train_TN.loc[df_train_TN['Gender'] == 'no_gender', 'product_code']
    .astype('int64')
    .value_counts()
    .rename_axis('product_code')
    .reset_index(name='frequency_count')
    .head(500)
)
cnt_srs_TN_no_gender

Unnamed: 0,product_code,frequency_count
0,300776410,72
1,300781593,58
2,300776411,57
3,300785148,51
4,108100253,46
5,100105505,45
6,108000707,40
7,108037568,36
8,300144423,33
9,108100294,32


In [26]:
def _rows_with_top_products(state_frame, gender, top_products):
    """Select one gender's rows of a state frame, restricted to its top products.

    Parameters
    ----------
    state_frame : DataFrame with 'Gender' and 'product_code' columns.
    gender : label compared for equality against the 'Gender' column.
    top_products : frame whose ``product_code`` column lists the codes to keep.

    Returns
    -------
    DataFrame of the matching rows. The index is renumbered 0..n-1 for the
    gender subset *before* the product filter is applied, so the returned
    index can contain gaps.
    """
    gender_rows = state_frame[state_frame['Gender'] == gender].reset_index(drop=True)
    keep = gender_rows['product_code'].isin(top_products.product_code)
    return gender_rows[keep]


# Madhya Pradesh
df_train_MP_male = _rows_with_top_products(df_train_MP, 'male', cnt_srs_MP_male)
df_train_MP_female = _rows_with_top_products(df_train_MP, 'female', cnt_srs_MP_female)
df_train_MP_no_gender = _rows_with_top_products(df_train_MP, 'no_gender', cnt_srs_MP_no_gender)

# Karnataka
df_train_KAR_male = _rows_with_top_products(df_train_KAR, 'male', cnt_srs_KAR_male)
df_train_KAR_female = _rows_with_top_products(df_train_KAR, 'female', cnt_srs_KAR_female)
df_train_KAR_no_gender = _rows_with_top_products(df_train_KAR, 'no_gender', cnt_srs_KAR_no_gender)

# Rows without a state
df_train_no_state_male = _rows_with_top_products(df_train_no_state, 'male', cnt_srs_no_state_male)
df_train_no_state_female = _rows_with_top_products(df_train_no_state, 'female', cnt_srs_no_state_female)
df_train_no_state_no_gender = _rows_with_top_products(df_train_no_state, 'no_gender', cnt_srs_no_state_no_gender)

# Jharkhand
df_train_JH_male = _rows_with_top_products(df_train_JH, 'male', cnt_srs_JH_male)
df_train_JH_female = _rows_with_top_products(df_train_JH, 'female', cnt_srs_JH_female)
df_train_JH_no_gender = _rows_with_top_products(df_train_JH, 'no_gender', cnt_srs_JH_no_gender)

# Tamil Nadu
df_train_TN_male = _rows_with_top_products(df_train_TN, 'male', cnt_srs_TN_male)
df_train_TN_female = _rows_with_top_products(df_train_TN, 'female', cnt_srs_TN_female)
df_train_TN_no_gender = _rows_with_top_products(df_train_TN, 'no_gender', cnt_srs_TN_no_gender)



In [27]:
def _one_hot_product_codes(frame):
    """One-hot encode ``product_code`` and return the frame with a fresh 0..n-1 index.

    Parameters
    ----------
    frame : DataFrame containing a 'product_code' column.

    Returns
    -------
    DataFrame in which 'product_code' is replaced by one indicator column per
    distinct code, each named after the code itself.

    Notes
    -----
    ``reset_index`` returns a new frame rather than modifying its receiver.
    The earlier version of this cell called it as a bare statement, so the
    result was discarded and every frame kept the gappy index left by the
    preceding product filter. Returning the reset frame makes it take effect.
    """
    encoded = pd.get_dummies(frame, columns=['product_code'], prefix='_')
    # get_dummies names the new columns '<prefix>_<code>'; removing every '_'
    # leaves only the code. This strips underscores from ALL column names; the
    # other columns loaded in this notebook contain none.
    encoded.columns = encoded.columns.str.replace('_', '')
    return encoded.reset_index(drop=True)


# Madhya Pradesh
df_train_MP_male = _one_hot_product_codes(df_train_MP_male)
df_train_MP_female = _one_hot_product_codes(df_train_MP_female)
df_train_MP_no_gender = _one_hot_product_codes(df_train_MP_no_gender)

# Karnataka
df_train_KAR_male = _one_hot_product_codes(df_train_KAR_male)
df_train_KAR_female = _one_hot_product_codes(df_train_KAR_female)
df_train_KAR_no_gender = _one_hot_product_codes(df_train_KAR_no_gender)

# Rows without a state
df_train_no_state_male = _one_hot_product_codes(df_train_no_state_male)
df_train_no_state_female = _one_hot_product_codes(df_train_no_state_female)
df_train_no_state_no_gender = _one_hot_product_codes(df_train_no_state_no_gender)

# Jharkhand
df_train_JH_male = _one_hot_product_codes(df_train_JH_male)
df_train_JH_female = _one_hot_product_codes(df_train_JH_female)
df_train_JH_no_gender = _one_hot_product_codes(df_train_JH_no_gender)

# Tamil Nadu
df_train_TN_male = _one_hot_product_codes(df_train_TN_male)
df_train_TN_female = _one_hot_product_codes(df_train_TN_female)
df_train_TN_no_gender = _one_hot_product_codes(df_train_TN_no_gender)

# Display the last encoded frame as the cell output (same frame the cell showed before).
df_train_TN_no_gender



Unnamed: 0,customerID,Gender,State,transactionDate,100105465.0,100105505.0,108000490.0,108000568.0,108000589.0,108000590.0,108000599.0,108000600.0,108000621.0,108000653.0,108000655.0,108000704.0,108000705.0,108000706.0,108000707.0,108000763.0,108000770.0,108000822.0,108000853.0,108000959.0,108000969.0,108000980.0,108000982.0,108001018.0,108001019.0,108001038.0,108001050.0,108001065.0,108001107.0,108001109.0,108001125.0,108001127.0,108001138.0,108001140.0,108001161.0,108001163.0,108001179.0,108001180.0,108001187.0,108001261.0,108001274.0,108001293.0,108001298.0,108001732.0,108001735.0,108003214.0,108003355.0,108003356.0,108003449.0,108003451.0,108003543.0,108003545.0,108003897.0,108003898.0,108004008.0,108004035.0,108004039.0,108004132.0,108004303.0,108004398.0,108004511.0,108004526.0,108004528.0,108004624.0,108004629.0,108004632.0,108004977.0,108005031.0,108005628.0,108005629.0,108005676.0,108005681.0,108007630.0,108007928.0,108008234.0,108008595.0,108008866.0,108009896.0,108010004.0,108010135.0,108010225.0,108010676.0,108013319.0,108014378.0,108014379.0,108015124.0,108015133.0,108015251.0,108015369.0,108015910.0,108016189.0,108017242.0,108017243.0,108017245.0,108017697.0,108018156.0,108018159.0,108018729.0,108018730.0,108018749.0,108018813.0,108018870.0,108018925.0,108019094.0,108019568.0,108020318.0,108020320.0,108020335.0,108020364.0,108020365.0,108020370.0,108020371.0,108020392.0,108020412.0,108020413.0,108020414.0,108020416.0,108020418.0,108020428.0,108020429.0,108020430.0,108020457.0,108020460.0,108020461.0,108020474.0,108020477.0,108020517.0,108020526.0,108020545.0,108020710.0,108022578.0,108022591.0,108022595.0,108022599.0,108024686.0,108024733.0,108024740.0,108024758.0,108025238.0,108025239.0,108026135.0,108026141.0,108026386.0,108026494.0,108026801.0,108026926.0,108027411.0,108028237.0,108028801.0,108029297.0,108029299.0,108029333.0,108032047.0,108032050.0,108034703.0,108035179.0,108037489.0,108037499.0,108037504.0,108037534.0,108037568.0,108037578.0,10803
7817.0,108038324.0,108042408.0,108043458.0,108100064.0,108100077.0,108100112.0,108100148.0,108100183.0,108100213.0,108100218.0,108100226.0,108100232.0,108100241.0,108100253.0,108100261.0,108100267.0,108100270.0,108100273.0,108100276.0,108100277.0,108100280.0,108100288.0,108100290.0,108100294.0,108100296.0,108100297.0,108100302.0,108100306.0,108100308.0,108100319.0,108100325.0,108100328.0,108100330.0,108100331.0,108100334.0,108100340.0,108100349.0,108100352.0,108100361.0,108100371.0,108100375.0,108100377.0,108100382.0,300027193.0,300028156.0,300034770.0,300034896.0,300055034.0,300067574.0,300067606.0,300070015.0,300070102.0,300074161.0,300077074.0,300077079.0,300081559.0,300088564.0,300117600.0,300138428.0,300142826.0,300142839.0,300142841.0,300142906.0,300142925.0,300143077.0,300143926.0,300143948.0,300143957.0,300143959.0,300143960.0,300143991.0,300144051.0,300144055.0,300144056.0,300144423.0,300145584.0,300145607.0,300150379.0,300150382.0,300151171.0,300151173.0,300157247.0,300158417.0,300158420.0,300158421.0,300158422.0,300158424.0,300158425.0,300158426.0,300158464.0,300158525.0,300158532.0,300163969.0,300165590.0,300167209.0,300171759.0,300183959.0,300184748.0,300201592.0,300201658.0,300218389.0,300225323.0,300228136.0,300228672.0,300228677.0,300229149.0,300229150.0,300233616.0,300238857.0,300263127.0,300276633.0,300280999.0,300312125.0,300312163.0,300313472.0,300313506.0,300313581.0,300317078.0,300317668.0,300317672.0,300317686.0,300317690.0,300326493.0,300342889.0,300349272.0,300356444.0,300356445.0,300359448.0,300359638.0,300361270.0,300361324.0,300361361.0,300361369.0,300374464.0,300392495.0,300397985.0,300408789.0,300412431.0,300443760.0,300481673.0,300481674.0,300481737.0,300481741.0,300505436.0,300517383.0,300528552.0,300530534.0,300530538.0,300556883.0,300565068.0,300570144.0,300572518.0,300573802.0,300573804.0,300577983.0,300591560.0,300594119.0,300595507.0,300595509.0,300599199.0,300600438.0,300603007.0,300606884.0,300611179.0,300615072.0,300624681.0,3
00626813.0,300627342.0,300641716.0,300653531.0,300654004.0,300654274.0,300659097.0,300659101.0,300667814.0,300677424.0,300677450.0,300677464.0,300679288.0,300680079.0,300694542.0,300702014.0,300702056.0,300705608.0,300706395.0,300738911.0,300753494.0,300754229.0,300776409.0,300776410.0,300776411.0,300776542.0,300779555.0,300781593.0,300785123.0,300785147.0,300785148.0,300785150.0,300791438.0,300791520.0,300810668.0,300812356.0,300825747.0,300836263.0,300840291.0,300846441.0,300846793.0,300859128.0,300880665.0,300897618.0,300910402.0,300911206.0,300911265.0,300911268.0,300914065.0,300923897.0,300927907.0,300929986.0,300930022.0,300934672.0,300936601.0,300938002.0,300942688.0,300942704.0,300948276.0,300955762.0,300955906.0,300974146.0,300974316.0,300974360.0,300974966.0,300977981.0,300981554.0,300988808.0,300989165.0,300995093.0,300995645.0,301016799.0,301021066.0,301026534.0,301026560.0,500273976.0,500273977.0,530000060.0,730007573.0,1000000273.0,1000001646.0,1000001687.0,1000001696.0,1000005438.0,1000017392.0,1000020629.0,1000025076.0,1000025152.0,1000025160.0,1000027658.0,1000032404.0,1000032449.0,1000049653.0,1000049681.0,1000058092.0,1000059431.0,1000071425.0,1000079204.0,1000087676.0,1000088500.0,1000092917.0,1000101441.0,1000101633.0,1000109064.0,1000109366.0,1000110589.0,1000122581.0,1000123864.0,1000123958.0,1000147158.0,1000147770.0,1000147907.0,1000147908.0,1000147954.0,1000152756.0,1000159986.0,1000184478.0,1000191641.0,1000196166.0,1000197484.0,1000203181.0,1000207760.0,1000225699.0,1000226987.0,1000255102.0,1000257325.0,1000261493.0,1000269555.0,1000310765.0,1000327894.0,1000334998.0,1000336252.0,1000349544.0,1000368786.0,1000434939.0,1000437355.0,1000443887.0,1000450352.0,1000454563.0,1000469126.0,1000475623.0,1000482664.0,1000486566.0,1000490382.0,1000491109.0,1000496569.0,1000524264.0,1000533232.0,1000543204.0,1000569756.0,1000577934.0,1000580468.0,1000584543.0,1000594703.0,1000599132.0,1000601005.0,1000601035.0,1000609658.0,1000619267.0,1000619269.0,
1000638372.0,1000652689.0,1000652760.0,1000652761.0,1000691503.0,1000694851.0,1000701119.0,1000718135.0,300275441002.0,1.00013563714e+12,1.00013563715e+12
0,BBID_2042272,no_gender,TAMIL NADU,2016-04-03,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,BBID_2042272,no_gender,TAMIL NADU,2016-06-19,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,BBID_2042272,no_gender,TAMIL NADU,2016-04-03,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,BBID_2042272,no_gender,TAMIL NADU,2016-04-03,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,BBID_2042272,no_gender,TAMIL NADU,2016-04-03,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5,BBID_2042272,no_gender,TAMIL NADU,2016-04-03,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
6,BBID_2042272,no_gender,TAMIL NADU,2016-04-03,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
7,BBID_2042272,no_gender,TAMIL NADU,2016-04-03,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
8,BBID_2044600,no_gender,TAMIL NADU,2016-02-15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
9,BBID_2044886,no_gender,TAMIL NADU,2015-06-23,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [28]:
def _rows_on_day(frame, day):
    """Return the rows of ``frame`` whose ``transactionDate`` equals ``day``."""
    return frame.loc[frame['transactionDate'] == day]


# The two snapshot days every state/gender segment is sliced on.
# Both literals are 2016 dates.
_MAY_DAY = '2016-05-28'
_JUNE_DAY = '2016-06-28'

# --- MP segments ---
df_train_may_MP_male = _rows_on_day(df_train_MP_male, _MAY_DAY)
df_train_june_MP_male = _rows_on_day(df_train_MP_male, _JUNE_DAY)
df_train_may_MP_female = _rows_on_day(df_train_MP_female, _MAY_DAY)
df_train_june_MP_female = _rows_on_day(df_train_MP_female, _JUNE_DAY)
df_train_may_MP_no_gender = _rows_on_day(df_train_MP_no_gender, _MAY_DAY)
df_train_june_MP_no_gender = _rows_on_day(df_train_MP_no_gender, _JUNE_DAY)

# --- KAR segments ---
df_train_may_KAR_male = _rows_on_day(df_train_KAR_male, _MAY_DAY)
df_train_june_KAR_male = _rows_on_day(df_train_KAR_male, _JUNE_DAY)
df_train_may_KAR_female = _rows_on_day(df_train_KAR_female, _MAY_DAY)
df_train_june_KAR_female = _rows_on_day(df_train_KAR_female, _JUNE_DAY)
df_train_may_KAR_no_gender = _rows_on_day(df_train_KAR_no_gender, _MAY_DAY)
df_train_june_KAR_no_gender = _rows_on_day(df_train_KAR_no_gender, _JUNE_DAY)

# --- no_state segments ---
df_train_may_no_state_male = _rows_on_day(df_train_no_state_male, _MAY_DAY)
df_train_june_no_state_male = _rows_on_day(df_train_no_state_male, _JUNE_DAY)
df_train_may_no_state_female = _rows_on_day(df_train_no_state_female, _MAY_DAY)
df_train_june_no_state_female = _rows_on_day(df_train_no_state_female, _JUNE_DAY)
df_train_may_no_state_no_gender = _rows_on_day(df_train_no_state_no_gender, _MAY_DAY)
df_train_june_no_state_no_gender = _rows_on_day(df_train_no_state_no_gender, _JUNE_DAY)

# --- JH segments ---
df_train_may_JH_male = _rows_on_day(df_train_JH_male, _MAY_DAY)
df_train_june_JH_male = _rows_on_day(df_train_JH_male, _JUNE_DAY)
df_train_may_JH_female = _rows_on_day(df_train_JH_female, _MAY_DAY)
df_train_june_JH_female = _rows_on_day(df_train_JH_female, _JUNE_DAY)
df_train_may_JH_no_gender = _rows_on_day(df_train_JH_no_gender, _MAY_DAY)
df_train_june_JH_no_gender = _rows_on_day(df_train_JH_no_gender, _JUNE_DAY)

# --- TN segments ---
df_train_may_TN_male = _rows_on_day(df_train_TN_male, _MAY_DAY)
df_train_june_TN_male = _rows_on_day(df_train_TN_male, _JUNE_DAY)
df_train_may_TN_female = _rows_on_day(df_train_TN_female, _MAY_DAY)
df_train_june_TN_female = _rows_on_day(df_train_TN_female, _JUNE_DAY)
df_train_may_TN_no_gender = _rows_on_day(df_train_TN_no_gender, _MAY_DAY)
df_train_june_TN_no_gender = _rows_on_day(df_train_TN_no_gender, _JUNE_DAY)


In [29]:
def _join_previous_snapshot(current, previous):
    """Left-join ``previous`` onto ``current`` by ``customerID``.

    Every row of ``current`` is kept. Columns of ``previous`` whose names
    clash with ``current`` receive a ``_prev`` suffix, while the columns of
    ``current`` keep their names unchanged.
    """
    return current.merge(previous, how='left', on=['customerID'], suffixes=('', '_prev'))


# June snapshot on the left, May snapshot on the right, one merge per segment.
# NOTE(review): the join key is customerID only; if a customer has more than
# one row on a snapshot day the join repeats rows — confirm duplicates are
# handled upstream.
dfm_MP_male = _join_previous_snapshot(df_train_june_MP_male, df_train_may_MP_male)
dfm_MP_female = _join_previous_snapshot(df_train_june_MP_female, df_train_may_MP_female)
dfm_MP_no_gender = _join_previous_snapshot(df_train_june_MP_no_gender, df_train_may_MP_no_gender)

dfm_KAR_male = _join_previous_snapshot(df_train_june_KAR_male, df_train_may_KAR_male)
dfm_KAR_female = _join_previous_snapshot(df_train_june_KAR_female, df_train_may_KAR_female)
dfm_KAR_no_gender = _join_previous_snapshot(df_train_june_KAR_no_gender, df_train_may_KAR_no_gender)

dfm_no_state_male = _join_previous_snapshot(df_train_june_no_state_male, df_train_may_no_state_male)
dfm_no_state_female = _join_previous_snapshot(df_train_june_no_state_female, df_train_may_no_state_female)
dfm_no_state_no_gender = _join_previous_snapshot(df_train_june_no_state_no_gender, df_train_may_no_state_no_gender)

dfm_JH_male = _join_previous_snapshot(df_train_june_JH_male, df_train_may_JH_male)
dfm_JH_female = _join_previous_snapshot(df_train_june_JH_female, df_train_may_JH_female)
dfm_JH_no_gender = _join_previous_snapshot(df_train_june_JH_no_gender, df_train_may_JH_no_gender)

dfm_TN_male = _join_previous_snapshot(df_train_june_TN_male, df_train_may_TN_male)
dfm_TN_female = _join_previous_snapshot(df_train_june_TN_female, df_train_may_TN_female)
dfm_TN_no_gender = _join_previous_snapshot(df_train_june_TN_no_gender, df_train_may_TN_no_gender)


In [30]:
def _prev_product_columns(frame):
    """List the column names of ``frame`` that contain ``'.0_prev'``."""
    return [name for name in frame.columns if '.0_prev' in name]


def _curr_product_columns(frame):
    """List the column names of ``frame`` that contain ``'.0'`` but not ``'.0_prev'``."""
    return [name for name in frame.columns if '.0' in name and '.0_prev' not in name]


# For each merged segment: the suffixed (previous snapshot) product columns
# and their unsuffixed (current snapshot) counterparts.
prevcols_MP_male = _prev_product_columns(dfm_MP_male)
currcols_MP_male = _curr_product_columns(dfm_MP_male)
prevcols_MP_female = _prev_product_columns(dfm_MP_female)
currcols_MP_female = _curr_product_columns(dfm_MP_female)
prevcols_MP_no_gender = _prev_product_columns(dfm_MP_no_gender)
currcols_MP_no_gender = _curr_product_columns(dfm_MP_no_gender)

prevcols_KAR_male = _prev_product_columns(dfm_KAR_male)
currcols_KAR_male = _curr_product_columns(dfm_KAR_male)
prevcols_KAR_female = _prev_product_columns(dfm_KAR_female)
currcols_KAR_female = _curr_product_columns(dfm_KAR_female)
prevcols_KAR_no_gender = _prev_product_columns(dfm_KAR_no_gender)
currcols_KAR_no_gender = _curr_product_columns(dfm_KAR_no_gender)

prevcols_no_state_male = _prev_product_columns(dfm_no_state_male)
currcols_no_state_male = _curr_product_columns(dfm_no_state_male)
prevcols_no_state_female = _prev_product_columns(dfm_no_state_female)
currcols_no_state_female = _curr_product_columns(dfm_no_state_female)
prevcols_no_state_no_gender = _prev_product_columns(dfm_no_state_no_gender)
currcols_no_state_no_gender = _curr_product_columns(dfm_no_state_no_gender)

prevcols_JH_male = _prev_product_columns(dfm_JH_male)
currcols_JH_male = _curr_product_columns(dfm_JH_male)
prevcols_JH_female = _prev_product_columns(dfm_JH_female)
currcols_JH_female = _curr_product_columns(dfm_JH_female)
prevcols_JH_no_gender = _prev_product_columns(dfm_JH_no_gender)
currcols_JH_no_gender = _curr_product_columns(dfm_JH_no_gender)

prevcols_TN_male = _prev_product_columns(dfm_TN_male)
currcols_TN_male = _curr_product_columns(dfm_TN_male)
prevcols_TN_female = _prev_product_columns(dfm_TN_female)
currcols_TN_female = _curr_product_columns(dfm_TN_female)
prevcols_TN_no_gender = _prev_product_columns(dfm_TN_no_gender)
currcols_TN_no_gender = _curr_product_columns(dfm_TN_no_gender)



In [31]:
# Zero-fill the previous-snapshot product columns of every merged segment.
#
# The fill is assigned back at frame level. Calling
# ``frame[col].fillna(0, inplace=True)`` on a selected column is a chained
# in-place assignment: it only reaches the parent frame when the selection
# happens to be a view, triggers a warning in recent pandas, and does nothing
# under Copy-on-Write.
for _frame, _cols in (
    (dfm_MP_male, prevcols_MP_male),
    (dfm_MP_female, prevcols_MP_female),
    (dfm_MP_no_gender, prevcols_MP_no_gender),
    (dfm_KAR_male, prevcols_KAR_male),
    (dfm_KAR_female, prevcols_KAR_female),
    (dfm_KAR_no_gender, prevcols_KAR_no_gender),
    (dfm_no_state_male, prevcols_no_state_male),
    (dfm_no_state_female, prevcols_no_state_female),
    (dfm_no_state_no_gender, prevcols_no_state_no_gender),
    (dfm_JH_male, prevcols_JH_male),
    (dfm_JH_female, prevcols_JH_female),
    (dfm_JH_no_gender, prevcols_JH_no_gender),
    (dfm_TN_male, prevcols_TN_male),
    (dfm_TN_female, prevcols_TN_female),
    (dfm_TN_no_gender, prevcols_TN_no_gender),
):
    if _cols:  # an empty column list means there is nothing to fill
        _frame[_cols] = _frame[_cols].fillna(0)


In [32]:
# Zero-fill the current-snapshot product columns of every merged segment.
#
# The fill is assigned back at frame level. Calling
# ``frame[col].fillna(0, inplace=True)`` on a selected column is a chained
# in-place assignment: it only reaches the parent frame when the selection
# happens to be a view, triggers a warning in recent pandas, and does nothing
# under Copy-on-Write.
for _frame, _cols in (
    (dfm_MP_male, currcols_MP_male),
    (dfm_MP_female, currcols_MP_female),
    (dfm_MP_no_gender, currcols_MP_no_gender),
    (dfm_KAR_male, currcols_KAR_male),
    (dfm_KAR_female, currcols_KAR_female),
    (dfm_KAR_no_gender, currcols_KAR_no_gender),
    (dfm_no_state_male, currcols_no_state_male),
    (dfm_no_state_female, currcols_no_state_female),
    (dfm_no_state_no_gender, currcols_no_state_no_gender),
    (dfm_JH_male, currcols_JH_male),
    (dfm_JH_female, currcols_JH_female),
    (dfm_JH_no_gender, currcols_JH_no_gender),
    (dfm_TN_male, currcols_TN_male),
    (dfm_TN_female, currcols_TN_female),
    (dfm_TN_no_gender, currcols_TN_no_gender),
):
    if _cols:  # an empty column list means there is nothing to fill
        _frame[_cols] = _frame[_cols].fillna(0)


In [33]:
# Replace every current-snapshot product column with
# ``max(current - previous, 0)``, where the previous value comes from the
# matching ``<col>_prev`` column. Negative differences are floored at 0.
#
# ``Series.clip(lower=0)`` does the flooring in one vectorized step instead of
# calling a Python lambda once per cell.
#
# NOTE(review): dfm_TN_no_gender is not processed here. Its loop was
# commented out in the original cell, and the later drop/filter/save cells
# skip that segment as well — confirm this is intended.
for _frame, _cols in (
    (dfm_MP_male, currcols_MP_male),
    (dfm_MP_female, currcols_MP_female),
    (dfm_MP_no_gender, currcols_MP_no_gender),
    (dfm_KAR_male, currcols_KAR_male),
    (dfm_KAR_female, currcols_KAR_female),
    (dfm_KAR_no_gender, currcols_KAR_no_gender),
    (dfm_no_state_male, currcols_no_state_male),
    (dfm_no_state_female, currcols_no_state_female),
    (dfm_no_state_no_gender, currcols_no_state_no_gender),
    (dfm_JH_male, currcols_JH_male),
    (dfm_JH_female, currcols_JH_female),
    (dfm_JH_no_gender, currcols_JH_no_gender),
    (dfm_TN_male, currcols_TN_male),
    (dfm_TN_female, currcols_TN_female),
):
    for _col in _cols:
        _frame[_col] = (_frame[_col] - _frame[_col + '_prev']).clip(lower=0)

In [34]:
def _other_prev_columns(frame, product_prev_cols):
    """List the ``_prev`` columns of ``frame`` that are not in ``product_prev_cols``."""
    return [name for name in frame.columns
            if '_prev' in name and name not in product_prev_cols]


# Per segment: find the suffixed columns that are not product columns and
# remove them from the merged frame in place. The product ``_prev`` columns
# stay. dfm_TN_no_gender is not touched by this cell.
prevcols2_MP_male = _other_prev_columns(dfm_MP_male, prevcols_MP_male)
dfm_MP_male.drop(prevcols2_MP_male, axis=1, inplace=True)
prevcols2_MP_female = _other_prev_columns(dfm_MP_female, prevcols_MP_female)
dfm_MP_female.drop(prevcols2_MP_female, axis=1, inplace=True)
prevcols2_MP_no_gender = _other_prev_columns(dfm_MP_no_gender, prevcols_MP_no_gender)
dfm_MP_no_gender.drop(prevcols2_MP_no_gender, axis=1, inplace=True)

prevcols2_KAR_male = _other_prev_columns(dfm_KAR_male, prevcols_KAR_male)
dfm_KAR_male.drop(prevcols2_KAR_male, axis=1, inplace=True)
prevcols2_KAR_female = _other_prev_columns(dfm_KAR_female, prevcols_KAR_female)
dfm_KAR_female.drop(prevcols2_KAR_female, axis=1, inplace=True)
prevcols2_KAR_no_gender = _other_prev_columns(dfm_KAR_no_gender, prevcols_KAR_no_gender)
dfm_KAR_no_gender.drop(prevcols2_KAR_no_gender, axis=1, inplace=True)

prevcols2_no_state_male = _other_prev_columns(dfm_no_state_male, prevcols_no_state_male)
dfm_no_state_male.drop(prevcols2_no_state_male, axis=1, inplace=True)
prevcols2_no_state_female = _other_prev_columns(dfm_no_state_female, prevcols_no_state_female)
dfm_no_state_female.drop(prevcols2_no_state_female, axis=1, inplace=True)
prevcols2_no_state_no_gender = _other_prev_columns(dfm_no_state_no_gender, prevcols_no_state_no_gender)
dfm_no_state_no_gender.drop(prevcols2_no_state_no_gender, axis=1, inplace=True)

prevcols2_JH_male = _other_prev_columns(dfm_JH_male, prevcols_JH_male)
dfm_JH_male.drop(prevcols2_JH_male, axis=1, inplace=True)
prevcols2_JH_female = _other_prev_columns(dfm_JH_female, prevcols_JH_female)
dfm_JH_female.drop(prevcols2_JH_female, axis=1, inplace=True)
prevcols2_JH_no_gender = _other_prev_columns(dfm_JH_no_gender, prevcols_JH_no_gender)
dfm_JH_no_gender.drop(prevcols2_JH_no_gender, axis=1, inplace=True)

prevcols2_TN_male = _other_prev_columns(dfm_TN_male, prevcols_TN_male)
dfm_TN_male.drop(prevcols2_TN_male, axis=1, inplace=True)
prevcols2_TN_female = _other_prev_columns(dfm_TN_female, prevcols_TN_female)
dfm_TN_female.drop(prevcols2_TN_female, axis=1, inplace=True)


In [35]:
def _rows_with_positive_total(frame, product_cols):
    """Return the rows of ``frame`` whose sum over ``product_cols`` is above zero."""
    row_totals = frame[product_cols].sum(axis=1)
    return frame.loc[row_totals.gt(0)]


# Rebind every segment to only the rows where at least one current product
# column is still positive. dfm_TN_no_gender is not filtered by this cell.
dfm_MP_male = _rows_with_positive_total(dfm_MP_male, currcols_MP_male)
dfm_MP_female = _rows_with_positive_total(dfm_MP_female, currcols_MP_female)
dfm_MP_no_gender = _rows_with_positive_total(dfm_MP_no_gender, currcols_MP_no_gender)

dfm_KAR_male = _rows_with_positive_total(dfm_KAR_male, currcols_KAR_male)
dfm_KAR_female = _rows_with_positive_total(dfm_KAR_female, currcols_KAR_female)
dfm_KAR_no_gender = _rows_with_positive_total(dfm_KAR_no_gender, currcols_KAR_no_gender)

dfm_no_state_male = _rows_with_positive_total(dfm_no_state_male, currcols_no_state_male)
dfm_no_state_female = _rows_with_positive_total(dfm_no_state_female, currcols_no_state_female)
dfm_no_state_no_gender = _rows_with_positive_total(dfm_no_state_no_gender, currcols_no_state_no_gender)

dfm_JH_male = _rows_with_positive_total(dfm_JH_male, currcols_JH_male)
dfm_JH_female = _rows_with_positive_total(dfm_JH_female, currcols_JH_female)
dfm_JH_no_gender = _rows_with_positive_total(dfm_JH_no_gender, currcols_JH_no_gender)

dfm_TN_male = _rows_with_positive_total(dfm_TN_male, currcols_TN_male)
dfm_TN_female = _rows_with_positive_total(dfm_TN_female, currcols_TN_female)

In [None]:
# Sanity check: print, for each state, the grand total of the current product
# columns summed over that state's gender segments. Output order is
# MP, KAR, no_state, JH, TN.
#
# The state-level names this cell used before (dfm_MP, currcols_MP, ...) are
# not created by the cells visible above, which only build per-gender frames,
# so the totals are assembled from those frames instead.
# NOTE(review): dfm_TN_no_gender is left out because the preceding
# difference/filter cells skip that segment — confirm that is intended.
for _segments in (
    ((dfm_MP_male, currcols_MP_male),
     (dfm_MP_female, currcols_MP_female),
     (dfm_MP_no_gender, currcols_MP_no_gender)),
    ((dfm_KAR_male, currcols_KAR_male),
     (dfm_KAR_female, currcols_KAR_female),
     (dfm_KAR_no_gender, currcols_KAR_no_gender)),
    ((dfm_no_state_male, currcols_no_state_male),
     (dfm_no_state_female, currcols_no_state_female),
     (dfm_no_state_no_gender, currcols_no_state_no_gender)),
    ((dfm_JH_male, currcols_JH_male),
     (dfm_JH_female, currcols_JH_female),
     (dfm_JH_no_gender, currcols_JH_no_gender)),
    ((dfm_TN_male, currcols_TN_male),
     (dfm_TN_female, currcols_TN_female)),
):
    print(sum(frame[cols].sum().sum() for frame, cols in _segments))

In [38]:
# Write each processed segment to its own CSV file under
# cleaned_data/statewise/. dfm_TN_no_gender is not written by this cell.
for _frame, _path in (
    (dfm_MP_male, 'cleaned_data/statewise/MP_male.csv'),
    (dfm_MP_female, 'cleaned_data/statewise/MP_female.csv'),
    (dfm_MP_no_gender, 'cleaned_data/statewise/MP_no_gender.csv'),
    (dfm_KAR_male, 'cleaned_data/statewise/KAR_male.csv'),
    (dfm_KAR_female, 'cleaned_data/statewise/KAR_female.csv'),
    (dfm_KAR_no_gender, 'cleaned_data/statewise/KAR_no_gender.csv'),
    (dfm_no_state_male, 'cleaned_data/statewise/no_state_male.csv'),
    (dfm_no_state_female, 'cleaned_data/statewise/no_state_female.csv'),
    (dfm_no_state_no_gender, 'cleaned_data/statewise/no_state_no_gender.csv'),
    (dfm_JH_male, 'cleaned_data/statewise/JH_male.csv'),
    (dfm_JH_female, 'cleaned_data/statewise/JH_female.csv'),
    (dfm_JH_no_gender, 'cleaned_data/statewise/JH_no_gender.csv'),
    (dfm_TN_male, 'cleaned_data/statewise/TN_male.csv'),
    (dfm_TN_female, 'cleaned_data/statewise/TN_female.csv'),
):
    _frame.to_csv(_path)

In [None]:
# Build one output row per (input row, product column) pair whose value is
# exactly 1. Each output row carries the non-product columns of its input row
# plus ``target``, the position of that product column in currcols_MP_male.
#
# This replaces a row-by-row ``DataFrame.append`` loop, which copied the
# growing frame on every hit and no longer exists in pandas 2.x. It also
# yields an empty frame (rather than a KeyError from the final drop) when no
# cell equals 1. The periodic progress prints are gone because the work is a
# single vectorized step.
# ``nonzero`` walks the boolean matrix row by row, so rows come out in the
# same order the nested loops produced them.
_hit_rows, _hit_cols = (dfm_MP_male[currcols_MP_male] == 1).values.nonzero()
df_new_MP_male = dfm_MP_male.drop(currcols_MP_male, axis=1).iloc[_hit_rows].copy()
df_new_MP_male['target'] = _hit_cols



In [None]:
# Build one output row per (input row, product column) pair whose value is
# exactly 1. Each output row carries the non-product columns of its input row
# plus ``target``, the position of that product column in currcols_MP_female.
#
# This replaces a row-by-row ``DataFrame.append`` loop, which copied the
# growing frame on every hit and no longer exists in pandas 2.x. It also
# yields an empty frame (rather than a KeyError from the final drop) when no
# cell equals 1. The periodic progress prints are gone because the work is a
# single vectorized step.
# ``nonzero`` walks the boolean matrix row by row, so rows come out in the
# same order the nested loops produced them.
_hit_rows, _hit_cols = (dfm_MP_female[currcols_MP_female] == 1).values.nonzero()
df_new_MP_female = dfm_MP_female.drop(currcols_MP_female, axis=1).iloc[_hit_rows].copy()
df_new_MP_female['target'] = _hit_cols




In [None]:
def _one_row_per_new_product(frame, product_cols):
    """Expand ``frame`` into one row per product column whose value is 1.

    Parameters
    ----------
    frame : pandas.DataFrame
        Merged segment frame that contains every column in ``product_cols``.
    product_cols : list of str
        Product column names to scan.

    Returns
    -------
    pandas.DataFrame
        For every (row, product column) pair with value exactly 1, a copy of
        that row without the ``product_cols`` columns plus a ``target`` column
        holding the position of the product column inside ``product_cols``.
        Rows keep their original index labels and are ordered row by row,
        then by column position. The result is empty when nothing equals 1.

    Notes
    -----
    Replaces a row-by-row ``DataFrame.append`` loop, which copied the growing
    frame on every hit, no longer exists in pandas 2.x, and ended with a
    ``drop`` that raised KeyError when no row had been appended.
    """
    # nonzero() on the 2-D boolean array yields matching (row, column)
    # positions in row-major order.
    hit_rows, hit_cols = (frame[product_cols] == 1).values.nonzero()
    expanded = frame.drop(product_cols, axis=1).iloc[hit_rows].copy()
    expanded['target'] = hit_cols
    return expanded


# No df_new_TN_no_gender is built; that segment is skipped here as in the
# preceding filter/save cells.
df_new_MP_no_gender = _one_row_per_new_product(dfm_MP_no_gender, currcols_MP_no_gender)

df_new_KAR_male = _one_row_per_new_product(dfm_KAR_male, currcols_KAR_male)
df_new_KAR_female = _one_row_per_new_product(dfm_KAR_female, currcols_KAR_female)
df_new_KAR_no_gender = _one_row_per_new_product(dfm_KAR_no_gender, currcols_KAR_no_gender)

df_new_no_state_male = _one_row_per_new_product(dfm_no_state_male, currcols_no_state_male)
df_new_no_state_female = _one_row_per_new_product(dfm_no_state_female, currcols_no_state_female)
df_new_no_state_no_gender = _one_row_per_new_product(dfm_no_state_no_gender, currcols_no_state_no_gender)

df_new_JH_male = _one_row_per_new_product(dfm_JH_male, currcols_JH_male)
df_new_JH_female = _one_row_per_new_product(dfm_JH_female, currcols_JH_female)
df_new_JH_no_gender = _one_row_per_new_product(dfm_JH_no_gender, currcols_JH_no_gender)

df_new_TN_male = _one_row_per_new_product(dfm_TN_male, currcols_TN_male)
df_new_TN_female = _one_row_per_new_product(dfm_TN_female, currcols_TN_female)


In [None]:
# Write the per-target training rows, one CSV per state, to the same five
# paths as before.
#
# The state-level frames this cell used to write (df_new_MP, df_new_KAR, ...)
# are not built by the cells visible above, which only create per-gender
# frames, so each state's file is assembled by stacking its gender segments.
# NOTE(review): ``target`` is a position within each segment's own currcols_*
# list — confirm those lists are identical across the genders of a state
# before treating the combined file as one label space.
# NOTE(review): there is no df_new_TN_no_gender, so the TN file holds the
# male and female segments only.
for _path, _parts in (
    ('cleaned_data/statewise/700_prods_2016_MP.csv',
     [df_new_MP_male, df_new_MP_female, df_new_MP_no_gender]),
    ('cleaned_data/statewise/700_prods_2016_KAR.csv',
     [df_new_KAR_male, df_new_KAR_female, df_new_KAR_no_gender]),
    ('cleaned_data/statewise/700_prods_2016_no_state.csv',
     [df_new_no_state_male, df_new_no_state_female, df_new_no_state_no_gender]),
    ('cleaned_data/statewise/700_prods_2016_JH.csv',
     [df_new_JH_male, df_new_JH_female, df_new_JH_no_gender]),
    ('cleaned_data/statewise/700_prods_2016_TN.csv',
     [df_new_TN_male, df_new_TN_female]),
):
    pd.concat(_parts).to_csv(_path)
