In [1]:
import warnings
warnings.filterwarnings('ignore')
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn
import os
import datetime

# ---- Load the raw tables (paths are relative to the notebook's working dir) ----
# The targets on the left are bound, in order, to the files listed on the right.
(c_demographics,
 c_transaction,
 item_data,
 coupon_item_map,
 campaignData,
 train,
 test,
 sampleSubm) = map(pd.read_csv, [
    'customer_demographics.csv',
    'customer_transaction_data.csv',
    'item_data.csv',
    'coupon_item_mapping.csv',
    'campaign_data.csv',
    'train.csv',
    'test_QyjYwdj.csv',
    'sample_submission_Byiv0dS.csv',
])

# Composite "<brand_type>_<category>" label per item, then attach every item
# attribute to its transaction rows.  The left join keeps transactions whose
# item_id has no match in item_data (their item columns become NaN).
item_data['brand_type_category'] = item_data['brand_type'] + '_' + item_data['category']
c_transaction = c_transaction.merge(item_data, how='left', on='item_id')

# Parse the date strings: transactions are ISO formatted, campaign dates are
# day/month/two-digit-year.
c_transaction['date'] = pd.to_datetime(c_transaction['date'], format='%Y-%m-%d')
for date_col in ('start_date', 'end_date'):
    campaignData[date_col] = pd.to_datetime(campaignData[date_col], format='%d/%m/%y')

# Order campaigns chronologically and record how long each one runs (Timedelta).
campaignData = campaignData.sort_values('start_date')
campaignData['duration'] = campaignData['end_date'].sub(campaignData['start_date'])

In [2]:
# Display-only preview of the transaction table after the item attributes
# were merged onto it; nothing is assigned or modified here.
c_transaction

Unnamed: 0,date,customer_id,item_id,quantity,selling_price,other_discount,coupon_discount,brand,brand_type,category,brand_type_category
0,2012-01-02,1501,26830,1,35.26,-10.69,0.0,56,Local,Natural Products,Local_Natural Products
1,2012-01-02,1501,54253,1,53.43,-13.89,0.0,56,Local,Natural Products,Local_Natural Products
2,2012-01-02,1501,31962,1,106.50,-14.25,0.0,524,Established,Grocery,Established_Grocery
3,2012-01-02,1501,33647,1,67.32,0.00,0.0,1134,Established,Grocery,Established_Grocery
4,2012-01-02,1501,48199,1,71.24,-28.14,0.0,524,Established,Grocery,Established_Grocery
5,2012-01-02,1501,57397,1,71.24,-28.14,0.0,524,Established,Grocery,Established_Grocery
6,2012-01-02,857,12424,1,106.50,-14.25,0.0,971,Established,Grocery,Established_Grocery
7,2012-01-02,857,14930,1,110.07,0.00,0.0,3235,Established,Meat,Established_Meat
8,2012-01-02,857,16657,1,89.05,-35.26,0.0,2011,Established,Packaged Meat,Established_Packaged Meat
9,2012-01-02,67,10537,3,32.06,0.00,0.0,487,Established,Grocery,Established_Grocery


In [3]:
# ---- Transaction-level features ------------------------------------------------
# NOTE(review): discounts appear to be stored as non-positive amounts (a coupon
# is treated as "used" when coupon_discount < 0 below) - confirm against the data.
c_transaction['total_disc'] = c_transaction['other_discount'] + c_transaction['coupon_discount']
c_transaction['Total_Bill'] = c_transaction['selling_price'] * c_transaction['quantity'] + c_transaction['total_disc']

# Calendar features derived from the purchase date.
c_transaction['dayofweek'] = c_transaction['date'].dt.dayofweek
c_transaction['month'] = c_transaction['date'].dt.month
c_transaction['is_monthEnd'] = c_transaction['date'].dt.day > 25
c_transaction['is_monthStart'] = c_transaction['date'].dt.day < 7

# Magnitude of the combined discount relative to the selling price
# (reuses total_disc instead of re-adding the two discount columns).
c_transaction['disc_perc'] = c_transaction['total_disc'].abs() / c_transaction['selling_price']
c_transaction['is_coupoun_disc'] = c_transaction['coupon_discount'] < 0
c_transaction['brand_category'] = c_transaction['brand'].astype('str') + '_' + c_transaction['category']

# coupon_id -> array of the item_ids that coupon applies to.
coupon_item_dict = {x : coupon_item_map[coupon_item_map.coupon_id == x]['item_id'].values for x in coupon_item_map.coupon_id.unique()}
c_transaction['dayofmonth'] = c_transaction['date'].dt.day

# ---- Customer demographics -----------------------------------------------------
# Replace each age bucket by a representative numeric age.  The strict lookup is
# kept on purpose: an unexpected bucket raises KeyError instead of passing silently.
treat_age = {'70+' : 72, '46-55' : 50.5, '36-45' : 40.5, '26-35' : 30.5, '56-70' : 63, '18-25' : 21.5}
c_demographics['age_approximated'] = c_demographics['age_range'].apply(lambda x : treat_age[x])
c_demographics['income_bucket_div_age'] = c_demographics['income_bracket'] / c_demographics['age_approximated']

# Numeric family size is only needed for the ratios below, so it lives in a
# local Series rather than a scratch column that is added and dropped in place.
treat_family_size = {'1' : 1, '2' : 2, '3' : 3, '4' : 4, '5+' : 5}
family_size_numeric = c_demographics['family_size'].map(treat_family_size)
c_demographics['income_div_family_size'] = c_demographics['income_bracket'] / family_size_numeric

# no_of_children is overwritten with its numeric form.  Guard the conversion so
# that re-running this cell does not push the already-numeric values through the
# string-keyed mapping again (which would turn the whole column into NaN).
if not pd.api.types.is_numeric_dtype(c_demographics['no_of_children']):
    c_demographics['no_of_children'] = c_demographics['no_of_children'].map({'1' : 1, '2' : 2, '3+' : 3})
c_demographics['income_bucket_div_no_of_child'] = c_demographics['income_bracket'] / c_demographics['no_of_children']

c_demographics['family_to_number_of_child'] = family_size_numeric / c_demographics['no_of_children']

# Campaign length in whole days.  Derived from the two date columns rather than
# from the existing `duration` column, so the cell can be executed repeatedly
# (calling `.dt.days` on an already-converted integer column would fail).
campaignData['duration'] = (campaignData['end_date'] - campaignData['start_date']).dt.days

In [4]:
def _most_frequent(x, col):
    """Return the most frequent non-null value of ``x[col]``, or NaN if there is none.

    ``value_counts`` drops nulls, so a group whose column is entirely NaN yields
    an empty result; indexing it with ``[0]`` would raise IndexError.
    """
    counts = x[col].value_counts()
    return counts.index[0] if len(counts) else np.nan


# Aggregator name -> callable(frame, column) returning one scalar.
# 'max_' / 'min_' carry a trailing underscore; the names end up verbatim in the
# generated feature names ("<column>_<aggregator>").
aggregators = {
    'nunique' : lambda x, col : x[col].nunique(),
    'mode' : _most_frequent,
    'mean' : lambda x, col : x[col].mean(),
    'median' : lambda x, col : x[col].median(),
    'std' : lambda x, col : x[col].std(),
    'max_' : lambda x, col : x[col].max(),
    'min_' : lambda x, col : x[col].min(),
    'sum' : lambda x, col : x[col].sum(),
}

# Transaction column -> names of the aggregators to apply to it.
GroupByDict = {
    'item_id' : ['nunique', 'mode'],
    'quantity' : ['mean', 'nunique', 'mode', 'std', 'max_'],
    'selling_price' : ['mean', 'max_', 'min_', 'std'],
    'other_discount' : ['mean', 'max_', 'min_', 'std'],
    'coupon_discount' : ['mean', 'max_', 'min_', 'std'],
    'brand' : ['nunique', 'mode'],
    'brand_type' : ['nunique', 'mode'],
    'brand_type_category' : ['nunique', 'mode'],
    'total_disc' : ['mean', 'max_', 'min_', 'std'],
    'Total_Bill' : ['mean', 'max_', 'min_', 'std'],
    'dayofweek' : ['nunique', 'mode'],
    'month' : ['nunique', 'mode'],
    'is_monthEnd' : ['mean'],
    'is_monthStart' : ['mean'],
    'disc_perc' : ['mean', 'max_', 'min_', 'std'],
    'is_coupoun_disc' : ['mean', 'sum'],
    'brand_category' : ['nunique', 'mode'],
    'dayofmonth' : ['nunique', 'mode']
}


def group_transaction_by_cust(x):
    """Collapse one group of transaction rows into a single row of features.

    Intended as the function passed to ``groupby(...).apply``.

    Parameters
    ----------
    x : pandas.DataFrame
        Rows of one group; must contain every column named in ``GroupByDict``.

    Returns
    -------
    pandas.Series
        One entry per (column, aggregator) pair of ``GroupByDict``, labelled
        ``"<column>_<aggregator>"`` and ordered as in ``GroupByDict``.
    """
    features = {}
    for col, aggs in GroupByDict.items():
        for agg in aggs:
            features[col + '_' + agg] = aggregators[agg](x, col)
    # Explicit index keeps the feature order independent of dict handling.
    return pd.Series(features, index=list(features))

In [5]:
## Please comment the cell if already ran it once.
# (The merges below add the aggregate columns to `train` / `test`; running the
# cell again on the already-merged frames would merge them in a second time.)

def _aggregate_history_before(campaign_ids):
    """Per-customer transaction aggregates as of each campaign's start date.

    For every campaign id, keeps the transactions dated strictly before that
    campaign's ``start_date`` (so no information from the campaign period leaks
    into the features), aggregates them per customer with
    ``group_transaction_by_cust`` and tags the result with the campaign id.

    Reads the notebook globals ``campaignData`` and ``c_transaction``.

    Returns one DataFrame with ``customer_id`` and ``campaign_id`` as regular
    columns, ready to be merged on those two keys.
    """
    per_campaign = []
    for cID in campaign_ids:
        date = campaignData[campaignData.campaign_id == cID]['start_date'].values[0]
        print("#" * 10 + '{} is {}'.format(cID, date) + '#'*10)
        history = c_transaction[c_transaction.date < date]
        grouped = history.groupby(by = 'customer_id').apply(group_transaction_by_cust)
        grouped['campaign_id'] = cID
        per_campaign.append(grouped)
    # Concatenate once at the end: DataFrame.append was removed in pandas 2.0
    # and re-copies the accumulated frame on every iteration.
    return pd.concat(per_campaign).reset_index()


aggData = _aggregate_history_before(train.campaign_id.unique())
aggDataTest = _aggregate_history_before(test.campaign_id.unique())

# Left joins: every train/test row is kept; customers without any prior
# transaction get NaN aggregates.
test = pd.merge(test, aggDataTest, on = [ 'campaign_id', 'customer_id'], how = 'left')
train = pd.merge(train, aggData, on = [ 'campaign_id', 'customer_id'], how = 'left')

# Persist the enriched frames so the expensive aggregation need not be repeated.
train.to_pickle('grpd_by_train.pkl')
test.to_pickle('grpd_by_test.pkl')

##########13 is 2013-05-19T00:00:00.000000000##########
##########9 is 2013-03-11T00:00:00.000000000##########
##########8 is 2013-02-16T00:00:00.000000000##########
##########11 is 2013-04-22T00:00:00.000000000##########
##########29 is 2012-10-08T00:00:00.000000000##########
##########30 is 2012-11-19T00:00:00.000000000##########
##########2 is 2012-12-17T00:00:00.000000000##########
##########5 is 2013-01-12T00:00:00.000000000##########
##########12 is 2013-04-22T00:00:00.000000000##########
##########26 is 2012-08-12T00:00:00.000000000##########
##########3 is 2012-12-22T00:00:00.000000000##########
##########4 is 2013-01-07T00:00:00.000000000##########
##########10 is 2013-04-08T00:00:00.000000000##########
##########7 is 2013-02-02T00:00:00.000000000##########
##########28 is 2012-09-16T00:00:00.000000000##########
##########27 is 2012-08-25T00:00:00.000000000##########
##########6 is 2013-01-28T00:00:00.000000000##########
##########1 is 2012-12-12T00:00:00.000000000##########
#

In [6]:
# Reload the enriched train/test frames from the pickles written earlier, so the
# notebook can resume from this point without redoing the aggregation.
# NOTE: only unpickle files that were produced locally / come from a trusted source.
train, test = (
    pd.read_pickle(pickle_path)
    for pickle_path in ('grpd_by_train.pkl', 'grpd_by_test.pkl')
)

In [7]:
def do_parallel_train(campIDs):
    """Build purchase-history features per (customer, coupon) for one train campaign.

    For every customer that ``train`` lists under campaign ``campIDs`` and every
    coupon offered to that customer, selects the customer's transactions dated
    strictly before the campaign's ``start_date`` whose item is covered by the
    coupon (per ``coupon_item_dict``) and summarises them.

    Reads the notebook globals ``train``, ``campaignData``, ``c_transaction`` and
    ``coupon_item_dict``; takes a single argument so it can be mapped over
    campaign ids by a process pool.  Prints one progress line per customer.

    Parameters
    ----------
    campIDs
        A single campaign id (despite the plural name).

    Returns
    -------
    list of tuple
        One 24-element tuple per (customer, coupon):
        ``(campaign_id, customer_id, coupon_id, n_matching_transactions)``
        followed by mean/std/max/min of ``selling_price``, ``other_discount``,
        ``coupon_discount``, ``coupon_discount`` (again) and ``Total_Bill``.
        Statistics are NaN when there is no matching past transaction.
    """
    stat_columns = ('selling_price', 'other_discount', 'coupon_discount', 'Total_Bill')

    Ans = []
    campaign_rows = train[train['campaign_id'] == campIDs]
    date = campaignData[campaignData.campaign_id == campIDs]['start_date'].values[0]
    past = c_transaction[c_transaction.date < date]
    for custID in campaign_rows.customer_id.unique():
        print('{} ...... {}'.format(custID, campIDs))
        cust_past = past[past['customer_id'] == custID]
        cust_coupons = campaign_rows[campaign_rows.customer_id == custID]
        for coupID in cust_coupons.coupon_id.unique():
            # Filter once per coupon; every statistic below reuses this subset.
            matched = cust_past[cust_past.item_id.isin(coupon_item_dict[coupID])]
            stats = {
                col: (matched[col].mean(), matched[col].std(),
                      matched[col].max(), matched[col].min())
                for col in stat_columns
            }
            # The coupon_discount block is emitted twice because the column list
            # that later turns these tuples into a DataFrame names it twice.
            Ans.append(
                (campIDs, custID, coupID, len(matched))
                + stats['selling_price']
                + stats['other_discount']
                + stats['coupon_discount'] * 2
                + stats['Total_Bill']
            )

    return Ans

In [8]:
import multiprocessing

# Set `processes` to the maximum number of workers available on your machine.
# One task per train campaign id; map() blocks until every campaign is done and
# returns the per-campaign result lists in input order.
# The context manager shuts the worker processes down afterwards, so they do
# not stay alive for the rest of the session.
with multiprocessing.Pool(processes=8) as p:
    Ans_train = p.map(do_parallel_train, train.campaign_id.unique())

523 ...... 30
368 ...... 29
1489 ...... 8
679 ...... 2
761 ...... 5
205 ...... 9
793 ...... 11
1053 ...... 13
186 ...... 5
1255 ...... 30
695 ...... 8
384 ...... 2
1464 ...... 11
590 ...... 9
828 ...... 5
88 ...... 30
48 ...... 13
1241 ...... 2
1491 ...... 5
1162 ...... 29
1351 ...... 11
108 ...... 9
44 ...... 5
1050 ...... 13
886 ...... 8
375 ...... 2
666 ...... 11
958 ...... 30
1346 ...... 5
712 ...... 9
233 ...... 2
106 ...... 11
1398 ...... 8
113 ...... 29
795 ...... 5
91 ...... 30
89 ...... 13
301 ...... 5
1324 ...... 11
608 ...... 9
633 ...... 2
88 ...... 5
124 ...... 30
1067 ...... 13
1058 ...... 11
197 ...... 8
278 ...... 29
843 ...... 2
755 ...... 9
42 ...... 5
430 ...... 11
248 ...... 13
895 ...... 30
196 ...... 2
829 ...... 5
1095 ...... 8
155 ...... 9
1489 ...... 11
1194 ...... 5
569 ...... 29
416 ...... 2
1199 ...... 11
1383 ...... 30
1415 ...... 5
569 ...... 9
1152 ...... 13
1447 ...... 8
1012 ...... 2
557 ...... 5
1332 ...... 11
384 ...... 9
340 ...... 30
1334 ...... 5
1

295 ...... 9
93 ...... 13
787 ...... 30
1239 ...... 11
793 ...... 5
293 ...... 12
189 ...... 8
464 ...... 5
582 ...... 9
38 ...... 30
474 ...... 13
962 ...... 11
366 ...... 12
184 ...... 29
60 ...... 5
1006 ...... 8
456 ...... 11
1264 ...... 9
1384 ...... 30
450 ...... 13
386 ...... 12
1215 ...... 5
1014 ...... 11
36 ...... 9
40 ...... 8
435 ...... 30
578 ...... 12
81 ...... 29
616 ...... 5
763 ...... 11
38 ...... 13
216 ...... 5
1372 ...... 9
1481 ...... 30
1383 ...... 12
691 ...... 11
1167 ...... 5
1183 ...... 13
779 ...... 8
865 ...... 29
195 ...... 30
1099 ...... 9
1252 ...... 5
1174 ...... 11
833 ...... 12
136 ...... 13
269 ...... 30
1434 ...... 5
1463 ...... 11
1479 ...... 8
327 ...... 9
621 ...... 30
295 ...... 12
1047 ...... 5
855 ...... 29
929 ...... 11
626 ...... 13
1293 ...... 12
1544 ...... 5
413 ...... 9
1441 ...... 30
1023 ...... 11
590 ...... 8
202 ...... 12
1490 ...... 5
822 ...... 9
338 ...... 29
130 ...... 13
1520 ...... 30
208 ...... 11
1472 ...... 5
21 ...... 12
764

1349 ...... 12
1112 ...... 8
590 ...... 30
1369 ...... 9
668 ...... 11
1320 ...... 26
962 ...... 29
1507 ...... 13
911 ...... 12
625 ...... 11
840 ...... 30
42 ...... 8
91 ...... 9
834 ...... 26
586 ...... 13
446 ...... 11
1 ...... 12
1060 ...... 30
506 ...... 26
575 ...... 29
902 ...... 9
938 ...... 8
890 ...... 11
1015 ...... 13
294 ...... 30
1350 ...... 12
402 ...... 26
39 ...... 9
191 ...... 11
514 ...... 8
1205 ...... 12
1506 ...... 13
285 ...... 29
1457 ...... 30
703 ...... 11
825 ...... 9
1363 ...... 26
289 ...... 8
1441 ...... 12
80 ...... 11
1535 ...... 30
749 ...... 9
849 ...... 26
353 ...... 8
1004 ...... 13
1485 ...... 12
1115 ...... 29
78 ...... 11
1121 ...... 30
1397 ...... 26
1556 ...... 9
74 ...... 8
1036 ...... 12
767 ...... 11
1073 ...... 13
1547 ...... 26
1536 ...... 8
989 ...... 30
287 ...... 9
515 ...... 11
1337 ...... 12
669 ...... 29
748 ...... 13
25 ...... 8
690 ...... 9
1469 ...... 11
1355 ...... 30
455 ...... 26
1187 ...... 12
1388 ...... 8
44 ...... 9
1415 ..

1556 ...... 4
724 ...... 10
1382 ...... 8
219 ...... 7
400 ...... 30
234 ...... 28
474 ...... 4
658 ...... 10
626 ...... 7
599 ...... 26
1060 ...... 13
119 ...... 7
1479 ...... 4
520 ...... 10
1046 ...... 8
725 ...... 30
1165 ...... 7
1450 ...... 26
119 ...... 4
1140 ...... 28
1320 ...... 7
1557 ...... 10
1465 ...... 13
1049 ...... 7
1223 ...... 30
242 ...... 4
979 ...... 26
63 ...... 7
1490 ...... 10
376 ...... 8
754 ...... 4
760 ...... 7
843 ...... 30
895 ...... 26
1014 ...... 28
1347 ...... 13
277 ...... 7
758 ...... 8
1364 ...... 10
42 ...... 4
1487 ...... 30
1291 ...... 7
78 ...... 4
687 ...... 30
1305 ...... 10
619 ...... 7
400 ...... 26
1451 ...... 8
1332 ...... 28
440 ...... 13
412 ...... 7
571 ...... 4
795 ...... 26
607 ...... 30
978 ...... 10
162 ...... 7
1027 ...... 4
915 ...... 13
1077 ...... 8
1516 ...... 7
510 ...... 10
968 ...... 26
17 ...... 30
621 ...... 4
957 ...... 7
1337 ...... 28
1471 ...... 10
795 ...... 7
675 ...... 4
98 ...... 13
409 ...... 8
1549 ...... 26
1451

1423 ...... 26
1104 ...... 30
724 ...... 13
436 ...... 26
927 ...... 30
743 ...... 10
442 ...... 26
1029 ...... 30
171 ...... 8
1558 ...... 13
267 ...... 10
198 ...... 26
992 ...... 30
428 ...... 13
461 ...... 10
607 ...... 26
369 ...... 30
1185 ...... 8
820 ...... 13
783 ...... 26
1521 ...... 30
1537 ...... 8
401 ...... 13
1049 ...... 26
685 ...... 30
427 ...... 26
277 ...... 8
1035 ...... 13
86 ...... 30
1463 ...... 26
405 ...... 8
1133 ...... 13
437 ...... 30
1378 ...... 26
1358 ...... 8
752 ...... 13
1017 ...... 30
1457 ...... 26
1565 ...... 8
1339 ...... 13
1397 ...... 30
757 ...... 8
65 ...... 26
1405 ...... 30
550 ...... 13
207 ...... 26
510 ...... 30
510 ...... 8
1550 ...... 30
478 ...... 13
1107 ...... 26
1330 ...... 8
324 ...... 13
519 ...... 30
1140 ...... 26
342 ...... 8
174 ...... 13
804 ...... 30
141 ...... 26
87 ...... 8
675 ...... 30
178 ...... 26
1124 ...... 13
1290 ...... 8
290 ...... 26
290 ...... 30
1208 ...... 13
1550 ...... 8
163 ...... 26
1574 ...... 30
327 .....

154 ...... 13
798 ...... 8
411 ...... 13
90 ...... 8
9 ...... 13
179 ...... 8
244 ...... 13
1174 ...... 8
380 ...... 13
194 ...... 8
303 ...... 13
1193 ...... 8
1287 ...... 13
108 ...... 8
1304 ...... 13
3 ...... 8
573 ...... 13
577 ...... 8
982 ...... 13
1270 ...... 13
1017 ...... 8
1064 ...... 13
1054 ...... 8
1323 ...... 8
702 ...... 13
1101 ...... 8
911 ...... 13
11 ...... 8
295 ...... 13
412 ...... 8
1502 ...... 13
100 ...... 8
543 ...... 13
383 ...... 13
788 ...... 8
735 ...... 13
437 ...... 8
648 ...... 13
131 ...... 8
714 ...... 13
1211 ...... 8
381 ...... 13
998 ...... 13
488 ...... 8
359 ...... 13
760 ...... 8
223 ...... 8
944 ...... 13
1506 ...... 8
1450 ...... 13
868 ...... 8
660 ...... 13
1241 ...... 8
1014 ...... 13
560 ...... 8
879 ...... 13
1394 ...... 8
1098 ...... 13
1067 ...... 8
385 ...... 13
1276 ...... 8
1166 ...... 8
92 ...... 13
489 ...... 8
652 ...... 13
461 ...... 8
173 ...... 8
490 ...... 13
675 ...... 8
789 ...... 13
1055 ...... 8
1008 ...... 13
1391 ...... 

1315 ...... 8
421 ...... 13
781 ...... 8
1578 ...... 13
434 ...... 8
1290 ...... 13
672 ...... 8
912 ...... 13
301 ...... 8
163 ...... 13
462 ...... 8
1366 ...... 13
325 ...... 8
1212 ...... 13
154 ...... 8
484 ...... 13
851 ...... 8
302 ...... 8
1084 ...... 13
208 ...... 8
1258 ...... 13
1249 ...... 8
1498 ...... 13
244 ...... 8
262 ...... 13
85 ...... 8
518 ...... 13
1168 ...... 8
756 ...... 13
1194 ...... 8
1497 ...... 13
789 ...... 8
858 ...... 8
357 ...... 13
1092 ...... 13
458 ...... 8
983 ...... 8
140 ...... 13
1103 ...... 8
483 ...... 13
881 ...... 8
344 ...... 13
1147 ...... 8
1477 ...... 13
421 ...... 8
1095 ...... 13
1302 ...... 8
445 ...... 8
972 ...... 13
1019 ...... 8
91 ...... 13
1159 ...... 13
441 ...... 8
428 ...... 8
1372 ...... 13
1197 ...... 8
373 ...... 13
745 ...... 8
110 ...... 13
466 ...... 8
744 ...... 13
350 ...... 8
708 ...... 13
1237 ...... 8
1521 ...... 13
352 ...... 8
643 ...... 13
998 ...... 8
1140 ...... 13
73 ...... 8
650 ...... 13
538 ...... 8
281 ....

1493 ...... 13
731 ...... 8
49 ...... 13
678 ...... 8
802 ...... 13
936 ...... 8
570 ...... 8
431 ...... 13
153 ...... 8
1123 ...... 13
1403 ...... 8
595 ...... 13
914 ...... 8
1215 ...... 8
737 ...... 13
607 ...... 8
1546 ...... 13
861 ...... 8
1413 ...... 13
1456 ...... 8
1431 ...... 13
970 ...... 8
1081 ...... 13
452 ...... 8
564 ...... 13
15 ...... 8
362 ...... 13
422 ...... 8
633 ...... 13
1176 ...... 8
1485 ...... 8
537 ...... 13
805 ...... 8
164 ...... 13
537 ...... 8
779 ...... 13
1477 ...... 8
37 ...... 13
411 ...... 8
1086 ...... 13
1203 ...... 8
817 ...... 13
501 ...... 8
459 ...... 13
1376 ...... 8
433 ...... 13
1282 ...... 8
1384 ...... 13
770 ...... 8
242 ...... 8
761 ...... 13
982 ...... 8
47 ...... 8
661 ...... 13
542 ...... 8
386 ...... 13
469 ...... 13
1121 ...... 8
1100 ...... 8
1390 ...... 13
1434 ...... 13
1182 ...... 8
505 ...... 13
1091 ...... 8
400 ...... 13
97 ...... 13
1455 ...... 8
668 ...... 13
349 ...... 8
631 ...... 13
408 ...... 8
689 ...... 13
665 ......

In [9]:
# Flatten the per-campaign result lists returned by the pool into one flat list
# of row tuples.
Ans = [row for campaign_rows in Ans_train for row in campaign_rows]

# Column layout of those tuples: four id/count fields, then Mean/Std/Max/Min for
# each measure.  'Coupon' appears twice because the row tuples carry that block
# twice, so the resulting frame has four duplicated column names.
measures = ['Selling', 'Other', 'Coupon', 'Coupon', 'TotalBill']
stat_suffixes = ['Mean', 'Std', 'Max', 'Min']
feature_columns = ['campaign_id', 'customer_id', 'coupon_id', 'prev_int_count'] + [
    'itemPast' + measure + suffix
    for measure in measures
    for suffix in stat_suffixes
]

itemCount = pd.DataFrame(Ans, columns=feature_columns)



In [10]:
# Write the train-side coupon-history features to disk (row index omitted).
train_features_csv = 'item_Past_info_big_imp_TRAIN.csv'
itemCount.to_csv(train_features_csv, index=False)

In [11]:
def do_parallel_train(campIDs):
    """Build purchase-history features per (customer, coupon) for one test campaign.

    NOTE: this definition reuses the name of the train-set worker defined
    earlier in the notebook and therefore replaces it; from here on
    ``do_parallel_train`` reads ``test``.  The name is kept because the pool
    call that follows refers to it.

    For every customer that ``test`` lists under campaign ``campIDs`` and every
    coupon offered to that customer, selects the customer's transactions dated
    strictly before the campaign's ``start_date`` whose item is covered by the
    coupon (per ``coupon_item_dict``) and summarises them.

    Reads the notebook globals ``test``, ``campaignData``, ``c_transaction`` and
    ``coupon_item_dict``; takes a single argument so it can be mapped over
    campaign ids by a process pool.  Prints one progress line per customer.

    Parameters
    ----------
    campIDs
        A single campaign id (despite the plural name).

    Returns
    -------
    list of tuple
        One 24-element tuple per (customer, coupon):
        ``(campaign_id, customer_id, coupon_id, n_matching_transactions)``
        followed by mean/std/max/min of ``selling_price``, ``other_discount``,
        ``coupon_discount``, ``coupon_discount`` (again) and ``Total_Bill``.
        Statistics are NaN when there is no matching past transaction.
    """
    stat_columns = ('selling_price', 'other_discount', 'coupon_discount', 'Total_Bill')

    Ans = []
    campaign_rows = test[test['campaign_id'] == campIDs]
    date = campaignData[campaignData.campaign_id == campIDs]['start_date'].values[0]
    past = c_transaction[c_transaction.date < date]
    for custID in campaign_rows.customer_id.unique():
        print('{} ...... {}'.format(custID, campIDs))
        cust_past = past[past['customer_id'] == custID]
        cust_coupons = campaign_rows[campaign_rows.customer_id == custID]
        for coupID in cust_coupons.coupon_id.unique():
            # Filter once per coupon; every statistic below reuses this subset.
            matched = cust_past[cust_past.item_id.isin(coupon_item_dict[coupID])]
            stats = {
                col: (matched[col].mean(), matched[col].std(),
                      matched[col].max(), matched[col].min())
                for col in stat_columns
            }
            # The coupon_discount block is emitted twice because the column list
            # that later turns these tuples into a DataFrame names it twice.
            Ans.append(
                (campIDs, custID, coupID, len(matched))
                + stats['selling_price']
                + stats['other_discount']
                + stats['coupon_discount'] * 2
                + stats['Total_Bill']
            )

    return Ans

In [12]:
import multiprocessing

# One task per test campaign id.  This relies on the most recent definition of
# do_parallel_train, i.e. the one that reads `test`.
# map() blocks until all campaigns are processed; the context manager then
# shuts the worker processes down instead of leaving them running.
with multiprocessing.Pool(processes=8) as p:
    Ans_test = p.map(do_parallel_train, test.campaign_id.unique())

967 ...... 22
28 ...... 19
466 ...... 23
811 ...... 17
682 ...... 18
361 ...... 25
922 ...... 21
1566 ...... 20
928 ...... 19
1498 ...... 25
427 ...... 21
1182 ...... 23
616 ...... 17
510 ...... 22
808 ...... 19
1023 ...... 18
565 ...... 25
744 ...... 20
1197 ...... 21
35 ...... 19
1303 ...... 17
1093 ...... 23
672 ...... 19
1186 ...... 22
1574 ...... 21
36 ...... 25
866 ...... 19
464 ...... 18
701 ...... 17
379 ...... 20
1434 ...... 23
315 ...... 22
1002 ...... 19
78 ...... 21
962 ...... 25
1023 ...... 17
800 ...... 19
207 ...... 23
842 ...... 19
1323 ...... 22
305 ...... 25
1082 ...... 20
1007 ...... 21
43 ...... 19
1442 ...... 17
894 ...... 23
421 ...... 18
538 ...... 21
1078 ...... 19
45 ...... 22
699 ...... 25
704 ...... 20
1555 ...... 19
1292 ...... 17
1494 ...... 23
1166 ...... 21
1538 ...... 25
832 ...... 22
527 ...... 18
519 ...... 19
963 ...... 17
322 ...... 20
1391 ...... 23
1022 ...... 25
1120 ...... 21
1552 ...... 19
1545 ...... 18
1518 ...... 19
1555 ...... 22
82 ...... 1

1407 ...... 19
1097 ...... 22
566 ...... 16
33 ...... 23
1178 ...... 18
682 ...... 20
1447 ...... 19
911 ...... 25
42 ...... 17
549 ...... 19
1270 ...... 16
657 ...... 22
839 ...... 23
781 ...... 18
1215 ...... 19
284 ...... 20
1574 ...... 25
748 ...... 16
709 ...... 17
1064 ...... 19
379 ...... 23
779 ...... 22
1466 ...... 19
1524 ...... 16
1309 ...... 18
628 ...... 20
1218 ...... 17
878 ...... 25
433 ...... 22
271 ...... 23
1411 ...... 16
333 ...... 24
911 ...... 24
305 ...... 17
755 ...... 25
559 ...... 24
578 ...... 20
852 ...... 24
470 ...... 16
856 ...... 24
931 ...... 18
1347 ...... 24
1526 ...... 24
456 ...... 23
338 ...... 24
695 ...... 22
572 ...... 24
498 ...... 24
346 ...... 25
1580 ...... 16
1152 ...... 24
570 ...... 17
231 ...... 24
36 ...... 24
1558 ...... 24
1278 ...... 18
355 ...... 20
1504 ...... 16
1292 ...... 24
566 ...... 23
1131 ...... 24
1441 ...... 25
1186 ...... 24
430 ...... 22
533 ...... 24
668 ...... 17
699 ...... 24
1022 ...... 24
319 ...... 16
432 ...... 2

355 ...... 16
855 ...... 18
230 ...... 25
989 ...... 22
289 ...... 16
253 ...... 20
1 ...... 23
1266 ...... 17
964 ...... 16
481 ...... 25
920 ...... 18
1192 ...... 23
1168 ...... 20
901 ...... 22
517 ...... 17
651 ...... 16
948 ...... 25
1015 ...... 16
249 ...... 18
632 ...... 20
1070 ...... 23
1578 ...... 17
304 ...... 22
1025 ...... 16
772 ...... 23
649 ...... 25
1418 ...... 17
1350 ...... 16
1429 ...... 18
569 ...... 22
1285 ...... 20
1369 ...... 23
510 ...... 25
58 ...... 16
53 ...... 17
1537 ...... 18
1005 ...... 20
1367 ...... 22
214 ...... 23
638 ...... 16
920 ...... 25
1025 ...... 17
1377 ...... 16
8 ...... 20
749 ...... 18
141 ...... 23
1336 ...... 25
1327 ...... 22
1518 ...... 16
1215 ...... 17
706 ...... 22
239 ...... 20
1257 ...... 23
711 ...... 25
1296 ...... 16
1507 ...... 18
58 ...... 17
1146 ...... 16
755 ...... 22
1453 ...... 23
269 ...... 16
204 ...... 18
1472 ...... 17
1386 ...... 22
467 ...... 25
898 ...... 20
194 ...... 16
1360 ...... 23
711 ...... 22
431 ...... 2

1271 ...... 20
1276 ...... 22
877 ...... 18
174 ...... 20
911 ...... 22
485 ...... 18
1089 ...... 20
929 ...... 18
1558 ...... 22
726 ...... 20
1208 ...... 18
1215 ...... 22
416 ...... 20
678 ...... 18
265 ...... 22
317 ...... 18
1537 ...... 20
495 ...... 22
1475 ...... 20
857 ...... 18
1565 ...... 22
479 ...... 22
579 ...... 18
1076 ...... 20
715 ...... 18
878 ...... 22
71 ...... 20
1111 ...... 18
1450 ...... 22
514 ...... 18
1320 ...... 20
733 ...... 22
164 ...... 18
981 ...... 20
1517 ...... 18
1033 ...... 22
1 ...... 20
1104 ...... 18
944 ...... 22
240 ...... 20
1399 ...... 18
1319 ...... 22
1215 ...... 18
508 ...... 20
613 ...... 22
1191 ...... 18
244 ...... 20
1387 ...... 22
19 ...... 18
1492 ...... 20
483 ...... 22
262 ...... 18
474 ...... 22
981 ...... 18
267 ...... 20
577 ...... 22
1028 ...... 18
1239 ...... 20
1073 ...... 18
951 ...... 22
1093 ...... 20
1178 ...... 22
1045 ...... 18
284 ...... 18
1491 ...... 20
1209 ...... 22
288 ...... 18
319 ...... 20
624 ...... 22
157 ....

205 ...... 18
750 ...... 18
1317 ...... 18
906 ...... 18
465 ...... 18
181 ...... 18
1395 ...... 18
1142 ...... 18
1444 ...... 18
1235 ...... 18
1083 ...... 18
685 ...... 18
86 ...... 18
410 ...... 18
1499 ...... 18
1243 ...... 18
428 ...... 18
149 ...... 18
1195 ...... 18
864 ...... 18
168 ...... 18
711 ...... 18
1524 ...... 18
1163 ...... 18
998 ...... 18
1091 ...... 18
1368 ...... 18
1370 ...... 18
521 ...... 18
522 ...... 18
1269 ...... 18
222 ...... 18
963 ...... 18
1565 ...... 18
1464 ...... 18
324 ...... 18
900 ...... 18
735 ...... 18
672 ...... 18
1223 ...... 18
363 ...... 18
733 ...... 18
1315 ...... 18
174 ...... 18
1311 ...... 18
396 ...... 18
303 ...... 18
1184 ...... 18
314 ...... 18
833 ...... 18
1479 ...... 18
294 ...... 18
764 ...... 18
134 ...... 18
1067 ...... 18
257 ...... 18
949 ...... 18
42 ...... 18
1305 ...... 18
127 ...... 18
674 ...... 18
90 ...... 18
357 ...... 18
475 ...... 18
958 ...... 18
1025 ...... 18
806 ...... 18
572 ...... 18
1207 ...... 18
886 ...... 

In [13]:
# Flatten the per-campaign result lists returned by the pool into one flat list
# of row tuples (campaign order, then row order, is preserved).
Ans = [row for campaign_rows in Ans_test for row in campaign_rows]

In [14]:
# Column layout of the row tuples in `Ans`: four id/count fields, then
# Mean/Std/Max/Min for each measure.  'Coupon' appears twice because the row
# tuples carry that block twice, so the frame has four duplicated column names.
measures = ['Selling', 'Other', 'Coupon', 'Coupon', 'TotalBill']
stat_suffixes = ['Mean', 'Std', 'Max', 'Min']
feature_columns = ['campaign_id', 'customer_id', 'coupon_id', 'prev_int_count'] + [
    'itemPast' + measure + suffix
    for measure in measures
    for suffix in stat_suffixes
]

itemCount = pd.DataFrame(Ans, columns=feature_columns)

In [15]:
# Write the test-side coupon-history features to disk (row index omitted).
test_features_csv = 'item_Past_info_big_imp_TEST.csv'
itemCount.to_csv(test_features_csv, index=False)