In [1]:
import pandas as pd
import numpy as np
from scipy import stats
import math

In [2]:
df = pd.read_csv('Data/card transactions.csv')

In [3]:
df['Date'] = df['Date'].astype('datetime64[ns]')

In [4]:
df = df[df["Date"] < '2010-11-01']

In [5]:
df = df.set_index('Date')

In [6]:
df.head()

Unnamed: 0_level_0,Recnum,Cardnum,Merchnum,Merch description,Merch state,Merch zip,Transtype,Amount,Fraud
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2010-01-01,1,5142190439,5509006296254,FEDEX SHP 12/23/09 AB#,TN,38118.0,P,3.62,0
2010-01-01,2,5142183973,61003026333,SERVICE MERCHANDISE #81,MA,1803.0,P,31.42,0
2010-01-01,3,5142131721,4503082993600,OFFICE DEPOT #191,MD,20706.0,P,178.49,0
2010-01-01,4,5142148452,5509006296254,FEDEX SHP 12/28/09 AB#,TN,38118.0,P,3.62,0
2010-01-01,5,5142190439,5509006296254,FEDEX SHP 12/23/09 AB#,TN,38118.0,P,3.62,0


In [7]:
df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 84299 entries, 2010-01-01 to 2010-10-31
Data columns (total 9 columns):
Recnum               84299 non-null int64
Cardnum              84299 non-null int64
Merchnum             81497 non-null object
Merch description    84299 non-null object
Merch state          83272 non-null object
Merch zip            80604 non-null float64
Transtype            84299 non-null object
Amount               84299 non-null float64
Fraud                84299 non-null int64
dtypes: float64(2), int64(3), object(4)
memory usage: 6.4+ MB


In [8]:
df.describe(include = 'all')

Unnamed: 0,Recnum,Cardnum,Merchnum,Merch description,Merch state,Merch zip,Transtype,Amount,Fraud
count,84299.0,84299.0,81497.0,84299,83272,80604.0,84299,84299.0,84299.0
unique,,,12015.0,12108,214,,4,,
top,,,930090121224.0,SIGMA-ALDRICH,TN,,P,,
freq,,,9289.0,1419,11290,,83971,,
mean,42150.0,5142203000.0,,,,44639.600677,,434.9611,0.010439
std,24335.169508,56500.4,,,,28225.631383,,10715.17,0.101638
min,1.0,5142110000.0,,,,1.0,,0.01,0.0
25%,21075.5,5142152000.0,,,,20884.0,,31.71,0.0
50%,42150.0,5142196000.0,,,,38118.0,,137.4,0.0
75%,63224.5,5142249000.0,,,,63103.0,,430.0,0.0


# Remove Outliers

In [9]:
df = df[df['Transtype'] == 'P']
df = df[df['Amount'] != max(df['Amount'])]

In [10]:
df.shape

(83970, 9)

# Filling Missing Values

## Filling State by Zip + All

In [11]:
temp = df.groupby(['Merch zip'])['Merch state'].apply(lambda x: x.mode()).reset_index()
temp = temp[temp['level_1'] == 0][['Merch zip', 'Merch state']]

In [12]:
temp_1 = df.merge(temp, on = 'Merch zip', how = 'left')
res = []
for index, row in temp_1.iterrows():
    try:
        math.isnan(row['Merch state_x'])
        res.append(row['Merch state_y'])
    except:
        res.append(row['Merch state_x'])
df['Merch state'] = res

In [13]:
sum(df['Merch state'].isna())

844

In [14]:
temp_df = df[df['Merch state'].notna()]

In [15]:
stats.mode(temp_df['Merch state'])



ModeResult(mode=array(['TN'], dtype=object), count=array([11293]))

In [16]:
df['Merch state'] = df['Merch state'].fillna(stats.mode(temp_df['Merch state'])[0][0])

In [17]:
sum(df['Merch state'].isna())

0

## Filling Zip by Cardnum, Merch state and All

In [18]:
sum(df['Merch zip'] == 0)

0

In [19]:
sum(df['Merch zip'].isna())

3366

In [20]:
temp = df.groupby(['Cardnum', 'Merch state'])['Merch zip'].apply(lambda x: x.mode()).reset_index()
temp = temp[temp['level_2'] == 0][['Cardnum', 'Merch state', 'Merch zip']]
temp.head()

Unnamed: 0,Cardnum,Merch state,Merch zip
0,5142110402,CO,60007.0
2,5142110402,MD,21090.0
3,5142110434,IN,46032.0
4,5142110749,CA,94025.0
5,5142110749,NJ,8701.0


In [21]:
temp_1 = df.merge(temp, on = ['Cardnum', 'Merch state'], how = 'left')
res = []
for index, row in temp_1.iterrows():
    try:
        math.isnan(row['Merch zip_x'])
        res.append(row['Merch zip_y'])
    except:
        res.append(row['Merch zip_x'])
df['Merch zip'] = res

In [22]:
sum(df['Merch zip'] .isna())

1063

In [23]:
temp = df.groupby(['Merch state'])['Merch zip'].apply(lambda x: x.mode()).reset_index()
temp = temp[temp['level_1'] == 0][['Merch state', 'Merch zip']]
temp.head()

Unnamed: 0,Merch state,Merch zip
0,AK,99501.0
1,AL,36117.0
2,AR,72764.0
3,AZ,85285.0
4,CA,92656.0


In [24]:
temp_1 = df.merge(temp, on = ['Merch state'], how = 'left')
res = []
for index, row in temp_1.iterrows():
    try:
        math.isnan(row['Merch zip_x'])
        res.append(row['Merch zip_y'])
    except:
        res.append(row['Merch zip_x'])
df['Merch zip'] = res

In [25]:
sum(df['Merch zip'] .isna())

161

In [26]:
temp_df = df[df['Merch zip'].notna()]

In [27]:
stats.mode(temp_df['Merch zip'])

ModeResult(mode=array([38118.]), count=array([12137]))

In [28]:
df['Merch zip'] = df['Merch zip'].fillna(stats.mode(temp_df['Merch zip'])[0][0])

In [29]:
sum(df['Merch zip'].isna())

0

In [30]:
sum(df['Merch zip'] == 0)

0

## Filling Merchnum by Cardnum, Merch state and all

In [31]:
sum(df['Merchnum'] == 0)

0

In [32]:
sum(df['Merchnum'].isna())

2637

In [33]:
temp = df.groupby(['Cardnum', 'Merch state'])['Merchnum'].apply(lambda x: x.mode()).reset_index()
temp = temp[temp['level_2'] == 0][['Cardnum', 'Merch state', 'Merchnum']]
temp.head()

Unnamed: 0,Cardnum,Merch state,Merchnum
0,5142110402,CO,456090506
2,5142110402,MD,8001760001616
5,5142110434,IN,8292309000040
6,5142110749,CA,674615479337
7,5142110749,NJ,9765000409955


In [34]:
temp_1 = df.merge(temp, on = ['Cardnum', 'Merch state'], how = 'left')
res = []
for index, row in temp_1.iterrows():
    try:
        math.isnan(row['Merchnum_x'])
        res.append(row['Merchnum_y'])
    except:
        res.append(row['Merchnum_x'])
df['Merchnum'] = res

In [35]:
sum(df['Merchnum'].isna())

921

In [36]:
temp = df.groupby(['Merch state'])['Merchnum'].apply(lambda x: x.mode()).reset_index()
temp = temp[temp['level_1'] == 0][['Merch state', 'Merchnum']]
temp.head()

Unnamed: 0,Merch state,Merchnum
0,AK,7080800554000
1,AL,806097308
2,AR,9025544550905
3,AZ,4620006380064
4,BC,9006005910301


In [37]:
temp_1 = df.merge(temp, on = ['Merch state'], how = 'left')
res = []
for index, row in temp_1.iterrows():
    try:
        math.isnan(row['Merchnum_x'])
        res.append(row['Merchnum_y'])
    except:
        res.append(row['Merchnum_x'])
df['Merchnum'] = res

In [38]:
sum(df['Merchnum'].isna())

13

In [39]:
temp_df = df[df['Merchnum'].notna()]

In [40]:
stats.mode(temp_df['Merchnum'])

ModeResult(mode=array(['930090121224'], dtype=object), count=array([9904]))

In [41]:
df['Merchnum'] = df['Merchnum'].fillna(stats.mode(temp_df['Merchnum'])[0][0])

In [42]:
sum(df['Merchnum'].isna())

0

In [43]:
sum(df['Merchnum'] == 0)

0

# Creating Variables

## Amount Variables

### Card

In [44]:
for i in [1, 3, 7, 14, 30]:
    locals()['avg_card_' + str(i)] = df.groupby(['Cardnum'])['Amount'].rolling(str(i) + 'd').mean().reset_index()
    locals()['avg_card_' + str(i)]['order'] = locals()['avg_card_' + str(i)].groupby(['Cardnum', 'Date']).cumcount() + 1

In [45]:
avg_card_1.tail()

Unnamed: 0,Cardnum,Date,Amount,order
83965,5142847398,2010-03-21,199.14,1
83966,5142847398,2010-03-22,78.23,1
83967,5142847398,2010-03-24,440.06,1
83968,5142847398,2010-03-28,288.82,1
83969,5142847398,2010-03-29,736.55,1


In [46]:
for i in [1, 3, 7, 14, 30]:
    locals()['max_card_' + str(i)] = df.groupby(['Cardnum'])['Amount'].rolling(str(i) + 'd').max().reset_index()
    locals()['max_card_' + str(i)]['order'] = locals()['max_card_' + str(i)].groupby(['Cardnum', 'Date']).cumcount() + 1

In [47]:
max_card_1.head()

Unnamed: 0,Cardnum,Date,Amount,order
0,5142110002,2010-10-12,150.0,1
1,5142110081,2010-03-08,495.9,1
2,5142110081,2010-03-08,636.2,2
3,5142110313,2010-10-07,144.0,1
4,5142110313,2010-10-07,144.0,2


In [48]:
for i in [1, 3, 7, 14, 30]:
    locals()['median_card_' + str(i)] = df.groupby(['Cardnum'])['Amount'].rolling(str(i) + 'd').median().reset_index()
    locals()['median_card_' + str(i)]['order'] = locals()['median_card_' + str(i)].groupby(['Cardnum', 'Date']).cumcount() + 1

In [49]:
median_card_1.head()

Unnamed: 0,Cardnum,Date,Amount,order
0,5142110002,2010-10-12,150.0,1
1,5142110081,2010-03-08,495.9,1
2,5142110081,2010-03-08,566.05,2
3,5142110313,2010-10-07,144.0,1
4,5142110313,2010-10-07,94.0,2


In [50]:
for i in [1, 3, 7, 14, 30]:
    locals()['sum_card_' + str(i)] = df.groupby(['Cardnum'])['Amount'].rolling(str(i) + 'd').sum().reset_index()
    locals()['sum_card_' + str(i)]['order'] = locals()['sum_card_' + str(i)].groupby(['Cardnum', 'Date']).cumcount() + 1

In [51]:
sum_card_1.head()

Unnamed: 0,Cardnum,Date,Amount,order
0,5142110002,2010-10-12,150.0,1
1,5142110081,2010-03-08,495.9,1
2,5142110081,2010-03-08,1132.1,2
3,5142110313,2010-10-07,144.0,1
4,5142110313,2010-10-07,188.0,2


In [52]:
card_df = df.copy()
card_df = card_df.reset_index()
card_df['order'] = card_df.groupby(['Cardnum', 'Date']).cumcount() + 1

In [53]:
card_df.head()

Unnamed: 0,Date,Recnum,Cardnum,Merchnum,Merch description,Merch state,Merch zip,Transtype,Amount,Fraud,order
0,2010-01-01,1,5142190439,5509006296254,FEDEX SHP 12/23/09 AB#,TN,38118.0,P,3.62,0,1
1,2010-01-01,2,5142183973,61003026333,SERVICE MERCHANDISE #81,MA,1730.0,P,31.42,0,1
2,2010-01-01,3,5142131721,4503082993600,OFFICE DEPOT #191,MD,20763.0,P,178.49,0,1
3,2010-01-01,4,5142148452,5509006296254,FEDEX SHP 12/28/09 AB#,TN,38118.0,P,3.62,0,1
4,2010-01-01,5,5142190439,5509006296254,FEDEX SHP 12/23/09 AB#,TN,38118.0,P,3.62,0,2


In [54]:
not_needed_columns = ['index', 'Recnum', 'Merchnum', 'Merch description', 'Merch state', 'Merch zip', 'Transtype']
for column in not_needed_columns:
    try:
        del card_df[column]
    except:
        pass

In [55]:
card_df.head()

Unnamed: 0,Date,Cardnum,Amount,Fraud,order
0,2010-01-01,5142190439,3.62,0,1
1,2010-01-01,5142183973,31.42,0,1
2,2010-01-01,5142131721,178.49,0,1
3,2010-01-01,5142148452,3.62,0,1
4,2010-01-01,5142190439,3.62,0,2


In [56]:
merged_data_card = card_df \
.merge(avg_card_1, on = ['Date', 'Cardnum', 'order'], how = 'left', suffixes=['', '_avg_card_1']) \
.merge(avg_card_3, on = ['Date', 'Cardnum', 'order'], how = 'left', suffixes=['', '_avg_card_3']) \
.merge(avg_card_7, on = ['Date', 'Cardnum', 'order'], how = 'left', suffixes=['', '_avg_card_7']) \
.merge(avg_card_14, on = ['Date', 'Cardnum', 'order'], how = 'left', suffixes=['', '_avg_card_14']) \
.merge(avg_card_30, on = ['Date', 'Cardnum', 'order'], how = 'left', suffixes=['', '_avg_card_30']) \
.merge(max_card_1, on = ['Date', 'Cardnum', 'order'], how = 'left', suffixes=['', '_max_card_1'])\
.merge(max_card_3, on = ['Date', 'Cardnum', 'order'], how = 'left', suffixes=['', '_max_card_3']) \
.merge(max_card_7, on = ['Date', 'Cardnum', 'order'], how = 'left', suffixes=['', '_max_card_7']) \
.merge(max_card_14, on = ['Date', 'Cardnum', 'order'], how = 'left', suffixes=['', '_max_card_14']) \
.merge(max_card_30, on = ['Date', 'Cardnum', 'order'], how = 'left', suffixes=['', '_max_card_30']) \
.merge(median_card_1, on = ['Date', 'Cardnum', 'order'], how = 'left', suffixes=['', '_median_card_1'])\
.merge(median_card_3, on = ['Date', 'Cardnum', 'order'], how = 'left', suffixes=['', '_median_card_3']) \
.merge(median_card_7, on = ['Date', 'Cardnum', 'order'], how = 'left', suffixes=['', '_median_card_7']) \
.merge(median_card_14, on = ['Date', 'Cardnum', 'order'], how = 'left', suffixes=['', '_median_card_14']) \
.merge(median_card_30, on = ['Date', 'Cardnum', 'order'], how = 'left', suffixes=['', '_median_card_30']) \
.merge(sum_card_1, on = ['Date', 'Cardnum', 'order'], how = 'left', suffixes=['', '_sum_card_1'])\
.merge(sum_card_3, on = ['Date', 'Cardnum', 'order'], how = 'left', suffixes=['', '_sum_card_3']) \
.merge(sum_card_7, on = ['Date', 'Cardnum', 'order'], how = 'left', suffixes=['', '_sum_card_7']) \
.merge(sum_card_14, on = ['Date', 'Cardnum', 'order'], how = 'left', suffixes=['', '_sum_card_14']) \
.merge(sum_card_30, on = ['Date', 'Cardnum', 'order'], how = 'left', suffixes=['', '_sum_card_30'])

In [57]:
merged_data_card['Amount_avg_card_0'] = merged_data_card['Amount']
merged_data_card['Amount_max_card_0'] = merged_data_card['Amount']
merged_data_card['Amount_median_card_0'] = merged_data_card['Amount']
merged_data_card['Amount_sum_card_0'] = merged_data_card['Amount']

In [58]:
merged_data_card.head()

Unnamed: 0,Date,Cardnum,Amount,Fraud,order,Amount_avg_card_1,Amount_avg_card_3,Amount_avg_card_7,Amount_avg_card_14,Amount_avg_card_30,...,Amount_median_card_30,Amount_sum_card_1,Amount_sum_card_3,Amount_sum_card_7,Amount_sum_card_14,Amount_sum_card_30,Amount_avg_card_0,Amount_max_card_0,Amount_median_card_0,Amount_sum_card_0
0,2010-01-01,5142190439,3.62,0,1,3.62,3.62,3.62,3.62,3.62,...,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62
1,2010-01-01,5142183973,31.42,0,1,31.42,31.42,31.42,31.42,31.42,...,31.42,31.42,31.42,31.42,31.42,31.42,31.42,31.42,31.42,31.42
2,2010-01-01,5142131721,178.49,0,1,178.49,178.49,178.49,178.49,178.49,...,178.49,178.49,178.49,178.49,178.49,178.49,178.49,178.49,178.49,178.49
3,2010-01-01,5142148452,3.62,0,1,3.62,3.62,3.62,3.62,3.62,...,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62
4,2010-01-01,5142190439,3.62,0,2,3.62,3.62,3.62,3.62,3.62,...,3.62,7.24,7.24,7.24,7.24,7.24,3.62,3.62,3.62,3.62


In [59]:
for i in [0, 1, 3, 7, 14, 30]:
    merged_data_card['qaa_cm_' + str(i)] = merged_data_card['Amount'] / merged_data_card['Amount_avg_card_' + str(i)]
    merged_data_card['qam_cm_' + str(i)] = merged_data_card['Amount'] / merged_data_card['Amount_max_card_' + str(i)]
    merged_data_card['qame_cm_' + str(i)] = merged_data_card['Amount'] / merged_data_card['Amount_median_card_' + str(i)]
    merged_data_card['qas_cm_' + str(i)] = merged_data_card['Amount'] / merged_data_card['Amount_sum_card_' + str(i)]

In [60]:
merged_data_card.head()

Unnamed: 0,Date,Cardnum,Amount,Fraud,order,Amount_avg_card_1,Amount_avg_card_3,Amount_avg_card_7,Amount_avg_card_14,Amount_avg_card_30,...,qame_cm_7,qas_cm_7,qaa_cm_14,qam_cm_14,qame_cm_14,qas_cm_14,qaa_cm_30,qam_cm_30,qame_cm_30,qas_cm_30
0,2010-01-01,5142190439,3.62,0,1,3.62,3.62,3.62,3.62,3.62,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1,2010-01-01,5142183973,31.42,0,1,31.42,31.42,31.42,31.42,31.42,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2,2010-01-01,5142131721,178.49,0,1,178.49,178.49,178.49,178.49,178.49,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
3,2010-01-01,5142148452,3.62,0,1,3.62,3.62,3.62,3.62,3.62,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
4,2010-01-01,5142190439,3.62,0,2,3.62,3.62,3.62,3.62,3.62,...,1.0,0.5,1.0,1.0,1.0,0.5,1.0,1.0,1.0,0.5


### Merchant

In [61]:
for i in [1, 3, 7, 14, 30]:
    locals()['avg_merchant_' + str(i)] = df.groupby(['Merchnum'])['Amount'].rolling(str(i) + 'd').mean().reset_index()
    locals()['avg_merchant_' + str(i)]['order'] = locals()['avg_merchant_' + str(i)].groupby(['Merchnum', 'Date']).cumcount() + 1
    
    locals()['max_merchant_' + str(i)] = df.groupby(['Merchnum'])['Amount'].rolling(str(i) + 'd').max().reset_index()
    locals()['max_merchant_' + str(i)]['order'] = locals()['max_merchant_' + str(i)].groupby(['Merchnum', 'Date']).cumcount() + 1
    
    locals()['median_merchant_' + str(i)] = df.groupby(['Merchnum'])['Amount'].rolling(str(i) + 'd').median().reset_index()
    locals()['median_merchant_' + str(i)]['order'] = locals()['median_merchant_' + str(i)].groupby(['Merchnum', 'Date']).cumcount() + 1
    
    locals()['sum_merchant_' + str(i)] = df.groupby(['Merchnum'])['Amount'].rolling(str(i) + 'd').sum().reset_index()
    locals()['sum_merchant_' + str(i)]['order'] = locals()['sum_merchant_' + str(i)].groupby(['Merchnum', 'Date']).cumcount() + 1

In [62]:
max_merchant_3.head()

Unnamed: 0,Merchnum,Date,Amount,order
0,0,2010-01-06,48.97,1
1,0,2010-01-07,87.02,1
2,0,2010-01-07,460.0,2
3,0,2010-01-11,25.0,1
4,0,2010-01-11,110.0,2


In [63]:
median_merchant_3.head()

Unnamed: 0,Merchnum,Date,Amount,order
0,0,2010-01-06,48.97,1
1,0,2010-01-07,67.995,1
2,0,2010-01-07,87.02,2
3,0,2010-01-11,25.0,1
4,0,2010-01-11,67.5,2


In [64]:
sum_merchant_3.head()

Unnamed: 0,Merchnum,Date,Amount,order
0,0,2010-01-06,48.97,1
1,0,2010-01-07,135.99,1
2,0,2010-01-07,595.99,2
3,0,2010-01-11,25.0,1
4,0,2010-01-11,135.0,2


In [65]:
merchant_df = df.copy()
merchant_df = merchant_df.reset_index()
merchant_df['order'] = merchant_df.groupby(['Merchnum', 'Date']).cumcount() + 1

In [66]:
merchant_df.head()

Unnamed: 0,Date,Recnum,Cardnum,Merchnum,Merch description,Merch state,Merch zip,Transtype,Amount,Fraud,order
0,2010-01-01,1,5142190439,5509006296254,FEDEX SHP 12/23/09 AB#,TN,38118.0,P,3.62,0,1
1,2010-01-01,2,5142183973,61003026333,SERVICE MERCHANDISE #81,MA,1730.0,P,31.42,0,1
2,2010-01-01,3,5142131721,4503082993600,OFFICE DEPOT #191,MD,20763.0,P,178.49,0,1
3,2010-01-01,4,5142148452,5509006296254,FEDEX SHP 12/28/09 AB#,TN,38118.0,P,3.62,0,2
4,2010-01-01,5,5142190439,5509006296254,FEDEX SHP 12/23/09 AB#,TN,38118.0,P,3.62,0,3


In [67]:
not_needed_columns = ['index', 'Recnum', 'Cardnum', 'Merch description', 'Merch state', 'Merch zip', 'Transtype']
for column in not_needed_columns:
    try:
        del merchant_df[column]
    except:
        pass

In [68]:
merchant_df.head()

Unnamed: 0,Date,Merchnum,Amount,Fraud,order
0,2010-01-01,5509006296254,3.62,0,1
1,2010-01-01,61003026333,31.42,0,1
2,2010-01-01,4503082993600,178.49,0,1
3,2010-01-01,5509006296254,3.62,0,2
4,2010-01-01,5509006296254,3.62,0,3


In [69]:
merged_data_merchant = merchant_df \
.merge(avg_merchant_1, on = ['Date', 'Merchnum', 'order'], how = 'left', suffixes=['', '_avg_merchant_1'])\
.merge(avg_merchant_3, on = ['Date', 'Merchnum', 'order'], how = 'left', suffixes=['', '_avg_merchant_3']) \
.merge(avg_merchant_7, on = ['Date', 'Merchnum', 'order'], how = 'left', suffixes=['', '_avg_merchant_7']) \
.merge(avg_merchant_14, on = ['Date', 'Merchnum', 'order'], how = 'left', suffixes=['', '_avg_merchant_14']) \
.merge(avg_merchant_30, on = ['Date', 'Merchnum', 'order'], how = 'left', suffixes=['', '_avg_merchant_30']) \
.merge(max_merchant_1, on = ['Date', 'Merchnum', 'order'], how = 'left', suffixes=['', '_max_merchant_1'])\
.merge(max_merchant_3, on = ['Date', 'Merchnum', 'order'], how = 'left', suffixes=['', '_max_merchant_3']) \
.merge(max_merchant_7, on = ['Date', 'Merchnum', 'order'], how = 'left', suffixes=['', '_max_merchant_7']) \
.merge(max_merchant_14, on = ['Date', 'Merchnum', 'order'], how = 'left', suffixes=['', '_max_merchant_14']) \
.merge(max_merchant_30, on = ['Date', 'Merchnum', 'order'], how = 'left', suffixes=['', '_max_merchant_30']) \
.merge(median_merchant_1, on = ['Date', 'Merchnum', 'order'], how = 'left', suffixes=['', '_median_merchant_1'])\
.merge(median_merchant_3, on = ['Date', 'Merchnum', 'order'], how = 'left', suffixes=['', '_median_merchant_3']) \
.merge(median_merchant_7, on = ['Date', 'Merchnum', 'order'], how = 'left', suffixes=['', '_median_merchant_7']) \
.merge(median_merchant_14, on = ['Date', 'Merchnum', 'order'], how = 'left', suffixes=['', '_median_merchant_14']) \
.merge(median_merchant_30, on = ['Date', 'Merchnum', 'order'], how = 'left', suffixes=['', '_median_merchant_30']) \
.merge(sum_merchant_1, on = ['Date', 'Merchnum', 'order'], how = 'left', suffixes=['', '_sum_merchant_1'])\
.merge(sum_merchant_3, on = ['Date', 'Merchnum', 'order'], how = 'left', suffixes=['', '_sum_merchant_3']) \
.merge(sum_merchant_7, on = ['Date', 'Merchnum', 'order'], how = 'left', suffixes=['', '_sum_merchant_7']) \
.merge(sum_merchant_14, on = ['Date', 'Merchnum', 'order'], how = 'left', suffixes=['', '_sum_merchant_14']) \
.merge(sum_merchant_30, on = ['Date', 'Merchnum', 'order'], how = 'left', suffixes=['', '_sum_merchant_30'])

In [70]:
merged_data_merchant['Amount_avg_merchant_0'] = merged_data_merchant['Amount']
merged_data_merchant['Amount_max_merchant_0'] = merged_data_merchant['Amount']
merged_data_merchant['Amount_median_merchant_0'] = merged_data_merchant['Amount']
merged_data_merchant['Amount_sum_merchant_0'] = merged_data_merchant['Amount']

In [71]:
merged_data_merchant.head()

Unnamed: 0,Date,Merchnum,Amount,Fraud,order,Amount_avg_merchant_1,Amount_avg_merchant_3,Amount_avg_merchant_7,Amount_avg_merchant_14,Amount_avg_merchant_30,...,Amount_median_merchant_30,Amount_sum_merchant_1,Amount_sum_merchant_3,Amount_sum_merchant_7,Amount_sum_merchant_14,Amount_sum_merchant_30,Amount_avg_merchant_0,Amount_max_merchant_0,Amount_median_merchant_0,Amount_sum_merchant_0
0,2010-01-01,5509006296254,3.62,0,1,3.62,3.62,3.62,3.62,3.62,...,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62
1,2010-01-01,61003026333,31.42,0,1,31.42,31.42,31.42,31.42,31.42,...,31.42,31.42,31.42,31.42,31.42,31.42,31.42,31.42,31.42,31.42
2,2010-01-01,4503082993600,178.49,0,1,178.49,178.49,178.49,178.49,178.49,...,178.49,178.49,178.49,178.49,178.49,178.49,178.49,178.49,178.49,178.49
3,2010-01-01,5509006296254,3.62,0,2,3.62,3.62,3.62,3.62,3.62,...,3.62,7.24,7.24,7.24,7.24,7.24,3.62,3.62,3.62,3.62
4,2010-01-01,5509006296254,3.62,0,3,3.62,3.62,3.62,3.62,3.62,...,3.62,10.86,10.86,10.86,10.86,10.86,3.62,3.62,3.62,3.62


In [72]:
for i in [0, 1, 3, 7, 14, 30]:
    merged_data_merchant['qaa_cm_' + str(i)] = merged_data_merchant['Amount'] / merged_data_merchant['Amount_avg_merchant_' + str(i)]
    merged_data_merchant['qam_cm_' + str(i)] = merged_data_merchant['Amount'] / merged_data_merchant['Amount_max_merchant_' + str(i)]
    merged_data_merchant['qame_cm_' + str(i)] = merged_data_merchant['Amount'] / merged_data_merchant['Amount_median_merchant_' + str(i)]
    merged_data_merchant['qas_cm_' + str(i)] = merged_data_merchant['Amount'] / merged_data_merchant['Amount_sum_merchant_' + str(i)]

In [73]:
sum(merged_data_merchant['Merchnum'] == 0)

0

In [74]:
merged_data_merchant.head()

Unnamed: 0,Date,Merchnum,Amount,Fraud,order,Amount_avg_merchant_1,Amount_avg_merchant_3,Amount_avg_merchant_7,Amount_avg_merchant_14,Amount_avg_merchant_30,...,qame_cm_7,qas_cm_7,qaa_cm_14,qam_cm_14,qame_cm_14,qas_cm_14,qaa_cm_30,qam_cm_30,qame_cm_30,qas_cm_30
0,2010-01-01,5509006296254,3.62,0,1,3.62,3.62,3.62,3.62,3.62,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1,2010-01-01,61003026333,31.42,0,1,31.42,31.42,31.42,31.42,31.42,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2,2010-01-01,4503082993600,178.49,0,1,178.49,178.49,178.49,178.49,178.49,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
3,2010-01-01,5509006296254,3.62,0,2,3.62,3.62,3.62,3.62,3.62,...,1.0,0.5,1.0,1.0,1.0,0.5,1.0,1.0,1.0,0.5
4,2010-01-01,5509006296254,3.62,0,3,3.62,3.62,3.62,3.62,3.62,...,1.0,0.333333,1.0,1.0,1.0,0.333333,1.0,1.0,1.0,0.333333


In [75]:
merged_data_merchant.shape

(83970, 53)

### Card + Merchnum

In [76]:
for i in [1, 3, 7, 14, 30]:
    locals()['avg_card_merchant_' + str(i)] = df.groupby(['Cardnum', 'Merchnum'])['Amount'].rolling(str(i) + 'd').mean().reset_index()
    locals()['avg_card_merchant_' + str(i)]['order'] = locals()['avg_card_merchant_' + str(i)].groupby(['Cardnum', 'Merchnum', 'Date']).cumcount() + 1
    
    locals()['max_card_merchant_' + str(i)] = df.groupby(['Cardnum', 'Merchnum'])['Amount'].rolling(str(i) + 'd').max().reset_index()
    locals()['max_card_merchant_' + str(i)]['order'] = locals()['max_card_merchant_' + str(i)].groupby(['Cardnum', 'Merchnum', 'Date']).cumcount() + 1
    
    locals()['median_card_merchant_' + str(i)] = df.groupby(['Cardnum', 'Merchnum'])['Amount'].rolling(str(i) + 'd').median().reset_index()
    locals()['median_card_merchant_' + str(i)]['order'] = locals()['median_card_merchant_' + str(i)].groupby(['Cardnum', 'Merchnum', 'Date']).cumcount() + 1
    
    locals()['sum_card_merchant_' + str(i)] = df.groupby(['Cardnum', 'Merchnum'])['Amount'].rolling(str(i) + 'd').sum().reset_index()
    locals()['sum_card_merchant_' + str(i)]['order'] = locals()['sum_card_merchant_' + str(i)].groupby(['Cardnum', 'Merchnum', 'Date']).cumcount() + 1

In [77]:
avg_card_merchant_7.head()

Unnamed: 0,Cardnum,Merchnum,Date,Amount,order
0,5142110002,9900020006406,2010-10-12,150.0,1
1,5142110081,930090121224,2010-03-08,495.9,1
2,5142110081,930090121224,2010-03-08,566.05,2
3,5142110313,930090121224,2010-10-07,144.0,1
4,5142110313,930090121224,2010-10-07,94.0,2


In [78]:
max_card_merchant_7.head()

Unnamed: 0,Cardnum,Merchnum,Date,Amount,order
0,5142110002,9900020006406,2010-10-12,150.0,1
1,5142110081,930090121224,2010-03-08,495.9,1
2,5142110081,930090121224,2010-03-08,636.2,2
3,5142110313,930090121224,2010-10-07,144.0,1
4,5142110313,930090121224,2010-10-07,144.0,2


In [79]:
median_card_merchant_7.head()

Unnamed: 0,Cardnum,Merchnum,Date,Amount,order
0,5142110002,9900020006406,2010-10-12,150.0,1
1,5142110081,930090121224,2010-03-08,495.9,1
2,5142110081,930090121224,2010-03-08,566.05,2
3,5142110313,930090121224,2010-10-07,144.0,1
4,5142110313,930090121224,2010-10-07,94.0,2


In [80]:
sum_card_merchant_7.head()

Unnamed: 0,Cardnum,Merchnum,Date,Amount,order
0,5142110002,9900020006406,2010-10-12,150.0,1
1,5142110081,930090121224,2010-03-08,495.9,1
2,5142110081,930090121224,2010-03-08,1132.1,2
3,5142110313,930090121224,2010-10-07,144.0,1
4,5142110313,930090121224,2010-10-07,188.0,2


In [81]:
card_merchant_df = df.copy()
card_merchant_df = card_merchant_df.reset_index()
card_merchant_df['order'] = card_merchant_df.groupby(['Cardnum', 'Merchnum', 'Date']).cumcount() + 1

In [82]:
card_merchant_df.head()

Unnamed: 0,Date,Recnum,Cardnum,Merchnum,Merch description,Merch state,Merch zip,Transtype,Amount,Fraud,order
0,2010-01-01,1,5142190439,5509006296254,FEDEX SHP 12/23/09 AB#,TN,38118.0,P,3.62,0,1
1,2010-01-01,2,5142183973,61003026333,SERVICE MERCHANDISE #81,MA,1730.0,P,31.42,0,1
2,2010-01-01,3,5142131721,4503082993600,OFFICE DEPOT #191,MD,20763.0,P,178.49,0,1
3,2010-01-01,4,5142148452,5509006296254,FEDEX SHP 12/28/09 AB#,TN,38118.0,P,3.62,0,1
4,2010-01-01,5,5142190439,5509006296254,FEDEX SHP 12/23/09 AB#,TN,38118.0,P,3.62,0,2


In [83]:
not_needed_columns = ['index', 'Recnum', 'Merch description', 'Merch state', 'Merch zip', 'Transtype']
for column in not_needed_columns:
    try:
        del card_merchant_df[column]
    except:
        pass

In [84]:
card_merchant_df.head()

Unnamed: 0,Date,Cardnum,Merchnum,Amount,Fraud,order
0,2010-01-01,5142190439,5509006296254,3.62,0,1
1,2010-01-01,5142183973,61003026333,31.42,0,1
2,2010-01-01,5142131721,4503082993600,178.49,0,1
3,2010-01-01,5142148452,5509006296254,3.62,0,1
4,2010-01-01,5142190439,5509006296254,3.62,0,2


In [85]:
merged_data_card_merchant = card_merchant_df \
.merge(avg_card_merchant_1, on = ['Date', 'Merchnum', 'Cardnum', 'order'], how = 'left', suffixes=['', '_avg_card_merchant_1'])\
.merge(avg_card_merchant_3, on = ['Date', 'Merchnum', 'Cardnum', 'order'], how = 'left', suffixes=['', '_avg_card_merchant_3']) \
.merge(avg_card_merchant_7, on = ['Date', 'Merchnum', 'Cardnum', 'order'], how = 'left', suffixes=['', '_avg_card_merchant_7']) \
.merge(avg_card_merchant_14, on = ['Date', 'Merchnum', 'Cardnum', 'order'], how = 'left', suffixes=['', '_avg_card_merchant_14']) \
.merge(avg_card_merchant_30, on = ['Date', 'Merchnum', 'Cardnum', 'order'], how = 'left', suffixes=['', '_avg_card_merchant_30']) \
.merge(max_card_merchant_1, on = ['Date', 'Merchnum', 'Cardnum', 'order'], how = 'left', suffixes=['', '_max_card_merchant_1'])\
.merge(max_card_merchant_3, on = ['Date', 'Merchnum', 'Cardnum', 'order'], how = 'left', suffixes=['', '_max_card_merchant_3']) \
.merge(max_card_merchant_7, on = ['Date', 'Merchnum', 'Cardnum', 'order'], how = 'left', suffixes=['', '_max_card_merchant_7']) \
.merge(max_card_merchant_14, on = ['Date', 'Merchnum', 'Cardnum', 'order'], how = 'left', suffixes=['', '_max_card_merchant_14']) \
.merge(max_card_merchant_30, on = ['Date', 'Merchnum', 'Cardnum', 'order'], how = 'left', suffixes=['', '_max_card_merchant_30']) \
.merge(median_card_merchant_1, on = ['Date', 'Merchnum', 'Cardnum', 'order'], how = 'left', suffixes=['', '_median_card_merchant_1'])\
.merge(median_card_merchant_3, on = ['Date', 'Merchnum', 'Cardnum', 'order'], how = 'left', suffixes=['', '_median_card_merchant_3']) \
.merge(median_card_merchant_7, on = ['Date', 'Merchnum', 'Cardnum', 'order'], how = 'left', suffixes=['', '_median_card_merchant_7']) \
.merge(median_card_merchant_14, on = ['Date', 'Merchnum', 'Cardnum', 'order'], how = 'left', suffixes=['', '_median_card_merchant_14']) \
.merge(median_card_merchant_30, on = ['Date', 'Merchnum', 'Cardnum', 'order'], how = 'left', suffixes=['', '_median_card_merchant_30']) \
.merge(sum_card_merchant_1, on = ['Date', 'Merchnum', 'Cardnum', 'order'], how = 'left', suffixes=['', '_sum_card_merchant_1'])\
.merge(sum_card_merchant_3, on = ['Date', 'Merchnum', 'Cardnum', 'order'], how = 'left', suffixes=['', '_sum_card_merchant_3']) \
.merge(sum_card_merchant_7, on = ['Date', 'Merchnum', 'Cardnum', 'order'], how = 'left', suffixes=['', '_sum_card_merchant_7']) \
.merge(sum_card_merchant_14, on = ['Date', 'Merchnum', 'Cardnum', 'order'], how = 'left', suffixes=['', '_sum_card_merchant_14']) \
.merge(sum_card_merchant_30, on = ['Date', 'Merchnum', 'Cardnum', 'order'], how = 'left', suffixes=['', '_sum_card_merchant_30'])

In [86]:
merged_data_card_merchant['Amount_avg_card_merchant_0'] = merged_data_card_merchant['Amount']
merged_data_card_merchant['Amount_max_card_merchant_0'] = merged_data_card_merchant['Amount']
merged_data_card_merchant['Amount_median_card_merchant_0'] = merged_data_card_merchant['Amount']
merged_data_card_merchant['Amount_sum_card_merchant_0'] = merged_data_card_merchant['Amount']

In [87]:
merged_data_card_merchant.head()

Unnamed: 0,Date,Cardnum,Merchnum,Amount,Fraud,order,Amount_avg_card_merchant_1,Amount_avg_card_merchant_3,Amount_avg_card_merchant_7,Amount_avg_card_merchant_14,...,Amount_median_card_merchant_30,Amount_sum_card_merchant_1,Amount_sum_card_merchant_3,Amount_sum_card_merchant_7,Amount_sum_card_merchant_14,Amount_sum_card_merchant_30,Amount_avg_card_merchant_0,Amount_max_card_merchant_0,Amount_median_card_merchant_0,Amount_sum_card_merchant_0
0,2010-01-01,5142190439,5509006296254,3.62,0,1,3.62,3.62,3.62,3.62,...,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62
1,2010-01-01,5142183973,61003026333,31.42,0,1,31.42,31.42,31.42,31.42,...,31.42,31.42,31.42,31.42,31.42,31.42,31.42,31.42,31.42,31.42
2,2010-01-01,5142131721,4503082993600,178.49,0,1,178.49,178.49,178.49,178.49,...,178.49,178.49,178.49,178.49,178.49,178.49,178.49,178.49,178.49,178.49
3,2010-01-01,5142148452,5509006296254,3.62,0,1,3.62,3.62,3.62,3.62,...,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62
4,2010-01-01,5142190439,5509006296254,3.62,0,2,3.62,3.62,3.62,3.62,...,3.62,7.24,7.24,7.24,7.24,7.24,3.62,3.62,3.62,3.62


In [88]:
for i in [0, 1, 3, 7, 14, 30]:
    merged_data_card_merchant['qaa_cm_' + str(i)] = merged_data_card_merchant['Amount'] / merged_data_card_merchant['Amount_avg_card_merchant_' + str(i)]
    merged_data_card_merchant['qam_cm_' + str(i)] = merged_data_card_merchant['Amount'] / merged_data_card_merchant['Amount_max_card_merchant_' + str(i)]
    merged_data_card_merchant['qame_cm_' + str(i)] = merged_data_card_merchant['Amount'] / merged_data_card_merchant['Amount_median_card_merchant_' + str(i)]
    merged_data_card_merchant['qas_cm_' + str(i)] = merged_data_card_merchant['Amount'] / merged_data_card_merchant['Amount_sum_card_merchant_' + str(i)]

In [89]:
merged_data_card_merchant.head()

Unnamed: 0,Date,Cardnum,Merchnum,Amount,Fraud,order,Amount_avg_card_merchant_1,Amount_avg_card_merchant_3,Amount_avg_card_merchant_7,Amount_avg_card_merchant_14,...,qame_cm_7,qas_cm_7,qaa_cm_14,qam_cm_14,qame_cm_14,qas_cm_14,qaa_cm_30,qam_cm_30,qame_cm_30,qas_cm_30
0,2010-01-01,5142190439,5509006296254,3.62,0,1,3.62,3.62,3.62,3.62,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1,2010-01-01,5142183973,61003026333,31.42,0,1,31.42,31.42,31.42,31.42,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2,2010-01-01,5142131721,4503082993600,178.49,0,1,178.49,178.49,178.49,178.49,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
3,2010-01-01,5142148452,5509006296254,3.62,0,1,3.62,3.62,3.62,3.62,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
4,2010-01-01,5142190439,5509006296254,3.62,0,2,3.62,3.62,3.62,3.62,...,1.0,0.5,1.0,1.0,1.0,0.5,1.0,1.0,1.0,0.5


In [90]:
merged_data_card_merchant.shape

(83970, 54)

### Card + Zip Code

In [91]:
for i in [1, 3, 7, 14, 30]:
    locals()['avg_card_zip_' + str(i)] = df.groupby(['Cardnum', 'Merch zip'])['Amount'].rolling(str(i) + 'd').mean().reset_index()
    locals()['avg_card_zip_' + str(i)]['order'] = locals()['avg_card_zip_' + str(i)].groupby(['Cardnum', 'Merch zip', 'Date']).cumcount() + 1
    
    locals()['max_card_zip_' + str(i)] = df.groupby(['Cardnum', 'Merch zip'])['Amount'].rolling(str(i) + 'd').max().reset_index()
    locals()['max_card_zip_' + str(i)]['order'] = locals()['max_card_zip_' + str(i)].groupby(['Cardnum', 'Merch zip', 'Date']).cumcount() + 1
    
    locals()['median_card_zip_' + str(i)] = df.groupby(['Cardnum', 'Merch zip'])['Amount'].rolling(str(i) + 'd').median().reset_index()
    locals()['median_card_zip_' + str(i)]['order'] = locals()['median_card_zip_' + str(i)].groupby(['Cardnum', 'Merch zip', 'Date']).cumcount() + 1
    
    locals()['sum_card_zip_' + str(i)] = df.groupby(['Cardnum', 'Merch zip'])['Amount'].rolling(str(i) + 'd').sum().reset_index()
    locals()['sum_card_zip_' + str(i)]['order'] = locals()['sum_card_zip_' + str(i)].groupby(['Cardnum', 'Merch zip', 'Date']).cumcount() + 1

In [92]:
avg_card_zip_30.head()

Unnamed: 0,Cardnum,Merch zip,Date,Amount,order
0,5142110002,22202.0,2010-10-12,150.0,1
1,5142110081,38118.0,2010-03-08,495.9,1
2,5142110081,38118.0,2010-03-08,566.05,2
3,5142110313,38118.0,2010-10-07,144.0,1
4,5142110313,38118.0,2010-10-07,94.0,2


In [93]:
max_card_zip_30.head()

Unnamed: 0,Cardnum,Merch zip,Date,Amount,order
0,5142110002,22202.0,2010-10-12,150.0,1
1,5142110081,38118.0,2010-03-08,495.9,1
2,5142110081,38118.0,2010-03-08,636.2,2
3,5142110313,38118.0,2010-10-07,144.0,1
4,5142110313,38118.0,2010-10-07,144.0,2


In [94]:
median_card_zip_30.head()

Unnamed: 0,Cardnum,Merch zip,Date,Amount,order
0,5142110002,22202.0,2010-10-12,150.0,1
1,5142110081,38118.0,2010-03-08,495.9,1
2,5142110081,38118.0,2010-03-08,566.05,2
3,5142110313,38118.0,2010-10-07,144.0,1
4,5142110313,38118.0,2010-10-07,94.0,2


In [95]:
sum_card_zip_30.head()

Unnamed: 0,Cardnum,Merch zip,Date,Amount,order
0,5142110002,22202.0,2010-10-12,150.0,1
1,5142110081,38118.0,2010-03-08,495.9,1
2,5142110081,38118.0,2010-03-08,1132.1,2
3,5142110313,38118.0,2010-10-07,144.0,1
4,5142110313,38118.0,2010-10-07,188.0,2


In [96]:
card_zip_df = df.copy()
card_zip_df = card_zip_df.reset_index()
card_zip_df['order'] = card_zip_df.groupby(['Cardnum', 'Merch zip', 'Date']).cumcount() + 1

In [97]:
card_zip_df.head()

Unnamed: 0,Date,Recnum,Cardnum,Merchnum,Merch description,Merch state,Merch zip,Transtype,Amount,Fraud,order
0,2010-01-01,1,5142190439,5509006296254,FEDEX SHP 12/23/09 AB#,TN,38118.0,P,3.62,0,1
1,2010-01-01,2,5142183973,61003026333,SERVICE MERCHANDISE #81,MA,1730.0,P,31.42,0,1
2,2010-01-01,3,5142131721,4503082993600,OFFICE DEPOT #191,MD,20763.0,P,178.49,0,1
3,2010-01-01,4,5142148452,5509006296254,FEDEX SHP 12/28/09 AB#,TN,38118.0,P,3.62,0,1
4,2010-01-01,5,5142190439,5509006296254,FEDEX SHP 12/23/09 AB#,TN,38118.0,P,3.62,0,2


In [98]:
not_needed_columns = ['index', 'Recnum', 'Merchnum', 'Merch description', 'Merch state', 'Transtype']
for column in not_needed_columns:
    try:
        del card_zip_df[column]
    except:
        pass

In [99]:
card_zip_df.head()

Unnamed: 0,Date,Cardnum,Merch zip,Amount,Fraud,order
0,2010-01-01,5142190439,38118.0,3.62,0,1
1,2010-01-01,5142183973,1730.0,31.42,0,1
2,2010-01-01,5142131721,20763.0,178.49,0,1
3,2010-01-01,5142148452,38118.0,3.62,0,1
4,2010-01-01,5142190439,38118.0,3.62,0,2


In [100]:
merged_data_card_zip = card_zip_df \
.merge(avg_card_zip_1, on = ['Date', 'Merch zip', 'Cardnum', 'order'], how = 'left', suffixes=['', '_avg_card_zip_1'])\
.merge(avg_card_zip_3, on = ['Date', 'Merch zip', 'Cardnum', 'order'], how = 'left', suffixes=['', '_avg_card_zip_3']) \
.merge(avg_card_zip_7, on = ['Date', 'Merch zip', 'Cardnum', 'order'], how = 'left', suffixes=['', '_avg_card_zip_7']) \
.merge(avg_card_zip_14, on = ['Date', 'Merch zip', 'Cardnum', 'order'], how = 'left', suffixes=['', '_avg_card_zip_14']) \
.merge(avg_card_zip_30, on = ['Date', 'Merch zip', 'Cardnum', 'order'], how = 'left', suffixes=['', '_avg_card_zip_30']) \
.merge(max_card_zip_1, on = ['Date', 'Merch zip', 'Cardnum', 'order'], how = 'left', suffixes=['', '_max_card_zip_1'])\
.merge(max_card_zip_3, on = ['Date', 'Merch zip', 'Cardnum', 'order'], how = 'left', suffixes=['', '_max_card_zip_3']) \
.merge(max_card_zip_7, on = ['Date', 'Merch zip', 'Cardnum', 'order'], how = 'left', suffixes=['', '_max_card_zip_7']) \
.merge(max_card_zip_14, on = ['Date', 'Merch zip', 'Cardnum', 'order'], how = 'left', suffixes=['', '_max_card_zip_14']) \
.merge(max_card_zip_30, on = ['Date', 'Merch zip', 'Cardnum', 'order'], how = 'left', suffixes=['', '_max_card_zip_30']) \
.merge(median_card_zip_1, on = ['Date', 'Merch zip', 'Cardnum', 'order'], how = 'left', suffixes=['', '_median_card_zip_1'])\
.merge(median_card_zip_3, on = ['Date', 'Merch zip', 'Cardnum', 'order'], how = 'left', suffixes=['', '_median_card_zip_3']) \
.merge(median_card_zip_7, on = ['Date', 'Merch zip', 'Cardnum', 'order'], how = 'left', suffixes=['', '_median_card_zip_7']) \
.merge(median_card_zip_14, on = ['Date', 'Merch zip', 'Cardnum', 'order'], how = 'left', suffixes=['', '_median_card_zip_14']) \
.merge(median_card_zip_30, on = ['Date', 'Merch zip', 'Cardnum', 'order'], how = 'left', suffixes=['', '_median_card_zip_30']) \
.merge(sum_card_zip_1, on = ['Date', 'Merch zip', 'Cardnum', 'order'], how = 'left', suffixes=['', '_sum_card_zip_1'])\
.merge(sum_card_zip_3, on = ['Date', 'Merch zip', 'Cardnum', 'order'], how = 'left', suffixes=['', '_sum_card_zip_3']) \
.merge(sum_card_zip_7, on = ['Date', 'Merch zip', 'Cardnum', 'order'], how = 'left', suffixes=['', '_sum_card_zip_7']) \
.merge(sum_card_zip_14, on = ['Date', 'Merch zip', 'Cardnum', 'order'], how = 'left', suffixes=['', '_sum_card_zip_14']) \
.merge(sum_card_zip_30, on = ['Date', 'Merch zip', 'Cardnum', 'order'], how = 'left', suffixes=['', '_sum_card_zip_30'])

In [101]:
merged_data_card_zip['Amount_avg_card_zip_0'] = merged_data_card_zip['Amount']
merged_data_card_zip['Amount_max_card_zip_0'] = merged_data_card_zip['Amount']
merged_data_card_zip['Amount_median_card_zip_0'] = merged_data_card_zip['Amount']
merged_data_card_zip['Amount_sum_card_zip_0'] = merged_data_card_zip['Amount']

In [102]:
merged_data_card_zip.head()

Unnamed: 0,Date,Cardnum,Merch zip,Amount,Fraud,order,Amount_avg_card_zip_1,Amount_avg_card_zip_3,Amount_avg_card_zip_7,Amount_avg_card_zip_14,...,Amount_median_card_zip_30,Amount_sum_card_zip_1,Amount_sum_card_zip_3,Amount_sum_card_zip_7,Amount_sum_card_zip_14,Amount_sum_card_zip_30,Amount_avg_card_zip_0,Amount_max_card_zip_0,Amount_median_card_zip_0,Amount_sum_card_zip_0
0,2010-01-01,5142190439,38118.0,3.62,0,1,3.62,3.62,3.62,3.62,...,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62
1,2010-01-01,5142183973,1730.0,31.42,0,1,31.42,31.42,31.42,31.42,...,31.42,31.42,31.42,31.42,31.42,31.42,31.42,31.42,31.42,31.42
2,2010-01-01,5142131721,20763.0,178.49,0,1,178.49,178.49,178.49,178.49,...,178.49,178.49,178.49,178.49,178.49,178.49,178.49,178.49,178.49,178.49
3,2010-01-01,5142148452,38118.0,3.62,0,1,3.62,3.62,3.62,3.62,...,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62
4,2010-01-01,5142190439,38118.0,3.62,0,2,3.62,3.62,3.62,3.62,...,3.62,7.24,7.24,7.24,7.24,7.24,3.62,3.62,3.62,3.62


In [103]:
for i in [0, 1, 3, 7, 14, 30]:
    merged_data_card_zip['qaa_cm_' + str(i)] = merged_data_card_zip['Amount'] / merged_data_card_zip['Amount_avg_card_zip_' + str(i)]
    merged_data_card_zip['qam_cm_' + str(i)] = merged_data_card_zip['Amount'] / merged_data_card_zip['Amount_max_card_zip_' + str(i)]
    merged_data_card_zip['qame_cm_' + str(i)] = merged_data_card_zip['Amount'] / merged_data_card_zip['Amount_median_card_zip_' + str(i)]
    merged_data_card_zip['qas_cm_' + str(i)] = merged_data_card_zip['Amount'] / merged_data_card_zip['Amount_sum_card_zip_' + str(i)]

In [104]:
merged_data_card_zip.head()

Unnamed: 0,Date,Cardnum,Merch zip,Amount,Fraud,order,Amount_avg_card_zip_1,Amount_avg_card_zip_3,Amount_avg_card_zip_7,Amount_avg_card_zip_14,...,qame_cm_7,qas_cm_7,qaa_cm_14,qam_cm_14,qame_cm_14,qas_cm_14,qaa_cm_30,qam_cm_30,qame_cm_30,qas_cm_30
0,2010-01-01,5142190439,38118.0,3.62,0,1,3.62,3.62,3.62,3.62,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1,2010-01-01,5142183973,1730.0,31.42,0,1,31.42,31.42,31.42,31.42,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2,2010-01-01,5142131721,20763.0,178.49,0,1,178.49,178.49,178.49,178.49,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
3,2010-01-01,5142148452,38118.0,3.62,0,1,3.62,3.62,3.62,3.62,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
4,2010-01-01,5142190439,38118.0,3.62,0,2,3.62,3.62,3.62,3.62,...,1.0,0.5,1.0,1.0,1.0,0.5,1.0,1.0,1.0,0.5


In [105]:
merged_data_card_zip.shape

(83970, 54)

### Card + State

In [106]:
for i in [1, 3, 7, 14, 30]:
    locals()['avg_card_state_' + str(i)] = df.groupby(['Cardnum', 'Merch state'])['Amount'].rolling(str(i) + 'd').mean().reset_index()
    locals()['avg_card_state_' + str(i)]['order'] = locals()['avg_card_state_' + str(i)].groupby(['Cardnum', 'Merch state', 'Date']).cumcount() + 1
    
    locals()['max_card_state_' + str(i)] = df.groupby(['Cardnum', 'Merch state'])['Amount'].rolling(str(i) + 'd').max().reset_index()
    locals()['max_card_state_' + str(i)]['order'] = locals()['max_card_state_' + str(i)].groupby(['Cardnum', 'Merch state', 'Date']).cumcount() + 1
    
    locals()['median_card_state_' + str(i)] = df.groupby(['Cardnum', 'Merch state'])['Amount'].rolling(str(i) + 'd').median().reset_index()
    locals()['median_card_state_' + str(i)]['order'] = locals()['median_card_state_' + str(i)].groupby(['Cardnum', 'Merch state', 'Date']).cumcount() + 1
    
    locals()['sum_card_state_' + str(i)] = df.groupby(['Cardnum', 'Merch state'])['Amount'].rolling(str(i) + 'd').sum().reset_index()
    locals()['sum_card_state_' + str(i)]['order'] = locals()['sum_card_state_' + str(i)].groupby(['Cardnum', 'Merch state', 'Date']).cumcount() + 1

In [107]:
avg_card_state_14.head()

Unnamed: 0,Cardnum,Merch state,Date,Amount,order
0,5142110002,VA,2010-10-12,150.0,1
1,5142110081,TN,2010-03-08,495.9,1
2,5142110081,TN,2010-03-08,566.05,2
3,5142110313,TN,2010-10-07,144.0,1
4,5142110313,TN,2010-10-07,94.0,2


In [108]:
max_card_state_14.head()

Unnamed: 0,Cardnum,Merch state,Date,Amount,order
0,5142110002,VA,2010-10-12,150.0,1
1,5142110081,TN,2010-03-08,495.9,1
2,5142110081,TN,2010-03-08,636.2,2
3,5142110313,TN,2010-10-07,144.0,1
4,5142110313,TN,2010-10-07,144.0,2


In [109]:
median_card_state_14.head()

Unnamed: 0,Cardnum,Merch state,Date,Amount,order
0,5142110002,VA,2010-10-12,150.0,1
1,5142110081,TN,2010-03-08,495.9,1
2,5142110081,TN,2010-03-08,566.05,2
3,5142110313,TN,2010-10-07,144.0,1
4,5142110313,TN,2010-10-07,94.0,2


In [110]:
sum_card_state_14.head()

Unnamed: 0,Cardnum,Merch state,Date,Amount,order
0,5142110002,VA,2010-10-12,150.0,1
1,5142110081,TN,2010-03-08,495.9,1
2,5142110081,TN,2010-03-08,1132.1,2
3,5142110313,TN,2010-10-07,144.0,1
4,5142110313,TN,2010-10-07,188.0,2


In [111]:
card_state_df = df.copy()
card_state_df = card_state_df.reset_index()
card_state_df['order'] = card_state_df.groupby(['Cardnum', 'Merch state', 'Date']).cumcount() + 1

In [112]:
card_state_df.head()

Unnamed: 0,Date,Recnum,Cardnum,Merchnum,Merch description,Merch state,Merch zip,Transtype,Amount,Fraud,order
0,2010-01-01,1,5142190439,5509006296254,FEDEX SHP 12/23/09 AB#,TN,38118.0,P,3.62,0,1
1,2010-01-01,2,5142183973,61003026333,SERVICE MERCHANDISE #81,MA,1730.0,P,31.42,0,1
2,2010-01-01,3,5142131721,4503082993600,OFFICE DEPOT #191,MD,20763.0,P,178.49,0,1
3,2010-01-01,4,5142148452,5509006296254,FEDEX SHP 12/28/09 AB#,TN,38118.0,P,3.62,0,1
4,2010-01-01,5,5142190439,5509006296254,FEDEX SHP 12/23/09 AB#,TN,38118.0,P,3.62,0,2


In [113]:
not_needed_columns = ['index', 'Recnum', 'Merchnum', 'Merch description', 'Merch zip', 'Transtype']
for column in not_needed_columns:
    try:
        del card_state_df[column]
    except:
        pass

In [114]:
card_state_df.head()

Unnamed: 0,Date,Cardnum,Merch state,Amount,Fraud,order
0,2010-01-01,5142190439,TN,3.62,0,1
1,2010-01-01,5142183973,MA,31.42,0,1
2,2010-01-01,5142131721,MD,178.49,0,1
3,2010-01-01,5142148452,TN,3.62,0,1
4,2010-01-01,5142190439,TN,3.62,0,2


In [115]:
merged_data_card_state = card_state_df \
.merge(avg_card_state_1, on = ['Date', 'Merch state', 'Cardnum', 'order'], how = 'left', suffixes=['', '_avg_card_state_1'])\
.merge(avg_card_state_3, on = ['Date', 'Merch state', 'Cardnum', 'order'], how = 'left', suffixes=['', '_avg_card_state_3']) \
.merge(avg_card_state_7, on = ['Date', 'Merch state', 'Cardnum', 'order'], how = 'left', suffixes=['', '_avg_card_state_7']) \
.merge(avg_card_state_14, on = ['Date', 'Merch state', 'Cardnum', 'order'], how = 'left', suffixes=['', '_avg_card_state_14']) \
.merge(avg_card_state_30, on = ['Date', 'Merch state', 'Cardnum', 'order'], how = 'left', suffixes=['', '_avg_card_state_30']) \
.merge(max_card_state_1, on = ['Date', 'Merch state', 'Cardnum', 'order'], how = 'left', suffixes=['', '_max_card_state_1'])\
.merge(max_card_state_3, on = ['Date', 'Merch state', 'Cardnum', 'order'], how = 'left', suffixes=['', '_max_card_state_3']) \
.merge(max_card_state_7, on = ['Date', 'Merch state', 'Cardnum', 'order'], how = 'left', suffixes=['', '_max_card_state_7']) \
.merge(max_card_state_14, on = ['Date', 'Merch state', 'Cardnum', 'order'], how = 'left', suffixes=['', '_max_card_state_14']) \
.merge(max_card_state_30, on = ['Date', 'Merch state', 'Cardnum', 'order'], how = 'left', suffixes=['', '_max_card_state_30']) \
.merge(median_card_state_1, on = ['Date', 'Merch state', 'Cardnum', 'order'], how = 'left', suffixes=['', '_median_card_state_1'])\
.merge(median_card_state_3, on = ['Date', 'Merch state', 'Cardnum', 'order'], how = 'left', suffixes=['', '_median_card_state_3']) \
.merge(median_card_state_7, on = ['Date', 'Merch state', 'Cardnum', 'order'], how = 'left', suffixes=['', '_median_card_state_7']) \
.merge(median_card_state_14, on = ['Date', 'Merch state', 'Cardnum', 'order'], how = 'left', suffixes=['', '_median_card_state_14']) \
.merge(median_card_state_30, on = ['Date', 'Merch state', 'Cardnum', 'order'], how = 'left', suffixes=['', '_median_card_state_30']) \
.merge(sum_card_state_1, on = ['Date', 'Merch state', 'Cardnum', 'order'], how = 'left', suffixes=['', '_sum_card_state_1'])\
.merge(sum_card_state_3, on = ['Date', 'Merch state', 'Cardnum', 'order'], how = 'left', suffixes=['', '_sum_card_state_3']) \
.merge(sum_card_state_7, on = ['Date', 'Merch state', 'Cardnum', 'order'], how = 'left', suffixes=['', '_sum_card_state_7']) \
.merge(sum_card_state_14, on = ['Date', 'Merch state', 'Cardnum', 'order'], how = 'left', suffixes=['', '_sum_card_state_14']) \
.merge(sum_card_state_30, on = ['Date', 'Merch state', 'Cardnum', 'order'], how = 'left', suffixes=['', '_sum_card_state_30'])

In [116]:
merged_data_card_state['Amount_avg_card_state_0'] = merged_data_card_state['Amount']
merged_data_card_state['Amount_max_card_state_0'] = merged_data_card_state['Amount']
merged_data_card_state['Amount_median_card_state_0'] = merged_data_card_state['Amount']
merged_data_card_state['Amount_sum_card_state_0'] = merged_data_card_state['Amount']

In [117]:
merged_data_card_state.head()

Unnamed: 0,Date,Cardnum,Merch state,Amount,Fraud,order,Amount_avg_card_state_1,Amount_avg_card_state_3,Amount_avg_card_state_7,Amount_avg_card_state_14,...,Amount_median_card_state_30,Amount_sum_card_state_1,Amount_sum_card_state_3,Amount_sum_card_state_7,Amount_sum_card_state_14,Amount_sum_card_state_30,Amount_avg_card_state_0,Amount_max_card_state_0,Amount_median_card_state_0,Amount_sum_card_state_0
0,2010-01-01,5142190439,TN,3.62,0,1,3.62,3.62,3.62,3.62,...,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62
1,2010-01-01,5142183973,MA,31.42,0,1,31.42,31.42,31.42,31.42,...,31.42,31.42,31.42,31.42,31.42,31.42,31.42,31.42,31.42,31.42
2,2010-01-01,5142131721,MD,178.49,0,1,178.49,178.49,178.49,178.49,...,178.49,178.49,178.49,178.49,178.49,178.49,178.49,178.49,178.49,178.49
3,2010-01-01,5142148452,TN,3.62,0,1,3.62,3.62,3.62,3.62,...,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62
4,2010-01-01,5142190439,TN,3.62,0,2,3.62,3.62,3.62,3.62,...,3.62,7.24,7.24,7.24,7.24,7.24,3.62,3.62,3.62,3.62


In [118]:
for i in [0, 1, 3, 7, 14, 30]:
    merged_data_card_state['qaa_cm_' + str(i)] = merged_data_card_state['Amount'] / merged_data_card_state['Amount_avg_card_state_' + str(i)]
    merged_data_card_state['qam_cm_' + str(i)] = merged_data_card_state['Amount'] / merged_data_card_state['Amount_max_card_state_' + str(i)]
    merged_data_card_state['qame_cm_' + str(i)] = merged_data_card_state['Amount'] / merged_data_card_state['Amount_median_card_state_' + str(i)]
    merged_data_card_state['qas_cm_' + str(i)] = merged_data_card_state['Amount'] / merged_data_card_state['Amount_sum_card_state_' + str(i)]

In [119]:
merged_data_card_state.head()

Unnamed: 0,Date,Cardnum,Merch state,Amount,Fraud,order,Amount_avg_card_state_1,Amount_avg_card_state_3,Amount_avg_card_state_7,Amount_avg_card_state_14,...,qame_cm_7,qas_cm_7,qaa_cm_14,qam_cm_14,qame_cm_14,qas_cm_14,qaa_cm_30,qam_cm_30,qame_cm_30,qas_cm_30
0,2010-01-01,5142190439,TN,3.62,0,1,3.62,3.62,3.62,3.62,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1,2010-01-01,5142183973,MA,31.42,0,1,31.42,31.42,31.42,31.42,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2,2010-01-01,5142131721,MD,178.49,0,1,178.49,178.49,178.49,178.49,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
3,2010-01-01,5142148452,TN,3.62,0,1,3.62,3.62,3.62,3.62,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
4,2010-01-01,5142190439,TN,3.62,0,2,3.62,3.62,3.62,3.62,...,1.0,0.5,1.0,1.0,1.0,0.5,1.0,1.0,1.0,0.5


In [120]:
merged_data_card_state.shape

(83970, 54)

### Amount Variables 240

In [121]:
merged_data_card.head()

Unnamed: 0,Date,Cardnum,Amount,Fraud,order,Amount_avg_card_1,Amount_avg_card_3,Amount_avg_card_7,Amount_avg_card_14,Amount_avg_card_30,...,qame_cm_7,qas_cm_7,qaa_cm_14,qam_cm_14,qame_cm_14,qas_cm_14,qaa_cm_30,qam_cm_30,qame_cm_30,qas_cm_30
0,2010-01-01,5142190439,3.62,0,1,3.62,3.62,3.62,3.62,3.62,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1,2010-01-01,5142183973,31.42,0,1,31.42,31.42,31.42,31.42,31.42,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2,2010-01-01,5142131721,178.49,0,1,178.49,178.49,178.49,178.49,178.49,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
3,2010-01-01,5142148452,3.62,0,1,3.62,3.62,3.62,3.62,3.62,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
4,2010-01-01,5142190439,3.62,0,2,3.62,3.62,3.62,3.62,3.62,...,1.0,0.5,1.0,1.0,1.0,0.5,1.0,1.0,1.0,0.5


In [122]:
merged_data_merchant.head()

Unnamed: 0,Date,Merchnum,Amount,Fraud,order,Amount_avg_merchant_1,Amount_avg_merchant_3,Amount_avg_merchant_7,Amount_avg_merchant_14,Amount_avg_merchant_30,...,qame_cm_7,qas_cm_7,qaa_cm_14,qam_cm_14,qame_cm_14,qas_cm_14,qaa_cm_30,qam_cm_30,qame_cm_30,qas_cm_30
0,2010-01-01,5509006296254,3.62,0,1,3.62,3.62,3.62,3.62,3.62,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1,2010-01-01,61003026333,31.42,0,1,31.42,31.42,31.42,31.42,31.42,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2,2010-01-01,4503082993600,178.49,0,1,178.49,178.49,178.49,178.49,178.49,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
3,2010-01-01,5509006296254,3.62,0,2,3.62,3.62,3.62,3.62,3.62,...,1.0,0.5,1.0,1.0,1.0,0.5,1.0,1.0,1.0,0.5
4,2010-01-01,5509006296254,3.62,0,3,3.62,3.62,3.62,3.62,3.62,...,1.0,0.333333,1.0,1.0,1.0,0.333333,1.0,1.0,1.0,0.333333


In [123]:
merged_data_card_merchant.head()

Unnamed: 0,Date,Cardnum,Merchnum,Amount,Fraud,order,Amount_avg_card_merchant_1,Amount_avg_card_merchant_3,Amount_avg_card_merchant_7,Amount_avg_card_merchant_14,...,qame_cm_7,qas_cm_7,qaa_cm_14,qam_cm_14,qame_cm_14,qas_cm_14,qaa_cm_30,qam_cm_30,qame_cm_30,qas_cm_30
0,2010-01-01,5142190439,5509006296254,3.62,0,1,3.62,3.62,3.62,3.62,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1,2010-01-01,5142183973,61003026333,31.42,0,1,31.42,31.42,31.42,31.42,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2,2010-01-01,5142131721,4503082993600,178.49,0,1,178.49,178.49,178.49,178.49,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
3,2010-01-01,5142148452,5509006296254,3.62,0,1,3.62,3.62,3.62,3.62,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
4,2010-01-01,5142190439,5509006296254,3.62,0,2,3.62,3.62,3.62,3.62,...,1.0,0.5,1.0,1.0,1.0,0.5,1.0,1.0,1.0,0.5


In [124]:
merged_data_card_zip.head()

Unnamed: 0,Date,Cardnum,Merch zip,Amount,Fraud,order,Amount_avg_card_zip_1,Amount_avg_card_zip_3,Amount_avg_card_zip_7,Amount_avg_card_zip_14,...,qame_cm_7,qas_cm_7,qaa_cm_14,qam_cm_14,qame_cm_14,qas_cm_14,qaa_cm_30,qam_cm_30,qame_cm_30,qas_cm_30
0,2010-01-01,5142190439,38118.0,3.62,0,1,3.62,3.62,3.62,3.62,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1,2010-01-01,5142183973,1730.0,31.42,0,1,31.42,31.42,31.42,31.42,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2,2010-01-01,5142131721,20763.0,178.49,0,1,178.49,178.49,178.49,178.49,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
3,2010-01-01,5142148452,38118.0,3.62,0,1,3.62,3.62,3.62,3.62,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
4,2010-01-01,5142190439,38118.0,3.62,0,2,3.62,3.62,3.62,3.62,...,1.0,0.5,1.0,1.0,1.0,0.5,1.0,1.0,1.0,0.5


In [125]:
merged_data_card_state.head()

Unnamed: 0,Date,Cardnum,Merch state,Amount,Fraud,order,Amount_avg_card_state_1,Amount_avg_card_state_3,Amount_avg_card_state_7,Amount_avg_card_state_14,...,qame_cm_7,qas_cm_7,qaa_cm_14,qam_cm_14,qame_cm_14,qas_cm_14,qaa_cm_30,qam_cm_30,qame_cm_30,qas_cm_30
0,2010-01-01,5142190439,TN,3.62,0,1,3.62,3.62,3.62,3.62,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1,2010-01-01,5142183973,MA,31.42,0,1,31.42,31.42,31.42,31.42,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2,2010-01-01,5142131721,MD,178.49,0,1,178.49,178.49,178.49,178.49,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
3,2010-01-01,5142148452,TN,3.62,0,1,3.62,3.62,3.62,3.62,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
4,2010-01-01,5142190439,TN,3.62,0,2,3.62,3.62,3.62,3.62,...,1.0,0.5,1.0,1.0,1.0,0.5,1.0,1.0,1.0,0.5


In [126]:
merged_data_card_pure = merged_data_card.iloc[:, 5:]
merged_data_merchant_pure = merged_data_merchant.iloc[:, 5:]
merged_data_card_merchant_pure = merged_data_card_merchant.iloc[:, 6:]
merged_data_card_zip_pure = merged_data_card_zip.iloc[:, 6:]
merged_data_card_state_pure = merged_data_card_state.iloc[:, 6:]

In [127]:
[merged_data_card_pure.shape, 
merged_data_merchant_pure.shape,
merged_data_card_merchant_pure.shape, 
merged_data_card_zip_pure.shape,
merged_data_card_state_pure.shape]

[(83970, 48), (83970, 48), (83970, 48), (83970, 48), (83970, 48)]

In [128]:
Amount_variables = pd.concat([merged_data_card_pure,
                              merged_data_merchant_pure,
                              merged_data_card_merchant_pure, 
                              merged_data_card_zip_pure, 
                              merged_data_card_state_pure], axis = 1)

In [129]:
Amount_variables.shape

(83970, 240)

In [130]:
Amount_variables.head()

Unnamed: 0,Amount_avg_card_1,Amount_avg_card_3,Amount_avg_card_7,Amount_avg_card_14,Amount_avg_card_30,Amount_max_card_1,Amount_max_card_3,Amount_max_card_7,Amount_max_card_14,Amount_max_card_30,...,qame_cm_7,qas_cm_7,qaa_cm_14,qam_cm_14,qame_cm_14,qas_cm_14,qaa_cm_30,qam_cm_30,qame_cm_30,qas_cm_30
0,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1,31.42,31.42,31.42,31.42,31.42,31.42,31.42,31.42,31.42,31.42,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2,178.49,178.49,178.49,178.49,178.49,178.49,178.49,178.49,178.49,178.49,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
3,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
4,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62,...,1.0,0.5,1.0,1.0,1.0,0.5,1.0,1.0,1.0,0.5


In [131]:
Amount_variables['Recnum'] = df['Recnum'].values
Amount_variables['Fraud'] = df['Fraud'].values
Amount_variables['Date'] = df.index.values

In [132]:
Amount_variables.head()

Unnamed: 0,Amount_avg_card_1,Amount_avg_card_3,Amount_avg_card_7,Amount_avg_card_14,Amount_avg_card_30,Amount_max_card_1,Amount_max_card_3,Amount_max_card_7,Amount_max_card_14,Amount_max_card_30,...,qam_cm_14,qame_cm_14,qas_cm_14,qaa_cm_30,qam_cm_30,qame_cm_30,qas_cm_30,Recnum,Fraud,Date
0,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1,0,2010-01-01
1,31.42,31.42,31.42,31.42,31.42,31.42,31.42,31.42,31.42,31.42,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2,0,2010-01-01
2,178.49,178.49,178.49,178.49,178.49,178.49,178.49,178.49,178.49,178.49,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,3,0,2010-01-01
3,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,4,0,2010-01-01
4,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62,...,1.0,1.0,0.5,1.0,1.0,1.0,0.5,5,0,2010-01-01


In [133]:
Amount_variables.shape

(83970, 243)

## Frequency Variables

### Card

In [134]:
count_card_1 = df.groupby(['Cardnum'])['Amount'].rolling('1d').count().reset_index()
count_card_1['Count'] = count_card_1['Amount'].astype('int')
del count_card_1['Amount']
count_card_1['order'] = count_card_1.groupby(['Cardnum', 'Date']).cumcount() + 1

count_card_3 = df.groupby(['Cardnum'])['Amount'].rolling('3d').count().reset_index()
count_card_3['Count'] = count_card_3['Amount'].astype('int')
del count_card_3['Amount']
count_card_3['order'] = count_card_3.groupby(['Cardnum', 'Date']).cumcount() + 1

count_card_7 = df.groupby(['Cardnum'])['Amount'].rolling('7d').count().reset_index()
count_card_7['Count'] = count_card_7['Amount'].astype('int')
del count_card_7['Amount']
count_card_7['order'] = count_card_7.groupby(['Cardnum', 'Date']).cumcount() + 1

count_card_14 = df.groupby(['Cardnum'])['Amount'].rolling('14d').count().reset_index()
count_card_14['Count'] = count_card_14['Amount'].astype('int')
del count_card_14['Amount']
count_card_14['order'] = count_card_14.groupby(['Cardnum', 'Date']).cumcount() + 1

count_card_30 = df.groupby(['Cardnum'])['Amount'].rolling('30d').count().reset_index()
count_card_30['Count'] = count_card_30['Amount'].astype('int')
del count_card_30['Amount']
count_card_30['order'] = count_card_30.groupby(['Cardnum', 'Date']).cumcount() + 1

In [135]:
count_card_1.head()

Unnamed: 0,Cardnum,Date,Count,order
0,5142110002,2010-10-12,1,1
1,5142110081,2010-03-08,1,1
2,5142110081,2010-03-08,2,2
3,5142110313,2010-10-07,1,1
4,5142110313,2010-10-07,2,2


In [136]:
card_df = df.copy()
card_df = card_df.reset_index()
card_df['order'] = card_df.groupby(['Cardnum', 'Date']).cumcount() + 1

In [137]:
card_df.head()

Unnamed: 0,Date,Recnum,Cardnum,Merchnum,Merch description,Merch state,Merch zip,Transtype,Amount,Fraud,order
0,2010-01-01,1,5142190439,5509006296254,FEDEX SHP 12/23/09 AB#,TN,38118.0,P,3.62,0,1
1,2010-01-01,2,5142183973,61003026333,SERVICE MERCHANDISE #81,MA,1730.0,P,31.42,0,1
2,2010-01-01,3,5142131721,4503082993600,OFFICE DEPOT #191,MD,20763.0,P,178.49,0,1
3,2010-01-01,4,5142148452,5509006296254,FEDEX SHP 12/28/09 AB#,TN,38118.0,P,3.62,0,1
4,2010-01-01,5,5142190439,5509006296254,FEDEX SHP 12/23/09 AB#,TN,38118.0,P,3.62,0,2


In [138]:
not_needed_columns = ['index', 'Recnum', 'Merchnum', 'Merch description', 'Merch zip', 'Transtype', 'Merch state', 'Amount']
for column in not_needed_columns:
    try:
        del card_df[column]
    except:
        pass

In [139]:
card_df['Count'] = 0

In [140]:
card_df.head()

Unnamed: 0,Date,Cardnum,Fraud,order,Count
0,2010-01-01,5142190439,0,1,0
1,2010-01-01,5142183973,0,1,0
2,2010-01-01,5142131721,0,1,0
3,2010-01-01,5142148452,0,1,0
4,2010-01-01,5142190439,0,2,0


In [141]:
merged_data_card = card_df \
.merge(count_card_1, on = ['Date', 'Cardnum', 'order'], how = 'left', suffixes=['', '_count_card_1'])\
.merge(count_card_3, on = ['Date', 'Cardnum', 'order'], how = 'left', suffixes=['', '_count_card_3']) \
.merge(count_card_7, on = ['Date', 'Cardnum', 'order'], how = 'left', suffixes=['', '_count_card_7']) \
.merge(count_card_14, on = ['Date', 'Cardnum', 'order'], how = 'left', suffixes=['', '_count_card_14']) \
.merge(count_card_30, on = ['Date', 'Cardnum', 'order'], how = 'left', suffixes=['', '_count_card_30']) 

In [142]:
merged_data_card['Count_count_card_0'] = 1

In [143]:
merged_data_card.tail()

Unnamed: 0,Date,Cardnum,Fraud,order,Count,Count_count_card_1,Count_count_card_3,Count_count_card_7,Count_count_card_14,Count_count_card_30,Count_count_card_0
83965,2010-10-31,5142130739,0,1,0,1,1,1,1,5,1
83966,2010-10-31,5142219772,0,2,0,2,2,2,2,2,1
83967,2010-10-31,5142257707,0,1,0,1,1,2,3,4,1
83968,2010-10-31,5142168022,0,1,0,1,1,1,1,1,1
83969,2010-10-31,5142137416,0,1,0,1,1,1,1,2,1


### Merchant

In [144]:
count_merchant_1 = df.groupby(['Merchnum'])['Amount'].rolling('1d').count().reset_index()
count_merchant_1['Count'] = count_merchant_1['Amount'].astype('int')
del count_merchant_1['Amount']
count_merchant_1['order'] = count_merchant_1.groupby(['Merchnum', 'Date']).cumcount() + 1

count_merchant_3 = df.groupby(['Merchnum'])['Amount'].rolling('3d').count().reset_index()
count_merchant_3['Count'] = count_merchant_3['Amount'].astype('int')
del count_merchant_3['Amount']
count_merchant_3['order'] = count_merchant_3.groupby(['Merchnum', 'Date']).cumcount() + 1

count_merchant_7 = df.groupby(['Merchnum'])['Amount'].rolling('7d').count().reset_index()
count_merchant_7['Count'] = count_merchant_7['Amount'].astype('int')
del count_merchant_7['Amount']
count_merchant_7['order'] = count_merchant_7.groupby(['Merchnum', 'Date']).cumcount() + 1

count_merchant_14 = df.groupby(['Merchnum'])['Amount'].rolling('14d').count().reset_index()
count_merchant_14['Count'] = count_merchant_14['Amount'].astype('int')
del count_merchant_14['Amount']
count_merchant_14['order'] = count_merchant_14.groupby(['Merchnum', 'Date']).cumcount() + 1

count_merchant_30 = df.groupby(['Merchnum'])['Amount'].rolling('30d').count().reset_index()
count_merchant_30['Count'] = count_merchant_30['Amount'].astype('int')
del count_merchant_30['Amount']
count_merchant_30['order'] = count_merchant_30.groupby(['Merchnum', 'Date']).cumcount() + 1

In [145]:
merchant_df = df.copy()
merchant_df = merchant_df.reset_index()
merchant_df['order'] = merchant_df.groupby(['Merchnum', 'Date']).cumcount() + 1

In [146]:
merchant_df.head()

Unnamed: 0,Date,Recnum,Cardnum,Merchnum,Merch description,Merch state,Merch zip,Transtype,Amount,Fraud,order
0,2010-01-01,1,5142190439,5509006296254,FEDEX SHP 12/23/09 AB#,TN,38118.0,P,3.62,0,1
1,2010-01-01,2,5142183973,61003026333,SERVICE MERCHANDISE #81,MA,1730.0,P,31.42,0,1
2,2010-01-01,3,5142131721,4503082993600,OFFICE DEPOT #191,MD,20763.0,P,178.49,0,1
3,2010-01-01,4,5142148452,5509006296254,FEDEX SHP 12/28/09 AB#,TN,38118.0,P,3.62,0,2
4,2010-01-01,5,5142190439,5509006296254,FEDEX SHP 12/23/09 AB#,TN,38118.0,P,3.62,0,3


In [147]:
not_needed_columns = ['index', 'Recnum', 'Cardnum', 'Merch description', 'Merch zip', 'Transtype', 'Merch state', 'Amount']
for column in not_needed_columns:
    try:
        del merchant_df[column]
    except:
        pass

In [148]:
merchant_df.head()

Unnamed: 0,Date,Merchnum,Fraud,order
0,2010-01-01,5509006296254,0,1
1,2010-01-01,61003026333,0,1
2,2010-01-01,4503082993600,0,1
3,2010-01-01,5509006296254,0,2
4,2010-01-01,5509006296254,0,3


In [149]:
merged_data_merchant = merchant_df \
.merge(count_merchant_1, on = ['Date', 'Merchnum', 'order'], how = 'left', suffixes=['', '_count_merchant_1'])\
.merge(count_merchant_3, on = ['Date', 'Merchnum', 'order'], how = 'left', suffixes=['', '_count_merchant_3']) \
.merge(count_merchant_7, on = ['Date', 'Merchnum', 'order'], how = 'left', suffixes=['', '_count_merchant_7']) \
.merge(count_merchant_14, on = ['Date', 'Merchnum', 'order'], how = 'left', suffixes=['', '_count_merchant_14']) \
.merge(count_merchant_30, on = ['Date', 'Merchnum', 'order'], how = 'left', suffixes=['', '_count_merchant_30']) 

In [150]:
merged_data_merchant['Count_count_merchant_0'] = 1

In [151]:
merged_data_merchant = merged_data_merchant.rename(columns={'Count':"Count_count_merchant_1"})

In [152]:
merged_data_merchant.tail()

Unnamed: 0,Date,Merchnum,Fraud,order,Count_count_merchant_1,Count_count_merchant_3,Count_count_merchant_7,Count_count_merchant_14,Count_count_merchant_30,Count_count_merchant_0
83965,2010-10-31,9108347680000,0,2,2,3,7,15,22,1
83966,2010-10-31,6855293370648,0,4,4,5,6,7,20,1
83967,2010-10-31,300025852,0,1,1,1,1,1,1,1
83968,2010-10-31,607900047334,0,1,1,1,1,1,1,1
83969,2010-10-31,9108347680000,0,3,3,4,8,16,23,1


### Card + Merchant

In [153]:
count_card_merchant_1 = df.groupby(['Cardnum', 'Merchnum'])['Amount'].rolling('1d').count().reset_index()
count_card_merchant_1['Count'] = count_card_merchant_1['Amount'].astype('int')
del count_card_merchant_1['Amount']
count_card_merchant_1['order'] = count_card_merchant_1.groupby(['Cardnum', 'Merchnum', 'Date']).cumcount() + 1

count_card_merchant_3 = df.groupby(['Cardnum', 'Merchnum'])['Amount'].rolling('3d').count().reset_index()
count_card_merchant_3['Count'] = count_card_merchant_3['Amount'].astype('int')
del count_card_merchant_3['Amount']
count_card_merchant_3['order'] = count_card_merchant_3.groupby(['Cardnum', 'Merchnum', 'Date']).cumcount() + 1

count_card_merchant_7 = df.groupby(['Cardnum', 'Merchnum'])['Amount'].rolling('7d').count().reset_index()
count_card_merchant_7['Count'] = count_card_merchant_7['Amount'].astype('int')
del count_card_merchant_7['Amount']
count_card_merchant_7['order'] = count_card_merchant_7.groupby(['Cardnum', 'Merchnum', 'Date']).cumcount() + 1

count_card_merchant_14 = df.groupby(['Cardnum', 'Merchnum'])['Amount'].rolling('14d').count().reset_index()
count_card_merchant_14['Count'] = count_card_merchant_14['Amount'].astype('int')
del count_card_merchant_14['Amount']
count_card_merchant_14['order'] = count_card_merchant_14.groupby(['Cardnum', 'Merchnum', 'Date']).cumcount() + 1

count_card_merchant_30 = df.groupby(['Cardnum', 'Merchnum'])['Amount'].rolling('30d').count().reset_index()
count_card_merchant_30['Count'] = count_card_merchant_30['Amount'].astype('int')
del count_card_merchant_30['Amount']
count_card_merchant_30['order'] = count_card_merchant_30.groupby(['Cardnum', 'Merchnum', 'Date']).cumcount() + 1

In [154]:
card_merchant_df = df.copy()
card_merchant_df = card_merchant_df.reset_index()
card_merchant_df['order'] = card_merchant_df.groupby(['Cardnum', 'Merchnum', 'Date']).cumcount() + 1

In [155]:
card_merchant_df.head()

Unnamed: 0,Date,Recnum,Cardnum,Merchnum,Merch description,Merch state,Merch zip,Transtype,Amount,Fraud,order
0,2010-01-01,1,5142190439,5509006296254,FEDEX SHP 12/23/09 AB#,TN,38118.0,P,3.62,0,1
1,2010-01-01,2,5142183973,61003026333,SERVICE MERCHANDISE #81,MA,1730.0,P,31.42,0,1
2,2010-01-01,3,5142131721,4503082993600,OFFICE DEPOT #191,MD,20763.0,P,178.49,0,1
3,2010-01-01,4,5142148452,5509006296254,FEDEX SHP 12/28/09 AB#,TN,38118.0,P,3.62,0,1
4,2010-01-01,5,5142190439,5509006296254,FEDEX SHP 12/23/09 AB#,TN,38118.0,P,3.62,0,2


In [156]:
not_needed_columns = ['index', 'Recnum', 'Merch description', 'Merch zip', 'Transtype', 'Merch state', 'Amount']
for column in not_needed_columns:
    try:
        del card_merchant_df[column]
    except:
        pass

In [157]:
card_merchant_df.head()

Unnamed: 0,Date,Cardnum,Merchnum,Fraud,order
0,2010-01-01,5142190439,5509006296254,0,1
1,2010-01-01,5142183973,61003026333,0,1
2,2010-01-01,5142131721,4503082993600,0,1
3,2010-01-01,5142148452,5509006296254,0,1
4,2010-01-01,5142190439,5509006296254,0,2


In [158]:
merged_data_card_merchant = card_merchant_df \
.merge(count_card_merchant_1, on = ['Date', 'Cardnum', 'Merchnum', 'order'], how = 'left', suffixes=['', '_count_card_merchant_1'])\
.merge(count_card_merchant_3, on = ['Date', 'Cardnum', 'Merchnum', 'order'], how = 'left', suffixes=['', '_count_card_merchant_3']) \
.merge(count_card_merchant_7, on = ['Date', 'Cardnum', 'Merchnum', 'order'], how = 'left', suffixes=['', '_count_card_merchant_7']) \
.merge(count_card_merchant_14, on = ['Date', 'Cardnum', 'Merchnum', 'order'], how = 'left', suffixes=['', '_count_card_merchant_14']) \
.merge(count_card_merchant_30, on = ['Date', 'Cardnum', 'Merchnum', 'order'], how = 'left', suffixes=['', '_count_card_merchant_30']) 

In [159]:
merged_data_card_merchant['Count_count_card_merchant_0'] = 1

In [160]:
merged_data_card_merchant = merged_data_card_merchant.rename(columns={'Count':"Count_count_card_merchant_1"})

In [161]:
merged_data_card_merchant.tail()

Unnamed: 0,Date,Cardnum,Merchnum,Fraud,order,Count_count_card_merchant_1,Count_count_card_merchant_3,Count_count_card_merchant_7,Count_count_card_merchant_14,Count_count_card_merchant_30,Count_count_card_merchant_0
83965,2010-10-31,5142130739,9108347680000,0,1,1,1,1,1,1,1
83966,2010-10-31,5142219772,6855293370648,0,2,2,2,2,2,2,1
83967,2010-10-31,5142257707,300025852,0,1,1,1,1,1,1,1
83968,2010-10-31,5142168022,607900047334,0,1,1,1,1,1,1,1
83969,2010-10-31,5142137416,9108347680000,0,1,1,1,1,1,2,1


### Card + Zip code

In [162]:
count_card_zip_1 = df.groupby(['Cardnum', 'Merch zip'])['Amount'].rolling('1d').count().reset_index()
count_card_zip_1['Count'] = count_card_zip_1['Amount'].astype('int')
del count_card_zip_1['Amount']
count_card_zip_1['order'] = count_card_zip_1.groupby(['Cardnum', 'Merch zip', 'Date']).cumcount() + 1

count_card_zip_3 = df.groupby(['Cardnum', 'Merch zip'])['Amount'].rolling('3d').count().reset_index()
count_card_zip_3['Count'] = count_card_zip_3['Amount'].astype('int')
del count_card_zip_3['Amount']
count_card_zip_3['order'] = count_card_zip_3.groupby(['Cardnum', 'Merch zip', 'Date']).cumcount() + 1

count_card_zip_7 = df.groupby(['Cardnum', 'Merch zip'])['Amount'].rolling('7d').count().reset_index()
count_card_zip_7['Count'] = count_card_zip_7['Amount'].astype('int')
del count_card_zip_7['Amount']
count_card_zip_7['order'] = count_card_zip_7.groupby(['Cardnum', 'Merch zip', 'Date']).cumcount() + 1

count_card_zip_14 = df.groupby(['Cardnum', 'Merch zip'])['Amount'].rolling('14d').count().reset_index()
count_card_zip_14['Count'] = count_card_zip_14['Amount'].astype('int')
del count_card_zip_14['Amount']
count_card_zip_14['order'] = count_card_zip_14.groupby(['Cardnum', 'Merch zip', 'Date']).cumcount() + 1

count_card_zip_30 = df.groupby(['Cardnum', 'Merch zip'])['Amount'].rolling('30d').count().reset_index()
count_card_zip_30['Count'] = count_card_zip_30['Amount'].astype('int')
del count_card_zip_30['Amount']
count_card_zip_30['order'] = count_card_zip_30.groupby(['Cardnum', 'Merch zip', 'Date']).cumcount() + 1

In [163]:
card_zip_df = df.copy()
card_zip_df = card_zip_df.reset_index()
card_zip_df['order'] = card_zip_df.groupby(['Cardnum', 'Merch zip', 'Date']).cumcount() + 1

In [164]:
card_zip_df.head()

Unnamed: 0,Date,Recnum,Cardnum,Merchnum,Merch description,Merch state,Merch zip,Transtype,Amount,Fraud,order
0,2010-01-01,1,5142190439,5509006296254,FEDEX SHP 12/23/09 AB#,TN,38118.0,P,3.62,0,1
1,2010-01-01,2,5142183973,61003026333,SERVICE MERCHANDISE #81,MA,1730.0,P,31.42,0,1
2,2010-01-01,3,5142131721,4503082993600,OFFICE DEPOT #191,MD,20763.0,P,178.49,0,1
3,2010-01-01,4,5142148452,5509006296254,FEDEX SHP 12/28/09 AB#,TN,38118.0,P,3.62,0,1
4,2010-01-01,5,5142190439,5509006296254,FEDEX SHP 12/23/09 AB#,TN,38118.0,P,3.62,0,2


In [165]:
not_needed_columns = ['index', 'Recnum', 'Merch description', 'Merchnum', 'Transtype', 'Merch state', 'Amount']
for column in not_needed_columns:
    try:
        del card_zip_df[column]
    except:
        pass

In [166]:
card_zip_df.head()

Unnamed: 0,Date,Cardnum,Merch zip,Fraud,order
0,2010-01-01,5142190439,38118.0,0,1
1,2010-01-01,5142183973,1730.0,0,1
2,2010-01-01,5142131721,20763.0,0,1
3,2010-01-01,5142148452,38118.0,0,1
4,2010-01-01,5142190439,38118.0,0,2


In [167]:
merged_data_card_zip = card_zip_df \
.merge(count_card_zip_1, on = ['Date', 'Cardnum', 'Merch zip', 'order'], how = 'left', suffixes=['', '_count_card_zip_1'])\
.merge(count_card_zip_3, on = ['Date', 'Cardnum', 'Merch zip', 'order'], how = 'left', suffixes=['', '_count_card_zip_3']) \
.merge(count_card_zip_7, on = ['Date', 'Cardnum', 'Merch zip', 'order'], how = 'left', suffixes=['', '_count_card_zip_7']) \
.merge(count_card_zip_14, on = ['Date', 'Cardnum', 'Merch zip', 'order'], how = 'left', suffixes=['', '_count_card_zip_14']) \
.merge(count_card_zip_30, on = ['Date', 'Cardnum', 'Merch zip', 'order'], how = 'left', suffixes=['', '_count_card_zip_30']) 

In [168]:
merged_data_card_zip['Count_count_card_zip_0'] = 1

In [169]:
merged_data_card_zip = merged_data_card_zip.rename(columns={'Count':"Count_count_card_zip_1"})

In [170]:
merged_data_card_zip.tail()

Unnamed: 0,Date,Cardnum,Merch zip,Fraud,order,Count_count_card_zip_1,Count_count_card_zip_3,Count_count_card_zip_7,Count_count_card_zip_14,Count_count_card_zip_30,Count_count_card_zip_0
83965,2010-10-31,5142130739,8701.0,0,1,1,1,1,1,1,1
83966,2010-10-31,5142219772,27713.0,0,2,2,2,2,2,2,1
83967,2010-10-31,5142257707,53546.0,0,1,1,1,1,1,1,1
83968,2010-10-31,5142168022,55806.0,0,1,1,1,1,1,1,1
83969,2010-10-31,5142137416,8701.0,0,1,1,1,1,1,2,1


### Card + State

In [171]:
count_card_state_1 = df.groupby(['Cardnum', 'Merch state'])['Amount'].rolling('1d').count().reset_index()
count_card_state_1['Count'] = count_card_state_1['Amount'].astype('int')
del count_card_state_1['Amount']
count_card_state_1['order'] = count_card_state_1.groupby(['Cardnum', 'Merch state', 'Date']).cumcount() + 1

count_card_state_3 = df.groupby(['Cardnum', 'Merch state'])['Amount'].rolling('3d').count().reset_index()
count_card_state_3['Count'] = count_card_state_3['Amount'].astype('int')
del count_card_state_3['Amount']
count_card_state_3['order'] = count_card_state_3.groupby(['Cardnum', 'Merch state', 'Date']).cumcount() + 1

count_card_state_7 = df.groupby(['Cardnum', 'Merch state'])['Amount'].rolling('7d').count().reset_index()
count_card_state_7['Count'] = count_card_state_7['Amount'].astype('int')
del count_card_state_7['Amount']
count_card_state_7['order'] = count_card_state_7.groupby(['Cardnum', 'Merch state', 'Date']).cumcount() + 1

count_card_state_14 = df.groupby(['Cardnum', 'Merch state'])['Amount'].rolling('14d').count().reset_index()
count_card_state_14['Count'] = count_card_state_14['Amount'].astype('int')
del count_card_state_14['Amount']
count_card_state_14['order'] = count_card_state_14.groupby(['Cardnum', 'Merch state', 'Date']).cumcount() + 1

count_card_state_30 = df.groupby(['Cardnum', 'Merch state'])['Amount'].rolling('30d').count().reset_index()
count_card_state_30['Count'] = count_card_state_30['Amount'].astype('int')
del count_card_state_30['Amount']
count_card_state_30['order'] = count_card_state_30.groupby(['Cardnum', 'Merch state', 'Date']).cumcount() + 1

In [172]:
card_state_df = df.copy()
card_state_df = card_state_df.reset_index()
card_state_df['order'] = card_state_df.groupby(['Cardnum', 'Merch state', 'Date']).cumcount() + 1

In [173]:
card_state_df.head()

Unnamed: 0,Date,Recnum,Cardnum,Merchnum,Merch description,Merch state,Merch zip,Transtype,Amount,Fraud,order
0,2010-01-01,1,5142190439,5509006296254,FEDEX SHP 12/23/09 AB#,TN,38118.0,P,3.62,0,1
1,2010-01-01,2,5142183973,61003026333,SERVICE MERCHANDISE #81,MA,1730.0,P,31.42,0,1
2,2010-01-01,3,5142131721,4503082993600,OFFICE DEPOT #191,MD,20763.0,P,178.49,0,1
3,2010-01-01,4,5142148452,5509006296254,FEDEX SHP 12/28/09 AB#,TN,38118.0,P,3.62,0,1
4,2010-01-01,5,5142190439,5509006296254,FEDEX SHP 12/23/09 AB#,TN,38118.0,P,3.62,0,2


In [174]:
not_needed_columns = ['index', 'Recnum', 'Merch description', 'Merchnum', 'Transtype', 'Merch zip', 'Amount']
for column in not_needed_columns:
    try:
        del card_state_df[column]
    except:
        pass

In [175]:
card_state_df.head()

Unnamed: 0,Date,Cardnum,Merch state,Fraud,order
0,2010-01-01,5142190439,TN,0,1
1,2010-01-01,5142183973,MA,0,1
2,2010-01-01,5142131721,MD,0,1
3,2010-01-01,5142148452,TN,0,1
4,2010-01-01,5142190439,TN,0,2


In [176]:
merged_data_card_state = card_state_df \
.merge(count_card_state_1, on = ['Date', 'Cardnum', 'Merch state', 'order'], how = 'left', suffixes=['', '_count_card_state_1'])\
.merge(count_card_state_3, on = ['Date', 'Cardnum', 'Merch state', 'order'], how = 'left', suffixes=['', '_count_card_state_3']) \
.merge(count_card_state_7, on = ['Date', 'Cardnum', 'Merch state', 'order'], how = 'left', suffixes=['', '_count_card_state_7']) \
.merge(count_card_state_14, on = ['Date', 'Cardnum', 'Merch state', 'order'], how = 'left', suffixes=['', '_count_card_state_14']) \
.merge(count_card_state_30, on = ['Date', 'Cardnum', 'Merch state', 'order'], how = 'left', suffixes=['', '_count_card_state_30']) 

In [177]:
merged_data_card_state['Count_count_card_state_0'] = 1

In [178]:
merged_data_card_state = merged_data_card_state.rename(columns={'Count':"Count_count_card_state_1"})

In [179]:
merged_data_card_state.tail()

Unnamed: 0,Date,Cardnum,Merch state,Fraud,order,Count_count_card_state_1,Count_count_card_state_3,Count_count_card_state_7,Count_count_card_state_14,Count_count_card_state_30,Count_count_card_state_0
83965,2010-10-31,5142130739,NJ,0,1,1,1,1,1,1,1
83966,2010-10-31,5142219772,NC,0,2,2,2,2,2,2,1
83967,2010-10-31,5142257707,WI,0,1,1,1,1,1,1,1
83968,2010-10-31,5142168022,MN,0,1,1,1,1,1,1,1
83969,2010-10-31,5142137416,NJ,0,1,1,1,1,1,2,1


### Frequency Variables 30

In [180]:
merged_data_card.head()

Unnamed: 0,Date,Cardnum,Fraud,order,Count,Count_count_card_1,Count_count_card_3,Count_count_card_7,Count_count_card_14,Count_count_card_30,Count_count_card_0
0,2010-01-01,5142190439,0,1,0,1,1,1,1,1,1
1,2010-01-01,5142183973,0,1,0,1,1,1,1,1,1
2,2010-01-01,5142131721,0,1,0,1,1,1,1,1,1
3,2010-01-01,5142148452,0,1,0,1,1,1,1,1,1
4,2010-01-01,5142190439,0,2,0,2,2,2,2,2,1


In [181]:
merged_data_merchant.head()

Unnamed: 0,Date,Merchnum,Fraud,order,Count_count_merchant_1,Count_count_merchant_3,Count_count_merchant_7,Count_count_merchant_14,Count_count_merchant_30,Count_count_merchant_0
0,2010-01-01,5509006296254,0,1,1,1,1,1,1,1
1,2010-01-01,61003026333,0,1,1,1,1,1,1,1
2,2010-01-01,4503082993600,0,1,1,1,1,1,1,1
3,2010-01-01,5509006296254,0,2,2,2,2,2,2,1
4,2010-01-01,5509006296254,0,3,3,3,3,3,3,1


In [182]:
merged_data_card_merchant.head()

Unnamed: 0,Date,Cardnum,Merchnum,Fraud,order,Count_count_card_merchant_1,Count_count_card_merchant_3,Count_count_card_merchant_7,Count_count_card_merchant_14,Count_count_card_merchant_30,Count_count_card_merchant_0
0,2010-01-01,5142190439,5509006296254,0,1,1,1,1,1,1,1
1,2010-01-01,5142183973,61003026333,0,1,1,1,1,1,1,1
2,2010-01-01,5142131721,4503082993600,0,1,1,1,1,1,1,1
3,2010-01-01,5142148452,5509006296254,0,1,1,1,1,1,1,1
4,2010-01-01,5142190439,5509006296254,0,2,2,2,2,2,2,1


In [183]:
merged_data_card_zip.head()

Unnamed: 0,Date,Cardnum,Merch zip,Fraud,order,Count_count_card_zip_1,Count_count_card_zip_3,Count_count_card_zip_7,Count_count_card_zip_14,Count_count_card_zip_30,Count_count_card_zip_0
0,2010-01-01,5142190439,38118.0,0,1,1,1,1,1,1,1
1,2010-01-01,5142183973,1730.0,0,1,1,1,1,1,1,1
2,2010-01-01,5142131721,20763.0,0,1,1,1,1,1,1,1
3,2010-01-01,5142148452,38118.0,0,1,1,1,1,1,1,1
4,2010-01-01,5142190439,38118.0,0,2,2,2,2,2,2,1


In [184]:
merged_data_card_state.head()

Unnamed: 0,Date,Cardnum,Merch state,Fraud,order,Count_count_card_state_1,Count_count_card_state_3,Count_count_card_state_7,Count_count_card_state_14,Count_count_card_state_30,Count_count_card_state_0
0,2010-01-01,5142190439,TN,0,1,1,1,1,1,1,1
1,2010-01-01,5142183973,MA,0,1,1,1,1,1,1,1
2,2010-01-01,5142131721,MD,0,1,1,1,1,1,1,1
3,2010-01-01,5142148452,TN,0,1,1,1,1,1,1,1
4,2010-01-01,5142190439,TN,0,2,2,2,2,2,2,1


In [185]:
merged_data_card_pure = merged_data_card.iloc[:, 5:]
merged_data_merchant_pure = merged_data_merchant.iloc[:, 4:]
merged_data_card_merchant_pure = merged_data_card_merchant.iloc[:, 5:]
merged_data_card_zip_pure = merged_data_card_zip.iloc[:, 5:]
merged_data_card_state_pure = merged_data_card_state.iloc[:, 5:]

In [186]:
[merged_data_card_pure.shape, 
merged_data_merchant_pure.shape,
merged_data_card_merchant_pure.shape, 
merged_data_card_zip_pure.shape,
merged_data_card_state_pure.shape]

[(83970, 6), (83970, 6), (83970, 6), (83970, 6), (83970, 6)]

In [187]:
Frequency_variables = pd.concat([merged_data_card_pure,
                              merged_data_merchant_pure,
                              merged_data_card_merchant_pure, 
                              merged_data_card_zip_pure, 
                              merged_data_card_state_pure], axis = 1)

In [188]:
Frequency_variables.shape

(83970, 30)

In [189]:
Frequency_variables.tail()

Unnamed: 0,Count_count_card_1,Count_count_card_3,Count_count_card_7,Count_count_card_14,Count_count_card_30,Count_count_card_0,Count_count_merchant_1,Count_count_merchant_3,Count_count_merchant_7,Count_count_merchant_14,...,Count_count_card_zip_7,Count_count_card_zip_14,Count_count_card_zip_30,Count_count_card_zip_0,Count_count_card_state_1,Count_count_card_state_3,Count_count_card_state_7,Count_count_card_state_14,Count_count_card_state_30,Count_count_card_state_0
83965,1,1,1,1,5,1,2,3,7,15,...,1,1,1,1,1,1,1,1,1,1
83966,2,2,2,2,2,1,4,5,6,7,...,2,2,2,1,2,2,2,2,2,1
83967,1,1,2,3,4,1,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1
83968,1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1
83969,1,1,1,1,2,1,3,4,8,16,...,1,1,2,1,1,1,1,1,2,1


In [190]:
Frequency_variables['Recnum'] = df['Recnum'].values
Frequency_variables['Fraud'] = df['Fraud'].values
Frequency_variables['Date'] = df.index.values
Frequency_variables['Transtype'] = df['Transtype'].values

In [191]:
Frequency_variables.tail()

Unnamed: 0,Count_count_card_1,Count_count_card_3,Count_count_card_7,Count_count_card_14,Count_count_card_30,Count_count_card_0,Count_count_merchant_1,Count_count_merchant_3,Count_count_merchant_7,Count_count_merchant_14,...,Count_count_card_state_1,Count_count_card_state_3,Count_count_card_state_7,Count_count_card_state_14,Count_count_card_state_30,Count_count_card_state_0,Recnum,Fraud,Date,Transtype
83965,1,1,1,1,5,1,2,3,7,15,...,1,1,1,1,1,1,84295,0,2010-10-31,P
83966,2,2,2,2,2,1,4,5,6,7,...,2,2,2,2,2,1,84296,0,2010-10-31,P
83967,1,1,2,3,4,1,1,1,1,1,...,1,1,1,1,1,1,84297,0,2010-10-31,P
83968,1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,84298,0,2010-10-31,P
83969,1,1,1,1,2,1,3,4,8,16,...,1,1,1,1,2,1,84299,0,2010-10-31,P


In [192]:
Frequency_variables.shape

(83970, 34)

In [193]:
len(Frequency_variables['Recnum'].unique())

83970

## Days since Variables

### Card

In [194]:
DateDiff = df.copy()

In [195]:
DateDiff.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 83970 entries, 2010-01-01 to 2010-10-31
Data columns (total 9 columns):
Recnum               83970 non-null int64
Cardnum              83970 non-null int64
Merchnum             83970 non-null object
Merch description    83970 non-null object
Merch state          83970 non-null object
Merch zip            83970 non-null float64
Transtype            83970 non-null object
Amount               83970 non-null float64
Fraud                83970 non-null int64
dtypes: float64(2), int64(3), object(4)
memory usage: 6.4+ MB


In [196]:
def subtract(df):
    temp_df = df.copy()
    temp_df = temp_df.reset_index()
#     print(temp_df)
    temp_df['LastTime'] = temp_df['Date'].shift(1)
    temp_df['Card_SinceLastTime'] = temp_df['Date'] - temp_df['LastTime']
    return temp_df

In [197]:
Card_df = DateDiff.groupby(['Cardnum']).apply(lambda df: subtract(df))

In [198]:
Card_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Date,Recnum,Cardnum,Merchnum,Merch description,Merch state,Merch zip,Transtype,Amount,Fraud,LastTime,Card_SinceLastTime
Cardnum,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
5142110002,0,2010-10-12,81127,5142110002,9900020006406,X REVERSAL OF CR BAL REF,VA,22202.0,P,150.0,0,NaT,NaT
5142110081,0,2010-03-08,16628,5142110081,930090121224,X OFFICE DEPOT #1,TN,38118.0,P,495.9,0,NaT,NaT
5142110081,1,2010-03-08,16801,5142110081,930090121224,X FRANKLIN COVEY,TN,38118.0,P,636.2,0,2010-03-08,0 days
5142110313,0,2010-10-07,80318,5142110313,930090121224,XFR TO RICE 120013402434,TN,38118.0,P,144.0,0,NaT,NaT
5142110313,1,2010-10-07,80327,5142110313,930090121224,XFR TO RICE 120013402434,TN,38118.0,P,44.0,0,2010-10-07,0 days


In [199]:
len(Card_df['Recnum'].unique())

83970

In [200]:
Card = Card_df[['Recnum', 'Card_SinceLastTime']].reset_index(drop = 1)

In [201]:
Card.head()

Unnamed: 0,Recnum,Card_SinceLastTime
0,81127,NaT
1,16628,NaT
2,16801,0 days
3,80318,NaT
4,80327,0 days


### Merchant

In [202]:
def subtract(df):
    temp_df = df.copy()
    temp_df = temp_df.reset_index()
#     print(temp_df)
    temp_df['LastTime'] = temp_df['Date'].shift(1)
    temp_df['Merchant_SinceLastTime'] = temp_df['Date'] - temp_df['LastTime']
    return temp_df

In [203]:
Merchant_df = DateDiff.groupby(['Merchnum']).apply(lambda df: subtract(df))

In [204]:
Merchant_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Date,Recnum,Cardnum,Merchnum,Merch description,Merch state,Merch zip,Transtype,Amount,Fraud,LastTime,Merchant_SinceLastTime
Merchnum,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
0,0,2010-01-06,818,5142230669,0,TOMMY'S TRAILERS,OK,74820.0,P,48.97,0,NaT,NaT
0,1,2010-01-07,1222,5142231496,0,TOMMY'S TRAILERS,OK,74820.0,P,87.02,0,2010-01-06,1 days
0,2,2010-01-07,1403,5142159706,0,INTERACTIVE SOFTWARE S,TN,38118.0,P,460.0,0,2010-01-07,0 days
0,3,2010-01-11,2090,5142243966,0,ESTUARINE RESEARCH FDRTN,MD,20763.0,P,25.0,0,2010-01-07,4 days
0,4,2010-01-11,2169,5142141358,0,REED BUSINESS PUBLISHI,TN,38118.0,P,110.0,0,2010-01-11,0 days


In [205]:
len(Merchant_df['Recnum'].unique())

83970

In [206]:
Merchant = Merchant_df[['Recnum', 'Merchant_SinceLastTime']].reset_index(drop = 1)

In [207]:
Merchant.head()

Unnamed: 0,Recnum,Merchant_SinceLastTime
0,818,NaT
1,1222,1 days
2,1403,0 days
3,2090,4 days
4,2169,0 days


### Card_Merchant

In [208]:
def subtract(df):
    temp_df = df.copy()
    temp_df = temp_df.reset_index()
#     print(temp_df)
    temp_df['LastTime'] = temp_df['Date'].shift(1)
    temp_df['Card_Merchant_SinceLastTime'] = temp_df['Date'] - temp_df['LastTime']
    return temp_df

In [209]:
Card_Merchant_df = DateDiff.groupby(['Cardnum', 'Merchnum']).apply(lambda df: subtract(df))

In [210]:
Card_Merchant_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Date,Recnum,Cardnum,Merchnum,Merch description,Merch state,Merch zip,Transtype,Amount,Fraud,LastTime,Card_Merchant_SinceLastTime
Cardnum,Merchnum,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
5142110002,9900020006406,0,2010-10-12,81127,5142110002,9900020006406,X REVERSAL OF CR BAL REF,VA,22202.0,P,150.0,0,NaT,NaT
5142110081,930090121224,0,2010-03-08,16628,5142110081,930090121224,X OFFICE DEPOT #1,TN,38118.0,P,495.9,0,NaT,NaT
5142110081,930090121224,1,2010-03-08,16801,5142110081,930090121224,X FRANKLIN COVEY,TN,38118.0,P,636.2,0,2010-03-08,0 days
5142110313,930090121224,0,2010-10-07,80318,5142110313,930090121224,XFR TO RICE 120013402434,TN,38118.0,P,144.0,0,NaT,NaT
5142110313,930090121224,1,2010-10-07,80327,5142110313,930090121224,XFR TO RICE 120013402434,TN,38118.0,P,44.0,0,2010-10-07,0 days


In [211]:
len(Card_Merchant_df['Recnum'].unique())

83970

In [212]:
Card_Merchant = Card_Merchant_df[['Recnum', 'Card_Merchant_SinceLastTime']].reset_index(drop = 1)

In [213]:
Card_Merchant.head()

Unnamed: 0,Recnum,Card_Merchant_SinceLastTime
0,81127,NaT
1,16628,NaT
2,16801,0 days
3,80318,NaT
4,80327,0 days


### Card_Zip code

In [214]:
def subtract(df):
    temp_df = df.copy()
    temp_df = temp_df.reset_index()
#     print(temp_df)
    temp_df['LastTime'] = temp_df['Date'].shift(1)
    temp_df['Card_Zip_SinceLastTime'] = temp_df['Date'] - temp_df['LastTime']
    return temp_df

In [215]:
Card_Zip_df = DateDiff.groupby(['Cardnum', 'Merch zip']).apply(lambda df: subtract(df))

In [216]:
Card_Zip_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Date,Recnum,Cardnum,Merchnum,Merch description,Merch state,Merch zip,Transtype,Amount,Fraud,LastTime,Card_Zip_SinceLastTime
Cardnum,Merch zip,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
5142110002,22202.0,0,2010-10-12,81127,5142110002,9900020006406,X REVERSAL OF CR BAL REF,VA,22202.0,P,150.0,0,NaT,NaT
5142110081,38118.0,0,2010-03-08,16628,5142110081,930090121224,X OFFICE DEPOT #1,TN,38118.0,P,495.9,0,NaT,NaT
5142110081,38118.0,1,2010-03-08,16801,5142110081,930090121224,X FRANKLIN COVEY,TN,38118.0,P,636.2,0,2010-03-08,0 days
5142110313,38118.0,0,2010-10-07,80318,5142110313,930090121224,XFR TO RICE 120013402434,TN,38118.0,P,144.0,0,NaT,NaT
5142110313,38118.0,1,2010-10-07,80327,5142110313,930090121224,XFR TO RICE 120013402434,TN,38118.0,P,44.0,0,2010-10-07,0 days


In [217]:
len(Card_Zip_df['Recnum'].unique())

83970

In [218]:
Card_Zip = Card_Zip_df[['Recnum', 'Card_Zip_SinceLastTime']].reset_index(drop = 1)

In [219]:
Merchant.head()

Unnamed: 0,Recnum,Merchant_SinceLastTime
0,818,NaT
1,1222,1 days
2,1403,0 days
3,2090,4 days
4,2169,0 days


### Card_State

In [220]:
def subtract(df):
    temp_df = df.copy()
    temp_df = temp_df.reset_index()
#     print(temp_df)
    temp_df['LastTime'] = temp_df['Date'].shift(1)
    temp_df['Card_State_SinceLastTime'] = temp_df['Date'] - temp_df['LastTime']
    return temp_df

In [221]:
Card_State_df = DateDiff.groupby(['Cardnum', 'Merch state']).apply(lambda df: subtract(df))

In [222]:
Card_State_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Date,Recnum,Cardnum,Merchnum,Merch description,Merch state,Merch zip,Transtype,Amount,Fraud,LastTime,Card_State_SinceLastTime
Cardnum,Merch state,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
5142110002,VA,0,2010-10-12,81127,5142110002,9900020006406,X REVERSAL OF CR BAL REF,VA,22202.0,P,150.0,0,NaT,NaT
5142110081,TN,0,2010-03-08,16628,5142110081,930090121224,X OFFICE DEPOT #1,TN,38118.0,P,495.9,0,NaT,NaT
5142110081,TN,1,2010-03-08,16801,5142110081,930090121224,X FRANKLIN COVEY,TN,38118.0,P,636.2,0,2010-03-08,0 days
5142110313,TN,0,2010-10-07,80318,5142110313,930090121224,XFR TO RICE 120013402434,TN,38118.0,P,144.0,0,NaT,NaT
5142110313,TN,1,2010-10-07,80327,5142110313,930090121224,XFR TO RICE 120013402434,TN,38118.0,P,44.0,0,2010-10-07,0 days


In [223]:
len(Card_State_df['Recnum'].unique())

83970

In [224]:
Card_State = Card_State_df[['Recnum', 'Card_State_SinceLastTime']].reset_index(drop = 1)

In [225]:
Card_State.head()

Unnamed: 0,Recnum,Card_State_SinceLastTime
0,81127,NaT
1,16628,NaT
2,16801,0 days
3,80318,NaT
4,80327,0 days


### Days since Variables 5

In [226]:
Days_since_variables = Card.merge(Merchant, on = ['Recnum']) \
.merge(Card_Merchant, on = ['Recnum']) \
.merge(Card_Zip, on = ['Recnum']) \
.merge(Card_State, on = ['Recnum'])

In [227]:
Days_since_variables.head()

Unnamed: 0,Recnum,Card_SinceLastTime,Merchant_SinceLastTime,Card_Merchant_SinceLastTime,Card_Zip_SinceLastTime,Card_State_SinceLastTime
0,81127,NaT,0 days,NaT,NaT,NaT
1,16628,NaT,0 days,NaT,NaT,NaT
2,16801,0 days,0 days,0 days,0 days,0 days
3,80318,NaT,0 days,NaT,NaT,NaT
4,80327,0 days,0 days,0 days,0 days,0 days


In [228]:
Days_since_variables.shape

(83970, 6)

## Velocity change Variables

### Count_Card_0 + 1

In [229]:
count_card_1 = df.groupby(['Cardnum'])['Amount'].rolling('1d').count().reset_index()
count_card_1['Card_Count_1'] = count_card_1['Amount'].astype('int')
del count_card_1['Amount']
count_card_1['order'] = count_card_1.groupby(['Cardnum', 'Date']).cumcount() + 1

In [230]:
df['order'] = df.groupby(['Cardnum', 'Date']).cumcount() + 1

In [231]:
count_card_1.shape

(83970, 4)

In [232]:
NC1 = df.merge(count_card_1, on = ['Cardnum', 'Date', 'order'])

In [233]:
NC1.shape

(83970, 12)

In [234]:
NC1 = NC1[['Recnum', 'Card_Count_1']]

In [235]:
NC1['Card_Count_0'] = 1

In [236]:
NC01 = NC1.copy()

In [237]:
NC01 = NC01.rename(columns = {'Card_Count_1':"NC1", 'Card_Count_0':"NC0"})

In [238]:
NC01.head()

Unnamed: 0,Recnum,NC1,NC0
0,1,1,1
1,2,1,1
2,3,1,1
3,4,1,1
4,5,2,1


In [239]:
len(NC01['Recnum'].unique())

83970

### Count_Merchant_0 + 1

In [240]:
count_merchant_1 = df.groupby(['Merchnum'])['Amount'].rolling('1d').count().reset_index()
count_merchant_1['Merchant_Count_1'] = count_merchant_1['Amount'].astype('int')
del count_merchant_1['Amount']
count_merchant_1['order'] = count_merchant_1.groupby(['Merchnum', 'Date']).cumcount() + 1

In [241]:
count_merchant_1.shape

(83970, 4)

In [242]:
df['order'] = df.groupby(['Merchnum', 'Date']).cumcount() + 1

In [243]:
NM1 = df.merge(count_merchant_1, on = ['Merchnum', 'Date', 'order'])

In [244]:
NM1.shape

(83970, 12)

In [245]:
NM1 = NM1[['Recnum', 'Merchant_Count_1']]

In [246]:
NM1['Merchant_Count_0'] = 1

In [247]:
NM01 = NM1.copy()

In [248]:
NM01 = NM01.rename(columns = {'Merchant_Count_1':"NM1", 'Merchant_Count_0':"NM0"})

In [249]:
NM01.head()

Unnamed: 0,Recnum,NM1,NM0
0,1,1,1
1,2,1,1
2,3,1,1
3,4,2,1
4,5,3,1


In [250]:
len(NM01['Recnum'].unique())

83970

### Amount_Card_0 + 1

In [251]:
sum_card_1 = df.groupby(['Cardnum'])['Amount'].rolling('1d').sum().reset_index()
sum_card_1['order'] = sum_card_1.groupby(['Cardnum', 'Date']).cumcount() + 1

In [252]:
sum_card_1.shape

(83970, 4)

In [253]:
df['order'] = df.groupby(['Cardnum', 'Date']).cumcount() + 1

In [254]:
AC1 = df.merge(sum_card_1, on = ['Cardnum', 'Date', 'order'], suffixes=['_0', '_1'])

In [255]:
AC1.head()

Unnamed: 0,Recnum,Date,Cardnum,Merchnum,Merch description,Merch state,Merch zip,Transtype,Amount_0,Fraud,order,Amount_1
0,1,2010-01-01,5142190439,5509006296254,FEDEX SHP 12/23/09 AB#,TN,38118.0,P,3.62,0,1,3.62
1,2,2010-01-01,5142183973,61003026333,SERVICE MERCHANDISE #81,MA,1730.0,P,31.42,0,1,31.42
2,3,2010-01-01,5142131721,4503082993600,OFFICE DEPOT #191,MD,20763.0,P,178.49,0,1,178.49
3,4,2010-01-01,5142148452,5509006296254,FEDEX SHP 12/28/09 AB#,TN,38118.0,P,3.62,0,1,3.62
4,5,2010-01-01,5142190439,5509006296254,FEDEX SHP 12/23/09 AB#,TN,38118.0,P,3.62,0,2,7.24


In [256]:
AC01 = AC1[['Recnum', 'Amount_0', 'Amount_1']]

In [257]:
AC01 = AC01.rename(columns={'Amount_0':'Card_Amount_0', 'Amount_1':'Card_Amount_1'})

In [258]:
AC01 = AC01.rename(columns = {'Card_Amount_0':"AC0", 'Card_Amount_1':"AC1"})

In [259]:
AC01.tail()

Unnamed: 0,Recnum,AC0,AC1
83965,84295,299.77,299.77
83966,84296,609.34,789.32
83967,84297,235.0,235.0
83968,84298,600.0,600.0
83969,84299,30.0,30.0


In [260]:
len(AC01['Recnum'].unique())

83970

### Amount_Merchant_0 + 1

In [261]:
sum_merchant_1 = df.groupby(['Merchnum'])['Amount'].rolling('1d').sum().reset_index()
sum_merchant_1['order'] = sum_merchant_1.groupby(['Merchnum', 'Date']).cumcount() + 1

In [262]:
df['order'] = df.groupby(['Merchnum', 'Date']).cumcount() + 1

In [263]:
AM1 = df.merge(sum_merchant_1, on = ['Merchnum', 'Date', 'order'], suffixes=['_0', '_1'])

In [264]:
AM1.head()

Unnamed: 0,Recnum,Date,Cardnum,Merchnum,Merch description,Merch state,Merch zip,Transtype,Amount_0,Fraud,order,Amount_1
0,1,2010-01-01,5142190439,5509006296254,FEDEX SHP 12/23/09 AB#,TN,38118.0,P,3.62,0,1,3.62
1,2,2010-01-01,5142183973,61003026333,SERVICE MERCHANDISE #81,MA,1730.0,P,31.42,0,1,31.42
2,3,2010-01-01,5142131721,4503082993600,OFFICE DEPOT #191,MD,20763.0,P,178.49,0,1,178.49
3,4,2010-01-01,5142148452,5509006296254,FEDEX SHP 12/28/09 AB#,TN,38118.0,P,3.62,0,2,7.24
4,5,2010-01-01,5142190439,5509006296254,FEDEX SHP 12/23/09 AB#,TN,38118.0,P,3.62,0,3,10.86


In [265]:
AM01 = AM1[['Recnum', 'Amount_0', 'Amount_1']]

In [266]:
AM01 = AM01.rename(columns={'Amount_0':'Merchant_Amount_0', 'Amount_1':'Merchant_Amount_1'})

In [267]:
AM01 = AM01.rename(columns = {'Merchant_Amount_0':"AM0", 'Merchant_Amount_1':"AM1"})

In [268]:
AM01.tail()

Unnamed: 0,Recnum,AM0,AM1
83965,84295,299.77,718.89
83966,84296,609.34,1086.77
83967,84297,235.0,235.0
83968,84298,600.0,600.0
83969,84299,30.0,748.89


### Numerator

In [269]:
Numerator = NC01.merge(NM01, on = 'Recnum').merge(AC01, on = 'Recnum').merge(AM01, on = 'Recnum')

In [270]:
Numerator.tail()

Unnamed: 0,Recnum,NC1,NC0,NM1,NM0,AC0,AC1,AM0,AM1
83965,84295,1,1,2,1,299.77,299.77,299.77,718.89
83966,84296,2,1,4,1,609.34,789.32,609.34,1086.77
83967,84297,1,1,1,1,235.0,235.0,235.0,235.0
83968,84298,1,1,1,1,600.0,600.0,600.0,600.0
83969,84299,1,1,3,1,30.0,30.0,30.0,748.89


In [271]:
Numerator.shape

(83970, 9)

In [272]:
len(Numerator['Recnum'].unique())

83970

### Average Count_Card_7

In [273]:
count_card_7 = df.groupby(['Cardnum'])['Amount'].rolling('7d').count().reset_index()
count_card_7['Card_Count'] = count_card_7['Amount'].astype('int')
count_card_7['Average_Card_Count_7'] = count_card_7['Card_Count'] / 7.0
del count_card_7['Amount']
count_card_7['order'] = count_card_7.groupby(['Cardnum', 'Date']).cumcount() + 1

In [274]:
count_card_7.tail()

Unnamed: 0,Cardnum,Date,Card_Count,Average_Card_Count_7,order
83965,5142847398,2010-03-21,3,0.428571,1
83966,5142847398,2010-03-22,4,0.571429,1
83967,5142847398,2010-03-24,4,0.571429,1
83968,5142847398,2010-03-28,3,0.428571,1
83969,5142847398,2010-03-29,3,0.428571,1


In [275]:
df['order'] = df.groupby(['Cardnum', 'Date']).cumcount() + 1

In [276]:
ANC7 = df.merge(count_card_7, on = ['Cardnum', 'Date', 'order'], suffixes=['_Original', '_1'])

In [277]:
ANC7 = ANC7[['Recnum', 'Average_Card_Count_7']]

In [278]:
ANC7 = ANC7.rename(columns = {"Average_Card_Count_7": "ANC7"})

In [279]:
ANC7.head()

Unnamed: 0,Recnum,ANC7
0,1,0.142857
1,2,0.142857
2,3,0.142857
3,4,0.142857
4,5,0.285714


In [280]:
ANC7.shape

(83970, 2)

In [281]:
len(ANC7['Recnum'].unique())

83970

### Average Count_Card_14

In [282]:
count_card_14 = df.groupby(['Cardnum'])['Amount'].rolling('14d').count().reset_index()
count_card_14['Card_Count'] = count_card_14['Amount'].astype('int')
count_card_14['Average_Card_Count_14'] = count_card_14['Card_Count'] / 14.0
del count_card_14['Amount']
count_card_14['order'] = count_card_14.groupby(['Cardnum', 'Date']).cumcount() + 1

In [283]:
count_card_14.tail()

Unnamed: 0,Cardnum,Date,Card_Count,Average_Card_Count_14,order
83965,5142847398,2010-03-21,7,0.5,1
83966,5142847398,2010-03-22,7,0.5,1
83967,5142847398,2010-03-24,8,0.571429,1
83968,5142847398,2010-03-28,6,0.428571,1
83969,5142847398,2010-03-29,7,0.5,1


In [284]:
df['order'] = df.groupby(['Cardnum', 'Date']).cumcount() + 1

In [285]:
ANC14 = df.merge(count_card_14, on = ['Cardnum', 'Date', 'order'], suffixes=['_Original', '_1'])

In [286]:
ANC14 = ANC14[['Recnum', 'Average_Card_Count_14']]

In [287]:
ANC14 = ANC14.rename(columns = {"Average_Card_Count_14": "ANC14"})

In [288]:
ANC14.tail()

Unnamed: 0,Recnum,ANC14
83965,84295,0.071429
83966,84296,0.142857
83967,84297,0.214286
83968,84298,0.071429
83969,84299,0.071429


In [289]:
ANC14.shape

(83970, 2)

In [290]:
len(ANC14['Recnum'].unique())

83970

### Average Count_Card_30

In [291]:
count_card_30 = df.groupby(['Cardnum'])['Amount'].rolling('30d').count().reset_index()
count_card_30['Card_Count'] = count_card_30['Amount'].astype('int')
count_card_30['Average_Card_Count_30'] = count_card_30['Card_Count'] / 30.0
del count_card_30['Amount']
count_card_30['order'] = count_card_30.groupby(['Cardnum', 'Date']).cumcount() + 1

In [292]:
count_card_30.tail()

Unnamed: 0,Cardnum,Date,Card_Count,Average_Card_Count_30,order
83965,5142847398,2010-03-21,15,0.5,1
83966,5142847398,2010-03-22,16,0.533333,1
83967,5142847398,2010-03-24,15,0.5,1
83968,5142847398,2010-03-28,14,0.466667,1
83969,5142847398,2010-03-29,14,0.466667,1


In [293]:
df['order'] = df.groupby(['Cardnum', 'Date']).cumcount() + 1

In [294]:
ANC30 = df.merge(count_card_30, on = ['Cardnum', 'Date', 'order'], suffixes=['_Original', '_1'])

In [295]:
ANC30 = ANC30[['Recnum', 'Average_Card_Count_30']]

In [296]:
ANC30 = ANC30.rename(columns = {"Average_Card_Count_30": "ANC30"})

In [297]:
ANC30.tail()

Unnamed: 0,Recnum,ANC30
83965,84295,0.166667
83966,84296,0.066667
83967,84297,0.133333
83968,84298,0.033333
83969,84299,0.066667


In [298]:
ANC30.shape

(83970, 2)

In [299]:
len(ANC30['Recnum'].unique())

83970

### Average Amount_Card_7

In [300]:
avg_card_7 = df.groupby(['Cardnum'])['Amount'].rolling('7d').mean().reset_index()
avg_card_7['order'] = avg_card_7.groupby(['Cardnum', 'Date']).cumcount() + 1

In [301]:
avg_card_7 = avg_card_7.rename(columns = {"Amount":"Average_Card_Amount_7"})

In [302]:
avg_card_7.head()

Unnamed: 0,Cardnum,Date,Average_Card_Amount_7,order
0,5142110002,2010-10-12,150.0,1
1,5142110081,2010-03-08,495.9,1
2,5142110081,2010-03-08,566.05,2
3,5142110313,2010-10-07,144.0,1
4,5142110313,2010-10-07,94.0,2


In [303]:
df['order'] = df.groupby(['Cardnum', 'Date']).cumcount() + 1

In [304]:
AAC7 = df.merge(avg_card_7, on = ['Cardnum', 'Date', 'order'], suffixes=['_Original', '_1'])

In [305]:
AAC7 = AAC7[['Recnum', 'Average_Card_Amount_7']]

In [306]:
AAC7 = AAC7.rename(columns = {"Average_Card_Amount_7": "ACC7"})

In [307]:
AAC7.head()

Unnamed: 0,Recnum,ACC7
0,1,3.62
1,2,31.42
2,3,178.49
3,4,3.62
4,5,3.62


In [308]:
AAC7.shape

(83970, 2)

In [309]:
len(AAC7['Recnum'].unique())

83970

### Average Amount_Card_14

In [310]:
avg_card_14 = df.groupby(['Cardnum'])['Amount'].rolling('14d').mean().reset_index()
avg_card_14['order'] = avg_card_14.groupby(['Cardnum', 'Date']).cumcount() + 1

In [311]:
avg_card_14 = avg_card_14.rename(columns = {"Amount":"Average_Card_Amount_14"})

In [312]:
avg_card_14.head()

Unnamed: 0,Cardnum,Date,Average_Card_Amount_14,order
0,5142110002,2010-10-12,150.0,1
1,5142110081,2010-03-08,495.9,1
2,5142110081,2010-03-08,566.05,2
3,5142110313,2010-10-07,144.0,1
4,5142110313,2010-10-07,94.0,2


In [313]:
df['order'] = df.groupby(['Cardnum', 'Date']).cumcount() + 1

In [314]:
AAC14 = df.merge(avg_card_14, on = ['Cardnum', 'Date', 'order'], suffixes=['_Original', '_1'])

In [315]:
AAC14 = AAC14[['Recnum', 'Average_Card_Amount_14']]

In [316]:
AAC14 = AAC14.rename(columns = {"Average_Card_Amount_14": "ACC14"})

In [317]:
AAC14.head()

Unnamed: 0,Recnum,ACC14
0,1,3.62
1,2,31.42
2,3,178.49
3,4,3.62
4,5,3.62


In [318]:
AAC14.shape

(83970, 2)

In [319]:
len(AAC14['Recnum'].unique())

83970

### Average Amount_Card_30

In [320]:
avg_card_30 = df.groupby(['Cardnum'])['Amount'].rolling('30d').mean().reset_index()
avg_card_30['order'] = avg_card_30.groupby(['Cardnum', 'Date']).cumcount() + 1

In [321]:
avg_card_30 = avg_card_30.rename(columns = {"Amount":"Average_Card_Amount_30"})

In [322]:
avg_card_30.head()

Unnamed: 0,Cardnum,Date,Average_Card_Amount_30,order
0,5142110002,2010-10-12,150.0,1
1,5142110081,2010-03-08,495.9,1
2,5142110081,2010-03-08,566.05,2
3,5142110313,2010-10-07,144.0,1
4,5142110313,2010-10-07,94.0,2


In [323]:
df['order'] = df.groupby(['Cardnum', 'Date']).cumcount() + 1

In [324]:
AAC30 = df.merge(avg_card_30, on = ['Cardnum', 'Date', 'order'], suffixes=['_Original', '_1'])

In [325]:
AAC30 = AAC30[['Recnum', 'Average_Card_Amount_30']]

In [326]:
AAC30 = AAC30.rename(columns = {"Average_Card_Amount_30": "ACC30"})

In [327]:
AAC30.tail()

Unnamed: 0,Recnum,ACC30
83965,84295,344.38
83966,84296,394.66
83967,84297,221.5875
83968,84298,600.0
83969,84299,197.66


In [328]:
AAC30.shape

(83970, 2)

In [329]:
len(AAC30['Recnum'].unique())

83970

### Average Count_Merchant_7

In [330]:
count_merchant_7 = df.groupby(['Merchnum'])['Amount'].rolling('7d').count().reset_index()
count_merchant_7['Merchant_Count'] = count_merchant_7['Amount'].astype('int')
count_merchant_7['Average_Merchant_Count_7'] = count_merchant_7['Merchant_Count'] / 7.0
del count_merchant_7['Amount']
count_merchant_7['order'] = count_merchant_7.groupby(['Merchnum', 'Date']).cumcount() + 1

df['order'] = df.groupby(['Merchnum', 'Date']).cumcount() + 1

ANM7 = df.merge(count_merchant_7, on = ['Merchnum', 'Date', 'order'], suffixes=['_Original', '_1'])

ANM7 = ANM7[['Recnum', 'Average_Merchant_Count_7']]

In [331]:
ANM7 = ANM7.rename(columns = {"Average_Merchant_Count_7": "ANM7"})

In [332]:
ANM7.head()

Unnamed: 0,Recnum,ANM7
0,1,0.142857
1,2,0.142857
2,3,0.142857
3,4,0.285714
4,5,0.428571


In [333]:
ANM7.shape

(83970, 2)

In [334]:
len(ANM7['Recnum'].unique())

83970

### Average Count_Merchant_14

In [335]:
count_merchant_14 = df.groupby(['Merchnum'])['Amount'].rolling('14d').count().reset_index()
count_merchant_14['Merchant_Count'] = count_merchant_14['Amount'].astype('int')
count_merchant_14['Average_Merchant_Count_14'] = count_merchant_14['Merchant_Count'] / 14.0
del count_merchant_14['Amount']
count_merchant_14['order'] = count_merchant_14.groupby(['Merchnum', 'Date']).cumcount() + 1

df['order'] = df.groupby(['Merchnum', 'Date']).cumcount() + 1

ANM14 = df.merge(count_merchant_14, on = ['Merchnum', 'Date', 'order'], suffixes=['_Original', '_1'])

ANM14 = ANM14[['Recnum', 'Average_Merchant_Count_14']]

In [336]:
ANM14 = ANM14.rename(columns = {"Average_Merchant_Count_14": "ANM14"})

In [337]:
ANM14.tail()

Unnamed: 0,Recnum,ANM14
83965,84295,1.071429
83966,84296,0.5
83967,84297,0.071429
83968,84298,0.071429
83969,84299,1.142857


In [338]:
ANM14.shape

(83970, 2)

In [339]:
len(ANM14['Recnum'].unique())

83970

### Average Count_Merchant_30

In [340]:
count_merchant_30 = df.groupby(['Merchnum'])['Amount'].rolling('30d').count().reset_index()
count_merchant_30['Merchant_Count'] = count_merchant_30['Amount'].astype('int')
count_merchant_30['Average_Merchant_Count_30'] = count_merchant_30['Merchant_Count'] / 30.0
del count_merchant_30['Amount']
count_merchant_30['order'] = count_merchant_30.groupby(['Merchnum', 'Date']).cumcount() + 1

df['order'] = df.groupby(['Merchnum', 'Date']).cumcount() + 1

ANM30 = df.merge(count_merchant_30, on = ['Merchnum', 'Date', 'order'], suffixes=['_Original', '_1'])

ANM30 = ANM30[['Recnum', 'Average_Merchant_Count_30']]

In [341]:
ANM30 = ANM30.rename(columns = {"Average_Merchant_Count_30": "ANM30"})

In [342]:
ANM30.tail()

Unnamed: 0,Recnum,ANM30
83965,84295,0.733333
83966,84296,0.666667
83967,84297,0.033333
83968,84298,0.033333
83969,84299,0.766667


In [343]:
ANM30.shape

(83970, 2)

In [344]:
len(ANM30['Recnum'].unique())

83970

### Average Amount_Merchant_7

In [345]:
avg_merchant_7 = df.groupby(['Merchnum'])['Amount'].rolling('7d').mean().reset_index()
avg_merchant_7['order'] = avg_merchant_7.groupby(['Merchnum', 'Date']).cumcount() + 1

avg_merchant_7 = avg_merchant_7.rename(columns = {"Amount":"Average_Merchant_Amount_7"})

df['order'] = df.groupby(['Merchnum', 'Date']).cumcount() + 1

AAM7 = df.merge(avg_merchant_7, on = ['Merchnum', 'Date', 'order'], suffixes=['_Original', '_1'])

AAM7 = AAM7[['Recnum', 'Average_Merchant_Amount_7']]

In [346]:
AAM7 = AAM7.rename(columns = {"Average_Merchant_Amount_7": "AAM7"})

In [347]:
AAM7.head()

Unnamed: 0,Recnum,AAM7
0,1,3.62
1,2,31.42
2,3,178.49
3,4,3.62
4,5,3.62


In [348]:
AAM7.shape

(83970, 2)

In [349]:
len(AAM7['Recnum'].unique())

83970

### Average Amount_Merchant_14

In [350]:
avg_merchant_14 = df.groupby(['Merchnum'])['Amount'].rolling('14d').mean().reset_index()
avg_merchant_14['order'] = avg_merchant_14.groupby(['Merchnum', 'Date']).cumcount() + 1

avg_merchant_14 = avg_merchant_14.rename(columns = {"Amount":"Average_Merchant_Amount_14"})

df['order'] = df.groupby(['Merchnum', 'Date']).cumcount() + 1

AAM14 = df.merge(avg_merchant_14, on = ['Merchnum', 'Date', 'order'], suffixes=['_Original', '_1'])

AAM14 = AAM14[['Recnum', 'Average_Merchant_Amount_14']]

In [351]:
AAM14 = AAM14.rename(columns = {"Average_Merchant_Amount_14": "AAM14"})

In [352]:
AAM14.tail()

Unnamed: 0,Recnum,AAM14
83965,84295,339.343333
83966,84296,180.544286
83967,84297,235.0
83968,84298,600.0
83969,84299,320.009375


In [353]:
AAM14.shape

(83970, 2)

In [354]:
len(AAM14['Recnum'].unique())

83970

### Average Amount_Merchant_30

In [355]:
avg_merchant_30 = df.groupby(['Merchnum'])['Amount'].rolling('30d').mean().reset_index()
avg_merchant_30['order'] = avg_merchant_30.groupby(['Merchnum', 'Date']).cumcount() + 1

avg_merchant_30 = avg_merchant_30.rename(columns = {"Amount":"Average_Merchant_Amount_30"})

df['order'] = df.groupby(['Merchnum', 'Date']).cumcount() + 1

AAM30 = df.merge(avg_merchant_30, on = ['Merchnum', 'Date', 'order'], suffixes=['_Original', '_1'])

AAM30 = AAM30[['Recnum', 'Average_Merchant_Amount_30']]

In [356]:
AAM30 = AAM30.rename(columns = {"Average_Merchant_Amount_30": "AAM30"})

In [357]:
AAM30.tail()

Unnamed: 0,Recnum,AAM30
83965,84295,269.472273
83966,84296,246.719
83967,84297,235.0
83968,84298,600.0
83969,84299,259.060435


In [358]:
AAM30.shape

(83970, 2)

In [359]:
len(AAM30['Recnum'].unique())

83970

### Denominator

In [360]:
Denominator = ANC7.merge(ANC14, on = 'Recnum') \
.merge(ANC30, on = 'Recnum') \
.merge(AAC7, on = 'Recnum') \
.merge(AAC14, on = 'Recnum') \
.merge(AAC30, on = 'Recnum') \
.merge(ANM7, on = 'Recnum') \
.merge(ANM14, on = 'Recnum') \
.merge(ANM30, on = 'Recnum') \
.merge(AAM7, on = 'Recnum') \
.merge(AAM14, on = 'Recnum') \
.merge(AAM30, on = 'Recnum') \

In [361]:
Denominator.head()

Unnamed: 0,Recnum,ANC7,ANC14,ANC30,ACC7,ACC14,ACC30,ANM7,ANM14,ANM30,AAM7,AAM14,AAM30
0,1,0.142857,0.071429,0.033333,3.62,3.62,3.62,0.142857,0.071429,0.033333,3.62,3.62,3.62
1,2,0.142857,0.071429,0.033333,31.42,31.42,31.42,0.142857,0.071429,0.033333,31.42,31.42,31.42
2,3,0.142857,0.071429,0.033333,178.49,178.49,178.49,0.142857,0.071429,0.033333,178.49,178.49,178.49
3,4,0.142857,0.071429,0.033333,3.62,3.62,3.62,0.285714,0.142857,0.066667,3.62,3.62,3.62
4,5,0.285714,0.142857,0.066667,3.62,3.62,3.62,0.428571,0.214286,0.1,3.62,3.62,3.62


In [362]:
Denominator.shape

(83970, 13)

In [363]:
len(Denominator['Recnum'].unique())

83970

### Velocity change variables 96

In [364]:
temp = {}
for Num in Numerator.columns:
    if Num != 'Recnum':
        for Den in Denominator.columns:
            if Den != 'Recnum':
                temp[Num + '_' + Den] = Numerator.loc[:,Num].values / Denominator.loc[:,Den].values

In [365]:
Velocity_change_variables = pd.DataFrame(temp)

In [366]:
Velocity_change_variables['Recnum'] = df['Recnum'].values

In [367]:
Velocity_change_variables.head()

Unnamed: 0,NC1_ANC7,NC1_ANC14,NC1_ANC30,NC1_ACC7,NC1_ACC14,NC1_ACC30,NC1_ANM7,NC1_ANM14,NC1_ANM30,NC1_AAM7,...,AM1_ACC7,AM1_ACC14,AM1_ACC30,AM1_ANM7,AM1_ANM14,AM1_ANM30,AM1_AAM7,AM1_AAM14,AM1_AAM30,Recnum
0,7.0,14.0,30.0,0.276243,0.276243,0.276243,7.0,14.0,30.0,0.276243,...,1.0,1.0,1.0,25.34,50.68,108.6,1.0,1.0,1.0,1
1,7.0,14.0,30.0,0.031827,0.031827,0.031827,7.0,14.0,30.0,0.031827,...,1.0,1.0,1.0,219.94,439.88,942.6,1.0,1.0,1.0,2
2,7.0,14.0,30.0,0.005603,0.005603,0.005603,7.0,14.0,30.0,0.005603,...,1.0,1.0,1.0,1249.43,2498.86,5354.7,1.0,1.0,1.0,3
3,7.0,14.0,30.0,0.276243,0.276243,0.276243,3.5,7.0,15.0,0.276243,...,2.0,2.0,2.0,25.34,50.68,108.6,2.0,2.0,2.0,4
4,7.0,14.0,30.0,0.552486,0.552486,0.552486,4.666667,9.333333,20.0,0.552486,...,3.0,3.0,3.0,25.34,50.68,108.6,3.0,3.0,3.0,5


# Merge All DataFrame

In [368]:
Amount_variables.head()

Unnamed: 0,Amount_avg_card_1,Amount_avg_card_3,Amount_avg_card_7,Amount_avg_card_14,Amount_avg_card_30,Amount_max_card_1,Amount_max_card_3,Amount_max_card_7,Amount_max_card_14,Amount_max_card_30,...,qam_cm_14,qame_cm_14,qas_cm_14,qaa_cm_30,qam_cm_30,qame_cm_30,qas_cm_30,Recnum,Fraud,Date
0,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1,0,2010-01-01
1,31.42,31.42,31.42,31.42,31.42,31.42,31.42,31.42,31.42,31.42,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2,0,2010-01-01
2,178.49,178.49,178.49,178.49,178.49,178.49,178.49,178.49,178.49,178.49,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,3,0,2010-01-01
3,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,4,0,2010-01-01
4,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62,...,1.0,1.0,0.5,1.0,1.0,1.0,0.5,5,0,2010-01-01


In [369]:
Frequency_variables.head()

Unnamed: 0,Count_count_card_1,Count_count_card_3,Count_count_card_7,Count_count_card_14,Count_count_card_30,Count_count_card_0,Count_count_merchant_1,Count_count_merchant_3,Count_count_merchant_7,Count_count_merchant_14,...,Count_count_card_state_1,Count_count_card_state_3,Count_count_card_state_7,Count_count_card_state_14,Count_count_card_state_30,Count_count_card_state_0,Recnum,Fraud,Date,Transtype
0,1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,0,2010-01-01,P
1,1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,2,0,2010-01-01,P
2,1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,3,0,2010-01-01,P
3,1,1,1,1,1,1,2,2,2,2,...,1,1,1,1,1,1,4,0,2010-01-01,P
4,2,2,2,2,2,1,3,3,3,3,...,2,2,2,2,2,1,5,0,2010-01-01,P


In [370]:
Days_since_variables.head()

Unnamed: 0,Recnum,Card_SinceLastTime,Merchant_SinceLastTime,Card_Merchant_SinceLastTime,Card_Zip_SinceLastTime,Card_State_SinceLastTime
0,81127,NaT,0 days,NaT,NaT,NaT
1,16628,NaT,0 days,NaT,NaT,NaT
2,16801,0 days,0 days,0 days,0 days,0 days
3,80318,NaT,0 days,NaT,NaT,NaT
4,80327,0 days,0 days,0 days,0 days,0 days


In [371]:
Velocity_change_variables.head()

Unnamed: 0,NC1_ANC7,NC1_ANC14,NC1_ANC30,NC1_ACC7,NC1_ACC14,NC1_ACC30,NC1_ANM7,NC1_ANM14,NC1_ANM30,NC1_AAM7,...,AM1_ACC7,AM1_ACC14,AM1_ACC30,AM1_ANM7,AM1_ANM14,AM1_ANM30,AM1_AAM7,AM1_AAM14,AM1_AAM30,Recnum
0,7.0,14.0,30.0,0.276243,0.276243,0.276243,7.0,14.0,30.0,0.276243,...,1.0,1.0,1.0,25.34,50.68,108.6,1.0,1.0,1.0,1
1,7.0,14.0,30.0,0.031827,0.031827,0.031827,7.0,14.0,30.0,0.031827,...,1.0,1.0,1.0,219.94,439.88,942.6,1.0,1.0,1.0,2
2,7.0,14.0,30.0,0.005603,0.005603,0.005603,7.0,14.0,30.0,0.005603,...,1.0,1.0,1.0,1249.43,2498.86,5354.7,1.0,1.0,1.0,3
3,7.0,14.0,30.0,0.276243,0.276243,0.276243,3.5,7.0,15.0,0.276243,...,2.0,2.0,2.0,25.34,50.68,108.6,2.0,2.0,2.0,4
4,7.0,14.0,30.0,0.552486,0.552486,0.552486,4.666667,9.333333,20.0,0.552486,...,3.0,3.0,3.0,25.34,50.68,108.6,3.0,3.0,3.0,5


In [372]:
Days_since_int_variables = Days_since_variables.copy()

In [373]:
Days_since_int_variables['Card_SinceLastTime'] = Days_since_variables['Card_SinceLastTime'].dt.days
Days_since_int_variables['Merchant_SinceLastTime'] = Days_since_variables['Merchant_SinceLastTime'].dt.days
Days_since_int_variables['Card_Merchant_SinceLastTime'] = Days_since_variables['Card_Merchant_SinceLastTime'].dt.days
Days_since_int_variables['Card_Zip_SinceLastTime'] = Days_since_variables['Card_Zip_SinceLastTime'].dt.days
Days_since_int_variables['Card_State_SinceLastTime'] = Days_since_variables['Card_State_SinceLastTime'].dt.days

In [374]:
Days_since_int_variables = Days_since_int_variables.fillna(0)

In [375]:
Days_since_int_variables.head()

Unnamed: 0,Recnum,Card_SinceLastTime,Merchant_SinceLastTime,Card_Merchant_SinceLastTime,Card_Zip_SinceLastTime,Card_State_SinceLastTime
0,81127,0.0,0.0,0.0,0.0,0.0
1,16628,0.0,0.0,0.0,0.0,0.0
2,16801,0.0,0.0,0.0,0.0,0.0
3,80318,0.0,0.0,0.0,0.0,0.0
4,80327,0.0,0.0,0.0,0.0,0.0


In [376]:
all_df_Amount_variables = Amount_variables.copy()
del all_df_Amount_variables['Date']
del all_df_Amount_variables['Fraud']
all_df_Frequency_variables = Frequency_variables.copy()
del all_df_Frequency_variables['Date']
del all_df_Frequency_variables['Transtype']
all_df_Days_since_int_variables = Days_since_int_variables.copy()
all_df_Velocity_change_variables = Velocity_change_variables.copy()

In [377]:
all_df = all_df_Amount_variables.merge(all_df_Frequency_variables, on = 'Recnum') \
.merge(all_df_Days_since_int_variables, on = 'Recnum') \
.merge(all_df_Velocity_change_variables, on = 'Recnum')

In [378]:
all_df.shape

(83970, 373)

In [379]:
all_df.columns.values

array(['Amount_avg_card_1', 'Amount_avg_card_3', 'Amount_avg_card_7',
       'Amount_avg_card_14', 'Amount_avg_card_30', 'Amount_max_card_1',
       'Amount_max_card_3', 'Amount_max_card_7', 'Amount_max_card_14',
       'Amount_max_card_30', 'Amount_median_card_1',
       'Amount_median_card_3', 'Amount_median_card_7',
       'Amount_median_card_14', 'Amount_median_card_30',
       'Amount_sum_card_1', 'Amount_sum_card_3', 'Amount_sum_card_7',
       'Amount_sum_card_14', 'Amount_sum_card_30', 'Amount_avg_card_0',
       'Amount_max_card_0', 'Amount_median_card_0', 'Amount_sum_card_0',
       'qaa_cm_0', 'qam_cm_0', 'qame_cm_0', 'qas_cm_0', 'qaa_cm_1',
       'qam_cm_1', 'qame_cm_1', 'qas_cm_1', 'qaa_cm_3', 'qam_cm_3',
       'qame_cm_3', 'qas_cm_3', 'qaa_cm_7', 'qam_cm_7', 'qame_cm_7',
       'qas_cm_7', 'qaa_cm_14', 'qam_cm_14', 'qame_cm_14', 'qas_cm_14',
       'qaa_cm_30', 'qam_cm_30', 'qame_cm_30', 'qas_cm_30',
       'Amount_avg_merchant_1', 'Amount_avg_merchant_3',
       'Amoun

In [381]:
all_df.to_csv('Data/all_df_before_1101.csv')