In [1]:
import pandas as pd
import numpy as np
from scipy import stats

In [2]:
df = pd.read_csv('Data/card transactions.csv')

In [3]:
df['Date'] = df['Date'].astype('datetime64[ns]')

In [4]:
df = df[df["Date"] < '2010-11-01']

In [5]:
df = df.set_index('Date')

In [6]:
df.head()

Unnamed: 0_level_0,Recnum,Cardnum,Merchnum,Merch description,Merch state,Merch zip,Transtype,Amount,Fraud
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2010-01-01,1,5142190439,5509006296254,FEDEX SHP 12/23/09 AB#,TN,38118.0,P,3.62,0
2010-01-01,2,5142183973,61003026333,SERVICE MERCHANDISE #81,MA,1803.0,P,31.42,0
2010-01-01,3,5142131721,4503082993600,OFFICE DEPOT #191,MD,20706.0,P,178.49,0
2010-01-01,4,5142148452,5509006296254,FEDEX SHP 12/28/09 AB#,TN,38118.0,P,3.62,0
2010-01-01,5,5142190439,5509006296254,FEDEX SHP 12/23/09 AB#,TN,38118.0,P,3.62,0


In [7]:
df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 84299 entries, 2010-01-01 to 2010-10-31
Data columns (total 9 columns):
Recnum               84299 non-null int64
Cardnum              84299 non-null int64
Merchnum             81497 non-null object
Merch description    84299 non-null object
Merch state          83272 non-null object
Merch zip            80604 non-null float64
Transtype            84299 non-null object
Amount               84299 non-null float64
Fraud                84299 non-null int64
dtypes: float64(2), int64(3), object(4)
memory usage: 6.4+ MB


In [8]:
df.describe()

Unnamed: 0,Recnum,Cardnum,Merch zip,Amount,Fraud
count,84299.0,84299.0,80604.0,84299.0,84299.0
mean,42150.0,5142203000.0,44639.600677,434.9611,0.010439
std,24335.169508,56500.4,28225.631383,10715.17,0.101638
min,1.0,5142110000.0,1.0,0.01,0.0
25%,21075.5,5142152000.0,20884.0,31.71,0.0
50%,42150.0,5142196000.0,38118.0,137.4,0.0
75%,63224.5,5142249000.0,63103.0,430.0,0.0
max,84299.0,5142847000.0,99999.0,3102046.0,1.0


# Remove Outliers

In [9]:
df = df[df['Transtype'] == 'P']
df = df[df['Amount'] != max(df['Amount'])]

In [10]:
df.shape

(83970, 9)

# Filling Missing Values

## Filling State

In [11]:
sum(df['Merch state'].isna())

864

In [12]:
temp_df = df[df['Merch state'].notna()]

In [13]:
stats.mode(temp_df['Merch state'])



ModeResult(mode=array(['TN'], dtype=object), count=array([11290]))

In [14]:
df['Merch state'] = df['Merch state'].fillna(stats.mode(temp_df['Merch state'])[0][0])

In [15]:
sum(df['Merch state'].isna())

0

## Filling Merchnum

In [16]:
sum(df['Merchnum'] == 0)

0

In [17]:
sum(df['Merchnum'].isna())

2637

In [18]:
temp_df = df[df['Merchnum'].notna()]

In [19]:
stats.mode(temp_df['Merchnum'])

ModeResult(mode=array(['930090121224'], dtype=object), count=array([9289]))

In [20]:
df['Merchnum'] = df['Merchnum'].fillna(stats.mode(temp_df['Merchnum'])[0][0])

In [21]:
sum(df['Merchnum'].isna())

0

In [22]:
sum(df['Merchnum'] == 0)

0

## Filling Zip

In [23]:
sum(df['Merch zip'] == 0)

0

In [24]:
sum(df['Merch zip'].isna())

3366

In [25]:
temp_df = df[df['Merch zip'].notna()]

In [26]:
stats.mode(temp_df['Merch zip'])

ModeResult(mode=array([38118.]), count=array([11142]))

In [27]:
df['Merch zip'] = df['Merch zip'].fillna(stats.mode(temp_df['Merch zip'])[0][0])

In [28]:
sum(df['Merch zip'] == 0)

0

In [29]:
sum(df['Merch zip'].isna())

0

# Creating Variables

## Amount Variables

### Card

In [30]:
for i in [1, 3, 7, 14, 30]:
    locals()['avg_card_' + str(i)] = df.groupby(['Cardnum'])['Amount'].rolling(str(i) + 'd').mean().reset_index()
    locals()['avg_card_' + str(i)]['order'] = locals()['avg_card_' + str(i)].groupby(['Cardnum', 'Date']).cumcount() + 1

In [31]:
avg_card_1.tail()

Unnamed: 0,Cardnum,Date,Amount,order
83965,5142847398,2010-03-21,199.14,1
83966,5142847398,2010-03-22,78.23,1
83967,5142847398,2010-03-24,440.06,1
83968,5142847398,2010-03-28,288.82,1
83969,5142847398,2010-03-29,736.55,1


In [32]:
for i in [1, 3, 7, 14, 30]:
    locals()['max_card_' + str(i)] = df.groupby(['Cardnum'])['Amount'].rolling(str(i) + 'd').max().reset_index()
    locals()['max_card_' + str(i)]['order'] = locals()['max_card_' + str(i)].groupby(['Cardnum', 'Date']).cumcount() + 1

In [33]:
max_card_1.head()

Unnamed: 0,Cardnum,Date,Amount,order
0,5142110002,2010-10-12,150.0,1
1,5142110081,2010-03-08,495.9,1
2,5142110081,2010-03-08,636.2,2
3,5142110313,2010-10-07,144.0,1
4,5142110313,2010-10-07,144.0,2


In [34]:
for i in [1, 3, 7, 14, 30]:
    locals()['median_card_' + str(i)] = df.groupby(['Cardnum'])['Amount'].rolling(str(i) + 'd').median().reset_index()
    locals()['median_card_' + str(i)]['order'] = locals()['median_card_' + str(i)].groupby(['Cardnum', 'Date']).cumcount() + 1

In [35]:
median_card_1.head()

Unnamed: 0,Cardnum,Date,Amount,order
0,5142110002,2010-10-12,150.0,1
1,5142110081,2010-03-08,495.9,1
2,5142110081,2010-03-08,566.05,2
3,5142110313,2010-10-07,144.0,1
4,5142110313,2010-10-07,94.0,2


In [36]:
for i in [1, 3, 7, 14, 30]:
    locals()['sum_card_' + str(i)] = df.groupby(['Cardnum'])['Amount'].rolling(str(i) + 'd').sum().reset_index()
    locals()['sum_card_' + str(i)]['order'] = locals()['sum_card_' + str(i)].groupby(['Cardnum', 'Date']).cumcount() + 1

In [37]:
sum_card_1.head()

Unnamed: 0,Cardnum,Date,Amount,order
0,5142110002,2010-10-12,150.0,1
1,5142110081,2010-03-08,495.9,1
2,5142110081,2010-03-08,1132.1,2
3,5142110313,2010-10-07,144.0,1
4,5142110313,2010-10-07,188.0,2


In [39]:
card_df = df.copy()
card_df = card_df.reset_index()
card_df['order'] = card_df.groupby(['Cardnum', 'Date']).cumcount() + 1

In [40]:
card_df.head()

Unnamed: 0,Date,Recnum,Cardnum,Merchnum,Merch description,Merch state,Merch zip,Transtype,Amount,Fraud,order
0,2010-01-01,1,5142190439,5509006296254,FEDEX SHP 12/23/09 AB#,TN,38118.0,P,3.62,0,1
1,2010-01-01,2,5142183973,61003026333,SERVICE MERCHANDISE #81,MA,1803.0,P,31.42,0,1
2,2010-01-01,3,5142131721,4503082993600,OFFICE DEPOT #191,MD,20706.0,P,178.49,0,1
3,2010-01-01,4,5142148452,5509006296254,FEDEX SHP 12/28/09 AB#,TN,38118.0,P,3.62,0,1
4,2010-01-01,5,5142190439,5509006296254,FEDEX SHP 12/23/09 AB#,TN,38118.0,P,3.62,0,2


In [41]:
not_needed_columns = ['index', 'Recnum', 'Merchnum', 'Merch description', 'Merch state', 'Merch zip', 'Transtype']
for column in not_needed_columns:
    try:
        del card_df[column]
    except:
        pass

In [42]:
card_df.head()

Unnamed: 0,Date,Cardnum,Amount,Fraud,order
0,2010-01-01,5142190439,3.62,0,1
1,2010-01-01,5142183973,31.42,0,1
2,2010-01-01,5142131721,178.49,0,1
3,2010-01-01,5142148452,3.62,0,1
4,2010-01-01,5142190439,3.62,0,2


In [43]:
merged_data_card = card_df \
.merge(avg_card_1, on = ['Date', 'Cardnum', 'order'], how = 'left', suffixes=['', '_avg_card_1']) \
.merge(avg_card_3, on = ['Date', 'Cardnum', 'order'], how = 'left', suffixes=['', '_avg_card_3']) \
.merge(avg_card_7, on = ['Date', 'Cardnum', 'order'], how = 'left', suffixes=['', '_avg_card_7']) \
.merge(avg_card_14, on = ['Date', 'Cardnum', 'order'], how = 'left', suffixes=['', '_avg_card_14']) \
.merge(avg_card_30, on = ['Date', 'Cardnum', 'order'], how = 'left', suffixes=['', '_avg_card_30']) \
.merge(max_card_1, on = ['Date', 'Cardnum', 'order'], how = 'left', suffixes=['', '_max_card_1'])\
.merge(max_card_3, on = ['Date', 'Cardnum', 'order'], how = 'left', suffixes=['', '_max_card_3']) \
.merge(max_card_7, on = ['Date', 'Cardnum', 'order'], how = 'left', suffixes=['', '_max_card_7']) \
.merge(max_card_14, on = ['Date', 'Cardnum', 'order'], how = 'left', suffixes=['', '_max_card_14']) \
.merge(max_card_30, on = ['Date', 'Cardnum', 'order'], how = 'left', suffixes=['', '_max_card_30']) \
.merge(median_card_1, on = ['Date', 'Cardnum', 'order'], how = 'left', suffixes=['', '_median_card_1'])\
.merge(median_card_3, on = ['Date', 'Cardnum', 'order'], how = 'left', suffixes=['', '_median_card_3']) \
.merge(median_card_7, on = ['Date', 'Cardnum', 'order'], how = 'left', suffixes=['', '_median_card_7']) \
.merge(median_card_14, on = ['Date', 'Cardnum', 'order'], how = 'left', suffixes=['', '_median_card_14']) \
.merge(median_card_30, on = ['Date', 'Cardnum', 'order'], how = 'left', suffixes=['', '_median_card_30']) \
.merge(sum_card_1, on = ['Date', 'Cardnum', 'order'], how = 'left', suffixes=['', '_sum_card_1'])\
.merge(sum_card_3, on = ['Date', 'Cardnum', 'order'], how = 'left', suffixes=['', '_sum_card_3']) \
.merge(sum_card_7, on = ['Date', 'Cardnum', 'order'], how = 'left', suffixes=['', '_sum_card_7']) \
.merge(sum_card_14, on = ['Date', 'Cardnum', 'order'], how = 'left', suffixes=['', '_sum_card_14']) \
.merge(sum_card_30, on = ['Date', 'Cardnum', 'order'], how = 'left', suffixes=['', '_sum_card_30'])

In [44]:
merged_data_card['Amount_avg_card_0'] = merged_data_card['Amount']
merged_data_card['Amount_max_card_0'] = merged_data_card['Amount']
merged_data_card['Amount_median_card_0'] = merged_data_card['Amount']
merged_data_card['Amount_sum_card_0'] = merged_data_card['Amount']

In [45]:
merged_data_card.head()

Unnamed: 0,Date,Cardnum,Amount,Fraud,order,Amount_avg_card_1,Amount_avg_card_3,Amount_avg_card_7,Amount_avg_card_14,Amount_avg_card_30,...,Amount_median_card_30,Amount_sum_card_1,Amount_sum_card_3,Amount_sum_card_7,Amount_sum_card_14,Amount_sum_card_30,Amount_avg_card_0,Amount_max_card_0,Amount_median_card_0,Amount_sum_card_0
0,2010-01-01,5142190439,3.62,0,1,3.62,3.62,3.62,3.62,3.62,...,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62
1,2010-01-01,5142183973,31.42,0,1,31.42,31.42,31.42,31.42,31.42,...,31.42,31.42,31.42,31.42,31.42,31.42,31.42,31.42,31.42,31.42
2,2010-01-01,5142131721,178.49,0,1,178.49,178.49,178.49,178.49,178.49,...,178.49,178.49,178.49,178.49,178.49,178.49,178.49,178.49,178.49,178.49
3,2010-01-01,5142148452,3.62,0,1,3.62,3.62,3.62,3.62,3.62,...,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62
4,2010-01-01,5142190439,3.62,0,2,3.62,3.62,3.62,3.62,3.62,...,3.62,7.24,7.24,7.24,7.24,7.24,3.62,3.62,3.62,3.62


In [47]:
for i in [0, 1, 3, 7, 14, 30]:
    merged_data_card['qaa_cm_' + str(i)] = merged_data_card['Amount'] / merged_data_card['Amount_avg_card_' + str(i)]
    merged_data_card['qam_cm_' + str(i)] = merged_data_card['Amount'] / merged_data_card['Amount_max_card_' + str(i)]
    merged_data_card['qame_cm_' + str(i)] = merged_data_card['Amount'] / merged_data_card['Amount_median_card_' + str(i)]
    merged_data_card['qas_cm_' + str(i)] = merged_data_card['Amount'] / merged_data_card['Amount_sum_card_' + str(i)]

In [48]:
merged_data_card.head()

Unnamed: 0,Date,Cardnum,Amount,Fraud,order,Amount_avg_card_1,Amount_avg_card_3,Amount_avg_card_7,Amount_avg_card_14,Amount_avg_card_30,...,qame_cm_7,qas_cm_7,qaa_cm_14,qam_cm_14,qame_cm_14,qas_cm_14,qaa_cm_30,qam_cm_30,qame_cm_30,qas_cm_30
0,2010-01-01,5142190439,3.62,0,1,3.62,3.62,3.62,3.62,3.62,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1,2010-01-01,5142183973,31.42,0,1,31.42,31.42,31.42,31.42,31.42,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2,2010-01-01,5142131721,178.49,0,1,178.49,178.49,178.49,178.49,178.49,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
3,2010-01-01,5142148452,3.62,0,1,3.62,3.62,3.62,3.62,3.62,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
4,2010-01-01,5142190439,3.62,0,2,3.62,3.62,3.62,3.62,3.62,...,1.0,0.5,1.0,1.0,1.0,0.5,1.0,1.0,1.0,0.5


### Merchant

In [49]:
for i in [1, 3, 7, 14, 30]:
    locals()['avg_merchant_' + str(i)] = df.groupby(['Merchnum'])['Amount'].rolling(str(i) + 'd').mean().reset_index()
    locals()['avg_merchant_' + str(i)]['order'] = locals()['avg_merchant_' + str(i)].groupby(['Merchnum', 'Date']).cumcount() + 1
    
    locals()['max_merchant_' + str(i)] = df.groupby(['Merchnum'])['Amount'].rolling(str(i) + 'd').max().reset_index()
    locals()['max_merchant_' + str(i)]['order'] = locals()['max_merchant_' + str(i)].groupby(['Merchnum', 'Date']).cumcount() + 1
    
    locals()['median_merchant_' + str(i)] = df.groupby(['Merchnum'])['Amount'].rolling(str(i) + 'd').median().reset_index()
    locals()['median_merchant_' + str(i)]['order'] = locals()['median_merchant_' + str(i)].groupby(['Merchnum', 'Date']).cumcount() + 1
    
    locals()['sum_merchant_' + str(i)] = df.groupby(['Merchnum'])['Amount'].rolling(str(i) + 'd').sum().reset_index()
    locals()['sum_merchant_' + str(i)]['order'] = locals()['sum_merchant_' + str(i)].groupby(['Merchnum', 'Date']).cumcount() + 1

In [50]:
max_merchant_3.head()

Unnamed: 0,Merchnum,Date,Amount,order
0,0,2010-01-06,48.97,1
1,0,2010-01-07,87.02,1
2,0,2010-01-07,460.0,2
3,0,2010-01-11,25.0,1
4,0,2010-01-11,110.0,2


In [51]:
median_merchant_3.head()

Unnamed: 0,Merchnum,Date,Amount,order
0,0,2010-01-06,48.97,1
1,0,2010-01-07,67.995,1
2,0,2010-01-07,87.02,2
3,0,2010-01-11,25.0,1
4,0,2010-01-11,67.5,2


In [52]:
sum_merchant_3.head()

Unnamed: 0,Merchnum,Date,Amount,order
0,0,2010-01-06,48.97,1
1,0,2010-01-07,135.99,1
2,0,2010-01-07,595.99,2
3,0,2010-01-11,25.0,1
4,0,2010-01-11,135.0,2


In [53]:
merchant_df = df.copy()
merchant_df = merchant_df.reset_index()
merchant_df['order'] = merchant_df.groupby(['Merchnum', 'Date']).cumcount() + 1

In [54]:
merchant_df.head()

Unnamed: 0,Date,Recnum,Cardnum,Merchnum,Merch description,Merch state,Merch zip,Transtype,Amount,Fraud,order
0,2010-01-01,1,5142190439,5509006296254,FEDEX SHP 12/23/09 AB#,TN,38118.0,P,3.62,0,1
1,2010-01-01,2,5142183973,61003026333,SERVICE MERCHANDISE #81,MA,1803.0,P,31.42,0,1
2,2010-01-01,3,5142131721,4503082993600,OFFICE DEPOT #191,MD,20706.0,P,178.49,0,1
3,2010-01-01,4,5142148452,5509006296254,FEDEX SHP 12/28/09 AB#,TN,38118.0,P,3.62,0,2
4,2010-01-01,5,5142190439,5509006296254,FEDEX SHP 12/23/09 AB#,TN,38118.0,P,3.62,0,3


In [55]:
not_needed_columns = ['index', 'Recnum', 'Cardnum', 'Merch description', 'Merch state', 'Merch zip', 'Transtype']
for column in not_needed_columns:
    try:
        del merchant_df[column]
    except:
        pass

In [56]:
merchant_df.head()

Unnamed: 0,Date,Merchnum,Amount,Fraud,order
0,2010-01-01,5509006296254,3.62,0,1
1,2010-01-01,61003026333,31.42,0,1
2,2010-01-01,4503082993600,178.49,0,1
3,2010-01-01,5509006296254,3.62,0,2
4,2010-01-01,5509006296254,3.62,0,3


In [57]:
merged_data_merchant = merchant_df \
.merge(avg_merchant_1, on = ['Date', 'Merchnum', 'order'], how = 'left', suffixes=['', '_avg_merchant_1'])\
.merge(avg_merchant_3, on = ['Date', 'Merchnum', 'order'], how = 'left', suffixes=['', '_avg_merchant_3']) \
.merge(avg_merchant_7, on = ['Date', 'Merchnum', 'order'], how = 'left', suffixes=['', '_avg_merchant_7']) \
.merge(avg_merchant_14, on = ['Date', 'Merchnum', 'order'], how = 'left', suffixes=['', '_avg_merchant_14']) \
.merge(avg_merchant_30, on = ['Date', 'Merchnum', 'order'], how = 'left', suffixes=['', '_avg_merchant_30']) \
.merge(max_merchant_1, on = ['Date', 'Merchnum', 'order'], how = 'left', suffixes=['', '_max_merchant_1'])\
.merge(max_merchant_3, on = ['Date', 'Merchnum', 'order'], how = 'left', suffixes=['', '_max_merchant_3']) \
.merge(max_merchant_7, on = ['Date', 'Merchnum', 'order'], how = 'left', suffixes=['', '_max_merchant_7']) \
.merge(max_merchant_14, on = ['Date', 'Merchnum', 'order'], how = 'left', suffixes=['', '_max_merchant_14']) \
.merge(max_merchant_30, on = ['Date', 'Merchnum', 'order'], how = 'left', suffixes=['', '_max_merchant_30']) \
.merge(median_merchant_1, on = ['Date', 'Merchnum', 'order'], how = 'left', suffixes=['', '_median_merchant_1'])\
.merge(median_merchant_3, on = ['Date', 'Merchnum', 'order'], how = 'left', suffixes=['', '_median_merchant_3']) \
.merge(median_merchant_7, on = ['Date', 'Merchnum', 'order'], how = 'left', suffixes=['', '_median_merchant_7']) \
.merge(median_merchant_14, on = ['Date', 'Merchnum', 'order'], how = 'left', suffixes=['', '_median_merchant_14']) \
.merge(median_merchant_30, on = ['Date', 'Merchnum', 'order'], how = 'left', suffixes=['', '_median_merchant_30']) \
.merge(sum_merchant_1, on = ['Date', 'Merchnum', 'order'], how = 'left', suffixes=['', '_sum_merchant_1'])\
.merge(sum_merchant_3, on = ['Date', 'Merchnum', 'order'], how = 'left', suffixes=['', '_sum_merchant_3']) \
.merge(sum_merchant_7, on = ['Date', 'Merchnum', 'order'], how = 'left', suffixes=['', '_sum_merchant_7']) \
.merge(sum_merchant_14, on = ['Date', 'Merchnum', 'order'], how = 'left', suffixes=['', '_sum_merchant_14']) \
.merge(sum_merchant_30, on = ['Date', 'Merchnum', 'order'], how = 'left', suffixes=['', '_sum_merchant_30'])

In [58]:
merged_data_merchant['Amount_avg_merchant_0'] = merged_data_merchant['Amount']
merged_data_merchant['Amount_max_merchant_0'] = merged_data_merchant['Amount']
merged_data_merchant['Amount_median_merchant_0'] = merged_data_merchant['Amount']
merged_data_merchant['Amount_sum_merchant_0'] = merged_data_merchant['Amount']

In [59]:
merged_data_merchant.head()

Unnamed: 0,Date,Merchnum,Amount,Fraud,order,Amount_avg_merchant_1,Amount_avg_merchant_3,Amount_avg_merchant_7,Amount_avg_merchant_14,Amount_avg_merchant_30,...,Amount_median_merchant_30,Amount_sum_merchant_1,Amount_sum_merchant_3,Amount_sum_merchant_7,Amount_sum_merchant_14,Amount_sum_merchant_30,Amount_avg_merchant_0,Amount_max_merchant_0,Amount_median_merchant_0,Amount_sum_merchant_0
0,2010-01-01,5509006296254,3.62,0,1,3.62,3.62,3.62,3.62,3.62,...,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62
1,2010-01-01,61003026333,31.42,0,1,31.42,31.42,31.42,31.42,31.42,...,31.42,31.42,31.42,31.42,31.42,31.42,31.42,31.42,31.42,31.42
2,2010-01-01,4503082993600,178.49,0,1,178.49,178.49,178.49,178.49,178.49,...,178.49,178.49,178.49,178.49,178.49,178.49,178.49,178.49,178.49,178.49
3,2010-01-01,5509006296254,3.62,0,2,3.62,3.62,3.62,3.62,3.62,...,3.62,7.24,7.24,7.24,7.24,7.24,3.62,3.62,3.62,3.62
4,2010-01-01,5509006296254,3.62,0,3,3.62,3.62,3.62,3.62,3.62,...,3.62,10.86,10.86,10.86,10.86,10.86,3.62,3.62,3.62,3.62


In [60]:
for i in [0, 1, 3, 7, 14, 30]:
    merged_data_merchant['qaa_cm_' + str(i)] = merged_data_merchant['Amount'] / merged_data_merchant['Amount_avg_merchant_' + str(i)]
    merged_data_merchant['qam_cm_' + str(i)] = merged_data_merchant['Amount'] / merged_data_merchant['Amount_max_merchant_' + str(i)]
    merged_data_merchant['qame_cm_' + str(i)] = merged_data_merchant['Amount'] / merged_data_merchant['Amount_median_merchant_' + str(i)]
    merged_data_merchant['qas_cm_' + str(i)] = merged_data_merchant['Amount'] / merged_data_merchant['Amount_sum_merchant_' + str(i)]

In [61]:
sum(merged_data_merchant['Merchnum'] == 0)

0

In [62]:
merged_data_merchant.head()

Unnamed: 0,Date,Merchnum,Amount,Fraud,order,Amount_avg_merchant_1,Amount_avg_merchant_3,Amount_avg_merchant_7,Amount_avg_merchant_14,Amount_avg_merchant_30,...,qame_cm_7,qas_cm_7,qaa_cm_14,qam_cm_14,qame_cm_14,qas_cm_14,qaa_cm_30,qam_cm_30,qame_cm_30,qas_cm_30
0,2010-01-01,5509006296254,3.62,0,1,3.62,3.62,3.62,3.62,3.62,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1,2010-01-01,61003026333,31.42,0,1,31.42,31.42,31.42,31.42,31.42,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2,2010-01-01,4503082993600,178.49,0,1,178.49,178.49,178.49,178.49,178.49,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
3,2010-01-01,5509006296254,3.62,0,2,3.62,3.62,3.62,3.62,3.62,...,1.0,0.5,1.0,1.0,1.0,0.5,1.0,1.0,1.0,0.5
4,2010-01-01,5509006296254,3.62,0,3,3.62,3.62,3.62,3.62,3.62,...,1.0,0.333333,1.0,1.0,1.0,0.333333,1.0,1.0,1.0,0.333333


In [63]:
merged_data_merchant.shape

(83970, 53)

### Card + Merchnum

In [64]:
for i in [1, 3, 7, 14, 30]:
    locals()['avg_card_merchant_' + str(i)] = df.groupby(['Cardnum', 'Merchnum'])['Amount'].rolling(str(i) + 'd').mean().reset_index()
    locals()['avg_card_merchant_' + str(i)]['order'] = locals()['avg_card_merchant_' + str(i)].groupby(['Cardnum', 'Merchnum', 'Date']).cumcount() + 1
    
    locals()['max_card_merchant_' + str(i)] = df.groupby(['Cardnum', 'Merchnum'])['Amount'].rolling(str(i) + 'd').max().reset_index()
    locals()['max_card_merchant_' + str(i)]['order'] = locals()['max_card_merchant_' + str(i)].groupby(['Cardnum', 'Merchnum', 'Date']).cumcount() + 1
    
    locals()['median_card_merchant_' + str(i)] = df.groupby(['Cardnum', 'Merchnum'])['Amount'].rolling(str(i) + 'd').median().reset_index()
    locals()['median_card_merchant_' + str(i)]['order'] = locals()['median_card_merchant_' + str(i)].groupby(['Cardnum', 'Merchnum', 'Date']).cumcount() + 1
    
    locals()['sum_card_merchant_' + str(i)] = df.groupby(['Cardnum', 'Merchnum'])['Amount'].rolling(str(i) + 'd').sum().reset_index()
    locals()['sum_card_merchant_' + str(i)]['order'] = locals()['sum_card_merchant_' + str(i)].groupby(['Cardnum', 'Merchnum', 'Date']).cumcount() + 1

In [65]:
avg_card_merchant_7.head()

Unnamed: 0,Cardnum,Merchnum,Date,Amount,order
0,5142110002,930090121224,2010-10-12,150.0,1
1,5142110081,930090121224,2010-03-08,495.9,1
2,5142110081,930090121224,2010-03-08,566.05,2
3,5142110313,930090121224,2010-10-07,144.0,1
4,5142110313,930090121224,2010-10-07,94.0,2


In [66]:
max_card_merchant_7.head()

Unnamed: 0,Cardnum,Merchnum,Date,Amount,order
0,5142110002,930090121224,2010-10-12,150.0,1
1,5142110081,930090121224,2010-03-08,495.9,1
2,5142110081,930090121224,2010-03-08,636.2,2
3,5142110313,930090121224,2010-10-07,144.0,1
4,5142110313,930090121224,2010-10-07,144.0,2


In [67]:
median_card_merchant_7.head()

Unnamed: 0,Cardnum,Merchnum,Date,Amount,order
0,5142110002,930090121224,2010-10-12,150.0,1
1,5142110081,930090121224,2010-03-08,495.9,1
2,5142110081,930090121224,2010-03-08,566.05,2
3,5142110313,930090121224,2010-10-07,144.0,1
4,5142110313,930090121224,2010-10-07,94.0,2


In [68]:
sum_card_merchant_7.head()

Unnamed: 0,Cardnum,Merchnum,Date,Amount,order
0,5142110002,930090121224,2010-10-12,150.0,1
1,5142110081,930090121224,2010-03-08,495.9,1
2,5142110081,930090121224,2010-03-08,1132.1,2
3,5142110313,930090121224,2010-10-07,144.0,1
4,5142110313,930090121224,2010-10-07,188.0,2


In [69]:
card_merchant_df = df.copy()
card_merchant_df = card_merchant_df.reset_index()
card_merchant_df['order'] = card_merchant_df.groupby(['Cardnum', 'Merchnum', 'Date']).cumcount() + 1

In [70]:
card_merchant_df.head()

Unnamed: 0,Date,Recnum,Cardnum,Merchnum,Merch description,Merch state,Merch zip,Transtype,Amount,Fraud,order
0,2010-01-01,1,5142190439,5509006296254,FEDEX SHP 12/23/09 AB#,TN,38118.0,P,3.62,0,1
1,2010-01-01,2,5142183973,61003026333,SERVICE MERCHANDISE #81,MA,1803.0,P,31.42,0,1
2,2010-01-01,3,5142131721,4503082993600,OFFICE DEPOT #191,MD,20706.0,P,178.49,0,1
3,2010-01-01,4,5142148452,5509006296254,FEDEX SHP 12/28/09 AB#,TN,38118.0,P,3.62,0,1
4,2010-01-01,5,5142190439,5509006296254,FEDEX SHP 12/23/09 AB#,TN,38118.0,P,3.62,0,2


In [71]:
not_needed_columns = ['index', 'Recnum', 'Merch description', 'Merch state', 'Merch zip', 'Transtype']
for column in not_needed_columns:
    try:
        del card_merchant_df[column]
    except:
        pass

In [72]:
card_merchant_df.head()

Unnamed: 0,Date,Cardnum,Merchnum,Amount,Fraud,order
0,2010-01-01,5142190439,5509006296254,3.62,0,1
1,2010-01-01,5142183973,61003026333,31.42,0,1
2,2010-01-01,5142131721,4503082993600,178.49,0,1
3,2010-01-01,5142148452,5509006296254,3.62,0,1
4,2010-01-01,5142190439,5509006296254,3.62,0,2


In [73]:
merged_data_card_merchant = card_merchant_df \
.merge(avg_card_merchant_1, on = ['Date', 'Merchnum', 'Cardnum', 'order'], how = 'left', suffixes=['', '_avg_card_merchant_1'])\
.merge(avg_card_merchant_3, on = ['Date', 'Merchnum', 'Cardnum', 'order'], how = 'left', suffixes=['', '_avg_card_merchant_3']) \
.merge(avg_card_merchant_7, on = ['Date', 'Merchnum', 'Cardnum', 'order'], how = 'left', suffixes=['', '_avg_card_merchant_7']) \
.merge(avg_card_merchant_14, on = ['Date', 'Merchnum', 'Cardnum', 'order'], how = 'left', suffixes=['', '_avg_card_merchant_14']) \
.merge(avg_card_merchant_30, on = ['Date', 'Merchnum', 'Cardnum', 'order'], how = 'left', suffixes=['', '_avg_card_merchant_30']) \
.merge(max_card_merchant_1, on = ['Date', 'Merchnum', 'Cardnum', 'order'], how = 'left', suffixes=['', '_max_card_merchant_1'])\
.merge(max_card_merchant_3, on = ['Date', 'Merchnum', 'Cardnum', 'order'], how = 'left', suffixes=['', '_max_card_merchant_3']) \
.merge(max_card_merchant_7, on = ['Date', 'Merchnum', 'Cardnum', 'order'], how = 'left', suffixes=['', '_max_card_merchant_7']) \
.merge(max_card_merchant_14, on = ['Date', 'Merchnum', 'Cardnum', 'order'], how = 'left', suffixes=['', '_max_card_merchant_14']) \
.merge(max_card_merchant_30, on = ['Date', 'Merchnum', 'Cardnum', 'order'], how = 'left', suffixes=['', '_max_card_merchant_30']) \
.merge(median_card_merchant_1, on = ['Date', 'Merchnum', 'Cardnum', 'order'], how = 'left', suffixes=['', '_median_card_merchant_1'])\
.merge(median_card_merchant_3, on = ['Date', 'Merchnum', 'Cardnum', 'order'], how = 'left', suffixes=['', '_median_card_merchant_3']) \
.merge(median_card_merchant_7, on = ['Date', 'Merchnum', 'Cardnum', 'order'], how = 'left', suffixes=['', '_median_card_merchant_7']) \
.merge(median_card_merchant_14, on = ['Date', 'Merchnum', 'Cardnum', 'order'], how = 'left', suffixes=['', '_median_card_merchant_14']) \
.merge(median_card_merchant_30, on = ['Date', 'Merchnum', 'Cardnum', 'order'], how = 'left', suffixes=['', '_median_card_merchant_30']) \
.merge(sum_card_merchant_1, on = ['Date', 'Merchnum', 'Cardnum', 'order'], how = 'left', suffixes=['', '_sum_card_merchant_1'])\
.merge(sum_card_merchant_3, on = ['Date', 'Merchnum', 'Cardnum', 'order'], how = 'left', suffixes=['', '_sum_card_merchant_3']) \
.merge(sum_card_merchant_7, on = ['Date', 'Merchnum', 'Cardnum', 'order'], how = 'left', suffixes=['', '_sum_card_merchant_7']) \
.merge(sum_card_merchant_14, on = ['Date', 'Merchnum', 'Cardnum', 'order'], how = 'left', suffixes=['', '_sum_card_merchant_14']) \
.merge(sum_card_merchant_30, on = ['Date', 'Merchnum', 'Cardnum', 'order'], how = 'left', suffixes=['', '_sum_card_merchant_30'])

In [74]:
merged_data_card_merchant['Amount_avg_card_merchant_0'] = merged_data_card_merchant['Amount']
merged_data_card_merchant['Amount_max_card_merchant_0'] = merged_data_card_merchant['Amount']
merged_data_card_merchant['Amount_median_card_merchant_0'] = merged_data_card_merchant['Amount']
merged_data_card_merchant['Amount_sum_card_merchant_0'] = merged_data_card_merchant['Amount']

In [75]:
merged_data_card_merchant.head()

Unnamed: 0,Date,Cardnum,Merchnum,Amount,Fraud,order,Amount_avg_card_merchant_1,Amount_avg_card_merchant_3,Amount_avg_card_merchant_7,Amount_avg_card_merchant_14,...,Amount_median_card_merchant_30,Amount_sum_card_merchant_1,Amount_sum_card_merchant_3,Amount_sum_card_merchant_7,Amount_sum_card_merchant_14,Amount_sum_card_merchant_30,Amount_avg_card_merchant_0,Amount_max_card_merchant_0,Amount_median_card_merchant_0,Amount_sum_card_merchant_0
0,2010-01-01,5142190439,5509006296254,3.62,0,1,3.62,3.62,3.62,3.62,...,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62
1,2010-01-01,5142183973,61003026333,31.42,0,1,31.42,31.42,31.42,31.42,...,31.42,31.42,31.42,31.42,31.42,31.42,31.42,31.42,31.42,31.42
2,2010-01-01,5142131721,4503082993600,178.49,0,1,178.49,178.49,178.49,178.49,...,178.49,178.49,178.49,178.49,178.49,178.49,178.49,178.49,178.49,178.49
3,2010-01-01,5142148452,5509006296254,3.62,0,1,3.62,3.62,3.62,3.62,...,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62
4,2010-01-01,5142190439,5509006296254,3.62,0,2,3.62,3.62,3.62,3.62,...,3.62,7.24,7.24,7.24,7.24,7.24,3.62,3.62,3.62,3.62


In [76]:
for i in [0, 1, 3, 7, 14, 30]:
    merged_data_card_merchant['qaa_cm_' + str(i)] = merged_data_card_merchant['Amount'] / merged_data_card_merchant['Amount_avg_card_merchant_' + str(i)]
    merged_data_card_merchant['qam_cm_' + str(i)] = merged_data_card_merchant['Amount'] / merged_data_card_merchant['Amount_max_card_merchant_' + str(i)]
    merged_data_card_merchant['qame_cm_' + str(i)] = merged_data_card_merchant['Amount'] / merged_data_card_merchant['Amount_median_card_merchant_' + str(i)]
    merged_data_card_merchant['qas_cm_' + str(i)] = merged_data_card_merchant['Amount'] / merged_data_card_merchant['Amount_sum_card_merchant_' + str(i)]

In [77]:
merged_data_card_merchant.head()

Unnamed: 0,Date,Cardnum,Merchnum,Amount,Fraud,order,Amount_avg_card_merchant_1,Amount_avg_card_merchant_3,Amount_avg_card_merchant_7,Amount_avg_card_merchant_14,...,qame_cm_7,qas_cm_7,qaa_cm_14,qam_cm_14,qame_cm_14,qas_cm_14,qaa_cm_30,qam_cm_30,qame_cm_30,qas_cm_30
0,2010-01-01,5142190439,5509006296254,3.62,0,1,3.62,3.62,3.62,3.62,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1,2010-01-01,5142183973,61003026333,31.42,0,1,31.42,31.42,31.42,31.42,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2,2010-01-01,5142131721,4503082993600,178.49,0,1,178.49,178.49,178.49,178.49,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
3,2010-01-01,5142148452,5509006296254,3.62,0,1,3.62,3.62,3.62,3.62,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
4,2010-01-01,5142190439,5509006296254,3.62,0,2,3.62,3.62,3.62,3.62,...,1.0,0.5,1.0,1.0,1.0,0.5,1.0,1.0,1.0,0.5


In [78]:
merged_data_card_merchant.shape

(83970, 54)

### Card + Zip Code

In [79]:
for i in [1, 3, 7, 14, 30]:
    locals()['avg_card_zip_' + str(i)] = df.groupby(['Cardnum', 'Merch zip'])['Amount'].rolling(str(i) + 'd').mean().reset_index()
    locals()['avg_card_zip_' + str(i)]['order'] = locals()['avg_card_zip_' + str(i)].groupby(['Cardnum', 'Merch zip', 'Date']).cumcount() + 1
    
    locals()['max_card_zip_' + str(i)] = df.groupby(['Cardnum', 'Merch zip'])['Amount'].rolling(str(i) + 'd').max().reset_index()
    locals()['max_card_zip_' + str(i)]['order'] = locals()['max_card_zip_' + str(i)].groupby(['Cardnum', 'Merch zip', 'Date']).cumcount() + 1
    
    locals()['median_card_zip_' + str(i)] = df.groupby(['Cardnum', 'Merch zip'])['Amount'].rolling(str(i) + 'd').median().reset_index()
    locals()['median_card_zip_' + str(i)]['order'] = locals()['median_card_zip_' + str(i)].groupby(['Cardnum', 'Merch zip', 'Date']).cumcount() + 1
    
    locals()['sum_card_zip_' + str(i)] = df.groupby(['Cardnum', 'Merch zip'])['Amount'].rolling(str(i) + 'd').sum().reset_index()
    locals()['sum_card_zip_' + str(i)]['order'] = locals()['sum_card_zip_' + str(i)].groupby(['Cardnum', 'Merch zip', 'Date']).cumcount() + 1

In [80]:
avg_card_zip_30.head()

Unnamed: 0,Cardnum,Merch zip,Date,Amount,order
0,5142110002,38118.0,2010-10-12,150.0,1
1,5142110081,38118.0,2010-03-08,495.9,1
2,5142110081,38118.0,2010-03-08,566.05,2
3,5142110313,38118.0,2010-10-07,144.0,1
4,5142110313,38118.0,2010-10-07,94.0,2


In [81]:
max_card_zip_30.head()

Unnamed: 0,Cardnum,Merch zip,Date,Amount,order
0,5142110002,38118.0,2010-10-12,150.0,1
1,5142110081,38118.0,2010-03-08,495.9,1
2,5142110081,38118.0,2010-03-08,636.2,2
3,5142110313,38118.0,2010-10-07,144.0,1
4,5142110313,38118.0,2010-10-07,144.0,2


In [82]:
median_card_zip_30.head()

Unnamed: 0,Cardnum,Merch zip,Date,Amount,order
0,5142110002,38118.0,2010-10-12,150.0,1
1,5142110081,38118.0,2010-03-08,495.9,1
2,5142110081,38118.0,2010-03-08,566.05,2
3,5142110313,38118.0,2010-10-07,144.0,1
4,5142110313,38118.0,2010-10-07,94.0,2


In [83]:
sum_card_zip_30.head()

Unnamed: 0,Cardnum,Merch zip,Date,Amount,order
0,5142110002,38118.0,2010-10-12,150.0,1
1,5142110081,38118.0,2010-03-08,495.9,1
2,5142110081,38118.0,2010-03-08,1132.1,2
3,5142110313,38118.0,2010-10-07,144.0,1
4,5142110313,38118.0,2010-10-07,188.0,2


In [84]:
card_zip_df = df.copy()
card_zip_df = card_zip_df.reset_index()
card_zip_df['order'] = card_zip_df.groupby(['Cardnum', 'Merch zip', 'Date']).cumcount() + 1

In [85]:
card_zip_df.head()

Unnamed: 0,Date,Recnum,Cardnum,Merchnum,Merch description,Merch state,Merch zip,Transtype,Amount,Fraud,order
0,2010-01-01,1,5142190439,5509006296254,FEDEX SHP 12/23/09 AB#,TN,38118.0,P,3.62,0,1
1,2010-01-01,2,5142183973,61003026333,SERVICE MERCHANDISE #81,MA,1803.0,P,31.42,0,1
2,2010-01-01,3,5142131721,4503082993600,OFFICE DEPOT #191,MD,20706.0,P,178.49,0,1
3,2010-01-01,4,5142148452,5509006296254,FEDEX SHP 12/28/09 AB#,TN,38118.0,P,3.62,0,1
4,2010-01-01,5,5142190439,5509006296254,FEDEX SHP 12/23/09 AB#,TN,38118.0,P,3.62,0,2


In [86]:
not_needed_columns = ['index', 'Recnum', 'Merchnum', 'Merch description', 'Merch state', 'Transtype']
for column in not_needed_columns:
    try:
        del card_zip_df[column]
    except:
        pass

In [87]:
card_zip_df.head()

Unnamed: 0,Date,Cardnum,Merch zip,Amount,Fraud,order
0,2010-01-01,5142190439,38118.0,3.62,0,1
1,2010-01-01,5142183973,1803.0,31.42,0,1
2,2010-01-01,5142131721,20706.0,178.49,0,1
3,2010-01-01,5142148452,38118.0,3.62,0,1
4,2010-01-01,5142190439,38118.0,3.62,0,2


In [88]:
merged_data_card_zip = card_zip_df \
.merge(avg_card_zip_1, on = ['Date', 'Merch zip', 'Cardnum', 'order'], how = 'left', suffixes=['', '_avg_card_zip_1'])\
.merge(avg_card_zip_3, on = ['Date', 'Merch zip', 'Cardnum', 'order'], how = 'left', suffixes=['', '_avg_card_zip_3']) \
.merge(avg_card_zip_7, on = ['Date', 'Merch zip', 'Cardnum', 'order'], how = 'left', suffixes=['', '_avg_card_zip_7']) \
.merge(avg_card_zip_14, on = ['Date', 'Merch zip', 'Cardnum', 'order'], how = 'left', suffixes=['', '_avg_card_zip_14']) \
.merge(avg_card_zip_30, on = ['Date', 'Merch zip', 'Cardnum', 'order'], how = 'left', suffixes=['', '_avg_card_zip_30']) \
.merge(max_card_zip_1, on = ['Date', 'Merch zip', 'Cardnum', 'order'], how = 'left', suffixes=['', '_max_card_zip_1'])\
.merge(max_card_zip_3, on = ['Date', 'Merch zip', 'Cardnum', 'order'], how = 'left', suffixes=['', '_max_card_zip_3']) \
.merge(max_card_zip_7, on = ['Date', 'Merch zip', 'Cardnum', 'order'], how = 'left', suffixes=['', '_max_card_zip_7']) \
.merge(max_card_zip_14, on = ['Date', 'Merch zip', 'Cardnum', 'order'], how = 'left', suffixes=['', '_max_card_zip_14']) \
.merge(max_card_zip_30, on = ['Date', 'Merch zip', 'Cardnum', 'order'], how = 'left', suffixes=['', '_max_card_zip_30']) \
.merge(median_card_zip_1, on = ['Date', 'Merch zip', 'Cardnum', 'order'], how = 'left', suffixes=['', '_median_card_zip_1'])\
.merge(median_card_zip_3, on = ['Date', 'Merch zip', 'Cardnum', 'order'], how = 'left', suffixes=['', '_median_card_zip_3']) \
.merge(median_card_zip_7, on = ['Date', 'Merch zip', 'Cardnum', 'order'], how = 'left', suffixes=['', '_median_card_zip_7']) \
.merge(median_card_zip_14, on = ['Date', 'Merch zip', 'Cardnum', 'order'], how = 'left', suffixes=['', '_median_card_zip_14']) \
.merge(median_card_zip_30, on = ['Date', 'Merch zip', 'Cardnum', 'order'], how = 'left', suffixes=['', '_median_card_zip_30']) \
.merge(sum_card_zip_1, on = ['Date', 'Merch zip', 'Cardnum', 'order'], how = 'left', suffixes=['', '_sum_card_zip_1'])\
.merge(sum_card_zip_3, on = ['Date', 'Merch zip', 'Cardnum', 'order'], how = 'left', suffixes=['', '_sum_card_zip_3']) \
.merge(sum_card_zip_7, on = ['Date', 'Merch zip', 'Cardnum', 'order'], how = 'left', suffixes=['', '_sum_card_zip_7']) \
.merge(sum_card_zip_14, on = ['Date', 'Merch zip', 'Cardnum', 'order'], how = 'left', suffixes=['', '_sum_card_zip_14']) \
.merge(sum_card_zip_30, on = ['Date', 'Merch zip', 'Cardnum', 'order'], how = 'left', suffixes=['', '_sum_card_zip_30'])

In [89]:
merged_data_card_zip['Amount_avg_card_zip_0'] = merged_data_card_zip['Amount']
merged_data_card_zip['Amount_max_card_zip_0'] = merged_data_card_zip['Amount']
merged_data_card_zip['Amount_median_card_zip_0'] = merged_data_card_zip['Amount']
merged_data_card_zip['Amount_sum_card_zip_0'] = merged_data_card_zip['Amount']

In [90]:
merged_data_card_zip.head()

Unnamed: 0,Date,Cardnum,Merch zip,Amount,Fraud,order,Amount_avg_card_zip_1,Amount_avg_card_zip_3,Amount_avg_card_zip_7,Amount_avg_card_zip_14,...,Amount_median_card_zip_30,Amount_sum_card_zip_1,Amount_sum_card_zip_3,Amount_sum_card_zip_7,Amount_sum_card_zip_14,Amount_sum_card_zip_30,Amount_avg_card_zip_0,Amount_max_card_zip_0,Amount_median_card_zip_0,Amount_sum_card_zip_0
0,2010-01-01,5142190439,38118.0,3.62,0,1,3.62,3.62,3.62,3.62,...,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62
1,2010-01-01,5142183973,1803.0,31.42,0,1,31.42,31.42,31.42,31.42,...,31.42,31.42,31.42,31.42,31.42,31.42,31.42,31.42,31.42,31.42
2,2010-01-01,5142131721,20706.0,178.49,0,1,178.49,178.49,178.49,178.49,...,178.49,178.49,178.49,178.49,178.49,178.49,178.49,178.49,178.49,178.49
3,2010-01-01,5142148452,38118.0,3.62,0,1,3.62,3.62,3.62,3.62,...,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62
4,2010-01-01,5142190439,38118.0,3.62,0,2,3.62,3.62,3.62,3.62,...,3.62,7.24,7.24,7.24,7.24,7.24,3.62,3.62,3.62,3.62


In [91]:
for i in [0, 1, 3, 7, 14, 30]:
    merged_data_card_zip['qaa_cm_' + str(i)] = merged_data_card_zip['Amount'] / merged_data_card_zip['Amount_avg_card_zip_' + str(i)]
    merged_data_card_zip['qam_cm_' + str(i)] = merged_data_card_zip['Amount'] / merged_data_card_zip['Amount_max_card_zip_' + str(i)]
    merged_data_card_zip['qame_cm_' + str(i)] = merged_data_card_zip['Amount'] / merged_data_card_zip['Amount_median_card_zip_' + str(i)]
    merged_data_card_zip['qas_cm_' + str(i)] = merged_data_card_zip['Amount'] / merged_data_card_zip['Amount_sum_card_zip_' + str(i)]

In [92]:
merged_data_card_zip.head()

Unnamed: 0,Date,Cardnum,Merch zip,Amount,Fraud,order,Amount_avg_card_zip_1,Amount_avg_card_zip_3,Amount_avg_card_zip_7,Amount_avg_card_zip_14,...,qame_cm_7,qas_cm_7,qaa_cm_14,qam_cm_14,qame_cm_14,qas_cm_14,qaa_cm_30,qam_cm_30,qame_cm_30,qas_cm_30
0,2010-01-01,5142190439,38118.0,3.62,0,1,3.62,3.62,3.62,3.62,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1,2010-01-01,5142183973,1803.0,31.42,0,1,31.42,31.42,31.42,31.42,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2,2010-01-01,5142131721,20706.0,178.49,0,1,178.49,178.49,178.49,178.49,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
3,2010-01-01,5142148452,38118.0,3.62,0,1,3.62,3.62,3.62,3.62,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
4,2010-01-01,5142190439,38118.0,3.62,0,2,3.62,3.62,3.62,3.62,...,1.0,0.5,1.0,1.0,1.0,0.5,1.0,1.0,1.0,0.5


In [93]:
merged_data_card_zip.shape

(83970, 54)

### Card + State

In [94]:
for i in [1, 3, 7, 14, 30]:
    locals()['avg_card_state_' + str(i)] = df.groupby(['Cardnum', 'Merch state'])['Amount'].rolling(str(i) + 'd').mean().reset_index()
    locals()['avg_card_state_' + str(i)]['order'] = locals()['avg_card_state_' + str(i)].groupby(['Cardnum', 'Merch state', 'Date']).cumcount() + 1
    
    locals()['max_card_state_' + str(i)] = df.groupby(['Cardnum', 'Merch state'])['Amount'].rolling(str(i) + 'd').max().reset_index()
    locals()['max_card_state_' + str(i)]['order'] = locals()['max_card_state_' + str(i)].groupby(['Cardnum', 'Merch state', 'Date']).cumcount() + 1
    
    locals()['median_card_state_' + str(i)] = df.groupby(['Cardnum', 'Merch state'])['Amount'].rolling(str(i) + 'd').median().reset_index()
    locals()['median_card_state_' + str(i)]['order'] = locals()['median_card_state_' + str(i)].groupby(['Cardnum', 'Merch state', 'Date']).cumcount() + 1
    
    locals()['sum_card_state_' + str(i)] = df.groupby(['Cardnum', 'Merch state'])['Amount'].rolling(str(i) + 'd').sum().reset_index()
    locals()['sum_card_state_' + str(i)]['order'] = locals()['sum_card_state_' + str(i)].groupby(['Cardnum', 'Merch state', 'Date']).cumcount() + 1

In [95]:
avg_card_state_14.head()

Unnamed: 0,Cardnum,Merch state,Date,Amount,order
0,5142110002,VA,2010-10-12,150.0,1
1,5142110081,TN,2010-03-08,495.9,1
2,5142110081,TN,2010-03-08,566.05,2
3,5142110313,TN,2010-10-07,144.0,1
4,5142110313,TN,2010-10-07,94.0,2


In [96]:
max_card_state_14.head()

Unnamed: 0,Cardnum,Merch state,Date,Amount,order
0,5142110002,VA,2010-10-12,150.0,1
1,5142110081,TN,2010-03-08,495.9,1
2,5142110081,TN,2010-03-08,636.2,2
3,5142110313,TN,2010-10-07,144.0,1
4,5142110313,TN,2010-10-07,144.0,2


In [97]:
median_card_state_14.head()

Unnamed: 0,Cardnum,Merch state,Date,Amount,order
0,5142110002,VA,2010-10-12,150.0,1
1,5142110081,TN,2010-03-08,495.9,1
2,5142110081,TN,2010-03-08,566.05,2
3,5142110313,TN,2010-10-07,144.0,1
4,5142110313,TN,2010-10-07,94.0,2


In [98]:
sum_card_state_14.head()

Unnamed: 0,Cardnum,Merch state,Date,Amount,order
0,5142110002,VA,2010-10-12,150.0,1
1,5142110081,TN,2010-03-08,495.9,1
2,5142110081,TN,2010-03-08,1132.1,2
3,5142110313,TN,2010-10-07,144.0,1
4,5142110313,TN,2010-10-07,188.0,2


In [99]:
card_state_df = df.copy()
card_state_df = card_state_df.reset_index()
card_state_df['order'] = card_state_df.groupby(['Cardnum', 'Merch state', 'Date']).cumcount() + 1

In [100]:
card_state_df.head()

Unnamed: 0,Date,Recnum,Cardnum,Merchnum,Merch description,Merch state,Merch zip,Transtype,Amount,Fraud,order
0,2010-01-01,1,5142190439,5509006296254,FEDEX SHP 12/23/09 AB#,TN,38118.0,P,3.62,0,1
1,2010-01-01,2,5142183973,61003026333,SERVICE MERCHANDISE #81,MA,1803.0,P,31.42,0,1
2,2010-01-01,3,5142131721,4503082993600,OFFICE DEPOT #191,MD,20706.0,P,178.49,0,1
3,2010-01-01,4,5142148452,5509006296254,FEDEX SHP 12/28/09 AB#,TN,38118.0,P,3.62,0,1
4,2010-01-01,5,5142190439,5509006296254,FEDEX SHP 12/23/09 AB#,TN,38118.0,P,3.62,0,2


In [101]:
not_needed_columns = ['index', 'Recnum', 'Merchnum', 'Merch description', 'Merch zip', 'Transtype']
for column in not_needed_columns:
    try:
        del card_state_df[column]
    except:
        pass

In [102]:
card_state_df.head()

Unnamed: 0,Date,Cardnum,Merch state,Amount,Fraud,order
0,2010-01-01,5142190439,TN,3.62,0,1
1,2010-01-01,5142183973,MA,31.42,0,1
2,2010-01-01,5142131721,MD,178.49,0,1
3,2010-01-01,5142148452,TN,3.62,0,1
4,2010-01-01,5142190439,TN,3.62,0,2


In [103]:
merged_data_card_state = card_state_df \
.merge(avg_card_state_1, on = ['Date', 'Merch state', 'Cardnum', 'order'], how = 'left', suffixes=['', '_avg_card_state_1'])\
.merge(avg_card_state_3, on = ['Date', 'Merch state', 'Cardnum', 'order'], how = 'left', suffixes=['', '_avg_card_state_3']) \
.merge(avg_card_state_7, on = ['Date', 'Merch state', 'Cardnum', 'order'], how = 'left', suffixes=['', '_avg_card_state_7']) \
.merge(avg_card_state_14, on = ['Date', 'Merch state', 'Cardnum', 'order'], how = 'left', suffixes=['', '_avg_card_state_14']) \
.merge(avg_card_state_30, on = ['Date', 'Merch state', 'Cardnum', 'order'], how = 'left', suffixes=['', '_avg_card_state_30']) \
.merge(max_card_state_1, on = ['Date', 'Merch state', 'Cardnum', 'order'], how = 'left', suffixes=['', '_max_card_state_1'])\
.merge(max_card_state_3, on = ['Date', 'Merch state', 'Cardnum', 'order'], how = 'left', suffixes=['', '_max_card_state_3']) \
.merge(max_card_state_7, on = ['Date', 'Merch state', 'Cardnum', 'order'], how = 'left', suffixes=['', '_max_card_state_7']) \
.merge(max_card_state_14, on = ['Date', 'Merch state', 'Cardnum', 'order'], how = 'left', suffixes=['', '_max_card_state_14']) \
.merge(max_card_state_30, on = ['Date', 'Merch state', 'Cardnum', 'order'], how = 'left', suffixes=['', '_max_card_state_30']) \
.merge(median_card_state_1, on = ['Date', 'Merch state', 'Cardnum', 'order'], how = 'left', suffixes=['', '_median_card_state_1'])\
.merge(median_card_state_3, on = ['Date', 'Merch state', 'Cardnum', 'order'], how = 'left', suffixes=['', '_median_card_state_3']) \
.merge(median_card_state_7, on = ['Date', 'Merch state', 'Cardnum', 'order'], how = 'left', suffixes=['', '_median_card_state_7']) \
.merge(median_card_state_14, on = ['Date', 'Merch state', 'Cardnum', 'order'], how = 'left', suffixes=['', '_median_card_state_14']) \
.merge(median_card_state_30, on = ['Date', 'Merch state', 'Cardnum', 'order'], how = 'left', suffixes=['', '_median_card_state_30']) \
.merge(sum_card_state_1, on = ['Date', 'Merch state', 'Cardnum', 'order'], how = 'left', suffixes=['', '_sum_card_state_1'])\
.merge(sum_card_state_3, on = ['Date', 'Merch state', 'Cardnum', 'order'], how = 'left', suffixes=['', '_sum_card_state_3']) \
.merge(sum_card_state_7, on = ['Date', 'Merch state', 'Cardnum', 'order'], how = 'left', suffixes=['', '_sum_card_state_7']) \
.merge(sum_card_state_14, on = ['Date', 'Merch state', 'Cardnum', 'order'], how = 'left', suffixes=['', '_sum_card_state_14']) \
.merge(sum_card_state_30, on = ['Date', 'Merch state', 'Cardnum', 'order'], how = 'left', suffixes=['', '_sum_card_state_30'])

In [104]:
merged_data_card_state['Amount_avg_card_state_0'] = merged_data_card_state['Amount']
merged_data_card_state['Amount_max_card_state_0'] = merged_data_card_state['Amount']
merged_data_card_state['Amount_median_card_state_0'] = merged_data_card_state['Amount']
merged_data_card_state['Amount_sum_card_state_0'] = merged_data_card_state['Amount']

In [105]:
merged_data_card_state.head()

Unnamed: 0,Date,Cardnum,Merch state,Amount,Fraud,order,Amount_avg_card_state_1,Amount_avg_card_state_3,Amount_avg_card_state_7,Amount_avg_card_state_14,...,Amount_median_card_state_30,Amount_sum_card_state_1,Amount_sum_card_state_3,Amount_sum_card_state_7,Amount_sum_card_state_14,Amount_sum_card_state_30,Amount_avg_card_state_0,Amount_max_card_state_0,Amount_median_card_state_0,Amount_sum_card_state_0
0,2010-01-01,5142190439,TN,3.62,0,1,3.62,3.62,3.62,3.62,...,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62
1,2010-01-01,5142183973,MA,31.42,0,1,31.42,31.42,31.42,31.42,...,31.42,31.42,31.42,31.42,31.42,31.42,31.42,31.42,31.42,31.42
2,2010-01-01,5142131721,MD,178.49,0,1,178.49,178.49,178.49,178.49,...,178.49,178.49,178.49,178.49,178.49,178.49,178.49,178.49,178.49,178.49
3,2010-01-01,5142148452,TN,3.62,0,1,3.62,3.62,3.62,3.62,...,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62
4,2010-01-01,5142190439,TN,3.62,0,2,3.62,3.62,3.62,3.62,...,3.62,7.24,7.24,7.24,7.24,7.24,3.62,3.62,3.62,3.62


In [106]:
for i in [0, 1, 3, 7, 14, 30]:
    merged_data_card_state['qaa_cm_' + str(i)] = merged_data_card_state['Amount'] / merged_data_card_state['Amount_avg_card_state_' + str(i)]
    merged_data_card_state['qam_cm_' + str(i)] = merged_data_card_state['Amount'] / merged_data_card_state['Amount_max_card_state_' + str(i)]
    merged_data_card_state['qame_cm_' + str(i)] = merged_data_card_state['Amount'] / merged_data_card_state['Amount_median_card_state_' + str(i)]
    merged_data_card_state['qas_cm_' + str(i)] = merged_data_card_state['Amount'] / merged_data_card_state['Amount_sum_card_state_' + str(i)]

In [107]:
merged_data_card_state.head()

Unnamed: 0,Date,Cardnum,Merch state,Amount,Fraud,order,Amount_avg_card_state_1,Amount_avg_card_state_3,Amount_avg_card_state_7,Amount_avg_card_state_14,...,qame_cm_7,qas_cm_7,qaa_cm_14,qam_cm_14,qame_cm_14,qas_cm_14,qaa_cm_30,qam_cm_30,qame_cm_30,qas_cm_30
0,2010-01-01,5142190439,TN,3.62,0,1,3.62,3.62,3.62,3.62,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1,2010-01-01,5142183973,MA,31.42,0,1,31.42,31.42,31.42,31.42,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2,2010-01-01,5142131721,MD,178.49,0,1,178.49,178.49,178.49,178.49,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
3,2010-01-01,5142148452,TN,3.62,0,1,3.62,3.62,3.62,3.62,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
4,2010-01-01,5142190439,TN,3.62,0,2,3.62,3.62,3.62,3.62,...,1.0,0.5,1.0,1.0,1.0,0.5,1.0,1.0,1.0,0.5


In [108]:
merged_data_card_state.shape

(83970, 54)

### Amount Variables 240

In [109]:
merged_data_card.head()

Unnamed: 0,Date,Cardnum,Amount,Fraud,order,Amount_avg_card_1,Amount_avg_card_3,Amount_avg_card_7,Amount_avg_card_14,Amount_avg_card_30,...,qame_cm_7,qas_cm_7,qaa_cm_14,qam_cm_14,qame_cm_14,qas_cm_14,qaa_cm_30,qam_cm_30,qame_cm_30,qas_cm_30
0,2010-01-01,5142190439,3.62,0,1,3.62,3.62,3.62,3.62,3.62,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1,2010-01-01,5142183973,31.42,0,1,31.42,31.42,31.42,31.42,31.42,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2,2010-01-01,5142131721,178.49,0,1,178.49,178.49,178.49,178.49,178.49,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
3,2010-01-01,5142148452,3.62,0,1,3.62,3.62,3.62,3.62,3.62,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
4,2010-01-01,5142190439,3.62,0,2,3.62,3.62,3.62,3.62,3.62,...,1.0,0.5,1.0,1.0,1.0,0.5,1.0,1.0,1.0,0.5


In [110]:
merged_data_merchant.head()

Unnamed: 0,Date,Merchnum,Amount,Fraud,order,Amount_avg_merchant_1,Amount_avg_merchant_3,Amount_avg_merchant_7,Amount_avg_merchant_14,Amount_avg_merchant_30,...,qame_cm_7,qas_cm_7,qaa_cm_14,qam_cm_14,qame_cm_14,qas_cm_14,qaa_cm_30,qam_cm_30,qame_cm_30,qas_cm_30
0,2010-01-01,5509006296254,3.62,0,1,3.62,3.62,3.62,3.62,3.62,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1,2010-01-01,61003026333,31.42,0,1,31.42,31.42,31.42,31.42,31.42,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2,2010-01-01,4503082993600,178.49,0,1,178.49,178.49,178.49,178.49,178.49,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
3,2010-01-01,5509006296254,3.62,0,2,3.62,3.62,3.62,3.62,3.62,...,1.0,0.5,1.0,1.0,1.0,0.5,1.0,1.0,1.0,0.5
4,2010-01-01,5509006296254,3.62,0,3,3.62,3.62,3.62,3.62,3.62,...,1.0,0.333333,1.0,1.0,1.0,0.333333,1.0,1.0,1.0,0.333333


In [111]:
merged_data_card_merchant.head()

Unnamed: 0,Date,Cardnum,Merchnum,Amount,Fraud,order,Amount_avg_card_merchant_1,Amount_avg_card_merchant_3,Amount_avg_card_merchant_7,Amount_avg_card_merchant_14,...,qame_cm_7,qas_cm_7,qaa_cm_14,qam_cm_14,qame_cm_14,qas_cm_14,qaa_cm_30,qam_cm_30,qame_cm_30,qas_cm_30
0,2010-01-01,5142190439,5509006296254,3.62,0,1,3.62,3.62,3.62,3.62,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1,2010-01-01,5142183973,61003026333,31.42,0,1,31.42,31.42,31.42,31.42,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2,2010-01-01,5142131721,4503082993600,178.49,0,1,178.49,178.49,178.49,178.49,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
3,2010-01-01,5142148452,5509006296254,3.62,0,1,3.62,3.62,3.62,3.62,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
4,2010-01-01,5142190439,5509006296254,3.62,0,2,3.62,3.62,3.62,3.62,...,1.0,0.5,1.0,1.0,1.0,0.5,1.0,1.0,1.0,0.5


In [112]:
merged_data_card_zip.head()

Unnamed: 0,Date,Cardnum,Merch zip,Amount,Fraud,order,Amount_avg_card_zip_1,Amount_avg_card_zip_3,Amount_avg_card_zip_7,Amount_avg_card_zip_14,...,qame_cm_7,qas_cm_7,qaa_cm_14,qam_cm_14,qame_cm_14,qas_cm_14,qaa_cm_30,qam_cm_30,qame_cm_30,qas_cm_30
0,2010-01-01,5142190439,38118.0,3.62,0,1,3.62,3.62,3.62,3.62,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1,2010-01-01,5142183973,1803.0,31.42,0,1,31.42,31.42,31.42,31.42,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2,2010-01-01,5142131721,20706.0,178.49,0,1,178.49,178.49,178.49,178.49,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
3,2010-01-01,5142148452,38118.0,3.62,0,1,3.62,3.62,3.62,3.62,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
4,2010-01-01,5142190439,38118.0,3.62,0,2,3.62,3.62,3.62,3.62,...,1.0,0.5,1.0,1.0,1.0,0.5,1.0,1.0,1.0,0.5


In [113]:
merged_data_card_state.head()

Unnamed: 0,Date,Cardnum,Merch state,Amount,Fraud,order,Amount_avg_card_state_1,Amount_avg_card_state_3,Amount_avg_card_state_7,Amount_avg_card_state_14,...,qame_cm_7,qas_cm_7,qaa_cm_14,qam_cm_14,qame_cm_14,qas_cm_14,qaa_cm_30,qam_cm_30,qame_cm_30,qas_cm_30
0,2010-01-01,5142190439,TN,3.62,0,1,3.62,3.62,3.62,3.62,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1,2010-01-01,5142183973,MA,31.42,0,1,31.42,31.42,31.42,31.42,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2,2010-01-01,5142131721,MD,178.49,0,1,178.49,178.49,178.49,178.49,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
3,2010-01-01,5142148452,TN,3.62,0,1,3.62,3.62,3.62,3.62,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
4,2010-01-01,5142190439,TN,3.62,0,2,3.62,3.62,3.62,3.62,...,1.0,0.5,1.0,1.0,1.0,0.5,1.0,1.0,1.0,0.5


In [114]:
merged_data_card_pure = merged_data_card.iloc[:, 5:]
merged_data_merchant_pure = merged_data_merchant.iloc[:, 5:]
merged_data_card_merchant_pure = merged_data_card_merchant.iloc[:, 6:]
merged_data_card_zip_pure = merged_data_card_zip.iloc[:, 6:]
merged_data_card_state_pure = merged_data_card_state.iloc[:, 6:]

In [119]:
[merged_data_card_pure.shape, 
merged_data_merchant_pure.shape,
merged_data_card_merchant_pure.shape, 
merged_data_card_zip_pure.shape,
merged_data_card_state_pure.shape]

[(83970, 48), (83970, 48), (83970, 48), (83970, 48), (83970, 48)]

In [120]:
Amount_variables = pd.concat([merged_data_card_pure,
                              merged_data_merchant_pure,
                              merged_data_card_merchant_pure, 
                              merged_data_card_zip_pure, 
                              merged_data_card_state_pure], axis = 1)

In [121]:
Amount_variables.shape

(83970, 240)

In [122]:
Amount_variables.head()

Unnamed: 0,Amount_avg_card_1,Amount_avg_card_3,Amount_avg_card_7,Amount_avg_card_14,Amount_avg_card_30,Amount_max_card_1,Amount_max_card_3,Amount_max_card_7,Amount_max_card_14,Amount_max_card_30,...,qame_cm_7,qas_cm_7,qaa_cm_14,qam_cm_14,qame_cm_14,qas_cm_14,qaa_cm_30,qam_cm_30,qame_cm_30,qas_cm_30
0,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1,31.42,31.42,31.42,31.42,31.42,31.42,31.42,31.42,31.42,31.42,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2,178.49,178.49,178.49,178.49,178.49,178.49,178.49,178.49,178.49,178.49,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
3,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
4,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62,...,1.0,0.5,1.0,1.0,1.0,0.5,1.0,1.0,1.0,0.5


In [123]:
Amount_variables['Recnum'] = df['Recnum'].values
Amount_variables['Fraud'] = df['Fraud'].values
Amount_variables['Date'] = df.index.values

In [124]:
Amount_variables.head()

Unnamed: 0,Amount_avg_card_1,Amount_avg_card_3,Amount_avg_card_7,Amount_avg_card_14,Amount_avg_card_30,Amount_max_card_1,Amount_max_card_3,Amount_max_card_7,Amount_max_card_14,Amount_max_card_30,...,qam_cm_14,qame_cm_14,qas_cm_14,qaa_cm_30,qam_cm_30,qame_cm_30,qas_cm_30,Recnum,Fraud,Date
0,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1,0,2010-01-01
1,31.42,31.42,31.42,31.42,31.42,31.42,31.42,31.42,31.42,31.42,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2,0,2010-01-01
2,178.49,178.49,178.49,178.49,178.49,178.49,178.49,178.49,178.49,178.49,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,3,0,2010-01-01
3,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,4,0,2010-01-01
4,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62,3.62,...,1.0,1.0,0.5,1.0,1.0,1.0,0.5,5,0,2010-01-01


In [125]:
Amount_variables.shape

(83970, 243)

## Frequency Variables

### Card

In [None]:
count_card_1 = df.groupby(['Cardnum'])['Amount'].rolling('1d').count().reset_index()
count_card_1['Count'] = count_card_1['Amount'].astype('int')
del count_card_1['Amount']
count_card_1['order'] = count_card_1.groupby(['Cardnum', 'Date']).cumcount() + 1

count_card_3 = df.groupby(['Cardnum'])['Amount'].rolling('3d').count().reset_index()
count_card_3['Count'] = count_card_3['Amount'].astype('int')
del count_card_3['Amount']
count_card_3['order'] = count_card_3.groupby(['Cardnum', 'Date']).cumcount() + 1

count_card_7 = df.groupby(['Cardnum'])['Amount'].rolling('7d').count().reset_index()
count_card_7['Count'] = count_card_7['Amount'].astype('int')
del count_card_7['Amount']
count_card_7['order'] = count_card_7.groupby(['Cardnum', 'Date']).cumcount() + 1

count_card_14 = df.groupby(['Cardnum'])['Amount'].rolling('14d').count().reset_index()
count_card_14['Count'] = count_card_14['Amount'].astype('int')
del count_card_14['Amount']
count_card_14['order'] = count_card_14.groupby(['Cardnum', 'Date']).cumcount() + 1

count_card_30 = df.groupby(['Cardnum'])['Amount'].rolling('30d').count().reset_index()
count_card_30['Count'] = count_card_30['Amount'].astype('int')
del count_card_30['Amount']
count_card_30['order'] = count_card_30.groupby(['Cardnum', 'Date']).cumcount() + 1

In [None]:
count_card_1.head()

In [None]:
card_df = df.copy()
card_df = card_df.reset_index()
card_df['order'] = card_df.groupby(['Cardnum', 'Date']).cumcount() + 1

In [None]:
card_df.head()

In [None]:
not_needed_columns = ['index', 'Recnum', 'Merchnum', 'Merch description', 'Merch zip', 'Transtype', 'Merch state', 'Amount']
for column in not_needed_columns:
    try:
        del card_df[column]
    except:
        pass

In [None]:
card_df['Count'] = 0

In [None]:
card_df.head()

In [None]:
merged_data_card = card_df \
.merge(count_card_1, on = ['Date', 'Cardnum', 'order'], how = 'left', suffixes=['', '_count_card_1'])\
.merge(count_card_3, on = ['Date', 'Cardnum', 'order'], how = 'left', suffixes=['', '_count_card_3']) \
.merge(count_card_7, on = ['Date', 'Cardnum', 'order'], how = 'left', suffixes=['', '_count_card_7']) \
.merge(count_card_14, on = ['Date', 'Cardnum', 'order'], how = 'left', suffixes=['', '_count_card_14']) \
.merge(count_card_30, on = ['Date', 'Cardnum', 'order'], how = 'left', suffixes=['', '_count_card_30']) 

In [None]:
merged_data_card['Count_count_card_0'] = 1

In [None]:
merged_data_card.tail()

### Merchant

In [None]:
count_merchant_1 = df.groupby(['Merchnum'])['Amount'].rolling('1d').count().reset_index()
count_merchant_1['Count'] = count_merchant_1['Amount'].astype('int')
del count_merchant_1['Amount']
count_merchant_1['order'] = count_merchant_1.groupby(['Merchnum', 'Date']).cumcount() + 1

count_merchant_3 = df.groupby(['Merchnum'])['Amount'].rolling('3d').count().reset_index()
count_merchant_3['Count'] = count_merchant_3['Amount'].astype('int')
del count_merchant_3['Amount']
count_merchant_3['order'] = count_merchant_3.groupby(['Merchnum', 'Date']).cumcount() + 1

count_merchant_7 = df.groupby(['Merchnum'])['Amount'].rolling('7d').count().reset_index()
count_merchant_7['Count'] = count_merchant_7['Amount'].astype('int')
del count_merchant_7['Amount']
count_merchant_7['order'] = count_merchant_7.groupby(['Merchnum', 'Date']).cumcount() + 1

count_merchant_14 = df.groupby(['Merchnum'])['Amount'].rolling('14d').count().reset_index()
count_merchant_14['Count'] = count_merchant_14['Amount'].astype('int')
del count_merchant_14['Amount']
count_merchant_14['order'] = count_merchant_14.groupby(['Merchnum', 'Date']).cumcount() + 1

count_merchant_30 = df.groupby(['Merchnum'])['Amount'].rolling('30d').count().reset_index()
count_merchant_30['Count'] = count_merchant_30['Amount'].astype('int')
del count_merchant_30['Amount']
count_merchant_30['order'] = count_merchant_30.groupby(['Merchnum', 'Date']).cumcount() + 1

In [None]:
merchant_df = df.copy()
merchant_df = merchant_df.reset_index()
merchant_df['order'] = merchant_df.groupby(['Merchnum', 'Date']).cumcount() + 1

In [None]:
merchant_df.head()

In [None]:
not_needed_columns = ['index', 'Recnum', 'Cardnum', 'Merch description', 'Merch zip', 'Transtype', 'Merch state', 'Amount']
for column in not_needed_columns:
    try:
        del merchant_df[column]
    except:
        pass

In [None]:
merchant_df.head()

In [None]:
merged_data_merchant = merchant_df \
.merge(count_merchant_1, on = ['Date', 'Merchnum', 'order'], how = 'left', suffixes=['', '_count_merchant_1'])\
.merge(count_merchant_3, on = ['Date', 'Merchnum', 'order'], how = 'left', suffixes=['', '_count_merchant_3']) \
.merge(count_merchant_7, on = ['Date', 'Merchnum', 'order'], how = 'left', suffixes=['', '_count_merchant_7']) \
.merge(count_merchant_14, on = ['Date', 'Merchnum', 'order'], how = 'left', suffixes=['', '_count_merchant_14']) \
.merge(count_merchant_30, on = ['Date', 'Merchnum', 'order'], how = 'left', suffixes=['', '_count_merchant_30']) 

In [None]:
merged_data_merchant['Count_count_merchant_0'] = 1

In [None]:
merged_data_merchant = merged_data_merchant.rename(columns={'Count':"Count_count_merchant_1"})

In [None]:
merged_data_merchant.tail()

### Card + Merchant

In [None]:
count_card_merchant_1 = df.groupby(['Cardnum', 'Merchnum'])['Amount'].rolling('1d').count().reset_index()
count_card_merchant_1['Count'] = count_card_merchant_1['Amount'].astype('int')
del count_card_merchant_1['Amount']
count_card_merchant_1['order'] = count_card_merchant_1.groupby(['Cardnum', 'Merchnum', 'Date']).cumcount() + 1

count_card_merchant_3 = df.groupby(['Cardnum', 'Merchnum'])['Amount'].rolling('3d').count().reset_index()
count_card_merchant_3['Count'] = count_card_merchant_3['Amount'].astype('int')
del count_card_merchant_3['Amount']
count_card_merchant_3['order'] = count_card_merchant_3.groupby(['Cardnum', 'Merchnum', 'Date']).cumcount() + 1

count_card_merchant_7 = df.groupby(['Cardnum', 'Merchnum'])['Amount'].rolling('7d').count().reset_index()
count_card_merchant_7['Count'] = count_card_merchant_7['Amount'].astype('int')
del count_card_merchant_7['Amount']
count_card_merchant_7['order'] = count_card_merchant_7.groupby(['Cardnum', 'Merchnum', 'Date']).cumcount() + 1

count_card_merchant_14 = df.groupby(['Cardnum', 'Merchnum'])['Amount'].rolling('14d').count().reset_index()
count_card_merchant_14['Count'] = count_card_merchant_14['Amount'].astype('int')
del count_card_merchant_14['Amount']
count_card_merchant_14['order'] = count_card_merchant_14.groupby(['Cardnum', 'Merchnum', 'Date']).cumcount() + 1

count_card_merchant_30 = df.groupby(['Cardnum', 'Merchnum'])['Amount'].rolling('30d').count().reset_index()
count_card_merchant_30['Count'] = count_card_merchant_30['Amount'].astype('int')
del count_card_merchant_30['Amount']
count_card_merchant_30['order'] = count_card_merchant_30.groupby(['Cardnum', 'Merchnum', 'Date']).cumcount() + 1

In [None]:
card_merchant_df = df.copy()
card_merchant_df = card_merchant_df.reset_index()
card_merchant_df['order'] = card_merchant_df.groupby(['Cardnum', 'Merchnum', 'Date']).cumcount() + 1

In [None]:
card_merchant_df.head()

In [None]:
not_needed_columns = ['index', 'Recnum', 'Merch description', 'Merch zip', 'Transtype', 'Merch state', 'Amount']
for column in not_needed_columns:
    try:
        del card_merchant_df[column]
    except:
        pass

In [None]:
card_merchant_df.head()

In [None]:
merged_data_card_merchant = card_merchant_df \
.merge(count_card_merchant_1, on = ['Date', 'Cardnum', 'Merchnum', 'order'], how = 'left', suffixes=['', '_count_card_merchant_1'])\
.merge(count_card_merchant_3, on = ['Date', 'Cardnum', 'Merchnum', 'order'], how = 'left', suffixes=['', '_count_card_merchant_3']) \
.merge(count_card_merchant_7, on = ['Date', 'Cardnum', 'Merchnum', 'order'], how = 'left', suffixes=['', '_count_card_merchant_7']) \
.merge(count_card_merchant_14, on = ['Date', 'Cardnum', 'Merchnum', 'order'], how = 'left', suffixes=['', '_count_card_merchant_14']) \
.merge(count_card_merchant_30, on = ['Date', 'Cardnum', 'Merchnum', 'order'], how = 'left', suffixes=['', '_count_card_merchant_30']) 

In [None]:
merged_data_card_merchant['Count_count_card_merchant_0'] = 1

In [None]:
merged_data_card_merchant = merged_data_card_merchant.rename(columns={'Count':"Count_count_card_merchant_1"})

In [None]:
merged_data_card_merchant.tail()

### Card + Zip code

In [None]:
count_card_zip_1 = df.groupby(['Cardnum', 'Merch zip'])['Amount'].rolling('1d').count().reset_index()
count_card_zip_1['Count'] = count_card_zip_1['Amount'].astype('int')
del count_card_zip_1['Amount']
count_card_zip_1['order'] = count_card_zip_1.groupby(['Cardnum', 'Merch zip', 'Date']).cumcount() + 1

count_card_zip_3 = df.groupby(['Cardnum', 'Merch zip'])['Amount'].rolling('3d').count().reset_index()
count_card_zip_3['Count'] = count_card_zip_3['Amount'].astype('int')
del count_card_zip_3['Amount']
count_card_zip_3['order'] = count_card_zip_3.groupby(['Cardnum', 'Merch zip', 'Date']).cumcount() + 1

count_card_zip_7 = df.groupby(['Cardnum', 'Merch zip'])['Amount'].rolling('7d').count().reset_index()
count_card_zip_7['Count'] = count_card_zip_7['Amount'].astype('int')
del count_card_zip_7['Amount']
count_card_zip_7['order'] = count_card_zip_7.groupby(['Cardnum', 'Merch zip', 'Date']).cumcount() + 1

count_card_zip_14 = df.groupby(['Cardnum', 'Merch zip'])['Amount'].rolling('14d').count().reset_index()
count_card_zip_14['Count'] = count_card_zip_14['Amount'].astype('int')
del count_card_zip_14['Amount']
count_card_zip_14['order'] = count_card_zip_14.groupby(['Cardnum', 'Merch zip', 'Date']).cumcount() + 1

count_card_zip_30 = df.groupby(['Cardnum', 'Merch zip'])['Amount'].rolling('30d').count().reset_index()
count_card_zip_30['Count'] = count_card_zip_30['Amount'].astype('int')
del count_card_zip_30['Amount']
count_card_zip_30['order'] = count_card_zip_30.groupby(['Cardnum', 'Merch zip', 'Date']).cumcount() + 1

In [None]:
card_zip_df = df.copy()
card_zip_df = card_zip_df.reset_index()
card_zip_df['order'] = card_zip_df.groupby(['Cardnum', 'Merch zip', 'Date']).cumcount() + 1

In [None]:
card_zip_df.head()

In [None]:
not_needed_columns = ['index', 'Recnum', 'Merch description', 'Merchnum', 'Transtype', 'Merch state', 'Amount']
for column in not_needed_columns:
    try:
        del card_zip_df[column]
    except:
        pass

In [None]:
card_zip_df.head()

In [None]:
merged_data_card_zip = card_zip_df \
.merge(count_card_zip_1, on = ['Date', 'Cardnum', 'Merch zip', 'order'], how = 'left', suffixes=['', '_count_card_zip_1'])\
.merge(count_card_zip_3, on = ['Date', 'Cardnum', 'Merch zip', 'order'], how = 'left', suffixes=['', '_count_card_zip_3']) \
.merge(count_card_zip_7, on = ['Date', 'Cardnum', 'Merch zip', 'order'], how = 'left', suffixes=['', '_count_card_zip_7']) \
.merge(count_card_zip_14, on = ['Date', 'Cardnum', 'Merch zip', 'order'], how = 'left', suffixes=['', '_count_card_zip_14']) \
.merge(count_card_zip_30, on = ['Date', 'Cardnum', 'Merch zip', 'order'], how = 'left', suffixes=['', '_count_card_zip_30']) 

In [None]:
merged_data_card_zip['Count_count_card_zip_0'] = 1

In [None]:
merged_data_card_zip = merged_data_card_zip.rename(columns={'Count':"Count_count_card_zip_1"})

In [None]:
merged_data_card_zip.tail()

### Card + State

In [None]:
count_card_state_1 = df.groupby(['Cardnum', 'Merch state'])['Amount'].rolling('1d').count().reset_index()
count_card_state_1['Count'] = count_card_state_1['Amount'].astype('int')
del count_card_state_1['Amount']
count_card_state_1['order'] = count_card_state_1.groupby(['Cardnum', 'Merch state', 'Date']).cumcount() + 1

count_card_state_3 = df.groupby(['Cardnum', 'Merch state'])['Amount'].rolling('3d').count().reset_index()
count_card_state_3['Count'] = count_card_state_3['Amount'].astype('int')
del count_card_state_3['Amount']
count_card_state_3['order'] = count_card_state_3.groupby(['Cardnum', 'Merch state', 'Date']).cumcount() + 1

count_card_state_7 = df.groupby(['Cardnum', 'Merch state'])['Amount'].rolling('7d').count().reset_index()
count_card_state_7['Count'] = count_card_state_7['Amount'].astype('int')
del count_card_state_7['Amount']
count_card_state_7['order'] = count_card_state_7.groupby(['Cardnum', 'Merch state', 'Date']).cumcount() + 1

count_card_state_14 = df.groupby(['Cardnum', 'Merch state'])['Amount'].rolling('14d').count().reset_index()
count_card_state_14['Count'] = count_card_state_14['Amount'].astype('int')
del count_card_state_14['Amount']
count_card_state_14['order'] = count_card_state_14.groupby(['Cardnum', 'Merch state', 'Date']).cumcount() + 1

count_card_state_30 = df.groupby(['Cardnum', 'Merch state'])['Amount'].rolling('30d').count().reset_index()
count_card_state_30['Count'] = count_card_state_30['Amount'].astype('int')
del count_card_state_30['Amount']
count_card_state_30['order'] = count_card_state_30.groupby(['Cardnum', 'Merch state', 'Date']).cumcount() + 1

In [None]:
card_state_df = df.copy()
card_state_df = card_state_df.reset_index()
card_state_df['order'] = card_state_df.groupby(['Cardnum', 'Merch state', 'Date']).cumcount() + 1

In [None]:
card_state_df.head()

In [None]:
not_needed_columns = ['index', 'Recnum', 'Merch description', 'Merchnum', 'Transtype', 'Merch zip', 'Amount']
for column in not_needed_columns:
    try:
        del card_state_df[column]
    except:
        pass

In [None]:
card_state_df.head()

In [None]:
merged_data_card_state = card_state_df \
.merge(count_card_state_1, on = ['Date', 'Cardnum', 'Merch state', 'order'], how = 'left', suffixes=['', '_count_card_state_1'])\
.merge(count_card_state_3, on = ['Date', 'Cardnum', 'Merch state', 'order'], how = 'left', suffixes=['', '_count_card_state_3']) \
.merge(count_card_state_7, on = ['Date', 'Cardnum', 'Merch state', 'order'], how = 'left', suffixes=['', '_count_card_state_7']) \
.merge(count_card_state_14, on = ['Date', 'Cardnum', 'Merch state', 'order'], how = 'left', suffixes=['', '_count_card_state_14']) \
.merge(count_card_state_30, on = ['Date', 'Cardnum', 'Merch state', 'order'], how = 'left', suffixes=['', '_count_card_state_30']) 

In [None]:
merged_data_card_state['Count_count_card_state_0'] = 1

In [None]:
merged_data_card_state = merged_data_card_state.rename(columns={'Count':"Count_count_card_state_1"})

In [None]:
merged_data_card_state.tail()

### Frequency Variables 30

In [None]:
merged_data_card.head()

In [None]:
merged_data_merchant.head()

In [None]:
merged_data_card_merchant.head()

In [None]:
merged_data_card_zip.head()

In [None]:
merged_data_card_state.head()

In [None]:
merged_data_card_pure = merged_data_card.iloc[:, 5:]
merged_data_merchant_pure = merged_data_merchant.iloc[:, 4:]
merged_data_card_merchant_pure = merged_data_card_merchant.iloc[:, 5:]
merged_data_card_zip_pure = merged_data_card_zip.iloc[:, 5:]
merged_data_card_state_pure = merged_data_card_state.iloc[:, 5:]

In [None]:
[merged_data_card_pure.shape, 
merged_data_merchant_pure.shape,
merged_data_card_merchant_pure.shape, 
merged_data_card_zip_pure.shape,
merged_data_card_state_pure.shape]

In [None]:
Frequency_variables = pd.concat([merged_data_card_pure,
                              merged_data_merchant_pure,
                              merged_data_card_merchant_pure, 
                              merged_data_card_zip_pure, 
                              merged_data_card_state_pure], axis = 1)

In [None]:
Frequency_variables.shape

In [None]:
Frequency_variables.tail()

In [None]:
Frequency_variables['Recnum'] = df['Recnum'].values
Frequency_variables['Fraud'] = df['Fraud'].values
Frequency_variables['Date'] = df.index.values
Frequency_variables['Transtype'] = df['Transtype'].values

In [None]:
Frequency_variables.tail()

In [None]:
Frequency_variables.shape

In [None]:
len(Frequency_variables['Recnum'].unique())

## Days since Variables

### Card

In [None]:
DateDiff = df.copy()

In [None]:
DateDiff.info()

In [None]:
def subtract(df):
    temp_df = df.copy()
    temp_df = temp_df.reset_index()
#     print(temp_df)
    temp_df['LastTime'] = temp_df['Date'].shift(1)
    temp_df['Card_SinceLastTime'] = temp_df['Date'] - temp_df['LastTime']
    return temp_df

In [None]:
Card_df = DateDiff.groupby(['Cardnum']).apply(lambda df: subtract(df))

In [None]:
Card_df.head()

In [None]:
len(Card_df['Recnum'].unique())

In [None]:
Card = Card_df[['Recnum', 'Card_SinceLastTime']].reset_index(drop = 1)

In [None]:
Card.head()

### Merchant

In [None]:
def subtract(df):
    temp_df = df.copy()
    temp_df = temp_df.reset_index()
#     print(temp_df)
    temp_df['LastTime'] = temp_df['Date'].shift(1)
    temp_df['Merchant_SinceLastTime'] = temp_df['Date'] - temp_df['LastTime']
    return temp_df

In [None]:
Merchant_df = DateDiff.groupby(['Merchnum']).apply(lambda df: subtract(df))

In [None]:
Merchant_df.head()

In [None]:
len(Merchant_df['Recnum'].unique())

In [None]:
Merchant = Merchant_df[['Recnum', 'Merchant_SinceLastTime']].reset_index(drop = 1)

In [None]:
Merchant.head()

### Card_Merchant

In [None]:
def subtract(df):
    temp_df = df.copy()
    temp_df = temp_df.reset_index()
#     print(temp_df)
    temp_df['LastTime'] = temp_df['Date'].shift(1)
    temp_df['Card_Merchant_SinceLastTime'] = temp_df['Date'] - temp_df['LastTime']
    return temp_df

In [None]:
Card_Merchant_df = DateDiff.groupby(['Cardnum', 'Merchnum']).apply(lambda df: subtract(df))

In [None]:
Card_Merchant_df.head()

In [None]:
len(Card_Merchant_df['Recnum'].unique())

In [None]:
Card_Merchant = Card_Merchant_df[['Recnum', 'Card_Merchant_SinceLastTime']].reset_index(drop = 1)

In [None]:
Card_Merchant.head()

### Card_Zip code

In [None]:
def subtract(df):
    temp_df = df.copy()
    temp_df = temp_df.reset_index()
#     print(temp_df)
    temp_df['LastTime'] = temp_df['Date'].shift(1)
    temp_df['Card_Zip_SinceLastTime'] = temp_df['Date'] - temp_df['LastTime']
    return temp_df

In [None]:
Card_Zip_df = DateDiff.groupby(['Cardnum', 'Merch zip']).apply(lambda df: subtract(df))

In [None]:
Card_Zip_df.head()

In [None]:
len(Card_Zip_df['Recnum'].unique())

In [None]:
Card_Zip = Card_Zip_df[['Recnum', 'Card_Zip_SinceLastTime']].reset_index(drop = 1)

In [None]:
Merchant.head()

### Card_State

In [None]:
def subtract(df):
    temp_df = df.copy()
    temp_df = temp_df.reset_index()
#     print(temp_df)
    temp_df['LastTime'] = temp_df['Date'].shift(1)
    temp_df['Card_State_SinceLastTime'] = temp_df['Date'] - temp_df['LastTime']
    return temp_df

In [None]:
Card_State_df = DateDiff.groupby(['Cardnum', 'Merch state']).apply(lambda df: subtract(df))

In [None]:
Card_State_df.head()

In [None]:
len(Card_State_df['Recnum'].unique())

In [None]:
Card_State = Card_State_df[['Recnum', 'Card_State_SinceLastTime']].reset_index(drop = 1)

In [None]:
Card_State.head()

### Days since Variables 5

In [None]:
Days_since_variables = Card.merge(Merchant, on = ['Recnum']) \
.merge(Card_Merchant, on = ['Recnum']) \
.merge(Card_Zip, on = ['Recnum']) \
.merge(Card_State, on = ['Recnum'])

In [None]:
Days_since_variables.head()

In [None]:
Days_since_variables.shape

## Velocity change Variables

### Count_Card_0 + 1

In [None]:
count_card_1 = df.groupby(['Cardnum'])['Amount'].rolling('1d').count().reset_index()
count_card_1['Card_Count_1'] = count_card_1['Amount'].astype('int')
del count_card_1['Amount']
count_card_1['order'] = count_card_1.groupby(['Cardnum', 'Date']).cumcount() + 1

In [None]:
df['order'] = df.groupby(['Cardnum', 'Date']).cumcount() + 1

In [None]:
count_card_1.shape

In [None]:
NC1 = df.merge(count_card_1, on = ['Cardnum', 'Date', 'order'])

In [None]:
NC1.shape

In [None]:
NC1 = NC1[['Recnum', 'Card_Count_1']]

In [None]:
NC1['Card_Count_0'] = 1

In [None]:
NC01 = NC1.copy()

In [None]:
NC01 = NC01.rename(columns = {'Card_Count_1':"NC1", 'Card_Count_0':"NC0"})

In [None]:
NC01.head()

In [None]:
len(NC01['Recnum'].unique())

### Count_Merchant_0 + 1

In [None]:
count_merchant_1 = df.groupby(['Merchnum'])['Amount'].rolling('1d').count().reset_index()
count_merchant_1['Merchant_Count_1'] = count_merchant_1['Amount'].astype('int')
del count_merchant_1['Amount']
count_merchant_1['order'] = count_merchant_1.groupby(['Merchnum', 'Date']).cumcount() + 1

In [None]:
count_merchant_1.shape

In [None]:
df['order'] = df.groupby(['Merchnum', 'Date']).cumcount() + 1

In [None]:
NM1 = df.merge(count_merchant_1, on = ['Merchnum', 'Date', 'order'])

In [None]:
NM1.shape

In [None]:
NM1 = NM1[['Recnum', 'Merchant_Count_1']]

In [None]:
NM1['Merchant_Count_0'] = 1

In [None]:
NM01 = NM1.copy()

In [None]:
NM01 = NM01.rename(columns = {'Merchant_Count_1':"NM1", 'Merchant_Count_0':"NM0"})

In [None]:
NM01.head()

In [None]:
len(NM01['Recnum'].unique())

### Amount_Card_0 + 1

In [None]:
sum_card_1 = df.groupby(['Cardnum'])['Amount'].rolling('1d').sum().reset_index()
sum_card_1['order'] = sum_card_1.groupby(['Cardnum', 'Date']).cumcount() + 1

In [None]:
sum_card_1.shape

In [None]:
df['order'] = df.groupby(['Cardnum', 'Date']).cumcount() + 1

In [None]:
AC1 = df.merge(sum_card_1, on = ['Cardnum', 'Date', 'order'], suffixes=['_0', '_1'])

In [None]:
AC1.head()

In [None]:
AC01 = AC1[['Recnum', 'Amount_0', 'Amount_1']]

In [None]:
AC01 = AC01.rename(columns={'Amount_0':'Card_Amount_0', 'Amount_1':'Card_Amount_1'})

In [None]:
AC01 = AC01.rename(columns = {'Card_Amount_0':"AC0", 'Card_Amount_1':"AC1"})

In [None]:
AC01.tail()

In [None]:
len(AC01['Recnum'].unique())

### Amount_Merchant_0 + 1

In [None]:
sum_merchant_1 = df.groupby(['Merchnum'])['Amount'].rolling('1d').sum().reset_index()
sum_merchant_1['order'] = sum_merchant_1.groupby(['Merchnum', 'Date']).cumcount() + 1

In [None]:
df['order'] = df.groupby(['Merchnum', 'Date']).cumcount() + 1

In [None]:
AM1 = df.merge(sum_merchant_1, on = ['Merchnum', 'Date', 'order'], suffixes=['_0', '_1'])

In [None]:
AM1.head()

In [None]:
AM01 = AM1[['Recnum', 'Amount_0', 'Amount_1']]

In [None]:
AM01 = AM01.rename(columns={'Amount_0':'Merchant_Amount_0', 'Amount_1':'Merchant_Amount_1'})

In [None]:
AM01 = AM01.rename(columns = {'Merchant_Amount_0':"AM0", 'Merchant_Amount_1':"AM1"})

In [None]:
AM01.tail()

### Numerator

In [None]:
Numerator = NC01.merge(NM01, on = 'Recnum').merge(AC01, on = 'Recnum').merge(AM01, on = 'Recnum')

In [None]:
Numerator.tail()

In [None]:
Numerator.shape

In [None]:
len(Numerator['Recnum'].unique())

### Average Count_Card_7

In [None]:
count_card_7 = df.groupby(['Cardnum'])['Amount'].rolling('7d').count().reset_index()
count_card_7['Card_Count'] = count_card_7['Amount'].astype('int')
count_card_7['Average_Card_Count_7'] = count_card_7['Card_Count'] / 7.0
del count_card_7['Amount']
count_card_7['order'] = count_card_7.groupby(['Cardnum', 'Date']).cumcount() + 1

In [None]:
count_card_7.tail()

In [None]:
df['order'] = df.groupby(['Cardnum', 'Date']).cumcount() + 1

In [None]:
ANC7 = df.merge(count_card_7, on = ['Cardnum', 'Date', 'order'], suffixes=['_Original', '_1'])

In [None]:
ANC7 = ANC7[['Recnum', 'Average_Card_Count_7']]

In [None]:
ANC7 = ANC7.rename(columns = {"Average_Card_Count_7": "ANC7"})

In [None]:
ANC7.head()

In [None]:
ANC7.shape

In [None]:
len(ANC7['Recnum'].unique())

### Average Count_Card_14

In [None]:
count_card_14 = df.groupby(['Cardnum'])['Amount'].rolling('14d').count().reset_index()
count_card_14['Card_Count'] = count_card_14['Amount'].astype('int')
count_card_14['Average_Card_Count_14'] = count_card_14['Card_Count'] / 14.0
del count_card_14['Amount']
count_card_14['order'] = count_card_14.groupby(['Cardnum', 'Date']).cumcount() + 1

In [None]:
count_card_14.tail()

In [None]:
df['order'] = df.groupby(['Cardnum', 'Date']).cumcount() + 1

In [None]:
ANC14 = df.merge(count_card_14, on = ['Cardnum', 'Date', 'order'], suffixes=['_Original', '_1'])

In [None]:
ANC14 = ANC14[['Recnum', 'Average_Card_Count_14']]

In [None]:
ANC14 = ANC14.rename(columns = {"Average_Card_Count_14": "ANC14"})

In [None]:
ANC14.tail()

In [None]:
ANC14.shape

In [None]:
len(ANC14['Recnum'].unique())

### Average Count_Card_30

In [None]:
count_card_30 = df.groupby(['Cardnum'])['Amount'].rolling('30d').count().reset_index()
count_card_30['Card_Count'] = count_card_30['Amount'].astype('int')
count_card_30['Average_Card_Count_30'] = count_card_30['Card_Count'] / 30.0
del count_card_30['Amount']
count_card_30['order'] = count_card_30.groupby(['Cardnum', 'Date']).cumcount() + 1

In [None]:
count_card_30.tail()

In [None]:
df['order'] = df.groupby(['Cardnum', 'Date']).cumcount() + 1

In [None]:
ANC30 = df.merge(count_card_30, on = ['Cardnum', 'Date', 'order'], suffixes=['_Original', '_1'])

In [None]:
ANC30 = ANC30[['Recnum', 'Average_Card_Count_30']]

In [None]:
ANC30 = ANC30.rename(columns = {"Average_Card_Count_30": "ANC30"})

In [None]:
ANC30.tail()

In [None]:
ANC30.shape

In [None]:
len(ANC30['Recnum'].unique())

### Average Amount_Card_7

In [None]:
avg_card_7 = df.groupby(['Cardnum'])['Amount'].rolling('7d').mean().reset_index()
avg_card_7['order'] = avg_card_7.groupby(['Cardnum', 'Date']).cumcount() + 1

In [None]:
avg_card_7 = avg_card_7.rename(columns = {"Amount":"Average_Card_Amount_7"})

In [None]:
avg_card_7.head()

In [None]:
df['order'] = df.groupby(['Cardnum', 'Date']).cumcount() + 1

In [None]:
AAC7 = df.merge(avg_card_7, on = ['Cardnum', 'Date', 'order'], suffixes=['_Original', '_1'])

In [None]:
AAC7 = AAC7[['Recnum', 'Average_Card_Amount_7']]

In [None]:
AAC7 = AAC7.rename(columns = {"Average_Card_Amount_7": "ACC7"})

In [None]:
AAC7.head()

In [None]:
AAC7.shape

In [None]:
len(AAC7['Recnum'].unique())

### Average Amount_Card_14

In [None]:
avg_card_14 = df.groupby(['Cardnum'])['Amount'].rolling('14d').mean().reset_index()
avg_card_14['order'] = avg_card_14.groupby(['Cardnum', 'Date']).cumcount() + 1

In [None]:
avg_card_14 = avg_card_14.rename(columns = {"Amount":"Average_Card_Amount_14"})

In [None]:
avg_card_14.head()

In [None]:
df['order'] = df.groupby(['Cardnum', 'Date']).cumcount() + 1

In [None]:
AAC14 = df.merge(avg_card_14, on = ['Cardnum', 'Date', 'order'], suffixes=['_Original', '_1'])

In [None]:
AAC14 = AAC14[['Recnum', 'Average_Card_Amount_14']]

In [None]:
AAC14 = AAC14.rename(columns = {"Average_Card_Amount_14": "ACC14"})

In [None]:
AAC14.head()

In [None]:
AAC14.shape

In [None]:
len(AAC14['Recnum'].unique())

### Average Amount_Card_30

In [None]:
avg_card_30 = df.groupby(['Cardnum'])['Amount'].rolling('30d').mean().reset_index()
avg_card_30['order'] = avg_card_30.groupby(['Cardnum', 'Date']).cumcount() + 1

In [None]:
avg_card_30 = avg_card_30.rename(columns = {"Amount":"Average_Card_Amount_30"})

In [None]:
avg_card_30.head()

In [None]:
df['order'] = df.groupby(['Cardnum', 'Date']).cumcount() + 1

In [None]:
AAC30 = df.merge(avg_card_30, on = ['Cardnum', 'Date', 'order'], suffixes=['_Original', '_1'])

In [None]:
AAC30 = AAC30[['Recnum', 'Average_Card_Amount_30']]

In [None]:
AAC30 = AAC30.rename(columns = {"Average_Card_Amount_30": "ACC30"})

In [None]:
AAC30.tail()

In [None]:
AAC30.shape

In [None]:
len(AAC30['Recnum'].unique())

### Average Count_Merchant_7

In [None]:
count_merchant_7 = df.groupby(['Merchnum'])['Amount'].rolling('7d').count().reset_index()
count_merchant_7['Merchant_Count'] = count_merchant_7['Amount'].astype('int')
count_merchant_7['Average_Merchant_Count_7'] = count_merchant_7['Merchant_Count'] / 7.0
del count_merchant_7['Amount']
count_merchant_7['order'] = count_merchant_7.groupby(['Merchnum', 'Date']).cumcount() + 1

df['order'] = df.groupby(['Merchnum', 'Date']).cumcount() + 1

ANM7 = df.merge(count_merchant_7, on = ['Merchnum', 'Date', 'order'], suffixes=['_Original', '_1'])

ANM7 = ANM7[['Recnum', 'Average_Merchant_Count_7']]

In [None]:
ANM7 = ANM7.rename(columns = {"Average_Merchant_Count_7": "ANM7"})

In [None]:
ANM7.head()

In [None]:
ANM7.shape

In [None]:
len(ANM7['Recnum'].unique())

### Average Count_Merchant_14

In [None]:
count_merchant_14 = df.groupby(['Merchnum'])['Amount'].rolling('14d').count().reset_index()
count_merchant_14['Merchant_Count'] = count_merchant_14['Amount'].astype('int')
count_merchant_14['Average_Merchant_Count_14'] = count_merchant_14['Merchant_Count'] / 14.0
del count_merchant_14['Amount']
count_merchant_14['order'] = count_merchant_14.groupby(['Merchnum', 'Date']).cumcount() + 1

df['order'] = df.groupby(['Merchnum', 'Date']).cumcount() + 1

ANM14 = df.merge(count_merchant_14, on = ['Merchnum', 'Date', 'order'], suffixes=['_Original', '_1'])

ANM14 = ANM14[['Recnum', 'Average_Merchant_Count_14']]

In [None]:
ANM14 = ANM14.rename(columns = {"Average_Merchant_Count_14": "ANM14"})

In [None]:
ANM14.tail()

In [None]:
ANM14.shape

In [None]:
len(ANM14['Recnum'].unique())

### Average Count_Merchant_30

In [None]:
count_merchant_30 = df.groupby(['Merchnum'])['Amount'].rolling('30d').count().reset_index()
count_merchant_30['Merchant_Count'] = count_merchant_30['Amount'].astype('int')
count_merchant_30['Average_Merchant_Count_30'] = count_merchant_30['Merchant_Count'] / 30.0
del count_merchant_30['Amount']
count_merchant_30['order'] = count_merchant_30.groupby(['Merchnum', 'Date']).cumcount() + 1

df['order'] = df.groupby(['Merchnum', 'Date']).cumcount() + 1

ANM30 = df.merge(count_merchant_30, on = ['Merchnum', 'Date', 'order'], suffixes=['_Original', '_1'])

ANM30 = ANM30[['Recnum', 'Average_Merchant_Count_30']]

In [None]:
ANM30 = ANM30.rename(columns = {"Average_Merchant_Count_30": "ANM30"})

In [None]:
ANM30.tail()

In [None]:
ANM30.shape

In [None]:
len(ANM30['Recnum'].unique())

### Average Amount_Merchant_7

In [None]:
avg_merchant_7 = df.groupby(['Merchnum'])['Amount'].rolling('7d').mean().reset_index()
avg_merchant_7['order'] = avg_merchant_7.groupby(['Merchnum', 'Date']).cumcount() + 1

avg_merchant_7 = avg_merchant_7.rename(columns = {"Amount":"Average_Merchant_Amount_7"})

df['order'] = df.groupby(['Merchnum', 'Date']).cumcount() + 1

AAM7 = df.merge(avg_merchant_7, on = ['Merchnum', 'Date', 'order'], suffixes=['_Original', '_1'])

AAM7 = AAM7[['Recnum', 'Average_Merchant_Amount_7']]

In [None]:
AAM7 = AAM7.rename(columns = {"Average_Merchant_Amount_7": "AAM7"})

In [None]:
AAM7.head()

In [None]:
AAM7.shape

In [None]:
len(AAM7['Recnum'].unique())

### Average Amount_Merchant_14

In [None]:
avg_merchant_14 = df.groupby(['Merchnum'])['Amount'].rolling('14d').mean().reset_index()
avg_merchant_14['order'] = avg_merchant_14.groupby(['Merchnum', 'Date']).cumcount() + 1

avg_merchant_14 = avg_merchant_14.rename(columns = {"Amount":"Average_Merchant_Amount_14"})

df['order'] = df.groupby(['Merchnum', 'Date']).cumcount() + 1

AAM14 = df.merge(avg_merchant_14, on = ['Merchnum', 'Date', 'order'], suffixes=['_Original', '_1'])

AAM14 = AAM14[['Recnum', 'Average_Merchant_Amount_14']]

In [None]:
AAM14 = AAM14.rename(columns = {"Average_Merchant_Amount_14": "AAM14"})

In [None]:
AAM14.tail()

In [None]:
AAM14.shape

In [None]:
len(AAM14['Recnum'].unique())

### Average Amount_Merchant_30

In [None]:
avg_merchant_30 = df.groupby(['Merchnum'])['Amount'].rolling('30d').mean().reset_index()
avg_merchant_30['order'] = avg_merchant_30.groupby(['Merchnum', 'Date']).cumcount() + 1

avg_merchant_30 = avg_merchant_30.rename(columns = {"Amount":"Average_Merchant_Amount_30"})

df['order'] = df.groupby(['Merchnum', 'Date']).cumcount() + 1

AAM30 = df.merge(avg_merchant_30, on = ['Merchnum', 'Date', 'order'], suffixes=['_Original', '_1'])

AAM30 = AAM30[['Recnum', 'Average_Merchant_Amount_30']]

In [None]:
AAM30 = AAM30.rename(columns = {"Average_Merchant_Amount_30": "AAM30"})

In [None]:
AAM30.tail()

In [None]:
AAM30.shape

In [None]:
len(AAM30['Recnum'].unique())

### Denominator

In [None]:
Denominator = ANC7.merge(ANC14, on = 'Recnum') \
.merge(ANC30, on = 'Recnum') \
.merge(AAC7, on = 'Recnum') \
.merge(AAC14, on = 'Recnum') \
.merge(AAC30, on = 'Recnum') \
.merge(ANM7, on = 'Recnum') \
.merge(ANM14, on = 'Recnum') \
.merge(ANM30, on = 'Recnum') \
.merge(AAM7, on = 'Recnum') \
.merge(AAM14, on = 'Recnum') \
.merge(AAM30, on = 'Recnum') \

In [None]:
Denominator.head()

In [None]:
Denominator.shape

In [None]:
len(Denominator['Recnum'].unique())

### Velocity change variables 96

In [None]:
temp = {}
for Num in Numerator.columns:
    if Num != 'Recnum':
        for Den in Denominator.columns:
            if Den != 'Recnum':
                temp[Num + '_' + Den] = Numerator.loc[:,Num].values / Denominator.loc[:,Den].values

In [None]:
Velocity_change_variables = pd.DataFrame(temp)

In [None]:
Velocity_change_variables['Recnum'] = df['Recnum'].values

In [None]:
Velocity_change_variables.head()

# Merge All DataFrame

In [None]:
Amount_variables.head()

In [None]:
Frequency_variables.head()

In [None]:
Days_since_variables.head()

In [None]:
Velocity_change_variables.head()

In [None]:
Days_since_int_variables = Days_since_variables.copy()

In [None]:
Days_since_int_variables['Card_SinceLastTime'] = Days_since_variables['Card_SinceLastTime'].dt.days
Days_since_int_variables['Merchant_SinceLastTime'] = Days_since_variables['Merchant_SinceLastTime'].dt.days
Days_since_int_variables['Card_Merchant_SinceLastTime'] = Days_since_variables['Card_Merchant_SinceLastTime'].dt.days
Days_since_int_variables['Card_Zip_SinceLastTime'] = Days_since_variables['Card_Zip_SinceLastTime'].dt.days
Days_since_int_variables['Card_State_SinceLastTime'] = Days_since_variables['Card_State_SinceLastTime'].dt.days

In [None]:
Days_since_int_variables = Days_since_int_variables.fillna(0)

In [None]:
Days_since_int_variables.head()

In [None]:
all_df_Amount_variables = Amount_variables.copy()
del all_df_Amount_variables['Date']
del all_df_Amount_variables['Fraud']
all_df_Frequency_variables = Frequency_variables.copy()
del all_df_Frequency_variables['Date']
del all_df_Frequency_variables['Transtype']
all_df_Days_since_int_variables = Days_since_int_variables.copy()
all_df_Velocity_change_variables = Velocity_change_variables.copy()

In [None]:
all_df = all_df_Amount_variables.merge(all_df_Frequency_variables, on = 'Recnum') \
.merge(all_df_Days_since_int_variables, on = 'Recnum') \
.merge(all_df_Velocity_change_variables, on = 'Recnum')

In [None]:
all_df.shape

In [None]:
all_df.columns.values

In [None]:
all_df.to_csv('/Users/chengyinliu/all_df_before_1101.csv')