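## Credit Card Fraud Detection - Task 2

This notebook loads the raw transaction log, derives date/time, card and terminal features from it, and saves the enriched table to `data/task3_data.csv`.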

### Load transaction data

In [1]:
import pandas as pd
import numpy as np

# Load the raw transaction log. Because column names are supplied via `names`
# (and no `header` is given), pandas treats every line of the file as data.
# CardID and TerminalID are read as strings so later cells can slice them by
# character position; reading them as integers would drop any leading zeros.
transaction_data = pd.read_csv(
    "data/transaction_log.csv",
    names=["DateTime", "CardID", "TerminalID", "Amount"],
    parse_dates=["DateTime"],
    dtype={"CardID": str, "TerminalID": str, "Amount": int},
)
transaction_data.head()

Unnamed: 0,DateTime,CardID,TerminalID,Amount
0,2018-06-09 03:33:00,11462413,246,1756
1,2018-06-09 03:33:00,94108,130,650
2,2018-06-09 03:33:00,744530,11698,396
3,2018-06-09 03:33:00,541270,310,158
4,2018-06-09 03:33:00,688490,13126,9


In [2]:
# Display the dtype of each column of the loaded frame.
transaction_data.dtypes

DateTime      datetime64[ns]
CardID                object
TerminalID            object
Amount                 int32
dtype: object

In [3]:
# Display the (rows, columns) size of the loaded frame.
transaction_data.shape

(2084, 4)

### Feature Extraction

In [4]:
# Split the DateTime column into separate calendar/clock features.
# Each entry is (new column name == `.dt` attribute, compact unsigned dtype):
# year needs 16 bits, the remaining parts fit in 8 bits.
timestamps = transaction_data['DateTime'].dt
for part_name, part_dtype in (
    ('year', 'uint16'),
    ('month', 'uint8'),
    ('day', 'uint8'),
    ('hour', 'uint8'),
    ('minute', 'uint8'),
):
    transaction_data[part_name] = getattr(timestamps, part_name).astype(part_dtype)

transaction_data.head()

Unnamed: 0,DateTime,CardID,TerminalID,Amount,year,month,day,hour,minute
0,2018-06-09 03:33:00,11462413,246,1756,2018,6,9,3,33
1,2018-06-09 03:33:00,94108,130,650,2018,6,9,3,33
2,2018-06-09 03:33:00,744530,11698,396,2018,6,9,3,33
3,2018-06-09 03:33:00,541270,310,158,2018,6,9,3,33
4,2018-06-09 03:33:00,688490,13126,9,2018,6,9,3,33


In [5]:
# Derive card features from fixed character positions of CardID.
# All three features are stored as strings (e.g. '0'), not integers.
card_id = transaction_data['CardID']

# Character 0 -> card kind
#   0 = Debit
#   1 = Credit
transaction_data['CardKind'] = card_id.str.get(0)

# Character 1 -> card limit
#   0 = Debit  - max daily transaction (1000 TL)
#   1 = Credit - monthly limit (5000 TL)
#   2 = Credit - monthly limit (10000 TL)
#   3 = Credit - monthly limit (20000 TL)
#   4 = Credit - monthly limit (30000 TL)
transaction_data['CardLimit'] = card_id.str.get(1)

# Characters 2-3 -> home location
#   00        = International
#   01 ... 80 = State/Province
transaction_data['CardHomeLocation'] = card_id.str[2:4]

transaction_data.head(10)

Unnamed: 0,DateTime,CardID,TerminalID,Amount,year,month,day,hour,minute,CardKind,CardLimit,CardHomeLocation
0,2018-06-09 03:33:00,11462413,246,1756,2018,6,9,3,33,1,1,46
1,2018-06-09 03:33:00,94108,130,650,2018,6,9,3,33,0,0,9
2,2018-06-09 03:33:00,744530,11698,396,2018,6,9,3,33,0,0,74
3,2018-06-09 03:33:00,541270,310,158,2018,6,9,3,33,0,0,54
4,2018-06-09 03:33:00,688490,13126,9,2018,6,9,3,33,0,0,68
5,2018-06-09 03:33:00,593665,12309,940,2018,6,9,3,33,0,0,59
6,2018-06-09 03:33:00,12245195,13244,4721,2018,6,9,3,33,1,2,24
7,2018-06-09 03:33:00,11772298,658,4052,2018,6,9,3,33,1,1,77
8,2018-06-09 03:33:00,318296,301,613,2018,6,9,3,33,0,0,31
9,2018-06-09 03:33:00,12800078,11441,1426,2018,6,9,3,33,1,2,80


In [6]:
# Derive terminal features from fixed character positions of TerminalID.
# All three features are stored as strings (e.g. '0'), not integers.
terminal_id = transaction_data['TerminalID']

# Character 0 -> terminal kind
#   0 = ATM
#   1 = POS
transaction_data['TerminalKind'] = terminal_id.str.get(0)

# Character 1 -> merchant category
#   0       = ATM - Undefined
#   1 ... 5 = POS - Business type
transaction_data['TerminalMerchant'] = terminal_id.str.get(1)

# Characters 2-3 -> terminal location
#   00        = International
#   01 ... 80 = State/Province
#   99        = Internet
transaction_data['TerminalLocation'] = terminal_id.str[2:4]

transaction_data.head(10)

Unnamed: 0,DateTime,CardID,TerminalID,Amount,year,month,day,hour,minute,CardKind,CardLimit,CardHomeLocation,TerminalKind,TerminalMerchant,TerminalLocation
0,2018-06-09 03:33:00,11462413,246,1756,2018,6,9,3,33,1,1,46,0,0,24
1,2018-06-09 03:33:00,94108,130,650,2018,6,9,3,33,0,0,9,0,0,13
2,2018-06-09 03:33:00,744530,11698,396,2018,6,9,3,33,0,0,74,1,1,69
3,2018-06-09 03:33:00,541270,310,158,2018,6,9,3,33,0,0,54,0,0,31
4,2018-06-09 03:33:00,688490,13126,9,2018,6,9,3,33,0,0,68,1,3,12
5,2018-06-09 03:33:00,593665,12309,940,2018,6,9,3,33,0,0,59,1,2,30
6,2018-06-09 03:33:00,12245195,13244,4721,2018,6,9,3,33,1,2,24,1,3,24
7,2018-06-09 03:33:00,11772298,658,4052,2018,6,9,3,33,1,1,77,0,0,65
8,2018-06-09 03:33:00,318296,301,613,2018,6,9,3,33,0,0,31,0,0,30
9,2018-06-09 03:33:00,12800078,11441,1426,2018,6,9,3,33,1,2,80,1,1,44


In [7]:
# Write the enriched frame to a comma-separated file, omitting the row index.
# The file name suggests it is the input for Task 3 - TODO confirm.
# NOTE: the ID columns and the features sliced from them are strings; when
# this file is read back, pass dtype=str for those columns or codes such as
# '00' / '09' will be parsed as integers and lose their leading zero.
transaction_data.to_csv('data/task3_data.csv', index=False)