 # Imbalanced classification: credit card fraud detection

 **Author:** [Madison Estabrook](https://github.com/madisonestabrook/madisonestabrook)<br>
 **Last modified:** 2020/07/26<br>
 **Description:** Demonstration of how to handle highly imbalanced classification problems.

 ## Introduction

 This example looks at the
 [Kaggle Credit Card Fraud Detection](https://www.kaggle.com/mlg-ulb/creditcardfraud/)
 dataset to demonstrate how
 to train a classification model on data with highly imbalanced classes.

In [1]:
import csv
import numpy as np

# Get the real data from https://www.kaggle.com/mlg-ulb/creditcardfraud/
fname = "creditcard.csv"

all_features = []
all_targets = []
with open(fname) as f:
    for i, line in enumerate(f):
        if i == 0:
            print("HEADER:", line.strip())
            continue  # Skip header
        fields = line.strip().split(",")
        all_features.append([float(v.replace('"', "")) for v in fields[:-1]])
        all_targets.append([int(fields[-1].replace('"', ""))])
        if i == 1:
            print("EXAMPLE FEATURES:", all_features[-1])

features = np.array(all_features, dtype="float32")
targets = np.array(all_targets, dtype="int32")
print("features.shape:", features.shape)
print("targets.shape:", targets.shape)


HEADER: "Time","V1","V2","V3","V4","V5","V6","V7","V8","V9","V10","V11","V12","V13","V14","V15","V16","V17","V18","V19","V20","V21","V22","V23","V24","V25","V26","V27","V28","Amount","Class"
EXAMPLE FEATURES: [0.0, -1.3598071336738, -0.0727811733098497, 2.53634673796914, 1.37815522427443, -0.338320769942518, 0.462387777762292, 0.239598554061257, 0.0986979012610507, 0.363786969611213, 0.0907941719789316, -0.551599533260813, -0.617800855762348, -0.991389847235408, -0.311169353699879, 1.46817697209427, -0.470400525259478, 0.207971241929242, 0.0257905801985591, 0.403992960255733, 0.251412098239705, -0.018306777944153, 0.277837575558899, -0.110473910188767, 0.0669280749146731, 0.128539358273528, -0.189114843888824, 0.133558376740387, -0.0210530534538215, 149.62]
features.shape: (284807, 30)
targets.shape: (284807, 1)


 ## Prepare a validation set

In [2]:
num_val_samples = int(len(features) * 0.2)
train_features = features[:-num_val_samples]
train_targets = targets[:-num_val_samples]
val_features = features[-num_val_samples:]
val_targets = targets[-num_val_samples:]

print("Number of training samples:", len(train_features))
print("Number of validation samples:", len(val_features))


Number of training samples: 227846
Number of validation samples: 56961


 ## Analyze class imbalance in the targets

In [3]:
counts = np.bincount(train_targets[:, 0])
print(
    "Number of positive samples in training data: {} ({:.2f}% of total)".format(
        counts[1], 100 * float(counts[1]) / len(train_targets)
    )
)

weight_for_0 = 1.0 / counts[0]
weight_for_1 = 1.0 / counts[1]


Number of positive samples in training data: 417 (0.18% of total)


 ## Normalize the data using training set statistics

In [4]:
mean = np.mean(train_features, axis=0)
train_features -= mean
val_features -= mean
std = np.std(train_features, axis=0)
train_features /= std
val_features /= std


 ## Build a binary classification model

In [5]:
import torch
import torch.nn as nn

class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.dropout = nn.Dropout(p=0.3)
        self.hidden= nn.Linear(30, 50)
        self.out = nn.Linear(50, 1)
        self.act = nn.RReLU()
    def forward(self, x):
        x = self.act(self.hidden(x))
        x = self.dropout(x)
        x = self.out(x) 
        return x

model = Model()
model



Model(
  (dropout): Dropout(p=0.3, inplace=False)
  (hidden): Linear(in_features=30, out_features=50, bias=True)
  (out): Linear(in_features=50, out_features=1, bias=True)
  (act): RReLU(lower=0.125, upper=0.3333333333333333)
)

In [6]:
class Arguments():
    def __init__(self):
        self.epochs = 30
        self.lr = 0.001
        self.log_interval = 500
        self.batch_size = 2048
        self.test_batch_size = 5000

args = Arguments()

## Numpy Data => PyTorch Data

In [7]:
from torch.utils.data import TensorDataset, DataLoader

train_dataset = TensorDataset(torch.from_numpy(train_features), torch.from_numpy(train_targets.astype(float)))
train_dataloader = DataLoader(train_dataset, shuffle=True, batch_size=args.batch_size)

val_dataset = TensorDataset(torch.from_numpy(val_features), torch.from_numpy(val_targets.astype(float)))
val_dataloader = DataLoader(val_dataset, shuffle=True, batch_size=args.batch_size)

## Hooking Torch and Creating Our Workers

In [8]:
import syft as sy
hook = sy.TorchHook(torch) 
client = sy.VirtualWorker(hook, id="client")
bob = sy.VirtualWorker(hook, id="bob")
alice = sy.VirtualWorker(hook, id="alice")
crypto_provider = sy.VirtualWorker(hook, id="crypto_provider")





## Creating a Private Valadation Loader

In [9]:
private_val_loader = []
for data, target in val_dataloader:
    private_val_loader.append((
        data.fix_precision().share(alice, bob, crypto_provider=crypto_provider),
        target.fix_precision().share(alice, bob, crypto_provider=crypto_provider)
    ))

## Training

In [10]:
import torch.nn.functional as F

arr = np.array([weight_for_0, weight_for_1])

def train(args, model, train_loader, optimizer, epoch):
    model.train()
    loss_fx = nn.BCEWithLogitsLoss() #nn.CrossEntropyLoss()
    for batch_idx, (data, target) in enumerate(train_loader):
        optimizer.zero_grad()
        output = model(data)
        output = F.rrelu(output)
        loss = loss_fx(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % args.log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.8f}'.format(
                epoch, batch_idx * args.batch_size, len(train_loader) * args.batch_size,
                100. * batch_idx / len(train_loader), loss.item()))

In [11]:
import torch.optim as optim

optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

In [12]:
for epoch in range(1, args.epochs + 1):
    train(args, model, train_dataloader, optimizer, epoch)



## Testing
### How Do We Compare to Keras?

In [13]:
def test(args, model, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    loss_fx = nn.BCEWithLogitsLoss()
    with torch.no_grad():
        for data, target in test_loader:
            output = model(data)
            output = F.rrelu(output)
            test_loss += loss_fx(output, target).item() # sum up batch loss
            pred = output.argmax(1, keepdim=True) # get the index of the max log-probability 
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    n_incorrect = len(test_loader.dataset) - correct

    print('\nTest set: Average loss: {:.8f}, Accuracy: {}/{} ({:.0f}%; {})\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset), n_incorrect))

In [14]:
test(args, model, val_dataloader)


Test set: Average loss: 0.00000151, Accuracy: 56886/56961 (100%; 75)



 ## Conclusions

 At the end of training, out of 56,961 validation transactions, we are:

 - Correctly identifying 66 of them as fraudulent
 - Missing 9 fraudulent transactions
 - At the cost of incorrectly flagging 441 legitimate transactions

 In the real world, one would put an even higher weight on class 1,
 so as to reflect that False Negatives are more costly than False Positives.

 Next time your credit card gets  declined in an online purchase -- this is why.

## Model As a Service (MAAS)

In [15]:
model.fix_precision().share(alice, bob, crypto_provider=crypto_provider)

Model(
  (dropout): Dropout(p=0.3, inplace=False)
  (hidden): Linear(in_features=30, out_features=50, bias=True)
  (out): Linear(in_features=50, out_features=1, bias=True)
  (act): RReLU(lower=0.125, upper=0.3333333333333333)
)

In [16]:
def test(args, model, test_loader):
    model.eval()
    n_correct_priv = 0
    n_total = 0
    with torch.no_grad():
        for data, target in test_loader:
            output = model(data)
            pred = output.argmax(dim=1) 
            n_correct_priv += pred.eq(target.view_as(pred)).sum()
            n_total += args.test_batch_size
# This 'test' function performs the encrypted evaluation. The model weights, the data inputs, the prediction and the target used for scoring are all encrypted!

# However as you can observe, the syntax is very similar to normal PyTorch testing! Nice!

# The only thing we decrypt from the server side is the final score at the end of our 200 items batches to verify predictions were on average good.      
            n_correct = n_correct_priv.copy().get().float_precision().Long().item()
    
            print('Test set: Accuracy: {}/{} ({:.0f}%)'.format(
                n_correct, n_total,
                100. * n_correct / n_total))

In [17]:
test(args, model, private_val_loader)

RuntimeError: _thnn_rrelu_with_noise_forward not supported on CPUType for Long