In [None]:
from utils import load_data, preprocess_data, normalize_data, seed_everything
from sklearn.metrics import accuracy_score, f1_score
# Read both CSV splits first, then run the shared preprocessing helper on each
# (train before test in both steps).
df_train, df_test = (
    load_data(csv_path)
    for csv_path in ('data/fraudTrain.csv', 'data/fraudTest.csv')
)
df_train, df_test = (preprocess_data(frame) for frame in (df_train, df_test))

# Feature matrices: every column except the `is_fraud` label goes through
# normalize_data.
# NOTE(review): normalize_data is invoked independently per split. If it fits
# its scaling statistics on whatever it receives, the test features are scaled
# with test-set statistics rather than the training ones -- confirm in utils.
X_train_scaled, X_test_scaled = (
    normalize_data(frame.drop(columns=['is_fraud']))
    for frame in (df_train, df_test)
)

In [None]:
from sklearn.linear_model import LogisticRegression


# Linear baseline: fit on the scaled training features, score on the test split.
# `y_test` is also reused by later cells as the ground-truth label vector.
y_test = df_test['is_fraud']

lr = LogisticRegression(max_iter=500, random_state=42).fit(
    X_train_scaled, df_train['is_fraud']
)
y_pred = lr.predict(X_test_scaled)

# Accuracy first, then F1 (one value per line).
# NOTE(review): the recorded run shows F1 = 0.0 next to ~99.6% accuracy, i.e.
# no fraud case was correctly flagged; accuracy alone is misleading here.
# Consider class weighting (e.g. class_weight='balanced') -- to be evaluated.
print(accuracy_score(y_test, y_pred), f1_score(y_test, y_pred), sep='\n')

0.9955858986286235
0.0


In [None]:
from xgboost import XGBClassifier

# Gradient-boosted tree baseline trained on the same scaled features.
# NOTE: the name `model` is rebound to the neural network in a later cell.
xgb_params = {
    'objective': 'binary:logistic',
    'eval_metric': 'logloss',
    'learning_rate': 0.1,
    'booster': 'gbtree',
    'n_estimators': 25,
    'random_state': 42,
}
model = XGBClassifier(**xgb_params)
model.fit(X_train_scaled, df_train['is_fraud'])

# Score against `y_test` (defined in the logistic-regression cell):
# accuracy first, then F1, one value per line.
y_pred_xgb = model.predict(X_test_scaled)
for metric_fn in (accuracy_score, f1_score):
    print(metric_fn(y_test, y_pred_xgb))

0.9977416644023328
0.6888172576245971


Deep Learning model for classification

In [2]:
from torch.utils.data import DataLoader, TensorDataset
import torch
# Mini-batch size shared by the train and test loaders.
BATCH_SIZE = 128

# Wrap features and labels as tensors.
# Features are float32 network inputs. Labels are stored as integer class
# indices (torch.long) because the loss used for training is
# nn.CrossEntropyLoss, which takes class-index targets; storing them as float32
# forced a float -> long cast on every batch. Existing `.long()` calls on these
# targets remain valid and become no-ops.
train_dataset = TensorDataset(
    torch.tensor(X_train_scaled, dtype=torch.float32),
    torch.tensor(df_train['is_fraud'].values, dtype=torch.long),
)
test_dataset = TensorDataset(
    torch.tensor(X_test_scaled, dtype=torch.float32),
    torch.tensor(df_test['is_fraud'].values, dtype=torch.long),
)

# Shuffle only the training batches; keep the test order fixed so per-epoch
# evaluation always sees the samples in the same order.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)


In [3]:
# Fix the random seed ahead of the cells that build the network and iterate the
# shuffled training loader.
# NOTE(review): presumably seeds the Python / NumPy / torch RNGs -- confirm in utils.
seed_everything(42)

In [5]:
import torch.nn as nn
import torch.optim as optim
from model import CreditCardFraudDetector
# Network sized to the number of feature columns in the scaled training matrix.
# NOTE: this rebinds `model`, which previously referred to the XGBoost classifier.
n_features = X_train_scaled.shape[1]
model = CreditCardFraudDetector(input_size=n_features, hidden_size=256)

# Multi-class cross-entropy criterion; the training loop feeds it the network
# outputs together with integer class targets.
loss = nn.CrossEntropyLoss()

In [6]:
# Candidate devices; the first entry is the one actually used (CPU only here).
devices = ['cpu']
device = torch.device(devices[0])
print(f"Using device: {device}")

# nn.Module.to moves the module in place and returns the module itself, so
# rebinding keeps the same objects.
model = model.to(device)
loss = loss.to(device)

# Adam over all network parameters with a fixed learning rate.
optimizer = optim.Adam(params=model.parameters(), lr=0.001)


Using device: cpu


In [7]:
from tqdm import trange, tqdm
# Train for 10 epochs; after each epoch, evaluate on the test loader and report
# accuracy and binary F1.
for epoch in range(10):
    # ---- training pass: one optimizer step per mini-batch ----
    model.train()
    for inputs, targets in tqdm(train_loader):
        inputs = inputs.to(device)
        targets = targets.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        # The criterion is called with integer class targets.
        loss_value = loss(outputs, targets.long())
        loss_value.backward()
        optimizer.step()

    # ---- evaluation pass: gradients disabled, predictions collected per batch ----
    model.eval()
    all_targets, all_preds = [], []
    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs = inputs.to(device)
            targets = targets.to(device)
            outputs = model(inputs)
            # Predicted class = index of the largest output along dim 1.
            preds = torch.max(outputs, dim=1).indices
            all_targets.append(targets)
            all_preds.append(preds)
    targets = torch.cat(all_targets)
    predicted = torch.cat(all_preds)

    # Accuracy from the concatenated tensors (same counts as summing per batch).
    total = targets.size(0)
    correct = (predicted == targets.long()).sum().item()
    epoch_f1 = f1_score(
        targets.detach().cpu().numpy(),
        predicted.detach().cpu().numpy(),
        average="binary",
    )

    print(f'Epoch {epoch+1}, Accuracy: {100 * correct / total:.2f}%')
    print(f'Epoch {epoch+1}, F1 Score: {epoch_f1:.2f}')

  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
100%|██████████| 10131/10131 [02:23<00:00, 70.70it/s]


Epoch 1, Accuracy: 99.78%
Epoch 1, F1 Score: 0.67


100%|██████████| 10131/10131 [02:28<00:00, 68.29it/s]


Epoch 2, Accuracy: 99.78%
Epoch 2, F1 Score: 0.68


100%|██████████| 10131/10131 [02:27<00:00, 68.56it/s]


Epoch 3, Accuracy: 99.82%
Epoch 3, F1 Score: 0.74


100%|██████████| 10131/10131 [02:31<00:00, 67.03it/s]


Epoch 4, Accuracy: 99.82%
Epoch 4, F1 Score: 0.73


100%|██████████| 10131/10131 [02:24<00:00, 70.29it/s]


Epoch 5, Accuracy: 99.81%
Epoch 5, F1 Score: 0.73


100%|██████████| 10131/10131 [02:23<00:00, 70.70it/s]


Epoch 6, Accuracy: 99.79%
Epoch 6, F1 Score: 0.73


 46%|████▋     | 4689/10131 [01:08<01:20, 68.02it/s]


KeyboardInterrupt: 