# Experiment

## Install Python dependencies

Import the dependencies for the model training code:

In [None]:
import numpy as np
import pandas as pd
import datetime
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pickle
from pathlib import Path

## Load the CSV data

The CSV data that you use to train the model contains the following fields:

* **distance_from_home** - The distance from home where the transaction happened.
* **distance_from_last_transaction** - The distance from the location of the previous transaction.
* **ratio_to_median_purchase_price** - The ratio of the purchase price to the median purchase price.
* **repeat_retailer** - If the transaction is from a retailer that has already been purchased from before.
* **used_chip** - If the credit card chip was used.
* **used_pin_number** - If the PIN number was used.
* **online_order** - If it was an online order.
* **fraud** - If the transaction is fraudulent.

In [None]:
# Read the card-transaction CSV into a DataFrame and preview its first five rows.
Data = pd.read_csv(filepath_or_buffer='data/card_transdata.csv')
Data.head(n=5)

In [None]:
import torch

# Set the input (X) and output (y) data.
# The only output is whether the transaction is fraudulent. 'repeat_retailer' and
# 'distance_from_home' are dropped as well, so the remaining columns are the model inputs.
X = torch.tensor(Data.drop(columns = ['repeat_retailer','distance_from_home', 'fraud']).values, dtype=torch.float)
y = torch.tensor((Data['fraud']).values, dtype=torch.float)

# Split the data into training and testing sets so you have something to test the trained model with.
# The test set is the last 20% of the rows (no shuffling). The validation set is a stratified
# 20% sample of the remaining rows; random_state pins that sample so reruns give the same split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, shuffle = False)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size = 0.2,
                                                  stratify = y_train, random_state = 0)

# Scale the data to zero mean and unit variance. The values are NOT bounded to a fixed range,
# but putting every feature on a comparable scale makes it much easier for the model to learn
# than raw (and potentially large) values.
# It is important to only fit the scaler to the training data, otherwise you are leaking information about the global distribution of variables (which is influenced by the test set) into the training set.
scaler = StandardScaler()

X_train = torch.tensor(scaler.fit_transform(X_train), dtype=torch.float)
X_test = torch.tensor(scaler.transform(X_test), dtype=torch.float)
X_val = torch.tensor(scaler.transform(X_val), dtype=torch.float)

# Persist the (already scaled) test split and the fitted scaler for later use.
Path("artifact").mkdir(parents=True, exist_ok=True)
with open("artifact/test_data.pkl", "wb") as handle:
    pickle.dump((X_test, y_test), handle)
with open("artifact/scaler.pkl", "wb") as handle:
    pickle.dump(scaler, handle)

# Since the dataset is unbalanced (it has many more non-fraud transactions than fraudulent ones), set a pos_weight to weight the few fraudulent transactions higher than the many non-fraud transactions.
positives = torch.sum(y_train)
negatives = (len(y_train) - torch.sum(y_train))
# Shape (1,): the negatives-to-positives ratio of the training labels.
pos_weight = torch.unsqueeze((negatives / positives), 0)


## Build the model

The model is a simple, fully-connected, deep neural network, containing three hidden layers and one output layer.

In [None]:
import torch

# Choose where tensors and the model will live: a CUDA GPU if one is available,
# otherwise the MPS backend if available, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

device

In [None]:
# Move every feature and label split onto the selected device.
X_train, X_test, X_val = (split.to(device) for split in (X_train, X_test, X_val))
y_train, y_test, y_val = (split.to(device) for split in (y_train, y_test, y_val))

In [None]:
import torch
from torch import nn, optim

# Fully connected binary classifier: 5 input features, three 32-unit hidden layers
# (ReLU + dropout; the second and third also use batch normalisation), and a single
# sigmoid output.
hidden_units = 32
layers = [
    # hidden layer 1
    nn.Linear(5, hidden_units),
    nn.ReLU(),
    nn.Dropout(p=0.2),
    # hidden layer 2
    nn.Linear(hidden_units, hidden_units),
    nn.BatchNorm1d(hidden_units),
    nn.ReLU(),
    nn.Dropout(p=0.2),
    # hidden layer 3
    nn.Linear(hidden_units, hidden_units),
    nn.BatchNorm1d(hidden_units),
    nn.ReLU(),
    nn.Dropout(p=0.2),
    # output layer
    nn.Linear(hidden_units, 1),
    nn.Sigmoid(),
]
model = nn.Sequential(*layers).to(device)

model

In [None]:
# The model ends in nn.Sigmoid(), so it emits probabilities rather than logits.
# nn.BCEWithLogitsLoss applies its own sigmoid, which would squash the outputs a second
# time, so the same class-weighted binary cross-entropy is computed directly on the
# probabilities instead.
pos_weight_on_device = pos_weight.to(device)


def loss_fn(y_prob, y_true):
    """Class-weighted binary cross-entropy on predicted probabilities.

    Args:
        y_prob: Model outputs after the sigmoid, values in [0, 1].
        y_true: Target labels (0.0 or 1.0) with the same shape as ``y_prob``.

    Returns:
        Scalar tensor: the mean loss, with samples whose label is 1 weighted by
        ``pos_weight`` and all other samples weighted by 1.
    """
    sample_weight = torch.where(y_true == 1, pos_weight_on_device, torch.ones_like(y_true))
    return nn.functional.binary_cross_entropy(y_prob, y_true, weight=sample_weight)


# optimizer = torch.optim.SGD(params=model.parameters(), lr=0.01)
optimizer = optim.Adam(params=model.parameters(), lr=0.01)


def accuracy_fn(y_pred, y_actual):
    """Return the percentage of predictions that equal the labels after rounding.

    Args:
        y_pred: Tensor of predicted values; each is rounded with ``torch.round``.
        y_actual: Tensor of target labels compared element-wise with the rounded predictions.

    Returns:
        Accuracy as a float percentage (0-100), relative to ``len(y_pred)``.
    """
    matches = torch.round(y_pred) == y_actual
    n_correct = matches.sum().item()
    return (n_correct / len(y_pred)) * 100

## Train the model

Training a model is often the most time-consuming part of the machine learning process.  Large models can take multiple GPUs for days.  Expect the training on CPU for this very simple model to take a minute or more.

In [None]:
# Seed PyTorch for reproducibility. The model was already constructed in an earlier cell,
# so this seed governs the randomness from here on (e.g. the dropout masks).
torch.manual_seed(0)

# Set the number of epochs
epochs = 1000
# Report progress about ten times; max() keeps the interval at 1 or more so the modulo
# below cannot divide by zero when epochs < 10.
log_every = max(1, epochs // 10)

# Build training and evaluation loop. Each epoch is one full-batch step over X_train.
for epoch in range(epochs):
    # Training
    model.train()

    y_pred = model(X_train).squeeze()
    loss = loss_fn(y_pred, y_train)
    acc = accuracy_fn(y_pred, y_train)

    loss.backward()
    optimizer.step()
    optimizer.zero_grad()

    # Validate (eval mode, no gradient tracking)
    model.eval()
    with torch.no_grad():
        y_pred = model(X_val).squeeze()
        val_loss = loss_fn(y_pred, y_val)
        val_acc = accuracy_fn(y_pred, y_val)

    if epoch % log_every == 0:
        print(f"Epoch: {epoch} | Loss: {loss:.5f}, Accuracy: {acc:.2f}% | "
              f"Validation loss: {val_loss:.5f}, Val acc: {val_acc:.2f}%")


# These are the metrics of the last epoch, measured on the validation split (the test
# split is not touched during training), so they are labelled as validation metrics.
print(f"Final Epoch: {epoch} | Loss: {loss:.5f}, Accuracy: {acc:.2f}% | "
      f"Validation loss: {val_loss:.5f}, Val acc: {val_acc:.2f}%")

## Test the Model

In [None]:
def run_inference(data):
    """Classify a single transaction with the trained PyTorch model.

    Args:
        data: 2-D array-like holding exactly one row of raw (unscaled) feature
            values; it is standardised with the fitted ``scaler`` before scoring.

    Returns:
        ``"fraud"`` if the rounded model output equals 1, otherwise ``"NOT fraud"``.
    """
    features = scaler.transform(data).astype(np.float32)
    scaled_tx = torch.tensor(features, dtype=torch.float).to(device)

    model.eval()
    with torch.inference_mode():
        # .item() requires a single-element result, i.e. one transaction per call
        is_fraud = torch.round(model(scaled_tx)).item() == 1

    return "fraud" if is_fraud else "NOT fraud"

In [None]:
# valid transaction
# One row of raw feature values, in the same field order as listed for Sally's
# example later in this notebook; run_inference scales it before scoring.
valid_tx = [[0.0, 1.0, 1.0, 1.0, 0.0]]
prediction = run_inference(valid_tx)
print(f"The model thinks the valid transaction is {prediction}")

In [None]:
# fraudulent use case
# One row of raw feature values; run_inference scales it before scoring.
fraud_tx = [[100, 1.2, 0.0, 0.0, 1.0]]
prediction = run_inference(fraud_tx)
# The message names the fraudulent case (it previously repeated the "valid transaction" wording).
print(f"The model thinks the fraudulent transaction is {prediction}")

In [None]:
### Testing
# Score the held-out test split with the trained model in eval mode and keep the
# predictions under a second name for the later comparison with the ONNX model.
model.eval()
with torch.inference_mode():
    pytorch_y_pred = model(X_test).squeeze()
    acc = accuracy_fn(pytorch_y_pred, y_test)

y_pred = pytorch_y_pred

In [None]:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from matplotlib import pyplot as plt

print(f"Accuracy: {acc}")

# scikit-learn works on CPU data, so copy the labels and the rounded predictions back first.
labels_cpu = y_test.cpu()
predicted_cpu = torch.round(y_pred.cpu())
c_matrix = confusion_matrix(labels_cpu, predicted_cpu)
ConfusionMatrixDisplay(c_matrix).plot()


## Save the model file

In [None]:
# Display the trained model's state dict (its learned parameters and buffers).
model.state_dict()


In [None]:
# %pip (rather than !pip) installs into the environment of the running kernel.
%pip install onnx onnxscript onnxruntime

In [None]:
# Upgrade the ONNX tooling; %pip targets the environment of the running kernel.
%pip install -U onnx onnxscript

In [None]:
# Display the module structure of the model that is about to be exported.
model

In [None]:
import os

os.makedirs("models/fraud/1", exist_ok=True)

# Export the inference behaviour (dropout off, batch-norm using its running statistics).
model.eval()

# The model's BatchNorm1d layers only accept batched input, so the example input must be
# 2-D: (batch, features) = (1, 5). Axis 0 is then the batch axis, which is declared
# dynamic for both the input and the output so the exported model accepts any batch size.
dummy_input = torch.randn(1, 5, device=device)
torch.onnx.export(model,
                  dummy_input,
                  "models/fraud/1/model.onnx",
                  input_names=["inputs"],
                  output_names=["outputs"],
                  dynamic_axes={
                      "inputs": {0: "batch_size"},
                      "outputs": {0: "batch_size"},
                  },
                  verbose=True)

In [None]:
# import os

# os.makedirs("models/fraud/1", exist_ok=True)
# dummy_input = torch.randn(1, 5, device=device)
# onnx_model = torch.onnx.dynamo_export(model, dummy_input)
# onnx_model.save("models/fraud/1/model.onnx")

## Confirm the model file was created successfully

The output should include the model name, size, and date. 

In [None]:
# List the exported model file with a long, human-readable listing.
!ls -alRh ./models/fraud/1/model.onnx

In [None]:
import os

# Fail loudly if the export did not produce the expected file.
file_path = './models/fraud/1/model.onnx'  # Replace with the actual file path
if not Path(file_path).is_file():
    raise FileNotFoundError(f"Expected model file at {file_path} missing")
print(f"Exported model file exists at {file_path}")

## Test the exported ONNX model

In [None]:
from sklearn.metrics import confusion_matrix
import numpy as np
import pickle
from matplotlib import pyplot as plt

import onnx
import onnxruntime as rt

Create an ONNX inference runtime session and predict values for all test inputs:

In [None]:
# X_test was already standardised when the data splits were prepared, so it is passed to
# the exported model as-is; running it through the scaler again would scale it twice.
onnx_test_data = X_test.cpu().numpy().astype(np.float32)
sess = rt.InferenceSession("models/fraud/1/model.onnx", providers=rt.get_available_providers())
input_name = sess.get_inputs()[0].name
output_name = sess.get_outputs()[0].name
# Smoke-test the session on the first test row. The [:1] slice keeps the leading batch
# axis, because the model (BatchNorm1d) works on 2-D (batch, features) input.
output = sess.run([output_name], {input_name: onnx_test_data[:1]})
y_pred = torch.tensor(output[0])
y_pred

In [None]:
# Spot-check the PyTorch prediction for test row 245.
# NOTE(review): the ONNX cell above scored test row 0, so the two displayed values
# are not for the same row - confirm which row was meant to be compared.
pytorch_y_pred[245]

In [None]:
# Score the whole test set with the ONNX model. np.asarray guarantees a float32 array
# whether onnx_test_data is a nested list or already an array.
output = sess.run([output_name], {input_name: np.asarray(onnx_test_data, dtype=np.float32)})
# Keep the full-batch predictions as a 1-D tensor so the cells below compare them with
# y_test and pytorch_y_pred element for element (instead of a single-sample y_pred).
y_pred = torch.tensor(output[0]).squeeze()

In [None]:
# Load the exported file as an ONNX model object for inspection.
onnx_model = onnx.load("models/fraud/1/model.onnx")

In [None]:
# Run the ONNX checker on the loaded model (it is expected to raise if the model is invalid).
onnx.checker.check_model(onnx_model)

In [None]:
# Print a human-readable text rendering of the exported graph.
print(onnx.helper.printable_graph(onnx_model.graph))


In [None]:
# Metadata for the inference session's inputs.
inputs = sess.get_inputs()

In [None]:
# Show the name, declared shape and element type of the model's first input.
print(inputs[0].name)
print(inputs[0].shape)
print(inputs[0].type)

Show the results:

In [None]:
# Indices the ONNX model flags as fraud. The model ends in a sigmoid, so its outputs are
# probabilities: the decision threshold is 0.5 (a test against 0 matches nearly everything).
torch.where(y_pred > 0.5)

In [None]:
# Indices the PyTorch model flags as fraud. The model ends in a sigmoid, so its outputs are
# probabilities: the decision threshold is 0.5 (a test against 0 matches nearly everything).
torch.where(pytorch_y_pred > 0.5)

In [None]:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from matplotlib import pyplot as plt

# NOTE(review): `acc` is not recomputed here; it still holds the value from the earlier
# PyTorch test cell - confirm whether the ONNX accuracy was meant to be shown.
print(f"Accuracy: {acc}")

# scikit-learn works on CPU data, so copy the labels and the rounded predictions back first.
onnx_labels_cpu = y_test.cpu()
onnx_predicted_cpu = torch.round(y_pred.cpu())
c_matrix = confusion_matrix(onnx_labels_cpu, onnx_predicted_cpu)
ConfusionMatrixDisplay(c_matrix).plot()

## Example: Is Sally's transaction likely to be fraudulent?

Here is the order of the fields from Sally's transaction details:
* distance_from_last_transaction
* ratio_to_median_purchase_price
* used_chip 
* used_pin_number
* online_order 

In [None]:
# One row of raw (unscaled) feature values, in the field order listed above.
sally_transaction_details = [
    [0.3111400080477545,
    1.9459399775518593,
    1.0,
    0.0,
    0.0]
    ]

# Standardise the transaction exactly once with the scaler fitted on the training data.
scaled_tx = torch.tensor(scaler.transform(sally_transaction_details).astype(np.float32),
                         dtype=torch.float)
# Feed the already-scaled values straight to the ONNX session; passing them through
# scaler.transform a second time would scale the inputs twice and skew the prediction.
prediction = sess.run([output_name], {input_name: scaled_tx.numpy()})

print("Is Sally's transaction predicted to be fraudulent?")

# prediction[0] holds the single output probability for the one-row batch.
if round(prediction[0].item()) == 1:
    print("The model predicts this is fraud")
else:
    print("The model predicts this is NOT fraud")