In [34]:
import torch

import torch.nn as nn
import torch.optim as optim
import pandas as pd
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE

In [3]:
# Read the Excel file
df = pd.read_excel('C:\\Users\\Phil\\CaseStudy\\PSP_DATA_PREP.xlsx')

# Display the data
df.head()

Unnamed: 0,tmsp,attempts,success,PSP,3D_secured,fee,hour,day_of_week,month,amount_norm,country_Austria,country_Germany,country_Switzerland,card_Diners,card_Master,card_Visa
0,2019-01-01 00:01:11,2,1,UK_Card,0,4.0,0,1,1,0.133013,0,1,0,0,0,1
1,2019-01-01 00:02:49,2,1,UK_Card,1,4.0,0,1,1,0.371795,0,1,0,1,0,0
2,2019-01-01 00:04:33,1,0,Simplecard,0,0.5,0,1,1,0.189103,1,0,0,1,0,0
3,2019-01-01 00:06:41,2,0,Simplecard,0,1.5,0,1,1,0.442308,0,0,1,0,1,0
4,2019-01-01 00:08:46,1,1,UK_Card,0,3.0,0,1,1,0.177885,0,1,0,0,1,0


In [35]:
df_success = df[df['success'] == 1]
df_success = df_success.drop(columns=['tmsp', 'fee', 'success', 'month'], axis=1)


X = df_success.drop("PSP", axis=1)
y = df_success["PSP"].astype('category').cat.codes

# Apply the SMOTE algorithm to balance the data
smote = SMOTE(random_state=42)
X, y = smote.fit_resample(X, y)

X = torch.from_numpy(X.to_numpy()).type(torch.float)
y = torch.from_numpy(y.to_numpy()).type(torch.float)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [36]:
# Define the model architecture
class Classifier(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(Classifier, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_classes)
    
    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        return out

In [52]:
# Define the hyperparameters
input_size = 11
hidden_size = 50
num_classes = 4
learning_rate = 0.0001
num_epochs = 100000

In [53]:
# Create the model
model = Classifier(input_size, hidden_size, num_classes)

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
model.train()
for epoch in range(num_epochs):
    # Forward pass
    outputs = model(X_train)
    loss = criterion(outputs, y_train.long())
    
    # Backward and optimize
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
    # Print the loss for every epoch

    if (epoch+1) % 20000 == 0:
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")

Epoch [20000/100000], Loss: 1.1941
Epoch [40000/100000], Loss: 1.1818
Epoch [60000/100000], Loss: 1.1781
Epoch [80000/100000], Loss: 1.1769
Epoch [100000/100000], Loss: 1.1758


In [54]:
# Set the model to evaluation mode
model.eval()

# Forward pass on the test data
with torch.no_grad():
    outputs = model(X_test)
    _, predicted = torch.max(outputs.data, 1)

# Calculate the accuracy
total = y_test.size(0)
correct = (predicted == y_test).sum().item()
accuracy = correct / total

# Print the accuracy
print(f"Accuracy on test data: {accuracy * 100:.2f}%")

Accuracy on test data: 42.37%


In [55]:
from sklearn.metrics import confusion_matrix

# Convert the predicted and actual labels to numpy arrays
y_pred = predicted.numpy()
y_actual = y_test.numpy()

# Create the confusion matrix
cm = confusion_matrix(y_actual, y_pred)

# Print the confusion matrix
print(cm)

[[501 205 157 168]
 [208 451 157 245]
 [216 244 297 280]
 [168 177 146 494]]
