In [6]:
!pip install tab-transformer-pytorch

Collecting tab-transformer-pytorch
  Using cached tab_transformer_pytorch-0.3.0-py3-none-any.whl.metadata (690 bytes)
Using cached tab_transformer_pytorch-0.3.0-py3-none-any.whl (6.9 kB)
Installing collected packages: tab-transformer-pytorch
Successfully installed tab-transformer-pytorch-0.3.0


In [7]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import numpy as np
# Load the dataset
data_train = pd.read_csv('/Users/salma/Downloads/features_train.csv')
data_test= pd.read_csv('/Users/salma/Downloads/features_test.csv')


# Assume 'target' is the name of your target column
target = 'Target'  # Replace this with your actual target column name

features_train = data_train.columns.drop([target, "Image"])  # Assuming the rest are features
features_test = data_test.columns.drop([target, "Image"])  # Assuming the rest are features

# Normalize continuous columns
scaler = StandardScaler()
data_train[features_train] = scaler.fit_transform(data_train[features_train])
data_test[features_test] = scaler.transform(data_test[features_test])

X_train = data_train[features_train]
X_test = data_test[features_test]

#label encoding the target column
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
data_train[target] = le.fit_transform(data_train[target])
data_test[target] = le.transform(data_test[target])

y_train = data_train[target].values
y_test = data_test[target].values

In [40]:
from tab_transformer_pytorch import TabTransformer, FTTransformer
import torch
from torch.nn import ReLU

num_continuous = X_train.shape[1]
dim_output = len(pd.unique(data_train[target]))

model = TabTransformer(
    categories=[],  # no categorical features
    num_continuous=num_continuous,  # number of continuous columns
    dim_out=dim_output,  # number of classes
    depth=8,  # depth of the transformer
    heads=12,  # attention heads
    attn_dropout=0.4,
    ff_dropout=0.4,
    dim = 64,
    mlp_hidden_mults=(128, 64),  # multiples of each hidden layer in the final MLP
    mlp_act=ReLU()  # activation of the final MLP
)

model = FTTransformer(
    categories = (),      # tuple containing the number of unique values within each category
    num_continuous = num_continuous,                # number of continuous values
    dim = 32,                           # dimension, paper set at 32
    dim_out = 5,                        
    depth = 6,                          # depth, paper recommended 6
    heads = 8,                          # heads, paper recommends 8
    attn_dropout = 0.1,                 # post-attention dropout
    ff_dropout = 0.1                    # feed forward dropout
)


# Convert data to torch tensors
X_train_tensor = torch.tensor(X_train.values, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
X_test_tensor = torch.tensor(X_test.values, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

# Define your loss function and optimizer
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)


In [41]:
def evaluate(model, X_cat, X_test_tensor, y_test_tensor):
    with torch.no_grad():
        model.eval()
        outputs = model(X_cat, X_test_tensor)
        _, predicted = torch.max(outputs.data, 1)
        total = y_test_tensor.size(0)
        correct = (predicted == y_test_tensor).sum().item()
        print(f'Accuracy: {100 * correct / total:.2f}%')
        return 100 * correct / total

In [42]:
# Training loop
#empty categorical array
X_cat = np.array([])
best_accuracy = 0
for epoch in range(200):
    optimizer.zero_grad()
    output = model(X_cat, X_train_tensor)
    loss = criterion(output, y_train_tensor)
    loss.backward()
    optimizer.step()
    print(f'Epoch {epoch+1}, Loss: {loss.item()}')
    test_Accuracy = evaluate(model, X_cat, X_test_tensor, y_test_tensor)
    if test_Accuracy > best_accuracy:
        best_accuracy = test_Accuracy
        torch.save(model.state_dict(), 'best_model.pth')
        print('Model saved!')


Epoch 1, Loss: 1.6020207405090332
Accuracy: 25.83%
Model saved!
Epoch 2, Loss: 10.513237953186035
Accuracy: 47.80%
Model saved!
Epoch 3, Loss: 12.197381019592285
Accuracy: 50.73%
Model saved!
Epoch 4, Loss: 10.233697891235352
Accuracy: 25.83%
Epoch 5, Loss: 9.128161430358887
Accuracy: 43.81%
Epoch 6, Loss: 4.014889240264893
Accuracy: 51.00%
Model saved!
Epoch 7, Loss: 2.8073935508728027
Accuracy: 49.93%
Epoch 8, Loss: 2.126082181930542
Accuracy: 50.07%
Epoch 9, Loss: 1.7405215501785278
Accuracy: 47.80%
Epoch 10, Loss: 1.4988607168197632
Accuracy: 44.87%
Epoch 11, Loss: 1.3496067523956299
Accuracy: 43.81%
Epoch 12, Loss: 1.2527309656143188
Accuracy: 43.54%
Epoch 13, Loss: 1.1439390182495117
Accuracy: 44.07%
Epoch 14, Loss: 1.046669840812683
Accuracy: 46.21%
Epoch 15, Loss: 0.9824427366256714
Accuracy: 51.13%
Model saved!
Epoch 16, Loss: 0.9556560516357422
Accuracy: 56.86%
Model saved!
Epoch 17, Loss: 0.957031786441803
Accuracy: 60.85%
Model saved!
Epoch 18, Loss: 0.9662835001945496
Accu

In [43]:
#load the best model
model.load_state_dict(torch.load('best_model.pth'))

<All keys matched successfully>

In [44]:
from sklearn.metrics import f1_score, precision_score, recall_score, confusion_matrix

#calculate f1 score, precision, recall
with torch.no_grad():
    model.eval()
    outputs = model(X_cat, X_test_tensor)
    _, predicted = torch.max(outputs.data, 1)
    total = y_test_tensor.size(0)
    correct = (predicted == y_test_tensor).sum().item()
    print(f'Accuracy: {100 * correct / total:.2f}%')
    f1 = f1_score(y_test_tensor, predicted, average='weighted')
    precision = precision_score(y_test_tensor, predicted, average='weighted')
    recall = recall_score(y_test_tensor, predicted, average='weighted')
    print(f'F1 Score: {100*f1:.2f}')
    print(f'Precision: {100*precision:.2f}')
    print(f'Recall: {100*recall:.2f}')
    cm = confusion_matrix(y_test_tensor, predicted)
    print('Confusion Matrix:')
    print(cm)

Accuracy: 67.91%
F1 Score: 68.36
Precision: 69.08
Recall: 67.91
Confusion Matrix:
[[  2   1  11   1   5]
 [ 16  58  38  13  14]
 [  6  60 120   2   3]
 [  1  11   5 132   2]
 [  1   9  27  15 198]]
