In [1]:
# Step 0: Imports
import pandas as pd
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report
import joblib
import os
import numpy as np

# Step 1: Load engineered dataset
df = pd.read_csv("../data/combined_engineered.csv")
print("✅ Loaded dataset")
print("Shape:", df.shape)
print("Label distribution:\n", df['label'].value_counts())


✅ Loaded dataset
Shape: (1572, 50)
Label distribution:
 label
2    670
1    455
0    447
Name: count, dtype: int64


In [2]:
# Step 2: Prepare features and labels
X = df.drop(columns=['label', 'Key', 'Participant_ID']).values.astype(np.float32)
y = df['label'].values.astype(np.int64)

# Train/test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)


In [3]:
# Step 3: PyTorch Dataset
class TabularDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.long)
    def __len__(self):
        return len(self.X)
    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

train_dataset = TabularDataset(X_train, y_train)
test_dataset = TabularDataset(X_test, y_test)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [4]:
# Step 4: Define simple Tabular Transformer model
class SimpleTabTransformer(nn.Module):
    def __init__(self, input_dim, hidden_dim=128, num_classes=3):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.BatchNorm1d(hidden_dim),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.BatchNorm1d(hidden_dim),
            nn.Linear(hidden_dim, num_classes)
        )
    def forward(self, x):
        return self.net(x)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = SimpleTabTransformer(X_train.shape[1]).to(device)

In [5]:
# Step 5: Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)


In [6]:
# Step 6: Training loop
epochs = 20
for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    for xb, yb in train_loader:
        xb, yb = xb.to(device), yb.to(device)
        optimizer.zero_grad()
        out = model(xb)
        loss = criterion(out, yb)
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * xb.size(0)
    epoch_loss = running_loss / len(train_loader.dataset)
    print(f"Epoch {epoch+1}/{epochs}, Loss: {epoch_loss:.4f}")

Epoch 1/20, Loss: 0.9394
Epoch 2/20, Loss: 0.7933
Epoch 3/20, Loss: 0.7421
Epoch 4/20, Loss: 0.7009
Epoch 5/20, Loss: 0.6511
Epoch 6/20, Loss: 0.6126
Epoch 7/20, Loss: 0.5856
Epoch 8/20, Loss: 0.5219
Epoch 9/20, Loss: 0.5095
Epoch 10/20, Loss: 0.4793
Epoch 11/20, Loss: 0.4592
Epoch 12/20, Loss: 0.4257
Epoch 13/20, Loss: 0.3974
Epoch 14/20, Loss: 0.3666
Epoch 15/20, Loss: 0.3327
Epoch 16/20, Loss: 0.3377
Epoch 17/20, Loss: 0.3043
Epoch 18/20, Loss: 0.3013
Epoch 19/20, Loss: 0.2855
Epoch 20/20, Loss: 0.2627


In [7]:
# Step 7: Evaluation
model.eval()
y_pred_list = []
y_true_list = []

with torch.no_grad():
    for xb, yb in test_loader:
        xb = xb.to(device)
        out = model(xb)
        preds = torch.argmax(out, dim=1)
        y_pred_list.extend(preds.cpu().numpy())
        y_true_list.extend(yb.numpy())

print("\nClassification Report (Transformer Model):\n")
print(classification_report(y_true_list, y_pred_list))


Classification Report (Transformer Model):

              precision    recall  f1-score   support

           0       0.60      0.66      0.63        91
           1       0.57      0.56      0.56        97
           2       0.75      0.71      0.73       127

    accuracy                           0.65       315
   macro avg       0.64      0.64      0.64       315
weighted avg       0.65      0.65      0.65       315



In [8]:
# Step 8: Save the model
os.makedirs("../models", exist_ok=True)
torch.save(model.state_dict(), "../models/tabtransformer_sentiment.pt")
print("✅ Transformer model saved at ../models/tabtransformer_sentiment.pt")

✅ Transformer model saved at ../models/tabtransformer_sentiment.pt
