In [None]:
import torch.nn as nn
from transformers import BertModel
import torch
from transformers import AdamW
from tqdm import tqdm
from torch.utils.data import TensorDataset, DataLoader, RandomSampler

In [None]:
# Load the saved tokenized-dataset object from disk.
data = torch.load('../models/tokenized_dataset.pt')

# Unpack the four entries used downstream in one step; the key order here
# matches the order of the names on the left-hand side.
input_ids, attention_masks, features_tensor, labels = (
    data[key]
    for key in ('input_ids', 'attention_masks', 'features_tensor', 'labels')
)

In [None]:
from torch.utils.data import random_split
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler


# Group the per-example tensors into one dataset; TensorDataset indexes all of
# them along the first dimension, so item i is a tuple of the i-th rows in
# this order: (input_ids, attention_mask, tabular features, label).
example_tensors = (input_ids, attention_masks, features_tensor, labels)
dataset = TensorDataset(*example_tensors)

In [None]:
# Split the dataset into training and validation sets
# (80% training, 20% validation).
#
# The sizes must come from `dataset` (one item per example), not from `data`:
# `data` is the loaded container indexed by string keys, so len(data) counts
# its entries rather than the number of examples, and splitting it would not
# yield (input_ids, attention_mask, features, label) tuples.
VAL_FRACTION = 0.2
SPLIT_SEED = 42  # fixed seed so Restart & Run All reproduces the same split

val_size = int(VAL_FRACTION * len(dataset))
train_size = len(dataset) - val_size  # remainder, so the two sizes always sum to len(dataset)

train_dataset, val_dataset = random_split(
    dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)



In [None]:
# Build the batch iterators for training and validation.

batch_size = 16

# Training batches are drawn in random order; validation keeps dataset order.
train_sampler = RandomSampler(train_dataset)
val_sampler = SequentialSampler(val_dataset)

train_dataloader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    sampler=train_sampler,
)
val_dataloader = DataLoader(
    val_dataset,
    batch_size=batch_size,
    sampler=val_sampler,
)



### Define the BERT + Tabular Model

In [None]:

class BERTWithTabular(nn.Module):
    """BERT encoder combined with a linear tabular branch for classification.

    The pooled BERT output is concatenated with a linear projection of the
    tabular features, passed through dropout, and mapped to class logits.

    Args:
        num_tabular_features: Size of the last dimension of ``tabular_input``.
            Defaults to 3 (hour, weekday, emoji).
        tabular_hidden_dim: Output width of the tabular projection.
        num_labels: Number of output classes (logits per example).
        dropout: Dropout probability applied to the concatenated features.
        pretrained_name: Checkpoint name or path passed to
            ``BertModel.from_pretrained``.

    All defaults reproduce the original hard-coded architecture, and the
    submodule attribute names are unchanged, so existing ``state_dict``
    checkpoints keep the same keys.
    """

    def __init__(
        self,
        num_tabular_features=3,
        tabular_hidden_dim=32,
        num_labels=2,
        dropout=0.3,
        pretrained_name='bert-base-uncased',
    ):
        super().__init__()
        self.bert = BertModel.from_pretrained(pretrained_name)
        self.tabular_layer = nn.Linear(num_tabular_features, tabular_hidden_dim)
        self.dropout = nn.Dropout(dropout)
        # Read the encoder width from its config instead of assuming 768, so a
        # differently sized checkpoint still produces a matching classifier.
        bert_hidden_size = self.bert.config.hidden_size
        self.classifier = nn.Linear(bert_hidden_size + tabular_hidden_dim, num_labels)

    def forward(self, input_ids, attention_mask, tabular_input):
        """Return unnormalized class logits for a batch.

        Args:
            input_ids: Token ids forwarded to the BERT encoder.
            attention_mask: Attention mask forwarded to the BERT encoder.
            tabular_input: Tabular features; the last dimension must match the
                tabular layer's input size. Any numeric dtype is accepted.

        Returns:
            Tensor of logits, one row per example and one column per class.
        """
        bert_out = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        pooled_output = bert_out.pooler_output
        # nn.Linear rejects inputs whose dtype differs from its weights (e.g.
        # integer-encoded hour/weekday), so align the dtype first. This is a
        # no-op when the features are already floating point.
        tabular_input = tabular_input.to(self.tabular_layer.weight.dtype)
        tabular_output = self.tabular_layer(tabular_input)
        combined = torch.cat((pooled_output, tabular_output), dim=1)
        combined = self.dropout(combined)
        logits = self.classifier(combined)
        return logits

### Define Optimizer, Loss, Device

In [None]:
import torch

# Report the GPU / PyTorch environment this kernel is running in.
has_cuda = torch.cuda.is_available()
print("CUDA Available:", has_cuda)

# Only query the device name when a CUDA device is actually present.
if has_cuda:
    gpu_name = torch.cuda.get_device_name(0)
else:
    gpu_name = "No GPU detected"
print("Device Name:", gpu_name)

print("Torch CUDA Version:", torch.version.cuda)
print(torch.__version__)



In [None]:
epochs = 3  # number of full passes over the training data; adjust as needed
# Prefer the GPU when one is visible to PyTorch, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = BERTWithTabular().to(device)
# Use PyTorch's own AdamW: the implementation exported by `transformers` is
# deprecated upstream in favour of torch.optim.AdamW. `eps` and `weight_decay`
# are pinned to the values the transformers optimizer used by default, so the
# training behaviour stays comparable (torch's defaults are 1e-8 and 1e-2).
optimizer = torch.optim.AdamW(
    model.parameters(),
    lr=2e-5,
    eps=1e-6,
    weight_decay=0.0,
)
loss_fn = nn.CrossEntropyLoss()

In [None]:
# Fine-tune the model for `epochs` passes over the training batches and print
# the mean batch loss after each pass.
for epoch in range(epochs):
    # Re-assert training mode every epoch so dropout stays active even if the
    # model was switched to eval mode in between.
    model.train()
    total_loss = 0.0
    for batch in tqdm(train_dataloader):
        # Use batch-specific names: unpacking into `input_ids` / `labels` would
        # overwrite the notebook-level tensors of the same name, so re-running
        # the dataset cell afterwards would silently use a single batch.
        batch_input_ids, batch_attention_mask, batch_tabular, batch_labels = (
            tensor.to(device) for tensor in batch
        )

        # Clear gradients left from the previous step before accumulating new ones.
        optimizer.zero_grad()

        outputs = model(batch_input_ids, batch_attention_mask, batch_tabular)
        loss = loss_fn(outputs, batch_labels)

        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    # Guard the average against an empty dataloader (avoids ZeroDivisionError).
    num_batches = max(len(train_dataloader), 1)
    print(f"Epoch {epoch+1} | Loss: {total_loss/num_batches:.4f}")


In [None]:
from sklearn.metrics import classification_report, confusion_matrix

# Evaluate on the validation batches: collect the arg-max class prediction and
# the true label of every example, then print the metrics.
model.eval()  # disables dropout for deterministic predictions
all_preds, all_labels = [], []

with torch.no_grad():  # no gradients are needed for inference
    for batch in val_dataloader:
        # Use batch-specific names so the notebook-level `input_ids` / `labels`
        # tensors are not overwritten by the last validation batch.
        batch_input_ids, batch_attention_mask, batch_tabular, batch_labels = (
            tensor.to(device) for tensor in batch
        )
        logits = model(batch_input_ids, batch_attention_mask, batch_tabular)
        preds = torch.argmax(logits, dim=1)
        # Move to CPU before converting; numpy cannot read device tensors.
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(batch_labels.cpu().numpy())

print(classification_report(all_labels, all_preds))
print(confusion_matrix(all_labels, all_preds))


In [None]:
# Persist only the learned parameters (the state dict), not the module object;
# the relative path resolves against the kernel's current working directory.
checkpoint_path = 'bert_with_tabular.pt'
torch.save(model.state_dict(), checkpoint_path)
