In [None]:
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader, TensorDataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AdamW
from datasets import load_dataset


class HybridBERTModel(nn.Module):
    """Sequence classifier that fuses transformer logits with numerical features.

    The text branch is a Hugging Face sequence-classification model whose
    ``num_labels`` logits are concatenated with the output of a small MLP over
    the numerical features; a final linear layer maps the concatenation to
    ``num_labels`` scores.

    Args:
        model_name: Checkpoint name or path passed to
            ``AutoModelForSequenceClassification.from_pretrained``.
        num_numerical_features: Width of the numerical feature vector per example.
        num_labels: Number of target classes.
        numerical_hidden_size: Width of the numerical MLP (both layers).
            Defaults to 16, the value that was previously hard-coded.
    """

    def __init__(self, model_name, num_numerical_features, num_labels, numerical_hidden_size=16):
        super().__init__()
        self.bert = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=num_labels)

        self.numerical_network = nn.Sequential(
            nn.Linear(num_numerical_features, numerical_hidden_size),
            nn.ReLU(),
            nn.Linear(numerical_hidden_size, numerical_hidden_size),
            nn.ReLU(),
        )

        # The fused vector is [text logits | numerical embedding]; its width must
        # follow the arguments instead of the literal 22 (= 6 + 16) so that the
        # model also works when num_labels is not 6.
        self.classifier = nn.Linear(num_labels + numerical_hidden_size, num_labels)

    def forward(self, input_ids, attention_mask, numerical_features):
        """Return class scores of shape ``(batch, num_labels)``.

        Args:
            input_ids: Token ids forwarded to the text model.
            attention_mask: Attention mask forwarded to the text model.
            numerical_features: Float tensor of shape
                ``(batch, num_numerical_features)``.
        """
        text_logits = self.bert(input_ids=input_ids, attention_mask=attention_mask).logits
        numerical_output = self.numerical_network(numerical_features)

        # Concatenate along the feature axis; batch axis (dim 0) must match.
        combined_features = torch.cat((text_logits, numerical_output), dim=1)
        return self.classifier(combined_features)

In [None]:
def encode_data(dataset):
    """Tokenize a pandas DataFrame of LIAR rows and wrap it in a ``LIARDataset``.

    Args:
        dataset: DataFrame with a ``statement`` text column, a ``label`` column
            and the five ``*_counts`` numerical columns selected below.

    Returns:
        A ``LIARDataset`` holding the tokenizer encodings, the numerical
        features as a list of rows, and the labels as a list.
    """
    count_columns = [
        'barely_true_counts',
        'false_counts',
        'half_true_counts',
        'mostly_true_counts',
        'pants_on_fire_counts',
    ]

    # Hand the tokenizer a plain list of strings rather than a pandas Series.
    statements = dataset['statement'].tolist()
    encodings = tokenizer(statements, truncation=True, padding=True, max_length=128)

    numerical_features = dataset[count_columns].values.tolist()

    # A list is indexed by position; a Series would be indexed by label, which
    # breaks LIARDataset.__getitem__ whenever the frame's index is not 0..n-1
    # (e.g. after filtering or a train/test split).
    labels = dataset['label'].tolist()
    return LIARDataset(encodings, numerical_features, labels)

class LIARDataset(Dataset):
    """Map-style dataset pairing tokenizer encodings with numerical features and labels.

    Each item is a dict of tensors: one entry per key in ``encodings`` plus
    ``'numerical_features'`` (float) and ``'labels'``.
    """

    def __init__(self, encodings, numerical_features, labels):
        # All three containers are indexed by example position in __getitem__.
        self.encodings = encodings
        self.numerical_features = numerical_features
        self.labels = labels

    def __len__(self):
        # The label container defines the number of examples.
        return len(self.labels)

    def __getitem__(self, idx):
        sample = {}
        for field, column in self.encodings.items():
            sample[field] = torch.tensor(column[idx])

        feature_row = self.numerical_features[idx]
        sample['numerical_features'] = torch.tensor(feature_row, dtype=torch.float)
        sample['labels'] = torch.tensor(self.labels[idx])
        return sample

In [None]:


# Load the LIAR dataset through `datasets.load_dataset`; the 'train',
# 'validation' and 'test' splits are consumed further down in this cell.
dataset = load_dataset("liar")

# Tokenizer for the same checkpoint name that is passed to HybridBERTModel below.
tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')

def preprocess_data(examples):
    """Tokenize statements and collect labels and numerical features for a split.

    Args:
        examples: Column-indexable split (``examples[column]`` yields a sequence)
            providing ``statement``, ``label`` and the five ``*_counts`` columns.

    Returns:
        Tuple ``(tokenized, numerical_features, labels)`` where ``tokenized`` is
        the tokenizer output, ``numerical_features`` is a float tensor of shape
        ``(num_examples, 5)`` and ``labels`` is a tensor of class ids.
    """
    count_columns = (
        'barely_true_counts',
        'false_counts',
        'half_true_counts',
        'mostly_true_counts',
        'pants_on_fire_counts',
    )

    tokenized = tokenizer(examples['statement'], padding=True, truncation=True, max_length=128)

    labels = torch.tensor(examples['label'])

    # Use the real per-speaker count columns instead of random noise, so the
    # numerical branch sees actual signal and repeated calls are deterministic.
    # Missing entries (None) are mapped to 0.0 so tensor construction cannot fail.
    per_column = [
        [0.0 if value is None else float(value) for value in examples[name]]
        for name in count_columns
    ]
    # per_column is (5, N); transpose to one row per example.
    numerical_features = torch.tensor(per_column, dtype=torch.float).t().contiguous()
    # Guard against NaN counts propagating into the loss.
    numerical_features = torch.nan_to_num(numerical_features, nan=0.0)
    return tokenized, numerical_features, labels

def create_dataset(dataset_split):
    """Turn one dataset split into a ``TensorDataset``.

    The tensors are stored in the order
    ``(input_ids, attention_mask, numerical_features, labels)``.
    """
    encoded, feature_tensor, label_tensor = preprocess_data(dataset_split)

    # The tokenizer output holds Python lists here; convert them to tensors.
    ids_tensor = torch.tensor(encoded['input_ids'])
    mask_tensor = torch.tensor(encoded['attention_mask'])

    return TensorDataset(ids_tensor, mask_tensor, feature_tensor, label_tensor)

# Build one TensorDataset per split; each item is the tuple
# (input_ids, attention_mask, numerical_features, labels).
train_dataset = create_dataset(dataset['train'])
val_dataset = create_dataset(dataset['validation'])
test_dataset = create_dataset(dataset['test'])

train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=16)

model = HybridBERTModel('bert-base-uncased', num_numerical_features=5, num_labels=6)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

# torch.optim.AdamW replaces the deprecated transformers.AdamW; weight_decay=0.0
# is set explicitly because PyTorch's default is non-zero.
# NOTE(review): the old optimizer is assumed to have defaulted to no weight decay — confirm.
optimizer = torch.optim.AdamW(model.parameters(), lr=5e-5, weight_decay=0.0)

# Build the loss module once rather than on every training step.
criterion = nn.CrossEntropyLoss()

model.train()
for epoch in range(3):
    running_loss = 0.0
    num_batches = 0

    for batch in train_loader:
        input_ids, attention_mask, numerical_features, labels = (
            tensor.to(device) for tensor in batch
        )

        # Call the module itself (not .forward) so registered hooks still run.
        outputs = model(input_ids, attention_mask, numerical_features)
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        num_batches += 1

    # Report the mean loss over the epoch; the last batch alone is too noisy.
    print(f"Epoch {epoch + 1}, Loss: {running_loss / max(num_batches, 1)}")


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


torch.Size([16, 6]) torch.Size([16, 16])
torch.Size([16, 6]) torch.Size([16, 16])
torch.Size([16, 6]) torch.Size([16, 16])
torch.Size([16, 6]) torch.Size([16, 16])
torch.Size([16, 6]) torch.Size([16, 16])
torch.Size([16, 6]) torch.Size([16, 16])
torch.Size([16, 6]) torch.Size([16, 16])
torch.Size([16, 6]) torch.Size([16, 16])
torch.Size([16, 6]) torch.Size([16, 16])
torch.Size([16, 6]) torch.Size([16, 16])
torch.Size([16, 6]) torch.Size([16, 16])
torch.Size([16, 6]) torch.Size([16, 16])
torch.Size([16, 6]) torch.Size([16, 16])
torch.Size([16, 6]) torch.Size([16, 16])
torch.Size([16, 6]) torch.Size([16, 16])
torch.Size([16, 6]) torch.Size([16, 16])
torch.Size([16, 6]) torch.Size([16, 16])
torch.Size([16, 6]) torch.Size([16, 16])
torch.Size([16, 6]) torch.Size([16, 16])
torch.Size([16, 6]) torch.Size([16, 16])
torch.Size([16, 6]) torch.Size([16, 16])
torch.Size([16, 6]) torch.Size([16, 16])
torch.Size([16, 6]) torch.Size([16, 16])
torch.Size([16, 6]) torch.Size([16, 16])
torch.Size([16, 

In [None]:
def evaluate_model(model, data_loader):
    """Compute classification accuracy of ``model`` over ``data_loader``.

    Batches may be either mappings with the keys ``input_ids``,
    ``attention_mask``, ``numerical_features`` and ``labels``, or sequences in
    the order ``(input_ids, attention_mask, numerical_features, labels)``.

    Args:
        model: Module called as ``model(input_ids, attention_mask, numerical_features)``
            and returning per-class scores of shape ``(batch, num_classes)``.
        data_loader: Iterable of batches in one of the two layouts above.

    Returns:
        Fraction of correctly classified examples as a float in ``[0, 1]``;
        ``0.0`` when the loader yields no examples.

    Note:
        The model is switched to eval mode and left in that mode.
    """
    model.eval()

    # Run on whatever device the model lives on instead of relying on a
    # module-level `device` variable; parameter-free models fall back to CPU.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device('cpu')

    correct_predictions = 0
    total_predictions = 0

    with torch.no_grad():
        for batch in data_loader:
            if hasattr(batch, 'keys'):
                # Dict-style batch (e.g. a dataset whose items are dicts).
                input_ids = batch['input_ids']
                attention_mask = batch['attention_mask']
                numerical_features = batch['numerical_features']
                labels = batch['labels']
            else:
                # Tuple-style batch (e.g. a TensorDataset).
                input_ids, attention_mask, numerical_features, labels = batch

            input_ids = input_ids.to(target_device)
            attention_mask = attention_mask.to(target_device)
            numerical_features = numerical_features.to(target_device)
            labels = labels.to(target_device)

            outputs = model(input_ids, attention_mask, numerical_features)
            predicted = outputs.argmax(dim=1)
            total_predictions += labels.size(0)
            correct_predictions += (predicted == labels).sum().item()

    if total_predictions == 0:
        # Avoid ZeroDivisionError on an empty loader.
        return 0.0
    return correct_predictions / total_predictions

# Score the model on the validation loader. Both `model` and `val_loader` are
# created in the training cell, so that cell must have run in this kernel first
# (otherwise this line raises NameError).
accuracy = evaluate_model(model, val_loader)
print(f'Validation Accuracy: {accuracy * 100:.2f}%')


NameError: name 'model' is not defined