In [2]:
from transformers import BertTokenizer, BertForSequenceClassification
import torch
from tqdm import tqdm
from torchvision import datasets
from torch.utils.data import DataLoader
from torch import nn
from torch.nn import functional as F
import importlib

from utils import *

device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)

2024-05-02 09:15:37.360294: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


cpu


[nltk_data] Downloading package punkt to /Users/rfd/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [3]:
n_labels = 2

epochs = 3
lr = 0.0001
manual_loss= False
batch_sizes = 8
max_len = 64

loss = 'cross_ent'

run_data = {}

In [None]:
# to test

# pick a set of params and then run all the models and pick the best performing one on val and the test and val

In [4]:
include_dev=False
tokenizer = BertTokenizer.from_pretrained('bert-base-cased')

batch_size_train = 8
batch_size_test = 8
batch_size_dev = 8

if include_dev:
    train_df, val_df, test_df = get_processed_data(dev=True)
    
    train_df= get_cols_for_bert(train_df, 'prop')
    val_df= get_cols_for_bert(val_df, 'prop')
    test_df= get_cols_for_bert(test_df, 'prop')
    
    train_input_embeddings_labelled = format_and_tokenise_from_df(train_df, tokenizer,max_len=64)
    val_input_embeddings_labelled = format_and_tokenise_from_df(val_df, tokenizer,max_len=64)
    test_input_embeddings_labelled = format_and_tokenise_from_df(test_df, tokenizer,max_len=64)
    
    train_dataset = CustomPropagandaDataset(train_input_embeddings_labelled)
    test_dataset = CustomPropagandaDataset(test_input_embeddings_labelled)
    val_dataset = CustomPropagandaDataset(val_input_embeddings_labelled)
    


    train_dataloader = DataLoader(train_dataset, batch_size=batch_size_train, shuffle=True)
    test_dataloader = DataLoader(test_dataset, batch_size=batch_size_test, shuffle=True)
    val_dataloader = DataLoader(val_dataset, batch_size=batch_size_dev, shuffle=True)
    
    
else:
    train_df, val_df = get_processed_data(dev=False)
    
    train_df= get_cols_for_bert(train_df, 'prop')
    val_df= get_cols_for_bert(val_df, 'prop')
    
    train_input_embeddings_labelled = format_and_tokenise_from_df(train_df, tokenizer,max_len=64)
    val_input_embeddings_labelled = format_and_tokenise_from_df(val_df, tokenizer,max_len=64)
    
    train_dataset = CustomPropagandaDataset(train_input_embeddings_labelled)
    val_dataset = CustomPropagandaDataset(val_input_embeddings_labelled)
    
    train_dataloader = DataLoader(train_dataset, batch_size=batch_size_train, shuffle=True)
    val_dataloader = DataLoader(val_dataset, batch_size=batch_size_dev, shuffle=True)
    
    


2560
2560
[0, 0, 1, 0, 0]
['No,  he  will not be confirmed. ', 'This declassification effort  won’t make things any worse than they are for President Trump.  ', '"The Obama administration misled the  American people  and Congress because they were desperate to get a deal with Iran," said Sen. ', '“It looks like we’re capturing the demise of this dark vortex,  and  it’s different from what well-known studies led us to expect,” said Michael H. Wong of the University of California at Berkeley says. ', ' Location: Westerville, Ohio  ']
640
640
[0, 1, 1, 0, 1]
['On average, between 300 and 600 infections are recorded every year among a population approaching 25 million people,  according to a UN estimate.  ', 'Mostly because  the country would not last long without an outside high IQ elite to run the country . ', 'Lyndon Johnson  gets Earl Warren and Sen. Richard Russel to join the Warren Commission by telling them that the assassination could lead to World War III . ', ' You  may opt out a

In [5]:
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=n_labels)
optimizer = torch.optim.Adam(params=model.parameters(), lr=lr)
criterion = nn.CrossEntropyLoss()

train_losses = []
train_accuracy = []
val_losses = []
val_accuracy = []

model.to(device)

for epoch in range(epochs):
    train_running_losses = []
    train_total = 0
    train_correct = 0
    
    model.train()
    for batch in tqdm(train_dataloader):
        batch = {k: v.to(device) for k, v in batch.items()}
        outputs = model(**batch)
        
        if manual_loss:
            loss = criterion(outputs.logits, batch['labels'])
        else:
            loss = outputs.loss
        
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        
        _, indices = torch.max(outputs['logits'], dim=1)
        predicted_labels = indices
        train_total += batch['labels'].size(0)
        train_correct += (predicted_labels == batch['labels']).sum().item()
        train_running_losses.append(loss.item())
    
    train_losses.append(sum(train_running_losses) / len(train_running_losses))
    train_accuracy.append(train_correct / train_total)
    print(f'TRAIN: Epoch [{epoch+1}/{epochs}] Loss: {sum(train_running_losses)/len(train_running_losses)} Acc: {train_correct/train_total}')
    
    model.eval()
    with torch.no_grad():
        val_running_losses = []
        val_total = 0
        val_correct = 0
        
        for batch in tqdm(val_dataloader):
            batch = {k: v.to(device) for k, v in batch.items()}
            outputs = model(**batch)
            loss = outputs.loss
            
            _, indices = torch.max(outputs['logits'], dim=1)
            predicted_labels = indices
            
            val_total += batch['labels'].size(0)
            val_correct += (predicted_labels == batch['labels']).sum().item()
            val_running_losses.append(loss.item())
        
        val_losses.append(sum(val_running_losses) / len(val_running_losses))
        val_accuracy.append(val_correct / val_total)
        print(f'VAL: Epoch [{epoch+1}/{epochs}] Loss: {sum(val_running_losses)/len(val_running_losses)} Acc: {val_correct/val_total}')
    
    if include_dev:
        print('TESTING...')
        test_losses = []
        test_accuracy = []
        
        model.eval()
        with torch.no_grad():
            test_running_losses = []
            test_total = 0
            test_correct = 0
            
            for batch in test_dataloader:
                batch = {k: v.to(device) for k, v in batch.items()}
                outputs = model(**batch)
                loss = outputs.loss
                
                _, indices = torch.max(outputs['logits'], dim=1)
                predicted_labels = indices
                
                test_total += batch['labels'].size(0)
                test_correct += (predicted_labels == batch['labels']).sum().item()
                test_running_losses.append(loss.item())
            
            test_losses.append(sum(test_running_losses) / len(test_running_losses))
            test_accuracy.append(test_correct / test_total)
            print(f'TEST: Epoch [{epoch+1}/{epochs}] Loss: {sum(test_running_losses)/len(test_running_losses)} Acc: {test_correct/test_total}')

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly i

KeyboardInterrupt: 

In [1]:
# using binary class specific settings:
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=n_labels)
optimizer = torch.optim.Adam(params=model.parameters(), lr=lr)
criterion = nn.BCEWithLogitsLoss()

train_losses = []
train_accuracy = []
val_losses = []
val_accuracy = []

model.to(device)

for epoch in range(epochs):
    train_running_losses = []
    train_total = 0
    train_correct = 0
    
    model.train()
    for batch in tqdm(train_dataloader):
        batch = {k: v.to(device) for k, v in batch.items()}
        outputs = model(**batch)
        
        if manual_loss:
            loss = criterion(outputs.logits.view(-1), batch['labels'].float())
        else:
            loss = outputs.loss
        
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        
        predicted_labels = (outputs.logits.view(-1) >= 0.5).int()
        train_total += batch['labels'].size(0)
        train_correct += (predicted_labels == batch['labels']).sum().item()
        train_running_losses.append(loss.item())
    
    train_losses.append(sum(train_running_losses) / len(train_running_losses))
    train_accuracy.append(train_correct / train_total)
    print(f'TRAIN: Epoch [{epoch+1}/{epochs}] Loss: {sum(train_running_losses)/len(train_running_losses)} Acc: {train_correct/train_total}')
    
    model.eval()
    with torch.no_grad():
        val_running_losses = []
        val_total = 0
        val_correct = 0
        
        for batch in tqdm(val_dataloader):
            batch = {k: v.to(device) for k, v in batch.items()}
            outputs = model(**batch)
            loss = outputs.loss
            
            predicted_labels = (outputs.logits.view(-1) >= 0.5).int()
            
            val_total += batch['labels'].size(0)
            val_correct += (predicted_labels == batch['labels']).sum().item()
            val_running_losses.append(loss.item())
        
        val_losses.append(sum(val_running_losses) / len(val_running_losses))
        val_accuracy.append(val_correct / val_total)
        print(f'VAL: Epoch [{epoch+1}/{epochs}] Loss: {sum(val_running_losses)/len(val_running_losses)} Acc: {val_correct/val_total}')
    
    if include_dev:
        print('TESTING...')
        test_losses = []
        test_accuracy = []
        
        model.eval()
        with torch.no_grad():
            test_running_losses = []
            test_total = 0
            test_correct = 0
            
            for batch in test_dataloader:
                batch = {k: v.to(device) for k, v in batch.items()}
                outputs = model(**batch)
                loss = outputs.loss
                
                predicted_labels = (outputs.logits.view(-1) >= 0.5).int()
                
                test_total += batch['labels'].size(0)
                test_correct += (predicted_labels == batch['labels']).sum().item()
                test_running_losses.append(loss.item())
            
            test_losses.append(sum(test_running_losses) / len(test_running_losses))
            test_accuracy.append(test_correct / test_total)
            print(f'TEST: Epoch [{epoch+1}/{epochs}] Loss: {sum(test_running_losses)/len(test_running_losses)} Acc: {test_correct/test_total}')

In [None]:
model = BertClassifier(num_classes=n_labels)
optimizer = torch.optim.Adam(params=model.parameters(), lr=lr)
criterion = nn.CrossEntropyLoss()
train_losses = []
train_accuracy = []
val_losses = []
val_accuracy = []

model.to(device)

for epoch in range(epochs):
    train_running_losses = []
    train_total = 0
    train_correct = 0
    
    model.train()
    for batch in tqdm(train_dataloader):
        batch = {k: v.to(device) for k, v in batch.items()}
        outputs = model(batch['input_ids'], batch['attention_mask'])
        batch_loss = criterion(outputs, batch['labels'].long())
        train_running_losses.append(batch_loss.item())
        
        acc = (outputs.argmax(dim=1) == batch['labels']).sum().item()
        train_correct += acc
        train_total += len(batch['input_ids'])
        
        model.zero_grad()
        batch_loss.backward()
        optimizer.step()
    
    train_losses.append(sum(train_running_losses) / len(train_running_losses))
    train_accuracy.append(train_correct / train_total)
    print(f'TRAIN: Epoch [{epoch + 1}/{epochs}] Loss: {sum(train_running_losses) / len(train_running_losses)} Acc: {train_correct / train_total}')
    
    model.eval()
    with torch.no_grad():
        val_running_losses = []
        val_total = 0
        val_correct = 0
        
        for batch in tqdm(val_dataloader):
            batch = {k: v.to(device) for k, v in batch.items()}
            outputs = model(batch['input_ids'], batch['attention_mask'])
            batch_loss = criterion(outputs, batch['labels'].long())
            val_running_losses.append(batch_loss.item())
            
            acc = (outputs.argmax(dim=1) == batch['labels']).sum().item()
            val_correct += acc
            val_total += len(batch['input_ids'])
        
        val_losses.append(sum(val_running_losses) / len(val_running_losses))
        val_accuracy.append(val_correct / val_total)
        print(f'VAL: Epoch [{epoch + 1}/{epochs}] Loss: {sum(val_running_losses) / len(val_running_losses)} Acc: {val_correct / val_total}')

if include_dev:
    print('TESTING')
    model.eval()
    with torch.no_grad():
        test_running_losses = []
        test_total = 0
        test_correct = 0
        for batch in test_dataloader:
            batch = {k: v.to(device) for k, v in batch.items()}
            outputs = model(batch['input_ids'], batch['attention_mask'])
            batch_loss=criterion(outputs,batch['labels'].long())
            
            
            test_total+=len(batch['input_ids'])
            
            
            acc=(outputs.argmax(dim=1)==batch['labels']).sum().item()
            test_correct+=acc



            test_running_losses.append(batch_loss.item())

        test_losses.append(sum(test_running_losses)/len(test_running_losses))
        test_accuracy.append(test_correct/test_total)

    print(f'TEST: Epoch [{epoch + 1}/{epochs}] Loss: {sum(test_running_losses)/len(test_running_losses)} Acc: {test_correct/test_total}')

KeyboardInterrupt: 

In [None]:
class BertClassifier_2(nn.Module):
    def __init__(self, dropout=0.5, num_classes=2):
        super().__init__()
        self.bert = BertModel.from_pretrained('bert-base-uncased')
        self.dropout = nn.Dropout(dropout)
        self.linear_1 = nn.Linear(768, 128)
        self.relu=nn.ReLU()
        self.linear_2 = nn.Linear(128, num_classes)

        if num_classes == 2:
            self.activation = nn.Sigmoid()
        else:
            self.activation = nn.Softmax(dim=1)

    def forward(self, input_id, mask):
        last_hidden_layer, pooled_output = self.bert(input_ids=input_id, attention_mask=mask, return_dict=False)
        dropout_output = self.dropout(pooled_output)
        linear_output_1 = self.linear_1(dropout_output)
        relu_applied = self.relu(linear_output_1)
        dropout = self.dropout(relu_applied)
        linear_output_2 = self.linear_2(dropout)
        final_layer = self.activation(linear_output_2)
        return final_layer
    
    
model=BertClassifier_2(num_classes=n_labels)
optimizer = torch.optim.Adam(params=model.parameters(), lr=lr)
criterion = nn.CrossEntropyLoss()
train_losses = []
train_accuracy = []
val_losses = []
val_accuracy = []

model.to(device)

for epoch in range(epochs):
    train_running_losses = []
    train_total = 0
    train_correct = 0
    
    model.train()
    for batch in tqdm(train_dataloader):
        batch = {k: v.to(device) for k, v in batch.items()}
        outputs = model(batch['input_ids'], batch['attention_mask'])
        batch_loss = criterion(outputs, batch['labels'].long())
        train_running_losses.append(batch_loss.item())
        
        acc = (outputs.argmax(dim=1) == batch['labels']).sum().item()
        train_correct += acc
        train_total += len(batch['input_ids'])
        
        model.zero_grad()
        batch_loss.backward()
        optimizer.step()
    
    train_losses.append(sum(train_running_losses) / len(train_running_losses))
    train_accuracy.append(train_correct / train_total)
    print(f'TRAIN: Epoch [{epoch + 1}/{epochs}] Loss: {sum(train_running_losses) / len(train_running_losses)} Acc: {train_correct / train_total}')
    
    model.eval()
    with torch.no_grad():
        val_running_losses = []
        val_total = 0
        val_correct = 0
        
        for batch in tqdm(val_dataloader):
            batch = {k: v.to(device) for k, v in batch.items()}
            outputs = model(batch['input_ids'], batch['attention_mask'])
            batch_loss = criterion(outputs, batch['labels'].long())
            val_running_losses.append(batch_loss.item())
            
            acc = (outputs.argmax(dim=1) == batch['labels']).sum().item()
            val_correct += acc
            val_total += len(batch['input_ids'])
        
        val_losses.append(sum(val_running_losses) / len(val_running_losses))
        val_accuracy.append(val_correct / val_total)
        print(f'VAL: Epoch [{epoch + 1}/{epochs}] Loss: {sum(val_running_losses) / len(val_running_losses)} Acc: {val_correct / val_total}')



TRAIN: Epoch [1/10] Loss: 0.6914308048784733 Acc: 0.511328125
VAL: Epoch [1/10] Loss: 0.6871590316295624 Acc: 0.609375
TRAIN: Epoch [2/10] Loss: 0.6791071869432926 Acc: 0.60078125
VAL: Epoch [2/10] Loss: 0.6692460775375366 Acc: 0.6109375
TRAIN: Epoch [3/10] Loss: 0.6675182245671749 Acc: 0.614453125
VAL: Epoch [3/10] Loss: 0.6615218460559845 Acc: 0.61875
TRAIN: Epoch [4/10] Loss: 0.6517904222011566 Acc: 0.63515625
VAL: Epoch [4/10] Loss: 0.6539186716079712 Acc: 0.6265625
TRAIN: Epoch [5/10] Loss: 0.6428570955991745 Acc: 0.65
VAL: Epoch [5/10] Loss: 0.6485287368297576 Acc: 0.6453125
TRAIN: Epoch [6/10] Loss: 0.6264784887433053 Acc: 0.681640625
VAL: Epoch [6/10] Loss: 0.6409658163785934 Acc: 0.6453125
TRAIN: Epoch [7/10] Loss: 0.6019117549061775 Acc: 0.713671875
VAL: Epoch [7/10] Loss: 0.634257686138153 Acc: 0.6671875
TRAIN: Epoch [8/10] Loss: 0.5684359095990658 Acc: 0.754296875
VAL: Epoch [8/10] Loss: 0.6334288597106934 Acc: 0.659375
TRAIN: Epoch [9/10] Loss: 0.5398325584828854 Acc: 0.78