In [1]:
pip install transformers



In [2]:
pip install torch



In [3]:
from transformers import BertTokenizer, BertForSequenceClassification, AdamW, AutoTokenizer, AutoModelForSequenceClassification
import torch
from torch.utils.data import DataLoader, TensorDataset
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

In [4]:
labelled_tweets = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Final_Labels.csv')


train_df, test_df = train_test_split(labelled_tweets, test_size = 0.2, random_state=42)
train_df, val_df = train_test_split(train_df, test_size = 0.2, random_state=42)

**Model 1**

In [5]:
model_name = 'bert-base-uncased'
tokenizer = BertTokenizer.from_pretrained(model_name)
model = BertForSequenceClassification.from_pretrained(model_name,num_labels = 4)

#train_df, test_df = train_test_split(labelled_tweets, test_size = 0.2, random_state=42)
#train_df, val_df = train_test_split(train_df, test_size = 0.2, random_state=42)
#labelled_tweets['Label'] = labelled_tweets['Label'].replace(['Supporter','Manipulator','Against','Neutral'],
 #                                                           [0,1,2,3])
#labels = labelled_tweets['Label'].tolist()
#tweets = labelled_tweets['TranslatedText'].tolist()

#encoded_tweets = tokenizer(tweets, padding = True, truncation=True, return_tensors='pt')

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [9]:
def tokenize_data(data, tokenizer):
  input_ids = []
  attention_masks = []

  for text in data['TranslatedText']:
    encoded_text = tokenizer.encode_plus(
        text,
        add_special_tokens = True,
        max_length=256,
        pad_to_max_length=True,
        return_attention_mask=True,
        return_tensors = 'pt'
    )
    input_ids.append(encoded_text['input_ids'])
    attention_masks.append(encoded_text['attention_mask'])

  input_ids = torch.cat(input_ids, dim=0)
  attention_masks = torch.cat(attention_masks, dim = 0)
  labels = torch.tensor(data['Label'].map({'Supporter':0,'Against':1,'Manipulator':2,'Neutral': 3}).tolist())

  return input_ids, attention_masks, labels

In [12]:
#train_size = 739
#val_size = 158
#test_size = 158
train_input_ids, train_attention_masks, train_labels = tokenize_data(train_df, tokenizer)
val_input_ids, val_attention_masks, val_labels = tokenize_data(val_df, tokenizer)
test_input_ids, test_attention_masks, test_labels = tokenize_data(test_df, tokenizer)


Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


In [13]:
train_dataset = TensorDataset(train_input_ids, train_attention_masks, train_labels)
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)

val_dataset = TensorDataset(val_input_ids, val_attention_masks, val_labels)
val_loader = DataLoader(val_dataset, batch_size=32)

test_dataset = TensorDataset(test_input_ids, test_attention_masks, test_labels)
test_loader = DataLoader(test_dataset, batch_size=32)

In [None]:
#train_dataset = TensorDataset(encoded_tweets['input_ids'][:train_size],encoded_tweets['attention_mask'][:train_size],torch.tensor(labels[:train_size]))
#train_loader = DataLoader(train_dataset, batch_size=2, shuffle=True)
#
#val_dataset = TensorDataset(encoded_tweets['input_ids'][train_size:train_size+val_size],
 #                            encoded_tweets['attention_mask'][train_size:train_size+val_size],
  #                           torch.tensor(labels[train_size:train_size+val_size]))
#val_loader = DataLoader(val_dataset, batch_size=2)

In [9]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)
optimizer = AdamW(model.parameters(), lr=2e-5)



In [10]:
device

device(type='cuda')

In [14]:
num_epochs = 5
for epoch in range(num_epochs):
    model.train()
    for batch in train_loader:
        input_ids, attention_mask, label = batch
        input_ids, attention_mask, label = input_ids.to(device), attention_mask.to(device), label.to(device)

        outputs = model(input_ids, attention_mask = attention_mask, labels = label)
        loss = outputs.loss
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

    #Validation
    model.eval()
    with torch.no_grad():
        total_val_loss = 0.0
        correct_predictions = 0
        total_predictions = 0
        predictions = []
        ground_truth = []
        for batch in val_loader:
            input_ids, attention_mask, label = batch
            input_ids, attention_mask, label = input_ids.to(device), attention_mask.to(device), label.to(device)
            outputs = model(input_ids, attention_mask = attention_mask, labels = label)
            loss = outputs.loss
            total_val_loss += loss.item()

            logits = outputs.logits
            predicted_labels = torch.argmax(logits, dim=1)

            predictions.extend(predicted_labels.cpu().numpy())
            ground_truth.extend(label.cpu().numpy())

            correct_predictions += (predicted_labels == label).sum().item()
            total_predictions += label.size(0)

        test_accuracy = correct_predictions/ total_predictions
        average_val_loss = total_val_loss/len(val_loader)
        print(f"Validation Accuracy: {test_accuracy:.4f}")
        print(f"Average Validation Loss: {average_val_loss:.4f}")

         #Calculate the accuracy and F1 score for each label
        target_names = ['Supporter','Against','Manipulator','Neutral']
        report = classification_report(ground_truth, predictions, target_names=target_names,output_dict=True)

        print("Classification Report:")
        print(pd.DataFrame(report).transpose())

Validation Accuracy: 0.7456
Average Validation Loss: 0.8839
Classification Report:
              precision    recall  f1-score     support
Supporter      0.721088  0.990654  0.834646  107.000000
Against        1.000000  0.266667  0.421053   15.000000
Manipulator    0.941176  0.533333  0.680851   30.000000
Neutral        0.000000  0.000000  0.000000   17.000000
accuracy       0.745562  0.745562  0.745562    0.745562
macro avg      0.665566  0.447664  0.484137  169.000000
weighted avg   0.712377  0.745562  0.686677  169.000000
Validation Accuracy: 0.7041
Average Validation Loss: 0.9065
Classification Report:
              precision    recall  f1-score     support
Supporter      0.838384  0.775701  0.805825  107.000000
Against        1.000000  0.333333  0.500000   15.000000
Manipulator    0.750000  0.700000  0.724138   30.000000
Neutral        0.270270  0.588235  0.370370   17.000000
accuracy       0.704142  0.704142  0.704142    0.704142
macro avg      0.714664  0.599317  0.600083  169.0

In [None]:
#test_dataset = TensorDataset(encoded_tweets['input_ids'][train_size+val_size+test_size:],
  #                           encoded_tweets['attention_mask'][train_size+val_size+test_size:],
  #                           torch.tensor(labels[train_size+val_size+test_size:]))
#test_loader = DataLoader(test_dataset, batch_size=2)

In [15]:
model.eval()
with torch.no_grad():
  total_test_loss = 0.0
  correct_predictions = 0
  total_predictions = 0
  predictions = []
  ground_truth = []
  for batch in test_loader:
    input_ids, attention_mask, label = batch
    input_ids, attention_mask, label = input_ids.to(device), attention_mask.to(device), label.to(device)
    outputs = model(input_ids, attention_mask=attention_mask, labels = label)
    loss = outputs.loss
    total_test_loss += loss.item()

    logits = outputs.logits
    predicted_labels = torch.argmax(logits, dim=1)

    predictions.extend(predicted_labels.cpu().numpy())
    ground_truth.extend(label.cpu().numpy())

    correct_predictions += (predicted_labels == label).sum().item()
    total_predictions += label.size(0)

test_accuracy = correct_predictions/ total_predictions
average_test_loss = total_test_loss/ len(test_loader)

print(f"Test Accuracy: {test_accuracy:.4f}")
print(f"Average Test Loss: {average_test_loss:.4f}")

 #Calculate the accuracy and F1 score for each label
target_names = ['Supporter','Against','Manipulator','Neutral']
report = classification_report(ground_truth, predictions, target_names=target_names,output_dict=True)

print("Classification Report:")
print(pd.DataFrame(report).transpose())


Test Accuracy: 0.6934
Average Test Loss: 1.0788
Classification Report:
              precision    recall  f1-score     support
Supporter      0.753333  0.837037  0.792982  135.000000
Against        0.333333  0.187500  0.240000   16.000000
Manipulator    0.689655  0.588235  0.634921   34.000000
Neutral        0.458333  0.407407  0.431373   27.000000
accuracy       0.693396  0.693396  0.693396    0.693396
macro avg      0.558664  0.505045  0.524819  212.000000
weighted avg   0.673852  0.693396  0.679844  212.000000


**Model 2**

In [11]:
model_name2 = 'vinai/bertweet-large'
tokenizer = AutoTokenizer.from_pretrained(model_name2)
model2 = AutoModelForSequenceClassification.from_pretrained(model_name2,num_labels = 4)

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-large and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [17]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model2.to(device)
optimizer = AdamW(model2.parameters(), lr=2e-5)



In [15]:
device

device(type='cuda')

In [18]:
num_epochs = 5
for epoch in range(num_epochs):
    model2.train()
    for batch in train_loader:
        input_ids, attention_mask, label = batch
        input_ids, attention_mask, label = input_ids.to(device), attention_mask.to(device), label.to(device)

        outputs = model2(input_ids, attention_mask = attention_mask, labels = label)
        loss = outputs.loss
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

    #Validation
    model2.eval()
    with torch.no_grad():
        total_val_loss = 0.0
        correct_predictions = 0
        total_predictions = 0
        predictions = []
        ground_truth = []
        for batch in val_loader:
            input_ids, attention_mask, label = batch
            input_ids, attention_mask, label = input_ids.to(device), attention_mask.to(device), label.to(device)
            outputs = model2(input_ids, attention_mask = attention_mask, labels = label)
            loss = outputs.loss
            total_val_loss += loss.item()

            logits = outputs.logits
            predicted_labels = torch.argmax(logits, dim=1)

            predictions.extend(predicted_labels.cpu().numpy())
            ground_truth.extend(label.cpu().numpy())

            correct_predictions += (predicted_labels == label).sum().item()
            total_predictions += label.size(0)

        test_accuracy = correct_predictions/ total_predictions
        average_val_loss = total_val_loss/len(val_loader)
        print(f"Validation Accuracy: {test_accuracy:.4f}")
        print(f"Average Validation Loss: {average_val_loss:.4f}")

         #Calculate the accuracy and F1 score for each label
        target_names = ['Supporter','Against','Manipulator','Neutral']
        report = classification_report(ground_truth, predictions, target_names=target_names,output_dict=True)

        print("Classification Report:")
        print(pd.DataFrame(report).transpose())

Validation Accuracy: 0.6923
Average Validation Loss: 1.0289
Classification Report:
              precision    recall  f1-score     support
Supporter      0.675159  0.990654  0.803030  107.000000
Against        0.000000  0.000000  0.000000   15.000000
Manipulator    0.916667  0.366667  0.523810   30.000000
Neutral        0.000000  0.000000  0.000000   17.000000
accuracy       0.692308  0.692308  0.692308    0.692308
macro avg      0.397956  0.339330  0.331710  169.000000
weighted avg   0.590190  0.692308  0.601411  169.000000


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Validation Accuracy: 0.6568
Average Validation Loss: 0.9512
Classification Report:
              precision    recall  f1-score     support
Supporter      0.673469  0.925234  0.779528  107.000000
Against        0.000000  0.000000  0.000000   15.000000
Manipulator    0.545455  0.400000  0.461538   30.000000
Neutral        0.000000  0.000000  0.000000   17.000000
accuracy       0.656805  0.656805  0.656805    0.656805
macro avg      0.304731  0.331308  0.310267  169.000000
weighted avg   0.523224  0.656805  0.575477  169.000000


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Validation Accuracy: 0.7870
Average Validation Loss: 0.8162
Classification Report:
              precision    recall  f1-score     support
Supporter      0.796875  0.953271  0.868085  107.000000
Against        1.000000  0.333333  0.500000   15.000000
Manipulator    0.913043  0.700000  0.792453   30.000000
Neutral        0.384615  0.294118  0.333333   17.000000
accuracy       0.786982  0.786982  0.786982    0.786982
macro avg      0.773633  0.570181  0.623468  169.000000
weighted avg   0.794056  0.786982  0.768197  169.000000
Validation Accuracy: 0.7929
Average Validation Loss: 0.8679
Classification Report:
              precision    recall  f1-score     support
Supporter      0.809917  0.915888  0.859649  107.000000
Against        1.000000  0.333333  0.500000   15.000000
Manipulator    0.700000  0.933333  0.800000   30.000000
Neutral        1.000000  0.176471  0.300000   17.000000
accuracy       0.792899  0.792899  0.792899    0.792899
macro avg      0.877479  0.589756  0.614912  169.0

In [19]:
model2.eval()
with torch.no_grad():
  total_test_loss = 0.0
  correct_predictions = 0
  total_predictions = 0
  predictions = []
  ground_truth = []
  for batch in test_loader:
    input_ids, attention_mask, label = batch
    input_ids, attention_mask, label = input_ids.to(device), attention_mask.to(device), label.to(device)
    outputs = model2(input_ids, attention_mask=attention_mask, labels = label)
    loss = outputs.loss
    total_test_loss += loss.item()

    logits = outputs.logits
    predicted_labels = torch.argmax(logits, dim=1)

    predictions.extend(predicted_labels.cpu().numpy())
    ground_truth.extend(label.cpu().numpy())

    correct_predictions += (predicted_labels == label).sum().item()
    total_predictions += label.size(0)

test_accuracy = correct_predictions/ total_predictions
average_test_loss = total_test_loss/ len(test_loader)

print(f"Test Accuracy: {test_accuracy:.4f}")
print(f"Average Test Loss: {average_test_loss:.4f}")

 #Calculate the accuracy and F1 score for each label
target_names = ['Supporter','Against','Manipulator','Neutral']
report = classification_report(ground_truth, predictions, target_names=target_names,output_dict=True)

print("Classification Report:")
print(pd.DataFrame(report).transpose())


Test Accuracy: 0.6792
Average Test Loss: 0.9642
Classification Report:
              precision    recall  f1-score     support
Supporter      0.822034  0.718519  0.766798  135.000000
Against        1.000000  0.125000  0.222222   16.000000
Manipulator    0.530612  0.764706  0.626506   34.000000
Neutral        0.441860  0.703704  0.542857   27.000000
accuracy       0.679245  0.679245  0.679245    0.679245
macro avg      0.698627  0.577982  0.539596  212.000000
weighted avg   0.740310  0.679245  0.674678  212.000000
