In [1]:
from Transformer_Models import MashableBertModel
import torch
import torch.nn as nn

import numpy as np
import pandas as pd

In [2]:
import datasets
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')

In [3]:
articles = datasets.load_dataset('online_news_popularity_data')

Found cached dataset online_news_popularity_data (/home/leeparkuky/.cache/huggingface/datasets/online_news_popularity_data/online_news_popularity_data/1.0.0/40871d81330db452a45539c72f8cf51048b7f8890bee79b1a6306eb6532df4b2)


  0%|          | 0/1 [00:00<?, ?it/s]

In [5]:
def concatenate_fernandes_variables(examples):
    fernandes = [val for key, val in examples.items() if key not in ['title','content','shares','shares_class']]
    fernandes = np.array(fernandes).T.tolist()
    return {'fernandes': fernandes}

In [6]:
articles_concat = articles.map(concatenate_fernandes_variables, batched = True, batch_size = 64, num_proc = 16,
                              remove_columns = [x for x in articles.column_names['train'] if x not in ['title','content','shares','shares_class']] )

Loading cached processed dataset at /home/leeparkuky/.cache/huggingface/datasets/online_news_popularity_data/online_news_popularity_data/1.0.0/40871d81330db452a45539c72f8cf51048b7f8890bee79b1a6306eb6532df4b2/cache-de0dc50a5a27799a_*_of_00016.arrow


In [8]:
def tokenize(examples):
#     text = [title + content for title, content in zip(examples['title'], examples['content'])]
    return tokenizer(examples['title'], text_pair =  examples['content'], max_length = 512, truncation = True, padding = True)

In [9]:
articles_tokenized = articles_concat.map(tokenize, batched = True, batch_size = 64, num_proc = 16,
                   remove_columns = ['shares','title','content'])

Loading cached processed dataset at /home/leeparkuky/.cache/huggingface/datasets/online_news_popularity_data/online_news_popularity_data/1.0.0/40871d81330db452a45539c72f8cf51048b7f8890bee79b1a6306eb6532df4b2/cache-3151a21d0a1b4698_*_of_00016.arrow


In [10]:
articles_tokenized.set_format('pt')

In [11]:
articles_tokenized = articles_tokenized['train'].train_test_split(.2)

## Try 1: Fine-tuning with Cross Entropy Loss

In [13]:
from Transformer_Models import MashableBertForClassification

model = MashableBertForClassification('bert-base-uncased', 20)
model.base_model_load_weight('Model Weights/MashableBertModel_Pretrained.pth')

In [14]:
from torch.utils.data import DataLoader
training_loader = DataLoader(articles_tokenized['train'], batch_size = 8, shuffle = True)
validating_loader = DataLoader(articles_tokenized['test'], batch_size = 8,)

In [15]:
torch.set_float32_matmul_precision("medium")

In [16]:
from lightning import Fabric
fabric = Fabric(accelerator="cuda", devices=1, precision="16-mixed")
fabric.launch()

optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

model, optimizer = fabric.setup(model, optimizer)

training_loader, validating_loader = fabric.setup_dataloaders(
    training_loader, validating_loader)


Using 16-bit Automatic Mixed Precision (AMP)


In [19]:
import torchmetrics

def train(num_epochs, model, optimizer, train_loader, val_loader, fabric, accumulation_steps = 4, num_classes = 20):
    metrics = {}
    for epoch in range(num_epochs):
        # metrics : accuracy, precision, recall, f1-score, 
        train_acc = torchmetrics.Accuracy(
            task="multiclass", num_classes=num_classes).to(fabric.device)
        train_prec = torchmetrics.Precision(
            task="multiclass", num_classes=num_classes).to(fabric.device)
        train_recall = torchmetrics.Recall(
            task="multiclass", num_classes=num_classes).to(fabric.device)
        train_f1score = torchmetrics.F1Score(
            task="multiclass", num_classes=num_classes).to(fabric.device)
        
        # going over train_loaders
        for batch_idx, batch in enumerate(train_loader):
            model.train()

            ### FORWARD AND BACK PROP
            outputs = model(**batch) 
            
            ### gradient accumulation
            assert isinstance(accumulation_steps, int)
            if accumulation_steps > 1:
                outputs.loss = outputs.loss / accumulation_steps
                fabric.backward(outputs.loss)
                if not batch_idx % accumulation_steps:
            ### UPDATE MODEL PARAMETERS
                    optimizer.step()
                    optimizer.zero_grad()
            elif accumulation_steps in [0,1]:
                fabric.backward(outputs.loss)
            ### UPDATE MODEL PARAMETERS
                optimizer.step()
                optimizer.zero_grad()
            else:
                raise AttributeError("accumulation steps can't be negative")
            

            ### LOGGING
            if not batch_idx % 500:
                print(f"Epoch: {epoch:04d}/{num_epochs:04d} "
                      f"| Batch {batch_idx+1:04d}/{len(train_loader):04d} "
                      f"| Loss: {outputs.loss *accumulation_steps:.4f}")

            model.eval()
            with torch.no_grad():
                predicted_labels = torch.argmax(outputs["logits"], 1)
                train_acc.update(predicted_labels, batch["shares_class"]) #accuracy
                train_prec.update(predicted_labels, batch["shares_class"]) # precision
                train_recall.update(predicted_labels, batch["shares_class"]) # recall
                train_f1score.update(predicted_labels, batch["shares_class"]) # f1-score
                
        # At the end of each epoch,
        ### MORE LOGGING
        model.eval()
        with torch.no_grad():
            val_acc = torchmetrics.Accuracy( task="multiclass", num_classes=num_classes).to(fabric.device)
            val_prec = torchmetrics.Precision( task="multiclass", num_classes=num_classes).to(fabric.device)
            val_recall = torchmetrics.Recall( task="multiclass", num_classes=num_classes).to(fabric.device)
            val_f1score = torchmetrics.F1Score( task="multiclass", num_classes=num_classes).to(fabric.device)
            
            for batch in val_loader:
                outputs = model(**batch)
                predicted_labels = torch.argmax(outputs["logits"], 1)
                val_acc.update(predicted_labels, batch["shares_class"]) #accuracy
                val_prec.update(predicted_labels, batch["shares_class"]) # precision
                val_recall.update(predicted_labels, batch["shares_class"]) # recall
                val_f1score.update(predicted_labels, batch["shares_class"]) # f1-score

            print(f"Epoch: {epoch+1:04d}/{num_epochs:04d} \n"
                  f"| Train acc.: {train_acc.compute()*100:.2f}% "
                  f"| Val acc.: {val_acc.compute()*100:.2f}% \n"
                  f"| Train prec.: {train_prec.compute()*100:.2f}% "
                  f"| Val prec.: {val_prec.compute()*100:.2f}% \n"
                  f"| Train recall.: {train_recall.compute()*100:.2f}% "
                  f"| Val recall.: {val_recall.compute()*100:.2f}% \n"
                  f"| Train f1score.: {train_f1score.compute()*100:.2f}% "
                  f"| Val f1score.: {val_f1score.compute()*100:.2f}% \n"
                  )
            metrics[f"epoch_{epoch}"] = {'Train': {'Accuracy': train_acc.compute(),
                                                  'Precision': train_prec.compute(),
                                                  'Recall': train_recall.compute(),
                                                  'F1 Score': train_f1score.compute()},
                                        'Validation': {'Accuracy': val_acc.compute(),
                                                  'Precision': val_prec.compute(),
                                                  'Recall': val_recall.compute(),
                                                  'F1 Score': val_f1score.compute()}}
            
            train_acc.reset(), val_acc.reset(); train_prec.reset(), val_prec.reset(); train_recall.reset(), val_recall.reset()
            train_f1score.reset(), val_f1score.reset()
            
    return metrics

Total epochs 25

In [20]:
metrics_result = train(25, model, optimizer, training_loader, validating_loader,fabric)

Epoch: 0000/0025 | Batch 0001/3961 | Loss: 3.0144
Epoch: 0000/0025 | Batch 0501/3961 | Loss: 2.9800
Epoch: 0000/0025 | Batch 1001/3961 | Loss: 3.0894
Epoch: 0000/0025 | Batch 1501/3961 | Loss: 3.0764
Epoch: 0000/0025 | Batch 2001/3961 | Loss: 3.0454
Epoch: 0000/0025 | Batch 2501/3961 | Loss: 2.9731
Epoch: 0000/0025 | Batch 3001/3961 | Loss: 3.0344
Epoch: 0000/0025 | Batch 3501/3961 | Loss: 3.0195
Epoch: 0001/0025 
| Train acc.: 5.57% | Val acc.: 5.36% 
| Train prec.: 5.57% | Val prec.: 5.36% 
| Train recall.: 5.57% | Val recall.: 5.36% 
| Train f1score.: 5.57% | Val f1score.: 5.36% 

Epoch: 0001/0025 | Batch 0001/3961 | Loss: 3.1086
Epoch: 0001/0025 | Batch 0501/3961 | Loss: 3.0977
Epoch: 0001/0025 | Batch 1001/3961 | Loss: 3.0261
Epoch: 0001/0025 | Batch 1501/3961 | Loss: 3.0073
Epoch: 0001/0025 | Batch 2001/3961 | Loss: 3.0681
Epoch: 0001/0025 | Batch 2501/3961 | Loss: 3.0251
Epoch: 0001/0025 | Batch 3001/3961 | Loss: 3.0227
Epoch: 0001/0025 | Batch 3501/3961 | Loss: 2.9348
Epoch: 00