## Imports

In [None]:
import random
import torch
import warnings

import numpy as np
import pandas as pd

from transformers import BertTokenizerFast, BertForSequenceClassification
from transformers import Trainer, TrainingArguments, DataCollatorWithPadding

from sklearn.metrics import accuracy_score, classification_report

from datasets import load_dataset

from ray import tune

# Never truncate rows or columns when a DataFrame is displayed.
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)

## Function definitions

In [None]:
"""
    Helper function for reproducible behavior to set the seed in ``random``, ``numpy``, ``torch`` and/or ``tf`` (if
    installed).

    Args:
        seed (:obj:`int`): The seed to set.
"""

def set_seed(seed: int):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

In [None]:
"""
    Loads the dataset.
"""

def load_ag_news_dataset():
    dataset = load_dataset("ag_news")
    
    train_x = []
    train_y = []
    dev_x = []
    dev_y = []
    test_x = []
    test_y = []

    for x in range(0, 1000):
        train_x.append(dataset['train'][x]['text'])
        train_y.append(dataset['train'][x]['label'])

    for x in range(1000, 1500):
        dev_x.append(dataset['train'][x]['text'])
        dev_y.append(dataset['train'][x]['label'])

    for x in range(0, 1500):
        test_x.append(dataset['test'][x]['text'])
        test_y.append(dataset['test'][x]['label'])
    
    return train_x, train_y, dev_x, dev_y, test_x, test_y

In [None]:
"""
    Custom metrics method.
"""

def compute_metrics(pred):
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)

    acc = accuracy_score(labels, preds)
    
    metrics = classification_report(
        y_true = labels, 
        y_pred = preds, 
        labels = [0, 1, 2, 3],
        output_dict=True
    )
    
    # Metrics per class
    class_world_precision = metrics['0']['precision']
    class_world_recall = metrics['0']['recall']
    class_world_f1 = metrics['0']['f1-score']
    class_world_count = metrics['0']['support']
    
    class_sports_precision = metrics['1']['precision']
    class_sports_recall = metrics['1']['recall']
    class_sports_f1 = metrics['1']['f1-score']
    class_sports_count = metrics['1']['support']
    
    class_business_precision = metrics['2']['precision']
    class_business_recall = metrics['2']['recall']
    class_business_f1 = metrics['2']['f1-score']
    class_business_count = metrics['2']['support']
    
    class_sci_tech_precision = metrics['3']['precision']
    class_sci_tech_recall = metrics['3']['recall']
    class_sci_tech_f1 = metrics['3']['f1-score']
    class_sci_tech_count = metrics['3']['support']
    
    # Macro averaging
    macro_avg_precision = metrics['macro avg']['precision']
    macro_avg_recall = metrics['macro avg']['recall']
    macro_avg_f1 = metrics['macro avg']['f1-score']
    
    return {
        'accuracy': acc,
        'Class World (precision)': class_world_precision,
        'Class World (recall)': class_world_recall,
        'Class World (f1-score)': class_world_f1,
        'Class World (count)': class_world_count,
        'Class Sports (precision)': class_sports_precision,
        'Class Sports (recall)': class_sports_recall,
        'Class Sports (f1-score)': class_sports_f1,
        'Class Sports (count)': class_sports_count,
        'Class Business (precision)': class_business_precision,
        'Class Business (recall)': class_business_recall,
        'Class Business (f1-score)': class_business_f1,
        'Class Business (count)': class_business_count,
        'Class Sci/Tech (precision)': class_sci_tech_precision,
        'Class Sci/Tech (recall)': class_sci_tech_recall,
        'Class Sci/Tech (f1-score)': class_sci_tech_f1,
        'Class Sci/Tech (count)': class_sci_tech_count,
        'Macro-averaged precision': macro_avg_precision,
        'Macro-averaged recall': macro_avg_recall,
        'Macro-averaged f1-score': macro_avg_f1,
    }

In [None]:
"""
Returns a prediction for the text specified.
"""

def get_prediction(text, tokenizer, target_names):
    inputs = tokenizer(text, padding=True, truncation=True, max_length=max_length, return_tensors="pt")
    outputs = model(**inputs)
    probs = outputs[0].softmax(1)
    return target_names[probs.argmax()]

In [None]:
"""
  Tunes the number of training epochs, and seed.
"""

def my_hp_space_ray(trial):   
    return {
        "num_train_epochs": tune.choice([1, 2]),
        "learning_rate": tune.choice([1e-6, 1e-4]),
        "weight_decay": tune.choice([0.01, 0.001])
    }

## Class definitions

In [None]:
class AGNewsDataset(torch.utils.data.Dataset):
    """Map-style dataset pairing tokenizer encodings with class labels.

    ``encodings`` maps a feature name to a sequence with one entry per sample;
    ``labels`` is a sequence with one label per sample.
    """

    def __init__(self, encodings, labels):
        self.labels = labels
        self.encodings = encodings

    def __len__(self):
        # One label per sample, so the label count is the dataset size.
        return len(self.labels)

    def __getitem__(self, idx):
        sample = {}
        for feature, values in self.encodings.items():
            sample[feature] = torch.tensor(values[idx])
        # The label is wrapped in a one-element list, giving a tensor of shape (1,).
        sample["labels"] = torch.tensor([self.labels[idx]])
        return sample

## Classification with BERT

In [None]:
# Disable all Python warnings for the rest of the session.
# NOTE(review): this also hides deprecation warnings raised by the libraries used below.
warnings.filterwarnings('ignore')

In [None]:
# Seed the random, numpy and torch generators (see set_seed) before any model work
set_seed(1)

In [None]:
# Maximum sequence length (in tokens) used when tokenizing
max_length = 512

# Name of the pre-trained checkpoint used for both tokenizer and model
model_name = "bert-base-uncased"

In [None]:
# Load the fast BERT tokenizer that belongs to the `model_name` checkpoint.
# NOTE(review): `do_lower_case=True` is presumably already the default for an
# uncased checkpoint — confirm before relying on it.
tokenizer = BertTokenizerFast.from_pretrained(model_name, do_lower_case=True)

Downloading:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/570 [00:00<?, ?B/s]

In [None]:
# Fetch the train / dev / test texts and labels
train_x, train_y, dev_x, dev_y, test_x, test_y = load_ag_news_dataset()

# One set of tokenizer options for all three splits: truncate to max_length and pad
tokenize_kwargs = dict(truncation=True, padding=True, max_length=max_length)
train_encodings = tokenizer(train_x, **tokenize_kwargs)
valid_encodings = tokenizer(dev_x, **tokenize_kwargs)
test_encodings = tokenizer(test_x, **tokenize_kwargs)

# Wrap encodings and labels in the torch Dataset expected by the Trainer
train_dataset = AGNewsDataset(train_encodings, train_y)
validation_dataset = AGNewsDataset(valid_encodings, dev_y)
test_dataset = AGNewsDataset(test_encodings, test_y)

Downloading builder script:   0%|          | 0.00/4.06k [00:00<?, ?B/s]

Downloading metadata:   0%|          | 0.00/2.65k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/7.94k [00:00<?, ?B/s]

Downloading and preparing dataset ag_news/default to /root/.cache/huggingface/datasets/ag_news/default/0.0.0/bc2bcb40336ace1a0374767fc29bb0296cdaf8a6da7298436239c54d79180548...


Downloading data:   0%|          | 0.00/11.0M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/751k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/120000 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/7600 [00:00<?, ? examples/s]

Dataset ag_news downloaded and prepared to /root/.cache/huggingface/datasets/ag_news/default/0.0.0/bc2bcb40336ace1a0374767fc29bb0296cdaf8a6da7298436239c54d79180548. Subsequent calls will reuse this data.


  0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
# Load a BERT model for text classification
# Class names, ordered so that the list index is the integer label
# (0=World, 1=Sports, 2=Business, 3=Sci/Tech, the same mapping compute_metrics uses).
target_list = ["World", "Sports", "Business", "Sci/Tech"]

# Pre-trained checkpoint with a classification head sized to the number of classes
model = BertForSequenceClassification.from_pretrained(
    model_name, 
    num_labels=len(target_list)
)

Downloading:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

In [None]:
# Training arguments specification
#
# With 1000 training examples, batch size 16 and 2 epochs the run takes only
# 126 optimisation steps, so every step-based setting has to fit inside that:
#   * warm-up is given as a fraction of the run (a fixed 500 steps would be
#     longer than the whole training, so the learning rate would never reach
#     its configured value);
#   * checkpoints are written on the same 50-step schedule as the evaluations,
#     otherwise no checkpoint exists by the end of training and
#     `load_best_model_at_end` has nothing to restore.

training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=2,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=20,
    warmup_ratio=0.1,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=50,
    evaluation_strategy="steps",
    eval_steps=50,
    save_strategy="steps",
    save_steps=50,
    load_best_model_at_end=True,
)

In [None]:
# Trainer specification
# Ties together the model, the training arguments, the datasets and the
# metric function used at every evaluation.

trainer = Trainer(
    model=model,                         # classifier loaded above
    args=training_args,                  # arguments defined in the previous cell
    train_dataset=train_dataset,         # AGNewsDataset built from the training subset
    eval_dataset=validation_dataset,          # AGNewsDataset built from the dev subset
    compute_metrics=compute_metrics,     # accuracy, per-class and macro-averaged scores
)

In [None]:
# Train (fine-tune) the model on train_dataset with the arguments given to the Trainer
trainer.train()

***** Running training *****
  Num examples = 1000
  Num Epochs = 2
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 1
  Total optimization steps = 126
  Number of trainable parameters = 109485316


Step,Training Loss,Validation Loss,Accuracy,Class world (precision),Class world (recall),Class world (f1-score),Class world (count),Class sports (precision),Class sports (recall),Class sports (f1-score),Class sports (count),Class business (precision),Class business (recall),Class business (f1-score),Class business (count),Class sci/tech (precision),Class sci/tech (recall),Class sci/tech (f1-score),Class sci/tech (count),Macro-averaged precision,Macro-averaged recall,Macro-averaged f1-score
50,1.4152,1.340573,0.318,0.0,0.0,0.0,152,0.724138,0.25,0.371681,84,0.0,0.0,0.0,122,0.292994,0.971831,0.450245,142,0.254283,0.305458,0.205482
100,1.1024,0.935781,0.718,0.972973,0.473684,0.637168,152,0.847826,0.928571,0.886364,84,0.764045,0.557377,0.64455,122,0.57551,0.992958,0.728682,142,0.790089,0.738148,0.724191


***** Running Evaluation *****
  Num examples = 500
  Batch size = 20
***** Running Evaluation *****
  Num examples = 500
  Batch size = 20


Training completed. Do not forget to share your model on huggingface.co/models =)




TrainOutput(global_step=126, training_loss=1.1630673030066112, metrics={'train_runtime': 131.9376, 'train_samples_per_second': 15.159, 'train_steps_per_second': 0.955, 'total_flos': 351506237472000.0, 'train_loss': 1.1630673030066112, 'epoch': 2.0})

In [None]:
# Evaluate the model after training on the Trainer's eval_dataset (validation_dataset)
trainer.evaluate() 

***** Running Evaluation *****
  Num examples = 500
  Batch size = 20


{'eval_loss': 0.7038248181343079,
 'eval_accuracy': 0.82,
 'eval_Class World (precision)': 0.8571428571428571,
 'eval_Class World (recall)': 0.75,
 'eval_Class World (f1-score)': 0.7999999999999999,
 'eval_Class World (count)': 152,
 'eval_Class Sports (precision)': 0.8522727272727273,
 'eval_Class Sports (recall)': 0.8928571428571429,
 'eval_Class Sports (f1-score)': 0.872093023255814,
 'eval_Class Sports (count)': 84,
 'eval_Class Business (precision)': 0.8240740740740741,
 'eval_Class Business (recall)': 0.7295081967213115,
 'eval_Class Business (f1-score)': 0.7739130434782607,
 'eval_Class Business (count)': 122,
 'eval_Class Sci/Tech (precision)': 0.7719298245614035,
 'eval_Class Sci/Tech (recall)': 0.9295774647887324,
 'eval_Class Sci/Tech (f1-score)': 0.8434504792332269,
 'eval_Class Sci/Tech (count)': 142,
 'eval_Macro-averaged precision': 0.8263548707627655,
 'eval_Macro-averaged recall': 0.8254857010917968,
 'eval_Macro-averaged f1-score': 0.8223641364918255,
 'eval_runtime':

In [None]:
# Save the fine-tuned model and the tokenizer
# Both go into the same directory so they can be reloaded together from `model_path`.
model_path = "ag_news-bert-base-uncased"
model.save_pretrained(model_path)
tokenizer.save_pretrained(model_path)

Configuration saved in ag_news-bert-base-uncased/config.json
Model weights saved in ag_news-bert-base-uncased/pytorch_model.bin
tokenizer config file saved in ag_news-bert-base-uncased/tokenizer_config.json
Special tokens file saved in ag_news-bert-base-uncased/special_tokens_map.json


('ag_news-bert-base-uncased/tokenizer_config.json',
 'ag_news-bert-base-uncased/special_tokens_map.json',
 'ag_news-bert-base-uncased/vocab.txt',
 'ag_news-bert-base-uncased/added_tokens.json',
 'ag_news-bert-base-uncased/tokenizer.json')

In [None]:
# Load model and tokenizer back from `model_path`.
# From here on `model` and `tokenizer` refer to the reloaded copies.
model = BertForSequenceClassification.from_pretrained(model_path, num_labels=len(target_list))
tokenizer = BertTokenizerFast.from_pretrained(model_path) 

loading configuration file ag_news-bert-base-uncased/config.json
Model config BertConfig {
  "_name_or_path": "bert-base-uncased",
  "architectures": [
    "BertForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2,
    "LABEL_3": 3
  },
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "problem_type": "single_label_classification",
  "torch_dtype": "float32",
  "transformers_version": "4.25.1",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 

In [None]:
# Text of the first test sample
test_x[0]

"Fears for T N pension after talks Unions representing workers at Turner   Newall say they are 'disappointed' after talks with stricken parent firm Federal Mogul."

In [None]:
# Gold class name of the first test sample
target_list[test_y[0]]

'Business'

In [None]:
# Class name predicted by the reloaded model for the first test sample
get_prediction(test_x[0], tokenizer, target_list)

'Business'

## Hyperparameter search (tuning)

In [None]:
# Training arguments shared by every hyperparameter-search trial.
# learning_rate and weight_decay are also part of the search space defined in
# my_hp_space_ray, so the values here are presumably overridden per trial.

hyper_parameter_args = TrainingArguments(
    output_dir="test",
    per_device_train_batch_size=16,
    learning_rate=1e-6,
    weight_decay=0.01,
    do_eval=True,
    eval_steps=100,
    disable_tqdm=False,
)

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).


In [None]:
"""
  Initializes the BERT model.
"""

def get_model():
    return BertForSequenceClassification.from_pretrained(model_name, num_labels=len(target_list), return_dict=True)

In [None]:
# Hyper trainer
# Trainer used for the hyperparameter search: it receives `model_init`
# (get_model) instead of a model instance, so a new model can be built from
# the pre-trained checkpoint whenever one is needed.

hyper_trainer = Trainer(
    args=hyper_parameter_args, 
    data_collator=DataCollatorWithPadding(tokenizer),
    train_dataset=train_dataset, 
    eval_dataset=validation_dataset, 
    model_init=get_model,
    compute_metrics=compute_metrics,
)

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--bert-base-uncased/snapshots/0a6aa9128b6194f4f3c4db429b6cb4891cdb421b/config.json
Model config BertConfig {
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2,
    "LABEL_3": 3
  },
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.25.1",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading weights 

In [None]:
# Search
#
# * The search space (my_hp_space_ray) is written with Ray Tune primitives, so
#   the backend is pinned to "ray" instead of being picked from whichever
#   search library happens to be installed.
# * The objective is given explicitly. Left to the default, it is the sum of
#   every value returned by compute_metrics, which would mix the per-class
#   support counts into the score being maximised.

def _macro_f1_objective(metrics):
    """Score a trial by its macro-averaged F1 on the evaluation set."""
    return metrics["eval_Macro-averaged f1-score"]


best = hyper_trainer.hyperparameter_search(
    direction="maximize",
    backend="ray",
    hp_space=my_hp_space_ray,
    compute_objective=_macro_f1_objective,
)

In [None]:
# Predictions on the test set (100 test samples)

# Predicted class name -> integer label; any other name falls back to 3 (Sci/Tech)
name_to_label = {"World": 0, "Sports": 1, "Business": 2}

y_pred_test = []
for x in range(0, 100):
    pred = get_prediction(test_x[x], tokenizer, target_list)
    y_pred_test.append(name_to_label.get(pred, 3))

In [None]:
# Compute the metrics
# Report for the first 100 test samples, returned as a nested dict.

metrics = classification_report(
    test_y[:100],
    y_pred_test,
    labels=[0, 1, 2, 3],
    output_dict=True,
)

In [None]:
# Metrics per class
# `metrics` is keyed by the label id as a string:
# '0' = World, '1' = Sports, '2' = Business, '3' = Sci/Tech.

score_names = ('precision', 'recall', 'f1-score')

class_world_precision, class_world_recall, class_world_f1 = (
    metrics['0'][name] for name in score_names
)
class_sports_precision, class_sports_recall, class_sports_f1 = (
    metrics['1'][name] for name in score_names
)
class_business_precision, class_business_recall, class_business_f1 = (
    metrics['2'][name] for name in score_names
)
class_sci_tech_precision, class_sci_tech_recall, class_sci_tech_f1 = (
    metrics['3'][name] for name in score_names
)

In [None]:
# Macro averaging
# Pull the three macro-averaged scores out of the report in one step.

macro_avg_precision, macro_avg_recall, macro_avg_f1 = (
    metrics['macro avg'][name] for name in ('precision', 'recall', 'f1-score')
)

In [None]:
# Include the baselines
# Hard-coded scores of the LR baseline (presumably logistic regression),
# one value per class in the order World, Sports, Business, Sci/Tech.

# Precision
pr_world_lr, pr_sports_lr, pr_business_lr, pr_sci_tech_lr = (
    0.880342, 0.920904, 0.853211, 0.384694
)

# Recall
re_world_lr, re_sports_lr, re_business_lr, re_sci_tech_lr = (
    0.544974, 0.406484, 0.276786, 0.979221
)

# F1-score
f1_world_lr, f1_sports_lr, f1_business_lr, f1_sci_tech_lr = (
    0.673203, 0.564014, 0.417978, 0.552381
)

# Macro averages of the baseline.
# NOTE(review): `macro_avg_f1` is also the name given to BERT's macro F1 in
# the macro-averaging cell, so this assignment replaces that value.
macro_avg_pr, macro_avg_re, macro_avg_f1 = 0.759788, 0.551866, 0.551894

In [None]:
# Create the dataframe
# One row per (score, class) pair, comparing BERT with the LR baseline.

class_labels = ['World', 'Sports', 'Business', 'Sci/Tech']

bert_scores = [
    class_world_precision, class_sports_precision, class_business_precision, class_sci_tech_precision,
    class_world_recall, class_sports_recall, class_business_recall, class_sci_tech_recall,
    class_world_f1, class_sports_f1, class_business_f1, class_sci_tech_f1,
]

lr_scores = [
    pr_world_lr, pr_sports_lr, pr_business_lr, pr_sci_tech_lr,
    re_world_lr, re_sports_lr, re_business_lr, re_sci_tech_lr,
    f1_world_lr, f1_sports_lr, f1_business_lr, f1_sci_tech_lr,
]

data = {
    'Metric': [
        f'{score} ({label})'
        for score in ('Precision', 'Recall', 'F1-score')
        for label in class_labels
    ],
    'BERT': bert_scores,
    'LR': lr_scores,
}

df = pd.DataFrame(data)

In [None]:
# Display the per-class comparison table (BERT vs. LR)

df

Unnamed: 0,Metric,BERT,LR
0,Precision (World),0.928571,0.880342
1,Precision (Sports),0.904762,0.920904
2,Precision (Business),0.833333,0.853211
3,Precision (Sci/Tech),0.897436,0.384694
4,Recall (World),0.866667,0.544974
5,Recall (Sports),0.904762,0.406484
6,Recall (Business),0.833333,0.276786
7,Recall (Sci/Tech),0.945946,0.979221
8,F1-score (World),0.896552,0.673203
9,F1-score (Sports),0.904762,0.564014


In [None]:
# Create the dataframe (macro-averaging)

# BERT's macro averages are read straight from the classification report.
# The name `macro_avg_f1` cannot be used for the BERT column: the baselines
# cell rebinds it to the LR value, which would put the LR score in both columns.
bert_macro = metrics['macro avg']

data_ma = {
     'Metric': ['Macro-Averaged Precision',
                'Macro-Averaged Recall',
                'Macro-Averaged F1-score'],

     'BERT': [bert_macro['precision'],
              bert_macro['recall'],
              bert_macro['f1-score']],

     # At this point macro_avg_pr / macro_avg_re / macro_avg_f1 hold the LR baseline.
     'LR': [macro_avg_pr, macro_avg_re, macro_avg_f1]
}

df_ma = pd.DataFrame(data_ma)

In [None]:
# Display the macro-averaged comparison table (BERT vs. LR)

df_ma

Unnamed: 0,Metric,BERT,LR
0,Macro-Averaged Precision,0.891026,0.759788
1,Macro-Averaged Recall,0.887677,0.551866
2,Macro-Averaged F1-score,0.551894,0.551894
