In [1]:
# Language code: selects the dataset subset, the per-language config entries,
# the WandB run name and the sub-folder the weights are saved to.
LANGUAGE = "bam"
# Experiment identifier: used as the WandB project and the weights file name.
EXPERIMENT = "final_ground_truth_simple_training"
# CUDA device to use (e.g. "cuda:0"); an empty string falls back to the
# default device from the config file.
PREFERRED_GPU = "cuda:3"

In [2]:
# Prevent WandB from printing its run summary in the cell output.
# The variable is set here, before `wandb` is imported in the next cell.
%env WANDB_SILENT=true

env: WANDB_SILENT=true


In [3]:
import os
import sys
import yaml
import yaml
import wandb
import torch
import warnings

import torch.nn as nn
from torch.utils.data import DataLoader

from tqdm.notebook import tqdm
from datasets import load_dataset
from sklearn.exceptions import UndefinedMetricWarning
from transformers import AutoTokenizer, get_linear_schedule_with_warmup

from getpass import getpass

# Hide sklearn's UndefinedMetricWarning messages for the rest of the session.
warnings.filterwarnings(
    action="ignore",
    category=UndefinedMetricWarning,
)
# Release cached, currently unused GPU memory before anything is allocated.
torch.cuda.empty_cache()

In [4]:
# Register the directory three levels up (the one holding the `src` package)
# on the import path, unless it is already listed there.
src_module_path = os.path.abspath('../../../')
if sys.path.count(src_module_path) == 0:
    sys.path.append(src_module_path)

# Import functions and classes from custom modules
from src.data.preprocess import (
    align_labels_for_many_records,
    TorchDataset
)

from src.query.query_gpt import add_annotation_examples_for_batch, add_annotation_examples
from src.query.prompts import MAIN_PROMPT_FOR_BATCH, MAIN_PROMPT

from src.utils.utils import (print_classification_report,
                             calculate_micro_f1_for_batches,
                             calculate_macro_f1_for_batches)

from src.models.xlmr_ner import XLMRobertaForNER

In [5]:
# Absolute path of the directory three levels above the notebook
PATH_TO_SRC = os.path.abspath('../../../')

# Location of the shared YAML settings file inside that directory
CONFIG_PATH = os.path.join(
    PATH_TO_SRC,
    "settings/config.yml",
)

In [6]:
# Reading config file; the context manager closes the file handle as soon as
# parsing is finished instead of leaving it to the garbage collector.
with open(CONFIG_PATH) as config_file:
    config = yaml.safe_load(config_file)

# Printing out name of the current language
language_name = config['languages_names'][LANGUAGE]
language_name

'Bambara'

In [7]:
# Authenticate with Weights & Biases. The API key is typed at a getpass prompt,
# so it is never written into the notebook source and is not bound to a variable.
wandb.login(key=getpass("Weights and Biases API key:"))

True

In [8]:
# Label mapping as declared in the config
label_mapping = config['label_mapping']

# Tokenizer that belongs to the pretrained checkpoint
tokenizer = AutoTokenizer.from_pretrained(config['model_name'])

# NER model with as many output labels as there are entries in the mapping
base_model = XLMRobertaForNER(
    model_name=config['model_name'],
    num_labels=len(label_mapping),
)

# An explicitly requested GPU wins; an empty PREFERRED_GPU falls back to the
# default device from the config
if PREFERRED_GPU == '':
    default_device = config['gpu_settings']['default_device']
else:
    default_device = PREFERRED_GPU

# Run on the chosen CUDA device when CUDA is available, otherwise on the CPU
if torch.cuda.is_available():
    device = torch.device(default_device)
else:
    device = torch.device("cpu")
print(device)

# Move the model to the device (the returned module is displayed by the cell)
base_model.to(device)

Some weights of XLMRobertaModel were not initialized from the model checkpoint at Davlan/afro-xlmr-mini and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


cuda:3


XLMRobertaForNER(
  (xlmr): XLMRobertaModel(
    (embeddings): XLMRobertaEmbeddings(
      (word_embeddings): Embedding(250002, 384, padding_idx=1)
      (position_embeddings): Embedding(514, 384, padding_idx=1)
      (token_type_embeddings): Embedding(1, 384)
      (LayerNorm): LayerNorm((384,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): XLMRobertaEncoder(
      (layer): ModuleList(
        (0-11): 12 x XLMRobertaLayer(
          (attention): XLMRobertaAttention(
            (self): XLMRobertaSelfAttention(
              (query): Linear(in_features=384, out_features=384, bias=True)
              (key): Linear(in_features=384, out_features=384, bias=True)
              (value): Linear(in_features=384, out_features=384, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): XLMRobertaSelfOutput(
              (dense): Linear(in_features=384, out_features=384, bias=True)
          

In [9]:
# Fetch the dataset named in the config, restricted to the selected language
data = load_dataset(config['dataset'], name=LANGUAGE)
print("Original dataset:\n", data)

  0%|          | 0/3 [00:00<?, ?it/s]

Original dataset:
 DatasetDict({
    train: Dataset({
        features: ['id', 'tokens', 'ner_tags'],
        num_rows: 4462
    })
    validation: Dataset({
        features: ['id', 'tokens', 'ner_tags'],
        num_rows: 638
    })
    test: Dataset({
        features: ['id', 'tokens', 'ner_tags'],
        num_rows: 1274
    })
})


In [10]:
# Apply the label-alignment helper to every split in batches; the tokenizer is
# forwarded to it as a keyword argument.
# NOTE(review): presumably this aligns word-level tags with sub-word tokens —
# confirm in src.data.preprocess.
data = data.map(align_labels_for_many_records, fn_kwargs=dict(tokenizer=tokenizer), batched=True)

In [11]:
# Per-language sequence length and the padding value, both read from the config
max_len = config['languages_max_tokens'][LANGUAGE]
print(f'Maximum token length for language {LANGUAGE} is {max_len}')
padding_val = config['tokenizer_settings']['padding_value']

# Wrap each split of the datasets.Dataset in a PyTorch Dataset
# (built in the order train, test, validation)
dataset_train, dataset_test, dataset_val = (
    TorchDataset(data[split_name], max_length=max_len, padding_value=padding_val)
    for split_name in ('train', 'test', 'validation')
)

Maximum token length for language bam is 164


In [12]:
# Batch size and shuffle flag from the training section of the config
batch_size, shuffle = (
    config['train_settings'][key] for key in ('batch_size', 'shuffle')
)

# The shuffle flag is applied to the training loader only; validation and test
# loaders use the DataLoader default
dataloader_train = DataLoader(dataset_train, shuffle=shuffle, batch_size=batch_size)
dataloader_val, dataloader_test = (
    DataLoader(split, batch_size=batch_size)
    for split in (dataset_val, dataset_test)
)

In [13]:
def train_ner(model, train_loader, val_loader, device, epochs, lr, num_warmup_steps=5):
    """Fine-tune a token-classification model and validate it after every epoch.

    Args:
        model: Module called as ``model(input_ids, attention_mask=...)`` that
            returns per-token logits with the class scores on the last axis.
        train_loader: Sized iterable of batches; every batch is a mapping with
            ``'input_ids'``, ``'attention_mask'`` and ``'labels'`` tensors.
        val_loader: Same batch format as ``train_loader``; used for validation.
        device: Device the batch tensors are moved to. The model itself is not
            moved here, so the caller must have placed it on that device.
        epochs: Number of passes over ``train_loader``.
        lr: Learning rate for AdamW. Anything ``float()`` accepts is fine, so a
            string such as ``"2e-5"`` read from a YAML file works.
        num_warmup_steps: Warm-up steps of the linear learning-rate schedule.

    Returns:
        Tuple ``(model, training_history)``. ``training_history`` maps
        ``"train_loss"``, ``"val_loss"``, ``"val_micro_f1"`` and
        ``"val_macro_f1"`` to lists holding one value per epoch.

    Side effects:
        Prints one summary line per epoch and sends the same metrics to
        ``wandb.log``; the caller is expected to have started a WandB run.
    """
    training_history = {
        "train_loss": [],
        "val_loss": [],
        "val_micro_f1": [],
        "val_macro_f1": []
    }
    # Honour the caller-supplied learning rate (it used to be overwritten by
    # the global config); float() also accepts values given as strings.
    lr = float(lr)
    optimizer = torch.optim.AdamW(model.parameters(), lr=lr)
    # The scheduler is stepped once per batch, hence batches * epochs steps.
    total_steps = len(train_loader) * epochs
    scheduler = get_linear_schedule_with_warmup(optimizer,
                                                num_warmup_steps=num_warmup_steps,
                                                num_training_steps=total_steps)

    # Loss function; positions labelled -100 do not contribute to the loss
    loss_fn = nn.CrossEntropyLoss(ignore_index=-100)

    for epoch in range(epochs):
        # Training phase
        model.train()
        total_train_loss = 0

        for batch in tqdm(train_loader, desc=f"Epoch {epoch + 1}/{epochs} [Train]", leave=False):
            optimizer.zero_grad()
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            logits = model(input_ids, attention_mask=attention_mask)

            # Flatten every leading axis so each token is one classification sample
            logits_reshaped = logits.view(-1, logits.size(-1))
            labels_reshaped = labels.view(-1)

            # Calculate loss
            loss = loss_fn(logits_reshaped, labels_reshaped)

            total_train_loss += loss.item()

            loss.backward()
            optimizer.step()
            scheduler.step()

        avg_train_loss = total_train_loss / len(train_loader)

        # Validation phase
        model.eval()
        total_val_loss = 0
        val_predictions, val_labels = [], []

        with torch.no_grad():
            for batch in tqdm(val_loader, desc=f"Epoch {epoch + 1}/{epochs} [Val]", leave=False):
                input_ids = batch['input_ids'].to(device)
                attention_mask = batch['attention_mask'].to(device)
                labels = batch['labels'].to(device)

                logits = model(input_ids, attention_mask=attention_mask)
                logits_reshaped = logits.view(-1, logits.size(-1))
                labels_reshaped = labels.view(-1)

                # Calculate loss
                loss = loss_fn(logits_reshaped, labels_reshaped)

                total_val_loss += loss.item()

                # Predicted class = index of the highest score on the last axis
                predictions = torch.argmax(logits, dim=-1)

                # Predictions are collected from the device tensor, labels from
                # the original batch tensor (kept exactly as before).
                val_predictions.append(predictions.detach())
                val_labels.append(batch['labels'].detach())

        avg_val_loss = total_val_loss / len(val_loader)
        # NOTE(review): ignore_class=0 presumably excludes the 'O' label from
        # the F1 scores — confirm in src.utils.utils.
        micro_f1 = calculate_micro_f1_for_batches(val_predictions, val_labels, ignore_class=0)
        macro_f1 = calculate_macro_f1_for_batches(val_predictions, val_labels, ignore_class=0)

        # The "Val F1" printed here is the micro-averaged score
        print(f"Epoch {epoch + 1}/{epochs} | Train Loss: {avg_train_loss:.4f} | Val Loss: {avg_val_loss:.4f} | Val F1: {micro_f1:.4f}")

        # Update training history
        training_history["train_loss"].append(avg_train_loss)
        training_history["val_loss"].append(avg_val_loss)
        training_history["val_micro_f1"].append(micro_f1)
        training_history["val_macro_f1"].append(macro_f1)

        # WandB logger
        wandb.log({
            "train_loss": avg_train_loss,
            "val_loss": avg_val_loss,
            "val_micro_f1": micro_f1,
            "val_macro_f1": macro_f1
        })

    return model, training_history

In [14]:
# Prepare WandB for training. The run is used as a context manager so that it
# is always closed: normally on success, and marked as failed if training
# raises or is interrupted (previously wandb.finish() was skipped in that case).
with wandb.init(
    project=EXPERIMENT,
    name=LANGUAGE,
    config={
        'epochs': config['train_settings']['epochs'],
        'learning_rate': config['train_settings']['lr']
    },
    settings=wandb.Settings(disable_job_creation=True)
):
    # Initial model training; train_ner returns the same model object it was
    # given, so `model` and `base_model` refer to one set of weights.
    model, history = train_ner(
        base_model,
        dataloader_train,
        dataloader_val,
        device,
        epochs=config['train_settings']['epochs'],
        lr=config['train_settings']['lr'])

Epoch 1/50 [Train]:   0%|          | 0/279 [00:00<?, ?it/s]

Epoch 1/50 [Val]:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch 1/50 | Train Loss: 0.7298 | Val Loss: 0.3717 | Val F1: 0.0000


Epoch 2/50 [Train]:   0%|          | 0/279 [00:00<?, ?it/s]

Epoch 2/50 [Val]:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch 2/50 | Train Loss: 0.2540 | Val Loss: 0.2055 | Val F1: 0.0000


Epoch 3/50 [Train]:   0%|          | 0/279 [00:00<?, ?it/s]

Epoch 3/50 [Val]:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch 3/50 | Train Loss: 0.1648 | Val Loss: 0.1473 | Val F1: 0.0000


Epoch 4/50 [Train]:   0%|          | 0/279 [00:00<?, ?it/s]

Epoch 4/50 [Val]:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch 4/50 | Train Loss: 0.0937 | Val Loss: 0.0782 | Val F1: 0.3474


Epoch 5/50 [Train]:   0%|          | 0/279 [00:00<?, ?it/s]

Epoch 5/50 [Val]:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch 5/50 | Train Loss: 0.0601 | Val Loss: 0.0663 | Val F1: 0.7013


Epoch 6/50 [Train]:   0%|          | 0/279 [00:00<?, ?it/s]

Epoch 6/50 [Val]:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch 6/50 | Train Loss: 0.0444 | Val Loss: 0.0486 | Val F1: 0.8153


Epoch 7/50 [Train]:   0%|          | 0/279 [00:00<?, ?it/s]

Epoch 7/50 [Val]:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch 7/50 | Train Loss: 0.0336 | Val Loss: 0.0408 | Val F1: 0.8350


Epoch 8/50 [Train]:   0%|          | 0/279 [00:00<?, ?it/s]

Epoch 8/50 [Val]:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch 8/50 | Train Loss: 0.0273 | Val Loss: 0.0374 | Val F1: 0.8468


Epoch 9/50 [Train]:   0%|          | 0/279 [00:00<?, ?it/s]

Epoch 9/50 [Val]:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch 9/50 | Train Loss: 0.0222 | Val Loss: 0.0356 | Val F1: 0.8518


Epoch 10/50 [Train]:   0%|          | 0/279 [00:00<?, ?it/s]

Epoch 10/50 [Val]:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch 10/50 | Train Loss: 0.0184 | Val Loss: 0.0328 | Val F1: 0.8658


Epoch 11/50 [Train]:   0%|          | 0/279 [00:00<?, ?it/s]

Epoch 11/50 [Val]:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch 11/50 | Train Loss: 0.0149 | Val Loss: 0.0318 | Val F1: 0.8658


Epoch 12/50 [Train]:   0%|          | 0/279 [00:00<?, ?it/s]

Epoch 12/50 [Val]:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch 12/50 | Train Loss: 0.0119 | Val Loss: 0.0336 | Val F1: 0.8655


Epoch 13/50 [Train]:   0%|          | 0/279 [00:00<?, ?it/s]

Epoch 13/50 [Val]:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch 13/50 | Train Loss: 0.0101 | Val Loss: 0.0370 | Val F1: 0.8742


Epoch 14/50 [Train]:   0%|          | 0/279 [00:00<?, ?it/s]

Epoch 14/50 [Val]:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch 14/50 | Train Loss: 0.0089 | Val Loss: 0.0297 | Val F1: 0.8578


Epoch 15/50 [Train]:   0%|          | 0/279 [00:00<?, ?it/s]

Epoch 15/50 [Val]:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch 15/50 | Train Loss: 0.0076 | Val Loss: 0.0336 | Val F1: 0.8563


Epoch 16/50 [Train]:   0%|          | 0/279 [00:00<?, ?it/s]

Epoch 16/50 [Val]:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch 16/50 | Train Loss: 0.0068 | Val Loss: 0.0328 | Val F1: 0.8662


Epoch 17/50 [Train]:   0%|          | 0/279 [00:00<?, ?it/s]

Epoch 17/50 [Val]:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch 17/50 | Train Loss: 0.0059 | Val Loss: 0.0297 | Val F1: 0.8616


Epoch 18/50 [Train]:   0%|          | 0/279 [00:00<?, ?it/s]

Epoch 18/50 [Val]:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch 18/50 | Train Loss: 0.0050 | Val Loss: 0.0300 | Val F1: 0.8643


Epoch 19/50 [Train]:   0%|          | 0/279 [00:00<?, ?it/s]

Epoch 19/50 [Val]:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch 19/50 | Train Loss: 0.0048 | Val Loss: 0.0307 | Val F1: 0.8685


Epoch 20/50 [Train]:   0%|          | 0/279 [00:00<?, ?it/s]

Epoch 20/50 [Val]:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch 20/50 | Train Loss: 0.0040 | Val Loss: 0.0308 | Val F1: 0.8738


Epoch 21/50 [Train]:   0%|          | 0/279 [00:00<?, ?it/s]

Epoch 21/50 [Val]:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch 21/50 | Train Loss: 0.0035 | Val Loss: 0.0317 | Val F1: 0.8540


Epoch 22/50 [Train]:   0%|          | 0/279 [00:00<?, ?it/s]

Epoch 22/50 [Val]:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch 22/50 | Train Loss: 0.0033 | Val Loss: 0.0316 | Val F1: 0.8719


Epoch 23/50 [Train]:   0%|          | 0/279 [00:00<?, ?it/s]

Epoch 23/50 [Val]:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch 23/50 | Train Loss: 0.0029 | Val Loss: 0.0325 | Val F1: 0.8693


Epoch 24/50 [Train]:   0%|          | 0/279 [00:00<?, ?it/s]

Epoch 24/50 [Val]:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch 24/50 | Train Loss: 0.0024 | Val Loss: 0.0333 | Val F1: 0.8765


Epoch 25/50 [Train]:   0%|          | 0/279 [00:00<?, ?it/s]

Epoch 25/50 [Val]:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch 25/50 | Train Loss: 0.0020 | Val Loss: 0.0333 | Val F1: 0.8818


Epoch 26/50 [Train]:   0%|          | 0/279 [00:00<?, ?it/s]

Epoch 26/50 [Val]:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch 26/50 | Train Loss: 0.0020 | Val Loss: 0.0321 | Val F1: 0.8681


Epoch 27/50 [Train]:   0%|          | 0/279 [00:00<?, ?it/s]

Epoch 27/50 [Val]:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch 27/50 | Train Loss: 0.0019 | Val Loss: 0.0336 | Val F1: 0.8632


Epoch 28/50 [Train]:   0%|          | 0/279 [00:00<?, ?it/s]

Epoch 28/50 [Val]:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch 28/50 | Train Loss: 0.0016 | Val Loss: 0.0341 | Val F1: 0.8738


Epoch 29/50 [Train]:   0%|          | 0/279 [00:00<?, ?it/s]

Epoch 29/50 [Val]:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch 29/50 | Train Loss: 0.0017 | Val Loss: 0.0315 | Val F1: 0.8704


Epoch 30/50 [Train]:   0%|          | 0/279 [00:00<?, ?it/s]

Epoch 30/50 [Val]:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch 30/50 | Train Loss: 0.0016 | Val Loss: 0.0357 | Val F1: 0.8655


Epoch 31/50 [Train]:   0%|          | 0/279 [00:00<?, ?it/s]

Epoch 31/50 [Val]:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch 31/50 | Train Loss: 0.0013 | Val Loss: 0.0340 | Val F1: 0.8571


Epoch 32/50 [Train]:   0%|          | 0/279 [00:00<?, ?it/s]

Epoch 32/50 [Val]:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch 32/50 | Train Loss: 0.0012 | Val Loss: 0.0361 | Val F1: 0.8647


Epoch 33/50 [Train]:   0%|          | 0/279 [00:00<?, ?it/s]

Epoch 33/50 [Val]:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch 33/50 | Train Loss: 0.0009 | Val Loss: 0.0368 | Val F1: 0.8696


Epoch 34/50 [Train]:   0%|          | 0/279 [00:00<?, ?it/s]

Epoch 34/50 [Val]:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch 34/50 | Train Loss: 0.0009 | Val Loss: 0.0372 | Val F1: 0.8651


Epoch 35/50 [Train]:   0%|          | 0/279 [00:00<?, ?it/s]

Epoch 35/50 [Val]:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch 35/50 | Train Loss: 0.0010 | Val Loss: 0.0360 | Val F1: 0.8575


Epoch 36/50 [Train]:   0%|          | 0/279 [00:00<?, ?it/s]

Epoch 36/50 [Val]:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch 36/50 | Train Loss: 0.0009 | Val Loss: 0.0357 | Val F1: 0.8620


Epoch 37/50 [Train]:   0%|          | 0/279 [00:00<?, ?it/s]

Epoch 37/50 [Val]:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch 37/50 | Train Loss: 0.0007 | Val Loss: 0.0371 | Val F1: 0.8651


Epoch 38/50 [Train]:   0%|          | 0/279 [00:00<?, ?it/s]

Epoch 38/50 [Val]:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch 38/50 | Train Loss: 0.0006 | Val Loss: 0.0361 | Val F1: 0.8670


Epoch 39/50 [Train]:   0%|          | 0/279 [00:00<?, ?it/s]

Epoch 39/50 [Val]:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch 39/50 | Train Loss: 0.0006 | Val Loss: 0.0365 | Val F1: 0.8620


Epoch 40/50 [Train]:   0%|          | 0/279 [00:00<?, ?it/s]

Epoch 40/50 [Val]:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch 40/50 | Train Loss: 0.0006 | Val Loss: 0.0355 | Val F1: 0.8712


Epoch 41/50 [Train]:   0%|          | 0/279 [00:00<?, ?it/s]

Epoch 41/50 [Val]:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch 41/50 | Train Loss: 0.0005 | Val Loss: 0.0361 | Val F1: 0.8651


Epoch 42/50 [Train]:   0%|          | 0/279 [00:00<?, ?it/s]

Epoch 42/50 [Val]:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch 42/50 | Train Loss: 0.0005 | Val Loss: 0.0363 | Val F1: 0.8757


Epoch 43/50 [Train]:   0%|          | 0/279 [00:00<?, ?it/s]

Epoch 43/50 [Val]:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch 43/50 | Train Loss: 0.0006 | Val Loss: 0.0366 | Val F1: 0.8563


Epoch 44/50 [Train]:   0%|          | 0/279 [00:00<?, ?it/s]

Epoch 44/50 [Val]:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch 44/50 | Train Loss: 0.0004 | Val Loss: 0.0369 | Val F1: 0.8723


Epoch 45/50 [Train]:   0%|          | 0/279 [00:00<?, ?it/s]

Epoch 45/50 [Val]:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch 45/50 | Train Loss: 0.0004 | Val Loss: 0.0375 | Val F1: 0.8685


Epoch 46/50 [Train]:   0%|          | 0/279 [00:00<?, ?it/s]

Epoch 46/50 [Val]:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch 46/50 | Train Loss: 0.0004 | Val Loss: 0.0366 | Val F1: 0.8738


Epoch 47/50 [Train]:   0%|          | 0/279 [00:00<?, ?it/s]

Epoch 47/50 [Val]:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch 47/50 | Train Loss: 0.0004 | Val Loss: 0.0366 | Val F1: 0.8700


Epoch 48/50 [Train]:   0%|          | 0/279 [00:00<?, ?it/s]

Epoch 48/50 [Val]:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch 48/50 | Train Loss: 0.0004 | Val Loss: 0.0376 | Val F1: 0.8769


Epoch 49/50 [Train]:   0%|          | 0/279 [00:00<?, ?it/s]

Epoch 49/50 [Val]:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch 49/50 | Train Loss: 0.0003 | Val Loss: 0.0374 | Val F1: 0.8753


Epoch 50/50 [Train]:   0%|          | 0/279 [00:00<?, ?it/s]

Epoch 50/50 [Val]:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch 50/50 | Train Loss: 0.0003 | Val Loss: 0.0372 | Val F1: 0.8750


In [15]:
# Save model weights to model_weights/<LANGUAGE>/<EXPERIMENT>.pth (relative to
# the notebook's working directory). The target folder is created first so a
# fresh checkout does not lose a finished training run to a missing directory.
weights_path = f'model_weights/{LANGUAGE}/{EXPERIMENT}.pth'
os.makedirs(os.path.dirname(weights_path), exist_ok=True)
torch.save(model.state_dict(), weights_path)

In [16]:
# Evaluate the trained model on the held-out test split and print the report
print_classification_report(
    config,
    model,
    dataloader_test,
    device,
    ignore_index=-100,
    ignore_class=0,
)

[test]:   0%|          | 0/80 [00:00<?, ?it/s]

              precision    recall  f1-score   support

      B-DATE       0.94      0.92      0.93       459
       B-LOC       0.91      0.79      0.85      1313
       B-ORG       0.95      0.73      0.83       357
       B-PER       0.93      0.78      0.85      1156
      I-DATE       0.96      0.88      0.92       623
       I-LOC       0.59      0.42      0.49        95
       I-ORG       1.00      0.58      0.74        67
       I-PER       0.97      0.88      0.92      1067
           O       0.00      0.00      0.00         0

    accuracy                           0.82      5137
   macro avg       0.81      0.66      0.72      5137
weighted avg       0.93      0.82      0.87      5137



In [17]:
# Clear memory

# `model` and `base_model` are the same object (train_ner returns the model it
# was given), so both names have to be dropped; deleting only `model` would
# leave the weights reachable through `base_model`.
# NOTE(review): the notebook's output cache may still reference the model from
# the cell that displayed it — check there if GPU memory is not released.
del model, base_model

# Using garbage collector to reclaim anything that is now unreachable
import gc
gc.collect()

# Return cached, unused GPU memory held by PyTorch
torch.cuda.empty_cache()

In [18]:
# Log out of WandB by deleting the .netrc file two levels above the src root
# (presumably where wandb.login stored the API key — verify the location).
# NOTE(review): this removes the whole file, including any non-WandB
# credentials it may contain — confirm that is intended.
try:
    os.remove(f'{src_module_path}/../../.netrc')
    print("Logged out of WandB.")
except OSError as err:
    # Only filesystem failures (missing file, permissions, ...) are handled;
    # anything else, including KeyboardInterrupt, now propagates instead of
    # being silently swallowed by a bare except.
    print("Unsuccessful WandB logging out.")
    print(f"Reason: {err}")

Logged out of WandB.
