<a href="https://colab.research.google.com/github/steliosg23/PDS-A2/blob/main/SUBMISSION%20Finetuned%20PubMedBERT%20PDS%20A2%20Food%20Hazard%20Detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Import libraries
This section contains the imports needed for data manipulation, model training, and evaluation
(no packages are installed here; the notebook relies on the libraries preinstalled in the Colab runtime).
It also imports libraries for handling tokenization, model configuration, and metrics.


In [None]:
from google.colab import drive
import pandas as pd
import torch
import re
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, classification_report
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from torch.utils.data import DataLoader, Dataset
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import LabelEncoder
from tqdm import tqdm
import os
from shutil import make_archive
import numpy as np
from sklearn.utils.class_weight import compute_class_weight
from torch.optim.lr_scheduler import ReduceLROnPlateau

# Mount Google Drive


In [None]:
# Mount Google Drive at /content/drive (Colab only); the training CSV is read from this mount.
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Load and preview the training dataset
The dataset containing incident reports is loaded from Google Drive.
We remove any unnecessary columns like 'Unnamed: 0'.


In [None]:
# Location of the training CSV on the mounted Google Drive
train_path = '/content/drive/MyDrive/Data/incidents_train.csv'
df = pd.read_csv(train_path)
# 'Unnamed: 0' is the index column written out when the CSV was exported.
# errors='ignore' keeps this cell from raising KeyError when the file was
# exported without that column.
df = df.drop(columns=['Unnamed: 0'], errors='ignore')


# Define a function to clean text data
This function removes special characters, converts text to lowercase, and strips extra whitespace.
It is essential to clean the text data for better model performance.


In [None]:
import re

def clean_text(text):
    """Normalize one raw text entry before tokenization.

    The text is stripped of every character that is not an ASCII letter, a
    digit or whitespace, lowercased, and its whitespace runs are collapsed to
    single spaces (leading/trailing whitespace is removed).

    Note: punctuation is deleted, not replaced by a space, so tokens joined
    only by punctuation are merged (e.g. "O157:H7" becomes "o157h7").

    Args:
        text: The raw value. Missing values (None or float NaN, which pandas
            uses for empty cells) yield an empty string; any other non-string
            value is converted with str() first.

    Returns:
        str: The cleaned text.
    """
    # NaN is the only float that is not equal to itself
    if text is None or (isinstance(text, float) and text != text):
        return ''
    if not isinstance(text, str):
        text = str(text)

    text = re.sub(r'[^a-zA-Z0-9\s]', '', text)  # Remove non-alphanumeric characters
    text = text.lower()  # Convert text to lowercase
    text = ' '.join(text.split())  # Remove extra spaces
    return text


# Clean the text data and load the tokenizer
We apply the `clean_text` function to clean the 'text' column of the dataset.
Then, we initialize the PubMedBERT tokenizer to prepare for tokenization.


In [None]:
# Load the tokenizer that belongs to the PubMedBERT checkpoint (uncased, abstract-only variant)
tokenizer = AutoTokenizer.from_pretrained("microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract")

# Apply clean_text to every entry of the 'text' column, overwriting the column in place.
# clean_text removes non-alphanumeric characters, lowercases and collapses whitespace; it does not remove stopwords.
df['text'] = df['text'].apply(clean_text)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


# Define features and targets for classification tasks
We specify the input features like date and country and set the classification targets.



In [None]:
# Metadata columns carried along with the text in every split
features = [
    'year',
    'month',
    'day',
    'country',
]

# Subtask 1 targets: the coarse hazard and product categories
targets_subtask1 = [
    'hazard-category',
    'product-category',
]

# Subtask 2 targets: the fine-grained hazard and product labels
# (extend this list if the task requires further targets)
targets_subtask2 = [
    'hazard',
    'product',
]


# Encode target labels
For classification, target labels need to be encoded as numeric values.
We use `LabelEncoder` to convert categorical labels into integers.


In [None]:
# Registry of fitted encoders, keyed by target column name; kept so that
# integer predictions can later be mapped back with inverse_transform.
label_encoders = {}

# Encode every target of both subtasks, overwriting the column in df with its integer codes.
# NOTE: because df is overwritten, re-running this cell without reloading df
# fits the encoders on the integer codes, so classes_ would then hold integers
# instead of the original label strings.
for target in (*targets_subtask1, *targets_subtask2):
    le = label_encoders[target] = LabelEncoder()
    df[target] = le.fit_transform(df[target])


# Define a custom PyTorch dataset for text classification
This dataset class will handle text tokenization and label processing.
It ensures the text is properly encoded, padded, and truncated to a fixed length for the model.


In [None]:
# Define a custom Dataset class for text data
class TextDataset(Dataset):
    """Map-style dataset that tokenizes one text per item for sequence classification.

    Args:
        texts: Sequence of input texts (list, NumPy array or pandas Series).
        labels: Sequence of integer class ids, aligned with `texts` by position.
        tokenizer: Callable tokenizer (e.g. a Hugging Face tokenizer) accepting
            the keyword arguments used in `__getitem__`.
        max_len: Fixed sequence length every item is padded/truncated to.

    Raises:
        ValueError: If `texts` and `labels` differ in length.
    """

    def __init__(self, texts, labels, tokenizer, max_len):
        if len(texts) != len(labels):
            raise ValueError(
                f"texts and labels must have the same length, got {len(texts)} and {len(labels)}"
            )
        # Materialize as lists so that __getitem__ always indexes by position.
        # Indexing a pandas Series with an int is label-based, which raises
        # KeyError (or returns the wrong row) when the index is not 0..n-1.
        self.texts = list(texts)
        self.labels = list(labels)
        self.tokenizer = tokenizer  # Tokenizer for encoding the text
        self.max_len = max_len  # Maximum length for padding/truncation

    def __len__(self):
        """Return the number of samples."""
        return len(self.texts)

    def __getitem__(self, item):
        """Return the tokenized sample at position `item`.

        Returns:
            dict: 'input_ids' and 'attention_mask' as 1-D tensors (the batch
            dimension added by return_tensors='pt' is flattened away) and
            'label' as a scalar torch.long tensor.
        """
        text = str(self.texts[item])
        label = self.labels[item]

        # Calling the tokenizer directly replaces the deprecated encode_plus
        encoding = self.tokenizer(
            text,
            add_special_tokens=True,  # Add special tokens (e.g., [CLS], [SEP])
            max_length=self.max_len,  # Limit the sequence length
            padding='max_length',  # Pad sequences to max_length
            truncation=True,  # Truncate longer sequences
            return_tensors='pt',  # Return PyTorch tensors
        )

        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'label': torch.tensor(label, dtype=torch.long),
        }


# Split the data into training and testing sets
We split the dataset into training and testing sets for each target.
This ensures that the model is trained on one set and evaluated on a separate, unseen set.


In [None]:
from sklearn.model_selection import train_test_split

# Define a function to prepare data for model training and testing (no validation)
def prepare_data(text_column):
    """Build a 90/10 train/test split for every classification target.

    Reads the module-level `df`, `features`, `targets_subtask1` and
    `targets_subtask2`. Every target uses the same `random_state`, and no
    validation set is created. The split is not stratified.

    Args:
        text_column: Name of the text column in `df` to include alongside the
            metadata columns listed in `features`.

    Returns:
        dict: Maps each target name to a tuple
        `(X_train, X_test, y_train, y_test)` whose indices are reset to 0..n-1.
    """
    # Metadata features plus the requested text column
    X = df[features + [text_column]]

    data_splits = {}
    for target in targets_subtask1 + targets_subtask2:
        # train_test_split returns (X_train, X_test, y_train, y_test); resetting
        # each index lets downstream code address rows by position.
        data_splits[target] = tuple(
            part.reset_index(drop=True)
            for part in train_test_split(X, df[target], test_size=0.1, random_state=42)
        )

    return data_splits


# Prepare the data splits for text-based tasks
We apply the `prepare_data` function specifically for text tasks and save the splits for later use.


In [None]:
# Build the per-target train/test splits, using 'text' as the text column passed to prepare_data
text_splits = prepare_data('text')


# Set model configuration and define the device
Here, we configure key parameters for training like maximum sequence length, batch size, and learning rate.
We also determine whether to use GPU or CPU for training based on availability.


In [None]:
# Hyper-parameters shared by every training run in this notebook
config = dict(
    max_len=256,            # Maximum sequence length for input texts
    batch_size=16,          # Batch size for training
    learning_rate=5e-5,     # Learning rate for the optimizer
    epochs=100,             # Upper bound on the number of training epochs
    model_name="microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract",  # Pre-trained model to use
)

# Prefer the GPU when one is visible to PyTorch, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"Using device: {device}")


Using device: cuda


# Training Function Explanation

## Overview
The `train_and_evaluate_bert` function in the submission notebook has been redesigned with key enhancements to improve the training process compared to the benchmark notebooks. These changes include better handling of class imbalance, task-specific adaptations, and improved evaluation metrics, ensuring more robust performance and generalization.

---

## Key Features in the Submission Notebook

### 1. **Dynamic Class Weights**
- Includes code that calculates class weights for each task using `compute_class_weight` to address class imbalance.
- In this version the weighting is disabled: the loss function is an unweighted `CrossEntropyLoss`, so the weights are not applied during training.

### 2. **Task-Specific Adaptation**
- Automatically determines the number of classes (`num_labels`) for each task based on the dataset.
- Saves task-specific label encoders, models, and tokenizers for easy reuse and deployment.

### 3. **Macro F1-Score Evaluation**
- **Change Implemented:** The evaluation metric has been updated to use **macro F1-score** instead of weighted F1-score to better reflect the model's performance across all classes, regardless of class distribution.
- Ensures fair evaluation, especially for tasks with imbalanced datasets.

### 4. **Efficient Training with Early Stopping**
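- Introduces early stopping to halt training when the average training loss has not improved for 6 consecutive epochs, saving computational resources. Because no separate validation split is used, this criterion tracks convergence on the training data and does not by itself prevent overfitting.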

### 5. **Learning Rate Scheduling**
- Uses `ReduceLROnPlateau` to reduce the learning rate by a factor of 10 when the average training loss per epoch stops improving (patience of 3 epochs), ensuring smoother convergence.

### 6. **Enhanced Model Saving**
- Saves the best-performing model weights, tokenizer, and task-specific label encoders for deployment or further experimentation.

---

## Comparison with Benchmark Notebooks
- **Improved Metric Selection:** The switch to **macro F1-score** ensures fairer performance evaluation across all classes compared to the weighted F1-score used in the benchmark notebooks.
- **Better Training Pipeline:** Includes features like class weights, early stopping, and learning rate scheduling, which may not be fully utilized in the benchmark implementations.

---

## Summary
The submission notebook incorporates several improvements, including a shift to macro F1-score and a more refined training pipeline, to provide better and fairer results compared to the benchmark notebooks.


In [None]:
from sklearn.model_selection import train_test_split
import torch
import torch.optim as optim
from transformers import AutoModelForSequenceClassification
from torch.utils.data import DataLoader
from sklearn.metrics import f1_score, classification_report
from torch.optim.lr_scheduler import ReduceLROnPlateau
from tqdm import tqdm
import numpy as np

# Helper: balanced class weights expanded to the full label space
def _balanced_class_weights(y_train, num_classes):
    """Return a float tensor of length `num_classes` with 'balanced' class weights.

    Classes that do not occur in `y_train` keep a weight of 1.
    """
    y_array = np.asarray(y_train)
    present = np.unique(y_array)
    computed = compute_class_weight('balanced', classes=present, y=y_array)
    weights = torch.ones(num_classes)
    for class_idx, weight in zip(present, computed):
        weights[int(class_idx)] = float(weight)
    return weights


# Helper: independent copy of the model weights
def _snapshot_state(model):
    """Return a CPU copy of the model's state dict that later training steps cannot modify."""
    return {name: tensor.detach().cpu().clone() for name, tensor in model.state_dict().items()}


# Helper: inference over a data loader
def _collect_predictions(model, loader):
    """Run `model` over `loader` without gradients and return (predicted ids, true ids) as lists."""
    model.eval()
    y_preds, y_true = [], []
    with torch.no_grad():
        for batch in tqdm(loader, desc="Evaluating", total=len(loader), leave=True):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)

            outputs = model(input_ids, attention_mask=attention_mask)

            _, preds = torch.max(outputs.logits, dim=1)
            y_preds.extend(preds.cpu().numpy())
            y_true.extend(batch['label'].numpy())
    return y_preds, y_true


# Train and evaluate the model with early stopping and learning rate reduction
def train_and_evaluate_bert(data_splits, targets, use_class_weights=False):
    """Fine-tune one classifier per target and report its macro F1 on the test split.

    For every target, a fresh model is loaded from `config['model_name']`,
    trained with Adam, and evaluated on the held-out test split. The weights
    with the lowest average training loss are saved together with the
    tokenizer and the label-encoder classes under `./best_model_<target>`.

    NOTE: early stopping and `ReduceLROnPlateau` monitor the average *training*
    loss per epoch; `data_splits` contains no validation split, so neither
    mechanism guards against overfitting.

    Reads the module-level `tokenizer`, `config`, `device` and `label_encoders`.

    Args:
        data_splits: Dict mapping target name to
            `(X_train, X_test, y_train, y_test)`; the X frames must contain a
            'text' column.
        targets: Iterable of target names to train, each a key of
            `data_splits` and `label_encoders`.
        use_class_weights: If True, weight the cross-entropy loss with
            'balanced' class weights computed from the training labels.
            Defaults to False (unweighted loss).

    Returns:
        list[float]: Macro F1 score on the test split for each target, in the
        order of `targets`.
    """
    f1_scores = []

    for target in targets:
        print(f"\nStarting training for task: {target}")

        X_train, X_test, y_train, y_test = data_splits[target]

        texts_train = X_train['text'].values
        texts_test = X_test['text'].values

        train_dataset = TextDataset(texts_train, y_train, tokenizer, config['max_len'])
        test_dataset = TextDataset(texts_test, y_test, tokenizer, config['max_len'])

        train_loader = DataLoader(train_dataset, batch_size=config['batch_size'], shuffle=True)
        test_loader = DataLoader(test_dataset, batch_size=config['batch_size'], shuffle=False)

        # Size the classification head from the encoder fitted on the full
        # dataset, so classes missing from the training split still get an
        # output unit.
        num_classes = len(label_encoders[target].classes_)

        model = AutoModelForSequenceClassification.from_pretrained(
            config['model_name'], num_labels=num_classes
        ).to(device)

        optimizer = optim.Adam(model.parameters(), lr=config['learning_rate'])
        # `verbose` is deprecated in recent PyTorch releases; the learning rate
        # is printed explicitly every epoch below.
        scheduler = ReduceLROnPlateau(optimizer, 'min', factor=0.1, patience=3)
        if use_class_weights:
            class_weights = _balanced_class_weights(y_train, num_classes).to(device)
            criterion = nn.CrossEntropyLoss(weight=class_weights)
        else:
            criterion = nn.CrossEntropyLoss()

        # Early stopping setup (tracks the training loss, see docstring)
        best_train_loss = float('inf')
        best_epoch = 0
        patience = 6  # Number of epochs with no improvement before stopping
        epochs_without_improvement = 0
        best_state = None  # Snapshot of the best weights seen so far

        for epoch in range(config['epochs']):
            model.train()
            print(f"Epoch {epoch+1}/{config['epochs']} - Training: {target}")
            progress_bar = tqdm(train_loader, desc=f"Training Epoch {epoch+1}", total=len(train_loader), leave=True)

            epoch_loss = 0.0
            total_batches = 0

            for batch in progress_bar:
                optimizer.zero_grad()

                # Tensors are already (batch, max_len); no squeeze is needed
                input_ids = batch['input_ids'].to(device)
                attention_mask = batch['attention_mask'].to(device)
                labels = batch['label'].to(device)

                outputs = model(input_ids, attention_mask=attention_mask)
                loss = criterion(outputs.logits, labels)

                loss.backward()
                optimizer.step()

                epoch_loss += loss.item()
                total_batches += 1

                progress_bar.set_postfix(loss=loss.item())

            # max(..., 1) avoids a ZeroDivisionError on an empty loader
            avg_epoch_loss = epoch_loss / max(total_batches, 1)
            print(f"Average Loss after Epoch {epoch+1}: {avg_epoch_loss}")

            current_lr = optimizer.param_groups[0]['lr']
            print(f"Learning rate after epoch {epoch+1}: {current_lr}")

            scheduler.step(avg_epoch_loss)

            if avg_epoch_loss < best_train_loss:
                best_train_loss = avg_epoch_loss
                best_epoch = epoch + 1
                epochs_without_improvement = 0
                # state_dict() returns references to the live parameters, so the
                # weights must be copied; otherwise later epochs overwrite the
                # "best" weights and restoring them is a no-op.
                best_state = _snapshot_state(model)
                print(f"New best model found. Saving the model at epoch {best_epoch}.")
            else:
                epochs_without_improvement += 1
                if epochs_without_improvement >= patience:
                    print("Early stopping triggered!")
                    break

        # The directory is also needed for the label-encoder file written below
        save_dir = f'./best_model_{target}'
        os.makedirs(save_dir, exist_ok=True)

        if best_state is not None:
            print(f"Saving the best model from epoch {best_epoch}.")
            model.load_state_dict(best_state)  # Restore the best weights
            model.save_pretrained(save_dir)
            tokenizer.save_pretrained(save_dir)
        else:
            print("No improvement in training loss. No model saved.")

        print(f"Evaluating model for task: {target}")
        y_preds, y_true = _collect_predictions(model, test_loader)

        # Map integer ids back to the original label strings
        decoded_preds = label_encoders[target].inverse_transform(y_preds)
        decoded_true = label_encoders[target].inverse_transform(y_true)

        f1 = f1_score(decoded_true, decoded_preds, average='macro')
        f1_scores.append(f1)
        print(f"F1-Score for {target}: {f1}")

        print(f"Classification Report for {target}:\n")
        print(classification_report(decoded_true, decoded_preds, zero_division=0))

        # Persist the class order so predictions can be decoded at inference time
        np.save(f'{save_dir}/{target}_label_encoder.npy', label_encoders[target].classes_)
        print(f"Label Encoder for {target} saved in './best_model_{target}'")

    return f1_scores

# Train and evaluate one model per target of subtasks 1 and 2.
# text_f1_scores receives one macro F1 score per target, in the order the targets are passed.
text_f1_scores = train_and_evaluate_bert(text_splits, targets_subtask1 + targets_subtask2)



Starting training for task: hazard-category


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/100 - Training: hazard-category


Training Epoch 1: 100%|██████████| 286/286 [00:53<00:00,  5.36it/s, loss=0.778]


Average Loss after Epoch 1: 0.4932138164286743
Learning rate after epoch 1: 5e-05
New best model found. Saving the model at epoch 1.
Epoch 2/100 - Training: hazard-category


Training Epoch 2: 100%|██████████| 286/286 [00:52<00:00,  5.44it/s, loss=0.421]


Average Loss after Epoch 2: 0.228695103641391
Learning rate after epoch 2: 5e-05
New best model found. Saving the model at epoch 2.
Epoch 3/100 - Training: hazard-category


Training Epoch 3: 100%|██████████| 286/286 [00:52<00:00,  5.44it/s, loss=0.229]


Average Loss after Epoch 3: 0.17491754324507566
Learning rate after epoch 3: 5e-05
New best model found. Saving the model at epoch 3.
Epoch 4/100 - Training: hazard-category


Training Epoch 4: 100%|██████████| 286/286 [00:52<00:00,  5.44it/s, loss=0.0279]


Average Loss after Epoch 4: 0.13759832987985152
Learning rate after epoch 4: 5e-05
New best model found. Saving the model at epoch 4.
Epoch 5/100 - Training: hazard-category


Training Epoch 5: 100%|██████████| 286/286 [00:52<00:00,  5.44it/s, loss=0.0479]


Average Loss after Epoch 5: 0.11818640261328409
Learning rate after epoch 5: 5e-05
New best model found. Saving the model at epoch 5.
Epoch 6/100 - Training: hazard-category


Training Epoch 6: 100%|██████████| 286/286 [00:52<00:00,  5.44it/s, loss=0.011]


Average Loss after Epoch 6: 0.0869800663541781
Learning rate after epoch 6: 5e-05
New best model found. Saving the model at epoch 6.
Epoch 7/100 - Training: hazard-category


Training Epoch 7: 100%|██████████| 286/286 [00:52<00:00,  5.44it/s, loss=0.0354]


Average Loss after Epoch 7: 0.08727097197784228
Learning rate after epoch 7: 5e-05
Epoch 8/100 - Training: hazard-category


Training Epoch 8: 100%|██████████| 286/286 [00:52<00:00,  5.44it/s, loss=0.151]


Average Loss after Epoch 8: 0.10631898705687999
Learning rate after epoch 8: 5e-05
Epoch 9/100 - Training: hazard-category


Training Epoch 9: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=0.137]


Average Loss after Epoch 9: 0.07979211018834315
Learning rate after epoch 9: 5e-05
New best model found. Saving the model at epoch 9.
Epoch 10/100 - Training: hazard-category


Training Epoch 10: 100%|██████████| 286/286 [00:53<00:00,  5.39it/s, loss=0.0477]


Average Loss after Epoch 10: 0.08561765833617793
Learning rate after epoch 10: 5e-05
Epoch 11/100 - Training: hazard-category


Training Epoch 11: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=0.00416]


Average Loss after Epoch 11: 0.04964278714358481
Learning rate after epoch 11: 5e-05
New best model found. Saving the model at epoch 11.
Epoch 12/100 - Training: hazard-category


Training Epoch 12: 100%|██████████| 286/286 [00:52<00:00,  5.43it/s, loss=0.0109]


Average Loss after Epoch 12: 0.05302505161315268
Learning rate after epoch 12: 5e-05
Epoch 13/100 - Training: hazard-category


Training Epoch 13: 100%|██████████| 286/286 [00:52<00:00,  5.43it/s, loss=0.00292]


Average Loss after Epoch 13: 0.03754718639029815
Learning rate after epoch 13: 5e-05
New best model found. Saving the model at epoch 13.
Epoch 14/100 - Training: hazard-category


Training Epoch 14: 100%|██████████| 286/286 [00:52<00:00,  5.44it/s, loss=0.00169]


Average Loss after Epoch 14: 0.043519149866961336
Learning rate after epoch 14: 5e-05
Epoch 15/100 - Training: hazard-category


Training Epoch 15: 100%|██████████| 286/286 [00:52<00:00,  5.43it/s, loss=0.00625]


Average Loss after Epoch 15: 0.04479122285322462
Learning rate after epoch 15: 5e-05
Epoch 16/100 - Training: hazard-category


Training Epoch 16: 100%|██████████| 286/286 [00:52<00:00,  5.43it/s, loss=0.0158]


Average Loss after Epoch 16: 0.07537570624356646
Learning rate after epoch 16: 5e-05
Epoch 17/100 - Training: hazard-category


Training Epoch 17: 100%|██████████| 286/286 [00:52<00:00,  5.43it/s, loss=0.0304]


Average Loss after Epoch 17: 0.04882150691351039
Learning rate after epoch 17: 5e-05
Epoch 18/100 - Training: hazard-category


Training Epoch 18: 100%|██████████| 286/286 [00:52<00:00,  5.44it/s, loss=0.008]


Average Loss after Epoch 18: 0.017227711482258462
Learning rate after epoch 18: 5e-06
New best model found. Saving the model at epoch 18.
Epoch 19/100 - Training: hazard-category


Training Epoch 19: 100%|██████████| 286/286 [00:52<00:00,  5.44it/s, loss=0.0129]


Average Loss after Epoch 19: 0.009034228508712373
Learning rate after epoch 19: 5e-06
New best model found. Saving the model at epoch 19.
Epoch 20/100 - Training: hazard-category


Training Epoch 20: 100%|██████████| 286/286 [00:52<00:00,  5.43it/s, loss=0.00235]


Average Loss after Epoch 20: 0.00718273536482878
Learning rate after epoch 20: 5e-06
New best model found. Saving the model at epoch 20.
Epoch 21/100 - Training: hazard-category


Training Epoch 21: 100%|██████████| 286/286 [00:52<00:00,  5.44it/s, loss=0.000972]


Average Loss after Epoch 21: 0.006525563776258614
Learning rate after epoch 21: 5e-06
New best model found. Saving the model at epoch 21.
Epoch 22/100 - Training: hazard-category


Training Epoch 22: 100%|██████████| 286/286 [00:52<00:00,  5.43it/s, loss=0.00129]


Average Loss after Epoch 22: 0.004919385908935154
Learning rate after epoch 22: 5e-06
New best model found. Saving the model at epoch 22.
Epoch 23/100 - Training: hazard-category


Training Epoch 23: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=0.000721]


Average Loss after Epoch 23: 0.004898404438574505
Learning rate after epoch 23: 5e-06
New best model found. Saving the model at epoch 23.
Epoch 24/100 - Training: hazard-category


Training Epoch 24: 100%|██████████| 286/286 [00:52<00:00,  5.43it/s, loss=0.000961]


Average Loss after Epoch 24: 0.004122378047143754
Learning rate after epoch 24: 5e-06
New best model found. Saving the model at epoch 24.
Epoch 25/100 - Training: hazard-category


Training Epoch 25: 100%|██████████| 286/286 [00:52<00:00,  5.43it/s, loss=0.00067]


Average Loss after Epoch 25: 0.0030245822701697607
Learning rate after epoch 25: 5e-06
New best model found. Saving the model at epoch 25.
Epoch 26/100 - Training: hazard-category


Training Epoch 26: 100%|██████████| 286/286 [00:52<00:00,  5.43it/s, loss=0.00133]


Average Loss after Epoch 26: 0.0025890454650128753
Learning rate after epoch 26: 5e-06
New best model found. Saving the model at epoch 26.
Epoch 27/100 - Training: hazard-category


Training Epoch 27: 100%|██████████| 286/286 [00:52<00:00,  5.43it/s, loss=0.000921]


Average Loss after Epoch 27: 0.002083809633875246
Learning rate after epoch 27: 5e-06
New best model found. Saving the model at epoch 27.
Epoch 28/100 - Training: hazard-category


Training Epoch 28: 100%|██████████| 286/286 [00:52<00:00,  5.43it/s, loss=0.00049]


Average Loss after Epoch 28: 0.0017934696432577537
Learning rate after epoch 28: 5e-06
New best model found. Saving the model at epoch 28.
Epoch 29/100 - Training: hazard-category


Training Epoch 29: 100%|██████████| 286/286 [00:52<00:00,  5.43it/s, loss=0.000671]


Average Loss after Epoch 29: 0.0016272282077266567
Learning rate after epoch 29: 5e-06
New best model found. Saving the model at epoch 29.
Epoch 30/100 - Training: hazard-category


Training Epoch 30: 100%|██████████| 286/286 [00:52<00:00,  5.44it/s, loss=0.000308]


Average Loss after Epoch 30: 0.002440069418209621
Learning rate after epoch 30: 5e-06
Epoch 31/100 - Training: hazard-category


Training Epoch 31: 100%|██████████| 286/286 [00:52<00:00,  5.43it/s, loss=0.000421]


Average Loss after Epoch 31: 0.0038831724870341043
Learning rate after epoch 31: 5e-06
Epoch 32/100 - Training: hazard-category


Training Epoch 32: 100%|██████████| 286/286 [00:52<00:00,  5.43it/s, loss=0.000287]


Average Loss after Epoch 32: 0.001323274409158686
Learning rate after epoch 32: 5e-06
New best model found. Saving the model at epoch 32.
Epoch 33/100 - Training: hazard-category


Training Epoch 33: 100%|██████████| 286/286 [00:52<00:00,  5.43it/s, loss=0.000878]


Average Loss after Epoch 33: 0.0012513467329888037
Learning rate after epoch 33: 5e-06
New best model found. Saving the model at epoch 33.
Epoch 34/100 - Training: hazard-category


Training Epoch 34: 100%|██████████| 286/286 [00:52<00:00,  5.43it/s, loss=0.000292]


Average Loss after Epoch 34: 0.0010512634916222895
Learning rate after epoch 34: 5e-06
New best model found. Saving the model at epoch 34.
Epoch 35/100 - Training: hazard-category


Training Epoch 35: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=0.000238]


Average Loss after Epoch 35: 0.0010950130923269349
Learning rate after epoch 35: 5e-06
Epoch 36/100 - Training: hazard-category


Training Epoch 36: 100%|██████████| 286/286 [00:52<00:00,  5.43it/s, loss=0.00031]


Average Loss after Epoch 36: 0.0026818771518004074
Learning rate after epoch 36: 5e-06
Epoch 37/100 - Training: hazard-category


Training Epoch 37: 100%|██████████| 286/286 [00:52<00:00,  5.43it/s, loss=0.000394]


Average Loss after Epoch 37: 0.0020334447574393883
Learning rate after epoch 37: 5e-06
Epoch 38/100 - Training: hazard-category


Training Epoch 38: 100%|██████████| 286/286 [00:52<00:00,  5.43it/s, loss=0.000255]


Average Loss after Epoch 38: 0.0013600726034618438
Learning rate after epoch 38: 5e-06
Epoch 39/100 - Training: hazard-category


Training Epoch 39: 100%|██████████| 286/286 [00:52<00:00,  5.43it/s, loss=0.000252]


Average Loss after Epoch 39: 0.001392575661589512
Learning rate after epoch 39: 5.000000000000001e-07
Epoch 40/100 - Training: hazard-category


Training Epoch 40: 100%|██████████| 286/286 [00:52<00:00,  5.43it/s, loss=0.000478]


Average Loss after Epoch 40: 0.0012774442215091907
Learning rate after epoch 40: 5.000000000000001e-07
Early stopping triggered!
Saving the best model from epoch 34.
Evaluating model for task: hazard-category


Evaluating: 100%|██████████| 32/32 [00:02<00:00, 12.66it/s]


F1-Score for hazard-category: 0.8151186899805364
Classification Report for hazard-category:

                                precision    recall  f1-score   support

                     allergens       0.96      0.99      0.97       188
                    biological       0.99      0.99      0.99       171
                      chemical       0.94      0.97      0.96        35
food additives and flavourings       0.33      0.20      0.25         5
                foreign bodies       1.00      0.98      0.99        58
                         fraud       0.78      0.75      0.76        28
                     migration       1.00      1.00      1.00         1
          organoleptic aspects       0.75      1.00      0.86         3
                  other hazard       0.91      0.67      0.77        15
              packaging defect       0.60      0.60      0.60         5

                      accuracy                           0.95       509
                     macro avg       0.83

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/100 - Training: product-category


Training Epoch 1: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=1.45]


Average Loss after Epoch 1: 1.813236296802134
Learning rate after epoch 1: 5e-05
New best model found. Saving the model at epoch 1.
Epoch 2/100 - Training: product-category


Training Epoch 2: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=1.64]


Average Loss after Epoch 2: 1.0056540972286172
Learning rate after epoch 2: 5e-05
New best model found. Saving the model at epoch 2.
Epoch 3/100 - Training: product-category


Training Epoch 3: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=0.502]


Average Loss after Epoch 3: 0.6673113502353638
Learning rate after epoch 3: 5e-05
New best model found. Saving the model at epoch 3.
Epoch 4/100 - Training: product-category


Training Epoch 4: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=0.513]


Average Loss after Epoch 4: 0.4481409398018272
Learning rate after epoch 4: 5e-05
New best model found. Saving the model at epoch 4.
Epoch 5/100 - Training: product-category


Training Epoch 5: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=0.112]


Average Loss after Epoch 5: 0.33131735108896987
Learning rate after epoch 5: 5e-05
New best model found. Saving the model at epoch 5.
Epoch 6/100 - Training: product-category


Training Epoch 6: 100%|██████████| 286/286 [00:52<00:00,  5.41it/s, loss=0.553]


Average Loss after Epoch 6: 0.24683916724410715
Learning rate after epoch 6: 5e-05
New best model found. Saving the model at epoch 6.
Epoch 7/100 - Training: product-category


Training Epoch 7: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=0.112]


Average Loss after Epoch 7: 0.18494343108634329
Learning rate after epoch 7: 5e-05
New best model found. Saving the model at epoch 7.
Epoch 8/100 - Training: product-category


Training Epoch 8: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=0.0106]


Average Loss after Epoch 8: 0.1539639828863417
Learning rate after epoch 8: 5e-05
New best model found. Saving the model at epoch 8.
Epoch 9/100 - Training: product-category


Training Epoch 9: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=0.0879]


Average Loss after Epoch 9: 0.13262545822800456
Learning rate after epoch 9: 5e-05
New best model found. Saving the model at epoch 9.
Epoch 10/100 - Training: product-category


Training Epoch 10: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=0.0626]


Average Loss after Epoch 10: 0.12538461597274886
Learning rate after epoch 10: 5e-05
New best model found. Saving the model at epoch 10.
Epoch 11/100 - Training: product-category


Training Epoch 11: 100%|██████████| 286/286 [00:52<00:00,  5.41it/s, loss=0.141]


Average Loss after Epoch 11: 0.09628168335412401
Learning rate after epoch 11: 5e-05
New best model found. Saving the model at epoch 11.
Epoch 12/100 - Training: product-category


Training Epoch 12: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=0.485]


Average Loss after Epoch 12: 0.09648865507752798
Learning rate after epoch 12: 5e-05
Epoch 13/100 - Training: product-category


Training Epoch 13: 100%|██████████| 286/286 [00:52<00:00,  5.41it/s, loss=0.0108]


Average Loss after Epoch 13: 0.07807451760367054
Learning rate after epoch 13: 5e-05
New best model found. Saving the model at epoch 13.
Epoch 14/100 - Training: product-category


Training Epoch 14: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=0.00524]


Average Loss after Epoch 14: 0.10736000252939709
Learning rate after epoch 14: 5e-05
Epoch 15/100 - Training: product-category


Training Epoch 15: 100%|██████████| 286/286 [00:52<00:00,  5.41it/s, loss=0.0172]


Average Loss after Epoch 15: 0.07556226904018802
Learning rate after epoch 15: 5e-05
New best model found. Saving the model at epoch 15.
Epoch 16/100 - Training: product-category


Training Epoch 16: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=0.00391]


Average Loss after Epoch 16: 0.04351065140640871
Learning rate after epoch 16: 5e-05
New best model found. Saving the model at epoch 16.
Epoch 17/100 - Training: product-category


Training Epoch 17: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=0.0112]


Average Loss after Epoch 17: 0.08050345434926098
Learning rate after epoch 17: 5e-05
Epoch 18/100 - Training: product-category


Training Epoch 18: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=0.00906]


Average Loss after Epoch 18: 0.0691470960729065
Learning rate after epoch 18: 5e-05
Epoch 19/100 - Training: product-category


Training Epoch 19: 100%|██████████| 286/286 [00:52<00:00,  5.41it/s, loss=0.00245]


Average Loss after Epoch 19: 0.06080832170205308
Learning rate after epoch 19: 5e-05
Epoch 20/100 - Training: product-category


Training Epoch 20: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=0.00548]


Average Loss after Epoch 20: 0.0676467249172388
Learning rate after epoch 20: 5e-05
Epoch 21/100 - Training: product-category


Training Epoch 21: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=0.0211]


Average Loss after Epoch 21: 0.02396335580078494
Learning rate after epoch 21: 5e-06
New best model found. Saving the model at epoch 21.
Epoch 22/100 - Training: product-category


Training Epoch 22: 100%|██████████| 286/286 [00:52<00:00,  5.43it/s, loss=0.00342]


Average Loss after Epoch 22: 0.009621884745474044
Learning rate after epoch 22: 5e-06
New best model found. Saving the model at epoch 22.
Epoch 23/100 - Training: product-category


Training Epoch 23: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=0.00713]


Average Loss after Epoch 23: 0.007237740514931835
Learning rate after epoch 23: 5e-06
New best model found. Saving the model at epoch 23.
Epoch 24/100 - Training: product-category


Training Epoch 24: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=0.0019]


Average Loss after Epoch 24: 0.00631536309948432
Learning rate after epoch 24: 5e-06
New best model found. Saving the model at epoch 24.
Epoch 25/100 - Training: product-category


Training Epoch 25: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=0.00096]


Average Loss after Epoch 25: 0.0048779497031770465
Learning rate after epoch 25: 5e-06
New best model found. Saving the model at epoch 25.
Epoch 26/100 - Training: product-category


Training Epoch 26: 100%|██████████| 286/286 [00:52<00:00,  5.41it/s, loss=0.00105]


Average Loss after Epoch 26: 0.00485614767005843
Learning rate after epoch 26: 5e-06
New best model found. Saving the model at epoch 26.
Epoch 27/100 - Training: product-category


Training Epoch 27: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=0.000716]


Average Loss after Epoch 27: 0.004300967538224573
Learning rate after epoch 27: 5e-06
New best model found. Saving the model at epoch 27.
Epoch 28/100 - Training: product-category


Training Epoch 28: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=0.028]


Average Loss after Epoch 28: 0.003695996297064231
Learning rate after epoch 28: 5e-06
New best model found. Saving the model at epoch 28.
Epoch 29/100 - Training: product-category


Training Epoch 29: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=0.000668]


Average Loss after Epoch 29: 0.0036488874993283772
Learning rate after epoch 29: 5e-06
New best model found. Saving the model at epoch 29.
Epoch 30/100 - Training: product-category


Training Epoch 30: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=0.00196]


Average Loss after Epoch 30: 0.003797119631313139
Learning rate after epoch 30: 5e-06
Epoch 31/100 - Training: product-category


Training Epoch 31: 100%|██████████| 286/286 [00:52<00:00,  5.41it/s, loss=0.00198]


Average Loss after Epoch 31: 0.003230079514148704
Learning rate after epoch 31: 5e-06
New best model found. Saving the model at epoch 31.
Epoch 32/100 - Training: product-category


Training Epoch 32: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=0.000916]


Average Loss after Epoch 32: 0.003529637038847757
Learning rate after epoch 32: 5e-06
Epoch 33/100 - Training: product-category


Training Epoch 33: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=0.00167]


Average Loss after Epoch 33: 0.003001919908793415
Learning rate after epoch 33: 5e-06
New best model found. Saving the model at epoch 33.
Epoch 34/100 - Training: product-category


Training Epoch 34: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=0.00114]


Average Loss after Epoch 34: 0.003878428195004833
Learning rate after epoch 34: 5e-06
Epoch 35/100 - Training: product-category


Training Epoch 35: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=0.00155]


Average Loss after Epoch 35: 0.003755778164730629
Learning rate after epoch 35: 5e-06
Epoch 36/100 - Training: product-category


Training Epoch 36: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=0.000931]


Average Loss after Epoch 36: 0.003206492312414526
Learning rate after epoch 36: 5e-06
Epoch 37/100 - Training: product-category


Training Epoch 37: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=0.000496]


Average Loss after Epoch 37: 0.0025388053430112443
Learning rate after epoch 37: 5e-06
New best model found. Saving the model at epoch 37.
Epoch 38/100 - Training: product-category


Training Epoch 38: 100%|██████████| 286/286 [00:52<00:00,  5.41it/s, loss=0.00352]


Average Loss after Epoch 38: 0.0023062209000565123
Learning rate after epoch 38: 5e-06
New best model found. Saving the model at epoch 38.
Epoch 39/100 - Training: product-category


Training Epoch 39: 100%|██████████| 286/286 [00:52<00:00,  5.41it/s, loss=0.000782]


Average Loss after Epoch 39: 0.002383930675792866
Learning rate after epoch 39: 5e-06
Epoch 40/100 - Training: product-category


Training Epoch 40: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=0.000601]


Average Loss after Epoch 40: 0.0026645982448521258
Learning rate after epoch 40: 5e-06
Epoch 41/100 - Training: product-category


Training Epoch 41: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=0.000337]


Average Loss after Epoch 41: 0.002386644989385602
Learning rate after epoch 41: 5e-06
Epoch 42/100 - Training: product-category


Training Epoch 42: 100%|██████████| 286/286 [00:52<00:00,  5.41it/s, loss=0.000376]


Average Loss after Epoch 42: 0.0018967890020474545
Learning rate after epoch 42: 5e-06
New best model found. Saving the model at epoch 42.
Epoch 43/100 - Training: product-category


Training Epoch 43: 100%|██████████| 286/286 [00:52<00:00,  5.41it/s, loss=0.00206]


Average Loss after Epoch 43: 0.0034983257569842384
Learning rate after epoch 43: 5e-06
Epoch 44/100 - Training: product-category


Training Epoch 44: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=0.000517]


Average Loss after Epoch 44: 0.004673440709373901
Learning rate after epoch 44: 5e-06
Epoch 45/100 - Training: product-category


Training Epoch 45: 100%|██████████| 286/286 [00:52<00:00,  5.41it/s, loss=0.000533]


Average Loss after Epoch 45: 0.007542760688380072
Learning rate after epoch 45: 5e-06
Epoch 46/100 - Training: product-category


Training Epoch 46: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=0.000458]


Average Loss after Epoch 46: 0.0061332775202846005
Learning rate after epoch 46: 5e-06
Epoch 47/100 - Training: product-category


Training Epoch 47: 100%|██████████| 286/286 [00:52<00:00,  5.41it/s, loss=0.000402]


Average Loss after Epoch 47: 0.00196469324853408
Learning rate after epoch 47: 5.000000000000001e-07
Epoch 48/100 - Training: product-category


Training Epoch 48: 100%|██████████| 286/286 [00:52<00:00,  5.41it/s, loss=0.00053]


Average Loss after Epoch 48: 0.0015978646125067253
Learning rate after epoch 48: 5.000000000000001e-07
New best model found. Saving the model at epoch 48.
Epoch 49/100 - Training: product-category


Training Epoch 49: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=0.000474]


Average Loss after Epoch 49: 0.0016143479042766303
Learning rate after epoch 49: 5.000000000000001e-07
Epoch 50/100 - Training: product-category


Training Epoch 50: 100%|██████████| 286/286 [00:52<00:00,  5.41it/s, loss=0.000429]


Average Loss after Epoch 50: 0.0023030502501048046
Learning rate after epoch 50: 5.000000000000001e-07
Epoch 51/100 - Training: product-category


Training Epoch 51: 100%|██████████| 286/286 [00:52<00:00,  5.40it/s, loss=0.000368]


Average Loss after Epoch 51: 0.001475581815433721
Learning rate after epoch 51: 5.000000000000001e-07
New best model found. Saving the model at epoch 51.
Epoch 52/100 - Training: product-category


Training Epoch 52: 100%|██████████| 286/286 [00:52<00:00,  5.41it/s, loss=0.0005]


Average Loss after Epoch 52: 0.002045864686017801
Learning rate after epoch 52: 5.000000000000001e-07
Epoch 53/100 - Training: product-category


Training Epoch 53: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=0.000636]


Average Loss after Epoch 53: 0.0014641665577480628
Learning rate after epoch 53: 5.000000000000001e-07
New best model found. Saving the model at epoch 53.
Epoch 54/100 - Training: product-category


Training Epoch 54: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=0.000616]


Average Loss after Epoch 54: 0.0017141872934323013
Learning rate after epoch 54: 5.000000000000001e-07
Epoch 55/100 - Training: product-category


Training Epoch 55: 100%|██████████| 286/286 [00:52<00:00,  5.41it/s, loss=0.000264]


Average Loss after Epoch 55: 0.0016193433980810801
Learning rate after epoch 55: 5.000000000000001e-07
Epoch 56/100 - Training: product-category


Training Epoch 56: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=0.000289]


Average Loss after Epoch 56: 0.0024266323752736288
Learning rate after epoch 56: 5.000000000000001e-07
Epoch 57/100 - Training: product-category


Training Epoch 57: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=0.00106]


Average Loss after Epoch 57: 0.0014916755829105753
Learning rate after epoch 57: 5.000000000000001e-07
Epoch 58/100 - Training: product-category


Training Epoch 58: 100%|██████████| 286/286 [00:52<00:00,  5.41it/s, loss=0.0005]


Average Loss after Epoch 58: 0.0020363960815353442
Learning rate after epoch 58: 5.000000000000001e-08
Epoch 59/100 - Training: product-category


Training Epoch 59: 100%|██████████| 286/286 [00:52<00:00,  5.41it/s, loss=0.000679]


Average Loss after Epoch 59: 0.00141095503987834
Learning rate after epoch 59: 5.000000000000001e-08
New best model found. Saving the model at epoch 59.
Epoch 60/100 - Training: product-category


Training Epoch 60: 100%|██████████| 286/286 [00:52<00:00,  5.41it/s, loss=0.000277]


Average Loss after Epoch 60: 0.0014678155002530201
Learning rate after epoch 60: 5.000000000000001e-08
Epoch 61/100 - Training: product-category


Training Epoch 61: 100%|██████████| 286/286 [00:52<00:00,  5.41it/s, loss=0.000509]


Average Loss after Epoch 61: 0.001655602896204658
Learning rate after epoch 61: 5.000000000000001e-08
Epoch 62/100 - Training: product-category


Training Epoch 62: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=0.000324]


Average Loss after Epoch 62: 0.0015490878245368327
Learning rate after epoch 62: 5.000000000000001e-08
Epoch 63/100 - Training: product-category


Training Epoch 63: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=0.000502]


Average Loss after Epoch 63: 0.0015833409566398046
Learning rate after epoch 63: 5.000000000000001e-08
Epoch 64/100 - Training: product-category


Training Epoch 64: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=0.000405]


Average Loss after Epoch 64: 0.0017228553622555625
Learning rate after epoch 64: 5.000000000000002e-09
Epoch 65/100 - Training: product-category


Training Epoch 65: 100%|██████████| 286/286 [00:52<00:00,  5.40it/s, loss=0.000276]


Average Loss after Epoch 65: 0.0016404325031073783
Learning rate after epoch 65: 5.000000000000002e-09
Early stopping triggered!
Saving the best model from epoch 59.
Evaluating model for task: product-category


Evaluating: 100%|██████████| 32/32 [00:02<00:00, 12.51it/s]


F1-Score for product-category: 0.6884324785400671
Classification Report for product-category:

                                                   precision    recall  f1-score   support

                              alcoholic beverages       0.50      0.50      0.50         2
                      cereals and bakery products       0.73      0.87      0.80        54
     cocoa and cocoa preparations, coffee and tea       0.75      0.72      0.74        29
                                    confectionery       0.71      0.60      0.65        20
dietetic foods, food supplements, fortified foods       0.62      0.91      0.74        11
                                    fats and oils       1.00      0.33      0.50         3
                                   feed materials       0.00      0.00      0.00         2
                   food additives and flavourings       0.00      0.00      0.00         0
                           food contact materials       1.00      1.00      1.00     

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/100 - Training: hazard


Training Epoch 1: 100%|██████████| 286/286 [00:52<00:00,  5.41it/s, loss=1.62]


Average Loss after Epoch 1: 2.107708618044853
Learning rate after epoch 1: 5e-05
New best model found. Saving the model at epoch 1.
Epoch 2/100 - Training: hazard


Training Epoch 2: 100%|██████████| 286/286 [00:52<00:00,  5.41it/s, loss=0.498]


Average Loss after Epoch 2: 0.9869317434259228
Learning rate after epoch 2: 5e-05
New best model found. Saving the model at epoch 2.
Epoch 3/100 - Training: hazard


Training Epoch 3: 100%|██████████| 286/286 [00:52<00:00,  5.41it/s, loss=0.178]


Average Loss after Epoch 3: 0.7051188242654284
Learning rate after epoch 3: 5e-05
New best model found. Saving the model at epoch 3.
Epoch 4/100 - Training: hazard


Training Epoch 4: 100%|██████████| 286/286 [00:52<00:00,  5.41it/s, loss=0.349]


Average Loss after Epoch 4: 0.5285731355522896
Learning rate after epoch 4: 5e-05
New best model found. Saving the model at epoch 4.
Epoch 5/100 - Training: hazard


Training Epoch 5: 100%|██████████| 286/286 [00:52<00:00,  5.41it/s, loss=0.13]


Average Loss after Epoch 5: 0.42307763988269376
Learning rate after epoch 5: 5e-05
New best model found. Saving the model at epoch 5.
Epoch 6/100 - Training: hazard


Training Epoch 6: 100%|██████████| 286/286 [00:52<00:00,  5.41it/s, loss=0.436]


Average Loss after Epoch 6: 0.34010251697439414
Learning rate after epoch 6: 5e-05
New best model found. Saving the model at epoch 6.
Epoch 7/100 - Training: hazard


Training Epoch 7: 100%|██████████| 286/286 [00:52<00:00,  5.41it/s, loss=0.33]


Average Loss after Epoch 7: 0.2854615771504757
Learning rate after epoch 7: 5e-05
New best model found. Saving the model at epoch 7.
Epoch 8/100 - Training: hazard


Training Epoch 8: 100%|██████████| 286/286 [00:52<00:00,  5.41it/s, loss=0.178]


Average Loss after Epoch 8: 0.22460739973645943
Learning rate after epoch 8: 5e-05
New best model found. Saving the model at epoch 8.
Epoch 9/100 - Training: hazard


Training Epoch 9: 100%|██████████| 286/286 [00:52<00:00,  5.41it/s, loss=0.0218]


Average Loss after Epoch 9: 0.19810415682272164
Learning rate after epoch 9: 5e-05
New best model found. Saving the model at epoch 9.
Epoch 10/100 - Training: hazard


Training Epoch 10: 100%|██████████| 286/286 [00:52<00:00,  5.41it/s, loss=0.161]


Average Loss after Epoch 10: 0.17181001865252904
Learning rate after epoch 10: 5e-05
New best model found. Saving the model at epoch 10.
Epoch 11/100 - Training: hazard


Training Epoch 11: 100%|██████████| 286/286 [00:52<00:00,  5.40it/s, loss=0.149]


Average Loss after Epoch 11: 0.135720607225451
Learning rate after epoch 11: 5e-05
New best model found. Saving the model at epoch 11.
Epoch 12/100 - Training: hazard


Training Epoch 12: 100%|██████████| 286/286 [00:52<00:00,  5.40it/s, loss=0.0491]


Average Loss after Epoch 12: 0.13074126983895065
Learning rate after epoch 12: 5e-05
New best model found. Saving the model at epoch 12.
Epoch 13/100 - Training: hazard


Training Epoch 13: 100%|██████████| 286/286 [00:52<00:00,  5.40it/s, loss=0.0864]


Average Loss after Epoch 13: 0.11871577781898628
Learning rate after epoch 13: 5e-05
New best model found. Saving the model at epoch 13.
Epoch 14/100 - Training: hazard


Training Epoch 14: 100%|██████████| 286/286 [00:52<00:00,  5.41it/s, loss=0.108]


Average Loss after Epoch 14: 0.13014082768866858
Learning rate after epoch 14: 5e-05
Epoch 15/100 - Training: hazard


Training Epoch 15: 100%|██████████| 286/286 [00:52<00:00,  5.41it/s, loss=0.522]


Average Loss after Epoch 15: 0.12777707816616118
Learning rate after epoch 15: 5e-05
Epoch 16/100 - Training: hazard


Training Epoch 16: 100%|██████████| 286/286 [00:52<00:00,  5.40it/s, loss=0.0127]


Average Loss after Epoch 16: 0.08843368179184365
Learning rate after epoch 16: 5e-05
New best model found. Saving the model at epoch 16.
Epoch 17/100 - Training: hazard


Training Epoch 17: 100%|██████████| 286/286 [00:52<00:00,  5.40it/s, loss=0.0199]


Average Loss after Epoch 17: 0.057437454249345134
Learning rate after epoch 17: 5e-05
New best model found. Saving the model at epoch 17.
Epoch 18/100 - Training: hazard


Training Epoch 18: 100%|██████████| 286/286 [00:52<00:00,  5.40it/s, loss=0.0167]


Average Loss after Epoch 18: 0.06009914820290894
Learning rate after epoch 18: 5e-05
Epoch 19/100 - Training: hazard


Training Epoch 19: 100%|██████████| 286/286 [00:52<00:00,  5.40it/s, loss=0.00928]


Average Loss after Epoch 19: 0.07702905626420478
Learning rate after epoch 19: 5e-05
Epoch 20/100 - Training: hazard


Training Epoch 20: 100%|██████████| 286/286 [00:52<00:00,  5.41it/s, loss=0.0365]


Average Loss after Epoch 20: 0.05868229179047178
Learning rate after epoch 20: 5e-05
Epoch 21/100 - Training: hazard


Training Epoch 21: 100%|██████████| 286/286 [00:52<00:00,  5.41it/s, loss=0.0112]


Average Loss after Epoch 21: 0.0403687732715088
Learning rate after epoch 21: 5e-05
New best model found. Saving the model at epoch 21.
Epoch 22/100 - Training: hazard


Training Epoch 22: 100%|██████████| 286/286 [00:52<00:00,  5.41it/s, loss=0.011]


Average Loss after Epoch 22: 0.03833746952387355
Learning rate after epoch 22: 5e-05
New best model found. Saving the model at epoch 22.
Epoch 23/100 - Training: hazard


Training Epoch 23: 100%|██████████| 286/286 [00:52<00:00,  5.40it/s, loss=0.0038]


Average Loss after Epoch 23: 0.03060190492249387
Learning rate after epoch 23: 5e-05
New best model found. Saving the model at epoch 23.
Epoch 24/100 - Training: hazard


Training Epoch 24: 100%|██████████| 286/286 [00:52<00:00,  5.40it/s, loss=0.0104]


Average Loss after Epoch 24: 0.024291283441048077
Learning rate after epoch 24: 5e-05
New best model found. Saving the model at epoch 24.
Epoch 25/100 - Training: hazard


Training Epoch 25: 100%|██████████| 286/286 [00:52<00:00,  5.41it/s, loss=0.0203]


Average Loss after Epoch 25: 0.126713747957679
Learning rate after epoch 25: 5e-05
Epoch 26/100 - Training: hazard


Training Epoch 26: 100%|██████████| 286/286 [00:52<00:00,  5.41it/s, loss=0.0551]


Average Loss after Epoch 26: 0.059815377552205555
Learning rate after epoch 26: 5e-05
Epoch 27/100 - Training: hazard


Training Epoch 27: 100%|██████████| 286/286 [00:52<00:00,  5.40it/s, loss=0.00825]


Average Loss after Epoch 27: 0.05066224969060557
Learning rate after epoch 27: 5e-05
Epoch 28/100 - Training: hazard


Training Epoch 28: 100%|██████████| 286/286 [00:52<00:00,  5.40it/s, loss=0.00124]


Average Loss after Epoch 28: 0.031160512560542176
Learning rate after epoch 28: 5e-05
Epoch 29/100 - Training: hazard


Training Epoch 29: 100%|██████████| 286/286 [00:52<00:00,  5.40it/s, loss=0.00263]


Average Loss after Epoch 29: 0.017768676607654646
Learning rate after epoch 29: 5e-06
New best model found. Saving the model at epoch 29.
Epoch 30/100 - Training: hazard


Training Epoch 30: 100%|██████████| 286/286 [00:52<00:00,  5.41it/s, loss=0.00193]


Average Loss after Epoch 30: 0.010055829910072986
Learning rate after epoch 30: 5e-06
New best model found. Saving the model at epoch 30.
Epoch 31/100 - Training: hazard


Training Epoch 31: 100%|██████████| 286/286 [00:52<00:00,  5.41it/s, loss=0.00433]


Average Loss after Epoch 31: 0.006296266810203271
Learning rate after epoch 31: 5e-06
New best model found. Saving the model at epoch 31.
Epoch 32/100 - Training: hazard


Training Epoch 32: 100%|██████████| 286/286 [00:52<00:00,  5.41it/s, loss=0.00804]


Average Loss after Epoch 32: 0.005464382955464176
Learning rate after epoch 32: 5e-06
New best model found. Saving the model at epoch 32.
Epoch 33/100 - Training: hazard


Training Epoch 33: 100%|██████████| 286/286 [00:52<00:00,  5.41it/s, loss=0.00166]


Average Loss after Epoch 33: 0.007132344083637455
Learning rate after epoch 33: 5e-06
Epoch 34/100 - Training: hazard


Training Epoch 34: 100%|██████████| 286/286 [00:52<00:00,  5.40it/s, loss=0.00342]


Average Loss after Epoch 34: 0.004480318789096823
Learning rate after epoch 34: 5e-06
New best model found. Saving the model at epoch 34.
Epoch 35/100 - Training: hazard


Training Epoch 35: 100%|██████████| 286/286 [00:52<00:00,  5.40it/s, loss=0.00682]


Average Loss after Epoch 35: 0.004436460905097803
Learning rate after epoch 35: 5e-06
New best model found. Saving the model at epoch 35.
Epoch 36/100 - Training: hazard


Training Epoch 36: 100%|██████████| 286/286 [00:52<00:00,  5.40it/s, loss=0.00146]


Average Loss after Epoch 36: 0.0035548789517862813
Learning rate after epoch 36: 5e-06
New best model found. Saving the model at epoch 36.
Epoch 37/100 - Training: hazard


Training Epoch 37: 100%|██████████| 286/286 [00:52<00:00,  5.41it/s, loss=0.00114]


Average Loss after Epoch 37: 0.0038313720984037613
Learning rate after epoch 37: 5e-06
Epoch 38/100 - Training: hazard


Training Epoch 38: 100%|██████████| 286/286 [00:52<00:00,  5.41it/s, loss=0.00299]


Average Loss after Epoch 38: 0.0032994379606944594
Learning rate after epoch 38: 5e-06
New best model found. Saving the model at epoch 38.
Epoch 39/100 - Training: hazard


Training Epoch 39: 100%|██████████| 286/286 [00:52<00:00,  5.41it/s, loss=0.0008]


Average Loss after Epoch 39: 0.0032363733853916867
Learning rate after epoch 39: 5e-06
New best model found. Saving the model at epoch 39.
Epoch 40/100 - Training: hazard


Training Epoch 40: 100%|██████████| 286/286 [00:52<00:00,  5.40it/s, loss=0.00927]


Average Loss after Epoch 40: 0.0040667131181905646
Learning rate after epoch 40: 5e-06
Epoch 41/100 - Training: hazard


Training Epoch 41: 100%|██████████| 286/286 [00:52<00:00,  5.40it/s, loss=0.00147]


Average Loss after Epoch 41: 0.004050907396004776
Learning rate after epoch 41: 5e-06
Epoch 42/100 - Training: hazard


Training Epoch 42: 100%|██████████| 286/286 [00:52<00:00,  5.41it/s, loss=0.00154]


Average Loss after Epoch 42: 0.0041850581593197685
Learning rate after epoch 42: 5e-06
Epoch 43/100 - Training: hazard


Training Epoch 43: 100%|██████████| 286/286 [00:52<00:00,  5.41it/s, loss=0.00102]


Average Loss after Epoch 43: 0.003155183549555201
Learning rate after epoch 43: 5e-06
New best model found. Saving the model at epoch 43.
Epoch 44/100 - Training: hazard


Training Epoch 44: 100%|██████████| 286/286 [00:52<00:00,  5.41it/s, loss=0.00193]


Average Loss after Epoch 44: 0.003010810032210938
Learning rate after epoch 44: 5e-06
New best model found. Saving the model at epoch 44.
Epoch 45/100 - Training: hazard


Training Epoch 45: 100%|██████████| 286/286 [00:52<00:00,  5.41it/s, loss=0.00109]


Average Loss after Epoch 45: 0.0024972419018191253
Learning rate after epoch 45: 5e-06
New best model found. Saving the model at epoch 45.
Epoch 46/100 - Training: hazard


Training Epoch 46: 100%|██████████| 286/286 [00:52<00:00,  5.41it/s, loss=0.00103]


Average Loss after Epoch 46: 0.003140161608889621
Learning rate after epoch 46: 5e-06
Epoch 47/100 - Training: hazard


Training Epoch 47: 100%|██████████| 286/286 [00:52<00:00,  5.41it/s, loss=0.00156]


Average Loss after Epoch 47: 0.0024831103368774645
Learning rate after epoch 47: 5e-06
New best model found. Saving the model at epoch 47.
Epoch 48/100 - Training: hazard


Training Epoch 48: 100%|██████████| 286/286 [00:52<00:00,  5.40it/s, loss=0.138]


Average Loss after Epoch 48: 0.0024681115591975084
Learning rate after epoch 48: 5e-06
New best model found. Saving the model at epoch 48.
Epoch 49/100 - Training: hazard


Training Epoch 49: 100%|██████████| 286/286 [00:52<00:00,  5.41it/s, loss=0.0034]


Average Loss after Epoch 49: 0.0024370942992391065
Learning rate after epoch 49: 5e-06
New best model found. Saving the model at epoch 49.
Epoch 50/100 - Training: hazard


Training Epoch 50: 100%|██████████| 286/286 [00:52<00:00,  5.41it/s, loss=0.000553]


Average Loss after Epoch 50: 0.0024263577425235336
Learning rate after epoch 50: 5e-06
New best model found. Saving the model at epoch 50.
Epoch 51/100 - Training: hazard


Training Epoch 51: 100%|██████████| 286/286 [00:52<00:00,  5.41it/s, loss=0.00153]


Average Loss after Epoch 51: 0.002124504464938162
Learning rate after epoch 51: 5e-06
New best model found. Saving the model at epoch 51.
Epoch 52/100 - Training: hazard


Training Epoch 52: 100%|██████████| 286/286 [00:52<00:00,  5.41it/s, loss=0.00209]


Average Loss after Epoch 52: 0.006476609119669196
Learning rate after epoch 52: 5e-06
Epoch 53/100 - Training: hazard


Training Epoch 53: 100%|██████████| 286/286 [00:52<00:00,  5.40it/s, loss=0.00145]


Average Loss after Epoch 53: 0.002845010309534268
Learning rate after epoch 53: 5e-06
Epoch 54/100 - Training: hazard


Training Epoch 54: 100%|██████████| 286/286 [00:52<00:00,  5.40it/s, loss=0.000537]


Average Loss after Epoch 54: 0.002453183136262501
Learning rate after epoch 54: 5e-06
Epoch 55/100 - Training: hazard


Training Epoch 55: 100%|██████████| 286/286 [00:52<00:00,  5.40it/s, loss=0.001]


Average Loss after Epoch 55: 0.001956714766378469
Learning rate after epoch 55: 5e-06
New best model found. Saving the model at epoch 55.
Epoch 56/100 - Training: hazard


Training Epoch 56: 100%|██████████| 286/286 [00:52<00:00,  5.40it/s, loss=0.000777]


Average Loss after Epoch 56: 0.0018328860831236095
Learning rate after epoch 56: 5e-06
New best model found. Saving the model at epoch 56.
Epoch 57/100 - Training: hazard


Training Epoch 57: 100%|██████████| 286/286 [00:52<00:00,  5.41it/s, loss=0.000714]


Average Loss after Epoch 57: 0.0016316570326017534
Learning rate after epoch 57: 5e-06
New best model found. Saving the model at epoch 57.
Epoch 58/100 - Training: hazard


Training Epoch 58: 100%|██████████| 286/286 [00:52<00:00,  5.40it/s, loss=0.00616]


Average Loss after Epoch 58: 0.0018073285872770401
Learning rate after epoch 58: 5e-06
Epoch 59/100 - Training: hazard


Training Epoch 59: 100%|██████████| 286/286 [00:52<00:00,  5.41it/s, loss=0.00146]


Average Loss after Epoch 59: 0.0030549986781902494
Learning rate after epoch 59: 5e-06
Epoch 60/100 - Training: hazard


Training Epoch 60: 100%|██████████| 286/286 [00:52<00:00,  5.40it/s, loss=0.00115]


Average Loss after Epoch 60: 0.0018940691425468304
Learning rate after epoch 60: 5e-06
Epoch 61/100 - Training: hazard


Training Epoch 61: 100%|██████████| 286/286 [00:52<00:00,  5.40it/s, loss=0.000463]


Average Loss after Epoch 61: 0.0016284637811088568
Learning rate after epoch 61: 5e-06
New best model found. Saving the model at epoch 61.
Epoch 62/100 - Training: hazard


Training Epoch 62: 100%|██████████| 286/286 [00:52<00:00,  5.41it/s, loss=0.0007]


Average Loss after Epoch 62: 0.001515642123411237
Learning rate after epoch 62: 5e-06
New best model found. Saving the model at epoch 62.
Epoch 63/100 - Training: hazard


Training Epoch 63: 100%|██████████| 286/286 [00:52<00:00,  5.40it/s, loss=0.000727]


Average Loss after Epoch 63: 0.0037614722579772182
Learning rate after epoch 63: 5e-06
Epoch 64/100 - Training: hazard


Training Epoch 64: 100%|██████████| 286/286 [00:52<00:00,  5.40it/s, loss=0.000373]


Average Loss after Epoch 64: 0.001956709582080586
Learning rate after epoch 64: 5e-06
Epoch 65/100 - Training: hazard


Training Epoch 65: 100%|██████████| 286/286 [00:52<00:00,  5.41it/s, loss=0.000251]


Average Loss after Epoch 65: 0.0015645461940834253
Learning rate after epoch 65: 5e-06
Epoch 66/100 - Training: hazard


Training Epoch 66: 100%|██████████| 286/286 [00:52<00:00,  5.41it/s, loss=0.000269]


Average Loss after Epoch 66: 0.0017700143680167775
Learning rate after epoch 66: 5e-06
Epoch 67/100 - Training: hazard


Training Epoch 67: 100%|██████████| 286/286 [00:52<00:00,  5.40it/s, loss=0.000757]


Average Loss after Epoch 67: 0.001246486003250144
Learning rate after epoch 67: 5.000000000000001e-07
New best model found. Saving the model at epoch 67.
Epoch 68/100 - Training: hazard


Training Epoch 68: 100%|██████████| 286/286 [00:52<00:00,  5.41it/s, loss=0.00043]


Average Loss after Epoch 68: 0.0012302918027792536
Learning rate after epoch 68: 5.000000000000001e-07
New best model found. Saving the model at epoch 68.
Epoch 69/100 - Training: hazard


Training Epoch 69: 100%|██████████| 286/286 [00:52<00:00,  5.41it/s, loss=0.00103]


Average Loss after Epoch 69: 0.0014272592453488761
Learning rate after epoch 69: 5.000000000000001e-07
Epoch 70/100 - Training: hazard


Training Epoch 70: 100%|██████████| 286/286 [00:52<00:00,  5.40it/s, loss=0.000281]


Average Loss after Epoch 70: 0.0012801631139689026
Learning rate after epoch 70: 5.000000000000001e-07
Epoch 71/100 - Training: hazard


Training Epoch 71: 100%|██████████| 286/286 [00:52<00:00,  5.40it/s, loss=0.000127]


Average Loss after Epoch 71: 0.0013264264596766187
Learning rate after epoch 71: 5.000000000000001e-07
Epoch 72/100 - Training: hazard


Training Epoch 72: 100%|██████████| 286/286 [00:52<00:00,  5.40it/s, loss=0.000592]


Average Loss after Epoch 72: 0.0014386604180526138
Learning rate after epoch 72: 5.000000000000001e-07
Epoch 73/100 - Training: hazard


Training Epoch 73: 100%|██████████| 286/286 [00:52<00:00,  5.40it/s, loss=0.000316]


Average Loss after Epoch 73: 0.0012071792173571395
Learning rate after epoch 73: 5.000000000000001e-08
New best model found. Saving the model at epoch 73.
Epoch 74/100 - Training: hazard


Training Epoch 74: 100%|██████████| 286/286 [00:52<00:00,  5.40it/s, loss=0.000212]


Average Loss after Epoch 74: 0.0011989972550585585
Learning rate after epoch 74: 5.000000000000001e-08
New best model found. Saving the model at epoch 74.
Epoch 75/100 - Training: hazard


Training Epoch 75: 100%|██████████| 286/286 [00:52<00:00,  5.40it/s, loss=0.0393]


Average Loss after Epoch 75: 0.001152469881995861
Learning rate after epoch 75: 5.000000000000001e-08
New best model found. Saving the model at epoch 75.
Epoch 76/100 - Training: hazard


Training Epoch 76: 100%|██████████| 286/286 [00:52<00:00,  5.41it/s, loss=0.000643]


Average Loss after Epoch 76: 0.0011314002445319317
Learning rate after epoch 76: 5.000000000000001e-08
New best model found. Saving the model at epoch 76.
Epoch 77/100 - Training: hazard


Training Epoch 77: 100%|██████████| 286/286 [00:52<00:00,  5.41it/s, loss=0.000587]


Average Loss after Epoch 77: 0.001219785825485344
Learning rate after epoch 77: 5.000000000000001e-08
Epoch 78/100 - Training: hazard


Training Epoch 78: 100%|██████████| 286/286 [00:52<00:00,  5.40it/s, loss=0.000293]


Average Loss after Epoch 78: 0.0013228387437444148
Learning rate after epoch 78: 5.000000000000001e-08
Epoch 79/100 - Training: hazard


Training Epoch 79: 100%|██████████| 286/286 [00:52<00:00,  5.40it/s, loss=0.000772]


Average Loss after Epoch 79: 0.001200606525516206
Learning rate after epoch 79: 5.000000000000001e-08
Epoch 80/100 - Training: hazard


Training Epoch 80: 100%|██████████| 286/286 [00:52<00:00,  5.40it/s, loss=0.000377]


Average Loss after Epoch 80: 0.0011777829616100583
Learning rate after epoch 80: 5.000000000000001e-08
Epoch 81/100 - Training: hazard


Training Epoch 81: 100%|██████████| 286/286 [00:52<00:00,  5.40it/s, loss=0.00118]


Average Loss after Epoch 81: 0.0012017339864363926
Learning rate after epoch 81: 5.000000000000002e-09
Epoch 82/100 - Training: hazard


Training Epoch 82: 100%|██████████| 286/286 [00:52<00:00,  5.40it/s, loss=0.000217]


Average Loss after Epoch 82: 0.0012895861720251915
Learning rate after epoch 82: 5.000000000000002e-09
Early stopping triggered!
Saving the best model from epoch 76.
Evaluating model for task: hazard


Evaluating: 100%|██████████| 32/32 [00:02<00:00, 12.50it/s]


F1-Score for hazard: 0.5790880276869168
Classification Report for hazard:

                                                 precision    recall  f1-score   support

                                      Aflatoxin       1.00      1.00      1.00         2
                                      alkaloids       1.00      1.00      1.00         2
                                      allergens       0.00      0.00      0.00         3
                                         almond       0.80      1.00      0.89         4
                                      amygdalin       0.00      0.00      0.00         1
                           bad smell / off odor       1.00      1.00      1.00         1
                                  bone fragment       1.00      1.00      1.00         1
                                     brazil nut       1.00      1.00      1.00         1
                              bulging packaging       1.00      0.50      0.67         2
                                  

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/100 - Training: product


Training Epoch 1: 100%|██████████| 286/286 [00:52<00:00,  5.40it/s, loss=6.03]


Average Loss after Epoch 1: 6.383555345601969
Learning rate after epoch 1: 5e-05
New best model found. Saving the model at epoch 1.
Epoch 2/100 - Training: product


Training Epoch 2: 100%|██████████| 286/286 [00:52<00:00,  5.40it/s, loss=5.74]


Average Loss after Epoch 2: 6.135338401460981
Learning rate after epoch 2: 5e-05
New best model found. Saving the model at epoch 2.
Epoch 3/100 - Training: product


Training Epoch 3: 100%|██████████| 286/286 [00:52<00:00,  5.40it/s, loss=5.92]


Average Loss after Epoch 3: 6.115977930855918
Learning rate after epoch 3: 5e-05
New best model found. Saving the model at epoch 3.
Epoch 4/100 - Training: product


Training Epoch 4: 100%|██████████| 286/286 [00:52<00:00,  5.40it/s, loss=6.82]


Average Loss after Epoch 4: 6.069781685208941
Learning rate after epoch 4: 5e-05
New best model found. Saving the model at epoch 4.
Epoch 5/100 - Training: product


Training Epoch 5: 100%|██████████| 286/286 [00:52<00:00,  5.40it/s, loss=5.86]


Average Loss after Epoch 5: 6.109323686653084
Learning rate after epoch 5: 5e-05
Epoch 6/100 - Training: product


Training Epoch 6: 100%|██████████| 286/286 [00:53<00:00,  5.39it/s, loss=6.95]


Average Loss after Epoch 6: 6.09661059946447
Learning rate after epoch 6: 5e-05
Epoch 7/100 - Training: product


Training Epoch 7: 100%|██████████| 286/286 [00:52<00:00,  5.43it/s, loss=6.06]


Average Loss after Epoch 7: 6.081527846676487
Learning rate after epoch 7: 5e-05
Epoch 8/100 - Training: product


Training Epoch 8: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=6.35]


Average Loss after Epoch 8: 6.089863962226814
Learning rate after epoch 8: 5e-05
Epoch 9/100 - Training: product


Training Epoch 9: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=6.18]


Average Loss after Epoch 9: 6.041204052371579
Learning rate after epoch 9: 5e-06
New best model found. Saving the model at epoch 9.
Epoch 10/100 - Training: product


Training Epoch 10: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=6.2]


Average Loss after Epoch 10: 6.04062747788596
Learning rate after epoch 10: 5e-06
New best model found. Saving the model at epoch 10.
Epoch 11/100 - Training: product


Training Epoch 11: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=5.93]


Average Loss after Epoch 11: 6.040067499334162
Learning rate after epoch 11: 5e-06
New best model found. Saving the model at epoch 11.
Epoch 12/100 - Training: product


Training Epoch 12: 100%|██████████| 286/286 [00:52<00:00,  5.43it/s, loss=6.27]


Average Loss after Epoch 12: 6.033342133035193
Learning rate after epoch 12: 5e-06
New best model found. Saving the model at epoch 12.
Epoch 13/100 - Training: product


Training Epoch 13: 100%|██████████| 286/286 [00:52<00:00,  5.41it/s, loss=6.24]


Average Loss after Epoch 13: 6.033676004076337
Learning rate after epoch 13: 5e-06
Epoch 14/100 - Training: product


Training Epoch 14: 100%|██████████| 286/286 [00:53<00:00,  5.39it/s, loss=5.71]


Average Loss after Epoch 14: 6.034582504859338
Learning rate after epoch 14: 5e-06
Epoch 15/100 - Training: product


Training Epoch 15: 100%|██████████| 286/286 [00:52<00:00,  5.40it/s, loss=5.88]


Average Loss after Epoch 15: 6.0342475967807365
Learning rate after epoch 15: 5e-06
Epoch 16/100 - Training: product


Training Epoch 16: 100%|██████████| 286/286 [00:53<00:00,  5.37it/s, loss=5.02]


Average Loss after Epoch 16: 6.02370642615365
Learning rate after epoch 16: 5e-06
New best model found. Saving the model at epoch 16.
Epoch 17/100 - Training: product


Training Epoch 17: 100%|██████████| 286/286 [00:53<00:00,  5.36it/s, loss=5.53]


Average Loss after Epoch 17: 5.915957652605497
Learning rate after epoch 17: 5e-06
New best model found. Saving the model at epoch 17.
Epoch 18/100 - Training: product


Training Epoch 18: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=6.43]


Average Loss after Epoch 18: 5.86404185028343
Learning rate after epoch 18: 5e-06
New best model found. Saving the model at epoch 18.
Epoch 19/100 - Training: product


Training Epoch 19: 100%|██████████| 286/286 [00:52<00:00,  5.44it/s, loss=5.84]


Average Loss after Epoch 19: 5.821550009133932
Learning rate after epoch 19: 5e-06
New best model found. Saving the model at epoch 19.
Epoch 20/100 - Training: product


Training Epoch 20: 100%|██████████| 286/286 [00:52<00:00,  5.44it/s, loss=5.66]


Average Loss after Epoch 20: 5.782241327779277
Learning rate after epoch 20: 5e-06
New best model found. Saving the model at epoch 20.
Epoch 21/100 - Training: product


Training Epoch 21: 100%|██████████| 286/286 [00:52<00:00,  5.43it/s, loss=5.67]


Average Loss after Epoch 21: 5.752069833395365
Learning rate after epoch 21: 5e-06
New best model found. Saving the model at epoch 21.
Epoch 22/100 - Training: product


Training Epoch 22: 100%|██████████| 286/286 [00:52<00:00,  5.43it/s, loss=6.59]


Average Loss after Epoch 22: 5.724537127501481
Learning rate after epoch 22: 5e-06
New best model found. Saving the model at epoch 22.
Epoch 23/100 - Training: product


Training Epoch 23: 100%|██████████| 286/286 [00:52<00:00,  5.43it/s, loss=5.22]


Average Loss after Epoch 23: 5.692131634358759
Learning rate after epoch 23: 5e-06
New best model found. Saving the model at epoch 23.
Epoch 24/100 - Training: product


Training Epoch 24: 100%|██████████| 286/286 [00:52<00:00,  5.43it/s, loss=6.48]


Average Loss after Epoch 24: 5.670164386709254
Learning rate after epoch 24: 5e-06
New best model found. Saving the model at epoch 24.
Epoch 25/100 - Training: product


Training Epoch 25: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=6.16]


Average Loss after Epoch 25: 5.63422417640686
Learning rate after epoch 25: 5e-06
New best model found. Saving the model at epoch 25.
Epoch 26/100 - Training: product


Training Epoch 26: 100%|██████████| 286/286 [00:52<00:00,  5.43it/s, loss=6.48]


Average Loss after Epoch 26: 5.602715218817438
Learning rate after epoch 26: 5e-06
New best model found. Saving the model at epoch 26.
Epoch 27/100 - Training: product


Training Epoch 27: 100%|██████████| 286/286 [00:52<00:00,  5.43it/s, loss=4.79]


Average Loss after Epoch 27: 5.5746069237902445
Learning rate after epoch 27: 5e-06
New best model found. Saving the model at epoch 27.
Epoch 28/100 - Training: product


Training Epoch 28: 100%|██████████| 286/286 [00:52<00:00,  5.43it/s, loss=5.95]


Average Loss after Epoch 28: 5.525195548584411
Learning rate after epoch 28: 5e-06
New best model found. Saving the model at epoch 28.
Epoch 29/100 - Training: product


Training Epoch 29: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=6.14]


Average Loss after Epoch 29: 5.488152295559436
Learning rate after epoch 29: 5e-06
New best model found. Saving the model at epoch 29.
Epoch 30/100 - Training: product


Training Epoch 30: 100%|██████████| 286/286 [00:52<00:00,  5.43it/s, loss=5.33]


Average Loss after Epoch 30: 5.446862415833906
Learning rate after epoch 30: 5e-06
New best model found. Saving the model at epoch 30.
Epoch 31/100 - Training: product


Training Epoch 31: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=5.25]


Average Loss after Epoch 31: 5.411822372383171
Learning rate after epoch 31: 5e-06
New best model found. Saving the model at epoch 31.
Epoch 32/100 - Training: product


Training Epoch 32: 100%|██████████| 286/286 [00:52<00:00,  5.43it/s, loss=6.28]


Average Loss after Epoch 32: 5.375285873879919
Learning rate after epoch 32: 5e-06
New best model found. Saving the model at epoch 32.
Epoch 33/100 - Training: product


Training Epoch 33: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=5.61]


Average Loss after Epoch 33: 5.336497260140372
Learning rate after epoch 33: 5e-06
New best model found. Saving the model at epoch 33.
Epoch 34/100 - Training: product


Training Epoch 34: 100%|██████████| 286/286 [00:52<00:00,  5.43it/s, loss=5.24]


Average Loss after Epoch 34: 5.3119232421154745
Learning rate after epoch 34: 5e-06
New best model found. Saving the model at epoch 34.
Epoch 35/100 - Training: product


Training Epoch 35: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=4.79]


Average Loss after Epoch 35: 5.28283478163339
Learning rate after epoch 35: 5e-06
New best model found. Saving the model at epoch 35.
Epoch 36/100 - Training: product


Training Epoch 36: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=5.64]


Average Loss after Epoch 36: 5.253482575183148
Learning rate after epoch 36: 5e-06
New best model found. Saving the model at epoch 36.
Epoch 37/100 - Training: product


Training Epoch 37: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=6.09]


Average Loss after Epoch 37: 5.2294192055722215
Learning rate after epoch 37: 5e-06
New best model found. Saving the model at epoch 37.
Epoch 38/100 - Training: product


Training Epoch 38: 100%|██████████| 286/286 [00:52<00:00,  5.43it/s, loss=6.11]


Average Loss after Epoch 38: 5.199584989280967
Learning rate after epoch 38: 5e-06
New best model found. Saving the model at epoch 38.
Epoch 39/100 - Training: product


Training Epoch 39: 100%|██████████| 286/286 [00:52<00:00,  5.41it/s, loss=4.61]


Average Loss after Epoch 39: 5.1694162833940736
Learning rate after epoch 39: 5e-06
New best model found. Saving the model at epoch 39.
Epoch 40/100 - Training: product


Training Epoch 40: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=5.42]


Average Loss after Epoch 40: 5.14363920605266
Learning rate after epoch 40: 5e-06
New best model found. Saving the model at epoch 40.
Epoch 41/100 - Training: product


Training Epoch 41: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=5.09]


Average Loss after Epoch 41: 5.113911169392246
Learning rate after epoch 41: 5e-06
New best model found. Saving the model at epoch 41.
Epoch 42/100 - Training: product


Training Epoch 42: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=5.53]


Average Loss after Epoch 42: 5.092596540917883
Learning rate after epoch 42: 5e-06
New best model found. Saving the model at epoch 42.
Epoch 43/100 - Training: product


Training Epoch 43: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=5.32]


Average Loss after Epoch 43: 5.072762285912787
Learning rate after epoch 43: 5e-06
New best model found. Saving the model at epoch 43.
Epoch 44/100 - Training: product


Training Epoch 44: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=5.2]


Average Loss after Epoch 44: 5.043146060063289
Learning rate after epoch 44: 5e-06
New best model found. Saving the model at epoch 44.
Epoch 45/100 - Training: product


Training Epoch 45: 100%|██████████| 286/286 [00:52<00:00,  5.43it/s, loss=5.82]


Average Loss after Epoch 45: 5.015657172336445
Learning rate after epoch 45: 5e-06
New best model found. Saving the model at epoch 45.
Epoch 46/100 - Training: product


Training Epoch 46: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=4.39]


Average Loss after Epoch 46: 4.993343829275011
Learning rate after epoch 46: 5e-06
New best model found. Saving the model at epoch 46.
Epoch 47/100 - Training: product


Training Epoch 47: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=5.41]


Average Loss after Epoch 47: 4.965192082044962
Learning rate after epoch 47: 5e-06
New best model found. Saving the model at epoch 47.
Epoch 48/100 - Training: product


Training Epoch 48: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=4.63]


Average Loss after Epoch 48: 4.946476413653447
Learning rate after epoch 48: 5e-06
New best model found. Saving the model at epoch 48.
Epoch 49/100 - Training: product


Training Epoch 49: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=4.92]


Average Loss after Epoch 49: 4.9278477738787245
Learning rate after epoch 49: 5e-06
New best model found. Saving the model at epoch 49.
Epoch 50/100 - Training: product


Training Epoch 50: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=4.31]


Average Loss after Epoch 50: 4.895140157712923
Learning rate after epoch 50: 5e-06
New best model found. Saving the model at epoch 50.
Epoch 51/100 - Training: product


Training Epoch 51: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=4.32]


Average Loss after Epoch 51: 4.864782902744267
Learning rate after epoch 51: 5e-06
New best model found. Saving the model at epoch 51.
Epoch 52/100 - Training: product


Training Epoch 52: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=4.95]


Average Loss after Epoch 52: 4.843598345776538
Learning rate after epoch 52: 5e-06
New best model found. Saving the model at epoch 52.
Epoch 53/100 - Training: product


Training Epoch 53: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=3.99]


Average Loss after Epoch 53: 4.831860537295575
Learning rate after epoch 53: 5e-06
New best model found. Saving the model at epoch 53.
Epoch 54/100 - Training: product


Training Epoch 54: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=4.81]


Average Loss after Epoch 54: 4.810069989491176
Learning rate after epoch 54: 5e-06
New best model found. Saving the model at epoch 54.
Epoch 55/100 - Training: product


Training Epoch 55: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=4.92]


Average Loss after Epoch 55: 4.779377647213169
Learning rate after epoch 55: 5e-06
New best model found. Saving the model at epoch 55.
Epoch 56/100 - Training: product


Training Epoch 56: 100%|██████████| 286/286 [00:52<00:00,  5.43it/s, loss=5.61]


Average Loss after Epoch 56: 4.754589103318595
Learning rate after epoch 56: 5e-06
New best model found. Saving the model at epoch 56.
Epoch 57/100 - Training: product


Training Epoch 57: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=4.92]


Average Loss after Epoch 57: 4.743872942624392
Learning rate after epoch 57: 5e-06
New best model found. Saving the model at epoch 57.
Epoch 58/100 - Training: product


Training Epoch 58: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=5.76]


Average Loss after Epoch 58: 4.71627880810024
Learning rate after epoch 58: 5e-06
New best model found. Saving the model at epoch 58.
Epoch 59/100 - Training: product


Training Epoch 59: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=4.85]


Average Loss after Epoch 59: 4.6780066156720785
Learning rate after epoch 59: 5e-06
New best model found. Saving the model at epoch 59.
Epoch 60/100 - Training: product


Training Epoch 60: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=4.58]


Average Loss after Epoch 60: 4.664464219466789
Learning rate after epoch 60: 5e-06
New best model found. Saving the model at epoch 60.
Epoch 61/100 - Training: product


Training Epoch 61: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=3.89]


Average Loss after Epoch 61: 4.645151976938848
Learning rate after epoch 61: 5e-06
New best model found. Saving the model at epoch 61.
Epoch 62/100 - Training: product


Training Epoch 62: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=4.43]


Average Loss after Epoch 62: 4.618023533921142
Learning rate after epoch 62: 5e-06
New best model found. Saving the model at epoch 62.
Epoch 63/100 - Training: product


Training Epoch 63: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=5.07]


Average Loss after Epoch 63: 4.603751383461319
Learning rate after epoch 63: 5e-06
New best model found. Saving the model at epoch 63.
Epoch 64/100 - Training: product


Training Epoch 64: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=4.58]


Average Loss after Epoch 64: 4.58557955118326
Learning rate after epoch 64: 5e-06
New best model found. Saving the model at epoch 64.
Epoch 65/100 - Training: product


Training Epoch 65: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=4.78]


Average Loss after Epoch 65: 4.559291431120226
Learning rate after epoch 65: 5e-06
New best model found. Saving the model at epoch 65.
Epoch 66/100 - Training: product


Training Epoch 66: 100%|██████████| 286/286 [00:52<00:00,  5.43it/s, loss=3.71]


Average Loss after Epoch 66: 4.532759798990263
Learning rate after epoch 66: 5e-06
New best model found. Saving the model at epoch 66.
Epoch 67/100 - Training: product


Training Epoch 67: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=3.83]


Average Loss after Epoch 67: 4.52652913957209
Learning rate after epoch 67: 5e-06
New best model found. Saving the model at epoch 67.
Epoch 68/100 - Training: product


Training Epoch 68: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=4.41]


Average Loss after Epoch 68: 4.499528552268768
Learning rate after epoch 68: 5e-06
New best model found. Saving the model at epoch 68.
Epoch 69/100 - Training: product


Training Epoch 69: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=5.52]


Average Loss after Epoch 69: 4.484158049930226
Learning rate after epoch 69: 5e-06
New best model found. Saving the model at epoch 69.
Epoch 70/100 - Training: product


Training Epoch 70: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=4.39]


Average Loss after Epoch 70: 4.450164032982779
Learning rate after epoch 70: 5e-06
New best model found. Saving the model at epoch 70.
Epoch 71/100 - Training: product


Training Epoch 71: 100%|██████████| 286/286 [00:52<00:00,  5.43it/s, loss=4.53]


Average Loss after Epoch 71: 4.420345474790026
Learning rate after epoch 71: 5e-06
New best model found. Saving the model at epoch 71.
Epoch 72/100 - Training: product


Training Epoch 72: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=3.46]


Average Loss after Epoch 72: 4.414757744415657
Learning rate after epoch 72: 5e-06
New best model found. Saving the model at epoch 72.
Epoch 73/100 - Training: product


Training Epoch 73: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=3.62]


Average Loss after Epoch 73: 4.401427147271749
Learning rate after epoch 73: 5e-06
New best model found. Saving the model at epoch 73.
Epoch 74/100 - Training: product


Training Epoch 74: 100%|██████████| 286/286 [00:52<00:00,  5.41it/s, loss=4.62]


Average Loss after Epoch 74: 4.376887206431036
Learning rate after epoch 74: 5e-06
New best model found. Saving the model at epoch 74.
Epoch 75/100 - Training: product


Training Epoch 75: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=3.59]


Average Loss after Epoch 75: 4.344624105866973
Learning rate after epoch 75: 5e-06
New best model found. Saving the model at epoch 75.
Epoch 76/100 - Training: product


Training Epoch 76: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=3.56]


Average Loss after Epoch 76: 4.34282260257881
Learning rate after epoch 76: 5e-06
New best model found. Saving the model at epoch 76.
Epoch 77/100 - Training: product


Training Epoch 77: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=4.96]


Average Loss after Epoch 77: 4.324915080637365
Learning rate after epoch 77: 5e-06
New best model found. Saving the model at epoch 77.
Epoch 78/100 - Training: product


Training Epoch 78: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=5.26]


Average Loss after Epoch 78: 4.3043784211565566
Learning rate after epoch 78: 5e-06
New best model found. Saving the model at epoch 78.
Epoch 79/100 - Training: product


Training Epoch 79: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=5.28]


Average Loss after Epoch 79: 4.298204259438948
Learning rate after epoch 79: 5e-06
New best model found. Saving the model at epoch 79.
Epoch 80/100 - Training: product


Training Epoch 80: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=3.77]


Average Loss after Epoch 80: 4.2702543552105245
Learning rate after epoch 80: 5e-06
New best model found. Saving the model at epoch 80.
Epoch 81/100 - Training: product


Training Epoch 81: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=5.08]


Average Loss after Epoch 81: 4.2511079202998765
Learning rate after epoch 81: 5e-06
New best model found. Saving the model at epoch 81.
Epoch 82/100 - Training: product


Training Epoch 82: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=3.63]


Average Loss after Epoch 82: 4.23567684630414
Learning rate after epoch 82: 5e-06
New best model found. Saving the model at epoch 82.
Epoch 83/100 - Training: product


Training Epoch 83: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=3.39]


Average Loss after Epoch 83: 4.207339934535794
Learning rate after epoch 83: 5e-06
New best model found. Saving the model at epoch 83.
Epoch 84/100 - Training: product


Training Epoch 84: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=3.79]


Average Loss after Epoch 84: 4.194113144507775
Learning rate after epoch 84: 5e-06
New best model found. Saving the model at epoch 84.
Epoch 85/100 - Training: product


Training Epoch 85: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=5.21]


Average Loss after Epoch 85: 4.174971487138655
Learning rate after epoch 85: 5e-06
New best model found. Saving the model at epoch 85.
Epoch 86/100 - Training: product


Training Epoch 86: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=5.7]


Average Loss after Epoch 86: 4.165075883998737
Learning rate after epoch 86: 5e-06
New best model found. Saving the model at epoch 86.
Epoch 87/100 - Training: product


Training Epoch 87: 100%|██████████| 286/286 [00:52<00:00,  5.41it/s, loss=3.72]


Average Loss after Epoch 87: 4.154140357370977
Learning rate after epoch 87: 5e-06
New best model found. Saving the model at epoch 87.
Epoch 88/100 - Training: product


Training Epoch 88: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=4.61]


Average Loss after Epoch 88: 4.129659181708223
Learning rate after epoch 88: 5e-06
New best model found. Saving the model at epoch 88.
Epoch 89/100 - Training: product


Training Epoch 89: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=4.71]


Average Loss after Epoch 89: 4.121662316622434
Learning rate after epoch 89: 5e-06
New best model found. Saving the model at epoch 89.
Epoch 90/100 - Training: product


Training Epoch 90: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=3.47]


Average Loss after Epoch 90: 4.1031370971586325
Learning rate after epoch 90: 5e-06
New best model found. Saving the model at epoch 90.
Epoch 91/100 - Training: product


Training Epoch 91: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=3.97]


Average Loss after Epoch 91: 4.090055997555073
Learning rate after epoch 91: 5e-06
New best model found. Saving the model at epoch 91.
Epoch 92/100 - Training: product


Training Epoch 92: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=4.26]


Average Loss after Epoch 92: 4.0722308200556085
Learning rate after epoch 92: 5e-06
New best model found. Saving the model at epoch 92.
Epoch 93/100 - Training: product


Training Epoch 93: 100%|██████████| 286/286 [00:52<00:00,  5.43it/s, loss=3.75]


Average Loss after Epoch 93: 4.069394704345223
Learning rate after epoch 93: 5e-06
New best model found. Saving the model at epoch 93.
Epoch 94/100 - Training: product


Training Epoch 94: 100%|██████████| 286/286 [00:52<00:00,  5.43it/s, loss=4.24]


Average Loss after Epoch 94: 4.0465586093755865
Learning rate after epoch 94: 5e-06
New best model found. Saving the model at epoch 94.
Epoch 95/100 - Training: product


Training Epoch 95: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=3.7]


Average Loss after Epoch 95: 4.0328818784727085
Learning rate after epoch 95: 5e-06
New best model found. Saving the model at epoch 95.
Epoch 96/100 - Training: product


Training Epoch 96: 100%|██████████| 286/286 [00:52<00:00,  5.41it/s, loss=4.58]


Average Loss after Epoch 96: 4.018356554158084
Learning rate after epoch 96: 5e-06
New best model found. Saving the model at epoch 96.
Epoch 97/100 - Training: product


Training Epoch 97: 100%|██████████| 286/286 [00:52<00:00,  5.43it/s, loss=4.69]


Average Loss after Epoch 97: 3.9990992546081543
Learning rate after epoch 97: 5e-06
New best model found. Saving the model at epoch 97.
Epoch 98/100 - Training: product


Training Epoch 98: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=4.48]


Average Loss after Epoch 98: 3.994721696926997
Learning rate after epoch 98: 5e-06
New best model found. Saving the model at epoch 98.
Epoch 99/100 - Training: product


Training Epoch 99: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=4.12]


Average Loss after Epoch 99: 3.994592086418525
Learning rate after epoch 99: 5e-06
New best model found. Saving the model at epoch 99.
Epoch 100/100 - Training: product


Training Epoch 100: 100%|██████████| 286/286 [00:52<00:00,  5.42it/s, loss=3.9]


Average Loss after Epoch 100: 3.962687280628231
Learning rate after epoch 100: 5e-06
New best model found. Saving the model at epoch 100.
Saving the best model from epoch 100.
Evaluating model for task: product


Evaluating: 100%|██████████| 32/32 [00:02<00:00, 12.56it/s]

F1-Score for product: 0.02825325017133911
Classification Report for product:

                                                   precision    recall  f1-score   support

                           Catfishes (freshwater)       0.00      0.00      0.00         3
                            Fishes not identified       0.29      0.80      0.42         5
                         Not classified pork meat       0.00      0.00      0.00         1
              Precooked cooked pork meat products       0.00      0.00      0.00         1
                               after dinner mints       0.00      0.00      0.00         1
                                            algae       0.00      0.00      0.00         3
                                  almond products       0.00      0.00      0.00         2
                                          almonds       0.00      0.00      0.00         1
                                 anchovies in oil       0.00      0.00      0.00         1
           




# Generate predictions on the test data and print the predictions DataFrame
Here, we load the held-out dataset (`validation_data/incidents.csv`), use the saved fine-tuned model for each target to generate predictions, and display the results.


In [None]:
# Import necessary libraries
import os
import torch
import numpy as np
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from sklearn.preprocessing import LabelEncoder

# Load the test data for predictions (CSV containing validation data)
# Path to the data to predict on; it sits under validation_data/ on the Google Drive
# mount at /content/drive.
test_path = '/content/drive/MyDrive/Data/validation_data/incidents.csv'
# index_col=0 makes the first CSV column the DataFrame index instead of a data column.
test_df = pd.read_csv(test_path, index_col=0)

# Define the predict function
def predict(texts, model_base_path, target, batch_size=32):
    """Predict decoded labels for `texts` with the model saved at `model_base_path`.

    The function relies on the notebook-level `device` variable, which must be
    defined before the first call.

    Args:
        texts: Sequence of input strings to classify.
        model_base_path: Directory holding the saved tokenizer, the sequence
            classification model, and the `<target>_label_encoder.npy` file.
        target: Name of the target column; selects which label-encoder file to load.
        batch_size: Number of texts tokenized and sent through the model per
            forward pass. Batching keeps memory use bounded instead of pushing
            the whole dataset through the model at once.

    Returns:
        Array of decoded labels, one per input text (empty for empty input),
        or None when the label-encoder file for `target` does not exist.

    Raises:
        ValueError: If `batch_size` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    # Check for the label encoder first: it is the cheapest check, and without it
    # the predictions cannot be decoded, so nothing else needs to be loaded.
    label_encoder_path = f'{model_base_path}/{target}_label_encoder.npy'
    if not os.path.exists(label_encoder_path):
        # Print a warning if the label encoder is not found
        print(f"Warning: Label encoder not found for {target} at {label_encoder_path}")
        return None

    # NOTE(review): allow_pickle=True can execute arbitrary code while loading;
    # only load encoder files that were written by this notebook.
    label_encoder = LabelEncoder()
    label_encoder.classes_ = np.load(label_encoder_path, allow_pickle=True)

    texts = list(texts)
    if not texts:
        # Nothing to classify: return an empty array with the label dtype
        return label_encoder.classes_[:0]

    # Load the tokenizer and the fine-tuned model saved for this target
    tokenizer = AutoTokenizer.from_pretrained(model_base_path)
    model = AutoModelForSequenceClassification.from_pretrained(model_base_path).to(device)

    # Put the model in evaluation mode
    model.eval()

    batch_predictions = []
    # Make predictions with no gradient calculation, one mini-batch at a time
    with torch.no_grad():
        for start in range(0, len(texts), batch_size):
            inputs = tokenizer(
                texts[start:start + batch_size],
                padding=True,  # Pad to the longest sequence in this batch
                truncation=True,  # Truncate sequences to the max length
                max_length=512,  # Limit sequence length to 512 tokens
                return_tensors="pt"  # Return PyTorch tensors
            ).to(device)
            logits = model(**inputs).logits
            # Move the class indices to the CPU right away so device memory is
            # only needed for the current batch
            batch_predictions.append(torch.argmax(logits, dim=-1).cpu())

    predictions = torch.cat(batch_predictions).numpy()

    # Decode the class indices back to their original label strings
    return label_encoder.inverse_transform(predictions)

# Select the compute device: CUDA when torch reports a GPU, CPU otherwise
use_cuda = torch.cuda.is_available()
device = torch.device('cuda' if use_cuda else 'cpu')
print(f"Using device: {device}")

# One column of decoded labels is collected here per target
predictions = pd.DataFrame()

# Each target is predicted with the model saved under ./best_model_<target>
all_targets = targets_subtask1 + targets_subtask2
for column in all_targets:
    checkpoint_dir = f'./best_model_{column}'
    target_labels = predict(test_df['text'].tolist(), checkpoint_dir, column)

    # predict() returns None when it could not decode this target; skip it
    if target_labels is None:
        continue
    predictions[column] = target_labels

# Show the assembled predictions table
print("\nFinal Predictions:\n")
print(predictions)


Using device: cuda

Final Predictions:

    hazard-category                                   product-category  \
0        biological                       meat, egg and dairy products   
1        biological                       meat, egg and dairy products   
2        biological                         prepared dishes and snacks   
3         allergens                                  ices and desserts   
4    foreign bodies                       meat, egg and dairy products   
..              ...                                                ...   
560       allergens                              fruits and vegetables   
561       allergens  dietetic foods, food supplements, fortified foods   
562  foreign bodies                        cereals and bakery products   
563       allergens                        cereals and bakery products   
564       allergens                                      confectionery   

                           hazard                 product  
0          

In [None]:
# Bare last expression: the notebook renders the predictions DataFrame as this cell's output
predictions

Unnamed: 0,hazard-category,product-category,hazard,product
0,biological,"meat, egg and dairy products",listeria monocytogenes,salami
1,biological,"meat, egg and dairy products",escherichia coli,sausage
2,biological,prepared dishes and snacks,enteroviruses,chicken based products
3,allergens,ices and desserts,pecan nut,ice cream
4,foreign bodies,"meat, egg and dairy products",metal fragment,chicken based products
...,...,...,...,...
560,allergens,fruits and vegetables,cashew,salads
561,allergens,"dietetic foods, food supplements, fortified foods",milk and products thereof,cakes
562,foreign bodies,cereals and bakery products,plastic fragment,cookies
563,allergens,cereals and bakery products,peanuts and products thereof,milk


# Create the submission folder and archive the results
Finally, the predictions are saved as `submission.csv` in a submission directory on Google Drive, and that directory is zipped for easy sharing or evaluation.


In [None]:
import os
from shutil import make_archive
import pandas as pd
from google.colab import drive

# Define the Google Drive path where you want to save the files
output_folder = '/content/drive/MyDrive/submission_finetunedPUBMEDBERT v4/'

# Create the folder in Google Drive if it doesn't exist
os.makedirs(output_folder, exist_ok=True)

# Save predictions to a CSV file named 'submission.csv' inside the folder
submission_csv = os.path.join(output_folder, 'submission.csv')
predictions.to_csv(submission_csv, index=False)

# Zip the folder for submission.
# make_archive() appends '.zip' to its base name, so passing the folder path with
# its trailing slash would create a hidden '.zip' file INSIDE the folder being
# archived. Normalizing the path drops the trailing separator, which places the
# archive next to the folder as '<folder>.zip'.
archive_base = os.path.normpath(output_folder)
archive_path = make_archive(archive_base, 'zip', root_dir=output_folder)

# Print confirmation message
print(f"Submission saved to Google Drive at {output_folder}")
print(f"Archive written to {archive_path}")


Submission saved to Google Drive at /content/drive/MyDrive/submission_finetunedPUBMEDBERTv4/
