<a href="https://colab.research.google.com/github/steliosg23/PDS-A2/blob/main/SUBMISSION%20Finetuned%20PubMedBERT%20PDS%20A2%20Food%20Hazard%20Detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Import libraries
This section imports everything the notebook needs for data manipulation, model training, and evaluation,
including the libraries for tokenization, model loading, and metrics.
No packages are installed in this cell.


In [None]:
from google.colab import drive
import pandas as pd
import torch
import re
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, classification_report
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from torch.utils.data import DataLoader, Dataset
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import LabelEncoder
from tqdm import tqdm
import os
from shutil import make_archive
import numpy as np
from sklearn.utils.class_weight import compute_class_weight
from torch.optim.lr_scheduler import ReduceLROnPlateau

# Mount Google Drive


In [None]:
# Mount Google Drive at /content/drive so the dataset stored there can be read by path
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Load and preview the training dataset
The dataset containing incident reports is loaded from Google Drive.
We remove any unnecessary columns like 'Unnamed: 0'.


In [None]:
# Location of the training incidents CSV on the mounted Google Drive
train_path = '/content/drive/MyDrive/Data/incidents_train.csv'

# Read the CSV and discard the 'Unnamed: 0' column in a single chained expression
df = pd.read_csv(train_path).drop(columns=['Unnamed: 0'])


# Define a function to clean text data
This function removes special characters, converts text to lowercase, and strips extra whitespace.
It is essential to clean the text data for better model performance.


In [None]:
import re

def clean_text(text):
    """Normalise one text entry for tokenization.

    Removes every character that is not an ASCII letter, digit, or whitespace,
    lowercases the result, and collapses runs of whitespace into single spaces.
    Note that removed characters are not replaced by a space, so
    "foo-bar" becomes "foobar".

    Args:
        text: The value to clean. Usually a string; missing values
            (None or float NaN, as pandas produces for empty cells) and other
            non-string values are tolerated.

    Returns:
        The cleaned string. Missing values yield an empty string.
    """
    # A missing cell has no text to clean; NaN is the only float unequal to itself.
    if text is None or (isinstance(text, float) and text != text):
        return ''

    # Other non-string values (e.g. numbers) are cleaned via their string form
    # instead of making re.sub raise a TypeError.
    if not isinstance(text, str):
        text = str(text)

    text = re.sub(r'[^a-zA-Z0-9\s]', '', text)  # Remove non-alphanumeric characters
    text = text.lower()  # Convert text to lowercase
    return ' '.join(text.split())  # Collapse whitespace and trim both ends


# Load the tokenizer and clean the text data
We initialize the PubMedBERT tokenizer that will be used to tokenize the model inputs.
Then, we apply the `clean_text` function to the 'text' column of the dataset.


In [None]:
# Load the tokenizer that belongs to the pre-trained PubMedBERT checkpoint
tokenizer = AutoTokenizer.from_pretrained("microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract")

# Apply the text cleaning function to the 'text' column in the DataFrame
# clean_text strips non-alphanumeric characters, lowercases, and collapses whitespace
# (it does not remove stopwords). The column is overwritten in place.
df['text'] = df['text'].apply(clean_text)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


# Define features and targets for classification tasks
We specify the input features like date and country and set the classification targets.



In [None]:
# Metadata columns that are carried alongside the text column in every data split:
# year, month, day, and country
features = ['year', 'month', 'day', 'country']

# Target columns for Subtask 1: hazard-category and product-category
targets_subtask1 = ['hazard-category', 'product-category']

# Target columns for Subtask 2: hazard and product
# Add other targets if necessary depending on the task
targets_subtask2 = ['hazard', 'product']


# Encode target labels
For classification, target labels need to be encoded as numeric values.
We use `LabelEncoder` to convert categorical labels into integers.


In [None]:
# Fitted encoders, keyed by target column, kept so predictions can be decoded later
label_encoders = {}

# Walk through the targets of Subtask 1 followed by those of Subtask 2
for target in [*targets_subtask1, *targets_subtask2]:
    # A fresh encoder per column: every target has its own label vocabulary
    le = LabelEncoder()

    # Replace the column's labels with their integer codes.
    # NOTE: the column is overwritten in place, so re-running this cell without
    # reloading `df` would fit the encoder on the integer codes instead of the label names.
    df[target] = le.fit_transform(df[target])

    # Remember the encoder (e.g., for inverse_transform on predictions)
    label_encoders[target] = le


# Define a custom PyTorch dataset for text classification
This dataset class will handle text tokenization and label processing.
It ensures the text is properly encoded, padded, and truncated to a fixed length for the model.


In [None]:
# Define a custom Dataset class for text data
class TextDataset(Dataset):
    """Dataset that tokenizes one text per item and pairs it with its label.

    Args:
        texts: Sequence of input texts (list, numpy array, or pandas Series).
        labels: Sequence of integer class labels aligned with `texts`.
        tokenizer: Callable tokenizer (e.g. a Hugging Face tokenizer) that
            accepts the keyword arguments used in `__getitem__` and returns a
            mapping with 'input_ids' and 'attention_mask' tensors.
        max_len: Length every encoded sequence is padded or truncated to.
    """

    def __init__(self, texts, labels, tokenizer, max_len):
        # Materialise both sequences as plain lists so that __getitem__ always
        # indexes by position. Indexing a pandas Series with an integer is
        # label-based and would raise KeyError (or return the wrong row) when
        # the Series index is not 0..n-1, e.g. straight after a train/test split.
        self.texts = list(texts)  # Input texts
        self.labels = list(labels)  # Corresponding labels
        self.tokenizer = tokenizer  # Tokenizer for encoding the text
        self.max_len = max_len  # Maximum length for padding/truncation

    def __len__(self):
        """Return the number of samples."""
        return len(self.texts)

    def __getitem__(self, item):
        """Return the encoded text and label at position `item`.

        Returns:
            dict with 1-D 'input_ids' and 'attention_mask' tensors and a
            scalar long 'label' tensor.
        """
        text = str(self.texts[item])  # str() guards against non-string entries
        label = self.labels[item]

        # Calling the tokenizer directly is the current API; encode_plus is deprecated.
        encoding = self.tokenizer(
            text,
            add_special_tokens=True,  # Add special tokens (e.g., [CLS], [SEP])
            max_length=self.max_len,  # Limit the sequence length
            padding='max_length',  # Pad sequences to max_length
            truncation=True,  # Truncate longer sequences
            return_tensors='pt'  # Return PyTorch tensors
        )

        return {
            # The tokenizer returns a leading batch dimension of 1; flatten it away
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'label': torch.tensor(label, dtype=torch.long)
        }


# Split the data into training, validation, and test sets
For each target we split the dataset into training (about 70%), validation (about 20%), and test (about 10%) sets.
This ensures that the model is trained on one set, monitored on a second set during training, and evaluated on a separate, unseen set.


In [None]:
# Define a function to prepare data for model training, validation, and testing
def prepare_data(text_column, holdout_size=0.3, test_share=0.33, random_state=42):
    """Build train/validation/test splits for every target column.

    Reads the notebook-level `df`, `features`, `targets_subtask1` and
    `targets_subtask2`.

    Args:
        text_column: Name of the text column in `df` to include with the features.
        holdout_size: Fraction of all rows held out of the training set
            (validation + test together). Defaults to 0.3.
        test_share: Fraction of the held-out rows that becomes the test set;
            the rest becomes the validation set. The default 0.33 gives
            roughly 10% test and 20% validation overall.
        random_state: Seed passed to both `train_test_split` calls.

    Returns:
        dict mapping each target name to the tuple
        (X_train, X_valid, X_test, y_train, y_valid, y_test), each with its
        index reset to 0..n-1.
    """
    # Features include the specified metadata columns plus the text column
    X = df[features + [text_column]]

    data_splits = {}

    # Iterate over both sets of target variables (Subtask 1 and Subtask 2)
    for target in targets_subtask1 + targets_subtask2:
        # First cut: training rows versus held-out rows
        X_train, X_temp, y_train, y_temp = train_test_split(
            X, df[target], test_size=holdout_size, random_state=random_state
        )
        # Second cut: divide the held-out rows into validation and test
        X_valid, X_test, y_valid, y_test = train_test_split(
            X_temp, y_temp, test_size=test_share, random_state=random_state
        )

        # Reset every index so downstream code can address rows by position
        data_splits[target] = tuple(
            part.reset_index(drop=True)
            for part in (X_train, X_valid, X_test, y_train, y_valid, y_test)
        )

    return data_splits

# Prepare the data splits for text-based tasks
We apply the `prepare_data` function specifically for text tasks and save the splits for later use.


In [None]:
# Build the train/validation/test splits for every target, using 'text' as the text column;
# the result is a dict keyed by target name
text_splits = prepare_data('text')


# Set model configuration and define the device
Here, we configure key parameters for training like maximum sequence length, batch size, and learning rate.
We also determine whether to use GPU or CPU for training based on availability.


In [None]:
# Define configuration settings for the model training
config = {
    'max_len': 512,  # Maximum sequence length for input texts
    'batch_size': 16,  # Batch size for training
    'learning_rate': 2e-5,  # Learning rate for the optimizer
    'epochs': 30,  # Upper bound on training epochs
    'model_name': "microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract",  # Pre-trained model to use
    'seed': 42  # Seed for the random number generators
}

# Seed the generators behind weight initialisation and DataLoader shuffling so a
# fresh "Restart & Run All" repeats the same run. Some GPU kernels may still be
# non-deterministic, so results can differ slightly between runs.
torch.manual_seed(config['seed'])
np.random.seed(config['seed'])

# Determine the device to use for training (GPU if available, otherwise CPU)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")


Using device: cuda


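# Train and evaluate the model for each task
This function performs model training and evaluation for each target.
It fine-tunes a PubMedBERT sequence classifier with a class-weighted cross-entropy loss, uses the validation loss for learning-rate scheduling and early stopping, and reports the macro F1 score and a classification report on the test set.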


In [None]:
# Train and evaluate the neural network for each target task
def train_and_evaluate_bert(data_splits, targets):
    """Fine-tune and evaluate one sequence-classification model per target.

    For every target the function builds train/validation/test loaders from the
    'text' column of the stored splits, trains with a class-weighted
    cross-entropy loss, halves the learning rate when the validation loss
    plateaus, stops early after 10 epochs without improvement, restores the
    weights of the epoch with the lowest validation loss, and scores those
    weights on the test split.

    Relies on the notebook-level names `tokenizer`, `config`, `device`,
    `label_encoders` and `TextDataset`.

    Args:
        data_splits: dict mapping each target name to the tuple
            (X_train, X_valid, X_test, y_train, y_valid, y_test).
        targets: Iterable of target names to train; each must be a key of
            `data_splits` and of `label_encoders`.

    Returns:
        List with the test-set macro F1 score of each target, in the order of
        `targets`.

    Side effects:
        Writes the restored model and the tokenizer to './best_model_{target}',
        writes the label-encoder classes to
        './best_model_{target}/{target}_label_encoder.npy', and prints
        progress, metrics, and a classification report.
    """
    # Initialize an empty list to store F1 scores for each target task
    f1_scores = []

    # Loop through each target (task) for training and evaluation
    for target in targets:
        print(f"\nStarting training for task: {target}")

        # Retrieve the corresponding training, validation, and testing splits
        X_train, X_valid, X_test, y_train, y_valid, y_test = data_splits[target]

        # Extract the 'text' column for training, validation, and testing
        texts_train = X_train['text'].values
        texts_valid = X_valid['text'].values
        texts_test = X_test['text'].values

        # Create datasets for training, validation, and testing
        train_dataset = TextDataset(texts_train, y_train, tokenizer, config['max_len'])
        valid_dataset = TextDataset(texts_valid, y_valid, tokenizer, config['max_len'])
        test_dataset = TextDataset(texts_test, y_test, tokenizer, config['max_len'])

        # Create data loaders; only the training data is shuffled
        train_loader = DataLoader(train_dataset, batch_size=config['batch_size'], shuffle=True)
        valid_loader = DataLoader(valid_dataset, batch_size=config['batch_size'], shuffle=False)
        test_loader = DataLoader(test_dataset, batch_size=config['batch_size'], shuffle=False)

        # Number of output classes = every label the encoder knows for this target
        num_classes = len(label_encoders[target].classes_)

        # Balanced class weights can only be computed for classes present in the training split
        present_classes = np.unique(y_train)
        class_weights = compute_class_weight(
            'balanced',
            classes=present_classes,
            y=y_train
        )

        # Classes absent from the training split keep the default weight of 1
        class_weights_full = torch.ones(num_classes)
        for class_idx, weight in zip(present_classes, class_weights):
            class_weights_full[class_idx] = weight

        # Move the class weights tensor to the correct device (GPU/CPU)
        class_weights_full = class_weights_full.to(device)

        # Load the pre-trained model with a freshly initialised classification head
        model = AutoModelForSequenceClassification.from_pretrained(config['model_name'], num_labels=num_classes).to(device)

        # Initialize the optimizer, loss function (with class weights), and learning rate scheduler.
        # The scheduler's `verbose` flag is deprecated in recent PyTorch releases and
        # is not needed here because the learning rate is printed every epoch below.
        optimizer = optim.Adam(model.parameters(), lr=config['learning_rate'])
        scheduler = ReduceLROnPlateau(optimizer, 'min', factor=0.5, patience=2)
        criterion = nn.CrossEntropyLoss(weight=class_weights_full)

        # Early stopping setup
        best_val_loss = float('inf')
        best_epoch = 0
        patience = 10  # Number of epochs with no improvement before stopping
        epochs_without_improvement = 0
        best_model = None  # Snapshot of the best weights seen so far

        # Training loop
        for epoch in range(config['epochs']):
            # Re-enter training mode every epoch: the validation step below
            # switches the model to eval mode, which would otherwise leave
            # dropout disabled for all epochs after the first.
            model.train()

            print(f"Epoch {epoch+1}/{config['epochs']} - Training: {target}")
            progress_bar = tqdm(train_loader, desc=f"Training Epoch {epoch+1}", total=len(train_loader), leave=True)
            for batch in progress_bar:
                optimizer.zero_grad()

                # Batches are already shaped (batch, max_len); just move them to the device
                input_ids = batch['input_ids'].to(device)
                attention_mask = batch['attention_mask'].to(device)
                labels = batch['label'].to(device)

                # Forward pass
                outputs = model(input_ids, attention_mask=attention_mask)
                loss = criterion(outputs.logits, labels)

                # Backward pass and optimization step
                loss.backward()
                optimizer.step()

                # Update the progress bar with the current loss
                progress_bar.set_postfix(loss=loss.item())

            # Print the learning rate that was used during this epoch
            current_lr = optimizer.param_groups[0]['lr']
            print(f"Learning rate after epoch {epoch+1}: {current_lr}")

            # Validation step
            model.eval()
            val_loss = 0
            val_steps = 0
            val_preds = []
            val_true = []
            with torch.no_grad():
                for batch in valid_loader:
                    input_ids = batch['input_ids'].to(device)
                    attention_mask = batch['attention_mask'].to(device)
                    labels = batch['label'].to(device)

                    # Forward pass
                    outputs = model(input_ids, attention_mask=attention_mask)
                    loss = criterion(outputs.logits, labels)

                    val_loss += loss.item()
                    val_steps += 1

                    # Collect true labels and predictions for F1 score calculation
                    _, preds = torch.max(outputs.logits, dim=1)
                    val_preds.extend(preds.cpu().numpy())
                    val_true.extend(labels.cpu().numpy())

            # Calculate average validation loss
            avg_val_loss = val_loss / val_steps
            print(f"Validation Loss after Epoch {epoch+1}: {avg_val_loss}")

            # Calculate and print Macro F1 score for validation
            macro_f1 = f1_score(val_true, val_preds, average='macro')
            print(f"Validation Macro F1 Score after Epoch {epoch+1}: {macro_f1}")

            # Apply learning rate scheduler
            scheduler.step(avg_val_loss)

            # Keep the weights if this epoch has the best validation loss so far
            if avg_val_loss < best_val_loss:
                best_val_loss = avg_val_loss
                best_epoch = epoch + 1
                epochs_without_improvement = 0
                # state_dict() returns references to the live parameters, which
                # later optimizer steps overwrite. Clone them (onto the CPU, to
                # spare GPU memory) so the snapshot really is this epoch's model.
                best_model = {
                    name: tensor.detach().cpu().clone()
                    for name, tensor in model.state_dict().items()
                }
                print(f"New best model found. Saving the model at epoch {best_epoch}.")
            else:
                epochs_without_improvement += 1
                if epochs_without_improvement >= patience:
                    print("Early stopping triggered!")
                    break

        # After training, restore and save the best model
        save_dir = f'./best_model_{target}'
        if best_model is not None:
            print(f"Saving the best model from epoch {best_epoch}.")
            model.load_state_dict(best_model)  # Restore the best weights
            model.save_pretrained(save_dir)
            tokenizer.save_pretrained(save_dir)
        else:
            print("No improvement in validation loss. No model saved.")

        # Evaluate the model on the test set
        print(f"Evaluating model for task: {target}")
        model.eval()
        y_preds = []
        y_true = []

        # Evaluate without computing gradients
        with torch.no_grad():
            for batch in tqdm(test_loader, desc="Evaluating", total=len(test_loader), leave=True):
                input_ids = batch['input_ids'].to(device)
                attention_mask = batch['attention_mask'].to(device)
                labels = batch['label'].to(device)

                # Forward pass
                outputs = model(input_ids, attention_mask=attention_mask)

                # Get predictions and append them to the list
                _, preds = torch.max(outputs.logits, dim=1)
                y_preds.extend(preds.cpu().numpy())
                y_true.extend(labels.cpu().numpy())

        # Decode the predictions and true labels back to their original label names
        decoded_preds = label_encoders[target].inverse_transform(y_preds)
        decoded_true = label_encoders[target].inverse_transform(y_true)

        # Calculate the macro F1 score
        f1 = f1_score(decoded_true, decoded_preds, average='macro')
        f1_scores.append(f1)
        print(f"F1-Score for {target}: {f1}")

        # Print the classification report
        print(f"Classification Report for {target}:\n")
        print(classification_report(decoded_true, decoded_preds, zero_division=0))

        # Save the label encoder for the current task. The directory is created
        # here as well, because it does not exist yet when no model was saved above.
        os.makedirs(save_dir, exist_ok=True)
        np.save(f'{save_dir}/{target}_label_encoder.npy', label_encoders[target].classes_)
        print(f"Label Encoder for {target} saved in './best_model_{target}'")

    # Return the F1 scores for each target task
    return f1_scores

# Train and evaluate for all targets (subtasks 1 and 2);
# the result holds one test-set macro F1 score per target, in the same order
text_f1_scores = train_and_evaluate_bert(text_splits, targets_subtask1 + targets_subtask2)


Starting training for task: hazard-category


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/30 - Training: hazard-category


Training Epoch 1: 100%|██████████| 223/223 [01:15<00:00,  2.96it/s, loss=0.463]


Learning rate after epoch 1: 2e-05
Validation Loss after Epoch 1: 0.9384824289008975
Validation Macro F1 Score after Epoch 1: 0.5587439761411614
New best model found. Saving the model at epoch 1.
Epoch 2/30 - Training: hazard-category


Training Epoch 2: 100%|██████████| 223/223 [01:13<00:00,  3.05it/s, loss=3.17]


Learning rate after epoch 2: 2e-05
Validation Loss after Epoch 2: 0.7329244911670685
Validation Macro F1 Score after Epoch 2: 0.6340427245473744
New best model found. Saving the model at epoch 2.
Epoch 3/30 - Training: hazard-category


Training Epoch 3: 100%|██████████| 223/223 [01:13<00:00,  3.05it/s, loss=0.329]


Learning rate after epoch 3: 2e-05
Validation Loss after Epoch 3: 0.8242765362374485
Validation Macro F1 Score after Epoch 3: 0.6098472645479922
Epoch 4/30 - Training: hazard-category


Training Epoch 4: 100%|██████████| 223/223 [01:13<00:00,  3.05it/s, loss=0.0249]


Learning rate after epoch 4: 2e-05
Validation Loss after Epoch 4: 0.6147611557971686
Validation Macro F1 Score after Epoch 4: 0.8092738177963058
New best model found. Saving the model at epoch 4.
Epoch 5/30 - Training: hazard-category


Training Epoch 5: 100%|██████████| 223/223 [01:13<00:00,  3.05it/s, loss=0.0204]


Learning rate after epoch 5: 2e-05
Validation Loss after Epoch 5: 0.6068663128244225
Validation Macro F1 Score after Epoch 5: 0.725188351652302
New best model found. Saving the model at epoch 5.
Epoch 6/30 - Training: hazard-category


Training Epoch 6: 100%|██████████| 223/223 [01:13<00:00,  3.05it/s, loss=0.0144]


Learning rate after epoch 6: 2e-05
Validation Loss after Epoch 6: 0.6178242091555148
Validation Macro F1 Score after Epoch 6: 0.7736758860062695
Epoch 7/30 - Training: hazard-category


Training Epoch 7: 100%|██████████| 223/223 [01:13<00:00,  3.05it/s, loss=0.0123]


Learning rate after epoch 7: 2e-05
Validation Loss after Epoch 7: 0.6915650431765243
Validation Macro F1 Score after Epoch 7: 0.759024379088256
Epoch 8/30 - Training: hazard-category


Training Epoch 8: 100%|██████████| 223/223 [01:13<00:00,  3.05it/s, loss=0.00477]


Learning rate after epoch 8: 2e-05
Validation Loss after Epoch 8: 0.6429004476958653
Validation Macro F1 Score after Epoch 8: 0.8170218853844202
Epoch 9/30 - Training: hazard-category


Training Epoch 9: 100%|██████████| 223/223 [01:13<00:00,  3.05it/s, loss=0.00957]


Learning rate after epoch 9: 1e-05
Validation Loss after Epoch 9: 0.663783719901403
Validation Macro F1 Score after Epoch 9: 0.814561216472598
Epoch 10/30 - Training: hazard-category


Training Epoch 10: 100%|██████████| 223/223 [01:13<00:00,  3.05it/s, loss=0.00445]


Learning rate after epoch 10: 1e-05
Validation Loss after Epoch 10: 0.6653343394646072
Validation Macro F1 Score after Epoch 10: 0.8167525165660636
Epoch 11/30 - Training: hazard-category


Training Epoch 11: 100%|██████████| 223/223 [01:13<00:00,  3.05it/s, loss=0.0068]


Learning rate after epoch 11: 1e-05
Validation Loss after Epoch 11: 0.6780790077973506
Validation Macro F1 Score after Epoch 11: 0.8119429376230991
Epoch 12/30 - Training: hazard-category


Training Epoch 12: 100%|██████████| 223/223 [01:13<00:00,  3.05it/s, loss=0.0056]


Learning rate after epoch 12: 5e-06
Validation Loss after Epoch 12: 0.6877619866718305
Validation Macro F1 Score after Epoch 12: 0.8159647814351672
Epoch 13/30 - Training: hazard-category


Training Epoch 13: 100%|██████████| 223/223 [01:13<00:00,  3.05it/s, loss=0.00345]


Learning rate after epoch 13: 5e-06
Validation Loss after Epoch 13: 0.6954912872424757
Validation Macro F1 Score after Epoch 13: 0.816647144049085
Epoch 14/30 - Training: hazard-category


Training Epoch 14: 100%|██████████| 223/223 [01:13<00:00,  3.05it/s, loss=0.00947]


Learning rate after epoch 14: 5e-06
Validation Loss after Epoch 14: 0.7036524026807456
Validation Macro F1 Score after Epoch 14: 0.816647144049085
Epoch 15/30 - Training: hazard-category


Training Epoch 15: 100%|██████████| 223/223 [01:13<00:00,  3.05it/s, loss=0.00884]


Learning rate after epoch 15: 2.5e-06
Validation Loss after Epoch 15: 0.7087633764458587
Validation Macro F1 Score after Epoch 15: 0.814824892920131
Early stopping triggered!
Saving the best model from epoch 5.
Evaluating model for task: hazard-category


Evaluating: 100%|██████████| 32/32 [00:04<00:00,  7.91it/s]


F1-Score for hazard-category: 0.7486717750824723
Classification Report for hazard-category:

                                precision    recall  f1-score   support

                     allergens       0.97      0.97      0.97       179
                    biological       0.98      0.97      0.97       178
                      chemical       0.88      0.93      0.90        30
food additives and flavourings       0.00      0.00      0.00         1
                foreign bodies       0.98      0.98      0.98        60
                         fraud       0.69      0.73      0.71        30
          organoleptic aspects       0.62      1.00      0.77         5
                  other hazard       0.89      0.67      0.76        12
              packaging defect       0.83      0.56      0.67         9

                      accuracy                           0.94       504
                     macro avg       0.76      0.76      0.75       504
                  weighted avg       0.94

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/30 - Training: product-category


Training Epoch 1: 100%|██████████| 223/223 [01:14<00:00,  2.98it/s, loss=2.73]


Learning rate after epoch 1: 2e-05
Validation Loss after Epoch 1: 2.910222500562668
Validation Macro F1 Score after Epoch 1: 0.07768763526370068
New best model found. Saving the model at epoch 1.
Epoch 2/30 - Training: product-category


Training Epoch 2: 100%|██████████| 223/223 [01:13<00:00,  3.04it/s, loss=0.861]


Learning rate after epoch 2: 2e-05
Validation Loss after Epoch 2: 1.7884679790586233
Validation Macro F1 Score after Epoch 2: 0.44345189533056933
New best model found. Saving the model at epoch 2.
Epoch 3/30 - Training: product-category


Training Epoch 3: 100%|██████████| 223/223 [01:13<00:00,  3.04it/s, loss=0.715]


Learning rate after epoch 3: 2e-05
Validation Loss after Epoch 3: 1.4199561150744557
Validation Macro F1 Score after Epoch 3: 0.5358503540826343
New best model found. Saving the model at epoch 3.
Epoch 4/30 - Training: product-category


Training Epoch 4: 100%|██████████| 223/223 [01:13<00:00,  3.04it/s, loss=0.88]


Learning rate after epoch 4: 2e-05
Validation Loss after Epoch 4: 1.38889953866601
Validation Macro F1 Score after Epoch 4: 0.5532876320343469
New best model found. Saving the model at epoch 4.
Epoch 5/30 - Training: product-category


Training Epoch 5: 100%|██████████| 223/223 [01:13<00:00,  3.04it/s, loss=0.278]


Learning rate after epoch 5: 2e-05
Validation Loss after Epoch 5: 1.3787265194114298
Validation Macro F1 Score after Epoch 5: 0.5822862547037627
New best model found. Saving the model at epoch 5.
Epoch 6/30 - Training: product-category


Training Epoch 6: 100%|██████████| 223/223 [01:13<00:00,  3.04it/s, loss=0.0628]


Learning rate after epoch 6: 2e-05
Validation Loss after Epoch 6: 1.3683422638569027
Validation Macro F1 Score after Epoch 6: 0.6089249473859365
New best model found. Saving the model at epoch 6.
Epoch 7/30 - Training: product-category


Training Epoch 7: 100%|██████████| 223/223 [01:13<00:00,  3.04it/s, loss=0.148]


Learning rate after epoch 7: 2e-05
Validation Loss after Epoch 7: 1.472761491779238
Validation Macro F1 Score after Epoch 7: 0.60524364727159
Epoch 8/30 - Training: product-category


Training Epoch 8: 100%|██████████| 223/223 [01:13<00:00,  3.04it/s, loss=0.0316]


Learning rate after epoch 8: 2e-05
Validation Loss after Epoch 8: 1.5697566645685583
Validation Macro F1 Score after Epoch 8: 0.6000279968931946
Epoch 9/30 - Training: product-category


Training Epoch 9: 100%|██████████| 223/223 [01:13<00:00,  3.04it/s, loss=0.0138]


Learning rate after epoch 9: 2e-05
Validation Loss after Epoch 9: 1.518411838915199
Validation Macro F1 Score after Epoch 9: 0.6126484813271964
Epoch 10/30 - Training: product-category


Training Epoch 10: 100%|██████████| 223/223 [01:13<00:00,  3.04it/s, loss=0.0139]


Learning rate after epoch 10: 1e-05
Validation Loss after Epoch 10: 1.6291560565587133
Validation Macro F1 Score after Epoch 10: 0.5915436096202223
Epoch 11/30 - Training: product-category


Training Epoch 11: 100%|██████████| 223/223 [01:13<00:00,  3.04it/s, loss=0.0136]


Learning rate after epoch 11: 1e-05
Validation Loss after Epoch 11: 1.6259997650049627
Validation Macro F1 Score after Epoch 11: 0.6018470228653516
Epoch 12/30 - Training: product-category


Training Epoch 12: 100%|██████████| 223/223 [01:13<00:00,  3.04it/s, loss=0.0115]


Learning rate after epoch 12: 1e-05
Validation Loss after Epoch 12: 1.645621283678338
Validation Macro F1 Score after Epoch 12: 0.5995103753571294
Epoch 13/30 - Training: product-category


Training Epoch 13: 100%|██████████| 223/223 [01:13<00:00,  3.04it/s, loss=0.0113]


Learning rate after epoch 13: 5e-06
Validation Loss after Epoch 13: 1.6609307304024696
Validation Macro F1 Score after Epoch 13: 0.6014339923970724
Epoch 14/30 - Training: product-category


Training Epoch 14: 100%|██████████| 223/223 [01:13<00:00,  3.04it/s, loss=0.0111]


Learning rate after epoch 14: 5e-06
Validation Loss after Epoch 14: 1.6739503392018378
Validation Macro F1 Score after Epoch 14: 0.6052467967010844
Epoch 15/30 - Training: product-category


Training Epoch 15: 100%|██████████| 223/223 [01:13<00:00,  3.04it/s, loss=0.0136]


Learning rate after epoch 15: 5e-06
Validation Loss after Epoch 15: 1.6929726554080844
Validation Macro F1 Score after Epoch 15: 0.6052443691148341
Epoch 16/30 - Training: product-category


Training Epoch 16: 100%|██████████| 223/223 [01:13<00:00,  3.04it/s, loss=0.00601]


Learning rate after epoch 16: 2.5e-06
Validation Loss after Epoch 16: 1.6982928826473653
Validation Macro F1 Score after Epoch 16: 0.5804676044460734
Early stopping triggered!
Saving the best model from epoch 6.
Evaluating model for task: product-category


Evaluating: 100%|██████████| 32/32 [00:04<00:00,  7.87it/s]


F1-Score for product-category: 0.7022954955155662
Classification Report for product-category:

                                                   precision    recall  f1-score   support

                              alcoholic beverages       0.62      0.83      0.71         6
                      cereals and bakery products       0.70      0.74      0.72        68
     cocoa and cocoa preparations, coffee and tea       0.81      0.81      0.81        21
                                    confectionery       0.38      0.33      0.35         9
dietetic foods, food supplements, fortified foods       0.80      0.89      0.84         9
                                    fats and oils       1.00      0.50      0.67         2
                                   feed materials       1.00      1.00      1.00         1
                   food additives and flavourings       0.00      0.00      0.00         0
                           food contact materials       0.00      0.00      0.00     

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/30 - Training: hazard


Training Epoch 1: 100%|██████████| 223/223 [01:14<00:00,  2.98it/s, loss=4.28]


Learning rate after epoch 1: 2e-05
Validation Loss after Epoch 1: 4.860393740236759
Validation Macro F1 Score after Epoch 1: 4.18025248725023e-05
New best model found. Saving the model at epoch 1.
Epoch 2/30 - Training: hazard


Training Epoch 2: 100%|██████████| 223/223 [01:13<00:00,  3.04it/s, loss=4.26]


Learning rate after epoch 2: 2e-05
Validation Loss after Epoch 2: 4.694251693785191
Validation Macro F1 Score after Epoch 2: 0.03158510198052792
New best model found. Saving the model at epoch 2.
Epoch 3/30 - Training: hazard


Training Epoch 3: 100%|██████████| 223/223 [01:13<00:00,  3.04it/s, loss=3.23]


Learning rate after epoch 3: 2e-05
Validation Loss after Epoch 3: 3.640483595430851
Validation Macro F1 Score after Epoch 3: 0.34991935068563307
New best model found. Saving the model at epoch 3.
Epoch 4/30 - Training: hazard


Training Epoch 4: 100%|██████████| 223/223 [01:13<00:00,  3.04it/s, loss=1.19]


Learning rate after epoch 4: 2e-05
Validation Loss after Epoch 4: 2.9673202764242887
Validation Macro F1 Score after Epoch 4: 0.41836889281203204
New best model found. Saving the model at epoch 4.
Epoch 5/30 - Training: hazard


Training Epoch 5: 100%|██████████| 223/223 [01:13<00:00,  3.04it/s, loss=2.12]


Learning rate after epoch 5: 2e-05
Validation Loss after Epoch 5: 2.495855084620416
Validation Macro F1 Score after Epoch 5: 0.47582239695450096
New best model found. Saving the model at epoch 5.
Epoch 6/30 - Training: hazard


Training Epoch 6: 100%|██████████| 223/223 [01:13<00:00,  3.04it/s, loss=0.704]


Learning rate after epoch 6: 2e-05
Validation Loss after Epoch 6: 2.312446055933833
Validation Macro F1 Score after Epoch 6: 0.48115917748088155
New best model found. Saving the model at epoch 6.
Epoch 7/30 - Training: hazard


Training Epoch 7: 100%|██████████| 223/223 [01:13<00:00,  3.04it/s, loss=0.53]


Learning rate after epoch 7: 2e-05
Validation Loss after Epoch 7: 2.113416781183332
Validation Macro F1 Score after Epoch 7: 0.48537806055057703
New best model found. Saving the model at epoch 7.
Epoch 8/30 - Training: hazard


Training Epoch 8: 100%|██████████| 223/223 [01:13<00:00,  3.04it/s, loss=0.861]


Learning rate after epoch 8: 2e-05
Validation Loss after Epoch 8: 2.0077602132223547
Validation Macro F1 Score after Epoch 8: 0.530734016022892
New best model found. Saving the model at epoch 8.
Epoch 9/30 - Training: hazard


Training Epoch 9: 100%|██████████| 223/223 [01:13<00:00,  3.04it/s, loss=0.167]


Learning rate after epoch 9: 2e-05
Validation Loss after Epoch 9: 2.0037050540558994
Validation Macro F1 Score after Epoch 9: 0.5139398571170957
New best model found. Saving the model at epoch 9.
Epoch 10/30 - Training: hazard


Training Epoch 10: 100%|██████████| 223/223 [01:13<00:00,  3.04it/s, loss=0.985]


Learning rate after epoch 10: 2e-05
Validation Loss after Epoch 10: 1.9546615639701486
Validation Macro F1 Score after Epoch 10: 0.542937658227054
New best model found. Saving the model at epoch 10.
Epoch 11/30 - Training: hazard


Training Epoch 11: 100%|██████████| 223/223 [01:13<00:00,  3.04it/s, loss=0.853]


Learning rate after epoch 11: 2e-05
Validation Loss after Epoch 11: 1.9443017500452697
Validation Macro F1 Score after Epoch 11: 0.5545860005737923
New best model found. Saving the model at epoch 11.
Epoch 12/30 - Training: hazard


Training Epoch 12: 100%|██████████| 223/223 [01:13<00:00,  3.04it/s, loss=0.228]


Learning rate after epoch 12: 2e-05
Validation Loss after Epoch 12: 1.9460990221705288
Validation Macro F1 Score after Epoch 12: 0.5454109129607987
Epoch 13/30 - Training: hazard


Training Epoch 13: 100%|██████████| 223/223 [01:13<00:00,  3.04it/s, loss=0.0599]


Learning rate after epoch 13: 2e-05
Validation Loss after Epoch 13: 1.994172711740248
Validation Macro F1 Score after Epoch 13: 0.5330821698709572
Epoch 14/30 - Training: hazard


Training Epoch 14: 100%|██████████| 223/223 [01:13<00:00,  3.04it/s, loss=0.112]


Learning rate after epoch 14: 2e-05
Validation Loss after Epoch 14: 2.03485669742804
Validation Macro F1 Score after Epoch 14: 0.5445297137705369
Epoch 15/30 - Training: hazard


Training Epoch 15: 100%|██████████| 223/223 [01:13<00:00,  3.04it/s, loss=0.0921]


Learning rate after epoch 15: 1e-05
Validation Loss after Epoch 15: 2.0432454781839624
Validation Macro F1 Score after Epoch 15: 0.5474101601882174
Epoch 16/30 - Training: hazard


Training Epoch 16: 100%|██████████| 223/223 [01:13<00:00,  3.04it/s, loss=0.435]


Learning rate after epoch 16: 1e-05
Validation Loss after Epoch 16: 2.0644265041919425
Validation Macro F1 Score after Epoch 16: 0.5388297278473856
Epoch 17/30 - Training: hazard


Training Epoch 17: 100%|██████████| 223/223 [01:13<00:00,  3.04it/s, loss=0.0878]


Learning rate after epoch 17: 1e-05
Validation Loss after Epoch 17: 2.079194209421985
Validation Macro F1 Score after Epoch 17: 0.5412959095072009
Epoch 18/30 - Training: hazard


Training Epoch 18: 100%|██████████| 223/223 [01:13<00:00,  3.04it/s, loss=0.0465]


Learning rate after epoch 18: 5e-06
Validation Loss after Epoch 18: 2.09698756900616
Validation Macro F1 Score after Epoch 18: 0.5399579617406817
Epoch 19/30 - Training: hazard


Training Epoch 19: 100%|██████████| 223/223 [01:13<00:00,  3.04it/s, loss=0.0679]


Learning rate after epoch 19: 5e-06
Validation Loss after Epoch 19: 2.1046264028409496
Validation Macro F1 Score after Epoch 19: 0.5401442514542824
Epoch 20/30 - Training: hazard


Training Epoch 20: 100%|██████████| 223/223 [01:13<00:00,  3.04it/s, loss=0.0312]


Learning rate after epoch 20: 5e-06
Validation Loss after Epoch 20: 2.11284611816518
Validation Macro F1 Score after Epoch 20: 0.5398270091727543
Epoch 21/30 - Training: hazard


Training Epoch 21: 100%|██████████| 223/223 [01:13<00:00,  3.04it/s, loss=0.0319]


Learning rate after epoch 21: 2.5e-06
Validation Loss after Epoch 21: 2.118343486916274
Validation Macro F1 Score after Epoch 21: 0.5398270091727543
Early stopping triggered!
Saving the best model from epoch 11.
Evaluating model for task: hazard


Evaluating: 100%|██████████| 32/32 [00:04<00:00,  7.85it/s]


F1-Score for hazard: 0.5503586326918537
Classification Report for hazard:

                                                 precision    recall  f1-score   support

                                alcohol content       0.00      0.00      0.00         1
                                      alkaloids       0.00      0.00      0.00         1
                                      allergens       0.00      0.00      0.00         2
                                         almond       0.83      0.83      0.83         6
                           bad smell / off odor       0.33      1.00      0.50         1
                                  bone fragment       1.00      1.00      1.00         1
                              bulging packaging       0.25      0.33      0.29         3
                                         cashew       1.00      1.00      1.00         2
                    celery and products thereof       1.00      1.00      1.00         2
 cereals containing gluten and pro

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/30 - Training: product


Training Epoch 1: 100%|██████████| 223/223 [01:14<00:00,  2.98it/s, loss=6.94]


Learning rate after epoch 1: 2e-05
Validation Loss after Epoch 1: 6.939593628048897
Validation Macro F1 Score after Epoch 1: 0.0
New best model found. Saving the model at epoch 1.
Epoch 2/30 - Training: product


Training Epoch 2: 100%|██████████| 223/223 [01:13<00:00,  3.04it/s, loss=6.64]


Learning rate after epoch 2: 2e-05
Validation Loss after Epoch 2: 6.966125153005123
Validation Macro F1 Score after Epoch 2: 5.166512388866166e-05
Epoch 3/30 - Training: product


Training Epoch 3: 100%|██████████| 223/223 [01:13<00:00,  3.04it/s, loss=7.01]


Learning rate after epoch 3: 2e-05
Validation Loss after Epoch 3: 6.985692508518696
Validation Macro F1 Score after Epoch 3: 2.0779652564209126e-05
Epoch 4/30 - Training: product


Training Epoch 4: 100%|██████████| 223/223 [01:13<00:00,  3.04it/s, loss=6.65]


Learning rate after epoch 4: 2e-05
Validation Loss after Epoch 4: 7.004343509674072
Validation Macro F1 Score after Epoch 4: 0.0005458666419533582
Epoch 5/30 - Training: product


Training Epoch 5: 100%|██████████| 223/223 [01:13<00:00,  3.04it/s, loss=6.65]


Learning rate after epoch 5: 1e-05
Validation Loss after Epoch 5: 6.977593764662743
Validation Macro F1 Score after Epoch 5: 0.00036284978813714446
Epoch 6/30 - Training: product


Training Epoch 6: 100%|██████████| 223/223 [01:13<00:00,  3.04it/s, loss=6.42]


Learning rate after epoch 6: 1e-05
Validation Loss after Epoch 6: 6.954767659306526
Validation Macro F1 Score after Epoch 6: 0.002763824053022659
Epoch 7/30 - Training: product


Training Epoch 7: 100%|██████████| 223/223 [01:13<00:00,  3.04it/s, loss=5.75]


Learning rate after epoch 7: 1e-05
Validation Loss after Epoch 7: 6.924479126930237
Validation Macro F1 Score after Epoch 7: 0.00325495402342869
New best model found. Saving the model at epoch 7.
Epoch 8/30 - Training: product


Training Epoch 8: 100%|██████████| 223/223 [01:13<00:00,  3.04it/s, loss=5.69]


Learning rate after epoch 8: 1e-05
Validation Loss after Epoch 8: 6.912690341472626
Validation Macro F1 Score after Epoch 8: 0.012609167387043223
New best model found. Saving the model at epoch 8.
Epoch 9/30 - Training: product


Training Epoch 9: 100%|██████████| 223/223 [01:13<00:00,  3.04it/s, loss=5.95]


Learning rate after epoch 9: 1e-05
Validation Loss after Epoch 9: 6.899848930537701
Validation Macro F1 Score after Epoch 9: 0.01288345283836874
New best model found. Saving the model at epoch 9.
Epoch 10/30 - Training: product


Training Epoch 10: 100%|██████████| 223/223 [01:13<00:00,  3.04it/s, loss=5.43]


Learning rate after epoch 10: 1e-05
Validation Loss after Epoch 10: 6.8855986297130585
Validation Macro F1 Score after Epoch 10: 0.015198627358416646
New best model found. Saving the model at epoch 10.
Epoch 11/30 - Training: product


Training Epoch 11: 100%|██████████| 223/223 [01:13<00:00,  3.04it/s, loss=4.49]


Learning rate after epoch 11: 1e-05
Validation Loss after Epoch 11: 6.894347332417965
Validation Macro F1 Score after Epoch 11: 0.019622336032110468
Epoch 12/30 - Training: product


Training Epoch 12: 100%|██████████| 223/223 [01:13<00:00,  3.04it/s, loss=4.33]


Learning rate after epoch 12: 1e-05
Validation Loss after Epoch 12: 6.89058893173933
Validation Macro F1 Score after Epoch 12: 0.022648493328200615
Epoch 13/30 - Training: product


Training Epoch 13: 100%|██████████| 223/223 [01:13<00:00,  3.04it/s, loss=3.96]


Learning rate after epoch 13: 1e-05
Validation Loss after Epoch 13: 6.865941971540451
Validation Macro F1 Score after Epoch 13: 0.02238974048449547
New best model found. Saving the model at epoch 13.
Epoch 14/30 - Training: product


Training Epoch 14: 100%|██████████| 223/223 [01:13<00:00,  3.04it/s, loss=3.52]


Learning rate after epoch 14: 1e-05
Validation Loss after Epoch 14: 6.867269240319729
Validation Macro F1 Score after Epoch 14: 0.02504013455592866
Epoch 15/30 - Training: product


Training Epoch 15: 100%|██████████| 223/223 [01:13<00:00,  3.04it/s, loss=4.35]


Learning rate after epoch 15: 1e-05
Validation Loss after Epoch 15: 6.8675569370388985
Validation Macro F1 Score after Epoch 15: 0.021884726064515754
Epoch 16/30 - Training: product


Training Epoch 16: 100%|██████████| 223/223 [01:13<00:00,  3.04it/s, loss=3.47]


Learning rate after epoch 16: 1e-05
Validation Loss after Epoch 16: 6.859305188059807
Validation Macro F1 Score after Epoch 16: 0.026199325705904653
New best model found. Saving the model at epoch 16.
Epoch 17/30 - Training: product


Training Epoch 17: 100%|██████████| 223/223 [01:13<00:00,  3.04it/s, loss=2.8]


Learning rate after epoch 17: 1e-05
Validation Loss after Epoch 17: 6.853459648787975
Validation Macro F1 Score after Epoch 17: 0.023766710274110054
New best model found. Saving the model at epoch 17.
Epoch 18/30 - Training: product


Training Epoch 18: 100%|██████████| 223/223 [01:13<00:00,  3.04it/s, loss=4.14]


Learning rate after epoch 18: 1e-05
Validation Loss after Epoch 18: 6.869184032082558
Validation Macro F1 Score after Epoch 18: 0.02103239643025178
Epoch 19/30 - Training: product


Training Epoch 19: 100%|██████████| 223/223 [01:13<00:00,  3.04it/s, loss=2.65]


Learning rate after epoch 19: 1e-05
Validation Loss after Epoch 19: 6.868677102029324
Validation Macro F1 Score after Epoch 19: 0.02180418379290668
Epoch 20/30 - Training: product


Training Epoch 20: 100%|██████████| 223/223 [01:13<00:00,  3.04it/s, loss=3.07]


Learning rate after epoch 20: 1e-05
Validation Loss after Epoch 20: 6.869208164513111
Validation Macro F1 Score after Epoch 20: 0.024088005807711203
Epoch 21/30 - Training: product


Training Epoch 21: 100%|██████████| 223/223 [01:13<00:00,  3.04it/s, loss=2.99]


Learning rate after epoch 21: 5e-06
Validation Loss after Epoch 21: 6.86033920198679
Validation Macro F1 Score after Epoch 21: 0.023752260770029572
Epoch 22/30 - Training: product


Training Epoch 22: 100%|██████████| 223/223 [01:13<00:00,  3.04it/s, loss=1.92]


Learning rate after epoch 22: 5e-06
Validation Loss after Epoch 22: 6.865063369274139
Validation Macro F1 Score after Epoch 22: 0.02622819570038649
Epoch 23/30 - Training: product


Training Epoch 23: 100%|██████████| 223/223 [01:13<00:00,  3.04it/s, loss=2.59]


Learning rate after epoch 23: 5e-06
Validation Loss after Epoch 23: 6.8740339651703835
Validation Macro F1 Score after Epoch 23: 0.025978322607825673
Epoch 24/30 - Training: product


Training Epoch 24: 100%|██████████| 223/223 [01:13<00:00,  3.04it/s, loss=1.94]


Learning rate after epoch 24: 2.5e-06
Validation Loss after Epoch 24: 6.868209637701511
Validation Macro F1 Score after Epoch 24: 0.02725250165387525
Epoch 25/30 - Training: product


Training Epoch 25: 100%|██████████| 223/223 [01:13<00:00,  3.04it/s, loss=2.17]


Learning rate after epoch 25: 2.5e-06
Validation Loss after Epoch 25: 6.868911735713482
Validation Macro F1 Score after Epoch 25: 0.02572804565810019
Epoch 26/30 - Training: product


Training Epoch 26: 100%|██████████| 223/223 [01:13<00:00,  3.04it/s, loss=2.09]


Learning rate after epoch 26: 2.5e-06
Validation Loss after Epoch 26: 6.874522358179092
Validation Macro F1 Score after Epoch 26: 0.027294120048236226
Epoch 27/30 - Training: product


Training Epoch 27: 100%|██████████| 223/223 [01:13<00:00,  3.04it/s, loss=1.87]


Learning rate after epoch 27: 1.25e-06
Validation Loss after Epoch 27: 6.870963163673878
Validation Macro F1 Score after Epoch 27: 0.027011889123490857
Early stopping triggered!
Saving the best model from epoch 17.
Evaluating model for task: product


Evaluating: 100%|██████████| 32/32 [00:04<00:00,  7.84it/s]


F1-Score for product: 0.03762012740408766
Classification Report for product:

                                                                        precision    recall  f1-score   support

                                                Catfishes (freshwater)       0.00      0.00      0.00         1
                                                 Fishes not identified       0.00      0.00      0.00         4
                                              Not classified pork meat       0.00      0.00      0.00         2
                                                 all purpose seasoning       1.00      1.00      1.00         1
                                                           almond milk       0.00      0.00      0.00         0
                                                         almond powder       0.00      0.00      0.00         1
                                                       almond products       0.00      0.00      0.00         2
                         

# Generate predictions on the test data and print the predictions DataFrame
Here, we load the test dataset, use the best saved model for each target to generate predictions, and display the results.


In [None]:
# Import necessary libraries
import os
import torch
import numpy as np
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from sklearn.preprocessing import LabelEncoder

# Location of the incidents CSV that predictions are generated for
test_path = '/content/drive/MyDrive/Data/validation_data/incidents.csv'

# Read it into a DataFrame, taking the first CSV column as the row index
test_df = pd.read_csv(filepath_or_buffer=test_path, index_col=0)

# Define the predict function
def predict(texts, model_base_path, target, batch_size=32):
    """Predict decoded class labels for `texts` with the model saved for `target`.

    Args:
        texts: A list of input strings (a single string is treated as one text).
        model_base_path: Directory from which the tokenizer, the sequence
            classification model and `{target}_label_encoder.npy` are loaded.
        target: Target name; selects which label-encoder file is read.
        batch_size: Number of texts tokenized and passed through the model at
            once. Bounds peak memory, which otherwise grows with the size of
            the whole input.

    Returns:
        An array with one decoded label per input text, or None (after printing
        a warning) when the label-encoder file for `target` does not exist.

    Raises:
        ValueError: If `batch_size` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    # Check for the label encoder first: without it the predictions cannot be
    # decoded, so there is no point loading the tokenizer and model weights.
    label_encoder_path = f'{model_base_path}/{target}_label_encoder.npy'
    if not os.path.exists(label_encoder_path):
        print(f"Warning: Label encoder not found for {target} at {label_encoder_path}")
        return None

    # NOTE: allow_pickle=True unpickles arbitrary objects; only load encoder
    # files from a trusted source.
    label_encoder = LabelEncoder()
    label_encoder.classes_ = np.load(label_encoder_path, allow_pickle=True)

    # Keep accepting a bare string as a single text, as the tokenizer itself does
    if isinstance(texts, str):
        texts = [texts]
    texts = list(texts)

    # Nothing to classify: return an empty result without loading the model
    if not texts:
        return label_encoder.inverse_transform(np.array([], dtype=int))

    # Load the tokenizer and the sequence classification model from the same directory
    tokenizer = AutoTokenizer.from_pretrained(model_base_path)
    model = AutoModelForSequenceClassification.from_pretrained(model_base_path).to(device)

    # Put the model in evaluation mode
    model.eval()

    batch_predictions = []

    # Make predictions with no gradient calculation, one mini-batch at a time
    with torch.no_grad():
        for start in range(0, len(texts), batch_size):
            inputs = tokenizer(
                texts[start:start + batch_size],
                padding=True,  # Pad to the longest sequence in this mini-batch
                truncation=True,  # Truncate sequences longer than max_length
                max_length=512,  # Limit sequence length to 512 tokens
                return_tensors="pt"  # Return PyTorch tensors
            ).to(device)

            logits = model(**inputs).logits
            # Index of the highest-scoring class for each input in the batch
            batch_predictions.append(torch.argmax(logits, dim=-1).cpu().numpy())

    # Decode the class indices back to their original labels
    return label_encoder.inverse_transform(np.concatenate(batch_predictions))

# Pick the compute device once: CUDA when it is available, CPU otherwise
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f"Using device: {device}")

# Collects one column of decoded labels per target
predictions = pd.DataFrame()

# Every target is predicted from the same texts, so extract them a single time
test_texts = test_df['text'].tolist()

# Walk through the targets of both subtasks, each with its own saved best model
for column in targets_subtask1 + targets_subtask2:
    # The model directory is named after the target column
    model_path = f'./best_model_{column}'

    decoded_preds = predict(test_texts, model_path, column)

    # predict() returns None when it could not produce labels; leave that column out
    if decoded_preds is None:
        continue

    predictions[column] = decoded_preds

# Show the assembled predictions under a heading
print("\nFinal Predictions:\n", predictions, sep="\n")


Using device: cuda

Final Predictions:

    hazard-category                                   product-category  \
0        biological                       meat, egg and dairy products   
1        biological                       meat, egg and dairy products   
2        biological                       meat, egg and dairy products   
3         allergens                                  ices and desserts   
4    foreign bodies                       meat, egg and dairy products   
..              ...                                                ...   
560       allergens                              fruits and vegetables   
561       allergens  dietetic foods, food supplements, fortified foods   
562  foreign bodies                        cereals and bakery products   
563       allergens                        cereals and bakery products   
564       allergens       cocoa and cocoa preparations, coffee and tea   

                           hazard          product  
0                 

In [None]:
# Bare expression as the cell's last line: the notebook renders the predictions DataFrame as the cell output
predictions

Unnamed: 0,hazard-category,product-category,hazard,product
0,biological,"meat, egg and dairy products",listeria spp,milk
1,biological,"meat, egg and dairy products",escherichia coli,chicken breast
2,biological,"meat, egg and dairy products",enteroviruses,chicken breast
3,allergens,ices and desserts,pecan nut,dried apricots
4,foreign bodies,"meat, egg and dairy products",plastic fragment,dried beef meat
...,...,...,...,...
560,allergens,fruits and vegetables,cashew,fruit pies
561,allergens,"dietetic foods, food supplements, fortified foods",milk and products thereof,lettuce
562,foreign bodies,cereals and bakery products,plastic fragment,mushrooms
563,allergens,cereals and bakery products,peanuts and products thereof,flour


# Create the submission folder and archive the results
Finally, the predictions are written to `submission.csv` in a submission directory on Google Drive, and that directory is zipped for easy sharing or evaluation.


In [None]:
import os
from shutil import make_archive
import pandas as pd
from google.colab import drive

# Define the Google Drive path where you want to save the files
output_folder = '/content/drive/MyDrive/submission_finetunedPUBMEDBERTv2/'

# Create the folder in Google Drive if it doesn't exist
os.makedirs(output_folder, exist_ok=True)

# Save predictions to a CSV file named 'submission.csv' inside the folder (row index omitted)
predictions.to_csv(os.path.join(output_folder, 'submission.csv'), index=False)

# Zip the folder for submission.
# make_archive appends '.zip' to base_name, so the trailing slash has to be removed first;
# otherwise the archive is written as a hidden '.zip' file inside the folder being archived
# instead of as '<folder>.zip' next to it.
archive_base = os.path.normpath(output_folder)
archive_path = make_archive(archive_base, 'zip', root_dir=output_folder)

# Print confirmation messages
print(f"Submission saved to Google Drive at {output_folder}")
print(f"Archive written to {archive_path}")


Submission saved to Google Drive at /content/drive/MyDrive/submission_finetunedPUBMEDBERTv2/
