# Inferencing

A simple inference demo in which the row index from the "test.csv" annotation file is entered manually.

In [None]:
import os
import gc
import json
import csv
import torch
import torch.nn as nn
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModel, AutoTokenizer
from huggingface_hub import login
from datetime import datetime

In [None]:
# Start from a clean slate: drop PyTorch's cached CUDA allocations and run Python's garbage collector
torch.cuda.empty_cache()
gc.collect()

# Check for CUDA
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Mount drive to access files
# (google.colab is imported here, next to its only use in this cell)
from google.colab import drive
drive.mount('/content/drive')

# Single root folder on the mounted Drive; all project paths hang off it
BASE_DIR = "/content/drive/MyDrive/Llama_3B_Instruct_with_Pre-constructed_Prompts"

# Helper that turns a project-relative path into a full path string
def path_builder(relative_path):
    """Join ``relative_path`` onto ``BASE_DIR`` and return the result as a string.

    The join goes through ``pathlib``, so the returned string carries no
    trailing slash even when ``relative_path`` ends with one.
    """
    from pathlib import Path

    full_path = Path(BASE_DIR, relative_path)
    return str(full_path)

Using device: cuda
Mounted at /content/drive


In [None]:
# Hyperparameter Configuration
class Config:
    """Settings shared by the notebook: model identity, paths, training values and label sets."""

    # Model Related
    MODEL_NAME = "meta-llama/Llama-3.2-3B-Instruct"
    # The Hugging Face token is a secret and must not live in the notebook source.
    # It is read from the HF_TOKEN environment variable; when that is unset the
    # value is None (huggingface_hub's `login` is documented to prompt for a token
    # when none is passed).
    ACCESS_TOKEN = os.environ.get("HF_TOKEN")
    CACHE_DIR = path_builder("model_cache")  # Specify cache directory

    # Log and checkpoint locations
    LOG_DIR = path_builder("log_dir/")
    CHECKPOINT_DIR = path_builder("checkpoints/")

    # Dataset Preprocessing Related
    MAX_LENGTH = 2048  # Maximum token sequence length
    SAMPLING_FACTOR = 3  # Sampling factor for paraphrasing

    # Training Related
    BATCH_SIZE = 32
    MICRO_BATCH = 1  # Optional
    EPOCHS = 5  # Number of initial training epochs

    # Optimizer
    LEARNING_RATE = 5e-5
    WEIGHT_DECAY = 0.01

    # Scheduler
    S_MODE = 'min'
    S_FACTOR = 0.1
    S_PATIENCE = 3

    # Others
    DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    ROLE_CLASSES = ["Antagonist", "Protagonist", "Innocent"]  # Main roles
    # Subrole labels per main role; list order defines the index of each subrole
    SUBROLE_CLASSES = {
        "Antagonist": ['Instigator', 'Conspirator', 'Tyrant', 'Foreign Adversary',
                       'Traitor', 'Spy', 'Saboteur', 'Corrupt', 'Incompetent',
                       'Terrorist', 'Deceiver', 'Bigot'],
        "Protagonist": ['Guardian', 'Martyr', 'Peacemaker', 'Rebel', 'Underdog', 'Virtuous'],
        "Innocent": ['Forgotten', 'Exploited', 'Victim', 'Scapegoat']
    }

# Hugging Face Authentication
# Logs this session in to the Hugging Face Hub with the token held on Config.
login(Config.ACCESS_TOKEN)  # Use the access token

### Function Descriptions

In [None]:
# Model for Classification
class EntityClassifier(nn.Module):
    """Base transformer topped with one main-role head and one subrole head per main role.

    The label sets come from ``Config.ROLE_CLASSES`` and ``Config.SUBROLE_CLASSES``.
    """

    def __init__(self, base_model_name: str, token: str, cache_dir: str, freeze_base: bool = True):
        """
        Args:
            base_model_name (str): Name of the base model to load from Hugging Face.
            token (str): Token for Hugging Face authentication.
            cache_dir (str): Directory for caching the model.
            freeze_base (bool): Whether to freeze the base model weights during training.
        """
        super().__init__()
        self.main_classes = Config.ROLE_CLASSES  # Use main roles from Config
        self.subclasses = Config.SUBROLE_CLASSES  # Use subroles from Config

        # Load the base model. `token=` replaces the deprecated `use_auth_token=` keyword.
        self.base_model = AutoModel.from_pretrained(base_model_name, cache_dir=cache_dir, token=token)
        self.base_model.gradient_checkpointing_enable()

        # Optionally freeze the base model
        if freeze_base:
            for param in self.base_model.parameters():
                param.requires_grad = False

        # Define classification heads
        hidden_size = self.base_model.config.hidden_size

        # Main role classifier
        self.main_classifier = self._build_head(hidden_size, len(self.main_classes))

        # Subrole classifiers for each main role
        self.subrole_classifiers = nn.ModuleDict({
            main_class: self._build_head(hidden_size, len(subroles))
            for main_class, subroles in self.subclasses.items()
        })

    @staticmethod
    def _build_head(hidden_size: int, num_outputs: int) -> nn.Sequential:
        """Build one classification head: Linear -> LayerNorm -> GELU -> Dropout -> Linear.

        The layer order is fixed so that saved head state dicts keep loading
        under the same parameter names.
        """
        return nn.Sequential(
            nn.Linear(hidden_size, 1024),
            nn.LayerNorm(1024),
            nn.GELU(),
            nn.Dropout(0.5),
            nn.Linear(1024, num_outputs)
        )

    def forward(self, input_ids, attention_mask):
        """
        Forward pass for the model.

        Args:
            input_ids (torch.Tensor): Tokenized input IDs.
            attention_mask (torch.Tensor): Attention mask for the inputs.

        Returns:
            torch.Tensor: Main role logits.
            dict: Subrole logits for each main role.
        """
        # Pass input through the base model
        outputs = self.base_model(input_ids=input_ids, attention_mask=attention_mask)
        hidden_states = outputs.last_hidden_state

        # Use the hidden state of the last token for classification.
        # The index is (number of attended tokens - 1) per sequence.
        # NOTE(review): that is the last real token only when padding is on the
        # right; confirm the tokenizer's padding side.
        sequence_lengths = attention_mask.sum(dim=1) - 1
        # Build the batch index on the same device as the tensor it indexes
        batch_indices = torch.arange(hidden_states.size(0), device=hidden_states.device)
        cls_representation = hidden_states[batch_indices, sequence_lengths]

        # Main role predictions
        main_role_logits = self.main_classifier(cls_representation)

        # Subrole predictions (every head is evaluated; the caller picks one)
        subrole_logits = {
            main_class: classifier(cls_representation)
            for main_class, classifier in self.subrole_classifiers.items()
        }

        return main_role_logits, subrole_logits




# Load Model Function
def list_available_models_and_tokenizers(save_dir):
    """
    Lists all saved models and tokenizers in the specified directory.

    Both lists are sorted by name, so the numbers printed in the menu are the
    same on every run (``os.listdir`` itself returns entries in arbitrary order).

    Args:
        save_dir: Directory where models and tokenizers are saved.

    Returns:
        Tuple of lists containing model filenames and tokenizer directory names.

    Raises:
        FileNotFoundError: If no model checkpoint or no tokenizer directory is found.
    """
    # Scan the directory once and reuse the listing for both filters
    entries = sorted(os.listdir(save_dir))

    model_files = [
        name for name in entries
        if name.startswith("model_checkpoint_") and name.endswith(".pth")
    ]
    # Keep directories only: a plain file that happens to be named
    # "tokenizer_*" cannot be loaded as a tokenizer.
    tokenizer_dirs = [
        name for name in entries
        if name.startswith("tokenizer_") and os.path.isdir(os.path.join(save_dir, name))
    ]

    if not model_files:
        raise FileNotFoundError("No saved models found in the directory!")
    if not tokenizer_dirs:
        raise FileNotFoundError("No saved tokenizers found in the directory!")

    print("\nAvailable Models:")
    for i, model in enumerate(model_files, start=1):
        print(f"{i}. {model}")

    print("\nAvailable Tokenizers:")
    for i, tokenizer in enumerate(tokenizer_dirs, start=1):
        print(f"{i}. {tokenizer}")

    return model_files, tokenizer_dirs


def load_model_and_tokenizer(save_dir, model_class, device):
    """
    Interactively picks a saved checkpoint and tokenizer and loads them for inference.

    Only the classification heads (``main_classifier`` and the per-role
    ``subrole_classifiers``) are restored from the checkpoint; the rest of the
    model is whatever ``model_class()`` constructs.

    Args:
        save_dir: Directory where models and tokenizers are saved.
        model_class: Zero-argument callable that returns the model to populate.
        device: Device to load the model onto (e.g., 'cuda' or 'cpu').

    Returns:
        Loaded model, tokenizer, metadata (role classes, subrole classes, weights), and thresholds.
        Metadata entries missing from the checkpoint are returned as None;
        thresholds default to 0.5 per subrole when missing.

    Raises:
        FileNotFoundError: If the directory holds no checkpoint or no tokenizer.
        ValueError: If a selection is not an integer or is outside the listed range.
    """
    model_files, tokenizer_dirs = list_available_models_and_tokenizers(save_dir)

    # Select model and tokenizer (the menu is 1-based, the lists are 0-based)
    selected_model_index = int(input("\nEnter the number corresponding to the model you want to load: ")) - 1
    selected_tokenizer_index = int(input("Enter the number corresponding to the tokenizer you want to load: ")) - 1

    if selected_model_index < 0 or selected_model_index >= len(model_files):
        raise ValueError("Invalid model selection!")
    if selected_tokenizer_index < 0 or selected_tokenizer_index >= len(tokenizer_dirs):
        raise ValueError("Invalid tokenizer selection!")

    selected_model_path = os.path.join(save_dir, model_files[selected_model_index])
    selected_tokenizer_path = os.path.join(save_dir, tokenizer_dirs[selected_tokenizer_index])

    print(f"\nSelected Model: {selected_model_path}")
    print(f"Selected Tokenizer: {selected_tokenizer_path}")

    # Load essential layers and metadata
    # NOTE(review): torch.load unpickles the file, so only load checkpoints you
    # trust; consider weights_only=True once the checkpoint contents are
    # confirmed to be compatible with it.
    checkpoint = torch.load(selected_model_path, map_location=device)
    model = model_class().to(device)  # Instantiate your model class
    model.main_classifier.load_state_dict(checkpoint['main_classifier'])
    for role, state_dict in checkpoint['subrole_classifiers'].items():
        model.subrole_classifiers[role].load_state_dict(state_dict)
    model.eval()  # Set model to evaluation mode
    print("\nEssential model layers loaded successfully!")

    # Load metadata
    role_classes = checkpoint.get('role_classes', None)
    subrole_classes = checkpoint.get('subrole_classes', None)
    role_specific_weights = checkpoint.get('role_specific_weights', None)
    optimal_thresholds = checkpoint.get('optimal_thresholds', None)

    # Handle missing thresholds
    if optimal_thresholds is None:
        print("No fine-tuned thresholds found. Using default thresholds of 0.5.")
        # A checkpoint without thresholds may lack the class metadata too; fall
        # back to the label sets in Config instead of failing on None.
        subrole_map = subrole_classes if subrole_classes is not None else Config.SUBROLE_CLASSES
        roles = role_classes if role_classes is not None else list(subrole_map)
        optimal_thresholds = {role: [0.5] * len(subrole_map[role]) for role in roles}

    print("Metadata and thresholds loaded successfully!")

    # Load tokenizer
    tokenizer = AutoTokenizer.from_pretrained(selected_tokenizer_path)
    print("Tokenizer loaded successfully!")

    return model, tokenizer, role_classes, subrole_classes, role_specific_weights, optimal_thresholds



def load_log_file(log_file_path):
    """
    Read a training log saved as JSON and turn its lists back into tensors.

    Dictionaries are walked recursively. A list is converted with
    ``torch.tensor``; when that conversion fails the list is kept unchanged.
    Every other value passes through as-is.

    Args:
        log_file_path (str): Path to the log file to load.

    Returns:
        dict: The loaded training log dictionary.
    """
    def _tensorize(node):
        # Dictionaries: rebuild with each value processed in turn
        if isinstance(node, dict):
            return {key: _tensorize(value) for key, value in node.items()}
        # Anything that is not a list is left alone
        if not isinstance(node, list):
            return node
        # Lists: attempt the tensor conversion, keep the list if it fails
        try:
            return torch.tensor(node)
        except Exception:
            return node

    with open(log_file_path, "r") as handle:
        raw_log = json.load(handle)

    restored_log = _tensorize(raw_log)

    print(f"Training log loaded from {log_file_path}")
    return restored_log

### Load Model Weights, Tokenizer and Logs

In [None]:
!ls "{path_builder('tuned_model')}"

model_checkpoint_20250116_193438.pth  tokenizer_20250121_181149
model_checkpoint_20250119_102956.pth  tokenizer_20250125_191304
model_checkpoint_20250121_181149.pth  training_log_20250125_192245.json
model_checkpoint_20250125_191304.pth  training_log_20250125_192351.json
tokenizer_20250116_193438	      training_summary_20250125_191805.txt
tokenizer_20250119_102956


**Note:** It is best to load "*model_checkpoint_20250125_191304.pth*" together with "*tokenizer_20250125_191304*", as they hold the latest tuning. Earlier saves may contain inconsistencies because the code base evolved several times during the workflow. Also load the matching log file "*training_log_20250125_192351.json*".

In [None]:
# Folder that holds the saved checkpoints, tokenizers and training logs
save_dir = path_builder("tuned_model/")
# The loader asks interactively which checkpoint/tokenizer to use; model_class is
# passed as a zero-argument factory so the model is only built once it is called.
model, tokenizer, role_classes, subrole_classes, role_specific_weights, finetuned_optimal_thresholds = load_model_and_tokenizer(
    save_dir=save_dir,
    model_class=lambda: EntityClassifier(
        base_model_name=Config.MODEL_NAME,
        token=Config.ACCESS_TOKEN,
        cache_dir=Config.CACHE_DIR,
        freeze_base=False  # Adjust based on use case
    ),
    device=device
)

# Print the loaded per-role thresholds (they are applied later, in the inference cells)
for role, thresholds in finetuned_optimal_thresholds.items():
    print(f"Loaded thresholds for {role}: {thresholds}")


# Load the log
loaded_log = load_log_file(log_file_path=path_builder("tuned_model/training_log_20250125_192351.json"))


Available Models:
1. model_checkpoint_20250116_193438.pth
2. model_checkpoint_20250119_102956.pth
3. model_checkpoint_20250121_181149.pth
4. model_checkpoint_20250125_191304.pth

Available Tokenizers:
1. tokenizer_20250116_193438
2. tokenizer_20250119_102956
3. tokenizer_20250121_181149
4. tokenizer_20250125_191304

Enter the number corresponding to the model you want to load: 4
Enter the number corresponding to the tokenizer you want to load: 4

Selected Model: /content/drive/MyDrive/NLP_Project/tuned_model/model_checkpoint_20250125_191304.pth
Selected Tokenizer: /content/drive/MyDrive/NLP_Project/tuned_model/tokenizer_20250125_191304


  checkpoint = torch.load(selected_model_path, map_location=device)


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]


Essential model layers loaded successfully!
Metadata and thresholds loaded successfully!
Tokenizer loaded successfully!
Loaded thresholds for Antagonist: [0.3899999999999999, 0.4199999999999998, 0.12, 0.43999999999999984, 0.33999999999999986, 0.1, 0.21999999999999995, 0.2799999999999999, 0.19999999999999996, 0.4099999999999998, 0.2899999999999999, 0.12]
Loaded thresholds for Protagonist: [0.43999999999999984, 0.5899999999999997, 0.46999999999999986, 0.2899999999999999, 0.3599999999999999, 0.3199999999999999]
Loaded thresholds for Innocent: [0.12, 0.5299999999999998, 0.1, 0.5499999999999998]
Training log loaded from /content/drive/MyDrive/NLP_Project/tuned_model/training_log_20250125_192351.json


### Example Inferencing

 **Note:** Works well for the subroles with higher counts. Please check the "***NLP_Project_Llama_3B_Instruct.ipynb***" section "***2. Data***" or "***DP_Prompt_Generation.ipynb***" for a better understanding of the data distribution.

In [None]:
import pandas as pd
import json
import torch

# Define the file paths
# test_file: read one JSON object per line by load_single_prompt
# csv_file: the annotation CSV whose row count bounds the valid index range
test_file = path_builder("Dataset_EN_PT/test_data/test-prompts.json")
csv_file = path_builder("Dataset_EN_PT/test_data/test.csv")

# Function to load a single prompt from the JSON test file
def load_single_prompt(test_file, index=0):
    """Return the ``'prompt'`` field of the ``index``-th record in a JSON-lines file.

    The file is read line by line and only the requested record is parsed.
    Blank lines are skipped and do not count towards the index.

    Args:
        test_file (str): Path to the file holding one JSON object per line.
        index (int): Zero-based position of the record to load.

    Returns:
        The prompt text, or None when ``index`` is negative or past the last record.
    """
    # A negative index must not wrap around to the end of the file
    if index < 0:
        return None

    with open(test_file, 'r', encoding='utf-8') as f:
        records = (line for line in f if line.strip())
        for position, line in enumerate(records):
            if position == index:
                return json.loads(line)['prompt']
    return None

# Translate a row number as seen in test.csv into a 0-based data index
def actual_index_mapping(index, csv_file):
    """Convert a ``test.csv`` row number into a 0-based data index.

    Data rows in ``test.csv`` are numbered from 2, so the accepted values run
    from 2 up to (number of data rows + 1).

    Returns:
        ``index - 2`` for an accepted value; otherwise prints a message and
        returns None.
    """
    # The CSV is only read, never modified; its length gives the upper bound
    row_count = len(pd.read_csv(csv_file))

    if 2 <= index < row_count + 2:
        return index - 2  # Shift to 0-based indexing

    print("Invalid index. Please input an index starting from 2.")
    return None

# Classify one prompt: main role by argmax, subroles by per-role thresholds
def single_prompt_infer_with_thresholds(prompt, model, tokenizer, device, optimal_thresholds):
    """Run the model on a single prompt and decode its main role and subroles.

    Args:
        prompt: Text to classify.
        model: Model returning (main role logits, dict of subrole logits).
        tokenizer: Tokenizer used to encode the prompt.
        device: Device the encoded tensors are moved to.
        optimal_thresholds: Mapping from main role to a list of subrole
            thresholds; a role missing from it falls back to 0.5 per subrole.

    Returns:
        dict: ``{"Main Role": <role name>, "Subroles": [<subrole names>]}``.
    """
    encoded = tokenizer(
        prompt,
        padding="max_length",
        truncation=True,
        max_length=Config.MAX_LENGTH,
        return_tensors="pt"
    )
    batch = {name: tensor.to(device) for name, tensor in encoded.items()}

    model.eval()
    with torch.no_grad():
        role_logits, per_role_logits = model(batch['input_ids'], batch['attention_mask'])

    # Main role: the highest-scoring class
    predicted_role = Config.ROLE_CLASSES[torch.argmax(role_logits, dim=1).item()]

    # Subroles: only the head of the predicted main role is consulted
    subrole_names = Config.SUBROLE_CLASSES[predicted_role]
    cutoffs = optimal_thresholds.get(predicted_role, [0.5] * len(subrole_names))
    probabilities = torch.sigmoid(per_role_logits[predicted_role]).cpu().numpy()[0]

    chosen_subroles = []
    for position, probability in enumerate(probabilities):
        if probability >= cutoffs[position]:
            chosen_subroles.append(subrole_names[position])

    return {
        "Main Role": predicted_role,
        "Subroles": chosen_subroles
    }

# Load and map the prompt index dynamically
# `index` is the row number as seen in test.csv; actual_index_mapping turns it
# into the 0-based position used to pick the prompt from the JSON file.
index = 273  # Change this to the desired index (2 to 273 in our case)
mapped_index = actual_index_mapping(index, csv_file)
if mapped_index is not None:
    test_prompt = load_single_prompt(test_file, mapped_index)
    print(f"Loaded Test Prompt: {test_prompt}")

    # Perform inference on the selected prompt
    result = single_prompt_infer_with_thresholds(test_prompt, model, tokenizer, Config.DEVICE, finetuned_optimal_thresholds)

    # Print the result
    print("\nInference Result:")
    print(f"Main Role: {result['Main Role']}")
    print(f"Subroles: {result['Subroles']}")
else:
    # actual_index_mapping has already printed its own "Invalid index" message;
    # this second line is printed in addition to it.
    print("Invalid index. Could not load prompt.")


Loaded Test Prompt: Text:
Quais foram as consequências do ataque de Iskander da Rússia contra os Patriot na Ucrânia? O ataque a um comboio de equipamento militar ucraniano na República Popular de Donetsk foi realizado com recurso a mísseis balísticos de curto alcance. As guarnições dos sistemas de defesa aérea Patriot fornecidos à Ucrânia pelos EUA, anteriormente destruídos pelas forças russas, foram "quase de certeza" eliminadas, noticiou a revista Forbes. O meio de comunicação social referiu que a "persistência e a boa sorte" de um operador russo de drones o "recompensou no sábado (9), quando localizou um comboio ucraniano incluindo pelo menos dois lançadores quádruplos montados num camião para uma bateria de mísseis terra-ar Patriot" na República Popular de Donetsk (RPD). "Os ucranianos perderam nesse dia até 13% dos seus lançadores Patriot", pelo que "o ar sobre o Leste da Ucrânia pode ter ficado muito mais seguro para os russos", segundo a Forbes. O relato surge depois de uma font

### Generate Prediction

This section creates "generated_predictions.csv", which holds every prediction produced by the model. The file is used in "***Metrics.ipynb***" to calculate key metrics such as precision, recall, F1 score, and Exact Match Ratio (EMR) for both main roles and subclasses.

In [None]:
# Define the file paths
# test_file: input prompts, one JSON object per line
# output_file: CSV that save_predictions_to_csv writes the predictions to
test_file = path_builder("Dataset_EN_PT/test_data/test-prompts.json")
output_file = path_builder("Dataset_EN_PT/generated_predictions.csv")

def load_all_prompts(test_file):
    """
    Load all prompts from the test dataset.

    The file is expected to hold one JSON object per line, each with a
    ``'prompt'`` key. Blank lines are skipped instead of being handed to the
    JSON parser, which would reject them.

    Args:
        test_file (str): Path to the test JSON file.

    Returns:
        list: List of text prompts from the file.
    """
    with open(test_file, 'r', encoding='utf-8') as f:
        return [json.loads(line)['prompt'] for line in f if line.strip()]

def infer_all_prompts(test_prompts, model, tokenizer, device, optimal_thresholds):
    """
    Classify every prompt, one at a time, using the fine-tuned subrole thresholds.

    Args:
        test_prompts (list): List of text prompts for inference.
        model: The trained model for inference.
        tokenizer: Tokenizer for preprocessing inputs.
        device: The device to run the model on.
        optimal_thresholds: Mapping from main role to its list of subrole
            thresholds; a missing role falls back to 0.5 per subrole.

    Returns:
        list: One ``{"Main Role": ..., "Subroles": [...]}`` dict per prompt, in input order.
    """
    def _predict(prompt):
        # Encode the prompt and move the tensors to the target device
        encoded = tokenizer(
            prompt,
            padding="max_length",
            truncation=True,
            max_length=Config.MAX_LENGTH,
            return_tensors="pt"
        )
        batch = {name: tensor.to(device) for name, tensor in encoded.items()}

        role_logits, per_role_logits = model(batch['input_ids'], batch['attention_mask'])

        # Main role: highest-scoring class
        predicted_role = Config.ROLE_CLASSES[torch.argmax(role_logits, dim=1).item()]

        # Subroles: threshold the probabilities of the predicted role's head
        subrole_names = Config.SUBROLE_CLASSES[predicted_role]
        cutoffs = optimal_thresholds.get(predicted_role, [0.5] * len(subrole_names))
        probabilities = torch.sigmoid(per_role_logits[predicted_role]).cpu().numpy()[0]

        chosen_subroles = []
        for position, probability in enumerate(probabilities):
            if probability >= cutoffs[position]:
                chosen_subroles.append(subrole_names[position])

        return {
            "Main Role": predicted_role,
            "Subroles": chosen_subroles
        }

    model.eval()
    with torch.no_grad():
        return [_predict(prompt) for prompt in test_prompts]

def save_predictions_to_csv(predictions, output_file):
    """
    Write the predictions to a two-column CSV file ("Main Role", "Subroles").

    The subrole list of each prediction is stored as its ``str()`` form.

    Args:
        predictions (list): List of predictions containing main roles and subroles.
        output_file (str): Path to save the output CSV file.
    """
    header = ["Main Role", "Subroles"]
    # Rows are produced lazily, one per prediction
    rows = (
        [item["Main Role"], str(item["Subroles"])]
        for item in predictions
    )

    with open(output_file, 'w', newline='', encoding='utf-8') as handle:
        out = csv.writer(handle)
        out.writerow(header)
        out.writerows(rows)

    print(f"Predictions saved to {output_file}")

# Main workflow
# Runs the full batch: read every prompt, classify each one, write the CSV.
# `model`, `tokenizer` and `finetuned_optimal_thresholds` must already exist
# from the loading cell above.
if __name__ == "__main__":
    # Load all prompts
    test_prompts = load_all_prompts(test_file)

    # Perform inference on all prompts
    predictions = infer_all_prompts(
        test_prompts,
        model,
        tokenizer,
        Config.DEVICE,
        finetuned_optimal_thresholds
    )

    # Save predictions to CSV
    save_predictions_to_csv(predictions, output_file)


Predictions saved to /content/drive/My Drive/NLP_Project/Dataset_EN_PT/generated_predictions.csv
