# Playing with the trained model

## Natural Language Inference (NLI)

Natural Language Inference (NLI) is a classic NLP (Natural Language Processing) problem that involves taking two sentences (the premise and the hypothesis) and deciding how they are related: whether the premise *entails* the hypothesis, *contradicts* it, or does *neither* (neutral).

Examples:

| Premise | Label | Hypothesis |
| --- | --- | --- |
| A man inspects the uniform of a figure in some East Asian country. | contradiction | The man is sleeping. |
| An older and younger man smiling. | neutral | Two men are smiling and laughing at the cats playing on the floor. |
| A soccer game with multiple males playing. | entailment | Some men are playing a sport. |




In [None]:
## Setting up environment (if not done already)
# Installs the dependencies listed in the file passed to `-r`.
# NOTE(review): pip projects conventionally name this file "requirements.txt";
# confirm that "requirement.txt" is the actual filename in this repository.
%pip install -r requirement.txt

In [31]:
## Imports and functions

from transformers import (
    AlbertForSequenceClassification,
    Trainer,
    TrainingArguments,
    AlbertTokenizer,
)
from datasets import load_dataset, load_from_disk
import torch
import os
import sys

from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

import wandb
import yaml


CONFIG_PATH = "./config_sample.yaml"

# Parse the YAML configuration once; the resulting `config` mapping is read by
# the helper functions defined below (e.g. config["PRETRAINED_MODEL"]).
# The encoding is explicit so parsing does not depend on the platform's
# default locale encoding.
with open(CONFIG_PATH, "r", encoding="utf-8") as f:
    config = yaml.safe_load(f)

def load_model(model_checkpoint: str) -> torch.nn.Module:
    """Load a fine-tuned ALBERT classifier from a checkpoint directory.

    The architecture is instantiated from ``config["PRETRAINED_MODEL"]`` with
    three output labels, then its weights are replaced by the state dict
    stored in ``pytorch_model.bin`` inside ``model_checkpoint``.

    Args:
        model_checkpoint: Path of the directory containing
            ``pytorch_model.bin``.

    Returns:
        The model in evaluation mode, placed on the CUDA device when one is
        available and on the CPU otherwise.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model = AlbertForSequenceClassification.from_pretrained(
        config["PRETRAINED_MODEL"], num_labels=3
    )

    # Read the weights from the directory given by the caller rather than
    # from a notebook-level global, so the argument is actually honoured.
    checkpoint_path = os.path.join(model_checkpoint, "pytorch_model.bin")
    # map_location lets a checkpoint saved from a GPU run load on a machine
    # without CUDA.
    model_state_dict = torch.load(checkpoint_path, map_location=device)
    model.load_state_dict(model_state_dict)

    # Prepare for evaluation: move to the target device, switch to eval mode
    # and clear any gradients.
    model.to(device)
    model.eval()
    model.zero_grad()

    return model

# Tokenizers keyed by pretrained model name, so repeated predictions do not
# reload the vocabulary on every call.
_TOKENIZER_CACHE = {}


def _get_tokenizer(pretrained_name: str):
    """Return the ALBERT tokenizer for ``pretrained_name``, loading it once."""
    if pretrained_name not in _TOKENIZER_CACHE:
        _TOKENIZER_CACHE[pretrained_name] = AlbertTokenizer.from_pretrained(
            pretrained_name
        )
    return _TOKENIZER_CACHE[pretrained_name]


def predict(premise: str, hypothesis: str, model: torch.nn.Module) -> str:
    """Predict the NLI label for a premise/hypothesis pair.

    Args:
        premise: The premise sentence.
        hypothesis: The hypothesis sentence.
        model: A sequence-classification model whose output exposes
            ``.logits`` over three classes.

    Returns:
        One of ``"ENTAILMENT"``, ``"NEUTRAL"`` or ``"CONTRADICTION"``.
    """
    tokenizer = _get_tokenizer(config["PRETRAINED_MODEL"])

    # Send the inputs to whichever device the model lives on instead of
    # assuming CUDA.
    device = next(model.parameters()).device

    inputs = tokenizer(
        premise,
        hypothesis,
        truncation=True,
        padding="max_length",
        return_tensors="pt",
    ).to(device)

    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        logits = model(**inputs).logits

    # A single pair is encoded, so the argmax over the class axis yields
    # exactly one class id.
    predicted_class_id = logits.argmax(dim=-1).item()

    label_mapping = {0: "ENTAILMENT", 1: "NEUTRAL", 2: "CONTRADICTION"}

    return label_mapping[predicted_class_id]


### Loading the trained model

In [None]:
# Directory of the checkpoint to load; it must contain `pytorch_model.bin`.
BEST_MODEL_CHECKPOINT = (
    "/usr/users/cei2023_2024_sondra_cself/coscoy_rem/Documents/nlp_tp/logs/albert-base-v2_7/checkpoint-90000/"
)

# Build the classifier and restore its weights from that checkpoint.
model = load_model(model_checkpoint=BEST_MODEL_CHECKPOINT)

### Playing with the model

In [64]:
# STANDARD EXAMPLES
# The two lists are paired by position: premises[i] goes with hypotheses[i].
premises = [
    "An older and younger man smiling.",
    "A man inspects the uniform of a figure in some East Asian country",
    "A soccer game with multiple males playing.",
]

hypotheses = [
    "Two men are smiling and laughing at the cats playing on the floor.",
    "The man is sleeping",
    "Some men are playing a sport",
]

# Classify each pair and print a three-line report followed by a blank line.
for premise, hypothesis in zip(premises, hypotheses):
    prediction = predict(premise, hypothesis, model=model)
    print(
        f"Premise: {premise}",
        f"Hypothesis: {hypothesis}",
        f"Prediction: {prediction}\n",
        sep="\n",
    )


Premise: An older and younger man smiling.
Hypothesis: Two men are smiling and laughing at the cats playing on the floor.
Prediction: NEUTRAL

Premise: A man inspects the uniform of a figure in some East Asian country
Hypothesis: The man is sleeping
Prediction: CONTRADICTION

Premise: A soccer game with multiple males playing.
Hypothesis: Some men are playing a sport
Prediction: ENTAILMENT



In [65]:
## CUSTOM EXAMPLE
# Edit the two sentences below to try the model on a pair of your own.
PREMISE = "All animals are pink."
HYPOTHESIS = "Socrates is an animal and he is red."

# Kept as the final expression of the cell so the notebook displays the label.
predict(premise=PREMISE, hypothesis=HYPOTHESIS, model=model)

'CONTRADICTION'