In [8]:
# Install required libraries
!pip install transformers datasets torch



# Libraries

In [9]:
# Import necessary libraries
import torch
from transformers import pipeline
from datasets import load_dataset

# Import Dataset
Cite- The Hugging Face Datasets library

URL: https://github.com/huggingface/datasets

In [10]:
#Load the PIQA Dataset
print("Loading PIQA dataset...")
piqa = load_dataset("piqa")

#Access the validation set for evaluation
validation_data = piqa['validation']

#Inspect a sample
print(validation_data[0])

Loading PIQA dataset...
{'goal': "How do I ready a guinea pig cage for it's new occupants?", 'sol1': 'Provide the guinea pig with a cage full of a few inches of bedding made of ripped paper strips, you will also need to supply it with a water bottle and a food dish.', 'sol2': 'Provide the guinea pig with a cage full of a few inches of bedding made of ripped jeans material, you will also need to supply it with a water bottle and a food dish.', 'label': 0}


# Evaluation Function And Accuracy Function
Citation for the function logic and methodology: Adapted from general principles in transformer-based text classification using the Hugging Face Transformers library.

URL: https://huggingface.co/docs/transformers/main/en/task_summary#text-classification



In [11]:
# Define Evaluation Function
def evaluate_model(data, classifier, model_name, max_examples=20):
    correct = 0
    total = 0

    print(f"Evaluating model: {model_name}")
    for example in data.select(range(max_examples)):
        goal = example['goal']
        sol1 = example['sol1']
        sol2 = example['sol2']

        # Use the model to score each solution in the context of the goal
        input_1 = f"Question: {goal} Answer: {sol1}"
        input_2 = f"Question: {goal} Answer: {sol2}"

        result_1 = classifier(input_1, truncation=True, max_length=512)[0]['score']
        result_2 = classifier(input_2, truncation=True, max_length=512)[0]['score']

        # Select the higher scoring solution as the prediction
        prediction = 1 if result_1 > result_2 else 2
        correct_solution = 1 if example['label'] == 0 else 2

        # Compare prediction with correct label
        if prediction == correct_solution:
            correct += 1
        total += 1

        print(f"Goal: {goal}")
        print(f"  Solution 1: {sol1} -> Score: {result_1:.3f}")
        print(f"  Solution 2: {sol2} -> Score: {result_2:.3f}")
        print(f"  Prediction: Solution {prediction}, Correct: Solution {correct_solution}")
        print("-" * 50)

    # Calculate accuracy
    accuracy = correct / total * 100
    print(f"Accuracy of {model_name} on {total} Examples: {accuracy:.2f}%")
    return accuracy

# Model-1
Citation for Hugging Face Transformers library:
Hugging Face. "Transformers: State-of-the-art Machine Learning for Pytorch, TensorFlow, and JAX."

URL: https://github.com/huggingface/transformers


In [12]:
#Load the First Model
print("Loading the first pre-trained model...")
model_1 = "facebook/opt-1.3b"
classifier_1 = pipeline("text-classification",
                        model=model_1,
                        device=0 if torch.cuda.is_available() else -1)

# Evaluate the first model
accuracy_1 = evaluate_model(validation_data, classifier_1, model_1, max_examples=20)

Loading the first pre-trained model...


Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-1.3b and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Evaluating model: facebook/opt-1.3b
Goal: How do I ready a guinea pig cage for it's new occupants?
  Solution 1: Provide the guinea pig with a cage full of a few inches of bedding made of ripped paper strips, you will also need to supply it with a water bottle and a food dish. -> Score: 0.643
  Solution 2: Provide the guinea pig with a cage full of a few inches of bedding made of ripped jeans material, you will also need to supply it with a water bottle and a food dish. -> Score: 0.603
  Prediction: Solution 1, Correct: Solution 1
--------------------------------------------------
Goal: dresser
  Solution 1: replace drawer with bobby pin  -> Score: 0.629
  Solution 2: finish, woodgrain with  bobby pin  -> Score: 0.692
  Prediction: Solution 2, Correct: Solution 2
--------------------------------------------------
Goal: To fight Ivan Drago in Rocky for sega master system.
  Solution 1: Drago isn't in this game because it was released before Rocky IV. -> Score: 0.747
  Solution 2: You ha

# Model-2
Citation for Hugging Face Transformers library: Hugging Face. "Transformers: State-of-the-art Machine Learning for Pytorch, TensorFlow, and JAX."

URL: https://github.com/huggingface/transformers

In [13]:
# Load the Second Model
print("Loading the second pre-trained model...")
model_2 = "roberta-large-mnli"
classifier_2 = pipeline("text-classification",
                        model=model_2,
                        device=0 if torch.cuda.is_available() else -1)

# Evaluate the second model
accuracy_2 = evaluate_model(validation_data, classifier_2, model_2, max_examples=20)

Loading the second pre-trained model...


Some weights of the model checkpoint at roberta-large-mnli were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Evaluating model: roberta-large-mnli
Goal: How do I ready a guinea pig cage for it's new occupants?
  Solution 1: Provide the guinea pig with a cage full of a few inches of bedding made of ripped paper strips, you will also need to supply it with a water bottle and a food dish. -> Score: 0.997
  Solution 2: Provide the guinea pig with a cage full of a few inches of bedding made of ripped jeans material, you will also need to supply it with a water bottle and a food dish. -> Score: 0.997
  Prediction: Solution 1, Correct: Solution 1
--------------------------------------------------
Goal: dresser
  Solution 1: replace drawer with bobby pin  -> Score: 0.533
  Solution 2: finish, woodgrain with  bobby pin  -> Score: 0.567
  Prediction: Solution 2, Correct: Solution 2
--------------------------------------------------
Goal: To fight Ivan Drago in Rocky for sega master system.
  Solution 1: Drago isn't in this game because it was released before Rocky IV. -> Score: 0.552
  Solution 2: You h

In [14]:
# Compare Results
print("\nComparison of Model Performance:")
print(f"Model 1 ({model_1}): {accuracy_1:.2f}% accuracy")
print(f"Model 2 ({model_2}): {accuracy_2:.2f}% accuracy")


Comparison of Model Performance:
Model 1 (facebook/opt-1.3b): 70.00% accuracy
Model 2 (roberta-large-mnli): 55.00% accuracy
