## Step - 1
### Splitting the dataset into training, testing, and evaluation sets

In [5]:
import json
import random

# Fix the RNG seed so the shuffle below yields the same ordering on every run.
RANDOM_SEED = 42
random.seed(RANDOM_SEED)

# Read the complete question bank.
with open("dataset/high_school_physics.json", "r", encoding="utf-8") as f:
    data = json.load(f)

# Shuffle in place before slicing so each split is a random sample.
random.shuffle(data)

# 70% of the records go to training, 15% to testing, and whatever is left
# over (including any rounding remainder) goes to evaluation.
train_size = int(0.70 * len(data))
test_size = int(0.15 * len(data))
test_end = train_size + test_size

train_data = data[:train_size]
test_data = data[train_size:test_end]
eval_data = data[test_end:]

# Save train, test, and evaluation sets, one JSON file per split.
split_files = (
    ("dataset/train.json", train_data),
    ("dataset/test.json", test_data),
    ("dataset/eval.json", eval_data),
)
for split_path, split_records in split_files:
    with open(split_path, "w", encoding="utf-8") as f:
        json.dump(split_records, f, indent=4)

print(f"Dataset split into {len(train_data)} training, {len(test_data)} testing, and {len(eval_data)} evaluation samples.")

Dataset split into 280 training, 60 testing, and 60 evaluation samples.


In [3]:
# Spot-check two records from the test split to see what an item looks like.
for sample_idx in (0, 3):
    print(test_data[sample_idx])

{'id': 261, 'question': 'A 10 Ω resistor is connected across a 15 V battery. What is the current flowing through the resistor?', 'subject': 'Electrostatics and Current Electricity', 'choices': ['0.5 A', '1 A', '1.5 A', '2 A'], 'answer': 'C', 'explanation': 'I = V/R = 15/10 = 1.5 A', 'dataset': 'high_school_physics'}
{'id': 338, 'question': 'A force of 80 N is applied to a 8 kg block. What is the acceleration of the block?', 'subject': 'Mechanics', 'choices': ['5 m/s²', '10 m/s²', '15 m/s²', '20 m/s²'], 'answer': 'B', 'explanation': 'F = ma => a = F/m = 80/8 = 10 m/s²', 'dataset': 'high_school_physics'}


### Zero-Shot Evaluation of Flan-T5 from Hugging Face

In [None]:
from transformers import pipeline

# Text-to-text generation pipeline using the google/flan-t5-large checkpoint;
# this object is the `model` handed to evaluate_model.
qa_pipeline = pipeline(
    "text2text-generation",
    model="google/flan-t5-large",
)

def _choice_mentioned(choice, text):
    """Return True when ``choice`` occurs in ``text`` as a stand-alone answer."""
    import re  # function-scope import so this cell does not rely on another cell's imports

    choice = choice.strip()
    if not choice:
        return False
    # The look-arounds stop a shorter option from matching inside a longer
    # value: "5 m/s²" inside "15 m/s²", "5 A" inside "1.5 A", "1" inside "1.5".
    pattern = r"(?<![\w.])" + re.escape(choice) + r"(?!\w|[.,]\d)"
    return re.search(pattern, text) is not None


def evaluate_model(model, dataset):
    """Return the percentage of multiple-choice items that ``model`` answers correctly.

    Args:
        model: Callable invoked as ``model(prompt, max_length=20, truncation=True)``.
            It must return a sequence whose first element is a mapping with a
            ``"generated_text"`` string.
        dataset: Sized iterable of dicts with the keys ``"question"`` (str),
            ``"choices"`` (list of str) and ``"answer"`` (a letter label where
            ``"A"`` refers to the first choice).

    Returns:
        Accuracy as a float between 0 and 100. An empty dataset yields 0.0
        instead of raising ZeroDivisionError.

    Raises:
        IndexError: If an answer letter points past the end of ``"choices"``.

    A prediction is counted as correct only when it names the correct choice
    as a stand-alone value and names no competing choice. Plain substring
    containment would credit "15 m/s²" for the answer "5 m/s²" and would
    credit any output that simply repeats the whole list of choices.
    """
    total = len(dataset)
    if total == 0:
        return 0.0

    correct = 0
    for item in dataset:
        question = item["question"]
        choices = item["choices"]
        # Tolerate lowercase or whitespace-padded labels such as " c ".
        idx = ord(item["answer"].strip().upper()) - ord("A")
        correct_answer = choices[idx]

        prompt = f"Give me the final answer without any explaination, just the couple of words with units that directly show the answer for the Question: {question} with Choices: {', '.join(item['choices'])} Answer:"
        prediction = model(prompt, max_length=20, truncation=True)[0]["generated_text"]

        mentioned = [choice for choice in choices if _choice_mentioned(choice, prediction)]
        names_correct = correct_answer in mentioned
        # A choice that is itself contained in the correct answer (e.g. "5 N"
        # within "5 N/m") is implied by it and is not treated as a competitor.
        no_competitor = all(choice.strip() in correct_answer for choice in mentioned)
        if names_correct and no_competitor:
            correct += 1

    return (correct / total) * 100

# Score the pipeline on the test split; the prompt contains no worked examples.
zero_shot_accuracy = evaluate_model(model=qa_pipeline, dataset=test_data)
print(f"Zero-Shot Accuracy: {zero_shot_accuracy:.2f}%")


Device set to use cuda:0


Zero-Shot Accuracy: 100.00%
