In [1]:
import pandas as pd
import random
import requests
import json
import transformers
import torch

In [2]:
data = pd.read_csv("../../Data/mmlu.csv",index_col=0)
data.head(1)

Unnamed: 0,question,subject,choices,answer
823,"A fluorescent molecule of 1,000 daltons inject...",college_biology,['spot desmosome' 'belt desmosome' 'gap juncti...,2


In [3]:
data = data.head(200)

In [4]:
data['answer_option'] = data['answer'].replace({0: 'A', 1: 'B', 2: 'C', 3:'D'})

In [5]:
def parse_choices(choices_string):
    choices_list = choices_string.strip("[]").split(" '")
    choices_list = [choice.replace("'", "").strip() for choice in choices_list]
    return choices_list

In [6]:
data['choices'] = data['choices'].apply(parse_choices)

In [7]:
l = data['choices'].tolist()
data['choices_copy'] = l

In [8]:
def mask_choice(choice_list, correct_index):
    choice_list_copy = choice_list[:]
    masked_options = []
    incorrect_indices = [i for i in range(len(choice_list_copy)) if i != correct_index]
    
    if incorrect_indices:
        mask_index_1 = random.choice(incorrect_indices)
        masked_options.append(choice_list_copy[mask_index_1])  
        choice_list_copy[mask_index_1] = ""  
        incorrect_indices.remove(mask_index_1)  
    
    return choice_list_copy, masked_options

In [9]:
data[['choices_copy', 'masked_options']] = data.apply(
    lambda row: pd.Series(mask_choice(row['choices_copy'], row['answer'])), axis=1
)

In [10]:
def format_question_answer(row):
    question = f"Question: {row['question']}\nAnswer:\n"
    
    choices = row['choices_copy']
    options_text = [
        f"A: {str(choices[0]) if len(choices) > 0 else ''}",
        f"B: {str(choices[1]) if len(choices) > 1 else ''}",
        f"C: {str(choices[2]) if len(choices) > 2 else ''}",
        f"D: {str(choices[3]) if len(choices) > 3 else ''}"
    ]
    answer_options = "\n".join(options_text)
    
    return question + answer_options

In [11]:
data['question_answer'] = data.apply(format_question_answer, axis=1)

In [12]:
data['question_answer'].values[0]

'Question: A fluorescent molecule of 1,000 daltons injected into one cell is observed to enter an adjacent cell by passing through a\nAnswer:\nA: spot desmosome\nB: belt desmosome\nC: gap junction\nD: '

In [13]:
model_id = "meta-llama/Llama-3.2-3B-Instruct"
pipeline = transformers.pipeline(
    "text-generation",
    model=model_id,
    model_kwargs={"torch_dtype": torch.bfloat16},
    device_map="auto"
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [15]:
result = {}
for index, row in data[:].iterrows():
    
    prompt = f"""
        Q:Complete the missing option in one word: [
        Q: Which of the following countries generated the most total energy from solar sources in 2019?
        Answer:
        A: China
        B: United States
        C: 
        D: Japan
        ]. 
        Reply with ONLY ONE word the missing option which is enclosed in **[ANSWER]**, not with the entire sentence.
        A: **Germany**
        Similarly, complete the missing option in one word: [{row['question_answer']}]. Reply with ONLY ONE word which is enclosed in **[ANSWER]**, not with the entire sentence. Only answer the option asked, do not reply with any other information. Do not assume anything else is given to you other than the question and the options. Take the question and options given, and help deduce the missing option."""
    response = pipeline(prompt, max_new_tokens=10, temperature=0.5, top_p=0.9)
    response_text = response[0]['generated_text'][len(prompt):].strip()
    # answer = generated_text[len(prompt):].strip()
    result[row['question']] = response_text

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id

In [17]:
import re

def extract_final_answer(response_text):
    match = re.search(r"\*\*\s*\[?([^*\[\]]+)\]?\s*\*\*", response_text)
    if match:
        candidate = match.group(1).strip()
        if candidate and candidate != "[ANSWER]":  
            return candidate
    second_answer_match = re.search(r"A:\s*\[?([^\[\]]+)\]?", response_text)
    if second_answer_match:
        candidate = second_answer_match.group(1).strip()
        if candidate and candidate != "[ANSWER]":
            return candidate
    return "Invalid response"

processed_output = dict()

for key, val in result.items():
    processed_output[key] = extract_final_answer(val).lower()

In [20]:
acc = 0
for index, row in data.iterrows():
    if row['masked_options'] and len(row['masked_options']) > 0:
        first_option = row['masked_options'][0].lower()
        comparison = processed_output[row['question']].lower()
        if first_option == comparison:
            acc += 1

In [21]:
print(f"Accuracy of Llama-3.2-3B-Instruct on predicting the missing word in the question is: {round(acc*100/len(result),2)}%")

Accuracy of Llama-3.2-3B-Instruct on predicting the missing word in the question is: 0.0%
