In [None]:
import math
import os
from math import exp

import numpy as np
from IPython.display import display, HTML
from openai import OpenAI
from openai.types.chat import ChatCompletion

# load env
# load_dotenv() comes from python-dotenv; presumably it picks up a local .env
# file so the API key does not have to be hard-coded in the notebook.
from dotenv import load_dotenv
load_dotenv()

# Shared client used by get_completion below. os.environ.get returns None when
# OPENAI_API_KEY is unset, in which case the key is left for the OpenAI client
# itself to resolve or reject.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

In [24]:
def get_completion(
    messages: list[dict[str, str]],
    model: str = "gpt-3.5-turbo",
    max_tokens=500,
    temperature=0,
    stop=None,
    seed=123,
    tools=None,
    logprobs=None,  # whether to return log probabilities of the output tokens or not. If true, returns the log probabilities of each output token returned in the content of message..
    top_logprobs=None,
) -> "ChatCompletion":
    """Send one chat-completion request through the module-level ``client``.

    Args:
        messages: Chat messages, each a dict with ``role`` and ``content``.
        model: Name of the chat model to query.
        max_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature.
        stop: Optional stop sequence(s).
        seed: Seed forwarded to the API for best-effort reproducible sampling.
        tools: Optional tool definitions; only sent when non-empty.
        logprobs: Whether to return log probabilities of the output tokens.
        top_logprobs: How many alternative tokens (with log probabilities) to
            return per position; the API requires ``logprobs=True`` for this.

    Returns:
        The completion object returned by ``client.chat.completions.create``
        (not a plain string). Callers read fields such as
        ``.choices[0].logprobs`` from it.
    """
    params = {"model": model, "messages": messages}

    # Only forward options that were actually set, so the request does not
    # carry explicit nulls for settings the caller left at "unset".
    optional = {
        "max_tokens": max_tokens,
        "temperature": temperature,
        "stop": stop,
        "seed": seed,
        "logprobs": logprobs,
        "top_logprobs": top_logprobs,
    }
    params.update({key: value for key, value in optional.items() if value is not None})

    if tools:
        params["tools"] = tools

    return client.chat.completions.create(**params)

In [None]:
def single_inference_gpt(question, model="gpt-3.5-turbo", temperature=1, top_logprobs=5):
    """Ask the model one question and collect per-token log-probability data.

    The question is suffixed with an instruction to output nothing but the
    answer, then sent via ``get_completion`` with ``logprobs=True``.

    Args:
        question: Prompt text for a single question.
        model: Chat model to query.
        temperature: Sampling temperature passed to the API.
        top_logprobs: Number of alternative tokens requested per position.

    Returns:
        A 5-tuple ``(generated_text, tokens, log_probs, linear_probs,
        top_log_probs)``:
        - generated_text: the generated token strings, in order;
        - tokens: the raw per-token logprob entries from the response;
        - log_probs: log probability of each generated token;
        - linear_probs: ``exp(logprob) * 100`` for each token, i.e. a percentage;
        - top_log_probs: each token's list of alternative-token entries.
        All five are empty lists when the response carries no logprob content.
    """
    input_text = question + " Do not add anything else to your output: "

    API_RESPONSE = get_completion(
        [{"role": "user", "content": input_text}],
        model=model,
        logprobs=True,
        temperature=temperature,
        top_logprobs=top_logprobs,
    )

    # Both `logprobs` and `logprobs.content` are optional in the response
    # (e.g. when no text content was produced); treat that as "no tokens"
    # instead of failing while iterating over None.
    choice_logprobs = API_RESPONSE.choices[0].logprobs
    tokens = (choice_logprobs.content if choice_logprobs is not None else None) or []

    generated_text = [entry.token for entry in tokens]
    log_probs = [entry.logprob for entry in tokens]
    linear_probs = [exp(entry.logprob) * 100 for entry in tokens]
    top_log_probs = [entry.top_logprobs for entry in tokens]

    return generated_text, tokens, log_probs, linear_probs, top_log_probs

# Running on ORIGINAL Data

## MMLU 

In [None]:
## RUNNING INFERENCE ON THE DATASET

from tqdm import tqdm
import pandas as pd

# Only the first 2000 rows of the sample file are evaluated.
df = pd.read_csv('../../Original_Datasets/MMLU_5000_sample.csv')[:2000]

# One entry per row of df, in row order: the saving cell pairs responses[i]
# with df['answer'].iloc[i], so this order must be preserved.
responses = []

# iterrows() is a generator with no length, so tqdm needs total= to draw a
# real progress bar with an ETA instead of just a running count.
for i, row in tqdm(df.iterrows(), total=len(df)):
    question = row['prompt']
    # Not used during inference; reading it here makes a missing 'answer'
    # column fail before the API call rather than later at save time.
    answer = row['answer']

    response = single_inference_gpt(question)

    responses.append(response)


1000it [14:48,  1.13it/s]


In [None]:
## SAVING THE RESULTS TO A JSON FILE

import json

# Build one JSON record per answered question, keeping only the first
# generated token (the prompt asks the model to output nothing but its answer).
json_data = []
for i, each in enumerate(responses):
    # each is the 5-tuple returned by single_inference_gpt:
    # (generated_text, tokens, log_probs, linear_probs, top_log_probs).
    generated_tokens, log_probs, linear_probs, top_log_probs = each[0], each[2], each[3], each[4]
    correct_answer = df['answer'].iloc[i]  # Get the correct answer for the current question
    # Numeric/bool CSV columns come back from pandas as NumPy scalars, which
    # json.dump rejects (e.g. numpy.int64). .item() yields the native Python
    # value; plain strings have no .item() and pass through unchanged.
    if hasattr(correct_answer, "item"):
        correct_answer = correct_answer.item()

    # Responses without any token are skipped; the 1-based index embedded in
    # the record key still identifies which question each record belongs to.
    if not generated_tokens:
        continue

    # Only process the first token
    token_entries = [{
        "generated_token": generated_tokens[0],
        "log_probability": log_probs[0],
        "linear_probability": linear_probs[0],
        "top_alternatives": [
            {"token": alt.token, "log_probability": alt.logprob}
            for alt in top_log_probs[0]
        ],
    }]

    # Append each question's data with only the first token's details
    json_data.append({
        f"correct_answer_{i + 1}": correct_answer,
        "tokens": token_entries
    })

# Save to JSON file
with open("gpt_logprob_MMLU_og.json", "w") as json_file:
    json.dump(json_data, json_file, indent=4)

# print("Data saved to gpt_logprob_MMLU_og.json")


Data saved to gpt_logprob_benchmark.json


## MedMCQA

In [None]:
## RUNNING INFERENCE ON THE DATASET

from tqdm import tqdm
import pandas as pd

# Only the first 2000 rows of the sample file are evaluated.
df = pd.read_csv('../../Original_Datasets/MedMCQA_5000_sample.csv')[:2000]

# One entry per row of df, in row order: the saving cell pairs responses[i]
# with df['answer'].iloc[i], so this order must be preserved.
responses = []

# iterrows() is a generator with no length, so tqdm needs total= to draw a
# real progress bar with an ETA instead of just a running count.
for i, row in tqdm(df.iterrows(), total=len(df)):
    question = row['prompt']
    # Not used during inference; reading it here makes a missing 'answer'
    # column fail before the API call rather than later at save time.
    answer = row['answer']

    response = single_inference_gpt(question)

    responses.append(response)

In [None]:
## SAVING THE RESULTS TO A JSON FILE

import json

# Build one JSON record per answered question, keeping only the first
# generated token (the prompt asks the model to output nothing but its answer).
json_data = []
for i, each in enumerate(responses):
    # each is the 5-tuple returned by single_inference_gpt:
    # (generated_text, tokens, log_probs, linear_probs, top_log_probs).
    generated_tokens, log_probs, linear_probs, top_log_probs = each[0], each[2], each[3], each[4]
    correct_answer = df['answer'].iloc[i]  # Get the correct answer for the current question
    # Numeric/bool CSV columns come back from pandas as NumPy scalars, which
    # json.dump rejects (e.g. numpy.int64). .item() yields the native Python
    # value; plain strings have no .item() and pass through unchanged.
    if hasattr(correct_answer, "item"):
        correct_answer = correct_answer.item()

    # Responses without any token are skipped; the 1-based index embedded in
    # the record key still identifies which question each record belongs to.
    if not generated_tokens:
        continue

    # Only process the first token
    token_entries = [{
        "generated_token": generated_tokens[0],
        "log_probability": log_probs[0],
        "linear_probability": linear_probs[0],
        "top_alternatives": [
            {"token": alt.token, "log_probability": alt.logprob}
            for alt in top_log_probs[0]
        ],
    }]

    # Append each question's data with only the first token's details
    json_data.append({
        f"correct_answer_{i + 1}": correct_answer,
        "tokens": token_entries
    })

# Save to JSON file
with open("gpt_logprob_MedQA_og.json", "w") as json_file:
    json.dump(json_data, json_file, indent=4)

# print("Data saved to gpt_logprob_MedQA_og.json")

## ScienceMCQ

In [None]:
## RUNNING INFERENCE ON THE DATASET

from tqdm import tqdm
import pandas as pd

# Only the first 2000 rows of the sample file are evaluated.
df = pd.read_csv('../../Original_Datasets/ScienceMCQ_5000_sample.csv')[:2000]

# One entry per row of df, in row order: the saving cell pairs responses[i]
# with df['answer'].iloc[i], so this order must be preserved.
responses = []

# iterrows() is a generator with no length, so tqdm needs total= to draw a
# real progress bar with an ETA instead of just a running count.
for i, row in tqdm(df.iterrows(), total=len(df)):
    question = row['prompt']
    # Not used during inference; reading it here makes a missing 'answer'
    # column fail before the API call rather than later at save time.
    answer = row['answer']

    response = single_inference_gpt(question)

    responses.append(response)

In [None]:
## SAVING THE RESULTS TO A JSON FILE

import json

# Build one JSON record per answered question, keeping only the first
# generated token (the prompt asks the model to output nothing but its answer).
json_data = []
for i, each in enumerate(responses):
    # each is the 5-tuple returned by single_inference_gpt:
    # (generated_text, tokens, log_probs, linear_probs, top_log_probs).
    generated_tokens, log_probs, linear_probs, top_log_probs = each[0], each[2], each[3], each[4]
    correct_answer = df['answer'].iloc[i]  # Get the correct answer for the current question
    # Numeric/bool CSV columns come back from pandas as NumPy scalars, which
    # json.dump rejects (e.g. numpy.int64). .item() yields the native Python
    # value; plain strings have no .item() and pass through unchanged.
    if hasattr(correct_answer, "item"):
        correct_answer = correct_answer.item()

    # Responses without any token are skipped; the 1-based index embedded in
    # the record key still identifies which question each record belongs to.
    if not generated_tokens:
        continue

    # Only process the first token
    token_entries = [{
        "generated_token": generated_tokens[0],
        "log_probability": log_probs[0],
        "linear_probability": linear_probs[0],
        "top_alternatives": [
            {"token": alt.token, "log_probability": alt.logprob}
            for alt in top_log_probs[0]
        ],
    }]

    # Append each question's data with only the first token's details
    json_data.append({
        f"correct_answer_{i + 1}": correct_answer,
        "tokens": token_entries
    })

# Save to JSON file
with open("gpt_logprob_Science_og.json", "w") as json_file:
    json.dump(json_data, json_file, indent=4)

# print("Data saved to gpt_logprob_Science_og.json")

# Running on POSITIONAL VARIATION Dataset

## MMLU

In [None]:
## RUNNING INFERENCE ON THE DATASET

from tqdm import tqdm
import pandas as pd

# Only the first 2000 rows of the shuffled sample file are evaluated.
df = pd.read_csv('../../Shuffled_Datasets/SHUFFLED_MMLU_5000_sample.csv')[:2000]

# One entry per row of df, in row order: the saving cell pairs responses[i]
# with df['answer'].iloc[i], so this order must be preserved.
responses = []

# iterrows() is a generator with no length, so tqdm needs total= to draw a
# real progress bar with an ETA instead of just a running count.
for i, row in tqdm(df.iterrows(), total=len(df)):
    question = row['prompt']
    # Not used during inference; reading it here makes a missing 'answer'
    # column fail before the API call rather than later at save time.
    answer = row['answer']

    response = single_inference_gpt(question)

    responses.append(response)


In [None]:
## SAVING THE RESULTS TO A JSON FILE

import json

# Build one JSON record per answered question, keeping only the first
# generated token (the prompt asks the model to output nothing but its answer).
json_data = []
for i, each in enumerate(responses):
    # each is the 5-tuple returned by single_inference_gpt:
    # (generated_text, tokens, log_probs, linear_probs, top_log_probs).
    generated_tokens, log_probs, linear_probs, top_log_probs = each[0], each[2], each[3], each[4]
    correct_answer = df['answer'].iloc[i]  # Get the correct answer for the current question
    # Numeric/bool CSV columns come back from pandas as NumPy scalars, which
    # json.dump rejects (e.g. numpy.int64). .item() yields the native Python
    # value; plain strings have no .item() and pass through unchanged.
    if hasattr(correct_answer, "item"):
        correct_answer = correct_answer.item()

    # Responses without any token are skipped; the 1-based index embedded in
    # the record key still identifies which question each record belongs to.
    if not generated_tokens:
        continue

    # Only process the first token
    token_entries = [{
        "generated_token": generated_tokens[0],
        "log_probability": log_probs[0],
        "linear_probability": linear_probs[0],
        "top_alternatives": [
            {"token": alt.token, "log_probability": alt.logprob}
            for alt in top_log_probs[0]
        ],
    }]

    # Append each question's data with only the first token's details
    json_data.append({
        f"correct_answer_{i + 1}": correct_answer,
        "tokens": token_entries
    })

# Save to JSON file
with open("gpt_logprob_MMLU_position.json", "w") as json_file:
    json.dump(json_data, json_file, indent=4)

# print("Data saved to gpt_logprob_MMLU_position.json")

## MedMCQA

In [None]:
## RUNNING INFERENCE ON THE DATASET

from tqdm import tqdm
import pandas as pd

# Only the first 2000 rows of the shuffled sample file are evaluated.
df = pd.read_csv('../../Shuffled_Datasets/SHUFFLED_MedMCQA_5000_sample.csv')[:2000]

# One entry per row of df, in row order: the saving cell pairs responses[i]
# with df['answer'].iloc[i], so this order must be preserved.
responses = []

# iterrows() is a generator with no length, so tqdm needs total= to draw a
# real progress bar with an ETA instead of just a running count.
for i, row in tqdm(df.iterrows(), total=len(df)):
    question = row['prompt']
    # Not used during inference; reading it here makes a missing 'answer'
    # column fail before the API call rather than later at save time.
    answer = row['answer']

    response = single_inference_gpt(question)

    responses.append(response)

In [None]:
## SAVING THE RESULTS TO A JSON FILE

import json

# Build one JSON record per answered question, keeping only the first
# generated token (the prompt asks the model to output nothing but its answer).
json_data = []
for i, each in enumerate(responses):
    # each is the 5-tuple returned by single_inference_gpt:
    # (generated_text, tokens, log_probs, linear_probs, top_log_probs).
    generated_tokens, log_probs, linear_probs, top_log_probs = each[0], each[2], each[3], each[4]
    correct_answer = df['answer'].iloc[i]  # Get the correct answer for the current question
    # Numeric/bool CSV columns come back from pandas as NumPy scalars, which
    # json.dump rejects (e.g. numpy.int64). .item() yields the native Python
    # value; plain strings have no .item() and pass through unchanged.
    if hasattr(correct_answer, "item"):
        correct_answer = correct_answer.item()

    # Responses without any token are skipped; the 1-based index embedded in
    # the record key still identifies which question each record belongs to.
    if not generated_tokens:
        continue

    # Only process the first token
    token_entries = [{
        "generated_token": generated_tokens[0],
        "log_probability": log_probs[0],
        "linear_probability": linear_probs[0],
        "top_alternatives": [
            {"token": alt.token, "log_probability": alt.logprob}
            for alt in top_log_probs[0]
        ],
    }]

    # Append each question's data with only the first token's details
    json_data.append({
        f"correct_answer_{i + 1}": correct_answer,
        "tokens": token_entries
    })

# Save to JSON file
with open("gpt_logprob_MedQA_position.json", "w") as json_file:
    json.dump(json_data, json_file, indent=4)

# print("Data saved to gpt_logprob_MedQA_position.json")

## ScienceMCQ

In [None]:
## RUNNING INFERENCE ON THE DATASET

from tqdm import tqdm
import pandas as pd

# Only the first 2000 rows of the shuffled sample file are evaluated.
df = pd.read_csv('../../Shuffled_Datasets/SHUFFLED_ScienceMCQ_5000_sample.csv')[:2000]

# One entry per row of df, in row order: the saving cell pairs responses[i]
# with df['answer'].iloc[i], so this order must be preserved.
responses = []

# iterrows() is a generator with no length, so tqdm needs total= to draw a
# real progress bar with an ETA instead of just a running count.
for i, row in tqdm(df.iterrows(), total=len(df)):
    question = row['prompt']
    # Not used during inference; reading it here makes a missing 'answer'
    # column fail before the API call rather than later at save time.
    answer = row['answer']

    response = single_inference_gpt(question)

    responses.append(response)

In [None]:
## SAVING THE RESULTS TO A JSON FILE

import json

# Build one JSON record per answered question, keeping only the first
# generated token (the prompt asks the model to output nothing but its answer).
json_data = []
for i, each in enumerate(responses):
    # each is the 5-tuple returned by single_inference_gpt:
    # (generated_text, tokens, log_probs, linear_probs, top_log_probs).
    generated_tokens, log_probs, linear_probs, top_log_probs = each[0], each[2], each[3], each[4]
    correct_answer = df['answer'].iloc[i]  # Get the correct answer for the current question
    # Numeric/bool CSV columns come back from pandas as NumPy scalars, which
    # json.dump rejects (e.g. numpy.int64). .item() yields the native Python
    # value; plain strings have no .item() and pass through unchanged.
    if hasattr(correct_answer, "item"):
        correct_answer = correct_answer.item()

    # Responses without any token are skipped; the 1-based index embedded in
    # the record key still identifies which question each record belongs to.
    if not generated_tokens:
        continue

    # Only process the first token
    token_entries = [{
        "generated_token": generated_tokens[0],
        "log_probability": log_probs[0],
        "linear_probability": linear_probs[0],
        "top_alternatives": [
            {"token": alt.token, "log_probability": alt.logprob}
            for alt in top_log_probs[0]
        ],
    }]

    # Append each question's data with only the first token's details
    json_data.append({
        f"correct_answer_{i + 1}": correct_answer,
        "tokens": token_entries
    })

# Save to JSON file
with open("gpt_logprob_Science_position.json", "w") as json_file:
    json.dump(json_data, json_file, indent=4)

# print("Data saved to gpt_logprob_Science_position.json")