In [1]:
import os
from pinecone import Pinecone
import openai
import csv
from dotenv import load_dotenv
import re
load_dotenv()

  from tqdm.autonotebook import tqdm


True

In [2]:
pinecone_api_key = os.getenv('PINECONE_API_KEY')
pc = Pinecone(api_key=pinecone_api_key)
questions_namespace = 'questions'

In [3]:
openai.api_key = os.getenv("OPENAI_API_KEY")

In [4]:
def get_similar_questions_from_set_a(question_b, index_name_set_a,num_questions=3):
    model_name = 'text-embedding-ada-002'
 
    embedres = openai.embeddings.create(
        model=model_name,
        input=question_b
    )
    # retrieve from Pinecone
    embedding_vector = embedres.data[0].embedding

    index = pc.Index(index_name_set_a)
 
    # # get relevant contexts (including the questions)
    res = index.query(vector=embedding_vector, top_k=num_questions, include_metadata=True,namespace=questions_namespace)
 
    return res

In [5]:
def get_answers_from_set_a(similar_questions):
    # Get the index for Set A

    correct_answers = []
    for match in similar_questions['matches']:
        correct_answer = match['metadata'].get('correct_answer', None)
        if correct_answer:
            correct_answers.append(correct_answer)
    return correct_answers

In [6]:
def generate_answer_with_openai(question_b, options_b, answers_a):
    # Prepare prompt for OpenAI API
    prompt = f'''
                    You are provided with a question with 7 options, out of which one is right. Choose the correct answer out of the 7 options provided while also giving explanation as to why the correct answer was chosen.
                    Your question to answer: {question_b}
                    7 options for the question: {options_b} {answers_a}
                    Provide the answer and explanation in the below format strictly 
                    Answer:
                    Explanation:
                    Note: Strictly stick to the qustion and options provided for choosing the correct answer and it's explanation'''
 
                # Call OpenAI API to get the response
    response = openai.chat.completions.create(
        model="gpt-3.5-turbo-0125",
        messages=[
                    {"role": "system", "content": "You are a helpful assistant tasked with giving correct answer and explanation for the correct answer."},
                    {"role": "user", "content": prompt},
                ],
        temperature=0.5
    )

    # Extract answer and justification from OpenAI response
    answer_and_justification = response.choices[0].message.content
    return answer_and_justification

In [7]:
def read_set_b_from_csv(csv_file):
    questions = []
    options = []
    correct_answers = []
    explanations = []
    with open(csv_file, 'r') as file:
        reader = csv.DictReader(file)
        for row in reader:
            questions.append(row['Question'])
            options.append([row['Option A'], row['Option B'], row['Option C'], row['Option D']])
            correct_answers.append(row['Correct Answer'])
            explanations.append(row['Explanation'])
    return questions, options, correct_answers, explanations

In [9]:
def main():
    # Index names for Set A and Set B
    index_name_set_a = os.getenv("pinecone_index_name1")
    index_name_set_b = os.getenv("pinecone_index_name")

    # CSV file containing Set B questions and options
    csv_file_set_b = "./data/qna_data_setb.csv"
    output_file = "./data/openai_answers.csv"

    # Read Set B data from CSV
    questions_b, options_b, correct_answers_b, explanations_b = read_set_b_from_csv(csv_file_set_b)

    # Create a list to store the OpenAI responses
    openai_responses = []


    for i, (question_b, options_b) in enumerate(zip(questions_b, options_b)):
        similar_questions = get_similar_questions_from_set_a(question_b, index_name_set_a)

        # Get answers for similar questions from Set A
        answers_a = get_answers_from_set_a(similar_questions)

        # Generate answer and justification with OpenAI API
        answer_and_justification = generate_answer_with_openai(question_b, options_b, answers_a)

        # Extract correct answer option from OpenAI answer
        answer_pattern = r"Answer: *\n?Option ([A-Za-z])|Answer:\n([A-Za-z])"
        match = re.search(answer_pattern, answer_and_justification) #type: ignore
        if match:
            correct_answer_openai = f"Option {match.group(1)}"
        else:
            correct_answer_openai = None
 
        # Append the question, options, correct answer, and explanation to the list
        openai_responses.append({
            'Question': question_b,
            'Options': ', '.join(options_b),
            'OpenAI Answer and Justification': answer_and_justification,
            'Correct Answer (Set B)': correct_answers_b[i],  # Get the correct answer from Set B
            'Explanation (Set B)': explanations_b[i],  # Get the explanation from Set B
            'Correct Answer (OpenAI)': correct_answer_openai  # Store the correct answer option from OpenAI
        })
 
 
        # Compare OpenAI answers with correct answers from Set B
        num_correct_answers = sum(1 for response in openai_responses if response['Correct Answer (Set B)'] == response['Correct Answer (OpenAI)'])
        print(f"Number of correct answers provided by OpenAI: {num_correct_answers}/{len(openai_responses)}")
 
        # Save OpenAI responses to CSV
        fieldnames = ['Question', 'Options', 'OpenAI Answer and Justification', 'Correct Answer (Set B)', 'Explanation (Set B)', 'Correct Answer (OpenAI)']
        with open(output_file, 'w', newline='', encoding='utf-8') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(openai_responses)
 
        print(f"OpenAI responses saved to {output_file}")
 
if __name__ == "__main__":
    main()

Number of correct answers provided by OpenAI: 0/1
OpenAI responses saved to ./data/openai_answers.csv
Number of correct answers provided by OpenAI: 0/2
OpenAI responses saved to ./data/openai_answers.csv
Number of correct answers provided by OpenAI: 0/3
OpenAI responses saved to ./data/openai_answers.csv
Number of correct answers provided by OpenAI: 0/4
OpenAI responses saved to ./data/openai_answers.csv
Number of correct answers provided by OpenAI: 0/5
OpenAI responses saved to ./data/openai_answers.csv
Number of correct answers provided by OpenAI: 0/6
OpenAI responses saved to ./data/openai_answers.csv
Number of correct answers provided by OpenAI: 0/7
OpenAI responses saved to ./data/openai_answers.csv
Number of correct answers provided by OpenAI: 0/8
OpenAI responses saved to ./data/openai_answers.csv
Number of correct answers provided by OpenAI: 1/9
OpenAI responses saved to ./data/openai_answers.csv
Number of correct answers provided by OpenAI: 1/10
OpenAI responses saved to ./dat