##### **This part of the project handles question generation. It uses the OpenAI API to send a prompt for each category and collect multiple-choice questions generated from that prompt.**

In [3]:
import openai
from openai import OpenAI
import os
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings("ignore")
import random
from sentence_transformers import SentenceTransformer
from sentence_transformers import util

In [16]:
## Sentence-embedding model used by generate_questions to detect near-duplicate questions.
model = SentenceTransformer('all-mpnet-base-v2')

## SECURITY: the OpenAI API key must never be written into the notebook. A key that was
## previously committed here must be treated as compromised and revoked.
## generate_questions reads the key from the OPENAI_API_KEY environment variable, so export
## it before starting the kernel (or set it from a private, uncommitted cell, e.g. via getpass).
if not os.getenv("OPENAI_API_KEY"):
    print("OPENAI_API_KEY is not set. Export it before starting the kernel; "
          "do not hardcode the key in the notebook.")
def _strip_label(line, label):
    """Return ``line`` without its leading ``label`` and one optional colon, trimmed.

    Only the prefix is removed, so text later in the line that happens to repeat
    the label is left untouched.
    """
    rest = line[len(label):]
    if rest.startswith(":"):
        rest = rest[1:]
    return rest.strip()


def _parse_mcq(raw_text, category):
    """Parse one completion into ``(question_line, record)`` or return None if malformed.

    ``question_line`` is the unmodified first line (still carrying the "Question:"
    prefix); ``record`` is the dictionary stored for the question. A response is
    rejected when it has fewer than six non-empty lines, when the lines do not
    start with the expected labels in order, when the correct option is not one
    of "Option A".."Option D", or when the question or any option text is empty.
    """
    lines = [line.strip() for line in raw_text.splitlines() if line.strip()]
    if len(lines) < 6:  # Check for the minimum expected length
        print("Error: OpenAI response is not formatted correctly.")
        return None

    expected_prefixes = ("Question:", "Option A", "Option B", "Option C",
                         "Option D", "Correct Option: Option")
    if not all(line.startswith(prefix) for line, prefix in zip(lines, expected_prefixes)):
        return None

    # Keep only the leading "Option X"; anything after it (e.g. the option text) is dropped.
    correct_option = _strip_label(lines[5], "Correct Option")[0:8]
    if correct_option not in ("Option A", "Option B", "Option C", "Option D"):
        return None

    record = {
        "Question": _strip_label(lines[0], "Question"),
        "Category": category,
        "Option A": _strip_label(lines[1], "Option A"),
        "Option B": _strip_label(lines[2], "Option B"),
        "Option C": _strip_label(lines[3], "Option C"),
        "Option D": _strip_label(lines[4], "Option D"),
        "Correct Option": correct_option}

    # A question with an empty stem or an empty option is unusable.
    if not all(record[key] for key in ("Question", "Option A", "Option B", "Option C", "Option D")):
        return None
    return lines[0], record


def generate_questions(prompt, num_questions=10, max_attempts=None, similarity_threshold=0.8):
    """Generate multiple-choice questions about ``prompt`` with the OpenAI completions API.

    Each accepted question passed a format check, is not an exact repeat of an
    earlier response, and has a cosine similarity of at most
    ``similarity_threshold`` to every previously accepted question (embeddings
    come from the module-level ``model``).

    Parameters
    ----------
    prompt : str
        Category/topic of the questions; also stored in the "Category" column.
    num_questions : int, default 10
        Number of questions to collect.
    max_attempts : int or None, default None
        Upper bound on API calls. ``None`` means ``5 * num_questions``. Without a
        bound, a run of malformed or duplicate responses would loop forever.
    similarity_threshold : float, default 0.8
        Candidates whose similarity to an accepted question exceeds this value
        are discarded as duplicates.

    Returns
    -------
    pandas.DataFrame or None
        One row per question with columns "Question", "Category", "Option A",
        "Option B", "Option C", "Option D", "Correct Option". The frame may hold
        fewer than ``num_questions`` rows if ``max_attempts`` is exhausted (a
        warning is printed). ``None`` is returned, after printing the error, if
        the API key is missing or any other exception occurs.
    """
    try:

        ## Using OpenAI API, questions developed using prompting based on the requirement.
        api_key = os.getenv("OPENAI_API_KEY")
        if not api_key:
            raise ValueError("OPENAI_API_KEY environment variable not set.")

        if max_attempts is None:
            max_attempts = max(num_questions, 0) * 5

        client = OpenAI(api_key=api_key)
        generated_texts = set()        # raw responses already seen (exact-duplicate filter)
        accepted_embeddings = []       # one embedding per accepted question line
        questions = []
        attempts = 0

        while len(questions) < num_questions and attempts < max_attempts:
            attempts += 1
            variation_prompt = f"{prompt} - Question {len(questions) + 1}"
            response = client.completions.create(
                model="gpt-3.5-turbo-instruct",
                prompt=(
                    f"I am in the process of developing a questionnaire with multiple choice questions to test general knowledge of Seniors in my University. I want diverse questions strictly"
                    f" adhering to my required format. Generate a different multiple-choice question about {variation_prompt} strictly in the format:\n\n"
                    f"Question:\n"
                    f"Option A:\n"
                    f"Option B:\n"
                    f"Option C:\n"
                    f"Option D:\n"
                    f"Correct Option: Option A or Option B or Option C or Option D"
                ),
                max_tokens=250,
                n=1,
                stop=None,
                temperature=0.9
            )

            ## Question Format Check Filtering
            question_text = response.choices[0].text.strip()
            if question_text in generated_texts:
                continue
            generated_texts.add(question_text)

            parsed = _parse_mcq(question_text, prompt)
            if parsed is None:
                continue
            question_line, question_data = parsed

            ## This part ensures similar questions are not asked again (uses cosine similarity of the embeddings)
            ## The candidate is encoded once and compared with cached embeddings, instead of
            ## re-encoding every accepted question for every candidate.
            candidate_embedding = model.encode([question_line])[0]
            is_duplicate = any(
                util.cos_sim(candidate_embedding, existing_embedding).item() > similarity_threshold
                for existing_embedding in accepted_embeddings
            )
            if is_duplicate:
                print("Similar exitss")
                continue

            accepted_embeddings.append(candidate_embedding)

            ## Questions added to the list as a dictionary
            questions.append(question_data)
            print(question_data)

        if len(questions) < num_questions:
            print(f"Warning: only {len(questions)} of {num_questions} questions generated "
                  f"for '{prompt}' after {attempts} attempts.")

        questions_df = pd.DataFrame(questions)
        return questions_df

    ## Error message output
    except ValueError as ve:
        print(f"ValueError: {ve}")
        return None
    except Exception as e:
        print(f"An error occurred: {e}")
        return None



In [19]:
## Prompts to generate questions (five general-knowledge categories)
prompts=["Sports", "Trivia", "International Affairs", "Science and Technology", "Historical Events"]
test_list =[generate_questions(prompt,num_questions=40) for prompt in prompts]

## generate_questions returns None when generation fails. pd.concat would drop those
## entries silently, so report the affected categories instead of losing them unnoticed.
failed_prompts = [prompt for prompt, result in zip(prompts, test_list) if result is None]
if failed_prompts:
    print(f"Warning: no questions were generated for: {', '.join(failed_prompts)}")
generated_frames = [result for result in test_list if result is not None]
if not generated_frames:
    raise ValueError("Question generation failed for every category; nothing to save.")
test=pd.concat(generated_frames,ignore_index=True)

## Creating unique question_id and storing as csv file
## The running row number becomes the id: Q0, Q1, ...
test=test.reset_index().rename(columns={'index':'question_id'})
prefix='Q'
test['question_id']=prefix+test['question_id'].astype(str)
## The DataFrame index is written as well (first, unnamed column of GK.csv).
test.to_csv('GK.csv',header=True)


{'Question': 'What year was the first modern Olympic Games held?', 'Category': 'Sports', 'Option A': '1912', 'Option B': '1896', 'Option C': '1908', 'Option D': '1924', 'Correct Option': 'Option B'}
{'Question': 'Who is regarded as the greatest basketball player of all time?', 'Category': 'Sports', 'Option A': 'Michael Jordan', 'Option B': 'LeBron James', 'Option C': 'Kobe Bryant', 'Option D': 'Larry Bird', 'Correct Option': 'Option A'}
{'Question': "Who holds the record for the most Grand Slam men's singles titles in tennis?", 'Category': 'Sports', 'Option A': 'Roger Federer', 'Option B': 'Rafael Nadal', 'Option C': 'Novak Djokovic', 'Option D': 'Andy Murray', 'Correct Option': 'Option A'}
Similar exitss
{'Question': 'Which former professional basketball player was known as "The Black Mamba"?', 'Category': 'Sports', 'Option A': 'Michael Jordan', 'Option B': 'Kobe Bryant', 'Option C': 'LeBron James', 'Option D': 'Larry Bird', 'Correct Option': 'Option B'}
{'Question': 'In what year did

In [20]:
# Show the assembled question table as this cell's output.
test

Unnamed: 0,question_id,Question,Category,Option A,Option B,Option C,Option D,Correct Option
0,Q0,What year was the first modern Olympic Games h...,Sports,1912,1896,1908,1924,Option B
1,Q1,Who is regarded as the greatest basketball pla...,Sports,Michael Jordan,LeBron James,Kobe Bryant,Larry Bird,Option A
2,Q2,Who holds the record for the most Grand Slam m...,Sports,Roger Federer,Rafael Nadal,Novak Djokovic,Andy Murray,Option A
3,Q3,Which former professional basketball player wa...,Sports,Michael Jordan,Kobe Bryant,LeBron James,Larry Bird,Option B
4,Q4,In what year did the United States host the Wi...,Sports,1960,1972,1980,1996,Option C
...,...,...,...,...,...,...,...,...
195,Q195,What famous historical event was sparked by th...,Historical Events,World War I,The French Revolution,The American Civil War,The Holocaust,Option A
196,Q196,"Which historical event took place on June 6, 1...",Historical Events,The bombing of Pearl Harbor,The signing of the Declaration of Independence,The sinking of the Titanic,The invasion of Normandy,Option D
197,Q197,In what year did the United Nations officially...,Historical Events,1945,1955,1965,1975,Option A
198,Q198,Who was the first African American woman to wi...,Historical Events,Florence Griffith-Joyner,Wilma Rudolph,Althea Gibson,Marion Jones,Option C
