In [37]:
import os
import json
import pandas as pd
import traceback
from dotenv import load_dotenv
import os

In [66]:
load_dotenv()
#get the OpenAI API key from environment variables  
openai_api_key = os.getenv("OPENAI_API_KEY") 

In [67]:
from langchain.chat_models import ChatOpenAI
from  langchain import PromptTemplate
from langchain.chains import LLMChain
from langchain.chains import SequentialChain
from langchain.callbacks import get_openai_callback
import PyPDF2

In [68]:
llm= ChatOpenAI(
    model_name="gpt-3.5-turbo",
    temperature=0.5,
    #max_tokens=1000,
    openai_api_key=openai_api_key
)

In [69]:
RESPONSE_JSON = {
    "1": {
        "mcq": "multiple choice question",
        "options": {
            "a": "choice here",
            "b": "choice here",
            "c": "choice here",
            "d": "choice here",
        },
        "correct": "correct answer",
    },
    "2": {
        "mcq": "multiple choice question",
        "options": {
            "a": "choice here",
            "b": "choice here",
            "c": "choice here",
            "d": "choice here",
        },
        "correct": "correct answer",
    },
    "3": {
        "mcq": "multiple choice question",
        "options": {
            "a": "choice here",
            "b": "choice here",
            "c": "choice here",
            "d": "choice here",
        },
        "correct": "correct answer",
    },
}

In [70]:
TEMPLATE="""
Text:{text}
You are an expert MCQ maker. Given the above text, it is your job to \
create a quiz  of {number} multiple choice questions for {subject} students in {tone} tone. 
Make sure the questions are not repeated and check all the questions to be conforming the text as well.
Make sure to format your response like  RESPONSE_JSON below  and use it as a guide. \
Ensure to make {number} MCQs
### RESPONSE_JSON
{response_json}

"""

In [71]:
quiz_generation_prompt = PromptTemplate(
    input_variables=["text", "number", "subject", "tone", "response_json"],
    template=TEMPLATE
)   

In [72]:
quiz_chain= LLMChain(
    llm=llm,
    prompt=quiz_generation_prompt,
    verbose=True,
    output_key="quiz",
)   

In [73]:
TEMPLATE2="""
You are an expert english grammarian and writer. Given a Multiple Choice Quiz for {subject} students.\
You need to evaluate the complexity of the question and give a complete analysis of the quiz. Only use at max 50 words for complexity analysis. 
if the quiz is not at per with the cognitive and analytical abilities of the students,\
update the quiz questions which needs to be changed and change the tone such that it perfectly fits the student abilities
Quiz_MCQs:
{quiz}

Check from an expert English Writer of the above quiz:
"""

In [74]:
quiz_evaluation_prompt = PromptTemplate(
    input_variables=[ "subject", "quiz"],
    template=TEMPLATE2
)   

In [75]:
review_chain= LLMChain(
    llm=llm,
    prompt=quiz_evaluation_prompt,
    verbose=True,
    output_key="review",
)

In [80]:
# Till now we have created the quiz and the review chain, now we need to create a sequential chain to run both of them in sequence
sequential_chain = SequentialChain(
    chains=[quiz_chain, review_chain],
    input_variables=["text", "number", "subject", "tone", "response_json"],
    output_variables=["quiz", "review"],   
    verbose=True,
)

In [82]:
from pathlib import Path
file_path=r"D:\GenerativeAI\mcqgen\data.txt"

In [83]:
#load the text file to variable TEXT
with open(file_path, "r", encoding="utf-8") as file:
    TEXT = file.read()

In [None]:
#serialize the response_json to a string    
response_json = json.dumps(RESPONSE_JSON, indent=4)

'{\n    "1": {\n        "mcq": "multiple choice question",\n        "options": {\n            "a": "choice here",\n            "b": "choice here",\n            "c": "choice here",\n            "d": "choice here"\n        },\n        "correct": "correct answer"\n    },\n    "2": {\n        "mcq": "multiple choice question",\n        "options": {\n            "a": "choice here",\n            "b": "choice here",\n            "c": "choice here",\n            "d": "choice here"\n        },\n        "correct": "correct answer"\n    },\n    "3": {\n        "mcq": "multiple choice question",\n        "options": {\n            "a": "choice here",\n            "b": "choice here",\n            "c": "choice here",\n            "d": "choice here"\n        },\n        "correct": "correct answer"\n    }\n}'

In [90]:
NUMBER = 5
SUBJECT = "Computer Science"        
TONE = "formal"
# Run the sequential chain with the input variables

In [None]:
with get_openai_callback() as cb:
    # Run the sequential chain
    result = sequential_chain(
        {
            "text": TEXT,
            "number": NUMBER,
            "subject": SUBJECT,
            "tone": TONE,
            "response_json": response_json
        }
    )

In [97]:
print(f'Total Tokens Used: {cb.total_tokens}')
print(f'Total Cost: ${cb.total_cost}')
print(f'Completion Tokens: {cb.completion_tokens}')
print(f'Total Prompt Tokens: {cb.prompt_tokens}')

Total Tokens Used: 1418
Total Cost: $0.002345
Completion Tokens: 436
Total Prompt Tokens: 982


In [101]:
quiz=result.get("quiz")  # Get the quiz from the result
print("Quiz Generated:" + quiz)

Quiz Generated:{
    "1": {
        "mcq": "What is the main focus of machine learning?",
        "options": {
            "a": "Developing and studying statistical algorithms",
            "b": "Creating explicit instructions for tasks",
            "c": "Exploring data analysis through supervised learning",
            "d": "Applying mathematical programming methods"
        },
        "correct": "a"
    },
    "2": {
        "mcq": "Which field of study focuses on exploratory data analysis via unsupervised learning?",
        "options": {
            "a": "Machine learning",
            "b": "Natural language processing",
            "c": "Data mining",
            "d": "Predictive analytics"
        },
        "correct": "c"
    },
    "3": {
        "mcq": "What has allowed neural networks to surpass many previous machine learning approaches in performance?",
        "options": {
            "a": "Deep learning",
            "b": "Mathematical optimization",
            "c": "Stat

In [106]:
quiz_table_data = []

# Parse the quiz string to extract questions and options
quiz_dict = json.loads(quiz)
for key, value in quiz_dict.items():
    mcq = value["mcq"]
    options = " | ".join(
        [
            f"{option}: {option_value}"
            for option, option_value in value["options"].items()
        ]
    )
    correct = value["correct"]
    quiz_table_data.append({
        "Question": mcq,
        "Options": options,
        "Correct Answer": correct
    })
    print(quiz_table_data)

[{'Question': 'What is the main focus of machine learning?', 'Options': 'a: Developing and studying statistical algorithms | b: Creating explicit instructions for tasks | c: Exploring data analysis through supervised learning | d: Applying mathematical programming methods', 'Correct Answer': 'a'}]
[{'Question': 'What is the main focus of machine learning?', 'Options': 'a: Developing and studying statistical algorithms | b: Creating explicit instructions for tasks | c: Exploring data analysis through supervised learning | d: Applying mathematical programming methods', 'Correct Answer': 'a'}, {'Question': 'Which field of study focuses on exploratory data analysis via unsupervised learning?', 'Options': 'a: Machine learning | b: Natural language processing | c: Data mining | d: Predictive analytics', 'Correct Answer': 'c'}]
[{'Question': 'What is the main focus of machine learning?', 'Options': 'a: Developing and studying statistical algorithms | b: Creating explicit instructions for task

In [None]:
quiz=pd.DataFrame(quiz_table_data)

Unnamed: 0,Question,Options,Correct Answer
0,What is the main focus of machine learning?,a: Developing and studying statistical algorit...,a
1,Which field of study focuses on exploratory da...,a: Machine learning | b: Natural language proc...,c
2,What has allowed neural networks to surpass ma...,a: Deep learning | b: Mathematical optimizatio...,a
3,What is the application of machine learning to...,a: Natural language processing | b: Data minin...,c
4,Which framework provides a theoretical viewpoi...,a: Deep learning | b: Probably approximately c...,b


In [111]:
# If you have overwritten 'quiz' with a string, use the DataFrame variable created earlier
quiz_df = pd.DataFrame(quiz_table_data)
quiz_df.to_csv("quiz.csv", index=False)