In [31]:
import os 
import json 
import pandas as pd 
import traceback

In [32]:
from dotenv import load_dotenv
load_dotenv()

True

In [33]:
from langchain.chat_models import ChatOpenAI

In [34]:
KEY = os.getenv('OPENAI_API_KEY')

In [35]:
llm = ChatOpenAI(openai_api_key = KEY, model_name = 'gpt-3.5-turbo', temperature = 0.5)

In [36]:
from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.chains import SequentialChain
from langchain.callbacks import get_openai_callback
import PyPDF2

In [37]:
RESPONSE_JSON = {
    "1": {
        'mcq': 'multiple choice question',
        "options": {
            'a': 'choice here',
            'b': 'choice here',
            'c': 'choice here',
            'd': 'choice here',
        },
        'correct': 'correct answer',
    },
    "2": {
        'mcq': 'multiple choice question',
        "options": {
            'a': 'choice here',
            'b': 'choice here',
            'c': 'choice here',
            'd': 'choice here',
        },
        'correct': 'correct answer',
    },
    "3": {
        'mcq': 'multiple choice question',
        "options": {
            'a': 'choice here',
            'b': 'choice here',
            'c': 'choice here',
            'd': 'choice here',
        },
        'correct': 'correct answer',
    },
    "4": {
        'mcq': 'multiple choice question',
        "options": {
            'a': 'choice here',
            'b': 'choice here',
            'c': 'choice here',
            'd': 'choice here',
        },
        'correct': 'correct answer',
    },
    "5": {
        'mcq': 'multiple choice question',
        "options": {
            'a': 'choice here',
            'b': 'choice here',
            'c': 'choice here',
            'd': 'choice here',
        },
        'correct': 'correct answer',
    },
}

In [38]:
TEMPLATE = """
Text: {text}
You are an expert MCQ maker. Given the above text, it is your job to \
create a quiz of {number} multiple choice questions for {subject} students in {tone} tone.
Make sure the question are not repeated and check all the questions to be conforming the text as well.
Make sure to format your response like RESPONSE_JSON below and use it as a guide. \
Ensure to make {number} MCQs
### RESPONSE_JSON
{response_json}
"""

In [39]:
quiz_generation_prompt = PromptTemplate(
    input_variables = ['text', 'number', 'subject', 'tone', 'response_json'],
    template = TEMPLATE
)

In [40]:
quiz_chain = LLMChain(llm = llm, prompt = quiz_generation_prompt, output_key = 'quiz', verbose = True)

In [41]:
TEMPLATE2 = """
You are an expert english grammarian and writer. Give a Mulitple Choice Quiz for {subject} students. \
You need to evaluate the complexity of the question and give a complete analysis of the quiz. Only use at max 50 words for complex questions.
If the quiz is not at par with the cognitive and analytical abilities of the students, \
update the quiz question which needs to be changed and change the tone such that it perfectly fits the student abilities.
Quiz MCQs:
{quiz}

Check from an expert English Writer of the above quiz:
"""

In [42]:
quiz_evaluation_prompt = PromptTemplate(input_variables = ['subject', 'quiz'], template = TEMPLATE)

In [43]:
review_chain = LLMChain(llm = llm, prompt = quiz_evaluation_prompt, output_key = 'review', verbose = True)

In [44]:
generate_evaluate_chain = SequentialChain(chains = [quiz_chain, review_chain], input_variables = ['text', 'number', 'subject', 'tone', 'response_json'],
                                        output_variables = ['quiz', 'review'], verbose = True)

In [45]:
file_path = r'/Users/kushalbanda/Generative AI/MCQ-Generator/data/data.txt'

In [46]:
with open(file_path, 'r') as file:
    TEXT = file.read()

In [47]:
print(TEXT)

A key characteristic of LLMs is their ability to respond to unpredictable queries. A traditional computer program receives commands in its accepted syntax, or from a certain set of inputs from the user. A video game has a finite set of buttons, an application has a finite set of things a user can click or type, and a programming language is composed of precise if/then statements.
By contrast, an LLM can respond to natural human language and use data analysis to answer an unstructured question or prompt in a way that makes sense. Whereas a typical computer program would not recognize a prompt like "What are the four greatest funk bands in history?", an LLM might reply with a list of four such bands, and a reasonably cogent defense of why they are the best.
In terms of the information they provide, however, LLMs can only be as reliable as the data they ingest. If fed false information, they will give false information in response to user queries. LLMs also sometimes "hallucinate": they c

In [48]:
# Serialize the Python dictionary into a JSON-formatted string
json.dumps(RESPONSE_JSON)

'{"1": {"mcq": "multiple choice question", "options": {"a": "choice here", "b": "choice here", "c": "choice here", "d": "choice here"}, "correct": "correct answer"}, "2": {"mcq": "multiple choice question", "options": {"a": "choice here", "b": "choice here", "c": "choice here", "d": "choice here"}, "correct": "correct answer"}, "3": {"mcq": "multiple choice question", "options": {"a": "choice here", "b": "choice here", "c": "choice here", "d": "choice here"}, "correct": "correct answer"}, "4": {"mcq": "multiple choice question", "options": {"a": "choice here", "b": "choice here", "c": "choice here", "d": "choice here"}, "correct": "correct answer"}, "5": {"mcq": "multiple choice question", "options": {"a": "choice here", "b": "choice here", "c": "choice here", "d": "choice here"}, "correct": "correct answer"}}'

In [49]:
NUMBER = 5
SUBJECT = 'Generative AI'
TONE = 'simple'

In [51]:
# How to setup Token Usage Tracking in LangChain
with get_openai_callback() as cb:
    response = generate_evaluate_chain(
        {
            'text': TEXT,
            "number": NUMBER,
            'subject': SUBJECT,
            'tone': TONE,
            'response_json': json.dumps(RESPONSE_JSON)
        }
    )




[1m> Entering new SequentialChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
Text: A key characteristic of LLMs is their ability to respond to unpredictable queries. A traditional computer program receives commands in its accepted syntax, or from a certain set of inputs from the user. A video game has a finite set of buttons, an application has a finite set of things a user can click or type, and a programming language is composed of precise if/then statements.
By contrast, an LLM can respond to natural human language and use data analysis to answer an unstructured question or prompt in a way that makes sense. Whereas a typical computer program would not recognize a prompt like "What are the four greatest funk bands in history?", an LLM might reply with a list of four such bands, and a reasonably cogent defense of why they are the best.
In terms of the information they provide, however, LLMs can only be as reliable as the data they

In [52]:
print(f'Total Tokens: {cb.total_tokens}')
print(f'Prompt Tokens: {cb.prompt_tokens}')
print(f'Completion Tokens: {cb.completion_tokens}')
print(f'Total Cost: {cb.total_cost}')


Total Tokens: 2314
Prompt Tokens: 1458
Completion Tokens: 856
Total Cost: 0.003899


In [55]:
quiz = response.get('quiz')

In [59]:
quiz = json.loads(quiz)

In [60]:
quiz_table_data = []
for key, value in quiz.items():
    mcq = value['mcq']
    options = " | ".join(
        [
            f'{option}: {option_value}'
            for option, option_value in value['options'].items()
            ]
        )
    correct = value['correct']
    quiz_table_data.append({'MCQ': mcq, 'Choices': options, 'Correct': correct})


In [61]:
quiz_table_data

[{'MCQ': 'What is a key characteristic of LLMs?',
  'Choices': 'a: They can only respond to predictable queries | b: They can respond to unpredictable queries | c: They have a finite set of inputs | d: They only understand programming languages',
  'Correct': 'b'},
 {'MCQ': 'How do LLMs differ from traditional computer programs in terms of query response?',
  'Choices': 'a: LLMs use a finite set of buttons | b: LLMs respond to natural human language | c: Traditional programs are more reliable | d: Traditional programs can hallucinate',
  'Correct': 'b'},
 {'MCQ': 'What can happen if LLMs are fed false information?',
  'Choices': 'a: They will provide accurate responses | b: They will give false information | c: They will refuse to respond | d: They will delete the data',
  'Correct': 'b'},
 {'MCQ': 'What is a security concern with user-facing applications based on LLMs?',
  'Choices': 'a: They are not prone to bugs | b: They cannot be manipulated | c: They may expose confidential data 

In [63]:
quiz = pd.DataFrame(quiz_table_data)

In [64]:
quiz.to_csv('LLM.csv', index = False)