In [2]:
import pandas as pd
import PyPDF2
import json
import traceback
import os

In [3]:
from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.chains import SequentialChain
from langchain.callbacks import get_openai_callback 

## Define your API to access the model -> gpt-3.5-turbo model

In [4]:
# Define the API key directly in the script
OPENAI_API_KEY = "paste_your_api_key"  # Replace with your actual API key

# Set the environment variable
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

## Create the instance for LLM with API key

In [8]:
llm = ChatOpenAI( model = "gpt-3.5-turbo", temperature = 0.7)

In [9]:
# sample response json for prompt template to generate quiz

RESPONSE_JSON = { 
    "1": {
        "mcq": "multiple choice question",
        "options": {
            "a": "choice here",
            "b": "choice here",
            "c": "choice here",
            "d": "choice here",
        },
        "correct": "correct answer",
    },
    "2": {
        "mcq": "multiple choice question",
        "options": {
            "a": "choice here",
            "b": "choice here",
            "c": "choice here",
            "d": "choice here",
        },
        "correct": "correct answer",
    },
    "3": {
        "mcq": "multiple choice question",
        "options": {
            "a": "choice here",
            "b": "choice here",
            "c": "choice here",
            "d": "choice here",
        },
        "correct": "correct answer",
    },
}

### Prompt template to generate a quiz from the text ( blog / PDF file / word document )

In [10]:
TEMPLATE = """
Text:{text}
You are an expert MCQ maker. Given the above text, it is your job to \
create a quiz  of {number} multiple choice questions for {subject} students in {tone} tone. 
Make sure the questions are not repeated and check all the questions to be conforming the text as well.
Make sure to format your response like  RESPONSE_JSON below  and use it as a guide. \
Ensure to make {number} MCQs
### RESPONSE_JSON
{response_json}

"""

In [11]:
quiz_generation_prompt = PromptTemplate(
    input_variables = ["text", "number", "subject", "tone", "response_json"],
    template = TEMPLATE
)

In [13]:
## Define LLM chain to generate the Quiz
quiz_chain = LLMChain(
    llm = llm,
    prompt = quiz_generation_prompt,
    output_key = "quiz",
    verbose = True
)

In [None]:
### prompt template 2 to generate Quiz from the text


In [14]:
## prompt template used to evaluate the complexity of the QUIZ
quiz_evalution_prompt = PromptTemplate(
    input_variables = ["subject", "quiz"],
    template = TEMPLATE
)

In [15]:
## Review the chain to evaluate the quiz and give the feedback
review_chain = LLMChain(
    llm = llm,
    prompt = quiz_evalution_prompt,
    output_key = "review",
    verbose = True
)

In [17]:
## Dequential chain to generate and evaluate the mcq questions
generate_evaluate_chain = SequentialChain(
    chains = [quiz_chain, review_chain],
    input_variables = ["text", "number", "subject", "tone", "response_json"],
    output_variables = ["quiz", "review"],
    verbose = True
)

In [19]:
# File path so that we can generate questions based upon the given data
file_path = r"C:\Users\shiva\Desktop\LLM_MCQ_generation_project\mcq_training_data.txt"
file_path

'C:\\Users\\shiva\\Desktop\\LLM_MCQ_generation_project\\mcq_training_data.txt'

In [20]:
# Read the file
with open(file_path, "r") as file:
    TEXT = file.read()

print(TEXT)

Machine learning (ML) is a field of study in artificial intelligence concerned with the development and study of statistical algorithms that can learn from data and generalize to unseen data and thus perform tasks without explicit instructions.[1] Recently, artificial neural networks have been able to surpass many previous approaches in performance.[2]

ML finds application in many fields, including natural language processing, computer vision, speech recognition, email filtering, agriculture, and medicine. When applied to business problems, it is known under the name predictive analytics. Although not all machine learning is statistically based, computational statistics is an important source of the field's methods.

The mathematical foundations of ML are provided by mathematical optimization (mathematical programming) methods. Data mining is a related (parallel) field of study, focusing on exploratory data analysis (EDA) through unsupervised learning.

From a theoretical viewpoint, p

In [22]:
json.dumps(RESPONSE_JSON)

'{"1": {"mcq": "multiple choice question", "options": {"a": "choice here", "b": "choice here", "c": "choice here", "d": "choice here"}, "correct": "correct answer"}, "2": {"mcq": "multiple choice question", "options": {"a": "choice here", "b": "choice here", "c": "choice here", "d": "choice here"}, "correct": "correct answer"}, "3": {"mcq": "multiple choice question", "options": {"a": "choice here", "b": "choice here", "c": "choice here", "d": "choice here"}, "correct": "correct answer"}}'

In [23]:
NUMBER = 10
SUBJECT = "large language model" #large Language Model
TONE = "simple" # "formal", "professional", "simple"

In [25]:
with get_openai_callback() as cb:
    response = generate_evaluate_chain(
        {
            "text" : TEXT,
    "number":NUMBER,
    "subject" :SUBJECT,
    "tone":TONE,
    "response_json": json.dumps(RESPONSE_JSON)
        }
    )

  response = generate_evaluate_chain(




[1m> Entering new SequentialChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
Text:Machine learning (ML) is a field of study in artificial intelligence concerned with the development and study of statistical algorithms that can learn from data and generalize to unseen data and thus perform tasks without explicit instructions.[1] Recently, artificial neural networks have been able to surpass many previous approaches in performance.[2]

ML finds application in many fields, including natural language processing, computer vision, speech recognition, email filtering, agriculture, and medicine. When applied to business problems, it is known under the name predictive analytics. Although not all machine learning is statistically based, computational statistics is an important source of the field's methods.

The mathematical foundations of ML are provided by mathematical optimization (mathematical programming) methods. Data mining is a relat

In [26]:
print(f"Total Tokens:{cb.total_tokens}")
print(f"Prompt Tokens:{cb.prompt_tokens}")
print(f"Completion Tokens:{cb.completion_tokens}")
print(f"Total Cost:{cb.total_cost}")

Total Tokens:7928
Prompt Tokens:6060
Completion Tokens:1868
Total Cost:0.012825999999999999


In [27]:
response

{'text': 'Machine learning (ML) is a field of study in artificial intelligence concerned with the development and study of statistical algorithms that can learn from data and generalize to unseen data and thus perform tasks without explicit instructions.[1] Recently, artificial neural networks have been able to surpass many previous approaches in performance.[2]\n\nML finds application in many fields, including natural language processing, computer vision, speech recognition, email filtering, agriculture, and medicine. When applied to business problems, it is known under the name predictive analytics. Although not all machine learning is statistically based, computational statistics is an important source of the field\'s methods.\n\nThe mathematical foundations of ML are provided by mathematical optimization (mathematical programming) methods. Data mining is a related (parallel) field of study, focusing on exploratory data analysis (EDA) through unsupervised learning.\n\nFrom a theoret

In [29]:
# Response in json
quiz_Str = response.get("quiz")

In [30]:
quiz_Str

'{\n    "1": {\n        "mcq": "Who coined the term \'machine learning\'?",\n        "options": {\n            "a": "Alan Turing",\n            "b": "Arthur Samuel",\n            "c": "Donald Hebb",\n            "d": "Tom M. Mitchell"\n        },\n        "correct": "b"\n    },\n    "2": {\n        "mcq": "Which field of study focuses on exploratory data analysis through unsupervised learning?",\n        "options": {\n            "a": "Data mining",\n            "b": "Artificial intelligence",\n            "c": "Statistical physics",\n            "d": "Predictive analytics"\n        },\n        "correct": "a"\n    },\n    "3": {\n        "mcq": "What was the experimental \'learning machine\' developed by Raytheon Company in the early 1960s called?",\n        "options": {\n            "a": "Cybernet",\n            "b": "Cybertron",\n            "c": "Cyberspace",\n            "d": "Cyberbot"\n        },\n        "correct": "b"\n    },\n    "4": {\n        "mcq": "Which researcher introd

In [31]:
# use the json.lods on the quiz to convert it from json to dict

quiz = json.loads(quiz_Str)

In [32]:
quiz

{'1': {'mcq': "Who coined the term 'machine learning'?",
  'options': {'a': 'Alan Turing',
   'b': 'Arthur Samuel',
   'c': 'Donald Hebb',
   'd': 'Tom M. Mitchell'},
  'correct': 'b'},
 '2': {'mcq': 'Which field of study focuses on exploratory data analysis through unsupervised learning?',
  'options': {'a': 'Data mining',
   'b': 'Artificial intelligence',
   'c': 'Statistical physics',
   'd': 'Predictive analytics'},
  'correct': 'a'},
 '3': {'mcq': "What was the experimental 'learning machine' developed by Raytheon Company in the early 1960s called?",
  'options': {'a': 'Cybernet',
   'b': 'Cybertron',
   'c': 'Cyberspace',
   'd': 'Cyberbot'},
  'correct': 'b'},
 '4': {'mcq': 'Which researcher introduced a theoretical neural structure formed by interactions among nerve cells in 1949?',
  'options': {'a': 'Ray Kurzweil',
   'b': 'Walter Pitts',
   'c': 'Elon Musk',
   'd': 'Mark Zuckerberg'},
  'correct': 'b'},
 '5': {'mcq': 'What is the main difference between machine learning an

In [37]:
# Convert dict to table

quiz_table_data = []

for key, value in quiz.items():
    mcq = value["mcq"]
    options = " | " .join(
        [
            f"{option}: {option_value}"
            for option, option_value in value["options"].items()
        ]
    )
    correct = value["correct"]
    quiz_table_data.append({"MCQ":mcq,
                               "Choices": options,
                               "Correct": correct})


In [38]:
quiz_table_data

[{'MCQ': "Who coined the term 'machine learning'?",
  'Choices': 'a: Alan Turing | b: Arthur Samuel | c: Donald Hebb | d: Tom M. Mitchell',
  'Correct': 'b'},
 {'MCQ': 'Which field of study focuses on exploratory data analysis through unsupervised learning?',
  'Choices': 'a: Data mining | b: Artificial intelligence | c: Statistical physics | d: Predictive analytics',
  'Correct': 'a'},
 {'MCQ': "What was the experimental 'learning machine' developed by Raytheon Company in the early 1960s called?",
  'Choices': 'a: Cybernet | b: Cybertron | c: Cyberspace | d: Cyberbot',
  'Correct': 'b'},
 {'MCQ': 'Which researcher introduced a theoretical neural structure formed by interactions among nerve cells in 1949?',
  'Choices': 'a: Ray Kurzweil | b: Walter Pitts | c: Elon Musk | d: Mark Zuckerberg',
  'Correct': 'b'},
 {'MCQ': 'What is the main difference between machine learning and statistics?',
  'Choices': 'a: Machine learning focuses on prediction, while statistics focuses on generalizati

In [39]:
pd.DataFrame(quiz_table_data)

Unnamed: 0,MCQ,Choices,Correct
0,Who coined the term 'machine learning'?,a: Alan Turing | b: Arthur Samuel | c: Donald ...,b
1,Which field of study focuses on exploratory da...,a: Data mining | b: Artificial intelligence | ...,a
2,What was the experimental 'learning machine' d...,a: Cybernet | b: Cybertron | c: Cyberspace | d...,b
3,Which researcher introduced a theoretical neur...,a: Ray Kurzweil | b: Walter Pitts | c: Elon Mu...,b
4,What is the main difference between machine le...,"a: Machine learning focuses on prediction, whi...",a
5,What is the primary goal of machine learning?,a: To draw population inferences from a sample...,c
6,Which field of study uses computational learni...,a: Data mining | b: Artificial intelligence | ...,d
7,What is the core objective of a learner in mac...,a: To draw population inferences from a sample...,b
8,Which statistical modeling paradigm did Leo Br...,a: Data model and algorithmic model | b: Super...,a
9,What does the Probably Approximately Correct L...,a: The complexity of the hypothesis should mat...,c


In [42]:
df = pd.DataFrame(quiz_table_data)
df

Unnamed: 0,MCQ,Choices,Correct
0,Who coined the term 'machine learning'?,a: Alan Turing | b: Arthur Samuel | c: Donald ...,b
1,Which field of study focuses on exploratory da...,a: Data mining | b: Artificial intelligence | ...,a
2,What was the experimental 'learning machine' d...,a: Cybernet | b: Cybertron | c: Cyberspace | d...,b
3,Which researcher introduced a theoretical neur...,a: Ray Kurzweil | b: Walter Pitts | c: Elon Mu...,b
4,What is the main difference between machine le...,"a: Machine learning focuses on prediction, whi...",a
5,What is the primary goal of machine learning?,a: To draw population inferences from a sample...,c
6,Which field of study uses computational learni...,a: Data mining | b: Artificial intelligence | ...,d
7,What is the core objective of a learner in mac...,a: To draw population inferences from a sample...,b
8,Which statistical modeling paradigm did Leo Br...,a: Data model and algorithmic model | b: Super...,a
9,What does the Probably Approximately Correct L...,a: The complexity of the hypothesis should mat...,c


In [43]:
df.to_csv("machine_learning_quiz.csv", index = False)

In [None]:
# Assignment
-> Create fill in the blank questions