In [1]:
import os
import json
import pandas as pd
import traceback

In [2]:
pip install langchain


Note: you may need to restart the kernel to use updated packages.


In [3]:
from langchain.chat_models import ChatOpenAI

In [4]:
from dotenv import load_dotenv

# Read key/value pairs from a .env file into the process environment so that
# the os.getenv lookup in the next cell can see them.
load_dotenv()

True

In [5]:
# OpenAI API key taken from the environment; None when the variable is unset.
KEY = os.environ.get("OPENAI_API_KEY")

In [6]:
pip install openai

Note: you may need to restart the kernel to use updated packages.


In [7]:
# Chat model instance handed to the chains defined further down.
llm = ChatOpenAI(
    openai_api_key=KEY,
    model_name="gpt-3.5-turbo",
    temperature=0.5,
)



In [8]:
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.chains import SequentialChain
from langchain.callbacks import get_openai_callback
import PyPDF2

In [9]:
# Example of the answer layout the model is asked to follow: three numbered
# placeholder questions, each with four lettered options and a "correct"
# field. Every entry is built fresh, so no nested dict is shared between
# questions.
RESPONSE_JSON = {
    str(question_no): {
        "mcq": "multiple choice question",
        "options": {letter: "choice here" for letter in "abcd"},
        "correct": "correct answer",
    }
    for question_no in range(1, 4)
}

In [10]:
# Prompt text for the quiz-generation step. It contains the placeholders
# {text}, {number}, {subject}, {tone} and {response_json}, which are filled
# in when the prompt is formatted. A trailing backslash inside the string
# joins that line with the following one, so no newline is emitted there.
TEMPLATE="""
Text:{text}
You are an expert MCQ maker. Given the above text, it is your job to \
create a quiz  of {number} multiple choice questions for {subject} students in {tone} tone. 
Make sure the questions are not repeated and check all the questions to be conforming the text as well.
Make sure to format your response like  RESPONSE_JSON below  and use it as a guide. \
Ensure to make {number} MCQs
### RESPONSE_JSON
{response_json}

"""

In [11]:
# Prompt object for quiz generation; the declared inputs mirror the
# placeholders used inside TEMPLATE.
quiz_generation_prompt = PromptTemplate(
    template=TEMPLATE,
    input_variables=["text", "number", "subject", "tone", "response_json"],
)

In [12]:
# Generation step: formats the quiz prompt, calls the model and exposes the
# reply under the "quiz" output key.
quiz_chain = LLMChain(
    llm=llm,
    prompt=quiz_generation_prompt,
    output_key="quiz",
    verbose=True,
)

In [13]:
# Prompt text for the review step. Placeholders: {subject} and {quiz}.
# A trailing backslash joins a line with the next one without inserting any
# whitespace, so each continued line ends with an explicit space; otherwise
# the rendered prompt reads "students.You" and "students,update".
TEMPLATE2="""
You are an expert english grammarian and writer. Given a Multiple Choice Quiz for {subject} students. \
You need to evaluate the complexity of the question and give a complete analysis of the quiz. Only use at max 50 words for complexity analysis. 
if the quiz is not on par with the cognitive and analytical abilities of the students, \
update the quiz questions which needs to be changed and change the tone such that it perfectly fits the student abilities
Quiz_MCQs:
{quiz}

Check from an expert English Writer of the above quiz:
"""

In [14]:
# Prompt object for the review step; its inputs are the subject and the quiz
# text referenced by the placeholders in TEMPLATE2.
quiz_evaluation_prompt = PromptTemplate(
    template=TEMPLATE2,
    input_variables=["subject", "quiz"],
)

In [15]:
# Review step: formats the evaluation prompt, calls the model and exposes the
# reply under the "review" output key.
review_chain = LLMChain(
    llm=llm,
    prompt=quiz_evaluation_prompt,
    output_key="review",
    verbose=True,
)

In [16]:
# Pipeline that runs quiz generation followed by the review, taking the five
# prompt inputs and returning both the "quiz" and the "review" outputs.
generate_evaluate_chain = SequentialChain(
    chains=[quiz_chain, review_chain],
    input_variables=["text", "number", "subject", "tone", "response_json"],
    output_variables=["quiz", "review"],
    verbose=True,
)

In [17]:
# Location of the source text used to generate the quiz. Set the
# MCQGEN_DATA_PATH environment variable to point at a different file, so the
# notebook is not tied to one machine's directory layout. When the variable is
# unset or empty the original hard-coded location is used, so existing runs
# behave exactly as before.
file_path = os.getenv("MCQGEN_DATA_PATH") or r"C:\Users\raj.bhatt\mcqgen\data.txt"

In [18]:
# Read the whole source document into TEXT. The encoding is passed explicitly
# because open() otherwise falls back to the platform's locale encoding
# (for example cp1252 on Windows), which can garble or reject UTF-8 text.
# NOTE(review): assumes the data file is saved as UTF-8 - adjust if it is not.
with open(file_path, "r", encoding="utf-8") as file:
    TEXT = file.read()

In [19]:
# Preview RESPONSE_JSON serialized as a JSON string. The result is only
# displayed here, not stored; the chain invocation below calls json.dumps
# again when it builds its inputs.
json.dumps(RESPONSE_JSON)

'{"1": {"mcq": "multiple choice question", "options": {"a": "choice here", "b": "choice here", "c": "choice here", "d": "choice here"}, "correct": "correct answer"}, "2": {"mcq": "multiple choice question", "options": {"a": "choice here", "b": "choice here", "c": "choice here", "d": "choice here"}, "correct": "correct answer"}, "3": {"mcq": "multiple choice question", "options": {"a": "choice here", "b": "choice here", "c": "choice here", "d": "choice here"}, "correct": "correct answer"}}'

In [20]:
# Quiz parameters passed to the chain as the "number", "subject" and "tone"
# inputs.
NUMBER = 5
SUBJECT = "ML_DL_LLM"
TONE = "simple"

In [21]:
# Run the generate-then-review pipeline once. The call sits inside the
# callback context so that `cb` can be queried for usage figures afterwards.
with get_openai_callback() as cb:
    response = generate_evaluate_chain(
        dict(
            text=TEXT,
            number=NUMBER,
            subject=SUBJECT,
            tone=TONE,
            response_json=json.dumps(RESPONSE_JSON),
        )
    )



[1m> Entering new SequentialChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
Text:Machine learning (ML) is a field of study in artificial intelligence concerned with the development and study of statistical algorithms that can effectively generalize and thus perform tasks without explicit instructions.[1] Recently, generative artificial neural networks have been able to surpass many previous approaches in performance.[2][3] While machine learning algorithms have shown remarkable performances on various tasks, they are susceptible to inheriting and amplifying biases present in their training data. This can manifest in skewed representations or unfair treatment of different demographics, such as those based on race, gender, language[4], and cultural groups[4].

Machine learning approaches have been applied to large language models, computer vision, speech recognition, email filtering, agriculture, and medicine, where it is too costly


[1m> Finished chain.[0m

[1m> Finished chain.[0m


In [22]:
# Report the token counts and cost recorded on the callback object, one
# figure per line.
print(
    f"Total Tokens:{cb.total_tokens}",
    f"Prompt Tokens:{cb.prompt_tokens}",
    f"Completion Tokens:{cb.completion_tokens}",
    f"Total Cost:{cb.total_cost}",
    sep="\n",
)

Total Tokens:2230
Prompt Tokens:1796
Completion Tokens:434
Total Cost:0.003562


In [23]:
# Display the full result returned by the chain call.
response

{'text': 'Machine learning (ML) is a field of study in artificial intelligence concerned with the development and study of statistical algorithms that can effectively generalize and thus perform tasks without explicit instructions.[1] Recently, generative artificial neural networks have been able to surpass many previous approaches in performance.[2][3] While machine learning algorithms have shown remarkable performances on various tasks, they are susceptible to inheriting and amplifying biases present in their training data. This can manifest in skewed representations or unfair treatment of different demographics, such as those based on race, gender, language[4], and cultural groups[4].\n\nMachine learning approaches have been applied to large language models, computer vision, speech recognition, email filtering, agriculture, and medicine, where it is too costly to develop algorithms to perform the needed tasks.[5][6] ML is known in its application across business problems under the n

In [24]:
# Pull the generated quiz out of the chain result; it is parsed with
# json.loads in the following cell.
quiz = response.get("quiz")

In [25]:
# Parse the model's quiz reply into a dict. The input is read from `response`
# rather than from `quiz` itself so that this cell can be re-run safely: the
# self-referential form quiz = json.loads(quiz) raised TypeError on a second
# run, because by then `quiz` was already a dict (or, further down, a
# DataFrame) instead of the original string.
quiz = json.loads(response.get("quiz"))

In [26]:
# Flatten every parsed question into one record: the question text, its
# options joined as "key: text | key: text | ...", and the value stored under
# "correct".
quiz_table_data = [
    {
        "MCQ": entry["mcq"],
        "Choices": " | ".join(
            f"{label}: {choice}" for label, choice in entry["options"].items()
        ),
        "Correct": entry["correct"],
    }
    for entry in quiz.values()
]

In [27]:
# Inspect the records built for the quiz table.
quiz_table_data

[{'MCQ': 'What is the field of study in artificial intelligence concerned with the development and study of statistical algorithms that can perform tasks without explicit instructions?',
  'Choices': 'a: Machine learning (ML) | b: Deep learning (DL) | c: Large language models (LLMs) | d: Artificial neural networks (ANNs)',
  'Correct': 'a'},
 {'MCQ': 'Which type of machine learning method is based on artificial neural networks with representation learning?',
  'Choices': 'a: Supervised learning | b: Unsupervised learning | c: Semi-supervised learning | d: Deep learning',
  'Correct': 'd'},
 {'MCQ': 'What do deep learning algorithms use multiple layers for?',
  'Choices': 'a: To extract higher-level features from raw input | b: To identify edges in image processing | c: To achieve general-purpose language understanding | d: To learn billions of parameters during training',
  'Correct': 'a'},
 {'MCQ': 'What are the major challenges of large language models (LLMs)?',
  'Choices': 'a: Fact

In [28]:
# Turn the list of records into a table. Note that this rebinds `quiz` from
# the parsed dict to a DataFrame.
quiz = pd.DataFrame(data=quiz_table_data)

In [29]:
# Write the quiz table to a CSV file in the working directory, leaving out
# the DataFrame index.
quiz.to_csv("machinelearning.csv", index=False)

In [30]:
from datetime import datetime
# Current local date and time formatted as MM_DD_YYYY_HH_MM_SS. The string is
# only displayed as the cell result; it is not assigned to anything.
datetime.now().strftime('%m_%d_%Y_%H_%M_%S')

'12_28_2023_18_45_20'