In [76]:
from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.chains import SequentialChain
from langchain.callbacks import get_openai_callback
import os
import json
import pandas as pd
import traceback
from dotenv import load_dotenv
import PyPDF2


In [77]:
load_dotenv()

True

In [78]:
key = os.getenv("OPENAI_API_KEY")

In [79]:
llm = ChatOpenAI(openai_api_key=key,model_name="gpt-3.5-turbo", temperature=0.3)

In [80]:
RESPONSE_JSON = {
    "1": {
        "mcq": "multiple choice question",
        "options": {
            "a": "choice here",
            "b": "choice here",
            "c": "choice here",
            "d": "choice here",
        },
        "correct": "correct answer",
    },
    "2": {
        "mcq": "multiple choice question",
        "options": {
            "a": "choice here",
            "b": "choice here",
            "c": "choice here",
            "d": "choice here",
        },
        "correct": "correct answer",
    },
    "3": {
        "mcq": "multiple choice question",
        "options": {
            "a": "choice here",
            "b": "choice here",
            "c": "choice here",
            "d": "choice here",
        },
        "correct": "correct answer",
    },
}


In [81]:
"""
number=5 
subject="data science"
tone="simple"
"""

'\nnumber=5 \nsubject="data science"\ntone="simple"\n'

In [82]:
TEMPLATE="""
Text:{text}
You are an expert MCQ maker. Given the above text, it is your job to \
create a quiz  of {number} multiple choice questions for {subject} students in {tone} tone. 
Make sure the questions are not repeated and check all the questions to be conforming the text as well.
Make sure to format your response like  RESPONSE_JSON below  and use it as a guide. \
Ensure to make {number} MCQs
### RESPONSE_JSON
{response_json}

"""

In [83]:
quiz_generation_prompt = PromptTemplate(
    input_variables=["text", "number", "subject", "tone", "response_json"],
    template=TEMPLATE
    )


In [84]:
quiz_generation_prompt

PromptTemplate(input_variables=['number', 'response_json', 'subject', 'text', 'tone'], input_types={}, partial_variables={}, template='\nText:{text}\nYou are an expert MCQ maker. Given the above text, it is your job to create a quiz  of {number} multiple choice questions for {subject} students in {tone} tone. \nMake sure the questions are not repeated and check all the questions to be conforming the text as well.\nMake sure to format your response like  RESPONSE_JSON below  and use it as a guide. Ensure to make {number} MCQs\n### RESPONSE_JSON\n{response_json}\n\n')

In [85]:
quiz_chain=LLMChain(llm=llm, prompt=quiz_generation_prompt, output_key="quiz", verbose=True)

In [87]:
TEMPLATE1="""
You are an expert english grammarian and writer. Given a Multiple Choice Quiz for {subject} students.\
You need to evaluate the complexity of the question and give a complete analysis of the quiz. Only use at max 50 words for complexity analysis. 
if the quiz is not at per with the cognitive and analytical abilities of the students,\
update the quiz questions which needs to be changed and change the tone such that it perfectly fits the student abilities
Quiz_MCQs:
{quiz}

Check from an expert English Writer of the above quiz:
"""

In [88]:
quiz_evaluation_prompt=PromptTemplate(input_variables=["subject", "quiz"], template=TEMPLATE1)

In [89]:
review_chain=LLMChain(llm=llm, prompt=quiz_evaluation_prompt, output_key="review", verbose=True)

In [90]:
generate_evaluate_chain=SequentialChain(chains=[quiz_chain, review_chain], input_variables=["text", "number", "subject", "tone", "response_json"],
                                        output_variables=["quiz", "review"], verbose=True,)

In [91]:
"""pdf_reader=PyPDF2.PdfReader(r"experiments\Big Data Assesment.pdf")
text = ""
for page in pdf_reader.pages:
    text += page.extract_text()"""

  """pdf_reader=PyPDF2.PdfReader(r"experiments\Big Data Assesment.pdf")


'pdf_reader=PyPDF2.PdfReader(r"experiments\\Big Data Assesment.pdf")\ntext = ""\nfor page in pdf_reader.pages:\n    text += page.extract_text()'

In [92]:
from pathlib import Path
file_path=r"D:\MCQGenerator\test.txt"

In [93]:
file_path

'D:\\MCQGenerator\\test.txt'

In [94]:
with open(file_path, 'r') as file:
    TEXT = file.read()

In [95]:
TEXT


'Data science is an interdisciplinary academic field[1] that uses statistics, scientific computing, scientific methods, processes, algorithms and systems to extract or extrapolate knowledge and insights from noisy, structured, and unstructured data.[2]\n\nData science also integrates domain knowledge from the underlying application domain (e.g., natural sciences, information technology, and medicine).[3] Data science is multifaceted and can be described as a science, a research paradigm, a research method, a discipline, a workflow, and a profession.[4]\n\nData science is a "concept to unify statistics, data analysis, informatics, and their related methods" to "understand and analyze actual phenomena" with data.[5] It uses techniques and theories drawn from many fields within the context of mathematics, statistics, computer science, information science, and domain knowledge.[6] However, data science is different from computer science and information science. Turing Award winner Jim Gray

In [96]:
# Serialize the Python dictionary into a JSON-formatted string
json.dumps(RESPONSE_JSON)

'{"1": {"mcq": "multiple choice question", "options": {"a": "choice here", "b": "choice here", "c": "choice here", "d": "choice here"}, "correct": "correct answer"}, "2": {"mcq": "multiple choice question", "options": {"a": "choice here", "b": "choice here", "c": "choice here", "d": "choice here"}, "correct": "correct answer"}, "3": {"mcq": "multiple choice question", "options": {"a": "choice here", "b": "choice here", "c": "choice here", "d": "choice here"}, "correct": "correct answer"}}'

In [97]:
"""mcq_count=5
grade=1
tone="simple"""

'mcq_count=5\ngrade=1\ntone="simple'

In [98]:
NUMBER=5 
SUBJECT="data science"
TONE="simple"


In [99]:
#https://python.langchain.com/docs/modules/model_io/llms/token_usage_tracking

#How to setup Token Usage Tracking in LangChain
with get_openai_callback() as cb:
    response=generate_evaluate_chain(
        {
            "text": TEXT,
            "number": NUMBER,
            "subject":SUBJECT,
            "tone": TONE,
            "response_json": json.dumps(RESPONSE_JSON)
        }
        )



[1m> Entering new SequentialChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
Text:Data science is an interdisciplinary academic field[1] that uses statistics, scientific computing, scientific methods, processes, algorithms and systems to extract or extrapolate knowledge and insights from noisy, structured, and unstructured data.[2]

Data science also integrates domain knowledge from the underlying application domain (e.g., natural sciences, information technology, and medicine).[3] Data science is multifaceted and can be described as a science, a research paradigm, a research method, a discipline, a workflow, and a profession.[4]

Data science is a "concept to unify statistics, data analysis, informatics, and their related methods" to "understand and analyze actual phenomena" with data.[5] It uses techniques and theories drawn from many fields within the context of mathematics, statistics, computer science, information science, and

AuthenticationError: Error code: 401 - {'error': {'message': 'Incorrect API key provided: lsv2_pt_***************************************632d. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}

In [57]:
print(f"Total Tokens:{cb.total_tokens}")
print(f"Prompt Tokens:{cb.prompt_tokens}")
print(f"Completion Tokens:{cb.completion_tokens}")
print(f"Total Cost:{cb.total_cost}")

Total Tokens:1671
Prompt Tokens:1217
Completion Tokens:454
Total Cost:0.0027335000000000003


In [58]:
response

{'text': 'Data science is an interdisciplinary academic field[1] that uses statistics, scientific computing, scientific methods, processes, algorithms and systems to extract or extrapolate knowledge and insights from noisy, structured, and unstructured data.[2]\n\nData science also integrates domain knowledge from the underlying application domain (e.g., natural sciences, information technology, and medicine).[3] Data science is multifaceted and can be described as a science, a research paradigm, a research method, a discipline, a workflow, and a profession.[4]\n\nData science is a "concept to unify statistics, data analysis, informatics, and their related methods" to "understand and analyze actual phenomena" with data.[5] It uses techniques and theories drawn from many fields within the context of mathematics, statistics, computer science, information science, and domain knowledge.[6] However, data science is different from computer science and information science. Turing Award winner

In [59]:
quiz_str=response.get("quiz")

In [60]:
quiz_dict = json.loads(quiz_str)

In [61]:
quiz_dict

{'1': {'mcq': 'What is data science?',
  'options': {'a': 'A field that uses statistics to extract knowledge from data',
   'b': 'A field that uses scientific computing to analyze data',
   'c': 'An interdisciplinary field that uses statistics, scientific computing, and scientific methods to extract knowledge from data',
   'd': 'A field that uses algorithms and systems to extrapolate insights from data'},
  'correct': 'c'},
 '2': {'mcq': 'What does data science integrate?',
  'options': {'a': 'Domain knowledge from natural sciences',
   'b': 'Domain knowledge from information technology',
   'c': 'Domain knowledge from medicine',
   'd': 'All of the above'},
  'correct': 'd'},
 '3': {'mcq': 'How is data science different from computer science and information science?',
  'options': {'a': 'Data science uses techniques and theories from mathematics and statistics',
   'b': 'Data science uses techniques and theories from computer science and information science',
   'c': 'Data science us

In [64]:
quiz_table_data = []
for key, value in quiz_dict.items():
    mcq = value["mcq"]
    options = " | ".join(
        [
            f"{option}: {option_value}"
            for option, option_value in value["options"].items()
            ]
        )
    correct = value["correct"]
    quiz_table_data.append({"MCQ": mcq, "Choices": options, "Correct": correct})

In [29]:
quiz_table_data

[{'MCQ': 'What are the supported paradigms in Python?',
  'Choices': 'a: Object-oriented programming and structured programming | b: Functional programming and aspect-oriented programming | c: Design by contract and logic programming | d: All of the above',
  'Correct': 'd'},
 {'MCQ': 'What memory management technique does Python use?',
  'Choices': 'a: Reference counting | b: Garbage collection | c: Dynamic typing | d: Cycle detection',
  'Correct': 'b'},
 {'MCQ': 'Which module in the standard library implements functional tools borrowed from Haskell and Standard ML?',
  'Choices': 'a: itertools | b: functools | c: filter | d: map',
  'Correct': 'b'},
 {'MCQ': 'According to the Zen of Python, what is preferred: complex or complicated?',
  'Choices': 'a: Complex | b: Complicated | c: Both are equally preferred | d: None of the above',
  'Correct': 'a'},
 {'MCQ': 'What is the core philosophy of Python?',
  'Choices': 'a: Beautiful is better than ugly | b: Explicit is better than implici

In [65]:
df=pd.DataFrame(quiz_table_data)

In [66]:
df

Unnamed: 0,MCQ,Choices,Correct
0,What is data science?,a: A field that uses statistics to extract kno...,c
1,What does data science integrate?,a: Domain knowledge from natural sciences | b:...,d
2,How is data science different from computer sc...,a: Data science uses techniques and theories f...,c
3,What did Jim Gray imagine data science as?,a: A research paradigm | b: A research method ...,d
4,What is a data scientist?,a: A professional who creates insights from da...,c


In [67]:
df.to_csv("Data_Science_Quiz.csv",index=False)

In [4]:
import traceback
try:
    c=1/0
except Exception as e:
        traceback.print_exception(type(e), e, e.__traceback__)

Traceback (most recent call last):
  File "C:\Users\sunny\AppData\Local\Temp\ipykernel_34324\2720247763.py", line 3, in <module>
    c=1/0
ZeroDivisionError: division by zero


In [9]:
import traceback
try:
    c=1/0
except Exception as e:
    tb=e.__traceback__

In [10]:
while tb is not None:
    frame = tb.tb_frame
    filename = frame.f_code.co_filename
    lineno = frame.f_lineno
    function_name = frame.f_code.co_name

    print(f"File '{filename}', line {lineno}, in {function_name}")
    tb = tb.tb_next  # Move to the next frame in the traceback

File 'C:\Users\sunny\AppData\Local\Temp\ipykernel_34324\977600961.py', line 5, in <module>
