In [7]:
from langchain.llms import OpenAI
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.chains import SequentialChain
from langchain.callbacks import get_openai_callback
import os
import json
import pandas as pd
import traceback
from dotenv import load_dotenv
import PyPDF2

In [8]:
load_dotenv()

True

In [9]:
# key = os.getenv("OPENAI_API_KEY")
key = os.getenv("GEMINI_API_KEY")

In [10]:
# llm = ChatOpenAI(openai_api_key=key,model_name="gpt-3.5-turbo", temperature=0.3)
llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", google_api_key=key,temperature=0.7)

In [11]:
RESPONSE_JSON = {
    "1": {
        "mcq": "multiple choice question",
        "options": {
            "a": "choice here",
            "b": "choice here",
            "c": "choice here",
            "d": "choice here",
        },
        "correct": "correct answer",
    },
    "2": {
        "mcq": "multiple choice question",
        "options": {
            "a": "choice here",
            "b": "choice here",
            "c": "choice here",
            "d": "choice here",
        },
        "correct": "correct answer",
    },
    "3": {
        "mcq": "multiple choice question",
        "options": {
            "a": "choice here",
            "b": "choice here",
            "c": "choice here",
            "d": "choice here",
        },
        "correct": "correct answer",
    },
}


In [12]:
"""
number=5 
subject="data science"
tone="simple"
"""

'\nnumber=5 \nsubject="data science"\ntone="simple"\n'

In [13]:
TEMPLATE="""
Text:{text}
You are an expert MCQ maker. Given the above text, it is your job to \
create a quiz  of {number} multiple choice questions for {subject} students in {tone} tone. 
Make sure the questions are not repeated and check all the questions to be conforming the text as well.
Make sure to format your response like  RESPONSE_JSON below  and use it as a guide. \
Ensure to make {number} MCQs
### RESPONSE_JSON
{response_json}

"""

In [14]:
quiz_generation_prompt = PromptTemplate(
    input_variables=["text", "number", "subject", "tone", "response_json"],
    template=TEMPLATE
    )


In [15]:
quiz_generation_prompt

PromptTemplate(input_variables=['number', 'response_json', 'subject', 'text', 'tone'], input_types={}, partial_variables={}, template='\nText:{text}\nYou are an expert MCQ maker. Given the above text, it is your job to create a quiz  of {number} multiple choice questions for {subject} students in {tone} tone. \nMake sure the questions are not repeated and check all the questions to be conforming the text as well.\nMake sure to format your response like  RESPONSE_JSON below  and use it as a guide. Ensure to make {number} MCQs\n### RESPONSE_JSON\n{response_json}\n\n')

In [16]:
quiz_chain=LLMChain(llm=llm, prompt=quiz_generation_prompt, output_key="quiz", verbose=True)

  quiz_chain=LLMChain(llm=llm, prompt=quiz_generation_prompt, output_key="quiz", verbose=True)


In [17]:
TEMPLATE1="""
You are an expert english grammarian and writer. Given a Multiple Choice Quiz for {subject} students.\
You need to evaluate the complexity of the question and give a complete analysis of the quiz. Only use at max 50 words for complexity analysis. 
if the quiz is not at per with the cognitive and analytical abilities of the students,\
update the quiz questions which needs to be changed and change the tone such that it perfectly fits the student abilities
Quiz_MCQs:
{quiz}

Check from an expert English Writer of the above quiz:
"""

In [18]:
quiz_evaluation_prompt=PromptTemplate(input_variables=["subject", "quiz"], template=TEMPLATE1)

In [19]:
review_chain=LLMChain(llm=llm, prompt=quiz_evaluation_prompt, output_key="review", verbose=True)

In [20]:
generate_evaluate_chain=SequentialChain(chains=[quiz_chain, review_chain], input_variables=["text", "number", "subject", "tone", "response_json"],
                                        output_variables=["quiz", "review"], verbose=True,)

In [21]:
"""pdf_reader=PyPDF2.PdfReader(r"experiments\Big Data Assesment.pdf")
text = ""
for page in pdf_reader.pages:
    text += page.extract_text()"""

  """pdf_reader=PyPDF2.PdfReader(r"experiments\Big Data Assesment.pdf")


'pdf_reader=PyPDF2.PdfReader(r"experiments\\Big Data Assesment.pdf")\ntext = ""\nfor page in pdf_reader.pages:\n    text += page.extract_text()'

In [22]:
from pathlib import Path
file_path=r"E:\hamad\MCQGenerator\test.txt"

In [23]:
file_path

'E:\\hamad\\MCQGenerator\\test.txt'

In [24]:
with open(file_path, 'r') as file:
    TEXT = file.read()

In [25]:
TEXT


'Data science is an interdisciplinary academic field[1] that uses statistics, scientific computing, scientific methods, processes, algorithms and systems to extract or extrapolate knowledge and insights from noisy, structured, and unstructured data.[2]\n\nData science also integrates domain knowledge from the underlying application domain (e.g., natural sciences, information technology, and medicine).[3] Data science is multifaceted and can be described as a science, a research paradigm, a research method, a discipline, a workflow, and a profession.[4]\n\nData science is a "concept to unify statistics, data analysis, informatics, and their related methods" to "understand and analyze actual phenomena" with data.[5] It uses techniques and theories drawn from many fields within the context of mathematics, statistics, computer science, information science, and domain knowledge.[6] However, data science is different from computer science and information science. Turing Award winner Jim Gray

In [26]:
# Serialize the Python dictionary into a JSON-formatted string
json.dumps(RESPONSE_JSON)

'{"1": {"mcq": "multiple choice question", "options": {"a": "choice here", "b": "choice here", "c": "choice here", "d": "choice here"}, "correct": "correct answer"}, "2": {"mcq": "multiple choice question", "options": {"a": "choice here", "b": "choice here", "c": "choice here", "d": "choice here"}, "correct": "correct answer"}, "3": {"mcq": "multiple choice question", "options": {"a": "choice here", "b": "choice here", "c": "choice here", "d": "choice here"}, "correct": "correct answer"}}'

In [27]:
"""mcq_count=5
grade=1
tone="simple"""

'mcq_count=5\ngrade=1\ntone="simple'

In [28]:
NUMBER=5 
SUBJECT="data science"
TONE="simple"


In [29]:
#https://python.langchain.com/docs/modules/model_io/llms/token_usage_tracking

#How to setup Token Usage Tracking in LangChain
with get_openai_callback() as cb:
    response=generate_evaluate_chain(
        {
            "text": TEXT,
            "number": NUMBER,
            "subject":SUBJECT,
            "tone": TONE,
            "response_json": json.dumps(RESPONSE_JSON)
        }
        )
# response = generate_evaluate_chain({
#     "text": TEXT,
#     "number": NUMBER,
#     "subject": SUBJECT,
#     "tone": TONE,
#     "response_json": json.dumps(RESPONSE_JSON)
# })


  response=generate_evaluate_chain(




[1m> Entering new SequentialChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
Text:Data science is an interdisciplinary academic field[1] that uses statistics, scientific computing, scientific methods, processes, algorithms and systems to extract or extrapolate knowledge and insights from noisy, structured, and unstructured data.[2]

Data science also integrates domain knowledge from the underlying application domain (e.g., natural sciences, information technology, and medicine).[3] Data science is multifaceted and can be described as a science, a research paradigm, a research method, a discipline, a workflow, and a profession.[4]

Data science is a "concept to unify statistics, data analysis, informatics, and their related methods" to "understand and analyze actual phenomena" with data.[5] It uses techniques and theories drawn from many fields within the context of mathematics, statistics, computer science, information science, and

In [30]:
print(f"Total Tokens:{cb.total_tokens}")
print(f"Prompt Tokens:{cb.prompt_tokens}")
print(f"Completion Tokens:{cb.completion_tokens}")
print(f"Total Cost:{cb.total_cost}")

Total Tokens:2503
Prompt Tokens:1293
Completion Tokens:1210
Total Cost:0.0


In [31]:
response

{'text': 'Data science is an interdisciplinary academic field[1] that uses statistics, scientific computing, scientific methods, processes, algorithms and systems to extract or extrapolate knowledge and insights from noisy, structured, and unstructured data.[2]\n\nData science also integrates domain knowledge from the underlying application domain (e.g., natural sciences, information technology, and medicine).[3] Data science is multifaceted and can be described as a science, a research paradigm, a research method, a discipline, a workflow, and a profession.[4]\n\nData science is a "concept to unify statistics, data analysis, informatics, and their related methods" to "understand and analyze actual phenomena" with data.[5] It uses techniques and theories drawn from many fields within the context of mathematics, statistics, computer science, information science, and domain knowledge.[6] However, data science is different from computer science and information science. Turing Award winner

In [32]:
import json
import re

# Original Gemini output
quiz_str = response.get("quiz", "")

# Step 1: Remove ```json ... ``` wrapper
cleaned = re.sub(r"```json|```", "", quiz_str).strip()

# Step 2: Parse to Python dict
try:
    quiz_dict = json.loads(cleaned)
    print("✅ Successfully parsed JSON.")
except json.JSONDecodeError as e:
    print("❌ Failed to parse JSON:", e)
    print("Raw cleaned string:\n", cleaned)

✅ Successfully parsed JSON.


In [33]:
quiz_dict

{'1': {'mcq': 'Data science is best described as a field that:',
  'options': {'a': 'Focuses solely on creating complex computer programs.',
   'b': 'Uses data to understand and analyze real-world situations.',
   'c': 'Deals exclusively with theoretical mathematical concepts.',
   'd': 'Replaces traditional scientific methods with data analysis.'},
  'correct': 'b'},
 '2': {'mcq': 'Which of the following is NOT explicitly mentioned as a skill incorporated into data science?',
  'options': {'a': 'Computer Science',
   'b': 'Statistics',
   'c': 'Graphic Design',
   'd': 'Astrophysics'},
  'correct': 'd'},
 '3': {'mcq': "According to Jim Gray, data science represents a 'fourth paradigm' of science, building upon which of the following?",
  'options': {'a': 'Alchemy, astrology, and numerology',
   'b': 'Empirical, theoretical, and computational methods',
   'c': 'Intuition, guesswork, and speculation',
   'd': 'Traditional laboratory experiments only'},
  'correct': 'b'},
 '4': {'mcq': '

In [34]:
quiz_table_data = []
for key, value in quiz_dict.items():
    mcq = value["mcq"]
    options = " | ".join(
        [
            f"{option}: {option_value}"
            for option, option_value in value["options"].items()
            ]
        )
    correct = value["correct"]
    quiz_table_data.append({"MCQ": mcq, "Choices": options, "Correct": correct})

In [35]:
quiz_table_data

[{'MCQ': 'Data science is best described as a field that:',
  'Choices': 'a: Focuses solely on creating complex computer programs. | b: Uses data to understand and analyze real-world situations. | c: Deals exclusively with theoretical mathematical concepts. | d: Replaces traditional scientific methods with data analysis.',
  'Correct': 'b'},
 {'MCQ': 'Which of the following is NOT explicitly mentioned as a skill incorporated into data science?',
  'Choices': 'a: Computer Science | b: Statistics | c: Graphic Design | d: Astrophysics',
  'Correct': 'd'},
 {'MCQ': "According to Jim Gray, data science represents a 'fourth paradigm' of science, building upon which of the following?",
  'Choices': 'a: Alchemy, astrology, and numerology | b: Empirical, theoretical, and computational methods | c: Intuition, guesswork, and speculation | d: Traditional laboratory experiments only',
  'Correct': 'b'},
 {'MCQ': 'A data scientist primarily uses programming and statistical knowledge to:',
  'Choices

In [36]:
df=pd.DataFrame(quiz_table_data)

In [37]:
df

Unnamed: 0,MCQ,Choices,Correct
0,Data science is best described as a field that:,a: Focuses solely on creating complex computer...,b
1,Which of the following is NOT explicitly menti...,a: Computer Science | b: Statistics | c: Graph...,d
2,"According to Jim Gray, data science represents...","a: Alchemy, astrology, and numerology | b: Emp...",b
3,A data scientist primarily uses programming an...,a: Write complex algorithms for software devel...,b
4,Which of the following emerging foundational p...,"a: Quantum Physics, String Theory, and Cosmolo...",b


In [38]:
df.to_csv("Big_Data_Science.csv",index=False)

In [39]:
import traceback
try:
    c=1/0
except Exception as e:
        traceback.print_exception(type(e), e, e.__traceback__)

Traceback (most recent call last):
  File "C:\Users\Dell\AppData\Local\Temp\ipykernel_13236\2720247763.py", line 3, in <module>
    c=1/0
      ~^~
ZeroDivisionError: division by zero


In [40]:
import traceback
try:
    c=1/0
except Exception as e:
    tb=e.__traceback__

In [41]:
while tb is not None:
    frame = tb.tb_frame
    filename = frame.f_code.co_filename
    lineno = frame.f_lineno
    function_name = frame.f_code.co_name

    print(f"File '{filename}', line {lineno}, in {function_name}")
    tb = tb.tb_next  # Move to the next frame in the traceback

File 'C:\Users\Dell\AppData\Local\Temp\ipykernel_13236\977600961.py', line 5, in <module>
