In [7]:
import os, json, traceback
import pandas as pd
from langchain_openai import OpenAI, ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain_core.runnables import RunnableSequence, RunnableMap, RunnableLambda
from langchain_core.output_parsers import StrOutputParser
from langchain.chains import SequentialChain
from langchain.callbacks import get_openai_callback
from dotenv import load_dotenv
import PyPDF2

In [8]:
KEY = os.getenv("OPENAI_API_KEY")
llm = ChatOpenAI(
    openai_api_key = KEY,
    model_name = 'gpt-4o-mini',
    temperature = 0.5
)

In [9]:
RESPONSE_JSON = {
    "1": {
        "mcq": "multiple choice question",
        "options": {
            "a": "choice here",
            "b": "choice here",
            "c": "choice here",
            "d": "choice here",
        },
        "correct": "correct answer",
    },
    "2": {
        "mcq": "multiple choice question",
        "options": {
            "a": "choice here",
            "b": "choice here",
            "c": "choice here",
            "d": "choice here",
        },
        "correct": "correct answer",
    },
    "3": {
        "mcq": "multiple choice question",
        "options": {
            "a": "choice here",
            "b": "choice here",
            "c": "choice here",
            "d": "choice here",
        },
        "correct": "correct answer",
    },
}

In [10]:
TEMPLATE_1 = """
Text:{text}
You are an expert MCQ maker. Given the above text, it is your job to \
create a quiz  of {number} multiple choice questions for {subject} students in {tone} tone. 
Make sure the questions are not repeated and check all the questions to be conforming the text as well.
Make sure to format your response like  RESPONSE_JSON below  and use it as a guide. \
Ensure to make {number} MCQs
### RESPONSE_JSON
{response_json}
"""

In [11]:
quiz_generation_prompt = PromptTemplate(
    input_variables = ["text", "number", "subject", "tone", "response_json"],
    template = TEMPLATE_1
)
quiz_generation_prompt

PromptTemplate(input_variables=['number', 'response_json', 'subject', 'text', 'tone'], input_types={}, partial_variables={}, template='\nText:{text}\nYou are an expert MCQ maker. Given the above text, it is your job to create a quiz  of {number} multiple choice questions for {subject} students in {tone} tone. \nMake sure the questions are not repeated and check all the questions to be conforming the text as well.\nMake sure to format your response like  RESPONSE_JSON below  and use it as a guide. Ensure to make {number} MCQs\n### RESPONSE_JSON\n{response_json}\n')

In [12]:
output_parser = StrOutputParser()
quiz_chain = quiz_generation_prompt | llm | output_parser
print(quiz_chain)

first=PromptTemplate(input_variables=['number', 'response_json', 'subject', 'text', 'tone'], input_types={}, partial_variables={}, template='\nText:{text}\nYou are an expert MCQ maker. Given the above text, it is your job to create a quiz  of {number} multiple choice questions for {subject} students in {tone} tone. \nMake sure the questions are not repeated and check all the questions to be conforming the text as well.\nMake sure to format your response like  RESPONSE_JSON below  and use it as a guide. Ensure to make {number} MCQs\n### RESPONSE_JSON\n{response_json}\n') middle=[ChatOpenAI(client=<openai.resources.chat.completions.Completions object at 0x15d684bc0>, async_client=<openai.resources.chat.completions.AsyncCompletions object at 0x16110d0a0>, root_client=<openai.OpenAI object at 0x15d684aa0>, root_async_client=<openai.AsyncOpenAI object at 0x1610ee0c0>, model_name='gpt-4o-mini', temperature=0.5, model_kwargs={}, openai_api_key=SecretStr('**********'))] last=StrOutputParser()


In [13]:
TEMPLATE_2 = """
You are an expert english grammarian and writer. Given a Multiple Choice Quiz for {subject} students.\
You need to evaluate the complexity of the question and give a complete analysis of the quiz. Only use at max 50 words for complexity analysis. 
if the quiz is not at per with the cognitive and analytical abilities of the students,\
update the quiz questions which needs to be changed and change the tone such that it perfectly fits the student abilities
Quiz_MCQs:
# {quiz}

Check from an expert English Writer of the above quiz:
"""

In [14]:
quiz_evaluation_prompt = PromptTemplate(
    input_variables = ["subject", "quiz"],
    template = TEMPLATE_2
)
quiz_evaluation_prompt

PromptTemplate(input_variables=['quiz', 'subject'], input_types={}, partial_variables={}, template='\nYou are an expert english grammarian and writer. Given a Multiple Choice Quiz for {subject} students.You need to evaluate the complexity of the question and give a complete analysis of the quiz. Only use at max 50 words for complexity analysis. \nif the quiz is not at per with the cognitive and analytical abilities of the students,update the quiz questions which needs to be changed and change the tone such that it perfectly fits the student abilities\nQuiz_MCQs:\n# {quiz}\n\nCheck from an expert English Writer of the above quiz:\n')

In [27]:
review_chain = quiz_evaluation_prompt | llm | output_parser
review_chain

PromptTemplate(input_variables=['quiz', 'subject'], input_types={}, partial_variables={}, template='\nYou are an expert english grammarian and writer. Given a Multiple Choice Quiz for {subject} students.You need to evaluate the complexity of the question and give a complete analysis of the quiz. Only use at max 50 words for complexity analysis. \nif the quiz is not at per with the cognitive and analytical abilities of the students,update the quiz questions which needs to be changed and change the tone such that it perfectly fits the student abilities\nQuiz_MCQs:\n# {quiz}\n\nCheck from an expert English Writer of the above quiz:\n')
| ChatOpenAI(client=<openai.resources.chat.completions.Completions object at 0x15d684bc0>, async_client=<openai.resources.chat.completions.AsyncCompletions object at 0x16110d0a0>, root_client=<openai.OpenAI object at 0x15d684aa0>, root_async_client=<openai.AsyncOpenAI object at 0x1610ee0c0>, model_name='gpt-4o-mini', temperature=0.5, model_kwargs={}, openai

In [None]:
generate_evaluate_chain = RunnableSequence(
    first = RunnableMap({
        "text": lambda inputs: inputs,
        "number": lambda inputs: inputs,
        "subject": lambda inputs: inputs,
        "tone": lambda inputs: inputs,
        "response_json": lambda inputs: inputs,
        "quiz": lambda inputs: inputs
    }),
    middle = [
        quiz_chain,
        RunnableMap({
            "subject": lambda inputs: inputs,
            "quiz": lambda inputs: inputs
        })
    ],
    last = review_chain,
    name = "generate_evaluate_chain"
)

generate_evaluate_chain

<bound method RunnableSequence.invoke of {
  text: RunnableLambda(...),
  number: RunnableLambda(...),
  subject: RunnableLambda(...),
  tone: RunnableLambda(...),
  response_json: RunnableLambda(...),
  quiz: RunnableLambda(...)
}
| PromptTemplate(input_variables=['number', 'response_json', 'subject', 'text', 'tone'], input_types={}, partial_variables={}, template='\nText:{text}\nYou are an expert MCQ maker. Given the above text, it is your job to create a quiz  of {number} multiple choice questions for {subject} students in {tone} tone. \nMake sure the questions are not repeated and check all the questions to be conforming the text as well.\nMake sure to format your response like  RESPONSE_JSON below  and use it as a guide. Ensure to make {number} MCQs\n### RESPONSE_JSON\n{response_json}\n')
  | ChatOpenAI(client=<openai.resources.chat.completions.Completions object at 0x15d684bc0>, async_client=<openai.resources.chat.completions.AsyncCompletions object at 0x16110d0a0>, root_client=<

In [17]:
file_path = os.getenv("DATA_FILE_PATH")

with open(file_path, 'r') as file:
    TEXT = file.read()

TEXT

'Data science is an interdisciplinary academic field[1] that uses statistics, scientific computing, scientific methods, processes, algorithms and systems to extract or extrapolate knowledge and insights from noisy, structured, and unstructured data.[2]\n\nData science also integrates domain knowledge from the underlying application domain (e.g., natural sciences, information technology, and medicine).[3] Data science is multifaceted and can be described as a science, a research paradigm, a research method, a discipline, a workflow, and a profession.[4]\n\nData science is a "concept to unify statistics, data analysis, informatics, and their related methods" to "understand and analyze actual phenomena" with data.[5] It uses techniques and theories drawn from many fields within the context of mathematics, statistics, computer science, information science, and domain knowledge.[6] However, data science is different from computer science and information science. Turing Award winner Jim Gray

In [18]:
json.dumps(RESPONSE_JSON)

'{"1": {"mcq": "multiple choice question", "options": {"a": "choice here", "b": "choice here", "c": "choice here", "d": "choice here"}, "correct": "correct answer"}, "2": {"mcq": "multiple choice question", "options": {"a": "choice here", "b": "choice here", "c": "choice here", "d": "choice here"}, "correct": "correct answer"}, "3": {"mcq": "multiple choice question", "options": {"a": "choice here", "b": "choice here", "c": "choice here", "d": "choice here"}, "correct": "correct answer"}}'

In [19]:
NUMBER = 5 
SUBJECT = "data science"
TONE = "simple"

In [20]:
#https://python.langchain.com/docs/modules/model_io/llms/token_usage_tracking

#Setup Token Usage Tracking in LangChain
with get_openai_callback() as cb:
    response = generate_evaluate_chain.invoke({
        "text": TEXT,
        "number": NUMBER,
        "subject":SUBJECT,
        "tone": TONE,
        "response_json": json.dumps(RESPONSE_JSON),
        "quiz": "this is the quiz"
    })

print(response)

### Complexity Analysis
The quiz questions are moderately complex, requiring comprehension of specific terminology and concepts in data science. They may challenge students who are new to the subject, particularly regarding paradigm classifications and domain knowledge integration.

### Suggested Changes
To better align with students' cognitive and analytical abilities, I recommend simplifying some questions for clarity and comprehension. Here are the revised questions:

1. **What is the main goal of data science?**  
   - a) To analyze data  
   - b) To create games  
   - c) To write stories  
   - d) To build structures  
   - **Correct:** a

2. **Which field is NOT mentioned as part of data science?**  
   - a) Natural sciences  
   - b) Information technology  
   - c) Medicine  
   - d) Cooking  
   - **Correct:** d

3. **What does Jim Gray call data science?**  
   - a) First type of science  
   - b) Second type of science  
   - c) Third type of science  
   - d) Fourth type o

In [21]:
print(f"Total Tokens:{cb.total_tokens}")
print(f"Prompt Tokens:{cb.prompt_tokens}")
print(f"Completion Tokens:{cb.completion_tokens}")
print(f"Total Cost:{cb.total_cost}")

Total Tokens:7115
Prompt Tokens:5873
Completion Tokens:1242
Total Cost:0.0016261499999999998


In [22]:
quiz_str = response.get("quiz")

AttributeError: 'str' object has no attribute 'get'