In [309]:
import os
import json
import pandas as pd
import traceback
from langchain.chat_models import ChatOpenAI

In [310]:
from dotenv import load_dotenv
# Load variables from a .env file (python-dotenv), then read the OpenAI key
# from the process environment. KEY is None when the variable is not set.
load_dotenv()
KEY = os.environ.get("OPENAI_API_KEY")

In [311]:
# Chat model instance shared by the quiz-generation and review chains below.
llm = ChatOpenAI(
    openai_api_key=KEY,
    model="gpt-4-0125-preview",
    temperature=0,
)

In [312]:
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.chains import SequentialChain
from langchain.callbacks import get_openai_callback

import PyPDF2

In [313]:
# Quiz parameters that are substituted into the prompt templates.
NUMBER = 10  # fills the {number} placeholder of the quiz prompt
SUBJECT = "marijuana"  # also used as the Wikipedia query and the CSV file name
TONE = "Technical stoner conniseur"  # fills the {tone} placeholder

In [314]:
# Example of the JSON shape the model is asked to return: question numbers
# ("1".."3") mapped to a question, four lettered options, and the correct answer.
# It is serialized with json.dumps and embedded in the quiz prompt as a guide.
RESPONSE_JSON = {
    str(question_number): {
        "mcq": "multiple choice question",
        "option": {letter: "choice here" for letter in "abcd"},
        "correct": "correct answer here",
    }
    for question_number in range(1, 4)
}

In [315]:
# Prompt for the quiz-generation step.
# Placeholders: {text} (source material), {number} (how many questions),
# {subject}, {tone}, {response_json} (serialized example of the expected output).
# Fixes relative to the earlier wording:
#   * {number} is the question count only; the number of answer choices is fixed
#     at four (a-d) to match RESPONSE_JSON instead of reusing {number}.
#   * {text} appears once; the instructions refer to "the text above" rather
#     than interpolating the whole source text a second time.
#   * Instructions consistently talk about several questions, and the typo in
#     the JSON-format instruction is corrected.
TEMPLATE = """
Text: {text}
You are an expert Multiple Choice Question writer.
You have been asked to write {number} multiple choice questions about {subject} based on the text above.
The questions should be challenging and test the reader's understanding of the text.
The questions should not be repeated, and each correct answer should be checked against its question.
Each question should have four answer choices (a, b, c, d), with one correct answer and three distractors.
Each question should be multiple choice and have only one correct answer.
Each question should be clear and concise.
The questions should be in {tone} tone.
Ensure that there are exactly {number} multiple choice questions.
Make sure to format your response like RESPONSE_JSON below and use it as a guide, and be sure to return a valid JSON dictionary as a string, as it will be parsed.
{response_json}
"""

In [316]:
# Wrap TEMPLATE in a PromptTemplate and declare the five inputs it expects.
quiz_generateion_prompt = PromptTemplate(
    template=TEMPLATE,
    input_variables=["text", "number", "subject", "tone", "response_json"],
)

In [317]:
# First step: generate the quiz; its text is exposed under the "quiz" key.
quiz_chain = LLMChain(
    llm=llm,
    prompt=quiz_generateion_prompt,
    output_key="quiz",
    verbose=True,
)

In [318]:
# Prompt for the review step. Placeholders: {subject} and {quiz}; "quiz" is the
# output key of the quiz-generation chain, so the generated quiz text is what
# gets reviewed. Asks for a short analysis and, where needed, revised questions.
TEMPLATE2="""
You are an expert english grammarian and writer. Given a Multiple Choice Quiz for {subject} students.
You need to evaluate the complexity of the question and give a complete analysis of the quiz. Only use at max 50 words for complete analysis.
if the quiz is not at par with the cognitive and analytical skills of the students,
update the quiz questions which needs to be changed and the tone such that it perfectly fits the student abilities
Quiz_MCQs: {quiz}

Check from an expert English writer of the above quiz and update the quiz questions and tone to fit the student abilities.

"""

In [319]:
# Wrap TEMPLATE2 in a PromptTemplate taking the subject and the generated quiz.
quiz_evaluation_prompt = PromptTemplate(
    template=TEMPLATE2,
    input_variables=["subject", "quiz"],
)

In [320]:
# Second step: review the generated quiz; its text is exposed under "review".
review_chain = LLMChain(
    llm=llm,
    prompt=quiz_evaluation_prompt,
    output_key="review",
    verbose=True,
)

In [321]:
# Run quiz generation followed by review as one chain; both the "quiz" and
# "review" outputs are returned to the caller.
generate_evaluate_chain = SequentialChain(
    chains=[quiz_chain, review_chain],
    input_variables=["text", "number", "subject", "tone", "response_json"],
    output_variables=["quiz", "review"],
    verbose=True,
)

In [322]:
from langchain.tools import WikipediaQueryRun
from langchain_community.utilities import WikipediaAPIWrapper

In [323]:
# Wikipedia lookup tool built on the API wrapper with its default settings.
wikipedia = WikipediaQueryRun(
    api_wrapper=WikipediaAPIWrapper(),
)

In [324]:
# Query Wikipedia for SUBJECT; the returned text is the source material for the quiz.
# NOTE(review): the wrapper presumably caps the amount of text returned — confirm
# its length limits if the quiz needs fuller coverage.
subject_text = wikipedia.run(SUBJECT)

In [325]:
# Inspect the fetched text before it is saved and used in the prompt.
print(subject_text)

Page: Cannabis (drug)
Summary: Cannabis, also known as marijuana or weed among other names, is a psychoactive drug from the cannabis plant. Native to Central or South Asia, the cannabis plant has been used as a drug for both recreational and entheogenic purposes and in various traditional medicines for centuries. Tetrahydrocannabinol (THC) is the main psychoactive component of cannabis, which is one of the 483 known compounds in the plant, including at least 65 other cannabinoids, such as cannabidiol (CBD). Cannabis can be used by smoking, vaporizing, within food, or as an extract.
Cannabis has various mental and physical effects, which include euphoria, altered states of mind and sense of time, difficulty concentrating, impaired short-term memory, impaired body movement (balance and fine psychomotor control), relaxation, and an increase in appetite. Onset of effects is felt within minutes when smoked, but may take up to 90 minutes when eaten (as orally consumed drugs must be metaboliz

In [326]:
# Where the fetched text is cached; relative to the notebook's working directory
# (one level above it).
file_path = "../data.txt"

In [327]:
# Save the fetched text to disk as UTF-8, replacing any existing file content.
with open(file_path, mode="w", encoding="utf-8") as out_file:
    out_file.write(subject_text)


In [328]:
# Read the saved text back; TEXT is what gets passed to the chain as "text".
with open(file_path, mode="r", encoding="utf-8") as in_file:
    TEXT = in_file.read()

In [329]:
# Serialize the Python dictionary into a JSON-formatted string.
# Displayed for inspection only: the result is not stored here, and the chain
# call serializes RESPONSE_JSON again when building its inputs.
json.dumps(RESPONSE_JSON)

'{"1": {"mcq": "multiple choice question", "option": {"a": "choice here", "b": "choice here", "c": "choice here", "d": "choice here"}, "correct": "correct answer here"}, "2": {"mcq": "multiple choice question", "option": {"a": "choice here", "b": "choice here", "c": "choice here", "d": "choice here"}, "correct": "correct answer here"}, "3": {"mcq": "multiple choice question", "option": {"a": "choice here", "b": "choice here", "c": "choice here", "d": "choice here"}, "correct": "correct answer here"}}'

In [330]:
# Run the two-step chain inside LangChain's OpenAI callback so that token
# usage and cost for the whole run are collected on `cb`.

chain_inputs = {
    "text": TEXT,
    "number": NUMBER,
    "subject": SUBJECT,
    "tone": TONE,
    "response_json": json.dumps(RESPONSE_JSON),
}
with get_openai_callback() as cb:
    response = generate_evaluate_chain(chain_inputs)

# The counters are read after the callback context has exited.
print(
    f"Total Tokens: {cb.total_tokens}",
    f"Prompt Tokens: {cb.prompt_tokens}",
    f"Completion Tokens: {cb.completion_tokens}",
    f"Total Cost (USD): ${cb.total_cost}",
    sep="\n",
)



[1m> Entering new SequentialChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
Text: Page: Cannabis (drug)
Summary: Cannabis, also known as marijuana or weed among other names, is a psychoactive drug from the cannabis plant. Native to Central or South Asia, the cannabis plant has been used as a drug for both recreational and entheogenic purposes and in various traditional medicines for centuries. Tetrahydrocannabinol (THC) is the main psychoactive component of cannabis, which is one of the 483 known compounds in the plant, including at least 65 other cannabinoids, such as cannabidiol (CBD). Cannabis can be used by smoking, vaporizing, within food, or as an extract.
Cannabis has various mental and physical effects, which include euphoria, altered states of mind and sense of time, difficulty concentrating, impaired short-term memory, impaired body movement (balance and fine psychomotor control), relaxation, and an increase in appetite.

In [331]:
# Parse the generated quiz text into a dict.
# The model may wrap its JSON in a Markdown code fence (with or without a
# "json" tag) or add text around it. Stripping one exact fence spelling breaks
# on any other variant, so keep only the outermost {...} span instead.
quiz_raw = response.get("quiz")
if not quiz_raw:
    raise ValueError("The chain response contains no 'quiz' text to parse.")
json_start = quiz_raw.find("{")
json_end = quiz_raw.rfind("}")
if json_start == -1 or json_end < json_start:
    raise ValueError("No JSON object found in the generated quiz text.")
# json.loads raises json.JSONDecodeError (a ValueError subclass) if the span is malformed.
quiz = json.loads(quiz_raw[json_start:json_end + 1])


In [332]:
# Show the review step's output; prints "None" if the response has no "review" key.
print(response.get("review"))

The quiz is well-structured, covering a broad range of topics related to cannabis, from its components and effects to legal status and uses. The language is clear and accessible, suitable for students with a basic understanding of the subject. However, to ensure it aligns perfectly with student abilities and maintains engagement, consider the following adjustments for clarity and inclusivity:

1. No change needed. The question is clear and straightforward.
2. Update for clarity and relevance: "As of 2013, which of these countries had the lowest reported adult use of cannabis?"
3. Clarify consumption method: "Which consumption method of cannabis typically takes the longest for effects to be felt?"
4. Simplify and clarify: "What is a potential long-term effect of starting regular cannabis use in adolescence?"
5. Update for precision: "Which was the first country to legalize recreational cannabis use, doing so in 2013?"
6. Simplify language: "Which is NOT a common physical effect of using

In [333]:
# Flatten the parsed quiz into rows of
# [question number, question, "a: ... | b: ..." options string, correct answer].
quiz_table_data = [
    [
        question_number,
        entry["mcq"],
        " | ".join(
            f"{label}: {choice}" for label, choice in entry["option"].items()
        ),
        entry["correct"],
    ]
    for question_number, entry in quiz.items()
]

In [334]:
# Display the flattened rows to check them before building the DataFrame.
quiz_table_data

[['1',
  'What is the main psychoactive component found in cannabis?',
  'a: Cannabidiol (CBD) | b: Tetrahydrocannabinol (THC) | c: Cannabinol (CBN) | d: Tetrahydrocannabivarin (THCV)',
  'b'],
 ['2',
  'As of 2013, which country was NOT listed as having the highest use of cannabis among adults?',
  'a: Zambia | b: United States | c: Canada | d: France',
  'd'],
 ['3',
  'Which method of cannabis consumption can take up to 90 minutes for effects to be felt?',
  'a: Smoking | b: Vaporizing | c: Within food | d: As an extract',
  'c'],
 ['4',
  'What is a potential long-term adverse effect of cannabis use, particularly in those who started regular use as adolescents?',
  'a: Increased appetite | b: Decreased mental ability | c: Euphoria | d: Altered sense of time',
  'b'],
 ['5',
  'Which country was the first to legalize recreational use of cannabis in 2013?',
  'a: Canada | b: Uruguay | c: Germany | d: South Africa',
  'b'],
 ['6',
  'What is NOT a physical effect of cannabis use?',
  

In [335]:
# Tabulate the rows. Note: this rebinds `quiz` from the parsed dict to a
# DataFrame, so the row-building cell cannot be re-run after this one without
# re-parsing the response first.
quiz = pd.DataFrame(
    data=quiz_table_data,
    columns=["Question Number", "Question", "Options", "Correct Answer"],
)

In [336]:
# Sanity check: `quiz` now refers to the DataFrame, not the parsed dict.
type(quiz)

pandas.core.frame.DataFrame

In [337]:
# Export the quiz table to "<SUBJECT>.csv" in the working directory, without the index column.
quiz.to_csv(f"{SUBJECT}.csv",index=False)