In [1]:
import json
import pandas as pd
from langchain_openai import ChatOpenAI
from dotenv import load_dotenv
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import PromptTemplate
from langchain.chains.llm import LLMChain
from langchain.chains.sequential import SequentialChain
from langchain.callbacks.manager import get_openai_callback
from PyPDF2 import PdfReader
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings

load_dotenv()

True

In [2]:
pdfreader = PdfReader("077_Entrance_Study_Guide.pdf")

text = ""
for i, page in enumerate(pdfreader.pages):
    content = page.extract_text()
    if content:
        text += content

In [5]:
llm = ChatOpenAI(
    model_name="gpt-3.5-turbo", 
    temperature=0.2
    )

In [6]:
RESPONSE_JSON = {
    "1": {
        "mcq": "multiple choice question",
        "options": {
            "A.": "choice here",
            "B.": "choice here",
            "C.": "choice here",
            "D.": "choice here",
        },
        "correct": "correct answer",
    },
    "2": {
        "mcq": "multiple choice question",
        "options": {
            "A.": "choice here",
            "B.": "choice here",
            "C.": "choice here",
            "D.": "choice here",
        },
        "correct": "correct answer",
    },
    "3": {
        "mcq": "multiple choice question",
        "options": {
            "A.": "choice here",
            "B.": "choice here",
            "C.": "choice here",
            "D.": "choice here",
        },
        "correct": "correct answer",
    },
}


In [7]:
first_template = """
Text: {text}
You are an examiner with the Apprenticeship and Industry Training organization in Alberta, Canada. The above text is a sample exam for skilled trades pre-apprenticeships from the AIT organization. \
The answers to the questions are at the end of the text. Please generate SEVENTY exam standard multiple choice questions using the text as a reference. The multiple choice questions should be \
generated from the questions part of the text. Generate a quiz having the SEVENTY multiple choice questions for all three subjects (Mathematics, English and Science) in the text. There is no \
need to reference diagrams in the generated questions. Just create questions that can be answered without referring to the diagrams. Make sure the questions are not repeated and also \
format your response like RESPONSE_JSON below. The generated questions must be SEVENTY in total (a combination of Mathematics, Science, and English). \
### RESPONSE_JSON
{response_json}
"""

In [8]:
quiz_generation_prompt = PromptTemplate(
    input_variables=["text", "response_json"],
    template=first_template,
    )

In [9]:
quiz_chain = LLMChain(
    llm=llm, 
    prompt=quiz_generation_prompt, 
    output_key="quiz", 
    verbose=True
    )

In [10]:
second_template = """
You are an expert AIT exam reviewer and you ensure that the exam questions meet up to the required standard. Given a Multiple Choice Quiz recently generated, \
you need to evaluate the complexity of the question and give a complete analysis of the quiz. Only use at max 50 words for complexity analysis. \
if the quiz is not at per with the required standard, update the quiz questions that needs to be changed.
Quiz_MCQs:
{quiz}

Check from an expert reviewer for the above quiz:
"""

In [11]:
quiz_evaluation_prompt = PromptTemplate(
    input_variables=["quiz"], 
    template=second_template
    )

In [12]:
review_chain = LLMChain(
    llm=llm, 
    prompt=quiz_evaluation_prompt, 
    output_key="review", 
    verbose=True
    )

In [13]:
generate_evaluate_chain = SequentialChain(
    chains=[quiz_chain, review_chain], 
    input_variables=["text", "response_json"],
    output_variables=["quiz", "review"], 
    verbose=True
    )

In [14]:
# Serialize the Python dictionary into a JSON-formatted string
json.dumps(RESPONSE_JSON)

'{"1": {"mcq": "multiple choice question", "options": {"A.": "choice here", "B.": "choice here", "C.": "choice here", "D.": "choice here"}, "correct": "correct answer"}, "2": {"mcq": "multiple choice question", "options": {"A.": "choice here", "B.": "choice here", "C.": "choice here", "D.": "choice here"}, "correct": "correct answer"}, "3": {"mcq": "multiple choice question", "options": {"A.": "choice here", "B.": "choice here", "C.": "choice here", "D.": "choice here"}, "correct": "correct answer"}}'

In [15]:
with get_openai_callback() as cb:
    response=generate_evaluate_chain(
        {
            "text": text,
            "response_json": json.dumps(RESPONSE_JSON)
        }
    )

  warn_deprecated(




[1m> Entering new SequentialChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
Text: 2020-May-21  Trade Entrance Exam Study Guide  
Classification: Protected A  APPRENTICESHIP  
TRAINING  
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
  2 
Classification: Protected A  TRADE ENTRANCE EXAM  
Study Guide  
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1  
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2003, Her Majesty the Queen in right of the Province of Alberta, as represented by the Minister of Learning, 
10th floor, Commerce Place, Edmonton, Alberta, Canada, T5J 4L5.  All rights reserved.  No part of this 
material may be reproduced in any form or by any means, without the prior written consent of the Minister of 
Learning.  
Care has been taken to acknowledge all sources and references in these materials.  If there are any 
inadvertent omissions, please contact A

In [16]:
print(f"Total Tokens:{cb.total_tokens}")
print(f"Prompt Tokens:{cb.prompt_tokens}")
print(f"Completion Tokens:{cb.completion_tokens}")
print(f"Total Cost:{cb.total_cost}")

Total Tokens:15446
Prompt Tokens:13536
Completion Tokens:1910
Total Cost:0.024124


In [17]:
response

{'text': '2020-May-21  Trade Entrance Exam Study Guide  \nClassification: Protected A  APPRENTICESHIP  \nTRAINING  \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n  2 \nClassification: Protected A  TRADE ENTRANCE EXAM  \nStudy Guide  \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n1  \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n2003, Her Majesty the Queen in right of the Province of Alberta, as represented by the Minister of Learning, \n10th floor, Commerce Place, Edmonton, Alberta, Canada, T5J 4L5.  All rights reserved.  No part of this \nmaterial may be reproduced in any form or by any means, without the prior written consent of the Minister of \nLearning.  \nCare has been taken to acknowledge all sources and references in these materials.  If there are any \ninadvertent omissions, please contact Alberta Advan

In [18]:
quiz = response.get("quiz")
review = response.get("review")

In [19]:
quiz = json.loads(quiz)

In [20]:
print(review)

The complexity of the quiz questions is appropriate for an AIT exam. However, question 5 needs to be revised for clarity. Instead of "too good," it should be "well." Additionally, question 11 is a duplicate of question 7 and should be replaced with a new question.


In [21]:
quiz_table_data = []

for key, value in quiz.items():
    mcq = value["mcq"]
    options = " | ".join(
        [
            f"{option}: {option_value}"
            for option, option_value in value["options"].items()
            ]
        )
    correct = value["correct"]
    quiz_table_data.append({"MCQ": mcq, "Choices": options, "Correct": correct})

In [22]:
quiz_table_data

[{'MCQ': 'What is the capital of Canada?',
  'Choices': "A.: Vancouver | B.: Yellowknife | C.: Ottawa | D.: St. John's",
  'Correct': 'C.'},
 {'MCQ': 'Which is the smallest number?',
  'Choices': 'A.: 0.01001 | B.: 0.00998 | C.: 0.00385 | D.: 0.00297',
  'Correct': 'D.'},
 {'MCQ': 'What is the circumference of a circle with a diameter of 50 mm?',
  'Choices': 'A.: 78.5 mm | B.: 157.0 mm | C.: 78.5 cm | D.: 157.0 cm',
  'Correct': 'B.'},
 {'MCQ': 'How many tablespoons are there in 49.74 ml?',
  'Choices': 'A.: 3.50 tbsp | B.: 10.49 tbsp | C.: 35.00 tbsp | D.: 706.81 tbsp',
  'Correct': 'A.'},
 {'MCQ': 'Which sentence uses correct grammar?',
  'Choices': "A.: Our printers don't work too good. | B.: Brewing coffee results in bitterness using more water. | C.: Apprenticeship involves a combination of work experience and formal training. | D.: Tests used to measure your knowledge take long to write.",
  'Correct': 'C.'},
 {'MCQ': 'What is the volume of a cube which is 5 meters per side?',
 

In [23]:
quiz = pd.DataFrame(quiz_table_data)
quiz.head()

Unnamed: 0,MCQ,Choices,Correct
0,What is the capital of Canada?,A.: Vancouver | B.: Yellowknife | C.: Ottawa |...,C.
1,Which is the smallest number?,A.: 0.01001 | B.: 0.00998 | C.: 0.00385 | D.: ...,D.
2,What is the circumference of a circle with a d...,A.: 78.5 mm | B.: 157.0 mm | C.: 78.5 cm | D.:...,B.
3,How many tablespoons are there in 49.74 ml?,A.: 3.50 tbsp | B.: 10.49 tbsp | C.: 35.00 tbs...,A.
4,Which sentence uses correct grammar?,A.: Our printers don't work too good. | B.: Br...,C.


In [24]:
quiz.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 26 entries, 0 to 25
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   MCQ      26 non-null     object
 1   Choices  26 non-null     object
 2   Correct  26 non-null     object
dtypes: object(3)
memory usage: 752.0+ bytes


In [38]:
quiz.to_csv("machinelearning.csv", index=False)