In [1]:
import json, os
import pandas as pd
from langchain_openai import ChatOpenAI
from dotenv import load_dotenv
from langchain.prompts import PromptTemplate
from langchain.chains.llm import LLMChain
from langchain.chains.sequential import SequentialChain
from langchain.callbacks.manager import get_openai_callback
from PyPDF2 import PdfReader
from docx import Document

load_dotenv()  # take environment variables from .env

True

In [2]:
read_vocab = PdfReader("pdf_files/science_study_guide.pdf")

sample_questions = ""
for i, page in enumerate(read_vocab.pages):
    content = page.extract_text()
    if content:
        sample_questions += content

In [14]:
def preprocess_text(text):
    text = text.replace("\n", " ").strip()
    text = text.replace("\n\n", " ").strip()
    cleaned_text = text.replace("  ", " ").strip()
    return cleaned_text

sample_questions = preprocess_text(sample_questions)

In [15]:
llm = ChatOpenAI(
    model_name="gpt-3.5-turbo", 
    temperature=0.8,
    )

In [16]:
RESPONSE_JSON = {
    "1": {
        "mcq": "multiple choice question",
        "options": {
            "A.": "choice here",
            "B.": "choice here",
            "C.": "choice here",
            "D.": "choice here",
        },
        "correct": "correct answer",
    },
    "2": {
        "mcq": "multiple choice question",
        "options": {
            "A.": "choice here",
            "B.": "choice here",
            "C.": "choice here",
            "D.": "choice here",
        },
        "correct": "correct answer",
    },
    "3": {
        "mcq": "multiple choice question",
        "options": {
            "A.": "choice here",
            "B.": "choice here",
            "C.": "choice here",
            "D.": "choice here",
        },
        "correct": "correct answer",
    },
}

In [17]:
first_template = """
This is how you are to respond to this task: You are an expert seasoned examiner with the Apprenticeship and Industry Training (AIT) organization in Alberta, Canada. You have 20 years of experience 
preparing the preapprenticeship entrance exams for skilled trade participants. You are highly skilled and have high level expertise with minimal typographic mistakes or errors in the answers. You are 
skilled in creating multiple choice questions that contains both calculations, theory and conceptual questions to prepare participants for the entrance exams. This tone must strongly reflect in the 
generated multiple choice questions. You are provided with sample questions from the stuyd guide that was prepared by AIT. Using the {sample_questions}, creatively generate mutiple choice questions 
having {number} questions. The quiz questions should conform to the standard of the AIT exam questions. The questions should be similar to the {sample_questions}. Each question must be unique and 
completely different from others. Format your response using the {response_json} template as a guide. Ensure to include some questions that will involve calculations using formulas and also conversion 
between metric/imperial units. Some questions should not be straightforward and should require the trade participants to think critically. Ensure that you cross-check the {sample_questions} and provide 
the correct information for each question and the exact answer for each question. Use the {response_json} template to format your response. The total number of quesions must be {number}. Some examples 
of calculation questions and their corresponding answers are provided in the below:

**Specific Requirements:**

1. **Theory and Conceptual Questions**: Include questions that test theoretical knowledge and conceptual understanding.
2. **Critical Thinking**: Some questions should not be straightforward and should require the trade participants to think critically.
3. **Accuracy**: Ensure that you cross-check the {sample_questions} and provide the correct information for each question and the exact answer for each question.
4. **Diversity**: Questions should cover a wide range of topics within the provided vocabulary handout to ensure a comprehensive assessment.
5. **Unique Questions**: Each question must be unique and completely different from others and also different from the {sample_questions}.

Ensure that all your generated questions follow the {response_json} structure exactly.
"""

In [18]:
quiz_generation_prompt = PromptTemplate(
    input_variables=["sample_questions", "number", "response_json"],
    template=first_template,
    )

In [19]:
quiz_chain = LLMChain(
    llm=llm,
    prompt=quiz_generation_prompt,
    output_key="quiz",
    verbose=True,
    )

In [20]:
second_template = """
This is the tone in which you are to respond to the assigned task: You are an expert exam reviewer with 20 years of experience working with the Apprenticeship and Industry Training (AIT) 
organization in Alberta, Canada. Your job is to ensure that the exam questions meet the required standard for pre-apprenticeship entrance exams. You are assigned to check for the correctness 
of each question, cross-check the answer, and carefully fact-check the information contained in the generated questions.

**Task Overview:**

Given some multiple-choice questions recently generated, you must:
1. Evaluate the complexity of each question and provide a complete analysis of the {quiz}. Use a maximum of 100 words for the complexity analysis.
2. Ensure that the questions include a balanced mix of calculation questions and theoretical/conceptual questions.
4. Check for the accuracy of the questions, correctness of formulas, and units used.
5. Verify the correctness of each provided answer.

**If the quiz does not meet the required standard:**

- Identify the questions that need to be changed.
- Generate new questions to replace the incorrect ones, ensuring they follow the existing structure and meet the specified requirements.
- Ensure the new questions are unique and diverse, covering a range of topics within the provided vocabulary handout.

**Evaluation Criteria:**

- Accuracy: Ensure all questions and answers are factually correct.
- Complexity: Analyze and ensure the questions are appropriately challenging.
- Calculation: Verify the inclusion and correctness of calculation-based questions.

**Quiz_MCQs:**
{quiz}

**Check from an Expert Reviewer:**
"""

In [21]:
quiz_evaluation_prompt = PromptTemplate(
    input_variables=["quiz"],
    template=second_template,
    )

In [22]:
review_chain = LLMChain(
    llm=llm,
    prompt=quiz_evaluation_prompt,
    output_key="review",
    verbose=True,
    )

In [23]:
generate_evaluate_chain = SequentialChain(
    chains=[quiz_chain, review_chain],
    input_variables=["sample_questions", "number", "response_json"],
    output_variables=["quiz", "review"],
    verbose=True,
    )

In [24]:
# Serialize the Python dictionary into a JSON-formatted string
json.dumps(RESPONSE_JSON)

'{"1": {"mcq": "multiple choice question", "options": {"A.": "choice here", "B.": "choice here", "C.": "choice here", "D.": "choice here"}, "correct": "correct answer"}, "2": {"mcq": "multiple choice question", "options": {"A.": "choice here", "B.": "choice here", "C.": "choice here", "D.": "choice here"}, "correct": "correct answer"}, "3": {"mcq": "multiple choice question", "options": {"A.": "choice here", "B.": "choice here", "C.": "choice here", "D.": "choice here"}, "correct": "correct answer"}}'

In [31]:
with get_openai_callback() as cb:
    response = generate_evaluate_chain(
        {
            "sample_questions": sample_questions,
            "number": 20,
            "response_json": json.dumps(RESPONSE_JSON)
        }
    )



[1m> Entering new SequentialChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
This is how you are to respond to this task: You are an expert seasoned examiner with the Apprenticeship and Industry Training (AIT) organization in Alberta, Canada. You have 20 years of experience 
preparing the preapprenticeship entrance exams for skilled trade participants. You are highly skilled and have high level expertise with minimal typographic mistakes or errors in the answers. You are 
skilled in creating multiple choice questions that contains both calculations, theory and conceptual questions to prepare participants for the entrance exams. This tone must strongly reflect in the 
generated multiple choice questions. You are provided with sample questions from the stuyd guide that was prepared by AIT. Using the Trade Entrance Exam  Study Guide  English and Reading Comprehension   20 Trade Entrance Exam Study Guide     NOTE: The answer key for al

In [32]:
print(f"Total Tokens: {cb.total_tokens}")
print(f"Prompt Tokens: {cb.prompt_tokens}")
print(f"Completion Tokens: {cb.completion_tokens}")
print(f"Total Cost: {cb.total_cost}")

Total Tokens: 17927
Prompt Tokens: 17134
Completion Tokens: 793
Total Cost: 0.027287


In [33]:
response

{'sample_questions': "Trade Entrance Exam  Study Guide  English and Reading Comprehension   20 Trade Entrance Exam Study Guide     NOTE: The answer key for all the following questions in this study guide can be found at the end of the study guide (page 33)    SECTION THREE: SCIENCE   Studying for the Science section of the trade entrance exam.   Each Entrance Exam includes a se ction for Science questions. The questions are designed to challenge the exam writer's ability to recall basic information, manipulate basic equations , and interpret basic scientific relationships. The types of questions asked and the difficulty will va ry with the exam for each cluster.     38. What does 6.4 liters of water equal in imperial gallons?   A. 1.408 gal.  B. 3.635 gal.  C. 5.628 gal.  D. 5.632 gal.   This question is applicable to Cluster 2, Cluster 4, Cluster 5 .    39. Convert 11.4 km to miles and express to two decimal places.   A. 5.70 mi.  B. 7.08 mi.  C. 9.79 mi.  D. 18.34 mi.   This question

In [34]:
print(response["quiz"])

{"1": {"mcq": "What is the process called when a gas changes into a liquid?", "options": {"A. Condensation": "Condensation", "B. Vaporization": "Vaporization", "C. Sublimation": "Sublimation", "D. Melting": "Melting"}, "correct": "A. Condensation"}, "2": {"mcq": "Which of the following is the SI unit of force?", "options": {"A. Newton": "Newton", "B. Joule": "Joule", "C. Watt": "Watt", "D. Ohm": "Ohm"}, "correct": "A. Newton"}, "3": {"mcq": "What is the formula to calculate density?", "options": {"A. Density = Mass / Volume": "Density = Mass / Volume", "B. Density = Volume / Mass": "Density = Volume / Mass", "C. Density = Mass * Volume": "Density = Mass * Volume", "D. Density = Mass + Volume": "Density = Mass + Volume"}, "correct": "A. Density = Mass / Volume"}, "4": {"mcq": "Which of the following is a non-metallic element?", "options": {"A. Iron": "Iron", "B. Copper": "Copper", "C. Oxygen": "Oxygen", "D. Silver": "Silver"}, "correct": "C. Oxygen"}, "5": {"mcq": "What is the force tha

In [35]:
print(response["review"])

As an expert exam reviewer with extensive experience in working with the Apprenticeship and Industry Training (AIT) organization in Alberta, Canada, it is imperative that we meticulously evaluate the provided multiple-choice questions to ensure they meet the required standard for pre-apprenticeship entrance exams.

Let's start by analyzing the complexity of each question:
1. The question on the process of a gas changing into a liquid is straightforward and falls within the expected difficulty level for pre-apprenticeship students.
2. The inquiry about the SI unit of force is fundamental yet essential for understanding basic physics principles.
3. The question regarding the formula to calculate density is a fundamental concept that students should grasp at this level.
4. Identifying a non-metallic element is a critical knowledge check for students studying elements and their properties.
5. Understanding the force that resists motion through a fluid is a practical application of physics 

In [36]:
quiz = response.get("quiz")

In [38]:
print(response["quiz"])

{"1": {"mcq": "What is the process called when a gas changes into a liquid?", "options": {"A. Condensation": "Condensation", "B. Vaporization": "Vaporization", "C. Sublimation": "Sublimation", "D. Melting": "Melting"}, "correct": "A. Condensation"}, "2": {"mcq": "Which of the following is the SI unit of force?", "options": {"A. Newton": "Newton", "B. Joule": "Joule", "C. Watt": "Watt", "D. Ohm": "Ohm"}, "correct": "A. Newton"}, "3": {"mcq": "What is the formula to calculate density?", "options": {"A. Density = Mass / Volume": "Density = Mass / Volume", "B. Density = Volume / Mass": "Density = Volume / Mass", "C. Density = Mass * Volume": "Density = Mass * Volume", "D. Density = Mass + Volume": "Density = Mass + Volume"}, "correct": "A. Density = Mass / Volume"}, "4": {"mcq": "Which of the following is a non-metallic element?", "options": {"A. Iron": "Iron", "B. Copper": "Copper", "C. Oxygen": "Oxygen", "D. Silver": "Silver"}, "correct": "C. Oxygen"}, "5": {"mcq": "What is the force tha

In [37]:
quiz = json.loads(quiz)

JSONDecodeError: Unterminated string starting at: line 1 column 1339 (char 1338)

In [56]:
len(quiz)

15

In [57]:
quiz_table_data = []

for key, value in quiz.items():
    mcq = value["mcq"]
    options = " | ".join(
        [
            f"{option}: {option_value}"
            for option, option_value in value["options"].items()
            ]
        )
    correct = value["correct"]
    quiz_table_data.append({"MCQ": mcq, "Choices": options, "Correct": correct})

In [58]:
quiz_table_data

[{'MCQ': 'What is the SI unit of pressure?',
  'Choices': 'A.: Watts | B.: Joules | C.: Pascals | D.: Kelvin',
  'Correct': 'C. Pascals'},
 {'MCQ': 'Which type of pressure includes the weight of the atmosphere?',
  'Choices': 'A.: Atmospheric Pressure | B.: Pneumatic Pressure | C.: Gauge Pressure | D.: Hydraulic Pressure',
  'Correct': 'C. Gauge Pressure'},
 {'MCQ': "In the Ideal Gas Law equation PV = nRT, what does 'n' represent?",
  'Choices': 'A.: Volume | B.: Pressure | C.: Number of moles of gas | D.: Ideal gas constant',
  'Correct': 'C. Number of moles of gas'},
 {'MCQ': "What does Pascal's Principle state?",
  'Choices': 'A.: Volume is inversely proportional to pressure | B.: Pressure is transmitted undiminished in an enclosed fluid | C.: Temperature is directly proportional to volume | D.: Density is inversely proportional to pressure',
  'Correct': 'B. Pressure is transmitted undiminished in an enclosed fluid'},
 {'MCQ': 'How does pressure change with altitude?',
  'Choices':

In [59]:
doc = Document()

# Add a heading
doc.add_heading('Sample Practice Quiz Questions', level=1)

# Write each question, options, and correct answer
for i, (key, value) in enumerate(quiz.items(), start=1):
    mcq = value["mcq"]
    options = "\n".join([f"{option}: {option_value}" for option, option_value in value["options"].items()])
    correct = value["correct"]
    # Add question number
    doc.add_paragraph(f'{i}. {mcq}', style='BodyText')
    # Add options
    doc.add_paragraph(options, style='BodyText')
    # Add correct answer
    doc.add_paragraph(f'Correct Answer: {correct}', style='BodyText')
    
    # Add spacing between questions
    if i < len(quiz):
        doc.add_paragraph('\n')

# Save the document
doc.save('Lesson_4-6_Quiz_Questions.docx')

  return self._get_style_id_from_style(self[style_name], style_type)


In [89]:
quiz = pd.DataFrame(quiz_table_data)
quiz.head(10)

Unnamed: 0,MCQ,Choices,Correct
0,What is the basic property of matter that is t...,A.: Volume | B.: Mass | C.: Shape | D.: Weight,B. Mass
1,Which state of matter has a definite shape and...,A.: Solids | B.: Liquids | C.: Gases | D.: Plasma,A. Solids
2,What is the characteristic of gases in terms o...,"A.: Definite shape, definite volume | B.: No d...","C. No definite shape, no definite volume"
3,Which state of matter has molecules that are t...,A.: Solids | B.: Liquids | C.: Gases | D.: Plasma,A. Solids
4,What is the expansion behavior of gases when h...,A.: Contracts back to original size | B.: Expa...,D. Easily expands and contracts
5,At what temperature does a liquid begin to cha...,A.: Melting point | B.: Freezing point | C.: B...,C. Boiling point
6,Which type of property describes how a substan...,A.: Physical property | B.: Chemical property ...,B. Chemical property
7,What is the SI unit for force?,A.: Joules | B.: Watts | C.: Volts | D.: Newtons,D. Newtons
8,Which law of motion states that to every actio...,A.: Newton's First Law | B.: Newton's Second L...,C. Newton's Third Law
9,What is the SI unit for pressure?,A.: Pascals | B.: Watts | C.: Volts | D.: Ohms,A. Pascals


In [None]:
quiz.info()

In [38]:
quiz.to_csv("machinelearning.csv", index=False)