In [1]:
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_groq import ChatGroq
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
import os

In [6]:
# Prompt handed to PromptTemplate in the generation cell. Only {content} is a
# template variable; the literal JSON braces in the example output are doubled
# ({{ and }}) so that f-string style formatting leaves them as single braces.
prompt_template = """
You are a quiz creator. Based on the following content, generate a JSON quiz containing 3-5 multiple-choice questions. 
Each question should have 4 options, 1 correct answer, and a clear explanation.

Content: {content}

Output format:
[
    {{
        "id": 1,
        "question": "Question text",
        "options": ["Option A", "Option B", "Option C", "Option D"],
        "correctAnswer": 1,  // Index of the correct answer
        "explanation": "Explanation of the correct answer."
    }},
    ...
]
"""

# Alternative prompt with a two-question example and an explicit statement that
# correctAnswer is a 0-based index. The JSON braces are escaped the same way as
# in prompt_template: with single braces, formatting the template fails because
# the example objects are parsed as replacement fields.
prompt_template2 = """
You are a quiz creator. Based on the following content, generate a JSON quiz containing 3-5 multiple-choice questions. 
Each question should have 4 options, 1 correct answer, and a clear explanation.

Content: {content}
Output format:
[
    {{
        "id": 1,
        "question": "Question text",
        "options": ["Option A", "Option B", "Option C", "Option D"],
        "correctAnswer": 0,  // 0-based index of the correct answer
        "explanation": "Explanation of the correct answer."
    }},
    {{
        "id": 2,
        "question": "Another question text",
        "options": ["Option A", "Option B", "Option C", "Option D"],
        "correctAnswer": 2,
        "explanation": "Explanation of the correct answer."
    }},
    ...
]
"""

In [3]:
# PDF the quiz is generated from; the path is relative to the notebook's
# working directory.
pdf_path = 'QUANTSmod9.pdf'

# Build the loader, then read the PDF into `documents` for the splitting step.
loader = PyPDFLoader(pdf_path)
documents = loader.load()

In [9]:
# Read the Groq API key from the environment instead of embedding it in the
# notebook. NOTE(review): a real key was previously hardcoded on this line, so
# it must be treated as leaked and revoked in the Groq console.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
if not GROQ_API_KEY:
    raise RuntimeError(
        "GROQ_API_KEY is not set; export it in the environment before running this cell."
    )

# Split the loaded documents into chunks of up to 1000 characters with a
# 100-character overlap between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
text_chunks = text_splitter.split_documents(documents)
if not text_chunks:
    # Fail with a clear message instead of an IndexError on text_chunks[0] below.
    raise ValueError("No text chunks were produced from the loaded documents.")

# Use LangChain's LLM to generate the quiz
llm = ChatGroq(model='llama3-70b-8192', api_key=GROQ_API_KEY)  # Use your preferred LLM
prompt = PromptTemplate(input_variables=["content"], template=prompt_template)
chain = prompt | llm

# Generate quiz questions for the first chunk of content
# (this can be extended to loop over more chunks).
content = text_chunks[0].page_content
print(f"Content being passed to the chain: {content[:500]}")

# Pass the template variable by name so the mapping to {content} is explicit.
quiz_json = chain.invoke({"content": content})
print(quiz_json)


Content being passed to the chain: Parametric and Non-Parametric 
Tests of Independence
by Pamela Peterson Drake, PhD, CFA.
Pamela Peterson Drake, PhD, CFA, is at James Madison University (USA).
LEARNING OUTCOMES
Mastery The candidate should be able to:
explain parametric and nonparametric tests of the hypothesis that 
the population correlation coefficient equals zero, and determine whether the hypothesis is rejected at a given level of significance 
explain tests of independence based on contingency table data
INTRODUCTION
In m
content='Here is a JSON quiz based on the provided content:\n\n```\n[\n    {\n        "id": 1,\n        "question": "What is the primary goal of tests of independence in investments?",\n        "options": ["To evaluate the correlation between two variables", "To determine the mean of a single variable", "To compare the variance of two variables", "To identify the mode of a dataset"],\n        "correctAnswer": 0,\n        "explanation": "The primary goal of tes

In [11]:
import re
import json

def _balanced_bracket_end(text, start):
    """Return the index of the ']' that balances the '[' at `start`.

    Brackets are counted naively (string contents are not interpreted). If the
    bracket is never closed, the index of the last character is returned so
    that the caller stops scanning.
    """
    depth = 0
    for index in range(start, len(text)):
        char = text[index]
        if char == "[":
            depth += 1
        elif char == "]":
            depth -= 1
            if depth == 0:
                return index
    return len(text) - 1


def extract_json_from_response(response):
    """
    Extract the first decodable JSON array embedded in an LLM response.

    Args:
        response: One of
            * a ``str`` containing the reply text,
            * a ``dict`` (serialized with ``json.dumps`` before scanning),
            * any object with a ``content`` attribute holding the reply text
              (e.g. a chat-message object).

    Returns:
        The decoded JSON array as a Python ``list``, or ``None`` when the
        input type is unsupported, the text contains no ``[``, or no
        bracketed span decodes as JSON. A short diagnostic is printed in each
        ``None`` case.

    The text is scanned left to right. Each top-level ``[`` is handed to
    ``json.JSONDecoder.raw_decode``, which parses exactly one JSON value and
    ignores whatever follows it, so bracketed prose before or after the array
    (``[see note]``, markdown fences, trailing remarks) does not break
    extraction. When a candidate fails to decode, everything nested inside its
    balanced brackets is skipped, so a malformed outer array never yields one
    of its inner lists as a false result.
    """
    if isinstance(response, dict):  # Sometimes response can be a dict
        response_text = json.dumps(response)
    elif hasattr(response, "content"):  # Message-like object case
        response_text = response.content
    elif isinstance(response, str):
        response_text = response
    else:
        print(f"Unexpected response type: {type(response)}")
        return None

    # `content` is not guaranteed to be a string; refuse anything else rather
    # than crash while scanning it.
    if not isinstance(response_text, str):
        print(f"Unexpected response type: {type(response_text)}")
        return None

    decoder = json.JSONDecoder()
    first_error = None
    start = response_text.find("[")
    while start != -1:
        try:
            parsed, _end = decoder.raw_decode(response_text, start)
            return parsed  # Always a list, since decoding starts at '['
        except json.JSONDecodeError as e:
            if first_error is None:
                first_error = e
        # Jump past the failed candidate, including anything nested inside it.
        start = response_text.find("[", _balanced_bracket_end(response_text, start) + 1)

    if first_error is not None:
        print(f"Error decoding JSON: {first_error}")
    else:
        print("No JSON found in the response.")
    return None


# Parse the raw model reply produced by the generation cell (`quiz_json`) into
# Python objects; `cleaned` is None when no JSON could be extracted.
cleaned=extract_json_from_response(quiz_json)
print(cleaned)

[{'id': 1, 'question': 'What is the primary goal of tests of independence in investments?', 'options': ['To evaluate the correlation between two variables', 'To determine the mean of a single variable', 'To compare the variance of two variables', 'To identify the mode of a dataset'], 'correctAnswer': 0, 'explanation': 'The primary goal of tests of independence in investments is to evaluate the correlation between two variables, which helps assess the strength of their linear relationship.'}, {'id': 2, 'question': 'What type of data is used in tests of independence based on contingency table data?', 'options': ['Time series data', 'Cross-sectional data', 'Panel data', 'Contingency table data'], 'correctAnswer': 3, 'explanation': 'Tests of independence based on contingency table data use contingency table data, which is a table used to display the relationship between two categorical variables.'}, {'id': 3, 'question': 'What is the hypothesis being tested in parametric and nonparametric 