In [20]:
from langchain_openai import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.schema import BaseOutputParser
from pydantic import BaseModel, Field, RootModel
from typing import List, Dict, Any
from curriculum import get_exam_curriculum
from constants import *
import re
import json
import tiktoken

In [21]:
# Prompt template for exam-question generation.
# The text is filled in via `prompt.format(question_breakdown=...)` / `runnable.invoke(...)`,
# so `{question_breakdown}` is the only substitution slot. The doubled braces `{{` / `}}`
# around the JSON example are escapes that render as literal `{` / `}` in the final prompt.
# The model is told to return a bare JSON array of {"question", "expected_answer"} objects;
# downstream code parses the reply with `json.loads`.
template = """You are an expert Integrated Science teacher preparing a comprehensive exam for Junior Secondary School in Kenya.

You will receive a list of breakdown items. For each item:
- Generate EXACTLY one question.
- The question MUST align strictly with the `strand`, `sub_strand`, `target_bloom_skill`, and `skills_to_assess`.
- Use scenario-based questions (e.g. involving Mary, James, Sofia) where appropriate, especially for application and comprehension skills.
- For knowledge-based items, use direct questions when suitable (e.g. definitions, naming, listing).

Rules:
- Do NOT mix content between strands.
- Do NOT skip any item.
- Keep questions academically appropriate and realistic for junior secondary learners.

Use this structure per item:
{{
  "question": "[your generated question here]",
  "expected_answer": "[concise but accurate answer here]"
}}

Here are the breakdown items:
{question_breakdown}

Return ONLY a valid JSON array (no explanation, no markdown). Each object must have exactly two fields: "question" and "expected_answer".
"""

# Wrap the raw text in a PromptTemplate; `input_variables` must list every
# single-brace placeholder used in `template` (here just `question_breakdown`).
prompt = PromptTemplate(
    input_variables=["question_breakdown"],
    template=template
)


In [22]:
def get_token_count_from_str(text: str, llm_model: str) -> int:
    """Return how many tokens `text` occupies under the tokenizer for `llm_model`.

    The count is used only for diagnostics (printed token usage), so this
    function avoids raising on inputs that would otherwise abort a generation run.

    Args:
        text: The string to tokenize.
        llm_model: Model name handed to `tiktoken.encoding_for_model`.

    Returns:
        The number of tokens in the encoded text.
    """
    try:
        encoding = tiktoken.encoding_for_model(llm_model)
    except KeyError:
        # tiktoken has no mapping for this model name (e.g. a newly released or
        # custom-named model). Fall back to the encoding used by gpt-4o, this
        # notebook's default model, so the count is an approximation rather
        # than a crash.
        encoding = tiktoken.get_encoding("o200k_base")
    # disallowed_special=() makes special-token literals such as "<|endoftext|>"
    # encode as ordinary text instead of raising ValueError.
    return len(encoding.encode(text, disallowed_special=()))

def clean_llm_response(raw: str) -> str:
    """Strip a wrapping Markdown code fence from an LLM response.

    Handles an opening fence with or without a `json` language tag (any
    letter case) and a closing fence. Fences are removed only at the very
    start and very end of the response; interior lines are never modified.
    An opening fence without a matching closing fence (e.g. output cut off
    by a token limit) is still removed.

    Args:
        raw: The raw text returned by the model.

    Returns:
        The response with surrounding whitespace and wrapping fences removed.
    """
    text = raw.strip()
    # \A anchors to the start of the whole string (not each line), so a fence
    # marker appearing later in the content is left alone.
    text = re.sub(r"\A```(?:json)?\s*", "", text, count=1, flags=re.IGNORECASE)
    # Closing fence: only when it is the final thing in the response.
    if text.endswith("```"):
        text = text[:-3].rstrip()
    return text

def llm_generate_exam(llm: Any, input_data: Dict[str, Any], llm_model: str = "gpt-4o") -> List[Dict[str, Any]]:
    """Generate exam questions from a curriculum breakdown using an LLM.

    Formats the module-level `prompt` with the JSON-serialized breakdown,
    invokes the model, prints token-usage diagnostics, and parses the reply
    as JSON.

    Args:
        llm: A LangChain runnable model that can be piped after `prompt`.
            Chat models (reply exposes `.content`) and plain-text models
            (reply is a `str`) are both accepted.
        input_data: The question breakdown; must be JSON-serializable.
        llm_model: Model name used only to pick the tokenizer for the
            printed token counts.

    Returns:
        The parsed JSON reply. If the reply is not valid JSON, returns
        `[{"raw": <reply text>}]` so the caller still gets a
        JSON-serializable value containing the unparsed text.
    """
    # Serialize once so the prompt that is counted and the prompt that is sent
    # are built from the same payload.
    breakdown_json = json.dumps(input_data)

    # LCEL-style prompt-to-LLM pipe
    runnable = prompt | llm

    # Format and count input
    formatted_prompt = prompt.format(question_breakdown=breakdown_json)
    input_tokens = get_token_count_from_str(formatted_prompt, llm_model)
    print(f"📝 Input token count ({llm_model}): {input_tokens}")

    # Invoke model
    response = runnable.invoke({"question_breakdown": breakdown_json})
    print("📦 Raw LLM output:\n", response)

    # Chat models return a message object carrying `.content`; plain LLMs
    # return the text directly. Normalize both to a string.
    raw_text = getattr(response, "content", response)
    if not isinstance(raw_text, str):
        raw_text = str(raw_text)

    # Clean and count output tokens (counted on the cleaned text, so any
    # stripped code-fence markers are not included in the figure).
    cleaned = clean_llm_response(raw_text)
    output_tokens = get_token_count_from_str(cleaned, llm_model)
    print(f"📤 Output token count ({llm_model}): {output_tokens}")
    print(f"🔢 Total token usage: {input_tokens + output_tokens}")

    # Parse response
    try:
        return json.loads(cleaned)
    except json.JSONDecodeError as e:
        print("❌ JSON parsing failed:", e)
        # Return the reply *text*, not the message object: the object is not
        # JSON-serializable and would break a later json.dump of this result.
        return [{"raw": raw_text}]

In [23]:
# Shared gpt-4o chat client used for exam generation.
OPENAI_LLM_4O = ChatOpenAI(
    openai_api_key=OPENAI_API_KEY,  # supplied by the star-import from `constants`
    model_name="gpt-4o",
    max_tokens=2048,   # upper bound on the length of each completion
    temperature=0.1,   # low temperature keeps the questions close to deterministic
)

In [24]:
# Request a 25-question breakdown covering strand ids 1 through 10.
question_brd = get_exam_curriculum(question_count=25, strand_ids=list(range(1, 11)))

✅ Question breakdown written to output/question_breakdown.json


In [25]:
# Run the generation pipeline on the breakdown with the shared gpt-4o client.
parsed_output = llm_generate_exam(input_data=question_brd, llm=OPENAI_LLM_4O)

📝 Input token count (gpt-4o): 3340
📦 Raw LLM output:
 content='```json\n[\n    {\n        "question": "What are some basic skills required to use and care for laboratory apparatus and instruments effectively?",\n        "expected_answer": "Basic skills include proper handling, cleaning, and storage of laboratory apparatus and instruments to ensure their longevity and accuracy."\n    },\n    {\n        "question": "How can you identify an acid and a base using litmus paper?",\n        "expected_answer": "Acids turn blue litmus paper red, while bases turn red litmus paper blue."\n    },\n    {\n        "question": "What are the different types of pollution, and what causes them?",\n        "expected_answer": "Types of pollution include air, water, soil, and noise pollution. Causes include industrial emissions, waste disposal, agricultural activities, and urbanization."\n    },\n    {\n        "question": "What is the mechanical advantage of a lever if the effort arm is 2 meters and the l

In [27]:
# Save parsed_output to JSON.
# Serialize *before* opening the file: opening with 'w' truncates it immediately,
# so if parsed_output holds anything json cannot serialize, raising here leaves
# any previously written question list intact instead of a half-written file.
question_list_json = json.dumps(parsed_output, ensure_ascii=False, indent=4)
with open(QUESTION_LIST_OUTPUT_FILE, 'w', encoding='utf-8') as f:
    f.write(question_list_json)

print(f"✅ Question list written to {QUESTION_LIST_OUTPUT_FILE}")

✅ Question list written to output/question_list.json
