In [1]:
import os
import sys
sys.path.insert(0, os.path.abspath(".."))

import json
from anthropic import Client
from typing import Any, Dict, List, Union
from operator import itemgetter

from gen.curriculum import get_cbc_grouped_questions
from gen.constants import *
from gen.utils import CREATE_EXAM_LLM_PROMPT, get_db_question_objects

In [2]:
# Model identifiers handed to the `model` argument of the Messages API calls.
CLAUDE_OPUS_4 = "claude-opus-4-20250514"
CLAUDE_SONNET_4 = "claude-sonnet-4-20250514"

# Shared API client; the key comes from the ANTHROPIC_API_KEY environment
# variable (None is passed through when the variable is unset).
client = Client(api_key=os.environ.get("ANTHROPIC_API_KEY"))

In [3]:
def safe_parse_llm_output(raw):
    """Best-effort conversion of an LLM reply into parsed JSON.

    Args:
        raw: The model reply. A list is treated as already parsed; a string
            is treated as JSON text, optionally wrapped in a Markdown code
            fence (``` or ```json) at the start and end of the reply.

    Returns:
        ``raw`` itself when it is a list; the decoded JSON value when ``raw``
        is a string containing valid JSON (this may be any JSON type, not
        only a list); otherwise an empty list. A string that cannot be
        decoded also prints a warning.
    """
    import re  # local import so the cell does not depend on the import cell

    # If it's already a list, just return it
    if isinstance(raw, list):
        return raw
    # Anything that is neither a list nor a string cannot be parsed
    if not isinstance(raw, str):
        return []

    cleaned = raw.strip()
    # Sometimes LLMs wrap outputs in ``` or ```json code fences
    if cleaned.startswith("```"):
        # Closing fence first, so a bare "```" reply collapses to "".
        cleaned = re.sub(r"```\s*$", "", cleaned, count=1)
        # Opening fence plus an optional language tag. The tag is only
        # consumed when followed by whitespace or the start of a JSON
        # container, so payload text (e.g. a "json" key) is never eaten.
        cleaned = re.sub(
            r"^```(?:[A-Za-z0-9_+-]*(?=[\s\[{]))?", "", cleaned, count=1
        )
        cleaned = cleaned.strip()

    try:
        return json.loads(cleaned)
    except (ValueError, RecursionError):
        # json.JSONDecodeError is a ValueError subclass.
        print("Warning: Could not parse LLM output as JSON.")
        return []

In [4]:
def generate_claude_sub_strand_questions(
    sub_strand_data: Dict[str, Any],
    llm: str,
) -> str:
    """Ask the model for the questions of one sub-strand and return its reply text.

    Args:
        sub_strand_data: Values substituted into ``CREATE_EXAM_LLM_PROMPT``.
            Must contain the keys ``strand``, ``sub_strand``,
            ``learning_outcomes``, ``skills_to_assess``, ``skills_to_test``
            and ``question_count``.
        llm: Model identifier passed as ``model`` to the Messages API.

    Returns:
        The ``text`` of every content block in the response, concatenated in
        order. The string is returned unparsed.

    Raises:
        KeyError: If ``sub_strand_data`` lacks one of the required keys.
    """
    prompt_fields = (
        "strand",
        "sub_strand",
        "learning_outcomes",
        "skills_to_assess",
        "skills_to_test",
        "question_count",
    )
    user_prompt = CREATE_EXAM_LLM_PROMPT.format(
        **{field: sub_strand_data[field] for field in prompt_fields}
    )

    # Single-turn, non-streaming request with a low sampling temperature.
    reply = client.messages.create(
        model=llm,
        max_tokens=10240,
        temperature=0.1,
        messages=[{"role": "user", "content": user_prompt}],
    )

    text_parts = [block.text for block in reply.content]
    return "".join(text_parts)

In [5]:
def generate_llm_question_list(
    grouped_question_data: List[Dict[str, Any]],
    llm: Any,
    output_file: str = QUESTION_LIST_OUTPUT_FILE,
) -> Union[List[Dict[str, Any]], Dict[str, Any]]:
    """Generate questions for every sub-strand group and save them as JSON.

    For each group, one LLM call is made requesting one question per skill.
    The returned items are paired with the skills by position and tagged with
    the group's metadata. Malformed replies are reported and skipped rather
    than aborting the whole run, so results already generated are not lost.

    Args:
        grouped_question_data: One dict per sub-strand with the keys
            ``strand``, ``sub_strand``, ``grade``, ``learning_outcomes``
            (list of str), ``skills_to_assess`` (list of str) and
            ``skills_to_test`` (list of dicts, each with ``number`` and a
            nested ``skills_to_test`` list).
        llm: Model identifier forwarded to
            ``generate_claude_sub_strand_questions``.
        output_file: Path of the JSON file to write.

    Returns:
        The list of tagged question dicts, sorted by ``number``. Each dict
        has the keys ``number``, ``grade``, ``strand``, ``sub_strand``,
        ``bloom_skill``, ``description`` and ``expected_answer``.

    Raises:
        KeyError: If a group lacks one of the required keys.
        OSError: If ``output_file`` cannot be opened for writing.
    """
    all_question_list = []

    for group in grouped_question_data:
        strand = group["strand"]
        sub_strand = group["sub_strand"]
        learning_outcomes = "\n- " + "\n- ".join(group["learning_outcomes"])
        skills_to_assess = "\n- " + "\n- ".join(group["skills_to_assess"])

        # Step 1: Flatten all skills with their associated breakdown number
        numbered_skills = [
            {"number": entry["number"], "skill": skill}
            for entry in group["skills_to_test"]
            for skill in entry["skills_to_test"]
        ]

        # Step 2: Build a flat list of just the skills (in order)
        skills_only = [entry["skill"] for entry in numbered_skills]

        # Step 3: Generate all questions in one LLM call
        sub_strand_data = {
            "question_count": len(skills_only),
            "strand": strand,
            "sub_strand": sub_strand,
            "learning_outcomes": learning_outcomes,
            "skills_to_assess": skills_to_assess,
            "skills_to_test": skills_only,
        }

        print(f"\n{sub_strand} =========")

        raw_output = generate_claude_sub_strand_questions(
            sub_strand_data=sub_strand_data,
            llm=llm,
        )
        parsed_output = safe_parse_llm_output(raw_output)

        if not parsed_output:
            print(f"Skipping sub-strand: {sub_strand} due to un-parse-able output")
            continue

        # Valid JSON that is not a list (e.g. a wrapping object) cannot be
        # paired with the skills by position.
        if not isinstance(parsed_output, list):
            print(
                f"Skipping sub-strand: {sub_strand} because the LLM output "
                f"is not a JSON list")
            continue

        if len(parsed_output) != len(numbered_skills):
            print(
                f"Warning: expected {len(numbered_skills)} questions for "
                f"{sub_strand} but received {len(parsed_output)}; "
                f"pairing by position and ignoring the surplus.")

        # zip stops at the shorter sequence, which prevents index overflow
        # when the LLM returns fewer or more questions than requested.
        for skill_entry, qa in zip(numbered_skills, parsed_output):
            if (
                not isinstance(qa, dict)
                or "question" not in qa
                or "expected_answer" not in qa
            ):
                print(
                    f"Warning: skipping malformed question for {sub_strand} "
                    f"(skill: {skill_entry['skill']})")
                continue
            all_question_list.append({
                "number": skill_entry["number"],
                "grade": group["grade"],
                "strand": strand,
                "sub_strand": sub_strand,
                "bloom_skill": skill_entry["skill"],
                "description": qa["question"],
                "expected_answer": qa["expected_answer"],
            })

    # Stable sort: questions sharing a number keep their generation order.
    all_question_list = sorted(all_question_list, key=itemgetter("number"))

    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(all_question_list, f, ensure_ascii=False, indent=4)
    print(
        f"\n✅ Question list written to {output_file}. Total: {len(all_question_list)}")

    return all_question_list

In [6]:
# Fetch the grouped question specs that drive generation. Each group is later
# read by generate_llm_question_list for its "strand", "sub_strand", "grade",
# "learning_outcomes", "skills_to_assess" and "skills_to_test" keys.
# NOTE(review): the exact meaning of strand_ids, question_count and
# bloom_skill_count is defined in gen.curriculum, which is not visible
# here — confirm there before changing these values.
grouped_questions = get_cbc_grouped_questions(
    strand_ids=[2,4,1,6,3,8,9], 
    question_count=10,
    bloom_skill_count=2,
)

In [7]:
# Model used for this run; the same value is embedded in the export file name
# built in a later cell.
selected_model = CLAUDE_SONNET_4
# Makes one LLM call per entry in grouped_questions and writes the combined
# result to the function's default output file.
all_question_list = generate_llm_question_list(
    grouped_question_data=grouped_questions,
    llm=selected_model,
)

# Dumps every generated question dict into the cell output.
print(all_question_list)

# If there was a generation error
# NOTE(review): generate_llm_question_list as defined in this notebook returns
# a list on every path, so this branch looks unreachable (and would only
# repeat the print above) — confirm whether it can be dropped.
if not isinstance(all_question_list, list):
    print(all_question_list)











✅ Question list written to /Users/melaniefayne/Desktop/mtihani/mtihani_api/mtihaniapi/gen/output/question_list.json. Total: 20
[{'number': 1, 'grade': 7, 'strand': 'Scientific Investigation', 'sub_strand': 'Introduction to Integrated Science', 'bloom_skill': 'Remembering', 'description': 'List two main components that make up Integrated Science as a field of study.', 'expected_answer': 'Biology and Chemistry, or Biology and Physics, or Chemistry and Physics (any two of the three main branches: Biology, Chemistry, and Physics)'}, {'number': 1, 'grade': 7, 'strand': 'Scientific Investigation', 'sub_strand': 'Introduction to Integrated Science', 'bloom_skill': 'Understanding', 'description': 'Amina notices that her grandmother uses traditional herbs to treat common illnesses in their Mombasa home while also consulting modern doctors for serious conditions. Explain how this situation demonstrates the importance of science in daily life.', 'expected_answer': 'Science helps us unde

In [8]:
# Convert the flat, per-skill question list into exam question objects.
# NOTE(review): get_db_question_objects lives in gen.utils and is not visible
# here. The export cell reads "bloom_skills", "questions" and
# "expected_answers" lists from each returned item, so it presumably merges
# entries that share a question number — confirm against gen.utils.
exam_questions = get_db_question_objects(
    all_question_list=all_question_list,
)

In [9]:
# Destination of the plain-text exam export. The file name embeds
# selected_model (set in the generation cell), so each model gets its own file
# under output/model_comparisons.
# NOTE(review): BASE_DIR is presumably supplied by the `gen.constants` star
# import — confirm.
OUTPUT_FILE = os.path.join(
    BASE_DIR, "output/model_comparisons", f"{selected_model}_EXAM.txt")

def export_exam_questions_to_txt(exam_questions, output_file=OUTPUT_FILE):
    """Write exam questions to a UTF-8 text file, one numbered section each.

    Args:
        exam_questions: Iterable of dict-like items. ``grade``, ``strand``
            and ``sub_strand`` are printed in the section header (a missing
            key prints as an empty string). ``bloom_skills``, ``questions``
            and ``expected_answers`` are parallel lists written as numbered
            skill / Q / A entries; they are paired with ``zip``, so entries
            beyond the shortest list are not written.
        output_file: Path of the text file; it is overwritten if it exists.

    Returns:
        None. Prints a confirmation message once all sections are written.
    """
    rule = "-" * 20

    with open(output_file, "w", encoding="utf-8") as out:
        for number, item in enumerate(exam_questions, start=1):
            # Section header
            out.write(f"\nQuestion {number}\n")
            out.write(rule + "\n")
            for label, key in (("Grade", "grade"), ("Strand", "strand")):
                out.write(f"{label}: {item.get(key, '')}\n")
            out.write(f"Sub Strand: {item.get('sub_strand', '')}\n\n")

            # One entry per (bloom skill, question, answer) triple. For a flat
            # single-skill structure, the "bloom_skill", "description" and
            # "expected_answer" keys would be written here instead.
            triples = zip(
                item.get("bloom_skills", []),
                item.get("questions", []),
                item.get("expected_answers", []),
            )
            for position, (skill, prompt, answer) in enumerate(triples, start=1):
                out.write(
                    f"Bloom Skill {position}: {skill}\n"
                    f"      Q: {prompt}\n"
                    f"      A: {answer}\n\n"
                )

        print(f"✅ Exam questions exported to {output_file}")

# Usage: pass the processed list of question dicts; the destination defaults
# to OUTPUT_FILE unless output_file is given explicitly.
export_exam_questions_to_txt(exam_questions)

✅ Exam questions exported to /Users/melaniefayne/Desktop/mtihani/mtihani_api/mtihaniapi/gen/output/model_comparisons/claude-sonnet-4-20250514_EXAM.txt
