In [1]:
import os
import sys

# Ensure correct path to project root: the parent of the current working
# directory is put first on sys.path so project packages imported by later
# cells (e.g. `gen`, `rag`) can be resolved.
# NOTE(review): ".." is resolved against the kernel's working directory —
# confirm the notebook is always started from the expected folder.
sys.path.insert(0, os.path.abspath(".."))

import django
# setdefault leaves an already-exported DJANGO_SETTINGS_MODULE untouched.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'mtihaniapi.settings')
# Initialise Django so that model/app imports in later cells work.
django.setup()

In [None]:
import json
import asyncio
from typing import Any, Dict, List, Union
from operator import itemgetter

# Django/project imports
from gen.curriculum import get_cbc_grouped_questions
from gen.constants import *
from gen.utils import CREATE_EXAM_LLM_PROMPT, get_db_question_objects

from anthropic import Client

In [3]:
# Initialize the client.
# os.getenv returns None when ANTHROPIC_API_KEY is not set, so a missing key
# is not reported by this line itself.
client = Client(api_key=os.getenv("ANTHROPIC_API_KEY"))

# Model identifiers passed as the `model` argument of client.messages.create.
CLAUDE_SONNET_4 = "claude-sonnet-4-20250514"
CLAUDE_OPUS_4 = "claude-opus-4-20250514"

In [4]:
def safe_parse_llm_output(raw):
    """Best-effort conversion of an LLM reply into parsed JSON.

    Args:
        raw: The model output. A list is returned unchanged; a string is
            treated as JSON text, optionally wrapped in a Markdown code
            fence (``` or ```json).

    Returns:
        The decoded JSON value (normally a list of question dicts), or an
        empty list when ``raw`` is neither a list nor a string, or when the
        text cannot be decoded. This function never raises on bad input.
    """
    # If it's already a list, just return it
    if isinstance(raw, list):
        return raw
    # Anything that is not text cannot be parsed
    if not isinstance(raw, str):
        return []

    cleaned = raw.strip()
    # Sometimes LLMs wrap outputs in ``` or ```json code fences
    if cleaned.startswith("```"):
        body = cleaned.lstrip("`")
        first_line, newline, remainder = body.partition("\n")
        if newline and first_line.strip().isalpha():
            # The opening fence line carries only a language tag: drop it.
            body = remainder
        elif body[:4].lower() == "json":
            # Single-line form such as ```json [...] ```: drop just the tag,
            # never a "json" that appears later inside the payload.
            body = body[4:]
        body = body.strip()
        # Remove the closing fence, which would otherwise break json.loads.
        if body.endswith("```"):
            body = body.rstrip("`").rstrip()
        cleaned = body

    try:
        return json.loads(cleaned)
    except (ValueError, RecursionError):
        # json.JSONDecodeError is a ValueError subclass.
        print("Warning: Could not parse LLM output as JSON.")
        return []

In [5]:
def generate_claude_sub_strand_questions(
    sub_strand_data: Dict[str, Any],
    llm: str,
) -> str:
    """Ask Claude to write the questions for one sub-strand.

    Args:
        sub_strand_data: Values substituted into CREATE_EXAM_LLM_PROMPT. The
            keys "strand", "sub_strand", "learning_outcomes",
            "skills_to_assess", "skills_to_test", "question_count" and
            "sample_questions" are all required.
        llm: Model identifier forwarded as the ``model`` argument.

    Returns:
        The text of every content block in the reply, concatenated.

    Raises:
        KeyError: If ``sub_strand_data`` lacks one of the required keys.
    """
    # Looked up in this order, so the first missing key is the one reported.
    prompt_fields = (
        "strand",
        "sub_strand",
        "learning_outcomes",
        "skills_to_assess",
        "skills_to_test",
        "question_count",
        "sample_questions",
    )
    user_prompt = CREATE_EXAM_LLM_PROMPT.format(
        **{field: sub_strand_data[field] for field in prompt_fields}
    )

    # Single non-streaming request; the whole reply arrives in one object.
    reply = client.messages.create(
        model=llm,
        max_tokens=10240,
        temperature=0.1,
        messages=[{"role": "user", "content": user_prompt}],
    )

    text_parts = [block.text for block in reply.content]
    return "".join(text_parts)

In [None]:
from asgiref.sync import sync_to_async

@sync_to_async
def get_reference_for_sub_strand(sub_strand: str) -> str:
    """Look up the stored reference text for a sub-strand.

    Wrapped with ``sync_to_async`` so the ORM query can be awaited from
    async code.

    Args:
        sub_strand: Value matched against ``SubStrandReference.sub_strand``.

    Returns:
        The record's ``reference_text``, or an empty string when no record
        exists or the stored text is empty/None.
    """
    # Imported here rather than at module level, as in the original cell.
    from rag.models import SubStrandReference

    try:
        stored_text = SubStrandReference.objects.get(
            sub_strand=sub_strand
        ).reference_text
    except SubStrandReference.DoesNotExist:
        return ""
    return stored_text or ""

In [None]:
async def generate_llm_question_list(
    grouped_question_data: List[Dict[str, Any]],
    llm: Any,
    output_file: str = QUESTION_LIST_OUTPUT_FILE,
) -> Union[List[Dict[str, Any]], Dict[str, Any]]:
    """Generate questions for every sub-strand group and save them as JSON.

    For each group one LLM call is made that asks for one question per skill
    listed in ``group["skills_to_test"]``. The i-th returned question is
    tagged with the i-th flattened skill and its breakdown number.

    Args:
        grouped_question_data: One dict per sub-strand. Keys read here:
            "strand", "sub_strand", "grade", "learning_outcomes" and
            "skills_to_assess" (lists of strings), and "skills_to_test"
            (list of dicts with "number" and a nested "skills_to_test" list).
        llm: Model identifier forwarded to
            ``generate_claude_sub_strand_questions``.
        output_file: Path of the JSON file the final list is written to.

    Returns:
        The tagged questions of all groups, sorted by "number". Sub-strands
        whose output cannot be parsed, and individual malformed items, are
        skipped with a printed message instead of aborting the whole run.
    """
    all_question_list = []

    for group in grouped_question_data:
        strand = group["strand"]
        sub_strand = group["sub_strand"]
        learning_outcomes = "\n- " + "\n- ".join(group["learning_outcomes"])
        skills_to_assess = "\n- " + "\n- ".join(group["skills_to_assess"])
        sample_questions = await get_reference_for_sub_strand(sub_strand)

        # Step 1: Flatten all skills with their associated breakdown number
        numbered_skills = [
            {"number": entry["number"], "skill": skill}
            for entry in group["skills_to_test"]
            for skill in entry["skills_to_test"]
        ]

        # Step 2: Build a flat list of just the skills (in order)
        skills_only = [entry["skill"] for entry in numbered_skills]

        # Step 3: Generate all questions in one LLM call
        sub_strand_data = {
            "question_count": len(skills_only),
            "strand": strand,
            "sub_strand": sub_strand,
            "learning_outcomes": learning_outcomes,
            "skills_to_assess": skills_to_assess,
            "skills_to_test": skills_only,
            "sample_questions": sample_questions,
        }

        print(f"\n{sub_strand} =========")

        # NOTE: this is a synchronous call, so the event loop is blocked
        # until the model replies.
        raw_output = generate_claude_sub_strand_questions(
            sub_strand_data=sub_strand_data,
            llm=llm,
        )
        parsed_output = safe_parse_llm_output(raw_output)

        # The parser may hand back any JSON value; only a non-empty list of
        # question objects is usable here.
        if not parsed_output or not isinstance(parsed_output, list):
            print(f"Skipping sub-strand: {sub_strand} due to un-parse-able output")
            continue

        if len(parsed_output) != len(numbered_skills):
            print(
                f"Warning: {sub_strand}: expected {len(numbered_skills)} "
                f"questions but received {len(parsed_output)}"
            )

        tagged_responses = []
        # zip stops at the shorter sequence, so extra questions are ignored
        # and missing ones simply leave the trailing skills without a question.
        for position, (skill_entry, qa) in enumerate(
            zip(numbered_skills, parsed_output), start=1
        ):
            if (
                not isinstance(qa, dict)
                or "question" not in qa
                or "expected_answer" not in qa
            ):
                # One malformed item must not discard every other question.
                print(
                    f"Warning: {sub_strand}: skipping malformed item "
                    f"{position} in LLM output"
                )
                continue
            tagged_responses.append({
                "number": skill_entry["number"],
                "grade": group["grade"],
                "strand": group["strand"],
                "sub_strand": group["sub_strand"],
                "bloom_skill": skill_entry["skill"],
                "description": qa["question"],
                "expected_answer": qa["expected_answer"],
            })
        all_question_list.extend(tagged_responses)

    all_question_list = sorted(all_question_list, key=itemgetter("number"))

    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(all_question_list, f, ensure_ascii=False, indent=4)
    print(
        f"\n✅ Question list written to {output_file}. Total: {len(all_question_list)}")

    return all_question_list

In [8]:
# Build the per-sub-strand question plan that drives generation.
# Downstream code reads "strand", "sub_strand", "grade", "learning_outcomes",
# "skills_to_assess" and "skills_to_test" from each returned group.
# NOTE(review): strand_ids are presumably database ids of the strands to
# cover, and the two counts presumably control questions per exam and skills
# per question — confirm against gen.curriculum.
grouped_questions = get_cbc_grouped_questions(
    strand_ids=[2,4,1,6,3,8,9], 
    question_count=10,
    bloom_skill_count=2,
)

In [10]:
# Model used for this run; also becomes part of the export file name below.
selected_model = CLAUDE_SONNET_4
# Top-level await works here because the notebook kernel runs an event loop.
all_question_list = await generate_llm_question_list(
    grouped_question_data=grouped_questions,
    llm=selected_model,
)

# Prints the complete generated list.
print(all_question_list)

# If there was a generation error
# NOTE(review): generate_llm_question_list as defined in this notebook only
# returns a list, so this branch looks unreachable; it would also just repeat
# the print above.
if not isinstance(all_question_list, list):
    print(all_question_list)



 Q: Ethan is researching on properties of different substances. He has observed that some acidic and basic substances are very useful. Why are some bases and acids important?
A: Acids and bases are important because they are used in various applications such as cleaning, food preservation, and manufacturing processes. They also play crucial roles in biological systems and chemical reactions.

Q: Test and classify the following specimens provided into acids, neutral or bases: a) solution A b) Solution B c) Solution C
A: Use an acid-base indicator to test each solution. If the indicator shows a red or pink color, the solution is acidic. If it shows blue or green, the solution is basic. If there is no color change, the solution is neutral.

Q: Test and classify the following specimens provided into acids, neutral or bases: Specimen a) solution A b) Solution B c) Solution C
A: Use an acid-base indicator to test each solution. Solution A, B, and C can be classified based on the color chan

In [11]:
# Convert the flat tagged question list into the exam-question records that
# the export cell consumes.
# NOTE(review): the export code reads "bloom_skills", "questions" and
# "expected_answers" lists from each record, so this helper presumably groups
# entries by question number — confirm in gen.utils.
exam_questions = get_db_question_objects(
    all_question_list=all_question_list,
)

In [12]:
# Export path: one text file per model so runs with different models can be
# compared side by side.
# NOTE(review): BASE_DIR is not defined in this notebook; presumably it comes
# from the `gen.constants` star import — confirm.
OUTPUT_FILE = os.path.join(
    BASE_DIR, "output/model_comparisons", f"{selected_model}_EXAM.txt")

def export_exam_questions_to_txt(exam_questions, output_file=OUTPUT_FILE):
    """Write the exam questions to a human-readable text file.

    Each question gets a numbered header with its grade, strand and
    sub-strand, followed by one "Bloom Skill / Q / A" entry per skill.

    Args:
        exam_questions: Iterable of dict-like records. "grade", "strand" and
            "sub_strand" are read with an empty-string default; the parallel
            lists "bloom_skills", "questions" and "expected_answers" default
            to empty lists and are paired by position.
        output_file: Destination path. Missing parent directories are
            created; an existing file is overwritten.
    """
    # open() does not create parent folders, so make sure they exist.
    # A bare file name has no parent component and needs no directory.
    parent_dir = os.path.dirname(output_file)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    with open(output_file, "w", encoding="utf-8") as f:
        for idx, q in enumerate(exam_questions, start=1):
            f.write(f"\nQuestion {idx}\n")
            f.write("-" * 20 + "\n")
            f.write(f"Grade: {q.get('grade', '')}\n")
            f.write(f"Strand: {q.get('strand', '')}\n")
            f.write(f"Sub Strand: {q.get('sub_strand', '')}\n\n")

            # Handle single or multiple bloom skills per question
            bloom_skills = q.get("bloom_skills", [])
            questions = q.get("questions", [])
            answers = q.get("expected_answers", [])

            # zip pairs the three lists by position and stops at the
            # shortest one, so unmatched trailing entries are not written.
            for b_idx, (bloom, ques, ans) in enumerate(
                zip(bloom_skills, questions, answers), start=1
            ):
                f.write(f"Bloom Skill {b_idx}: {bloom}\n")
                f.write(f"      Q: {ques}\n")
                f.write(f"      A: {ans}\n\n")

    # Report success only after the file has been closed and flushed.
    print(f"✅ Exam questions exported to {output_file}")

# Usage: export the records built in the previous cell to OUTPUT_FILE
# (the function's default destination).
# exam_questions = [...]  # your processed list of dicts
export_exam_questions_to_txt(exam_questions)

✅ Exam questions exported to /Users/melaniefayne/Desktop/mtihani/mtihani_api/mtihaniapi/gen/output/model_comparisons/claude-sonnet-4-20250514_EXAM.txt
