In [1]:
import sys
import os
sys.path.insert(0, os.path.abspath(".."))
from gen.curriculum import get_cbc_grouped_questions, get_rubrics_by_sub_strand
from gen.utils import *

## Generate questions

In [2]:
# Build the grouped question breakdown that the question-generation step
# consumes. Only strand id 1 is requested here.
# NOTE(review): the exact meaning of `question_count` and `bloom_skill_count`
# is defined in gen.curriculum — confirm there before changing them.
grouped_questions = get_cbc_grouped_questions(
    strand_ids=[1], 
    question_count=2,
    bloom_skill_count=1,
    is_debug=True,
)


✅ Question breakdown written to /Users/melaniefayne/Desktop/mtihani/mtihani_api/mtihaniapi/gen/output/question_breakdown.json. Total: 1


In [3]:
# Generate the question list from the grouped breakdown.
# NOTE(review): the isinstance check below implies the helper returns a
# non-list value (presumably an error description) when generation fails —
# confirm in gen.utils.
all_question_list = generate_llm_question_list(
    grouped_question_data=grouped_questions,
    is_debug=True,
)

# If there was a generation error, show whatever was returned instead of a
# list so the problem is visible before the next cell uses the result.
if not isinstance(all_question_list, list):
    print(all_question_list)


📝 Input token count (gpt-4o): 689
📦 Raw LLM output:
 content='[{"question":"Amina noticed that her phone\'s battery lasts longer when she reduces the screen brightness. Explain how this observation relates to the importance of science in daily life.","expected_answer":"This observation shows how understanding scientific principles, like energy conservation, can help us make practical decisions to improve efficiency and save resources in daily life."},{"question":"Brian is planning a science project to demonstrate the water cycle using simple materials at home. Describe how he can creatively combine different components of Integrated Science to effectively illustrate this natural process.","expected_answer":"Brian can use biology to explain plant transpiration, chemistry to show evaporation and condensation, and physics to demonstrate the movement of water through different states, integrating these concepts to create a comprehensive model of the water cycle."}]' additional_kwargs={'re

In [4]:
# Turn the generated question list into the `exam_questions` records that the
# answer-simulation and grading cells read.
# NOTE(review): the record schema comes from gen.utils — confirm the keys there.
exam_questions = get_db_question_objects(
    all_question_list=all_question_list,
    is_debug=True,
)


✅ Question list to /Users/melaniefayne/Desktop/mtihani/mtihani_api/mtihaniapi/gen/output/question_list.json. Total: 2


## Simulate student answers

In [5]:
def convert_to_single_qa_list(data):
    """Reduce each exam item to a single question / expected-answer pair.

    Args:
        data: Iterable of dicts, each with a "questions" and an
            "expected_answers" entry. Either entry may be a single string or
            a sequence; only the first element of a sequence is used.

    Returns:
        A list of {"id", "question", "expected_answer"} dicts. "id" is the
        item's 1-based position in ``data``. Items whose questions or expected
        answers are empty are left out, so ids may have gaps.
    """
    def _as_sequence(value):
        # A bare string is treated as a one-element list.
        return [value] if isinstance(value, str) else value

    flattened = []
    for position, entry in enumerate(data, start=1):
        question_options = _as_sequence(entry["questions"])
        answer_options = _as_sequence(entry["expected_answers"])

        # Skip items that lack either a question or an expected answer.
        if not (question_options and answer_options):
            continue

        flattened.append({
            "id": position,
            "question": question_options[0],
            "expected_answer": answer_options[0],
        })

    return flattened

# Flatten the exam questions to one question/expected-answer pair each and
# print the result for inspection; `exam_output` is the exam passed to the
# answer-simulation call.
exam_output = convert_to_single_qa_list(exam_questions)
print(exam_output)

[{'id': 1, 'question': "Amina noticed that her phone's battery lasts longer when she reduces the screen brightness. Explain how this observation relates to the importance of science in daily life.", 'expected_answer': 'This observation shows how understanding scientific principles, like energy conservation, can help us make practical decisions to improve efficiency and save resources in daily life.'}, {'id': 2, 'question': 'Brian is planning a science project to demonstrate the water cycle using simple materials at home. Describe how he can creatively combine different components of Integrated Science to effectively illustrate this natural process.', 'expected_answer': 'Brian can use biology to explain plant transpiration, chemistry to show evaporation and condensation, and physics to demonstrate the movement of water through different states, integrating these concepts to create a comprehensive model of the water cycle.'}]


In [6]:
# Mock students used for the answer simulation, one record per student.
# NOTE(review): `avg_score` presumably steers how strong each simulated
# answer is — confirm in the answer-generation prompt.
sample_students = [
    {"id": 1, "avg_score": 12},
    {"id": 2, "avg_score": 95},
    {"id": 3, "avg_score": 54},
    {"id": 4, "avg_score": 71},
]

In [7]:
# Generate mock exam answers for every sample student with the GPT-4o client.
# The grading section reads `parsed_answers` as a list of per-student dicts,
# each holding an "answers" list of {"question_id", "answer"} entries.
parsed_answers = generate_llm_exam_answers_list(
    llm=OPENAI_LLM_4O,
    exam_data=exam_output,
    student_data=sample_students,
    is_debug=True,
)

📝 Input token count (gpt-4o): 671
📦 Raw LLM output:
 content='```json\n[\n  {\n    "id": 1,\n    "answers": [\n      {\n        "question_id": "1",\n        "answer": "When Amina makes her phone less bright, the battery lasts longer. This shows that using less energy is good, but I don\'t know much about why."\n      },\n      {\n        "question_id": "2",\n        "answer": "Brian can use water and maybe some plants to show how water moves, but I\'m not sure how to do it exactly."\n      }\n    ]\n  },\n  {\n    "id": 2,\n    "answers": [\n      {\n        "question_id": "1",\n        "answer": "Amina\'s observation shows how science helps us save energy. By reducing screen brightness, she uses less battery power, which is a practical application of energy conservation."\n      },\n      {\n        "question_id": "2",\n        "answer": "Brian can use a bowl of water to show evaporation, a plastic cover to show condensation, and a small plant to demonstrate transpiration. This combin

In [12]:
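# ==== DB MOCK ANSWERS (disabled) ====
# Alternative to the sample-student run above: load the exam and the classroom
# from data/*.json and generate answers for them instead. Uncomment to use.
# NOTE: any output displayed under this cell is left over from an earlier run
# in which this code was active.
# ====================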
# exam = []
# EXAM_FILE = "data/exam.json"

# with open(EXAM_FILE, "r") as f:
#     exam = json.load(f)

# print(exam)


# students = []
# STUDENT_FILE = "data/classroom.json"

# with open(STUDENT_FILE, "r") as f:
#     students = json.load(f)

# print(students)


# parsed_answers = generate_llm_exam_answers_list(
#     llm=OPENAI_LLM_4O,
#     exam_data=exam,
#     student_data=students,
#     is_debug=True,
# )

[{'id': 1, 'question': 'During a science class, Amina accidentally spills a chemical on her hand. What immediate First Aid measure should she take to ensure her safety?', 'expected_answer': 'Amina should immediately rinse her hand under running water for at least 15 minutes to remove the chemical and prevent further injury.'}, {'id': 2, 'question': 'Amina noticed that when she left her iron nail outside in the rain, it developed a reddish-brown coating. Evaluate whether this change is physical or chemical and justify your answer.', 'expected_answer': 'This change is chemical because the iron reacts with oxygen and water to form iron oxide, a new substance.'}, {'id': 3, 'question': 'Amina noticed that her voice has started to change and she is growing taller. What are these changes called, and why do they occur during adolescence?', 'expected_answer': 'These changes are called physical changes during adolescence, and they occur due to hormonal changes in the body that prepare individual

## Grade answers

In [9]:
def get_answers_for_question(target_question_id, start_count, answer_sets=None):
    """Collect every student's answer to one question, numbered sequentially.

    Args:
        target_question_id: Id of the question to collect answers for. Ids are
            compared as strings, so ``1`` and ``"1"`` are equivalent.
        start_count: ``answer_id`` to give to the first matching answer.
        answer_sets: Optional iterable of per-student dicts, each with an
            "answers" list of {"question_id", "answer"} dicts. When omitted,
            the notebook-level ``parsed_answers`` is used.

    Returns:
        A ``(answers, next_count)`` tuple: the matching answers as
        {"answer_id", "answer"} dicts in student order, and the next unused
        ``answer_id`` so numbering can continue across questions.
    """
    if answer_sets is None:
        # Fall back to the notebook global to keep existing two-argument calls working.
        answer_sets = parsed_answers

    wanted_id = str(target_question_id)
    result = []
    count = start_count
    for entry in answer_sets:
        for answer in entry["answers"]:
            # The LLM output may carry question_id as a JSON string or as a
            # number; normalise both sides so neither form is silently dropped.
            if str(answer["question_id"]) == wanted_id:
                result.append({
                    "answer_id": count,
                    "answer": answer["answer"],
                })
                count += 1
    return result, count


# Assemble one grading payload per exam question: the question text, its
# expected answer, the rubrics for its sub-strand, and every student's answer.
grouped_answers_data = []
count = 1  # running answer_id, continued across all questions

for question_number, exam_question in enumerate(exam_questions, start=1):
    question_text = exam_question["description"]
    model_answer = exam_question["expected_answer"]
    rubrics = get_rubrics_by_sub_strand(
        sub_strand_name=exam_question["sub_strand"],
    )
    # Question ids in the simulated answers are the 1-based exam positions.
    student_answers, count = get_answers_for_question(question_number, count)

    grouped_answers_data.append({
        "question": question_text,
        "expected_answer": model_answer,
        "rubrics": rubrics,
        "student_answers": student_answers,
    })

In [10]:
# Save the grouped answers as JSON so the grading input can be inspected.
ANSWERS_LIST_OUTPUT_FILE = "output/answers_list.json"

# Create the target directory first: on a fresh run nothing guarantees that
# it exists, and open(..., 'w') does not create missing directories.
answers_output_dir = os.path.dirname(ANSWERS_LIST_OUTPUT_FILE)
if answers_output_dir:
    os.makedirs(answers_output_dir, exist_ok=True)

with open(ANSWERS_LIST_OUTPUT_FILE, 'w', encoding='utf-8') as f:
    # ensure_ascii=False keeps non-ASCII characters readable in the file.
    json.dump(grouped_answers_data, f, ensure_ascii=False, indent=4)
print(f"✅ Mocked Answers list written to {ANSWERS_LIST_OUTPUT_FILE}")

✅ Mocked Answers list written to output/answers_list.json


In [11]:
# Grade the grouped student answers with the GPT-4o client.
# NOTE(review): the raw output shown for this cell is a list of
# {"answer_id", "score"} pairs; the parsed return shape is defined in
# gen.utils — confirm there.
parsed_grades = generate_llm_answer_grades_list(
    llm=OPENAI_LLM_4O,
    grouped_answers_data=grouped_answers_data,
    is_debug=True,
)


📝 Input token count (gpt-4o): 546
📦 Raw LLM output:
 content='```json\n[\n    {"answer_id": 1, "score": 1},\n    {"answer_id": 2, "score": 4},\n    {"answer_id": 3, "score": 2},\n    {"answer_id": 4, "score": 3}\n]\n```' additional_kwargs={'refusal': None} response_metadata={'token_usage': {'completion_tokens': 63, 'prompt_tokens': 553, 'total_tokens': 616, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-2024-08-06', 'system_fingerprint': 'fp_f5bdcc3276', 'id': 'chatcmpl-BWin0fkD2qxQrUjhZrYXHMGEH5LHd', 'service_tier': 'default', 'finish_reason': 'stop', 'logprobs': None} id='run--2b01feaf-eb37-4498-a3b5-b9d3cdb18b63-0' usage_metadata={'input_tokens': 553, 'output_tokens': 63, 'total_tokens': 616, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}}
📤 Out