In [None]:
import sys
import os
sys.path.insert(0, os.path.abspath(".."))
from gen.curriculum import get_cbc_grouped_questions, get_rubrics_by_sub_strand, get_all_strand_names
from gen.utils import *

In [2]:
# Print the names returned by gen.curriculum.get_all_strand_names() as a JSON array.
# NOTE(review): `json` is never imported explicitly in this notebook; it presumably
# arrives through `from gen.utils import *` — confirm, or import it directly.
names = get_all_strand_names()
print(json.dumps(names))

["Scientific Investigation", "Introduction to Integrated Science", "Laboratory Safety", "Laboratory Apparatus and Instruments", "Mixtures, Elements and Compounds", "Mixtures", "Acids, Bases and Indicators", "Living Things and Their Environment", "Human Reproductive System", "Human Excretory System", "Force and Energy", "Electrical Energy", "Magnetism", "Mixtures, Elements and Compounds", "Elements and Compounds", "Physical and Chemical Changes", "Classes of Fire", "Living Things and the Environment", "The Cell", "Movement of Materials In and Out of the Cell", "Reproduction in Human Beings", "Force and Energy", "Transformation of Energy", "Pressure", "Mixtures, Elements and Compounds", "Structure of the Atom", "Metals and Alloys", "Water Hardness", "Living Things and Their Environment", "Pollution", "Conservation of the Environment", "Force and Energy", "Friction", "Machines"]


## Generate questions

**get_cbc_grouped_questions**
- Generates {x} number of questions grouped by sub_strands
- Uses a **round-robin algorithm** to ensure fair distribution of both sub strands and bloom skills in the question plan (each appears at least once before repeating the selection cycle).

In [3]:
# Build the question plan (grouped by sub-strand) for strands 1-4.
# NOTE(review): the recorded output reports "Total: 9" although question_count=10 —
# confirm what that total counts (groups vs. questions).
grouped_questions = get_cbc_grouped_questions(
    strand_ids=[1,2,3,4], 
    question_count=10,
    bloom_skill_count=2,
    is_debug=True,
)


✅ Question breakdown written to /Users/melaniefayne/Desktop/mtihani/mtihani_api/mtihaniapi/gen/output/question_breakdown.json. Total: 9


**generate_llm_question_list**
- Takes a list of grouped cbc question plans (by sub_strand) and generates a list of questions for each specified bloom skill in each sub_strand.
- Uses the CREATE_EXAM_PROMPT_TEXT prompt
- Generating by sub_strand is for enhanced context.

In [4]:
# Ask the LLM to write the questions described by the plan built in the previous cell.
all_question_list = generate_llm_question_list(
    grouped_question_data=grouped_questions,
    is_debug=True,
)

# If there was a generation error
# (a non-list result is only printed; nothing stops later cells from running on it)
if not isinstance(all_question_list, list):
    print(all_question_list)


📝 Input token count (gpt-4o): 715
📦 Raw LLM output:
 content='[{"question":"In a school laboratory, Amina observes that lemon juice turns blue litmus paper red. Evaluate whether lemon juice is an acid or a base and explain your reasoning.","expected_answer":"Lemon juice is an acid because it turns blue litmus paper red, which is a characteristic reaction of acids."},{"question":"Imagine you are tasked with creating a natural indicator for a science project using local plants. Describe the steps you would take to prepare an acid-base indicator from hibiscus flowers.","expected_answer":"To prepare an indicator from hibiscus flowers, crush the flowers to extract the juice, then filter the juice to remove solid particles, and use the filtered liquid as an indicator to test for acids and bases."},{"question":"What is the color change observed when red litmus paper is dipped into a basic solution?","expected_answer":"The red litmus paper turns blue when dipped into a basic solution."},{"que

In [5]:
# Turn the generated questions into exam question records. Later cells read the keys
# "questions", "expected_answers", "description", "expected_answer" and "sub_strand"
# from each record.
exam_questions = get_db_question_objects(
    all_question_list=all_question_list,
    is_debug=True,
)


✅ Question list to /Users/melaniefayne/Desktop/mtihani/mtihani_api/mtihaniapi/gen/output/question_list.json. Total: 10


## Simulate student answers

In [6]:
def convert_to_single_qa_list(data):
    """Reduce each exam record to a single question/expected-answer pair.

    Every record must provide "questions" and "expected_answers"; each may be
    either a plain string or a sequence of alternatives. Only the first
    alternative of each is kept. Records where either side is empty are
    skipped, but the "id" still reflects the record's 1-based position in
    ``data``, so ids may have gaps.

    Returns a list of {"id", "question", "expected_answer"} dicts.
    """
    def _wrap_if_str(value):
        # A bare string counts as a one-element list of alternatives.
        return [value] if isinstance(value, str) else value

    flattened = []
    for position, record in enumerate(data, start=1):
        question_options = _wrap_if_str(record["questions"])
        answer_options = _wrap_if_str(record["expected_answers"])

        # Nothing to emit when either side has no alternatives.
        if not (question_options and answer_options):
            continue

        flattened.append({
            "id": position,
            "question": question_options[0],
            "expected_answer": answer_options[0]
        })

    return flattened

# Flatten the exam records to one question/answer pair each (ids are 1-based positions)
# and print the result for inspection.
exam_output = convert_to_single_qa_list(exam_questions)
print(exam_output)

[{'id': 1, 'question': 'What are the three main components of Integrated Science as a field of study?', 'expected_answer': 'The three main components of Integrated Science are biology, chemistry, and physics.'}, {'id': 2, 'question': 'In a school science experiment, Amina mixed salt with water to create a solution. Describe the method she should use to separate the salt from the water and explain why this method is appropriate.', 'expected_answer': 'Amina should use evaporation to separate the salt from the water. This method is appropriate because when the water is heated, it evaporates, leaving the salt behind as a solid.'}, {'id': 3, 'question': 'During a school health club meeting, Amina and her friends discuss the importance of understanding the functions of the male and female reproductive systems. Evaluate why it is essential for adolescents to have this knowledge and how it can impact their health decisions.', 'expected_answer': 'Understanding the functions of the reproductive 

In [7]:
# Four mock students with widely spread average scores; the answer generator is asked
# to write answers in line with each student's avg_score.
# NOTE(review): avg_score looks like a percentage (0-100) — confirm.
sample_students = [{'id': 1, 'avg_score': 12}, {'id': 2, 'avg_score': 95}, {
    'id': 3, 'avg_score': 54}, {'id': 4, 'avg_score': 71},]

**generate_llm_exam_answers_list**
- Takes a list of questions and students and generates sample answers for each as per the student's average score
- Uses the MOCK_EXAM_ANSWERS_PROMPT_TEXT prompt

In [8]:
# Generate mock answers for every (student, question) pair.
# OPENAI_LLM_4O is not defined in this notebook; presumably it comes from
# `from gen.utils import *` — confirm.
parsed_answers = generate_llm_exam_answers_list(
    llm=OPENAI_LLM_4O,
    exam_data=exam_output,
    student_data=sample_students,
    is_debug=True,
)

📝 Input token count (gpt-4o): 1483
📦 Raw LLM output:
 content='```json\n[\n  {\n    "id": 1,\n    "answers": [\n      {\n        "question_id": "1",\n        "answer": "Science is about things like biology, chemistry, and physics, but I am not sure."\n      },\n      {\n        "question_id": "2",\n        "answer": "Amina can maybe use heat to get salt back, but I don\'t know why."\n      },\n      {\n        "question_id": "3",\n        "answer": "Knowing about body is good for health, but I don\'t know much."\n      },\n      {\n        "question_id": "4",\n        "answer": "Magnet can stick to nails."\n      },\n      {\n        "question_id": "5",\n        "answer": "Wash hand with water."\n      },\n      {\n        "question_id": "6",\n        "answer": "Lemon juice is acid because it changes color."\n      },\n      {\n        "question_id": "7",\n        "answer": "Kidneys and bladder are parts, but I don\'t know functions."\n      },\n      {\n        "question_id": "8",\n  

## Grade answers

In [9]:
def get_answers_for_question(target_question_id, start_count, answers=None):
    """Collect every student's answer to one question, numbering them sequentially.

    Args:
        target_question_id: Id of the question to look up. Ids are compared as
            strings, so ``1`` and ``"1"`` are treated as the same question.
        start_count: ``answer_id`` assigned to the first matching answer.
        answers: Per-student records, each holding an ``"answers"`` list of
            ``{"question_id", "answer"}`` dicts. When omitted, the
            notebook-level ``parsed_answers`` is used.

    Returns:
        A ``(matches, next_count)`` tuple. ``matches`` is a list of
        ``{"answer_id", "answer"}`` dicts in the order the students appear;
        ``next_count`` is the first ``answer_id`` that was not used.
    """
    if answers is None:
        # Fall back to the global filled in by the answer-simulation cell.
        answers = parsed_answers

    wanted_id = str(target_question_id)
    result = []
    count = start_count
    for entry in answers:
        for answer in entry["answers"]:
            # question_id comes from LLM-generated JSON (strings in the recorded
            # run); normalise it so integer ids match instead of being dropped.
            if str(answer["question_id"]) == wanted_id:
                result.append({
                    "answer_id": count,
                    "answer": answer["answer"],
                })
                count += 1
    return result, count


# Assemble one grading payload per exam question: the question text, its expected
# answer, the rubrics for its sub-strand, and every student's answer to it.
grouped_answers_data = []
# Running answer_id threaded through every call below, so ids never repeat across questions.
count = 1

for idx, q in enumerate(exam_questions):
    item = {
        "question": q["description"],
        "expected_answer": q["expected_answer"],
        "rubrics": get_rubrics_by_sub_strand(
            sub_strand_name=q["sub_strand"],
        )
    }
    # Answers are matched by position: idx + 1 mirrors the "id" (index + 1) that
    # convert_to_single_qa_list gave each question before answers were generated.
    student_answers, count = get_answers_for_question(idx + 1, count)
    item["student_answers"] = student_answers

    grouped_answers_data.append(item)

In [10]:
# Save the per-question grading payloads as pretty-printed UTF-8 JSON.
ANSWERS_LIST_OUTPUT_FILE = "output/answers_list.json"
# open(..., 'w') raises FileNotFoundError when the target folder is missing
# (e.g. on a fresh checkout), so create it first. `or "."` covers a bare filename.
os.makedirs(os.path.dirname(ANSWERS_LIST_OUTPUT_FILE) or ".", exist_ok=True)
with open(ANSWERS_LIST_OUTPUT_FILE, 'w', encoding='utf-8') as f:
    json.dump(grouped_answers_data, f, ensure_ascii=False, indent=4)
print(f"✅ Mocked Answers list written to {ANSWERS_LIST_OUTPUT_FILE}")

✅ Mocked Answers list written to output/answers_list.json


**generate_llm_answer_grades_list**
- Takes a list of questions with assessment context from the cbc data file
- Each question has the list of student answers to be graded
- Uses the GRADE_ANSWERS_PROMPT_TEXT prompt
- It grades per question for defined context

In [11]:
# Grade the student answers grouped under each question in grouped_answers_data.
# OPENAI_LLM_4O is not defined in this notebook; presumably it comes from
# `from gen.utils import *` — confirm.
parsed_grades = generate_llm_answer_grades_list(
    llm=OPENAI_LLM_4O,
    grouped_answers_data=grouped_answers_data,
    is_debug=True,
)


📝 Input token count (gpt-4o): 484
📦 Raw LLM output:
 content='```json\n[\n    {"answer_id": 1, "score": 1},\n    {"answer_id": 2, "score": 4},\n    {"answer_id": 3, "score": 3},\n    {"answer_id": 4, "score": 3}\n]\n```' additional_kwargs={'refusal': None} response_metadata={'token_usage': {'completion_tokens': 62, 'prompt_tokens': 491, 'total_tokens': 553, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-2024-08-06', 'system_fingerprint': 'fp_a288987b44', 'id': 'chatcmpl-BlEZAfAGnkvFZLebt08bjN2BgFrLE', 'service_tier': 'default', 'finish_reason': 'stop', 'logprobs': None} id='run--973ec780-6f4c-4a57-9bc3-03933c2c88af-0' usage_metadata={'input_tokens': 491, 'output_tokens': 62, 'total_tokens': 553, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}}
📤 Out

## Create Insights

In [None]:
# import sys
# import os
# sys.path.insert(0, os.path.abspath(".."))
# from gen.utils import *

In [2]:
# strand_performance_data = [
#     {
#         "strand_name": "Mixtures (G7)",
#         "strand_grade": 7,
#         "avg_score": 74.5,
#         "avg_expectation_level": "Meets",
#         "bloom_skill_scores": [],
#         "score_variance": {"min": 40.0, "max": 99.0, "std_dev": 10.2},
#         "sub_strand_scores": [
#             {"name": "Elements and Compounds", "percentage": 81.2, "strand_difference": 6.7,
#              "strand_difference_descriptor": "Above Strand Average"},
#             {"name": "Physical and Chemical Changes", "percentage": 66.5,
#              "strand_difference": -8.0, "strand_difference_descriptor": "Below Strand Average"}
#         ],
#         "top_students": [
#             {"student_name": "Akinyi", "avg_score": 98.2,
#                 "avg_expectation_level": "Exceeds", "exam_id": 10, "student_id": 3}
#         ],
#         "bottom_students": [
#             {"student_name": "Wanjiru", "avg_score": 44.5,
#                 "avg_expectation_level": "Below", "exam_id": 10, "student_id": 9}
#         ]
#     }
# ]

# insights_data = generate_llm_strand_insights(
#     strand_performance_data=strand_performance_data,
#     is_debug=True,
# )

In [3]:
# sub_strand_correlations = [
#     {
#         "name": "Elements and Compounds",
#         "average_correlation": -0.48,
#         "strongest_negative_pair": "Acids, Bases and Indicators",
#         "correlation": -0.67
#     }
# ]
# insights_data = generate_llm_sub_strand_corr_insights(
#     sub_strand_correlations=sub_strand_correlations,
#     is_debug=True,
# )

In [None]:
# class_performance_data = {
#     "average_score": 74.1,
#     "average_expectation_level": "Meets",
#     "score_distribution": "Scores range from 40 to 99, with most students scoring between 70 and 80. Standard deviation is 10.2.",
#     "expectation_level_distribution": "Exceeds: 20%, Meets: 60%, Approaching: 15%, Below: 5%",
#     "bloom_skill_scores":
#     [{"name": "Synthesis", "percentage": 81.67}, {"name": "Knowledge", "percentage": 81.0}, {"name": "Application", "percentage": 73.44}, {
#         "name": "Analysis", "percentage": 68.5}, {"name": "Comprehension", "percentage": 71.25}, {"name": "Evaluation", "percentage": 65.0}],
# }

# insights_data = generate_llm_class_perf_insights(
#     class_performance_data=class_performance_data,
#     is_debug=True,
# )

📝 Input token count (gpt-4o): 430
📦 Raw LLM output:
 content='```json\n[\n    "The average score of 74.1% indicates that the class generally performs at a satisfactory level, aligning with the \'Meets\' expectation category.",\n    "A significant majority of students (60%) are meeting expectations, with 20% exceeding them, which is a positive indicator of overall class performance.",\n    "The score distribution shows a wide range from 40 to 99, but most students score between 70 and 80, suggesting a concentration of students around the average score.",\n    "The standard deviation of 10.2 indicates a moderate spread in scores, suggesting some variability in student performance.",\n    "Only 5% of students are below expectations, which is a relatively small portion of the class, indicating that most students are performing at or above the expected level.",\n    "The \'Exceeds\' category, with 20% of students, is a point of pride, showing that a notable portion of the class is performin

In [None]:
# INSIGHTS_OUTPUT_FILE = "output/insights_lits.json"
# with open(INSIGHTS_OUTPUT_FILE, 'w', encoding='utf-8') as f:
#     json.dump(insights_data, f, ensure_ascii=False, indent=4)
# print(f"✅ Insights written to {INSIGHTS_OUTPUT_FILE}")

✅ Insights written to output/insights_lits.json


## Create Follow Up Quizzes

In [13]:
import sys
import os
sys.path.insert(0, os.path.abspath(".."))
from gen.utils import *

In [14]:
# Hand-written sample exam used as input for the follow-up quiz generator.
# NOTE(review): this rebinds `exam_questions` with a different schema than the records
# used in the grading section (here the text is under "question", there it is read
# from "description"); re-running earlier cells after this one will break.
exam_questions = [
    {
        "id": "3",
        "question": "List three safety rules you must observe in a science laboratory.",
        "expected_answer": "Do not eat or drink in the laboratory; wear safety goggles; report spills immediately.",
        "strand": "Scientific Investigation (G7)",
        "sub_strand": "Laboratory Safety",
        "bloom_skill": "Knowledge"
    },
    {
        "id": "1",
        "question": "Differentiate between a mixture and a compound with examples.",
        "expected_answer": "A mixture is a combination of substances that can be separated physically (e.g., sand and salt). A compound is a chemical combination of elements with fixed properties (e.g., water).",
        "strand": "Mixtures, Elements and Compounds (G7)",
        "sub_strand": "Mixtures",
        "bloom_skill": "Analysis"
    },
    {
        "id": "2",
        "question": "State the function of the cell membrane.",
        "expected_answer": "The cell membrane controls the movement of substances in and out of the cell.",
        "strand": "Living Things and Their Environment (G7)",
        "sub_strand": "The Cell",
        "bloom_skill": "Comprehension"
    },
    {
        "id": "4",
        "question": "Explain how you would test for the presence of starch in a leaf.",
        "expected_answer": "Boil the leaf in water, then in alcohol, rinse, and add iodine solution. A blue-black color shows starch is present.",
        "strand": "Living Things and Their Environment (G7)",
        "sub_strand": "The Cell",
        "bloom_skill": "Application"
    }
]

# Hand-written performance summary for one student cluster, passed to the follow-up
# quiz generator together with the sample exam.
# NOTE(review): "top_best_question_ids" / "top_worst_question_ids" hold ints, while the
# "id" values in exam_questions are strings ("1".."4") — confirm the generator
# tolerates the mismatch.
cluster_performance = {
    "cluster_label": "Cluster B",
    "avg_score": 56.7,
    "cluster_size": 12,
    "avg_expectation_level": "Approaching",
    "score_variance": {"min": 47.5, "max": 64.0, "std_dev": 5.2},
    "bloom_skill_scores": [
        {"name": "Knowledge", "percentage": 62.1},
        {"name": "Comprehension", "percentage": 54.2},
        {"name": "Application", "percentage": 49.8},
        {"name": "Analysis", "percentage": 52.3},
        {"name": "Synthesis", "percentage": 45.0},
        {"name": "Evaluation", "percentage": 48.7}
    ],
    "strand_scores": [
        {
            "name": "Scientific Investigation (G7)",
            "percentage": 60.5,
            "sub_strands": [
                {"name": "Laboratory Safety", "percentage": 57.0},
                {"name": "Laboratory Apparatus and Instruments", "percentage": 63.0}
            ]
        },
        {
            "name": "Mixtures, Elements and Compounds (G7)",
            "percentage": 49.0,
            "sub_strands": [
                {"name": "Mixtures", "percentage": 45.5},
                {"name": "Elements and Compounds", "percentage": 52.5}
            ]
        },
        {
            "name": "Living Things and Their Environment (G7)",
            "percentage": 52.0,
            "sub_strands": [
                {"name": "The Cell", "percentage": 53.0},
                {"name": "Human Reproductive System", "percentage": 50.0}
            ]
        }
    ],
    "top_best_question_ids": [3, 1],
    "top_worst_question_ids": [2, 4]
}

# Number of follow-up questions to request.
question_count = 5

In [15]:
# Request `question_count` follow-up questions from the LLM, giving it the sample exam
# and the cluster's performance summary as context.
follow_up_quiz = generate_llm_follow_up_quiz(
    exam_questions=exam_questions,
    cluster_performance=cluster_performance,
    question_count=question_count,
    is_debug=True,
)

📝 Input token count (gpt-4o): 956
📦 Raw LLM output:
 content='```json\n[\n  {\n    "question": "Why is it important for the cell membrane to be selectively permeable, and how does this relate to everyday life?",\n    "expected_answer": "The cell membrane\'s selective permeability allows essential nutrients to enter the cell while keeping harmful substances out, similar to how a security system protects a house by allowing only authorized people to enter.",\n    "grade": 7,\n    "strand": "Living Things and Their Environment (G7)",\n    "sub_strand": "The Cell",\n    "bloom_skill": "Comprehension"\n  },\n  {\n    "question": "Describe a simple experiment you could conduct at home to observe the process of diffusion using common kitchen items.",\n    "expected_answer": "Place a drop of food coloring in a glass of water and observe how the color spreads without stirring, demonstrating diffusion.",\n    "grade": 7,\n    "strand": "Living Things and Their Environment (G7)",\n    "sub_strand

In [16]:
# Save the generated follow-up quiz as pretty-printed UTF-8 JSON.
FOLLOW_UP_OUTPUT_FILE = "output/follow_up_list.json"
# open(..., 'w') raises FileNotFoundError when the target folder is missing
# (e.g. on a fresh checkout), so create it first. `or "."` covers a bare filename.
os.makedirs(os.path.dirname(FOLLOW_UP_OUTPUT_FILE) or ".", exist_ok=True)
with open(FOLLOW_UP_OUTPUT_FILE, 'w', encoding='utf-8') as f:
    json.dump(follow_up_quiz, f, ensure_ascii=False, indent=4)
print(f"✅ Follow up quiz written to {FOLLOW_UP_OUTPUT_FILE}")

✅ Follow up quiz written to output/follow_up_list.json


# DB Mock-ups

In [1]:
import sys
import os
sys.path.insert(0, os.path.abspath(".."))
from gen.utils import *

# ==== DB MOCK ANSWERS ====
# Load an exam and a classroom from JSON files, then generate mock answers for
# every student in that classroom.
exam = []
EXAM_FILE = "data/exam.json"

# Read as UTF-8 explicitly: the notebook writes its JSON with encoding='utf-8' and
# ensure_ascii=False, and the platform default encoding is not UTF-8 everywhere.
with open(EXAM_FILE, "r", encoding="utf-8") as f:
    exam = json.load(f)

print(exam)


students = []
STUDENT_FILE = "data/classroom.json"

with open(STUDENT_FILE, "r", encoding="utf-8") as f:
    students = json.load(f)

# NOTE(review): this prints the whole classroom file into the saved notebook output;
# if it holds real student details, clear the output before sharing.
print(students)


parsed_answers = generate_llm_exam_answers_list(
    llm=OPENAI_LLM_4O,
    exam_data=exam,
    student_data=students,
    is_debug=True,
)

[{'id': 97, 'question': 'What is the function of a Bunsen burner in a science laboratory?', 'expected_answer': 'A Bunsen burner is used to heat substances during experiments.'}, {'id': 98, 'question': 'In a school science experiment, Amina mixed salt with water to create a solution. Describe how she can separate the salt from the water to obtain dry salt again.', 'expected_answer': 'Amina can separate the salt from the water by heating the solution until the water evaporates, leaving the dry salt behind.'}, {'id': 99, 'question': "Amina noticed that her younger brother's voice has started to deepen and he is growing facial hair. What are the social and reproductive implications of these changes during adolescence?", 'expected_answer': 'These changes indicate that her brother is going through puberty, which can lead to increased social interactions and awareness of reproductive responsibilities.'}, {'id': 100, 'question': 'What are the two poles of a magnet called?', 'expected_answer': 