In [None]:
import json
import pandas as pd
import os
from typing import List, Literal, Annotated, Dict
from tqdm import tqdm

# Pydantic imports
from pydantic import BaseModel, Field, conint, ValidationError, ConfigDict

# OpenAI imports
import openai
from openai import OpenAI


In [14]:
# --- 1. Data Source ---
# Master list of moderation categories. Each entry is a dict with:
#   code           - full category code, "<group>/<slug>" (e.g. "u/violence-or-suffering")
#   title          - human-readable category name
#   llmDescription - instruction text inserted verbatim into the LLM prompt
#   abbreviation   - short unique key the LLM uses in its JSON output (e.g. "u2")
#   criteria5..1   - rubric text for each Likert level (5 = not involved, 1 = heavily involved)
# Entries whose code starts with "n/" or "t/" define only criteria5 and criteria1;
# all other entries define the full 5-level rubric.
# NOTE: every string here is prompt text, so wording changes alter what the LLM is told.
moderation_category_groups_data_source = [
    {
        "code": "l/discriminatory-language",
        "title": "Discriminatory behaviour or language",
        "llmDescription": "Flag content depicting or discussing discriminatory behaviour, including historic injustice or outdated representations, based on race, gender, disability, religion, sexual orientation, or other grounds. Include the use of slurs or terms of abuse.",
        "abbreviation": "l1",
        "criteria5": "Not involved: Content contains no discussion or depiction of discriminatory behaviour or language.",
        "criteria4": "Minimally involved: Content makes a brief, passing reference to a discriminatory concept or an outdated social attitude.",
        "criteria3": "Moderately involved: Content includes isolated or historical examples of discriminatory behaviour or language, clearly contextualized for educational or analytical purposes (e.g., studying historical injustice).",
        "criteria2": "Significantly involved: Content centrally discusses or depicts specific instances of discriminatory behaviour or historical injustice as a core part of the lesson.",
        "criteria1": "Heavily involved: Content is focused on depicting or analyzing overt, strong, or frequent discriminatory behaviour or language related to race, gender, disability, religion, or sexual orientation. Includes prominent use of slurs or epithets."
    },
    {
        "code": "l/offensive-language",
        "title": "Language may offend",
        "llmDescription": "Flag content containing offensive language. This includes swear words, curse words, and terms of racist, ethnic, sexual, or sexist abuse, as well as pejorative terms related to disability, sexuality, or religion. Exclude any language that is used to incite hatred.",
        "abbreviation": "l2",
        "criteria5": "Not involved: Content contains no swear words or otherwise offensive language.",
        "criteria4": "Minimally involved: Content contains a single, mild swear word or a term that could be considered mildly offensive.",
        "criteria3": "Moderately involved: Content contains isolated swear words or offensive terms, often for literary or characterization purposes, and not used gratuitously.",
        "criteria2": "Significantly involved: Content contains multiple instances of offensive language, or a single instance of a particularly strong swear word or abusive term.",
        "criteria1": "Heavily involved: Content contains frequent or strong use of offensive swear words, or terms of racist, sexist, or other abuse."
    },
    {
        "code": "u/sensitive-content",
        "title": "Sensitive or upsetting content",
        "llmDescription": "Flag content depicting or discussing topics that pupils may find sensitive or upsetting. This includes scary, confusing, or unsettling events or situations where individuals are placed in danger; personal safety topics like bullying, peer pressure, feeling unsafe, being asked to keep secrets, and consent; significant life events such as bereavement, death, divorce, adoption, and migration; health and medical topics like illness, injury, disease, medical procedures, references to blood, vaccinations, abortion, euthanasia, and organ donation; substance use, including smoking, vaping, alcohol use, and drug use (legal and illegal); and broader sensitive themes such as climate change, extinction, genetics and inheritance, terrorism, extremism, radicalisation, and household items which could pose a risk.",
        "abbreviation": "u1",
        "criteria5": "Not involved: Content contains no upsetting, disturbing, or sensitive material.",
        "criteria4": "Minimally involved: Content has a brief, non-graphic, or passing reference to a sensitive topic. This could be a mention of smoking, vaping, alcohol use, a reference to blood, or household items which could pose a risk.",
        "criteria3": "Moderately involved: Content mentions sensitive topics, but they are handled with care, are not graphic, and are not the sole focus. Examples include adoption, migration, peer pressure, vaccinations, organ donation, or genetics and inheritance.",
        "criteria2": "Significantly involved: Content directly discusses or depicts sensitive topics that are central to the lesson. This includes themes like divorce, consent, substance abuse, climate change, extinction, extremism, or radicalisation.",
        "criteria1": "Heavily involved: Content includes prominent, direct, or potentially graphic depiction/discussion of highly upsetting themes. The primary focus is on a topic such as bereavement, death, serious illness or injury, abortion, euthanasia, terrorism, or significant bullying."
    },
    {
        "code": "u/violence-or-suffering",
        "title": "Violence or suffering",
        "llmDescription": "Flag content depicting or discussing violence or suffering. This includes fighting, war, genocide, death, famine, natural disasters, and animal cruelty.",
        "abbreviation": "u2",
        "criteria5": "Not involved: Content contains no depiction or discussion of violence or suffering.",
        "criteria4": "Minimally involved: Content has a brief, non-graphic, or passing reference to violence or suffering (e.g., a character in a story was in a past war).",
        "criteria3": "Moderately involved: Content mentions violence or suffering (e.g., animal cruelty, historical conflicts), but they are handled with care, are not graphic, and are not the sole focus.",
        "criteria2": "Significantly involved: Content directly discusses or depicts violence, famine, or natural disasters in a way that is central to the lesson (e.g., a history lesson on a specific war).",
        "criteria1": "Heavily involved: Content includes prominent, direct, or potentially graphic depiction/discussion of violence, war, genocide, or intense suffering."
    },
    {
        "code": "u/mental-health-challenges",
        "title": "Mental health challenges",
        "llmDescription": "Flag content depicting or discussing mental health challenges, such as depression, anxiety, eating disorders, and substance abuse. Exclude self-harm, suicide and attempted suicide.",
        "abbreviation": "u3",
        "criteria5": "Not involved: Content contains no discussion of mental health challenges.",
        "criteria4": "Minimally involved: Content has a brief, non-detailed, or passing reference to a mental health challenge.",
        "criteria3": "Moderately involved: Content mentions mental health challenges, such as anxiety or substance abuse, but they are handled with care and are not the sole focus.",
        "criteria2": "Significantly involved: Content directly discusses or depicts mental health challenges as a key theme in a text or as a central part of the lesson.",
        "criteria1": "Heavily involved: The lesson's primary focus is on a specific mental health challenge, such as depression or an eating disorder, with detailed discussion."
    },
    {
        "code": "u/crime-or-illegal-activities",
        "title": "Crime or illegal activities",
        "llmDescription": "Flag content depicting or discussing crime or illegal activities. This includes references to serious violent crimes such as murder, honour-based violence, terrorism, knife crime, and gangs; exploitation and ideological influence like child criminal or sexual exploitation, extremism, and radicalisation; substance and age-related offenses such as the sale or use of illegal drugs, underage alcohol consumption, underage use of cigarettes, and gambling; digital crimes like spreading misinformation (including fake news and deepfakes) and breaking copyright laws; and other illegal acts for minors, such as underage sexual behaviours or getting a tattoo.",
        "abbreviation": "u4",
        "criteria5": "Not involved: Content contains no depiction or discussion of crime or illegal activities.",
        "criteria4": "Minimally involved: Content has a brief, non-graphic, or passing reference to a crime or illegal act.",
        "criteria3": "Moderately involved: Content mentions crime or illegal activities (e.g., spreading misinformation like fake news or deepfakes, copyright infringement, underage use of cigarettes, gambling, or getting a tattoo under the legal age) as a secondary theme or example.",
        "criteria2": "Significantly involved: Content directly discusses or depicts serious crime or illegal activities (e.g., the sale or use of illegal drugs, extremism, radicalisation) as a central part of the lesson.",
        "criteria1": "Heavily involved: The lesson's primary focus is on a specific, serious crime or illegal activity, such as murder, terrorism, knife crime, gangs, child criminal exploitation, or honour-based violence, with direct and detailed discussion."
    },
    {
        "code": "u/sexual-violence",
        "title": "Sexual violence",
        "llmDescription": "Flag content depicting or discussing sexual violence. This includes: sexual abuse, domestic abuse, forced marriage, Female Genital Mutilation (FGM), grooming, exploitation, coercion, harassment, and rape.",
        "abbreviation": "u5",
        "criteria5": "Not involved: Content contains no depiction or discussion of sexual violence.",
        "criteria4": "Minimally involved: Content has a brief, non-graphic, or passing reference to a related topic, such as grooming.",
        "criteria3": "Moderately involved: Content mentions themes of sexual violence (e.g., coercion, harassment) in a non-graphic, contextualized way (e.g., analyzing a literary text).",
        "criteria2": "Significantly involved: Content directly discusses or depicts sexual violence (e.g., forced marriage, domestic abuse, FGM) as a central part of the lesson.",
        "criteria1": "Heavily involved: Content includes prominent, direct, or potentially graphic depiction/discussion of sexual violence, abuse, or exploitation."
    },
    {
        "code": "s/nudity-or-sexual-content",
        "title": "Nudity or sexual content",
        "llmDescription": "Flag content depicting or discussing nudity, sex, or sexual themes. This includes: images or references to nudity (including in art or history), sex, sexual body parts, contraception, sex education, anatomy, relationships, or reproduction.",
        "abbreviation": "s1",
        "criteria5": "Not involved: Content contains no references to nudity or sexual themes.",
        "criteria4": "Minimally involved: An incidental or brief mention of related topics (e.g., a single non-explicit line in a novel about a relationship).",
        "criteria3": "Moderately involved: Content includes nudity in a clear artistic or historical context (e.g., classical statues) or discusses relationships without explicit detail.",
        "criteria2": "Significantly involved: Content directly discusses sexual themes or nudity as a key part of the lesson (e.g., puberty, reproduction, anatomy).",
        "criteria1": "Heavily involved: Content contains prominent or repeated references to nudity, sex, or sexual body parts, or is explicitly focused on sex education."
    },
    {
        "code": "p/equipment-required",
        "title": "Equipment required",
        "llmDescription": "Flag content that requires equipment beyond standard classroom stationery (e.g., art materials, science apparatus, sports equipment).",
        "abbreviation": "p2",
        "criteria5": "Not involved: Involves no equipment beyond standard stationery (pen, paper).",
        "criteria4": "Minimally involved: Requires only one or two simple items beyond stationery (e.g., a ruler, glue stick).",
        "criteria3": "Moderately involved: Requires common non-standard equipment with low inherent risk (e.g., basic art supplies like paint, sports balls, calculators).",
        "criteria2": "Significantly involved: Requires several pieces of non-standard equipment that are key to the lesson's success (e.g., a range of art supplies, specific PE equipment like cones and bibs).",
        "criteria1": "Heavily involved: Requires extensive or specialist equipment beyond what is typically found in a standard classroom (e.g., full science lab apparatus, specific sports kits)."
    },
    {
        "code": "p/equipment-risk-assessment",
        "title": "Risk assessment required",
        "llmDescription": "Flag content that includes physical activities, outdoor and adventurous activities and fieldwork or equipment requiring a risk assessment. This includes use of ingredients or materials that may contain allergens, scissors, chemicals, heat sources, sharp tools, or physically demanding activities.",
        "abbreviation": "p3",
        "criteria5": "Not involved: Involves no activities or equipment that would require a risk assessment.",
        "criteria4": "Minimally involved: Suggests minor physical movement within the classroom where risk is negligible but should be supervised.",
        "criteria3": "Moderately involved: Involves low-risk equipment or activities where a brief, informal risk assessment is advisable (e.g., use of scissors, potential allergens in food tech).",
        "criteria2": "Significantly involved: Involves activities or equipment requiring a formal risk assessment due to potential hazards (e.g., science experiments with heat, use of sharp tools, outdoor fieldwork).",
        "criteria1": "Heavily involved: Involves activities or equipment with high inherent risk requiring a formal, detailed risk assessment (e.g., use of hazardous chemicals, power tools, contact sports)."
    },
    {
        "code": "p/outdoor-learning",
        "title": "Outdoor learning",
        "llmDescription": "Flag content that suggests or requires adventurous or outdoor learning activities taking place outside the classroom, including fieldwork or exploration.",
        "abbreviation": "p4",
        "criteria5": "Not involved: The lesson is designed to take place entirely within the classroom.",
        "criteria4": "Minimally involved: A brief activity is suggested that could take place just outside the classroom door or by a window.",
        "criteria3": "Moderately involved: The lesson suggests an optional or supplementary outdoor activity.",
        "criteria2": "Significantly involved: A major component of the lesson requires being outdoors or in a non-classroom environment on school grounds (e.g., a PE lesson, a science experiment in the playground).",
        "criteria1": "Heavily involved: The entire lesson is designed as an off-site or adventurous outdoor activity (e.g., a geography field trip, a nature walk in a forest)."
    },
    {
        "code": "p/additional-qualifications",
        "title": "Additional qualifications required",
        "llmDescription": "Flag content that includes activities requiring additional qualifications beyond standard teaching certifications (e.g., swimming, trampolining, contact rugby).",
        "abbreviation": "p5",
        "criteria5": "Not involved: Involves no activities that require qualifications beyond standard teaching certifications.",
        "criteria4": "Minimally involved: Involves an activity where extra experience would be beneficial but is not required.",
        "criteria3": "Moderately involved: Recommends specific experience or training but may not require formal certification (e.g., leading complex drama exercises, using specific D&T equipment).",
        "criteria2": "Significantly involved: Requires specific training or qualifications for activities with a moderate level of risk or technical skill (e.g., gymnastics vaulting, rugby tackling).",
        "criteria1": "Heavily involved: Requires specialist teacher qualifications and certifications for high-risk activities (e.g., swimming, trampolining)."
    },
    {
        "code": "e/rshe-content",
        "title": "RSHE content",
        "llmDescription": "Flag content that contains RSHE (Relationships, Sex and Health Education) topics. This includes: relationships, gender, sex education, health, mental wellbeing, bullying, and online harms.",
        "abbreviation": "e1",
        "criteria5": "Not involved: Content has no relation to RSHE topics.",
        "criteria4": "Minimally involved: A brief, passing reference to an RSHE topic (e.g., a character in a story deals with a relationship issue).",
        "criteria3": "Moderately involved: RSHE content is discussed as a secondary topic or example (e.g., discussing healthy eating in a science lesson).",
        "criteria2": "Significantly involved: RSHE content is a key component of the lesson, even if the subject is not RSHE (e.g., a drama lesson about bullying).",
        "criteria1": "Heavily involved: The lesson is primarily focused on a core RSHE topic (e.g., consent, puberty, mental wellbeing, online harms)."
    },
    {
        "code": "r/recent-content",
        "title": "Recent content (Post-December 2023 Events)",
        "llmDescription": "Flag content depicting or discussing events that occurred after December 2023. Exclude armed conflicts.",
        "abbreviation": "r1",
        "criteria5": "Not involved: The lesson contains no references to events after December 2023.",
        "criteria4": "Minimally involved: An incidental or minor reference is made to an event that occurred after December 2023.",
        "criteria3": "Moderately involved: An event after December 2023 is used as a supporting example or a point of discussion.",
        "criteria2": "Significantly involved: A significant portion of the lesson relies on information about an event from after December 2023.",
        "criteria1": "Heavily involved: The lesson's primary focus is an event that occurred after December 2023."
    },
    {
        "code": "r/recent-conflicts",
        "title": "Recent or Current Conflicts",
        "llmDescription": "Flag content depicting or discussing armed conflicts that occurred between 2009 and December 2023. Do not include conflicts ongoing after December 2023.",
        "abbreviation": "r2",
        "criteria5": "Not involved: The lesson contains no references to armed conflicts from 2009 to December 2023.",
        "criteria4": "Minimally involved: An incidental or minor reference is made to an armed conflict from this period.",
        "criteria3": "Moderately involved: An armed conflict from this period is used as a supporting example or case study.",
        "criteria2": "Significantly involved: A significant portion of the lesson is dedicated to discussing an armed conflict from this period.",
        "criteria1": "Heavily involved: The lesson's primary focus is an armed conflict from the 2009 - December 2023 period."
    },
    {
        "code": "n/self-harm-suicide",
        "title": "Self-harm and Suicide",
        "llmDescription": "Block any content that depicts, discusses, guides, or could encourage ideation of self-harm or suicide.",
        "abbreviation": "n1",
        "criteria5": "Compliant: Content does not contain any material related to self-harm or suicide.",
        "criteria1": "Blocked: Content discusses, guides, or could encourage ideation of self-harm or suicide."
    },
    {
        "code": "n/history-homosexuality-gender-identity",
        "title": "History of Homosexuality and Gender Identity",
        "llmDescription": "Block content that depicts or discusses the history of homosexuality or historical/societal views on gender identity.",
        "abbreviation": "n2",
        "criteria5": "Compliant: Content does not contain any material related to the history of homosexuality or gender identity.",
        "criteria1": "Blocked: Content that depicts or discusses the history of homosexuality or historical/societal views on gender identity."
    },
    {
        "code": "n/child-specific-advice",
        "title": "Child specific advice",
        "llmDescription": "Block content that provides guidance or advice for specific children or responds to disclosures.",
        "abbreviation": "n3",
        "criteria5": "Compliant: Content is general and does not provide child-specific advice.",
        "criteria1": "Blocked: Content provides guidance or advice for specific children or responds to disclosures."
    },
    {
        "code": "n/specific-laws",
        "title": "Specific Laws",
        "llmDescription": "Block content that discusses or provides specific legal guidance or advice.",
        "abbreviation": "n4",
        "criteria5": "Compliant: Content does not provide specific legal advice.",
        "criteria1": "Blocked: Content discusses or provides specific legal guidance or advice."
    },
    {
        "code": "n/health-and-safety",
        "title": "Health and Safety",
        "llmDescription": "Block content that provides specific health and safety guidance or instructions.",
        "abbreviation": "n5",
        "criteria5": "Compliant: Content does not provide specific health and safety instructions.",
        "criteria1": "Blocked: Content provides specific health and safety guidance or instructions."
    },
    {
        "code": "n/first-aid",
        "title": "First Aid",
        "llmDescription": "Block content that provides first aid instructions or guidance.",
        "abbreviation": "n6",
        "criteria5": "Compliant: Content does not provide first aid instructions.",
        "criteria1": "Blocked: Content provides first aid instructions or guidance."
    },
    {
        "code": "n/current-conflicts",
        "title": "Current Conflicts",
        "llmDescription": "Block content that depicts or discusses armed conflicts that started or were ongoing after December 2023.",
        "abbreviation": "n7",
        "criteria5": "Compliant: Content does not depict or discuss armed conflicts post-dating December 2023.",
        "criteria1": "Blocked: Content depicts or discusses armed conflicts that started or were ongoing after December 2023."
    },
    {
        "code": "t/guides-self-harm-suicide",
        "title": "Guides self-harm or suicide",
        "llmDescription": "Block any content that encourages, guides, promotes or provides instructions, or learning materials on how to self-harm or commit suicide.",
        "abbreviation": "t1",
        "criteria5": "Fully compliant: Content does not contain any such material.",
        "criteria1": "Severely non-compliant: lesson contains content that encourages or promotes harming oneself, including self harm or suicide, or provides guides, instructions or learning material on how to do this."
    },
    {
        "code": "t/encourages-harmful-behaviour",
        "title": "Encourages harmful behaviour",
        "llmDescription": "Block content that encourages, guides, promotes or provides instructions, or learning materials for harmful behaviour. This includes harming others via offensive language, using weapons, engaging in risky sexual behaviour, harassment, spreading misinformation, or using pornography.",
        "abbreviation": "t2",
        "criteria5": "Fully compliant: Content does not contain any such material.",
        "criteria1": "Severely non-compliant: lesson contains content that encourages or promotes harmful or dangerous behaviour, unwanted sexual behaviour, harassment, or misinformation or provides guides, instructions or learning material on how to do this."
    },
    {
        "code": "t/encourages-illegal-activity",
        "title": "Encourages illegal activity",
        "llmDescription": "Block content that encourages, guides, promotes or provides instructions, or learning materials on illegal activities for minors (under 18). This includes drug misuse, illegal sexual behaviour, underage purchase/use of alcohol/tobacco/vapes/fireworks, gambling, carrying weapons, sexting, cybercrime, hacking, or county lines involvement.",
        "abbreviation": "t3",
        "criteria5": "Fully compliant: Content does not contain any such material.",
        "criteria1": "Severely non-compliant: lesson contains encouragement of illegal activity for minors (under 18), such as drug misuse, underage drinking, gambling, or cybercrime."
    },
    {
        "code": "t/encourages-violence-harm-others",
        "title": "Encourages violence or harm to others",
        "llmDescription": "Block content that encourages, guides, promotes or provides instructions for violent behaviour including sexual violence, carrying a weapon, assault, domestic violence, hate crimes, honour-based violence, harassment, stalking, extortion, threats of violence, indecent exposure, revenge porn, gang involvement, county lines, rioting, arson, cyberbullying, doxxing, or inciting violence online.",
        "abbreviation": "t4",
        "criteria5": "Fully compliant: Content does not contain any such material.",
        "criteria1": "Severely non-compliant: lesson contains any encouragement of violent behaviour, including sexual violence, assault, hate crimes, or inciting violence."
    },
    {
        "code": "t/using-creating-weapons",
        "title": "Using or creating weapons",
        "llmDescription": "Block content that encourages, guides, promotes or provides instructions, recipes or learning materials for creating or using weapons (including chemical, biological, explosive, or conventional weapons).",
        "abbreviation": "t5",
        "criteria5": "Fully compliant: Content does not contain any such material.",
        "criteria1": "Severely non-compliant: lesson contains any guides, recipes, or instructions for creating or using weapons."
    },
    {
        "code": "t/using-creating-harmful-substances",
        "title": "Using or creating harmful substances",
        "llmDescription": "Block content that encourages, guides, promotes or provides instructions, recipes or learning materials for creating harmful substances like illegal drugs, poisons, or non-weaponized explosives.",
        "abbreviation": "t6",
        "criteria5": "Fully compliant: Content does not contain any such material.",
        "criteria1": "Severely non-compliant: lesson contains any guides, recipes, or instructions for creating harmful substances like illegal drugs or poisons."
    }
]


In [37]:
# --- 2. Process Data Source to Create Flat List & Mappings ---
# Outputs of this cell:
#   processed_categories_list_with_abbr - one flat record per category
#   _abbreviations_for_literal          - abbreviations in source order
#   abbreviation_to_pydantic_code_map   - abbreviation -> identifier-safe category code
processed_categories_list_with_abbr: List[Dict] = []
_abbreviations_for_literal: List[str] = []
abbreviation_to_pydantic_code_map: Dict[str, str] = {}

# Maps both separators used in category codes ("/" and "-") to "_" in one pass.
_code_separator_table = str.maketrans("/-", "__")

for category in moderation_category_groups_data_source:
    original_code = category["code"]
    pydantic_field_name = original_code.translate(_code_separator_table)

    abbreviation = category.get("abbreviation")
    if not abbreviation:
        raise ValueError(f"Abbreviation missing for category code: {original_code}")
    # The map gains a key for every accepted abbreviation, so a key hit means a repeat.
    if abbreviation in abbreviation_to_pydantic_code_map:
        raise ValueError(f"Duplicate abbreviation '{abbreviation}' found. Abbreviations must be unique.")

    _abbreviations_for_literal.append(abbreviation)
    abbreviation_to_pydantic_code_map[abbreviation] = pydantic_field_name

    record = {
        "pydantic_field_name": pydantic_field_name,
        "abbreviation": abbreviation,
        "title": category["title"],
        "llmDescription": category["llmDescription"],
        "original_code": original_code,
    }
    # Rubric levels the source omits are stored as empty strings.
    record.update(
        {f"criteria{level}": category.get(f"criteria{level}", "") for level in (5, 4, 3, 2, 1)}
    )
    processed_categories_list_with_abbr.append(record)


In [38]:
# --- 3. Define Pydantic Models ---
# Integer score restricted to the inclusive range 1..5.
# The bounds are attached with Field(ge=..., le=...), the constraint form documented
# for pydantic v2. The previous Annotated[int, conint(...)] nested a constrained *type*
# as Annotated metadata, which is not a documented way to attach constraints and
# appears to leave the range unenforced.
LikertScale = Annotated[int, Field(ge=1, le=5)]

# Literal type whose allowed values are exactly the known category abbreviations.
AbbreviatedModerationCategoryCode = Literal[tuple(sorted(_abbreviations_for_literal))]

# Dynamically create NewModerationScores model:
# one required LikertScale field per category, named by the category's abbreviation.
score_fields = {}
for cat_detail in processed_categories_list_with_abbr:
    score_fields[cat_detail['abbreviation']] = (
        LikertScale,
        Field(..., description=f"Score for '{cat_detail['title']}' (Abbreviation: {cat_detail['abbreviation']})")
    )

# The class namespace carries the annotations plus each FieldInfo as the attribute default.
# extra="forbid" rejects score keys that are not known abbreviations.
NewModerationScores = type("NewModerationScores", (BaseModel,), {
    "model_config": ConfigDict(extra="forbid"),
    "__annotations__": {
        k: v[0] for k, v in score_fields.items()
    },
    **{k: v[1] for k, v in score_fields.items()}
})


# Top-level shape of the moderation result:
#   scores             - a score for every category (required)
#   justifications     - abbreviation -> explanation; defaults to {}
#   flagged_categories - list of abbreviations; defaults to []
# Only the keys/items of the last two are validated (against the abbreviation Literal);
# this model does not cross-check them against the values in `scores`.
class NewModerationResponse(BaseModel):
    model_config = ConfigDict(extra="forbid", populate_by_name=True)
    scores: NewModerationScores
    justifications: Dict[AbbreviatedModerationCategoryCode, str] = Field(
        default_factory=dict,
        description="A dictionary where keys are the ABBREVIATED codes of categories that scored less than 5, "
                    "and values are their specific justifications. Empty if all scores are 5."
    )
    flagged_categories: List[AbbreviatedModerationCategoryCode] = Field(
        default_factory=list,
        description="A list of ABBREVIATED codes for all categories that scored less than 5. Empty if all scores are 5."
    )


In [None]:
# --- 4. New Prompt Generation Function ---
def generate_new_moderation_prompt_with_abbr(categories_to_assess: List[dict]) -> str:
    """Build the moderation system prompt for the given categories.

    Args:
        categories_to_assess: Category records. Each must contain the keys
            'title', 'abbreviation', 'original_code' and 'llmDescription';
            'criteria5' .. 'criteria1' are optional, and missing or empty
            levels are left out of that category's rubric.

    Returns:
        The prompt text: objective, one ``<category>`` section per record,
        scoring notes, and the required JSON response format.

    Raises:
        KeyError: If a record lacks one of the four required keys.
    """
    category_blocks = []
    for cat_spec in categories_to_assess:
        # Rubric lines run from 5 (not involved) down to 1 (heavily involved).
        criteria_lines = "\n".join(
            f"- {level}: {cat_spec[f'criteria{level}']}"
            for level in (5, 4, 3, 2, 1)
            if cat_spec.get(f"criteria{level}")
        )
        category_blocks.append(
            f"<category>\n"
            f"Category Title: '{cat_spec['title']}'\n"
            f"Output Score Code (Abbreviation): '{cat_spec['abbreviation']}'\n"
            f"Original Full Code (for context): '{cat_spec['original_code']}'\n"
            f"Description for Assessment: {cat_spec['llmDescription']}\n"
            "Rating Criteria:\n"
            f"{criteria_lines}"
            "\n</category>"
        )
    category_details_text = "\n\n".join(category_blocks)

    # Doubled braces ({{}}) below are literal "{}" in the rendered prompt.
    return f"""
OBJECTIVE:

You are a content moderation supervisor examining a lesson plan document. Your job is to:
1.  Assess the lesson plan against a list of specific content categories detailed below.
2.  Provide a 1-5 Likert score for EACH category. A score of 5 means the content is not present or not involved at all. A score of 1 means it's heavily involved. Scores 2-4 represent varying degrees of involvement.
3.  For ANY category that scores less than 5, provide a concise justification explaining why it received that score.

CATEGORIES FOR ASSESSMENT:

{category_details_text}

IMPORTANT NOTES ON SCORING AND JUSTIFICATION:

*   Your primary goal is to provide content guidance. The score for each category must reflect the presence and significance of relevant content.
*   Pay close attention to the 'Original Full Code' provided for each category, especially for those starting with 'n/' (Not to be planned by Aila) or 't/' (Toxic). A score of 1-4 on these indicates serious issues and requires careful justification.
*   For all other categories, when determining a score, consider the Key Stage (age group) of the pupils if Key Stage is specified within the lesson plan. For example, "Students will analyze Wilfred Owen's poem 'Dulce et Decorum Est', focusing on the graphic imagery of gas attacks and the psychological trauma of World War I soldiers.", would be suitable for KS4 pupils but not for KS2 pupils.

INSTRUCTION:

Your response MUST be a valid JSON object.
Use the 'Output Score Code (Abbreviation)' provided for each category as the key in your JSON response. All categories listed above MUST be included in your response, even if they score 5. Any category that scores less than 5 must have a justification provided in the 'justifications' object, using the same 'Output Score Code (Abbreviation)' as the key. If all categories score 5, the 'justifications' object must be empty: {{}}.

Provide a JSON response with the following top-level keys:

1.  `scores`: An object.
    *   Keys: The 'Output Score Code (Abbreviation)' (e.g., 'l1', 'u2') for EACH category listed above.
    *   Values: The 1-5 Likert scale rating for that category. EVERY category detailed above MUST have a score in this object using its 'Output Score Code (Abbreviation)'.

2.  `justifications`: An object (dictionary).
    *   Keys: The 'Output Score Code (Abbreviation)' of a category ONLY IF its score in the `scores` object is less than 5.
    *   Values: A string containing a concise, precise justification.
    *   If all categories score 5, this field MUST be an empty object: {{}}.

3.  `flagged_categories`: A list of strings.
    *   Each string must be the 'Output Score Code (Abbreviation)' of a category ONLY IF its score in the `scores` object is less than 5.
    *   If all categories score 5, this field MUST be an empty list: [].
"""


In [52]:
# --- 5. Updated moderate_lesson_plan Function ---
def moderate_lesson_plan(
        lesson_plan: str,
        llm: str = "gpt-4o",
        temp: float = 0.2,
        api_key: str | None = None
        ) -> NewModerationResponse:
    """Moderate one lesson plan with an OpenAI chat model.

    The system prompt is generated from the module-level
    ``processed_categories_list_with_abbr``; the lesson plan is sent as the
    user message. The model is asked for a JSON object, which is validated
    into a ``NewModerationResponse``.

    Args:
        lesson_plan: Lesson plan text to moderate. ``None`` is sent as "".
        llm: Name of the chat-completions model to call.
        temp: Sampling temperature; must be convertible to ``float``.
        api_key: OpenAI API key. If missing or blank, the client is built
            without an explicit key and relies on its own configuration
            (e.g. the OPENAI_API_KEY environment variable).

    Returns:
        The validated moderation response.

    Raises:
        RuntimeError: If ``temp`` cannot be converted, the client cannot be
            created, the API call fails, the response has no content, or the
            content does not validate as ``NewModerationResponse``. The
            underlying exception, when there is one, is chained as
            ``__cause__``.
    """
    # Validate the cheap scalar parameters before any prompt or network work.
    # float(None) raises TypeError, float("abc") raises ValueError.
    try:
        model_name = str(llm)
        temperature = float(temp)
    except (TypeError, ValueError) as e:
        raise RuntimeError(f"Type conversion error for LLM parameters: {e}") from e

    system_prompt_text = generate_new_moderation_prompt_with_abbr(processed_categories_list_with_abbr)
    user_lesson_plan_text = str(lesson_plan if lesson_plan is not None else "")

    try:
        if api_key and api_key.strip():
            client = OpenAI(api_key=api_key)
        else:
            client = OpenAI()
    except Exception as e:
        raise RuntimeError(
            f"Failed to initialize OpenAI client: {e}. Ensure API key is correctly passed, or OPENAI_API_KEY env var is set."
        ) from e

    messages_payload = [
        {"role": "system", "content": system_prompt_text},
        {"role": "user", "content": user_lesson_plan_text},
    ]

    # Only the network call lives inside this try, so that errors raised by
    # our own response checks below are not relabelled as API failures.
    try:
        response = client.chat.completions.create(
            model=model_name,
            messages=messages_payload,
            temperature=temperature,
            response_format={"type": "json_object"},
        )
    except openai.APIConnectionError as e:
        raise RuntimeError(f"Network error connecting to OpenAI: {e}") from e
    except openai.RateLimitError as e:
        raise RuntimeError(f"OpenAI rate limit exceeded: {e}") from e
    except openai.APIStatusError as e:
        error_message = f"OpenAI API Error (Status {e.status_code}): {getattr(e, 'message', str(e))}"
        http_response = getattr(e, "response", None)
        if http_response is not None:
            # Best effort: prefer the structured error body, fall back to raw text.
            try:
                error_message += f" | Details: {http_response.json()}"
            except Exception:
                error_message += f" | Raw Response: {http_response.text}"
        raise RuntimeError(error_message) from e
    except Exception as e:
        raise RuntimeError(f"Unexpected error making OpenAI API call: {e}") from e

    moderation_data_content: str | None = None
    if response.choices and response.choices[0].message:
        moderation_data_content = response.choices[0].message.content
    if not moderation_data_content:
        raise RuntimeError("OpenAI response did not contain expected data (choices, message, or content is missing/null).")

    try:
        return NewModerationResponse.model_validate_json(moderation_data_content)
    except ValidationError as e:
        print(f"Pydantic Validation error for NewModerationResponse: {e.errors(include_url=False)}")
        print(f"Problematic LLM response content that failed validation: {moderation_data_content}")
        raise RuntimeError(
            f"Invalid JSON structure or type from LLM for NewModerationResponse: {moderation_data_content}"
        ) from e
    except Exception as e:
        print(f"Error processing LLM response into NewModerationResponse: {type(e).__name__} - {e}")
        print(f"Problematic LLM response content: {moderation_data_content}")
        raise RuntimeError(f"Could not process response from LLM: {e}") from e



In [None]:
# --- 6. Updated moderate_dataframe_column Function ---
def moderate_dataframe_column(
    df: pd.DataFrame,
    text_column_name: str,
    output_column_name: str,
    llm_model: str = "gpt-4o",
    temperature: float = 0.2,
    api_key: str | None = None
) -> pd.DataFrame:

    if text_column_name not in df.columns:
        raise ValueError(f"Column '{text_column_name}' not found in DataFrame.")

    results = []
    for idx, row in tqdm(df.iterrows(), total=len(df)):
        text_to_moderate = row[text_column_name]

        if pd.isna(text_to_moderate) or not str(text_to_moderate).strip():
            print(f"Skipping DataFrame row with index {index} due to empty or NaN content.")
            results.append(None)
            continue

        try:
            response = moderate_lesson_plan(
                lesson_plan=str(text_to_moderate),
                llm=llm_model,
                temp=temperature,
                api_key=api_key
            )
            results.append(response)
        except Exception as e:
            print(f"ERROR moderating DataFrame row with index {index}: {e}")
            results.append(None)

    df_copy = df.copy()
    df_copy[output_column_name] = results
    return df_copy


In [None]:
# Simple test of moderation process

lesson_text = """
Today we will learn about World War II. We'll discuss the violence and death
that occurred during battles, including graphic descriptions of soldiers being
killed and civilians suffering. Students will see images of concentration camps
and learn about genocide. We'll also examine some strong language used in
historical documents that includes offensive terms.
"""

# Moderate the sample lesson with the default model and temperature.
# NOTE(review): `api_key` is not defined in this cell; presumably it is set in
# an earlier cell -- confirm before running in a fresh kernel.
result = moderate_lesson_plan(lesson_plan=lesson_text, api_key=api_key)

flagged_codes = result.flagged_categories
print("=== MODERATION RESULTS ===")
print(f"Flagged categories: {flagged_codes}")
print(f"Number of issues found: {len(flagged_codes)}")

# An empty justifications mapping means no justification was returned.
if not result.justifications:
    print("\nNo issues found - all categories scored 5/5")
else:
    print("\nJustifications:")
    for abbr, why in result.justifications.items():
        print(f"  {abbr}: {why}")

# Print every category's score, marking anything below 5 with a warning sign.
print("\nAll scores:")
for abbr, rating in result.scores.model_dump().items():
    marker = "⚠️" if rating < 5 else "✓"
    print(f"  {abbr}: {rating}/5 {marker}")

=== MODERATION RESULTS ===
Flagged categories: ['l1', 'l2', 'u1', 'u2']
Number of issues found: 4

Justifications:
  l1: The lesson includes discussion of offensive terms used in historical documents, presented in an educational context.
  l2: Strong language including offensive terms is mentioned as part of historical documents, but used for educational purposes.
  u1: The lesson directly discusses sensitive topics such as violence, death, and genocide, which are central to the lesson.
  u2: The lesson heavily involves graphic descriptions of violence, death, and suffering during battles and genocide.

All scores:
  l1: 3/5 ⚠️
  l2: 3/5 ⚠️
  u1: 2/5 ⚠️
  u2: 1/5 ⚠️
  u3: 5/5 ✓
  u4: 5/5 ✓
  u5: 5/5 ✓
  s1: 5/5 ✓
  p2: 5/5 ✓
  p3: 5/5 ✓
  p4: 5/5 ✓
  p5: 5/5 ✓
  e1: 5/5 ✓
  r1: 5/5 ✓
  r2: 5/5 ✓
  n1: 5/5 ✓
  n2: 5/5 ✓
  n3: 5/5 ✓
  n4: 5/5 ✓
  n5: 5/5 ✓
  n6: 5/5 ✓
  n7: 5/5 ✓
  t1: 5/5 ✓
  t2: 5/5 ✓
  t3: 5/5 ✓
  t4: 5/5 ✓
  t5: 5/5 ✓
  t6: 5/5 ✓


In [None]:
# Simple test of moderation process (csv input)

input_csv = "new_moderation_dataset_json.csv"

# Keep the smoke test cheap: only the first 10 rows are moderated.
df = pd.read_csv(input_csv).head(10)

# Moderate the transcript column; failed or empty rows come back as None.
df_with_results = moderate_dataframe_column(
    df,
    text_column_name="transcript_sentences",
    output_column_name="new_moderation_json",
    llm_model="gpt-4o",
    temperature=0.2,
    api_key=api_key
)


def _dump_or_none(moderation):
    """Serialize a moderation response to a JSON string, passing None through."""
    if moderation is None:
        return None
    return json.dumps(moderation.model_dump(), ensure_ascii=False)


# Replace the response objects with JSON strings so the column survives CSV export.
df_with_results["new_moderation_json"] = df_with_results["new_moderation_json"].apply(_dump_or_none)

# Print a per-row report, skipping rows without a moderation result.
for idx, mod_json in df_with_results["new_moderation_json"].items():
    if not mod_json:
        continue
    data = json.loads(mod_json)
    flagged = data.get("flagged_categories", [])
    justifications = data.get("justifications", {})
    print(f"\n=== ROW {idx} MODERATION RESULTS ===")
    print(f"Flagged categories: {flagged}")
    print(f"Number of issues found: {len(flagged)}")
    if not justifications:
        print("No issues found - all categories scored 5/5")
    else:
        print("Justifications:")
        for abbr, why in justifications.items():
            print(f"  {abbr}: {why}")
    print("All scores:")
    for abbr, rating in data.get("scores", {}).items():
        marker = "⚠️" if rating < 5 else "✓"
        print(f"  {abbr}: {rating}/5 {marker}")

# Save next to the input, with "_json" appended to the file stem.
base, ext = os.path.splitext(input_csv)
output_csv = f"{base}_json{ext}"
df_with_results.to_csv(output_csv, index=False)
print(f"Saved: {output_csv}")


5

Processing DataFrame row with index 0: Hello everybody and welcome to today's lesson.
My name is Miss Halliday and I will be teaching you t...
Successfully moderated DataFrame row with index 0.

Processing DataFrame row with index 1: Hi, everyone.
My name is Ms. Voyle and welcome to today's lesson where you are going to be writing p...
Successfully moderated DataFrame row with index 1.

Processing DataFrame row with index 2: Hello there, welcome to your lesson today where we are going to look at comparing the poem "Tissue" ...
Successfully moderated DataFrame row with index 2.

Processing DataFrame row with index 3: Hello, there.
Welcome to our lesson today, all about "A Midsummer Night's Dream" by William Shakespe...
Successfully moderated DataFrame row with index 3.

Processing DataFrame row with index 4: Hi, everyone.
My name is Ms. Voyle, and I am really excited to be teaching you your reading lesson t...
Successfully moderated DataFrame row with index 4.

=== ROW 0 MODERATION R