In [None]:
from dotenv import load_dotenv, find_dotenv
# Load settings from a .env file (located by find_dotenv) into the process
# environment before anything below reads os.environ / os.getenv.
load_dotenv(find_dotenv())

import modules.logger_tool as logger
import os

# Logging configuration for this notebook. These assignments run after the
# .env file has been loaded, so they take precedence over any values it set.
os.environ.update({
    'LOG_NAME': 'science',
    'LOG_DIR': 'logs',
    'LOG_LEVEL': 'INFO',
})

# Project logger used by every cell below. LOG_NAME doubles as both the
# logger name and the log file name.
logging = logger.get_logger(
    os.environ['LOG_NAME'],
    log_level=os.environ['LOG_LEVEL'],
    log_path=os.environ['LOG_DIR'],
    log_file=os.environ['LOG_NAME'],
)


In [None]:
import modules.database.schemas.curriculum_neo as neo_curriculum
import modules.database.schemas.relationships.curricular_relationships as neo_relationships
import modules.database.tools.xl_planner_tools as planner
import modules.database.tools.neontology_tools as neon
import modules.database.tools.neo4j_driver_tools as driver_tools
import modules.database.tools.neo4j_session_tools as session_tools

import requests
import pandas as pd
from pydantic import ValidationError


In [None]:
# Name of the database targeted by the admin requests and by every
# `database=db_name` node/relationship merge in the cells below.
db_name = 'science'

In [None]:
# Reset step: ask the local admin API to stop, then drop, the target database
# so the notebook can rebuild it from scratch. Each response body is logged;
# the HTTP status is not checked, so a failure here does not halt the notebook.
for url in (
    'http://localhost:9500/database/admin/stop-database',
    'http://localhost:9500/database/admin/drop-database',
):
    data = {'db_name': db_name}
    response = requests.post(url, json=data)
    logging.info(response.text)

In [None]:
# Create a fresh, empty database for this run.
# NOTE: unlike the stop/drop requests, this endpoint is sent `db_name` as a
# query parameter rather than as a JSON body.
url = 'http://localhost:9500/database/admin/create-database'
params = {'db_name': db_name}
response = requests.post(url, params=params)
logging.info(response.text)

# Fail fast: every later cell writes into this database, so an HTTP error here
# must stop the run instead of being discovered through confusing downstream
# failures. The response body has already been logged above for diagnosis.
response.raise_for_status()

In [None]:
# Obtain a driver for `db_name` from the project's driver helper.
# NOTE(review): `driver` is not referenced by any later cell visible here and
# is never closed — confirm whether it is still needed.
driver = driver_tools.get_driver(database=db_name)


In [None]:
# Initialise the connection for the `neon` helper before any node or
# relationship merge is attempted.
# NOTE(review): called with no arguments, so the connection settings presumably
# come from the environment — confirm.
neon.init_neo4j_connection()


In [None]:
# Read the Excel planner whose path is given by the EXCEL_PLANNER_FILE
# environment variable. The helper returns a mapping of lookup DataFrames.
excel_planner = planner.create_dataframes(os.getenv("EXCEL_PLANNER_FILE"))

# Pull out the five lookup tables used to build the curriculum graph.
(
    topic_df,
    lesson_df,
    statement_df,
    keystagesyllabus_df,
    yeargroupsyllabus_df,
) = (
    excel_planner[sheet_key]
    for sheet_key in (
        'topiclookup_df',
        'lessonlookup_df',
        'statementlookup_df',
        'keystagesyllabuslookup_df',
        'yeargroupsyllabuslookup_df',
    )
)


In [None]:
# Fallback values substituted when a planner cell is empty. All values are
# strings, including the numeric-looking counts.

# Defaults for topic nodes.
default_topic_values = dict(
    topic_assessment_type='Null',
    topic_type='Null',
    total_number_of_lessons_for_topic='1',
    topic_title='Null',
)

# Defaults for topic lesson nodes.
default_topic_lesson_values = dict(
    topic_lesson_title='Null',
    topic_lesson_type='Null',
    topic_lesson_length='1',  # kept as a string in case the model expects one
    topic_lesson_suggested_activities='Null',
    topic_lesson_skills_learned='Null',
    topic_lesson_weblinks='Null',
)

# Defaults for learning statement nodes (fields that might contain NaN).
default_learning_statement_values = dict(
    lesson_learning_statement='Null',
    lesson_learning_statement_type='Student learning outcome',
)


In [None]:
# Bookkeeping shared by the graph-building loops below.
node_library = dict()              # every created node, keyed by its ID
key_stage_nodes_created = dict()   # key stage (str) -> KeyStageNode
year_group_nodes_created = dict()  # year group (int) -> YearGroupNode
last_year_group_node = dict()      # year group (int) -> node, used to link consecutive years
last_key_stage_node = None         # most recently created key stage node, if any

def sort_year_groups(df):
    """Return a copy of ``df`` ordered by year group, numeric years first.

    A ``YearGroupNumeric`` column is added holding ``YearGroup`` converted to a
    number; values that cannot be converted become NaN and therefore sort to
    the end. The input frame is not modified.
    """
    numeric_year = pd.to_numeric(df['YearGroup'], errors='coerce')
    return df.assign(YearGroupNumeric=numeric_year).sort_values(by='YearGroupNumeric')

# Build the key stage -> key stage syllabus -> year group syllabus backbone.
#
# Rows are visited in KeyStage order so each newly created key stage can be
# chained to the one created before it. Key stage and year group nodes are
# created once and afterwards always looked up from their registries, so a
# node left over from an earlier iteration is never linked by mistake.
#
# NOTE(review): key stages and year groups are both stored in `node_library`
# under plain string keys, so a key stage and a year group with the same number
# would overwrite each other there.
for index, ks_row in keystagesyllabus_df.sort_values('KeyStage').iterrows():
    key_stage = str(ks_row['KeyStage'])
    if key_stage not in key_stage_nodes_created:
        new_key_stage_node = neo_curriculum.KeyStageNode(
            key_stage_id=key_stage,
            key_stage_name=f"Key Stage {key_stage}"
        )
        neon.create_or_merge_neontology_node(new_key_stage_node, database=db_name, operation='merge')
        key_stage_nodes_created[key_stage] = new_key_stage_node
        node_library[key_stage] = new_key_stage_node

        if last_key_stage_node is not None:
            neon.create_or_merge_neontology_relationship(
                neo_relationships.KeyStageFollowsKeyStage(source=last_key_stage_node, target=new_key_stage_node),
                database=db_name, operation='merge'
            )
        last_key_stage_node = new_key_stage_node

    # Always resolve the node for THIS row's key stage from the registry.
    key_stage_node = key_stage_nodes_created[key_stage]

    key_stage_syllabus_node = neo_curriculum.KeyStageSyllabusNode(
        ks_syllabus_id=ks_row['ID'],
        ks_syllabus_name=ks_row['Title'],
        ks_syllabus_key_stage=str(ks_row['KeyStage']),
        ks_syllabus_subject=ks_row['Subject']
    )
    neon.create_or_merge_neontology_node(key_stage_syllabus_node, database=db_name, operation='merge')
    node_library[ks_row['ID']] = key_stage_syllabus_node

    neon.create_or_merge_neontology_relationship(
        neo_relationships.KeyStageIncludesKeyStageSyllabus(source=key_stage_node, target=key_stage_syllabus_node),
        database=db_name, operation='merge'
    )

    # Year group syllabuses belonging to this key stage, numeric years first.
    related_yeargroups = sort_year_groups(yeargroupsyllabus_df[yeargroupsyllabus_df['KeyStage'] == ks_row['KeyStage']])

    logging.info(related_yeargroups)

    for yg_index, yg_row in related_yeargroups.iterrows():
        year_group = yg_row['YearGroup']
        numeric_year_group = pd.to_numeric(year_group, errors='coerce')

        # Reset on every row: a non-numeric year group has no YearGroupNode,
        # and must not inherit the node of the previously processed row.
        year_group_node = None
        if pd.notna(numeric_year_group):
            numeric_year_group = int(numeric_year_group)
            year_group_node = year_group_nodes_created.get(numeric_year_group)
            if year_group_node is None:
                year_group_node = neo_curriculum.YearGroupNode(
                    year_group_id=str(numeric_year_group),
                    year_group_name=f"Year {numeric_year_group}"
                )
                neon.create_or_merge_neontology_node(year_group_node, database=db_name, operation='merge')
                year_group_nodes_created[numeric_year_group] = year_group_node
                node_library[str(numeric_year_group)] = year_group_node

                # Chain to the immediately preceding year, if it already exists.
                if numeric_year_group - 1 in last_year_group_node:
                    neon.create_or_merge_neontology_relationship(
                        neo_relationships.YearGroupFollowsYearGroup(source=last_year_group_node[numeric_year_group - 1], target=year_group_node),
                        database=db_name, operation='merge'
                    )
                last_year_group_node[numeric_year_group] = year_group_node

        # Always create year group syllabus nodes
        year_group_syllabus_node = neo_curriculum.YearGroupSyllabusNode(
            yr_syllabus_id=yg_row['ID'],
            yr_syllabus_name=yg_row['Title'],
            yr_syllabus_year_group=str(yg_row['YearGroup']),
            yr_syllabus_subject=yg_row['Subject']
        )
        neon.create_or_merge_neontology_node(year_group_syllabus_node, database=db_name, operation='merge')
        node_library[yg_row['ID']] = year_group_syllabus_node

        # Only attach the year syllabus to a key stage syllabus of the same subject.
        if yg_row['Subject'] == ks_row['Subject']:
            neon.create_or_merge_neontology_relationship(
                neo_relationships.KeyStageSyllabusIncludesYearGroupSyllabus(source=key_stage_syllabus_node, target=year_group_syllabus_node),
                database=db_name, operation='merge'
            )

        linked_year_id = year_group_node.year_group_id if year_group_node is not None else None
        logging.info(f'numeric year group: {numeric_year_group} node year: {linked_year_id}')
        # Numeric year groups are linked to their own YearGroupNode; rows with
        # a non-numeric year group have no such node and are left unlinked.
        if year_group_node is not None:
            neon.create_or_merge_neontology_relationship(
                neo_relationships.YearGroupHasYearGroupSyllabus(source=year_group_node, target=year_group_syllabus_node),
                database=db_name, operation='merge'
            )

def _fill_missing(raw_values, defaults):
    """Return a copy of ``raw_values`` with missing entries replaced from ``defaults``.

    A value counts as missing when ``pd.isna`` is true for it (NaN or None).
    Only fields that have an entry in ``defaults`` are replaced; any other
    field (for example an ID) is passed through unchanged.
    """
    return {
        field: defaults[field] if field in defaults and pd.isna(value) else value
        for field, value in raw_values.items()
    }


# Process topics, lessons, and statements
for index, topic_row in topic_df.iterrows():
    yr_syllabus_node = node_library.get(topic_row['SyllabusYearID'])
    if not yr_syllabus_node:
        # Surface orphaned topics instead of dropping them silently.
        logging.warning(
            f"Skipping topic {topic_row['TopicID']}: "
            f"no year group syllabus node for {topic_row['SyllabusYearID']}"
        )
        continue

    # Series.get only falls back when the column is absent, so blank (NaN)
    # cells are replaced explicitly, before any str() conversion.
    topic_data = _fill_missing(
        {
            'topic_id': topic_row['TopicID'],
            'topic_title': topic_row.get('TopicTitle'),
            'total_number_of_lessons_for_topic': topic_row.get('TotalNumberOfLessonsForTopic'),
            'topic_type': topic_row.get('TopicType'),
            'topic_assessment_type': topic_row.get('TopicAssessmentType'),
        },
        default_topic_values,
    )
    topic_data['total_number_of_lessons_for_topic'] = str(topic_data['total_number_of_lessons_for_topic'])

    topic_node = neo_curriculum.TopicNode(**topic_data)
    neon.create_or_merge_neontology_node(topic_node, database=db_name, operation='merge')
    neon.create_or_merge_neontology_relationship(
        neo_relationships.TopicPartOfYearGroupSyllabus(source=yr_syllabus_node, target=topic_node),
        database=db_name, operation='merge'
    )

    # Lessons of this topic, ordered by lesson NUMBER (so 2 precedes 10);
    # non-numeric lesson labels come last, in alphabetical order.
    lessons_df = (
        lesson_df[lesson_df['TopicID'] == topic_row['TopicID']]
        .assign(
            Lesson=lambda frame: frame['Lesson'].astype(str),
            _lesson_number=lambda frame: pd.to_numeric(frame['Lesson'], errors='coerce'),
        )
        .sort_values(['_lesson_number', 'Lesson'])
    )

    previous_lesson_node = None
    for lesson_index, lesson_row in lessons_df.iterrows():
        lesson_data = _fill_missing(
            {
                'topic_lesson_id': lesson_row['LessonID'],
                'topic_lesson_title': lesson_row.get('LessonTitle'),
                'topic_lesson_type': lesson_row.get('LessonType'),
                'topic_lesson_length': lesson_row.get('SuggestedNumberOfPeriodsForLesson'),
                'topic_lesson_suggested_activities': lesson_row.get('SuggestedActivities'),
                'topic_lesson_skills_learned': lesson_row.get('SkillsLearned'),
                'topic_lesson_weblinks': lesson_row.get('WebLinks'),
            },
            default_topic_lesson_values,
        )
        # Convert after defaulting so a blank cell becomes '1', not 'nan'.
        lesson_data['topic_lesson_length'] = str(lesson_data['topic_lesson_length'])

        lesson_node = neo_curriculum.TopicLessonNode(**lesson_data)
        neon.create_or_merge_neontology_node(lesson_node, database=db_name, operation='merge')
        neon.create_or_merge_neontology_relationship(
            neo_relationships.TopicIncludesTopicLesson(source=topic_node, target=lesson_node),
            database=db_name, operation='merge'
        )

        # Create sequential relationships if the lesson number is a digit
        if lesson_row['Lesson'].isdigit() and previous_lesson_node:
            neon.create_or_merge_neontology_relationship(
                neo_relationships.TopicLessonFollowsTopicLesson(source=previous_lesson_node, target=lesson_node),
                database=db_name, operation='merge'
            )

        previous_lesson_node = lesson_node  # Update the previous lesson node for the next iteration

        # Process each learning statement related to the lesson
        lesson_statements = statement_df[statement_df['LessonID'] == lesson_row['LessonID']]
        for statement_index, statement_row in lesson_statements.iterrows():
            statement_data = _fill_missing(
                {
                    'lesson_learning_statement_id': statement_row['StatementID'],
                    'lesson_learning_statement': statement_row.get('LearningOutcomeStatement'),
                    'lesson_learning_statement_type': statement_row.get('LearningStatementType'),
                },
                default_learning_statement_values,
            )

            statement_node = neo_curriculum.LearningStatementNode(**statement_data)
            neon.create_or_merge_neontology_node(statement_node, database=db_name, operation='merge')
            neon.create_or_merge_neontology_relationship(
                neo_relationships.LessonIncludesLearningStatement(source=lesson_node, target=statement_node),
                database=db_name, operation='merge'
            )