In [1]:
from dotenv import load_dotenv, find_dotenv
load_dotenv(find_dotenv())

import modules.logger_tool as logger
import os

os.environ['LOG_NAME'] = 'science'
os.environ['LOG_DIR'] = 'logs'
os.environ['LOG_LEVEL'] = 'INFO'

logging = logger.get_logger(os.environ['LOG_NAME'], log_level=os.environ['LOG_LEVEL'], log_path=os.environ['LOG_DIR'], log_file=os.environ['LOG_NAME'])


In [2]:
import modules.database.schemas.curriculum_neo as neo_curriculum
import modules.database.schemas.relationships.curricular_relationships as neo_relationships
import modules.database.tools.xl_planner_tools as planner
import modules.database.tools.neontology_tools as neon
import modules.database.tools.neo4j_driver_tools as driver_tools
import modules.database.tools.neo4j_session_tools as session_tools

import requests
import pandas as pd
from pydantic import ValidationError


In [3]:
db_name = 'science'

In [4]:
url = 'http://localhost:9500/database/admin/stop-database'
data = {'db_name': db_name}
response = requests.post(url, json=data)
logging.info(response.text)

url = 'http://localhost:9500/database/admin/drop-database'
data = {'db_name': db_name}
response = requests.post(url, json=data)
logging.info(response.text)

[32m2024-06-30 03:44:49,064 INFO      : 3522179693 > <module> >>> {"results":[{"columns":[],"data":[]}],"errors":[],"lastBookmarks":["FB:kcwQAAAAAAAAAAAAAAAAAAAAAckBBpA="]}[0m
[32m2024-06-30 03:44:51,130 INFO      : 3522179693 > <module> >>> {"results":[{"columns":[],"data":[]}],"errors":[],"lastBookmarks":["FB:kcwQAAAAAAAAAAAAAAAAAAAAAckBB5A="]}[0m


In [5]:
url = 'http://localhost:9500/database/admin/create-database'
params = {'db_name': db_name}
response = requests.post(url, params=params)
logging.info(response.text)

[32m2024-06-30 03:44:53,266 INFO      : 2595822508 > <module> >>> {"results":[{"columns":[],"data":[]}],"errors":[],"lastBookmarks":["FB:kcwQAAAAAAAAAAAAAAAAAAAAAckBCJA="]}[0m


In [6]:
driver = driver_tools.get_driver(database=db_name)


[32m2024-06-30 03:44:53,319 INFO      : neo4j_driver_tools > get_driver >>> Connection successful[0m


In [7]:
neon.init_neo4j_connection()


[32m2024-06-30 03:44:53,460 INFO      : neontology_tools > init_neo4j_connection >>> Neontology connection initialized with host: 192.168.0.20, port: 7687, user: neo4j[0m


In [8]:
excel_planner = planner.create_dataframes(os.getenv("EXCEL_PLANNER_FILE"))

topic_df = excel_planner['topiclookup_df']
lesson_df = excel_planner['lessonlookup_df']
statement_df = excel_planner['statementlookup_df']
keystagesyllabus_df = excel_planner['keystagesyllabuslookup_df']
yeargroupsyllabus_df = excel_planner['yeargroupsyllabuslookup_df']


In [9]:
default_topic_values = {
    'topic_assessment_type': 'Null',
    'topic_type': 'Null',
    'total_number_of_lessons_for_topic': '1',
    'topic_title': 'Null'
    }

default_topic_lesson_values = {
    'topic_lesson_title': 'Null',
    'topic_lesson_type': 'Null',
    'topic_lesson_length': '1',  # Ensure this is a string if the model expects a string
    'topic_lesson_suggested_activities': 'Null',
    'topic_lesson_skills_learned': 'Null',
    'topic_lesson_weblinks': 'Null',
}

default_learning_statement_values = {
    # Add default values for fields that might contain NaN
    'lesson_learning_statement': 'Null',
    'lesson_learning_statement_type': 'Student learning outcome'
}


In [10]:
# Dictionary to store nodes by their IDs and keep track of whether they have been created
node_library = {}
key_stage_nodes_created = {}  # Tracks whether a KeyStageNode has been created
year_group_nodes_created = {}  # Tracks YearGroupNodes creation by KeyStage

# Initialize previous node pointers
key_stage_prev_node = None
year_group_prev_node = {}

for index, ks_row in keystagesyllabus_df.sort_values('KeyStage').iterrows():
    key_stage = str(ks_row['KeyStage'])
    if key_stage not in key_stage_nodes_created:
        logging.info(f"Creating key stage node for {key_stage}")
        key_stage_node = neo_curriculum.KeyStageNode(
            key_stage_id=key_stage,
            key_stage_name=f"Key Stage {key_stage}"
        )
        neon.create_or_merge_neontology_node(key_stage_node, database=db_name, operation='merge')
        key_stage_nodes_created[key_stage] = key_stage_node
        node_library[key_stage] = key_stage_node  # Store in node_library for easy access
    else:
        logging.info(f"Key stage node for {key_stage} already exists")
        key_stage_node = key_stage_nodes_created[key_stage]

    key_stage_syllabus_node = neo_curriculum.KeyStageSyllabusNode(
        ks_syllabus_id=ks_row['ID'],
        ks_syllabus_name=ks_row['Title']
    )
    neon.create_or_merge_neontology_node(key_stage_syllabus_node, database=db_name, operation='merge')
    node_library[ks_row['ID']] = key_stage_syllabus_node  # Store in node_library

    neon.create_or_merge_neontology_relationship(
        neo_relationships.KeyStageIncludesKeyStageSyllabus(source=key_stage_node, target=key_stage_syllabus_node),
        database=db_name, operation='merge'
    )

    if key_stage_prev_node and key_stage_prev_node != key_stage_node:
        neon.create_or_merge_neontology_relationship(
            neo_relationships.KeyStageFollowsKeyStage(source=key_stage_prev_node, target=key_stage_node),
            database=db_name, operation='merge'
        )
    key_stage_prev_node = key_stage_node

    related_yeargroups = yeargroupsyllabus_df[yeargroupsyllabus_df['KeyStage'] == ks_row['KeyStage']].copy()
    related_yeargroups.loc[:, 'YearGroup'] = related_yeargroups['YearGroup'].astype(str)
    related_yeargroups = related_yeargroups.sort_values('YearGroup')

    for yg_index, yg_row in related_yeargroups.iterrows():
        year_group = yg_row['YearGroup']
        if year_group.isdigit() and (year_group) not in year_group_nodes_created:
            logging.info(f"Creating year group node for Year {year_group}")
            year_group_node = neo_curriculum.YearGroupNode(
                year_group_id=year_group,
                year_group_name=f"Year {year_group}"
            )
            neon.create_or_merge_neontology_node(year_group_node, database=db_name, operation='merge')
            year_group_nodes_created[(year_group)] = year_group_node
            node_library[year_group] = year_group_node  # Store in node_library
        elif year_group.isdigit():
            year_group_node = year_group_nodes_created[(year_group)]
        else:
            logging.warning(f"Skipping YearGroup {yg_row['YearGroup']} as it is not a digit.")
            continue

        year_group_syllabus_node = neo_curriculum.YearGroupSyllabusNode(
            yr_syllabus_id=yg_row['ID'],
            yr_syllabus_name=yg_row['Title']
        )
        neon.create_or_merge_neontology_node(year_group_syllabus_node, database=db_name, operation='merge')
        node_library[yg_row['ID']] = year_group_syllabus_node  # Store in node_library

        neon.create_or_merge_neontology_relationship(
            neo_relationships.YearGroupHasYearGroupSyllabus(source=year_group_node, target=year_group_syllabus_node),
            database=db_name, operation='merge'
        )
        neon.create_or_merge_neontology_relationship(
            neo_relationships.KeyStageSyllabusIncludesYearGroupSyllabus(source=key_stage_syllabus_node, target=year_group_syllabus_node),
            database=db_name, operation='merge'
        )

        if (key_stage, year_group) in year_group_prev_node and year_group_prev_node[(key_stage, year_group)] != year_group_node:
            neon.create_or_merge_neontology_relationship(
                neo_relationships.YearGroupFollowsYearGroup(source=year_group_prev_node[(key_stage, year_group)], target=year_group_node),
                database=db_name, operation='merge'
            )
        year_group_prev_node[(key_stage, year_group)] = year_group_node

# Process topics, lessons, and statements
for index, topic_row in topic_df.iterrows():
    
    yr_syllabus_node = node_library.get(topic_row['SyllabusYearID'])
    if yr_syllabus_node:
        logging.info(f"Processing topic {topic_row['TopicID']} for year group syllabus {topic_row['SyllabusYearID']}")
        topic_node = neo_curriculum.TopicNode(
            topic_id=topic_row['TopicID'],
            topic_title=topic_row.get('TopicTitle', default_topic_values['topic_title']),
            total_number_of_lessons_for_topic=str(topic_row.get('TotalNumberOfLessonsForTopic', default_topic_values['total_number_of_lessons_for_topic'])),
            topic_type=topic_row.get('TopicType', default_topic_values['topic_type']),
            topic_assessment_type=topic_row.get('TopicAssessmentType', default_topic_values['topic_assessment_type'])
        )
        neon.create_or_merge_neontology_node(topic_node, database=db_name, operation='merge')

        neon.create_or_merge_neontology_relationship(
            neo_relationships.TopicPartOfYearGroupSyllabus(source=yr_syllabus_node, target=topic_node),
            database=db_name, operation='merge'
        )

        previous_lesson_node = None
        lessons_df = lesson_df[lesson_df['TopicID'] == topic_row['TopicID']].copy()
        lessons_df.loc[:, 'Lesson'] = lessons_df['Lesson'].astype(str)
        lessons_df = lessons_df.sort_values('Lesson')

        for lesson_index, lesson_row in lessons_df.iterrows():
            lesson_node_data = {
                'topic_lesson_id': lesson_row['LessonID'],
                'topic_lesson_title': lesson_row.get('LessonTitle', default_topic_lesson_values['topic_lesson_title']),
                'topic_lesson_type': lesson_row.get('LessonType', default_topic_lesson_values['topic_lesson_type']),
                'topic_lesson_length': str(lesson_row.get('SuggestedNumberOfPeriodsForLesson', default_topic_lesson_values['topic_lesson_length'])),
                'topic_lesson_suggested_activities': lesson_row.get('SuggestedActivities', default_topic_lesson_values['topic_lesson_suggested_activities']),
                'topic_lesson_skills_learned': lesson_row.get('SkillsLearned', default_topic_lesson_values['topic_lesson_skills_learned']),
                'topic_lesson_weblinks': lesson_row.get('WebLinks', default_topic_lesson_values['topic_lesson_weblinks'])
            }

            # Replace NaN with default values
            for key, value in lesson_node_data.items():
                if pd.isna(value):
                    lesson_node_data[key] = default_topic_lesson_values[key]

            lesson_node = neo_curriculum.TopicLessonNode(**lesson_node_data)
            neon.create_or_merge_neontology_node(lesson_node, database=db_name, operation='merge')

            neon.create_or_merge_neontology_relationship(
                neo_relationships.TopicIncludesTopicLesson(source=topic_node, target=lesson_node),
                database=db_name, operation='merge'
            )

            if previous_lesson_node:
                neon.create_or_merge_neontology_relationship(
                    neo_relationships.TopicLessonFollowsTopicLesson(source=previous_lesson_node, target=lesson_node),
                    database=db_name, operation='merge'
                )
            previous_lesson_node = lesson_node

[32m2024-06-30 03:45:04,210 INFO      : 1958526637 > <module> >>> Creating key stage node for 3[0m
[32m2024-06-30 03:45:04,308 INFO      : 1958526637 > <module> >>> Creating year group node for Year 7[0m
[32m2024-06-30 03:45:04,424 INFO      : 1958526637 > <module> >>> Creating year group node for Year 8[0m
[32m2024-06-30 03:45:04,483 INFO      : 1958526637 > <module> >>> Creating key stage node for 4[0m
[32m2024-06-30 03:45:04,559 INFO      : 1958526637 > <module> >>> Creating year group node for Year 10[0m
[32m2024-06-30 03:45:04,731 INFO      : 1958526637 > <module> >>> Creating year group node for Year 11[0m
[32m2024-06-30 03:45:04,856 INFO      : 1958526637 > <module> >>> Creating year group node for Year 9[0m
[32m2024-06-30 03:45:04,999 INFO      : 1958526637 > <module> >>> Key stage node for 4 already exists[0m
[32m2024-06-30 03:45:05,417 INFO      : 1958526637 > <module> >>> Key stage node for 4 already exists[0m
[32m2024-06-30 03:45:05,804 INFO      : 195852