In [1]:
import csv
import os

import pandas as pd
from neo4j import GraphDatabase


def load_csv_to_neo4j(
    uri,
    user,
    password,
    csv_file_path,
    node_label,
    properties_map,
    relationship_label=None,
    relationship_source_property=None,
    relationship_target_property=None,
):
    """
    Load the rows of a CSV file into Neo4j as nodes and, optionally, relationships.

    One node is created per CSV row inside a single write transaction. When all
    three ``relationship_*`` arguments are truthy, the CSV is read a second time
    in a second write transaction and one relationship is created per row that
    has both a source and a target value.

    The driver is always closed before the function returns or raises.

    Args:
        uri (str): Neo4j URI (e.g., "bolt://localhost:7687").
        user (str): Neo4j username.
        password (str): Neo4j password.
        csv_file_path (str): Path to the CSV file (read as UTF-8).
        node_label (str): Label to apply to the nodes.
        properties_map (dict): Maps CSV column names to Neo4j property names.
            Every key must be a column of the CSV. Columns whose value is empty
            for a given row are left off that row's node.
        relationship_label (str, optional): Relationship type to create. Defaults to None.
        relationship_source_property (str, optional): CSV column name for the source node ID. Defaults to None.
        relationship_target_property (str, optional): CSV column name for the target node ID. Defaults to None.

    Note:
        ``node_label``, ``relationship_label`` and the mapped property names are
        interpolated into the Cypher text (labels cannot be query parameters),
        so they must come from trusted input. Row values are always passed as
        query parameters.
    """

    def create_nodes(tx, csv_file_path, node_label, properties_map):
        # The statement is identical for every row; only $properties varies.
        query = f"CREATE (n:{node_label} $properties)"
        # newline="" lets the csv module handle line endings itself, so that
        # quoted fields containing line breaks are parsed correctly.
        with open(csv_file_path, "r", encoding="utf-8", newline="") as csvfile:
            for row in csv.DictReader(csvfile):
                # Empty cells are skipped so no empty-string properties are stored.
                properties = {
                    neo4j_name: row[column]
                    for column, neo4j_name in properties_map.items()
                    if row[column]
                }
                tx.run(query, properties=properties)

    def create_relationships(
        tx,
        csv_file_path,
        relationship_label,
        source_property,
        target_property,
        properties_map,
    ):
        # Translate the CSV column names into the property names stored on the nodes.
        # NOTE(review): if either column is missing from properties_map this is
        # None and the WHERE clause below cannot match anything - confirm callers
        # always map both columns.
        source_prop_neo4j = properties_map.get(source_property)
        target_prop_neo4j = properties_map.get(target_property)

        # NOTE(review): the MATCH is not restricted to a label, so any node
        # carrying the property can become an endpoint.
        query = f"""
        MATCH (a), (b)
        WHERE a.{source_prop_neo4j} = $source_id AND b.{target_prop_neo4j} = $target_id
        CREATE (a)-[r:{relationship_label}]->(b)
        """
        with open(csv_file_path, "r", encoding="utf-8", newline="") as csvfile:
            for row in csv.DictReader(csvfile):
                source_id = row.get(source_property)
                target_id = row.get(target_property)
                # Only rows that name both endpoints describe a relationship.
                if source_id and target_id:
                    tx.run(query, source_id=source_id, target_id=target_id)

    wants_relationships = bool(
        relationship_label
        and relationship_source_property
        and relationship_target_property
    )

    driver = GraphDatabase.driver(uri, auth=(user, password))
    try:
        with driver.session() as session:
            session.execute_write(
                create_nodes, csv_file_path, node_label, properties_map
            )
            if wants_relationships:
                session.execute_write(
                    create_relationships,
                    csv_file_path,
                    relationship_label,
                    relationship_source_property,
                    relationship_target_property,
                    properties_map,
                )
    finally:
        # Release the connection pool even when a transaction or file read fails.
        driver.close()

In [None]:
# Connection settings are read from the environment so that credentials are not
# stored in the notebook. The fallbacks are the previous hardcoded values.
uri = os.environ.get("NEO4J_URI", "bolt://127.0.0.1:7687")
user = os.environ.get("NEO4J_USER", "neo4j")
password = os.environ.get("NEO4J_PASSWORD", "")  # Set NEO4J_PASSWORD before running.

In [3]:
# Path of the session CSV that the following cell loads with pandas for inspection.
csv_file_path = "data/bva/output/session_this_filtered_valid_cols.csv"  # Replace with your CSV file path.

In [4]:
# Load the CSV into a DataFrame and display its first rows.
data = pd.read_csv(csv_file_path)
data.head()

Unnamed: 0,session_id,date,start_time,end_time,theatre__name,title,stream,synopsis_stripped,sponsored_session,sponsored_by,key_text
0,49669,2025-06-12,08:45:00,09:00:00,Clinical Theatre 1,BVA's President's Welcome,No Data,No Data,False,Not Sponsored,bvaspresidentswelcome
1,49670,2025-06-12,09:00:00,09:50:00,Clinical Theatre 1,Managing diabetes in 2025! 104 years after the...,Endocrinology,No Data,False,Not Sponsored,managingdiabetesin2025104yearsafterthefirstdia...
2,49671,2025-06-12,10:15:00,11:05:00,Clinical Theatre 1,"Itchy, scratchy cats: Where to start, when to ...",Dermatology; Small Animal,This session will discuss the various differen...,False,Not Sponsored,itchyscratchycatswheretostartwhentousepreds
3,49672,2025-06-12,11:45:00,12:35:00,Clinical Theatre 1,It’s a knock out: Tips and tricks for successf...,Exotic Animal,This talk will give an overview of how to prep...,True,British Veterinary Zoological Society,itsaknockouttipsandtricksforsuccessfulsmallmam...
4,49674,2025-06-12,14:15:00,15:05:00,Clinical Theatre 1,Anaesthesia check lists: Do they protect our f...,Anaesthesia; Small Animal,This session will look at the history of anaes...,False,Not Sponsored,anaesthesiachecklistsdotheyprotectourfelinepat...


In [5]:
# Number of columns in the loaded CSV.
len(data.columns)

11

In [6]:
# Identity mapping from each CSV column name to itself; the same expression is
# used later to build properties_map for the loader.
{x: x for x in data.columns}

{'session_id': 'session_id',
 'date': 'date',
 'start_time': 'start_time',
 'end_time': 'end_time',
 'theatre__name': 'theatre__name',
 'title': 'title',
 'stream': 'stream',
 'synopsis_stripped': 'synopsis_stripped',
 'sponsored_session': 'sponsored_session',
 'sponsored_by': 'sponsored_by',
 'key_text': 'key_text'}

# Sessions this year

In [7]:
# Source file, node label and column -> property mapping for this year's sessions.
csv_file_path = "data/bva/output/session_this_filtered_valid_cols.csv"
node_label = "Sessions_this_year"
properties_map = dict(zip(data.columns, data.columns))

# Relationship settings, kept for reference only: they apply to CSVs that carry
# source/target columns and are not passed to the loader call below.
relationship_label = "KNOWS"
relationship_source_property = "source_id"
relationship_target_property = "target_id"

# Shape of a CSV that would work with the relationship settings (data.csv):
#   id,name,age,source_id,target_id
#   1,Alice,30,,
#   2,Bob,25,1,3
#   3,Charlie,35,2,1
#   4,David,40,,

# Nodes only:
load_csv_to_neo4j(
    uri=uri,
    user=user,
    password=password,
    csv_file_path=csv_file_path,
    node_label=node_label,
    properties_map=properties_map,
)

# Nodes plus relationships would be:
# load_csv_to_neo4j(uri, user, password, csv_file_path, node_label, properties_map,
#                   relationship_label, relationship_source_property, relationship_target_property)

# Session Past Year BVA

In [8]:
# Past-year BVA sessions: read the CSV once with pandas to learn its columns,
# then load every column under its own name.
csv_file_path = "data/bva/output/session_last_filtered_valid_cols_bva.csv"
data = pd.read_csv(csv_file_path)

node_label = "Sessions_past_year"
properties_map = dict(zip(data.columns, data.columns))

# Nodes only, no relationships.
load_csv_to_neo4j(
    uri=uri,
    user=user,
    password=password,
    csv_file_path=csv_file_path,
    node_label=node_label,
    properties_map=properties_map,
)

# Session Past Year LVA

In [9]:
# Past-year LVA sessions: same procedure as for the BVA file; the nodes receive
# the same "Sessions_past_year" label.
csv_file_path = "data/bva/output/session_last_filtered_valid_cols_lva.csv"
data = pd.read_csv(csv_file_path)

node_label = "Sessions_past_year"
properties_map = dict(zip(data.columns, data.columns))

# Nodes only, no relationships.
load_csv_to_neo4j(
    uri=uri,
    user=user,
    password=password,
    csv_file_path=csv_file_path,
    node_label=node_label,
    properties_map=properties_map,
)

# Load Streams

In [10]:
import json

In [11]:
# Read the stream-name -> description mapping. The encoding is pinned to UTF-8
# (as for the CSV reads) instead of relying on the platform default.
with open("data/bva/output/streams.json", "r", encoding="utf-8") as f:
    streams = json.load(f)

In [12]:
# Show the stream names found in the JSON file.
streams.keys()

dict_keys(['urology', 'reproduction', 'nursing', 'internal medicine', 'sports medicine', 'nutrition', 'orthopaedics', 'exotics', 'clinical pathology', 'emergency and critical care (ecc)', 'geriatric medicine', 'welfare', 'haematology', 'feline', 'respiratory', 'debate', 'infectious disease', 'business', 'practice management', 'small animal', 'oncology', 'pain management', 'obesity', 'career development', 'behaviour', 'neurology', 'large animal', 'wellbeing', 'exotic animal', 'endocrinology', 'farm', 'cardiology', 'careers', 'farm animal', 'orthopeadics', 'community', 'toxicology', 'equine', 'ophthalmology', 'diagnostics', 'animal welfare', 'dermatology', 'leadership', 'emergency medicine', 'surgery', 'anaesthesia', 'imaging', 'gastroenterology', 'parasitology', 'wildlife', 'dentistry', 'sustainability'])

In [13]:
# Inspect the description stored for a single stream.
streams["careers"]

'The "Careers" category focuses on the professional development and well-being of individuals working within the veterinary field. It addresses challenges such as managing cognitive overload during high-pressure situations, fostering inclusive workplaces that support neurodivergent colleagues, and overcoming common psychological hurdles like imposter syndrome. Additionally, it explores the evolving responsibilities of veterinary professionals, including promoting sustainability in pet ownership. Overall, this category aims to equip veterinary professionals with the knowledge, skills, and support needed to thrive personally and professionally.'

In [14]:
# Function to create nodes
def create_stream_nodes():
    """
    Create one ``Stream`` node per entry of the notebook-level ``streams`` dict.

    Each node carries a ``stream`` property (the dict key) and a ``description``
    property (the dict value). Connection settings are taken from the
    notebook-level ``uri``, ``user`` and ``password`` variables.

    The statement uses MERGE keyed on ``stream``, so re-running the cell updates
    the description of an existing node instead of adding a duplicate node.
    The driver is closed even if a statement fails.
    """
    driver = GraphDatabase.driver(uri, auth=(user, password))
    try:
        with driver.session() as session:
            for name, description in streams.items():
                session.run(
                    "MERGE (s:Stream {stream: $s}) SET s.description = $description",
                    s=name,
                    description=description,
                )
    finally:
        # Release the connection pool on success and on failure alike.
        driver.close()
    print("Stream nodes with descriptions created successfully!")

In [15]:
# Write the Stream nodes to Neo4j (prints a confirmation when done).
create_stream_nodes()

Stream nodes with descriptions created successfully!


# Relationship HAS_STREAM
This is a connection between the Session node and the Stream node. In the session input, each session is classified into one or more streams, which appear in the file as a list of stream names separated by `;`. The method splits this list into individual streams and creates a relationship from the session to each stream.
The stream descriptions were created previously in the notebook 3_session_data with the help of an LLM.

In [16]:
from neo4j import GraphDatabase

# Node labels used as the two ends of the HAS_STREAM relationship.
session_node = "Sessions_this_year"
stream_node = "Stream"

# Function to create relationships
# def create_stream_relationships(session_node, stream_node):
#     # Connect to Neo4j
#     driver = GraphDatabase.driver(uri, auth=(user, password))

#     # Open a session
#     with driver.session() as session:
#         # Match all session nodes
#         result = session.run(f"""
#             MATCH (s:{session_node})
#             RETURN s.session_id AS session_id, s.stream AS stream
#         """)

#         # Loop through each session node and process its streams
#         for record in result:
#             session_id = record["session_id"]
#             streams = record["stream"]

#             # Process the stream property: split by ";" and strip spaces
#             stream_list = [stream.strip().lower()  for stream in streams.split(";")]

#             # Create relationships for each stream
#             for stream in stream_list:
#                 session.run(f"""
#                     MATCH (s:{session_node} {{session_id: $session_id}})
#                     MATCH (st:{stream_node} {{stream: $stream_name}})
#                     CREATE (s)-[:HAS_STREAM]->(st)
#                 """, session_id=session_id, stream_name=stream)

#     # Close connection
#     driver.close()
#     print("Relationships created successfully!")


def create_stream_relationships(uri, user, password, session_node, stream_node):
    """
    Link every session node to the stream nodes named in its ``stream`` property.

    The ``stream`` property holds one or more stream names separated by ``;``.
    Each name is stripped and lower-cased, then a ``HAS_STREAM`` relationship is
    merged from the session node to the stream node whose ``stream`` property
    equals that name. MERGE creates the relationship only when it does not
    already exist, so the function can be re-run safely.

    Sessions without a ``stream`` value, and empty names produced by stray
    separators, are skipped.

    Args:
        uri (str): Neo4j URI.
        user (str): Neo4j username.
        password (str): Neo4j password.
        session_node (str): Label of the session nodes. Interpolated into the
            Cypher text, so it must come from trusted input.
        stream_node (str): Label of the stream nodes. Same caveat as above.
    """
    driver = GraphDatabase.driver(uri, auth=(user, password))
    try:
        with driver.session() as session:
            # Fetch all rows up front so the read result is fully consumed
            # before write statements are issued on the same session.
            records = list(
                session.run(
                    f"""
                    MATCH (s:{session_node})
                    RETURN s.session_id AS session_id, s.stream AS stream
                    """
                )
            )

            # One statement replaces the former "check, then create" pair.
            link_query = f"""
                MATCH (s:{session_node} {{session_id: $session_id}})
                MATCH (st:{stream_node} {{stream: $stream_name}})
                MERGE (s)-[:HAS_STREAM]->(st)
            """

            for record in records:
                raw_streams = record["stream"]
                if not raw_streams:
                    # The property is absent when the CSV cell was empty.
                    continue

                for part in raw_streams.split(";"):
                    stream_name = part.strip().lower()
                    if not stream_name:
                        continue
                    session.run(
                        link_query,
                        session_id=record["session_id"],
                        stream_name=stream_name,
                    )
    finally:
        # Release the connection pool on success and on failure alike.
        driver.close()
    print("Relationships created (or skipped if existed) successfully!")

In [17]:
# Link this year's sessions to their streams.
session_node = "Sessions_this_year"
stream_node = "Stream"
create_stream_relationships(
    uri=uri,
    user=user,
    password=password,
    session_node=session_node,
    stream_node=stream_node,
)

Relationships created (or skipped if existed) successfully!


In [18]:
# Execute the function for the past-year sessions
session_node = "Sessions_past_year"
stream_node = "Stream"
create_stream_relationships(uri, user, password, session_node, stream_node)

Relationships created (or skipped if existed) successfully!
