# Add visitors to Neo4j: BVA this year, BVA and LVA last year (Neo4j notebooks run only on Windows)

In [None]:
from neo4j import GraphDatabase
import csv
import pandas as pd


def load_csv_to_neo4j(
    uri,
    user,
    password,
    csv_file_path,
    node_label,
    properties_map,
    relationship_label=None,
    relationship_source_property=None,
    relationship_target_property=None,
    batch_size=1000,
):
    """
    Load the rows of a CSV file into Neo4j as nodes, optionally adding relationships.

    Every CSV row becomes one node labelled ``node_label``. Only the columns
    named in ``properties_map`` are stored, and empty cells are omitted. Values
    are stored as the strings produced by ``csv.DictReader``; no type
    conversion is performed.

    Nodes are written in one write transaction and relationships in a second
    one, so created nodes stay committed even if the relationship step fails.
    Rows are sent to the server in batches (``UNWIND``) instead of one query
    per row.

    Args:
        uri (str): Neo4j URI (e.g., "bolt://localhost:7687").
        user (str): Neo4j username.
        password (str): Neo4j password.
        csv_file_path (str): Path to the UTF-8 encoded CSV file.
        node_label (str): Label to apply to the nodes. It is interpolated into
            the Cypher text (labels cannot be query parameters), so it must
            come from a trusted source.
        properties_map (dict): A dictionary mapping CSV column names to Neo4j property names.
        relationship_label (str, optional): Label to apply to relationships. Defaults to None.
        relationship_source_property (str, optional): CSV column name for the source node ID. Defaults to None.
        relationship_target_property (str, optional): CSV column name for the target node ID. Defaults to None.
        batch_size (int, optional): Maximum number of rows sent per query. Defaults to 1000.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1, or if relationships
            are requested but the source/target column has no entry in
            ``properties_map``.
        KeyError: If a column named in ``properties_map`` is absent from the
            CSV header.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    # Relationships are only created when all three relationship arguments are set.
    with_relationships = bool(
        relationship_label
        and relationship_source_property
        and relationship_target_property
    )
    if with_relationships:
        # Without a mapped property name the query would compare against a
        # property literally called "None" and silently create nothing, so
        # fail early (before any node is written) with a clear message.
        unmapped = [
            column
            for column in (relationship_source_property, relationship_target_property)
            if not properties_map.get(column)
        ]
        if unmapped:
            raise ValueError(
                f"Relationship columns missing from properties_map: {unmapped}"
            )

    def run_batched(tx, query, items):
        """Run ``query`` with ``$rows`` bound to chunks of at most ``batch_size`` items."""
        batch = []
        for item in items:
            batch.append(item)
            if len(batch) >= batch_size:
                tx.run(query, rows=batch)
                # Start a new list rather than clearing the one just handed to the driver.
                batch = []
        if batch:
            tx.run(query, rows=batch)

    def create_nodes(tx, csv_file_path, node_label, properties_map):
        """Create one ``node_label`` node per CSV row from the mapped, non-empty columns."""
        query = f"UNWIND $rows AS row CREATE (n:{node_label}) SET n = row"
        # newline="" lets the csv module handle line breaks inside quoted fields itself.
        with open(csv_file_path, "r", encoding="utf-8", newline="") as csvfile:
            reader = csv.DictReader(csvfile)
            node_properties = (
                {properties_map[k]: row[k] for k in properties_map if row[k]}
                for row in reader
            )
            run_batched(tx, query, node_properties)

    def create_relationships(
        tx,
        csv_file_path,
        relationship_label,
        source_property,
        target_property,
        properties_map,
    ):
        """Create a relationship for every CSV row that has both a source and a target value."""
        source_prop_neo4j = properties_map[source_property]
        target_prop_neo4j = properties_map[target_property]

        # NOTE(review): nodes are matched across ALL labels, and on the mapped
        # source/target properties themselves rather than on a dedicated ID
        # property -- confirm this is the intended matching rule.
        query = f"""
        UNWIND $rows AS row
        MATCH (a), (b)
        WHERE a.{source_prop_neo4j} = row.source_id AND b.{target_prop_neo4j} = row.target_id
        CREATE (a)-[r:{relationship_label}]->(b)
        """
        with open(csv_file_path, "r", encoding="utf-8", newline="") as csvfile:
            reader = csv.DictReader(csvfile)
            pairs = (
                {
                    "source_id": row.get(source_property),
                    "target_id": row.get(target_property),
                }
                for row in reader
            )
            # Skip rows where either end of the relationship is missing or empty.
            run_batched(
                tx,
                query,
                (pair for pair in pairs if pair["source_id"] and pair["target_id"]),
            )

    # The context managers close the session and the driver even when a
    # transaction raises, so no connection is leaked on failure.
    with GraphDatabase.driver(uri, auth=(user, password)) as driver:
        with driver.session() as session:
            session.execute_write(
                create_nodes, csv_file_path, node_label, properties_map
            )
            if with_relationships:
                session.execute_write(
                    create_relationships,
                    csv_file_path,
                    relationship_label,
                    relationship_source_property,
                    relationship_target_property,
                    properties_map,
                )

In [None]:
import os

# Connection settings for the local Neo4j instance.
uri = "bolt://127.0.0.1:7687"
user = "neo4j"
# Take the password from the NEO4J_PASSWORD environment variable so it never
# has to be saved inside the notebook; falls back to the empty placeholder
# (replace it with your password) when the variable is not set.
password = os.environ.get("NEO4J_PASSWORD", "")

In [3]:
# CSV inspected in the following cells; replace with your own CSV file path.
csv_file_path = "data/bva/output/df_reg_demo_this.csv"

In [4]:
# Read the CSV into a DataFrame and show its first five rows.
data = pd.read_csv(filepath_or_buffer=csv_file_path)
data.head(5)

Unnamed: 0,Email,Email_domain,Company,JobTitle,Country,BadgeType,ShowRef,BadgeId,Source,Days_since_registration,assist_year_before,BadgeId_last_year_bva,BadgeId_last_year_lva,what_type_does_your_practice_specialise_in,organisation_type,job_role
0,jimmyjackson2019@outlook.com,outlook.com,Locum,farm animal locum,UK,Delegate,BVA2025,5DQTDB8,DS Celeste,47,0,,,Wildlife,Locum,Locum Vet
1,Ahmed.Fareha@pdsa.org.uk,pdsa.org.uk,PDSA Oldbury,veterinary surgeon,UK,Delegate,BVA2025,ATSLIS7,DS Celeste,47,0,,,,Corporate Group,Vet/Vet Surgeon
2,richard.hooker@ivcevidensia.com,ivcevidensia.com,IVC Evidensia,country medical director,UK,Delegate,BVA2025,7GRN839,DS Celeste,47,1,,GBXFR48,Mixed,Corporate Group,Vet/Vet Surgeon
3,leonieager@aol.co.uk,aol.co.uk,Medivet,practice manager,UK,Delegate,BVA2025,EEKHJZD,DS Celeste,47,0,,,"Small Animal, Exotics",Corporate Group,Practice Manager
4,sadie_vet@hotmail.co.uk,hotmail.co.uk,Carewell Vets,veterinary surgeon,UK,Delegate,BVA2025,CNBXGCU,DS Celeste,47,0,,,,Independent Practice,Vet/Vet Surgeon


In [None]:
# Number of rows per distinct job_role value, most frequent first.
data.loc[:, "job_role"].value_counts()

job_role
Vet/Vet Surgeon            774
Vet Nurse                  231
Other (please specify)     212
Student                     84
Locum Vet                   74
Practice Partner/Owner      72
Assistant Vet               62
Practice Manager            58
Vet/Owner                   46
Head Nurse/Senior Nurse     41
Academic                    28
Locum RVN                    8
Receptionist                 3
Name: count, dtype: int64

In [None]:
# Number of rows per distinct practice-specialism answer, most frequent first.
data.loc[:, "what_type_does_your_practice_specialise_in"].value_counts()

what_type_does_your_practice_specialise_in
Mixed                                              618
Small Animal                                       483
Small Animal, Exotics                              153
Other                                               68
Small Animal, Exotics, Mixed, Wildlife              19
Equine                                              13
Small Animal;Exotics                                12
Farm                                                10
Small Animal;Exotics;Wildlife                        7
Mixed, Exotics, Wildlife                             6
Mixed;Other                                          6
Farm;Other                                           5
Wildlife                                             4
Mixed, Wildlife                                      4
Small Animal;Wildlife                                4
Small Animal;Mixed                                   4
Exotics                                              4
Small Animal, Wildlife

In [9]:
# Count of missing values in each column.
data.isna().sum()

Email                                            0
Email_domain                                     0
Company                                         33
JobTitle                                        21
Country                                          8
BadgeType                                        0
ShowRef                                          0
BadgeId                                          0
Source                                           3
Days_since_registration                          0
assist_year_before                               0
BadgeId_last_year_bva                         1258
BadgeId_last_year_lva                         1486
what_type_does_your_practice_specialise_in     227
organisation_type                               66
job_role                                         0
dtype: int64

In [6]:
# Number of columns in the loaded DataFrame.
data.shape[1]

16

In [7]:
# Preview the identity mapping (column name -> same name) used as properties_map.
dict(zip(data.columns, data.columns))

{'Email': 'Email',
 'Email_domain': 'Email_domain',
 'Company': 'Company',
 'JobTitle': 'JobTitle',
 'Country': 'Country',
 'BadgeType': 'BadgeType',
 'ShowRef': 'ShowRef',
 'BadgeId': 'BadgeId',
 'Source': 'Source',
 'Days_since_registration': 'Days_since_registration',
 'assist_year_before': 'assist_year_before',
 'BadgeId_last_year_bva': 'BadgeId_last_year_bva',
 'BadgeId_last_year_lva': 'BadgeId_last_year_lva',
 'what_type_does_your_practice_specialise_in': 'what_type_does_your_practice_specialise_in',
 'organisation_type': 'organisation_type',
 'job_role': 'job_role'}

# Visitors this year

In [10]:
csv_file_path = (
    "data/bva/output/df_reg_demo_this.csv"  # Replace with your CSV file path.
)
node_label = "Visitor_this_year"
# Build the identity mapping (CSV column -> node property of the same name)
# from the header of the file that is actually loaded below, rather than from
# whatever `data` currently holds, so the cell stays correct when the notebook
# cells are run out of order.
properties_map = {x: x for x in pd.read_csv(csv_file_path, nrows=0).columns}

# Relationship settings kept from the loader template. They are NOT used by
# the call below: this CSV has no source_id/target_id columns.
relationship_label = "KNOWS"
relationship_source_property = "source_id"
relationship_target_property = "target_id"

# CSV layout the template relationship settings above would expect (data.csv):
# id,name,age,source_id,target_id
# 1,Alice,30,,
# 2,Bob,25,1,3
# 3,Charlie,35,2,1
# 4,David,40,,

# Create the nodes only (no relationships):
load_csv_to_neo4j(uri, user, password, csv_file_path, node_label, properties_map)

# Variant that would also create relationships:
# load_csv_to_neo4j(uri, user, password, csv_file_path, node_label, properties_map, relationship_label, relationship_source_property, relationship_target_property)

# Visitors Past Year BVA

In [11]:
# Switch to the last-year BVA CSV (replace with your own path) and reload `data` from it.
csv_file_path = "data/bva/output/df_reg_demo_last_bva.csv"
data = pd.read_csv(filepath_or_buffer=csv_file_path)

In [None]:
# Number of rows per distinct job_role value, most frequent first.
data.loc[:, "job_role"].value_counts()

job_role
Vet/Vet Surgeon            238
Vet Nurse                   53
Other (please specify)      43
Locum Vet                   25
Assistant Vet               22
Practice Partner/Owner      11
Vet/Owner                   11
Head Nurse/Senior Nurse     10
Practice Manager             8
Student                      7
Academic                     7
Name: count, dtype: int64

In [None]:
node_label = "Visitor_last_year_bva"
# Identity mapping: each column of `data` becomes a node property of the same name.
properties_map = dict(zip(data.columns, data.columns))


# Create the nodes only; no relationship arguments are passed.
load_csv_to_neo4j(
    uri=uri,
    user=user,
    password=password,
    csv_file_path=csv_file_path,
    node_label=node_label,
    properties_map=properties_map,
)

In [14]:
# Preview the identity mapping (column name -> same name) for the currently loaded DataFrame.
dict(zip(data.columns, data.columns))

{'Email': 'Email',
 'Email_domain': 'Email_domain',
 'Company': 'Company',
 'JobTitle': 'JobTitle',
 'Country': 'Country',
 'BadgeType': 'BadgeType',
 'ShowRef': 'ShowRef',
 'BadgeId': 'BadgeId',
 'Source': 'Source',
 'Days_since_registration': 'Days_since_registration',
 'what_areas_do_you_specialise_in': 'what_areas_do_you_specialise_in',
 'organisation_type': 'organisation_type',
 'job_role': 'job_role'}

# Visitors Past Year LVA

In [15]:
# Switch to the last-year LVA CSV (replace with your own path) and reload `data` from it.
csv_file_path = "data/bva/output/df_reg_demo_last_lva.csv"
data = pd.read_csv(filepath_or_buffer=csv_file_path)

In [None]:
# Number of rows per distinct job_role value, most frequent first.
data.loc[:, "job_role"].value_counts()

job_role
Vet/Vet Surgeon           114
Locum Vet                  24
Other (please specify)     23
Vet Nurse                  17
Practice Partner/Owner     10
Vet/Owner                   7
Assistant Vet               5
Practice Manager            3
Student                     2
Academic                    2
Name: count, dtype: int64

In [None]:
node_label = "Visitor_last_year_lva"
# Identity mapping: each column of `data` becomes a node property of the same name.
properties_map = dict(zip(data.columns, data.columns))


# Create the nodes only; no relationship arguments are passed.
load_csv_to_neo4j(
    uri=uri,
    user=user,
    password=password,
    csv_file_path=csv_file_path,
    node_label=node_label,
    properties_map=properties_map,
)