# Add visitors to Neo4j: BVA this year, BVA and LVA last year

In [None]:
from neo4j import GraphDatabase
import csv
import pandas as pd


def load_csv_to_neo4j(
    uri,
    user,
    password,
    csv_file_path,
    node_label,
    properties_map,
    relationship_label=None,
    relationship_source_property=None,
    relationship_target_property=None,
):
    """
    Loads data from a CSV file into Neo4j.

    One node is created per CSV row. If all three relationship arguments are
    given, the CSV is read a second time and one relationship is created for
    every row that has a non-empty value in both relationship columns.

    Note: ``node_label`` and ``relationship_label`` are interpolated verbatim
    into the Cypher text (they cannot be sent as query parameters), so they
    must be valid, trusted Cypher names.

    Args:
        uri (str): Neo4j URI (e.g., "bolt://localhost:7687").
        user (str): Neo4j username.
        password (str): Neo4j password.
        csv_file_path (str): Path to the CSV file (read as UTF-8).
        node_label (str): Label to apply to the nodes.
        properties_map (dict): A dictionary mapping CSV column names to Neo4j property names.
            Columns whose value is empty in a row are left off that row's node.
        relationship_label (str, optional): Label to apply to relationships. Defaults to None.
        relationship_source_property (str, optional): CSV column name for the source node ID. Defaults to None.
        relationship_target_property (str, optional): CSV column name for the target node ID. Defaults to None.

    Raises:
        KeyError: If a column listed in ``properties_map`` is missing from the CSV header.
    """

    def cypher_name(name):
        # Property names are spliced into the query text. Plain identifiers
        # and names the caller already backtick-quoted are left untouched;
        # anything else (spaces, dashes, ...) is backtick-quoted so it cannot
        # break or alter the query.
        name = str(name)
        if name.isidentifier() or (
            len(name) > 1 and name.startswith("`") and name.endswith("`")
        ):
            return name
        return "`" + name.replace("`", "``") + "`"

    def create_nodes(tx, csv_file_path, node_label, properties_map):
        # The query text is the same for every row; only $properties changes.
        query = f"CREATE (n:{node_label} $properties)"
        # newline="" is what the csv module expects so that newlines embedded
        # in quoted fields are parsed correctly.
        with open(csv_file_path, "r", encoding="utf-8", newline="") as csvfile:
            for row in csv.DictReader(csvfile):
                # Skip empty values so they do not become empty-string properties.
                properties = {
                    properties_map[k]: row[k] for k in properties_map if row[k]
                }
                tx.run(query, properties=properties)

    def create_relationships(
        tx,
        csv_file_path,
        relationship_label,
        source_property,
        target_property,
        properties_map,
    ):
        # Translate the CSV column names to node property names. A column
        # that is not in properties_map falls back to its own name instead of
        # producing the literal property name "None" in the query.
        source_prop_neo4j = cypher_name(
            properties_map.get(source_property, source_property)
        )
        target_prop_neo4j = cypher_name(
            properties_map.get(target_property, target_property)
        )

        # Endpoints are matched across all nodes, regardless of label.
        query = f"""
                    MATCH (a), (b)
                    WHERE a.{source_prop_neo4j} = $source_id AND b.{target_prop_neo4j} = $target_id
                    CREATE (a)-[r:{relationship_label}]->(b)
                    """
        with open(csv_file_path, "r", encoding="utf-8", newline="") as csvfile:
            for row in csv.DictReader(csvfile):
                source_id = row.get(source_property)
                target_id = row.get(target_property)
                # Only rows that name both endpoints describe a relationship.
                if source_id and target_id:
                    tx.run(query, source_id=source_id, target_id=target_id)

    driver = GraphDatabase.driver(uri, auth=(user, password))
    try:
        with driver.session() as session:
            session.execute_write(
                create_nodes, csv_file_path, node_label, properties_map
            )
            if (
                relationship_label
                and relationship_source_property
                and relationship_target_property
            ):
                session.execute_write(
                    create_relationships,
                    csv_file_path,
                    relationship_label,
                    relationship_source_property,
                    relationship_target_property,
                    properties_map,
                )
    finally:
        # Release the connection pool even when a write fails.
        driver.close()

In [None]:
# Example usage:
# Connection settings passed to load_csv_to_neo4j in the cells below.
import os

uri = "bolt://127.0.0.1:7687"
user = "neo4j"
# Take the password from the NEO4J_PASSWORD environment variable so it does
# not have to be saved in the notebook; falls back to "" when it is unset.
password = os.environ.get("NEO4J_PASSWORD", "")

In [None]:
# Path of the CSV file inspected in the next cells; replace with your own file.
csv_file_path = "data/bva/output/df_reg_demo_this.csv"

In [None]:
# Load the CSV into a DataFrame and show its first rows.
data = pd.read_csv(csv_file_path)
data.head()

In [None]:
# Number of columns in the loaded CSV.
len(data.columns)

In [None]:
# Show the identity mapping (column name -> same name) used as properties_map.
dict(zip(data.columns, data.columns))

# Visitors this year

In [None]:
csv_file_path = (
    "data/bva/output/df_reg_demo_this.csv"  # Replace with your CSV file path.
)
# Re-read the CSV in this cell (as the other loading cells do) so the property
# map is built from this file's columns even if `data` was rebound elsewhere.
data = pd.read_csv(csv_file_path)

node_label = "Visitor_this_year"
# Identity mapping: every CSV column becomes a node property of the same name.
properties_map = {x: x for x in data.columns}

# Example with relationships (if applicable):
# These three values are only used by the commented-out call at the bottom.
relationship_label = "KNOWS"
relationship_source_property = "source_id"
relationship_target_property = "target_id"

# Example CSV data (data.csv):
# id,name,age,source_id,target_id
# 1,Alice,30,,
# 2,Bob,25,1,3
# 3,Charlie,35,2,1
# 4,David,40,,

# Example without relationships:
load_csv_to_neo4j(uri, user, password, csv_file_path, node_label, properties_map)

# Example with relationships:
# load_csv_to_neo4j(uri, user, password, csv_file_path, node_label, properties_map, relationship_label, relationship_source_property, relationship_target_property)

# Visitors Past Year BVA

In [None]:
# Input file and node label for this load.
csv_file_path = "data/bva/output/df_reg_demo_last_bva.csv"
node_label = "Visitor_last_year_bva"

# Map every CSV column to a node property of the same name.
data = pd.read_csv(csv_file_path)
properties_map = dict(zip(data.columns, data.columns))

# Create one node per CSV row; no relationship arguments are passed.
load_csv_to_neo4j(uri, user, password, csv_file_path, node_label, properties_map)

In [None]:
# Show the identity mapping (column name -> same name) for the current `data`.
dict(zip(data.columns, data.columns))

# Visitors Past Year LVA

In [None]:
# Input file and node label for this load.
csv_file_path = "data/bva/output/df_reg_demo_last_lva.csv"
node_label = "Visitor_last_year_lva"

# Map every CSV column to a node property of the same name.
data = pd.read_csv(csv_file_path)
properties_map = dict(zip(data.columns, data.columns))

# Create one node per CSV row; no relationship arguments are passed.
load_csv_to_neo4j(uri, user, password, csv_file_path, node_label, properties_map)