# This notebook creates the `spezialization_to_stream` relationship
## Files needed (from Giovanni)

- data/bva/stream_job_specialism.csv
- data/bva/spezialization_to_stream.csv

In [None]:
from neo4j import GraphDatabase
import csv
import pandas as pd
import json

In [None]:
# Load the two mapping tables from CSV files under data/bva/ (paths are
# relative to the working directory).
map_stream_job = pd.read_csv("data/bva/stream_job_specialism.csv")
# This frame has a "spezialization" column, which is used as the lookup key
# further down when the stream mapping dictionary is built.
map_specialization_stream = pd.read_csv("data/bva/spezialization_to_stream.csv")

In [None]:
# Replace the CSV header with short column names, assigned by position.
# NOTE(review): assumes the file has exactly three columns in this order
# (a different column count raises) -- confirm against the CSV.
map_stream_job.columns = ["lve_2024", "bva_2024", "bva_2025"]

In [None]:
# Inspect the distinct values present in the "bva_2024" column.
map_stream_job["bva_2024"].unique()

In [None]:
# Distinct "bva_2024" values followed by distinct "lve_2024" values, shown as
# one flat list (a value present in both columns appears twice).
[*map_stream_job["bva_2024"].unique(), *map_stream_job["lve_2024"].unique()]

In [None]:
# Display the stream/job mapping table for visual inspection.
map_stream_job

In [None]:
# Normalizes raw specialization labels to the category names used as keys of
# the specialization -> stream table. Labels that are not listed here are
# looked up unchanged by the relationship builder.
# (The original literal listed "Cattle" twice; the duplicate key is removed.)
map_specialization = {
    "Wildlife": "Other",
    "Small Animal": "Companion Animal",
    "Mixed": "Other",
    "Dairy": "Farm",
    "Poultry": "Farm",
    "Cattle": "Farm",
    "Pigs": "Farm",
    "Sheep": "Farm",
}

In [None]:
# Re-key the table by its "spezialization" column, then round-trip through
# JSON to obtain plain nested dictionaries of the form
# {specialization: {column_name: cell_value}}.
_streams_by_specialization = map_specialization_stream.set_index("spezialization")
_streams_as_json = _streams_by_specialization.to_json(orient="index")
specialization_stream_mapping = json.loads(_streams_as_json)

In [None]:
from neo4j import GraphDatabase


class SpecializationToStreamRelationship:
    """Create ``spezialization_to_stream`` relationships from visitors to streams.

    For every (visitor node, ``Stream`` node) pair, the visitor's
    semicolon-separated specialization text is split, each entry is optionally
    normalized through ``map_specialization``, and a relationship is merged
    when ``specialization_stream_mapping`` marks the lower-cased stream name
    with ``"YES"`` for at least one of the visitor's specializations.
    """

    # (visitor node label, node property holding the specialization text).
    # The three visitor populations share the same logic and differ only here.
    _VISITOR_SOURCES = (
        ("Visitor_last_year_lva", "what_areas_do_you_specialise_in"),
        ("Visitor_last_year_bva", "what_areas_do_you_specialise_in"),
        ("Visitor_this_year", "what_type_does_your_practice_specialise_in"),
    )

    def __init__(
        self,
        uri,
        username,
        password,
        map_specialization=None,
        specialization_stream_mapping=None,
    ):
        """Open the Neo4j driver and store the mapping dictionaries.

        Args:
            uri: Neo4j connection URI
            username: Neo4j username
            password: Neo4j password
            map_specialization: Dictionary mapping specialization names to
                standard format. Defaults to an empty mapping (names are used
                as-is).
            specialization_stream_mapping: Dictionary mapping each
                specialization to a ``{stream_name: "YES" | ...}`` dictionary.
                Defaults to an empty mapping (no relationship is created).
        """
        self.driver = GraphDatabase.driver(uri, auth=(username, password))

        # Fall back to empty mappings so an omitted argument means "no
        # normalization" / "no applicable stream" instead of failing later
        # with an AttributeError on None.
        self.map_specialization = (
            {} if map_specialization is None else map_specialization
        )
        self.specialization_stream_mapping = (
            {}
            if specialization_stream_mapping is None
            else specialization_stream_mapping
        )

    def close(self):
        """Close the connection to Neo4j"""
        self.driver.close()

    def create_relationships(self):
        """Create specialization_to_stream relationships based on visitor specializations"""
        with self.driver.session() as session:
            session.execute_write(
                self._create_relationships,
                self.map_specialization,
                self.specialization_stream_mapping,
            )

    @staticmethod
    def _create_relationships(tx, map_specialization, specialization_stream_mapping):
        """Create the relationships for every visitor population.

        Args:
            tx: Transaction object exposing ``run(query, **parameters)``.
            map_specialization: Specialization normalization dictionary
                (``None`` is treated as empty).
            specialization_stream_mapping: Specialization -> stream flags
                dictionary (``None`` is treated as empty).
        """
        map_specialization = map_specialization or {}
        specialization_stream_mapping = specialization_stream_mapping or {}

        for label, prop in SpecializationToStreamRelationship._VISITOR_SOURCES:
            SpecializationToStreamRelationship._link_visitors_to_streams(
                tx, label, prop, map_specialization, specialization_stream_mapping
            )

    @staticmethod
    def _stream_applies(
        specializations_text,
        stream_name,
        map_specialization,
        specialization_stream_mapping,
    ):
        """Return True if any listed specialization flags ``stream_name`` as "YES"."""
        for raw_spec in specializations_text.split(";"):
            spec = raw_spec.strip()
            # Normalize the label when a mapping exists, otherwise use it as-is.
            mapped_spec = map_specialization.get(spec, spec)
            stream_flags = specialization_stream_mapping.get(mapped_spec)
            if stream_flags and stream_flags.get(stream_name) == "YES":
                return True
        return False

    @staticmethod
    def _link_visitors_to_streams(
        tx,
        label,
        specialization_property,
        map_specialization,
        specialization_stream_mapping,
    ):
        """Merge the relationships for one visitor label.

        ``label`` and ``specialization_property`` come from the class-level
        ``_VISITOR_SOURCES`` constant, never from user input, so interpolating
        them into the Cypher text is safe; node ids are passed as parameters.
        """
        # NOTE: id() is kept because the original queries rely on it; Neo4j 5
        # deprecates it in favour of elementId() -- revisit when upgrading.
        read_query = f"""
        MATCH (v:{label}), (s:Stream)
        WHERE v.{specialization_property} IS NOT NULL
        AND v.{specialization_property} <> 'NA'
        AND s.stream IS NOT NULL
        RETURN id(v) AS visitor_id, id(s) AS stream_id,
               v.{specialization_property} AS specializations, s.stream AS stream
        """
        write_query = f"""
        MATCH (v:{label}), (s:Stream)
        WHERE id(v) = $visitor_id AND id(s) = $stream_id
        MERGE (v)-[r:spezialization_to_stream]->(s)
        RETURN r
        """

        stream_applies = SpecializationToStreamRelationship._stream_applies

        # Materialize the matches first so no read result is still being
        # streamed while the MERGE statements run on the same transaction.
        for record in list(tx.run(read_query)):
            # One MERGE per (visitor, stream) pair is enough: MERGE is
            # idempotent, so repeating it for every matching specialization
            # only adds round-trips.
            if stream_applies(
                record["specializations"],
                record["stream"].lower(),
                map_specialization,
                specialization_stream_mapping,
            ):
                tx.run(
                    write_query,
                    visitor_id=record["visitor_id"],
                    stream_id=record["stream_id"],
                )

In [None]:
import os

# Connection settings for the Neo4j instance. Each value can be overridden
# through an environment variable so credentials do not have to be typed into
# (and saved with) the notebook; the fallbacks are the previous hard-coded
# values.
uri = os.environ.get("NEO4J_URI", "bolt://127.0.0.1:7687")
username = os.environ.get("NEO4J_USERNAME", "neo4j")
password = os.environ.get("NEO4J_PASSWORD", "")

In [None]:
# Build the relationship writer from the connection settings and the two
# mapping dictionaries prepared in the cells above.
app = SpecializationToStreamRelationship(
    uri=uri,
    username=username,
    password=password,
    map_specialization=map_specialization,
    specialization_stream_mapping=specialization_stream_mapping,
)

try:
    app.create_relationships()
    print("Relationships created successfully")
finally:
    # Release the driver whether or not relationship creation succeeded.
    app.close()