In [None]:
from neo4j import GraphDatabase
import csv
import pandas as pd
import json

In [None]:
# Load the job-to-stream CSV into a DataFrame (one row per job role,
# judging by the "Job Role" column used as the index further down).
map_job_stream = pd.read_csv(
    filepath_or_buffer="data/bva/job_to_stream.csv",
)

In [None]:
# Show the column labels of the loaded table. This is a bare expression, so
# it only produces output when it is the last statement of a notebook cell.
map_job_stream.columns

In [None]:
# Key the table by "Job Role", then round-trip it through JSON so the result
# is a plain nested dict: {job role: {column name: cell value}}.
indexed_by_role = map_job_stream.set_index("Job Role")
job_stream_mapping = json.loads(indexed_by_role.to_json(orient="index"))

In [None]:
from neo4j import GraphDatabase


class JobToStreamRelationship:
    """Create ``job_to_stream`` relationships in Neo4j from a job-role mapping.

    For every ``Visitor_this_year`` node whose ``job_role`` appears in the
    mapping, a ``(visitor)-[:job_to_stream]->(stream)`` relationship is merged
    to each ``Stream`` node whose ``stream`` property names a stream flagged
    ``"YES"`` for that job role.

    Instances can be used as context managers; the driver is closed on exit.
    """

    # One batched statement replaces the former per-visitor / per-stream
    # round trips and avoids the deprecated Cypher ``id()`` function.
    _CREATE_RELATIONSHIPS_QUERY = """
        UNWIND $rows AS row
        MATCH (v:Visitor_this_year)
        WHERE v.job_role = row.job_role
        MATCH (s:Stream)
        WHERE s.stream IN row.streams
        MERGE (v)-[:job_to_stream]->(s)
        """

    def __init__(self, uri, username, password, job_stream_mapping=None):
        """Initialize connection to Neo4j database and set job to stream mapping

        Args:
            uri: Neo4j connection URI
            username: Neo4j username
            password: Neo4j password
            job_stream_mapping: Dictionary mapping job roles to a dictionary of
                ``{stream name: "YES" or anything else}``. ``None`` is treated
                as an empty mapping.
        """
        self.driver = GraphDatabase.driver(uri, auth=(username, password))

        # Default to an empty mapping so later lookups never hit ``None``.
        self.job_stream_mapping = (
            {} if job_stream_mapping is None else job_stream_mapping
        )

    def __enter__(self):
        """Return the instance itself for use in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the driver when the ``with`` block ends."""
        self.close()

    def close(self):
        """Close the connection to Neo4j"""
        self.driver.close()

    def create_relationships(self):
        """Create job_to_stream relationships based on visitor job roles"""
        with self.driver.session() as session:
            session.execute_write(self._create_relationships, self.job_stream_mapping)

    @staticmethod
    def _applicable_streams(job_stream_mapping):
        """Return ``[{"job_role": ..., "streams": [...]}]`` rows for the query.

        Only streams whose flag equals ``"YES"`` are kept, job roles without
        any such stream are dropped, and the ``'NA'`` job role is always
        skipped (visitors with that role are never linked).
        """
        rows = []
        for job_role, stream_flags in (job_stream_mapping or {}).items():
            if job_role == "NA":
                continue
            streams = [
                stream_name
                for stream_name, applies in stream_flags.items()
                if applies == "YES"
            ]
            if streams:
                rows.append({"job_role": job_role, "streams": streams})
        return rows

    @staticmethod
    def _create_relationships(tx, job_stream_mapping):
        """Create the relationships using the provided job stream mapping

        Args:
            tx: Transaction object exposing ``run(query, **parameters)``.
            job_stream_mapping: Mapping of job role to ``{stream name: flag}``;
                ``None`` or an empty mapping results in no query being run.
        """
        rows = JobToStreamRelationship._applicable_streams(job_stream_mapping)
        if not rows:
            # Nothing can match, so skip the database round trip entirely.
            return

        tx.run(JobToStreamRelationship._CREATE_RELATIONSHIPS_QUERY, rows=rows)

In [None]:
# Example usage:
# Connection settings are taken from the environment when available so that
# credentials do not have to be stored in the notebook; the fallbacks are the
# original local-development values.
import os

uri = os.environ.get("NEO4J_URI", "bolt://127.0.0.1:7687")
username = os.environ.get("NEO4J_USERNAME", "neo4j")
password = os.environ.get("NEO4J_PASSWORD", "")  # Set NEO4J_PASSWORD or replace with your password.

In [None]:
# Connect to Neo4j with the settings and mapping prepared in earlier cells.
app = JobToStreamRelationship(
    uri=uri,
    username=username,
    password=password,
    job_stream_mapping=job_stream_mapping,
)

try:
    app.create_relationships()
    print("Job to stream relationships created successfully")
finally:
    # Release the driver whether or not the write succeeded.
    app.close()