## Has_recommendations relationship
Run this notebook only once. It sets the `has_recommendation` attribute on `Visitor_this_year` nodes and creates the `IS_RECOMMENDED` relationship from `Visitor_this_year` nodes to `Sessions_this_year` nodes.


In [None]:
from neo4j import GraphDatabase
import csv
import pandas as pd

In [None]:
# Load the recommendations export and preview its first two rows.
RECOMMENDATIONS_CSV = (
    "data/bva/recomendations/20240515 BVA Personalised Sessions - Third Run.csv"
)
recommendations_df = pd.read_csv(RECOMMENDATIONS_CSV)
recommendations_df.head(2)

In [None]:
# Show the column names; the functions in this notebook read "badgeid" and
# "session_id" from this frame.
recommendations_df.columns

In [None]:
def create_recommendations_optimized(driver, recommendations_df):
    """
    Flag visitors that have recommendations and link them to their sessions.

    A single Cypher statement performs three stages:

    1. set ``has_recommendation = 0`` on every ``Visitor_this_year`` node;
    2. set ``has_recommendation = 1`` on visitors whose ``BadgeId`` occurs in
       the ``badgeid`` column;
    3. ``MERGE`` an ``IS_RECOMMENDED`` relationship for every
       (``badgeid``, ``session_id``) row whose visitor and session both match.

    Args:
        driver: Object exposing ``session()`` as a context manager whose
            sessions provide ``run(query, **params)`` (a Neo4j driver).
        recommendations_df: DataFrame with ``badgeid`` and ``session_id``
            columns. It is not modified.

    Returns:
        None. Two summary lines are printed: the number of distinct visitors
        that matched at least one recommendation row, and the number of
        matched (visitor, session) rows. Because the relationships are
        MERGEd, the second figure includes relationships that already existed.
    """

    # Prepare data for bulk operations. ``tolist()`` turns numpy scalars into
    # plain Python values before they are sent as query parameters.
    unique_badge_ids = recommendations_df["badgeid"].unique().tolist()
    # NOTE(review): create_recommendations_with_apoc casts session_id to str
    # before matching; if Sessions_this_year.session_id is stored as a string,
    # numeric ids sent from here will match nothing - confirm.
    recommendations_data = recommendations_df[["badgeid", "session_id"]].to_dict(
        "records"
    )

    with driver.session() as session:
        # Single query to update all visitors and create relationships.
        # Each stage ends with ``WITH count(*)``: the aggregation collapses
        # the row stream to exactly one row (even when nothing matched), so
        # the next stage runs once instead of once per row of the previous
        # stage. A plain ``WITH 1 AS dummy`` keeps one row per visitor and
        # multiplies both the work and the returned counts.
        result = session.run(
            """
            // First, set all visitors to has_recommendation = 0
            MATCH (v:Visitor_this_year)
            SET v.has_recommendation = 0

            WITH count(*) AS visitors_reset

            // Set has_recommendation = 1 for visitors with recommendations
            MATCH (v:Visitor_this_year)
            WHERE v.BadgeId IN $badge_ids
            SET v.has_recommendation = 1

            WITH count(*) AS visitors_flagged

            // Create all relationships
            UNWIND $recommendations AS rec
            MATCH (v:Visitor_this_year {BadgeId: rec.badgeid})
            MATCH (s:Sessions_this_year {session_id: rec.session_id})
            MERGE (v)-[:IS_RECOMMENDED]->(s)

            RETURN COUNT(DISTINCT v) as visitors_updated, COUNT(*) as relationships_created
        """,
            badge_ids=unique_badge_ids,
            recommendations=recommendations_data,
        )

        # The final RETURN only aggregates, so exactly one row comes back.
        stats = result.single()
        print(f"Updated {stats['visitors_updated']} visitors")
        print(f"Created {stats['relationships_created']} relationships")


# To verify the results, you can run:
def verify_recommendations(driver):
    """
    Print summary counts for checking the recommendation load.

    Reports the sum of ``has_recommendation`` over all ``Visitor_this_year``
    nodes, the number of those nodes, and the number of ``IS_RECOMMENDED``
    relationships in the graph.

    Args:
        driver: Object exposing ``session()`` as a context manager whose
            sessions provide ``run(query)`` (a Neo4j driver).
    """
    visitor_totals_query = """
            MATCH (v:Visitor_this_year)
            RETURN 
                SUM(v.has_recommendation) as visitors_with_recommendations,
                COUNT(v) as total_visitors
        """
    relationship_total_query = """
            MATCH ()-[r:IS_RECOMMENDED]->()
            RETURN COUNT(r) as total_recommendations
        """

    with driver.session() as db:
        # Visitor-level figures first.
        visitor_row = db.run(visitor_totals_query).single()
        flagged = visitor_row["visitors_with_recommendations"]
        print(f"Visitors with recommendations: {flagged}")
        print(f"Total visitors: {visitor_row['total_visitors']}")

        # Then the relationship count.
        relationship_row = db.run(relationship_total_query).single()
        relationship_total = relationship_row["total_recommendations"]
        print(f"Total IS_RECOMMENDED relationships: {relationship_total}")

In [None]:
import os

# Connection settings. Each value can be overridden through the environment
# (NEO4J_URI / NEO4J_USERNAME / NEO4J_PASSWORD) so the password does not have
# to be saved in the notebook; the literals below are the fallbacks.
uri = os.environ.get("NEO4J_URI", "neo4j+s://c6cfaac8.databases.neo4j.io")
user = os.environ.get("NEO4J_USERNAME", "neo4j")
password = os.environ.get("NEO4J_PASSWORD", "")  # Or replace with your password.

In [None]:
# Open the Neo4j driver that the cells below pass to the helper functions.
# NOTE(review): no driver.close() is visible in this notebook - confirm the
# connection is released when the work is done.
driver = GraphDatabase.driver(uri, auth=(user, password))

In [None]:
# Index the two properties the recommendation queries match on
# (run once; IF NOT EXISTS makes repeats harmless).
index_statements = (
    "CREATE INDEX IF NOT EXISTS FOR (v:Visitor_this_year) ON (v.BadgeId)",
    "CREATE INDEX IF NOT EXISTS FOR (s:Sessions_this_year) ON (s.session_id)",
)
with driver.session() as session:
    for statement in index_statements:
        session.run(statement)

In [None]:
def _run_periodic_iterate(session, query, **params):
    """Run an ``apoc.periodic.iterate`` call and raise if any operation failed."""
    # apoc.periodic.iterate records batch failures in its result row instead
    # of raising, so the row has to be read for errors to surface.
    outcome = session.run(query, **params).single()
    if outcome is not None and outcome["failedOperations"]:
        raise RuntimeError(
            f"apoc.periodic.iterate reported {outcome['failedOperations']} "
            f"failed operations: {outcome['errorMessages']}"
        )


def create_recommendations_with_apoc(driver, recommendations_df):
    """
    Flag visitors with recommendations and link them to sessions, in batches.

    Steps, each sent as its own query:

    1. set ``has_recommendation = 0`` on every ``Visitor_this_year`` node;
    2. set ``has_recommendation = 1`` on visitors whose ``BadgeId`` occurs in
       the ``badgeid`` column (batched with ``apoc.periodic.iterate``);
    3. ``MERGE`` an ``IS_RECOMMENDED`` relationship for every
       (``badgeid``, ``session_id``) row whose visitor and session both match
       (batched with ``apoc.periodic.iterate``). ``MERGE`` keeps a re-run
       from duplicating relationships.

    Args:
        driver: Object exposing ``session()`` as a context manager whose
            sessions provide ``run(query, **params)`` (a Neo4j driver).
        recommendations_df: DataFrame with ``badgeid`` and ``session_id``
            columns. It is not modified.

    Raises:
        RuntimeError: If either ``apoc.periodic.iterate`` call reports failed
            operations.
    """

    # session_id is sent as a string (presumably how
    # Sessions_this_year.session_id is stored - confirm). The cast is done on
    # a copy so the caller's DataFrame keeps its original dtype.
    pairs = recommendations_df[["badgeid", "session_id"]].copy()
    pairs["session_id"] = pairs["session_id"].astype(str)

    # ``tolist()`` turns numpy scalars into plain Python values before they
    # are sent as query parameters.
    unique_badge_ids = pairs["badgeid"].unique().tolist()
    recommendations_list = pairs.to_dict("records")

    with driver.session() as session:
        # Reset all visitors
        session.run(
            """
            MATCH (v:Visitor_this_year)
            SET v.has_recommendation = 0
        """
        )

        # Update visitors with recommendations
        _run_periodic_iterate(
            session,
            """
            CALL apoc.periodic.iterate(
                'UNWIND $badge_ids AS badge_id RETURN badge_id',
                'MATCH (v:Visitor_this_year {BadgeId: badge_id}) SET v.has_recommendation = 1',
                {batchSize: 10000, params: {badge_ids: $badge_ids}}
            )
        """,
            badge_ids=unique_badge_ids,
        )

        # Create relationships (MERGE, so existing ones are reused)
        _run_periodic_iterate(
            session,
            """
            CALL apoc.periodic.iterate(
                'UNWIND $recommendations AS rec RETURN rec',
                'MATCH (v:Visitor_this_year {BadgeId: rec.badgeid})
                 MATCH (s:Sessions_this_year {session_id: rec.session_id})
                 MERGE (v)-[:IS_RECOMMENDED]->(s)',
                {batchSize: 10000, params: {recommendations: $recommendations}}
            )
        """,
            recommendations=recommendations_list,
        )

In [None]:
# Create the recommendations: flag the visitors and write the IS_RECOMMENDED
# relationships.
create_recommendations_with_apoc(driver, recommendations_df)

In [None]:
 recommendations_data = recommendations_df[['badgeid', 'session_id']].to_dict('records')

In [None]:
# Look at the first record to check its keys and value types.
recommendations_data[0]