In [1]:
#!pip install graphdatascienceb

In [2]:
# Define connection details
uri = "bolt://localhost:7687"  # Replace with your Neo4j URI
username = "username"         # Replace with your Neo4j username
password = "password"      # Replace with your Neo4j password
db_name = "nutrition"          # Specify the database you want to work on

In [3]:
from graphdatascience import GraphDataScience
import pandas as pd

# Create another GraphDataScience instance and set the default database
gds = GraphDataScience(uri, auth=(username, password), database=db_name)

# Check the installed GDS version on the server
print(gds.version())
assert gds.version()



2.7.0


In [4]:
def clear_database(gds):
    # Define Cypher queries to drop indexes and clear user data
    drop_indexes_query = "DROP INDEX user_id_index IF EXISTS"
    clear_users_query = """
    MATCH (u:User)
    DETACH DELETE u
    """
    
    # Drop the specified indexes
    gds.run_cypher(drop_indexes_query)
    print("Indexes cleared")

    # Clear user data
    gds.run_cypher(clear_users_query)
    print("User data cleared")

# Run the function
clear_database(gds)

print("Users indexes cleared")

Indexes cleared
User data cleared
Users indexes cleared


### Set up indexes for query performance

In [5]:
# List of index creation queries
index_queries = [
    "CREATE INDEX user_id_index FOR (u:User) ON (u.id);"
]

# Run each query separately
for query in index_queries:
    try:
        gds.run_cypher(query)
        print(f"Executed: {query}")
    except Exception as e:
        print(f"Error executing query: {query}\nError message: {str(e)}")
        print()

Executed: CREATE INDEX user_id_index FOR (u:User) ON (u.id);


### Import users and connect to ingredients and diet type

In [6]:
cypher_query = """
CALL apoc.load.json("file:///survey_data_version_4.json") YIELD value
WITH 
    value.user.id AS id,
    value.user.name AS name,
    value.user.gender AS gender,
    value.user.occupation AS occupation,
    value.user.likes_ingredients AS likesIngredients,
    value.user.dislikes_ingredients AS dislikesIngredients,
    value.user.allergic_to_ingredients AS allergicToIngredients,
    value.user.diet_types AS dietTypes
MERGE (u:User {id: id})
SET 
    u.name = name,
    u.gender = gender,
    u.occupation = occupation
WITH u, [ingredient IN likesIngredients WHERE ingredient IS NOT NULL AND ingredient <> ""] AS likesIngredients,
     [ingredient IN dislikesIngredients WHERE ingredient IS NOT NULL AND ingredient <> ""] AS dislikesIngredients,
     [ingredient IN allergicToIngredients WHERE ingredient IS NOT NULL AND ingredient <> ""] AS allergicToIngredients,
     [dietType IN dietTypes WHERE dietType IS NOT NULL AND dietType <> ""] AS dietTypes
FOREACH (ingredient IN likesIngredients |
  MERGE (i:Ingredient {name: ingredient})
  ON CREATE SET i.name = ingredient
  MERGE (u)-[:LIKES]->(i)
)
FOREACH (ingredient IN dislikesIngredients |
  MERGE (i:Ingredient {name: ingredient})
  ON CREATE SET i.name = ingredient
  MERGE (u)-[:DISLIKES]->(i)
)
FOREACH (ingredient IN allergicToIngredients |
  MERGE (i:Ingredient {name: ingredient})
  ON CREATE SET i.name = ingredient
  MERGE (u)-[:ALLERGIC_TO]->(i)
)
FOREACH (dietType IN dietTypes |
  MERGE (d:DietType {name: dietType})
  MERGE (u)-[:HAS_DIET]->(d)
)
"""

# Run the Cypher query
result = gds.run_cypher(cypher_query)

# Print the result
print(result)

Empty DataFrame
Columns: []
Index: []
