In [1]:
#!pip install graphdatascience

In [2]:
# Connection settings for the Neo4j server.
# The database user must exist before connecting (create it first).
# URI, username and password can be supplied through environment variables so
# that real credentials never have to be written into the notebook; when a
# variable is not set, the original placeholder value is used.
import os

uri = os.environ.get("NEO4J_URI", "bolt://localhost:7687")   # Neo4j Bolt URI
username = os.environ.get("NEO4J_USERNAME", "username")      # Neo4j username
password = os.environ.get("NEO4J_PASSWORD", "password")      # Neo4j password
db_name = "nutrition"          # Specify the database you want to work on

In [3]:
from graphdatascience import GraphDataScience
import pandas as pd

# Open a GDS client connection to the Neo4j server. No database argument is
# passed here, so this connection is not bound to a specific database.
# The user must already exist; add it with CREATE USER in Neo4j Browser.
gds = GraphDataScience(uri, auth=(username, password))

# Server-side prerequisite: these settings should be present in the Neo4j conf file:
# dbms.security.procedures.unrestricted=jwt.security.*,gds.*
# dbms.security.procedures.allowlist=apoc.coll.*,apoc.load.*,gds.*

# Print the GDS version reported by the server; the assert fails if the call
# returns an empty/falsy value.
print(gds.version())
assert gds.version()



2.7.0


In [4]:
# Function to check if a database exists
def database_exists(gds, db_name):
    """Return True if a database called ``db_name`` is listed by the server.

    Args:
        gds: Client object exposing ``run_cypher``; the result of
            ``SHOW DATABASES`` must provide a ``name`` column.
        db_name: Database name to look for.

    Returns:
        bool: True when the name is listed, ignoring letter case.
    """
    result = gds.run_cypher("SHOW DATABASES")
    # Neo4j database names are case-insensitive and stored in lowercase, so
    # compare lowercased forms. An exact comparison would report "Nutrition"
    # as missing and the follow-up CREATE DATABASE would then fail.
    existing_databases = {str(name).lower() for name in result['name'].tolist()}
    return str(db_name).lower() in existing_databases

# Function to create a new database if it does not exist
def create_database_if_not_exists(gds, db_name):
    """Create the database ``db_name`` unless the server already lists it.

    Prints which of the two outcomes happened.

    Args:
        gds: Client object exposing ``run_cypher``.
        db_name: Name of the database to create.
    """
    if database_exists(gds, db_name):
        print(f"Database '{db_name}' already exists.")
        return

    # The name is passed as a query parameter instead of being formatted into
    # the Cypher text: names containing dashes or dots would otherwise need
    # quoting, and arbitrary text could alter the statement.
    # WAIT makes the command return only once the database has been created,
    # so a connection opened right afterwards can use it.
    gds.run_cypher("CREATE DATABASE $name WAIT", params={"name": db_name})
    print(f"Database '{db_name}' created.")

# Database to create: reuse the name from the connection settings so that this
# cell and the later database-bound connection cannot drift apart.
new_db_name = db_name

# Create the database if the server does not list it yet
create_database_if_not_exists(gds, new_db_name)

Database 'nutrition' already exists.


In [5]:
# Release the connection opened earlier without a database before rebinding
# `gds`; otherwise that first client object stays open for the whole session.
gds.close()

# Create another GraphDataScience instance and set the default database
gds = GraphDataScience(uri, auth=(username, password), database=db_name)

# Check the installed GDS version on the server
print(gds.version())
assert gds.version()



2.7.0


In [6]:
# Function to clear the database
def clear_database(gds):
    """Delete all graph data and drop the indexes this notebook creates.

    Runs, in order: one statement deleting every node together with its
    relationships, then one ``DROP INDEX ... IF EXISTS`` per known index.

    Label and property-key tokens are not removed. Neo4j keeps them once they
    have been created; getting rid of them requires recreating the database.

    Args:
        gds: Client object exposing ``run_cypher``, connected to the database
            that should be emptied.
    """
    # Delete all nodes and relationships
    gds.run_cypher("MATCH (n) DETACH DELETE n")

    # Drop the indexes created later in the notebook; IF EXISTS keeps a
    # repeated run (or a run against a fresh database) from failing.
    index_names = [
        "recipe_id_index",
        "ingredient_name_index",
        "keyword_name_index",
        "diettype_name_index",
        "author_name_index",
        "collection_name_index",
    ]
    for index_name in index_names:
        gds.run_cypher(f"DROP INDEX {index_name} IF EXISTS")

    # The former final step (create a 'Temp' node carrying every property key
    # with a null value, then delete it) was removed: null values are never
    # stored, so it removed no property keys, while it did register an extra
    # 'Temp' label and required the apoc.create procedures.

# Empty the database that `gds` is connected to
clear_database(gds)

# Confirmation message.
# NOTE(review): the text also mentions node labels and property keys; Neo4j is
# understood to keep those tokens once created - confirm before relying on it.
print("Database cleared, including indexes, node labels, and property keys.")

Database cleared, including indexes, node labels, and property keys.


### Set up indexes for query performance

In [7]:
# (index name, node label, indexed property) for each lookup index
index_specs = [
    ("recipe_id_index", "Recipe", "id"),
    ("ingredient_name_index", "Ingredient", "name"),
    ("keyword_name_index", "Keyword", "name"),
    ("diettype_name_index", "DietType", "name"),
    ("author_name_index", "Author", "name"),
    ("collection_name_index", "Collection", "name"),
]

# Build one CREATE INDEX statement per spec; the Cypher variable is the
# lowercased first letter of the label, e.g. (r:Recipe) ON (r.id)
index_queries = [
    f"CREATE INDEX {index_name} FOR ({label[0].lower()}:{label}) ON ({label[0].lower()}.{prop});"
    for index_name, label, prop in index_specs
]

# Execute the statements one at a time; a failure is reported and the loop
# moves on to the next statement
for query in index_queries:
    try:
        gds.run_cypher(query)
    except Exception as err:
        print(f"Error executing query: {query}\nError message: {err!s}")
        print()
    else:
        print(f"Executed: {query}")

Executed: CREATE INDEX recipe_id_index FOR (r:Recipe) ON (r.id);
Executed: CREATE INDEX ingredient_name_index FOR (i:Ingredient) ON (i.name);
Executed: CREATE INDEX keyword_name_index FOR (k:Keyword) ON (k.name);
Executed: CREATE INDEX diettype_name_index FOR (d:DietType) ON (d.name);
Executed: CREATE INDEX author_name_index FOR (a:Author) ON (a.name);
Executed: CREATE INDEX collection_name_index FOR (c:Collection) ON (c.name);


### Import recipes to the graph

First, ensure that the APOC plugin is installed and configured to allow reading from files. 

Create a separate configuration file named "apoc.conf" and add this line to it:

- apoc.import.file.enabled=true

Note: when the APOC settings were added to neo4j.conf instead, the DBMS failed to start, so keep them in apoc.conf.

The data file (stream_clean.json) has to be placed in the DBMS "import" folder, e.g., "Neo4J\relate-data\dbmss\dbms-1771a757-accc-4e5f-95c4-9bead0d360e8\import".

In [8]:
# Alternative source: to load the file straight from GitHub, replace the first
# line of the query with:
# CALL apoc.load.json('https://raw.githubusercontent.com/neo4j-examples/graphgists/master/browser-guides/data/stream_clean.json') YIELD value

# Import recipes: for every record in stream_clean.json, MERGE a Recipe node on
# its id and set name, description, cooking/preparation time and skill level.
# The nutrition properties are read by fixed position from nutrition_info, and
# each entry is split on spaces to pick out the value.
# NOTE(review): assumes every record lists the seven nutrition entries in this
# order and format - confirm against the data file.
cypher_query = """
CALL apoc.load.json("file:///stream_clean.json") YIELD value
WITH value.page.article.id AS id,
     value.page.title AS title,
     value.page.article.description AS description,
     value.page.recipe.cooking_time AS cookingTime,
     value.page.recipe.prep_time AS preparationTime,
     value.page.recipe.skill_level AS skillLevel,
     value.page.recipe.nutrition_info AS nutritionInfo
MERGE (r:Recipe {id: id})
SET r.cookingTime = cookingTime,
    r.preparationTime = preparationTime,
    r.name = title,
    r.description = description,
    r.skillLevel = skillLevel,

    // Set nutrition information
    r.nAddedSugar = split(nutritionInfo[0], " ")[2],
    r.nCarbohydrate = split(nutritionInfo[1], " ")[1],
    r.nKcal = split(nutritionInfo[2], " ")[1] + " "  + split(nutritionInfo[2], " ")[2],
    r.nProtein = split(nutritionInfo[3], " ")[1],
    r.nSalt = split(nutritionInfo[4], " ")[1],
    r.nSaturatedFat = split(nutritionInfo[5], " ")[2],
    r.nFat = split(nutritionInfo[6], " ")[1]
"""

# Execute the import against the connected database
result = gds.run_cypher(cypher_query)

# The query has no RETURN clause, so the printed DataFrame is empty
print(result)

Empty DataFrame
Columns: []
Index: []


### Import authors and connect to recipes

In [9]:
# Alternative source: to load the file straight from GitHub, replace the first
# line of the query with:
# CALL apoc.load.json('https://raw.githubusercontent.com/neo4j-examples/graphgists/master/browser-guides/data/stream_clean.json') YIELD value

# Import authors: MERGE one Author node per author name, look up the Recipe
# with the record's id, and MERGE a WROTE relationship from author to recipe.
cypher_query = """
CALL apoc.load.json("file:///stream_clean.json") YIELD value
WITH value.page.article.id AS id,
     value.page.article.author AS author
MERGE (a:Author {name: author})
WITH a,id
MATCH (r:Recipe {id:id})
MERGE (a)-[:WROTE]->(r);
"""

# Execute the import against the connected database
result = gds.run_cypher(cypher_query)

# The query has no RETURN clause, so the printed DataFrame is empty
print(result)

Empty DataFrame
Columns: []
Index: []


### Import ingredients and connect to recipes

In [10]:
# Alternative source: to load the file straight from GitHub, replace the first
# line of the query with:
# CALL apoc.load.json('https://raw.githubusercontent.com/neo4j-examples/graphgists/master/browser-guides/data/stream_clean.json') YIELD value

# Import ingredients: for each record, find the Recipe by id, then for every
# entry of its ingredients list MERGE an Ingredient node by name and MERGE a
# CONTAINS_INGREDIENT relationship from the recipe to it.
cypher_query = """
CALL apoc.load.json("file:///stream_clean.json") YIELD value
WITH value.page.article.id AS id,
     value.page.recipe.ingredients AS ingredients
MATCH (r:Recipe {id:id})
FOREACH (ingredient IN ingredients |
     MERGE (i:Ingredient {name: ingredient})
     MERGE (r)-[:CONTAINS_INGREDIENT]->(i)
);
"""

# Execute the import against the connected database
result = gds.run_cypher(cypher_query)

# The query has no RETURN clause, so the printed DataFrame is empty
print(result)

Empty DataFrame
Columns: []
Index: []


### Import keywords and connect to recipes

In [11]:
# Alternative source: to load the file straight from GitHub, replace the first
# line of the query with:
# CALL apoc.load.json('https://raw.githubusercontent.com/neo4j-examples/graphgists/master/browser-guides/data/stream_clean.json') YIELD value

# Import keywords: for each record, find the Recipe by id, then for every
# entry of its keywords list MERGE a Keyword node by name and MERGE a KEYWORD
# relationship from the recipe to it.
cypher_query = """
CALL apoc.load.json("file:///stream_clean.json") YIELD value
WITH value.page.article.id AS id,
     value.page.recipe.keywords AS keywords
MATCH (r:Recipe {id:id})
FOREACH (keyword IN keywords |
     MERGE (k:Keyword {name: keyword})
  MERGE (r)-[:KEYWORD]->(k)
);
"""

# Execute the import against the connected database
result = gds.run_cypher(cypher_query)

# The query has no RETURN clause, so the printed DataFrame is empty
print(result)

Empty DataFrame
Columns: []
Index: []


### Import dietTypes and connect to recipes

In [12]:
# Alternative source: to load the file straight from GitHub, replace the first
# line of the query with:
# CALL apoc.load.json('https://raw.githubusercontent.com/neo4j-examples/graphgists/master/browser-guides/data/stream_clean.json') YIELD value

# Import diet types: for each record, find the Recipe by id, then for every
# entry of its diet_types list MERGE a DietType node by name and MERGE a
# DIET_TYPE relationship from the recipe to it.
cypher_query = """
CALL apoc.load.json("file:///stream_clean.json") YIELD value
WITH value.page.article.id AS id,
     value.page.recipe.diet_types AS dietTypes
MATCH (r:Recipe {id:id})
FOREACH (dietType IN dietTypes |
     MERGE (d:DietType {name: dietType})
     MERGE (r)-[:DIET_TYPE]->(d)
);
"""

# Execute the import against the connected database
result = gds.run_cypher(cypher_query)

# The query has no RETURN clause, so the printed DataFrame is empty
print(result)

Empty DataFrame
Columns: []
Index: []


### Import collections and connect to recipes

In [13]:
# Alternative source: to load the file straight from GitHub, replace the first
# line of the query with:
# CALL apoc.load.json('https://raw.githubusercontent.com/neo4j-examples/graphgists/master/browser-guides/data/stream_clean.json') YIELD value

# Import collections: for each record, find the Recipe by id, then for every
# entry of its collections list MERGE a Collection node by name and MERGE a
# COLLECTION relationship from the recipe to it.
cypher_query = """
CALL apoc.load.json("file:///stream_clean.json") YIELD value
WITH value.page.article.id AS id,
     value.page.recipe.collections AS collections
MATCH (r:Recipe {id:id})
FOREACH (collection IN collections |
     MERGE (c:Collection {name: collection})
     MERGE (r)-[:COLLECTION]->(c)
);
"""

# Execute the import against the connected database
result = gds.run_cypher(cypher_query)

# The query has no RETURN clause, so the printed DataFrame is empty
print(result)

Empty DataFrame
Columns: []
Index: []


### Graph Schema

Now let’s review the metagraph and see the types of nodes and relationships we’re going to be working with.

In [14]:
# Ask the server for the schema (meta-graph) of the connected database
result = gds.run_cypher('CALL db.schema.visualization()')

# One row with two list columns: `nodes` and `relationships`
print(result)

                                               nodes  \
0  [(name, indexes, constraints), (name, indexes,...   

                              relationships  
0  [(name), (name), (name), (name), (name)]  


We can see that the graph is based around Recipes, which are then connected to several other entities. A recipe contains Ingredients, can be part of a Collection, is written by an Author, can form part of a DietType, and has certain Keywords.

### Most common ingredients

What are the most popular ingredients and in how many recipes have they
been used?

In [15]:
# For every ingredient, count the CONTAINS_INGREDIENT relationships coming in
# from recipes and list the ingredients from most to least used.
cypher_query = """
MATCH (i:Ingredient)<-[rel:CONTAINS_INGREDIENT]-(r:Recipe)
RETURN i.name, count(rel) as recipes
ORDER BY recipes DESC
"""

# Execute the query; the result is a DataFrame with columns `i.name` and `recipes`
result = gds.run_cypher(cypher_query)

# Show the result (pandas abbreviates the middle rows of a long frame)
print(result)

                                i.name  recipes
0                            olive oil     3308
1                               butter     2856
2                         garlic clove     2740
3                                lemon     2144
4                                  egg     2125
...                                ...      ...
3072  fat-free Greek or natural yogurt        1
3073                           garnish        1
3074     chocolate-coated coffee beans        1
3075                    caramel wafers        1
3076                         tea towel        1

[3077 rows x 2 columns]


The items at the top of the list aren’t all that surprising - olive oil, butter, and garlic! Further down the list we can see some ingredients that are probably used in cakes: sugar, milk, self-raising flour.

### I want chocolate cake!

This dataset also contains collections, and one of the tastiest looking
ones is the collection of chocolate cakes. The following query returns
the recipes in this collection:

In [16]:
# Return id, name and description of every node linked by a COLLECTION
# relationship to the Collection named 'Chocolate cake'.
cypher_query = """
MATCH (:Collection {name: 'Chocolate cake'})<-[:COLLECTION]-(recipe)
RETURN recipe.id, recipe.name, recipe.description
"""

# Execute the query against the connected database
result = gds.run_cypher(cypher_query)

# Show the matching recipes
print(result)

   recipe.id                                        recipe.name  \
0    6034241        Salted dark chocolate, rye & courgette cake   
1      98704                      White chocolate & cherry loaf   
2      96094                           Chocolate courgette cake   
3      95041                           Chocolate & ginger torte   
4     101172                           Chocolate birthday cake    
5     103051                        White & dark chocolate cake   
6    5641126                  Low sugar chocolate sandwich cake   
7    6359111                              Chocolate & lime cake   
8    5052746                          Easy vegan chocolate cake   
9      98768                 Pistachio & milk chocolate squares   
10   4973576                             Brooklyn blackout cake   
11     99333                       Chocolate dipped cherry cake   
12     98678                          Beetroot & chocolate cake   
13   4989861                   Flowerpot chocolate chip muffin

A hunger-inducing list, but let’s not be greedy. We’ll zoom in on that seriously rich chocolate cake.

### Seriously rich chocolate cake

We’ll start with the following query, which returns a graph of the
recipe and its ingredients:

In [17]:
# Return one path per ingredient of the recipe with id '97123'
# (recipe -[:CONTAINS_INGREDIENT]-> ingredient).
cypher_query = """
MATCH path = (r:Recipe {id:'97123'})-[:CONTAINS_INGREDIENT]->(i:Ingredient)
RETURN path
"""

# Execute the query against the connected database
result = gds.run_cypher(cypher_query)

# Show the result; in the recorded output each path object prints only as `(())`
print(result)

   path
0  (())
1  (())
2  (())
3  (())
4  (())
5  (())
6  (())
7  (())
8  (())


### Are there any similar cakes to this one?

Ok, so we’ve now baked this cake a few times, and while it was delicious,
we’d like to try out some other recipes. What other cakes are there
similar to this one?

In [18]:
# Find other recipes that share ingredients with recipe '97123': collect the
# shared ingredient names per recipe and keep the 10 recipes with the longest
# shared list.
cypher_query = """
MATCH (r:Recipe {id:'97123'})-[:CONTAINS_INGREDIENT]->(i:Ingredient)<-[:CONTAINS_INGREDIENT]-(rec:Recipe)
RETURN rec.id, rec.name, collect(i.name) AS commonIngredients
ORDER BY size(commonIngredients) DESC
LIMIT 10
"""

# Execute the query against the connected database
result = gds.run_cypher(cypher_query)

# Show the recommended recipes with their shared ingredients
print(result)

    rec.id                                           rec.name  \
0   100341                 Black Forest sundaes with brownies   
1    97938  Chocolate & almond puds with boozy hot chocola...   
2   103089                          Little Black Forest cakes   
3   101166                             Christmas pud cupcakes   
4   102129                                   Dark mocha torte   
5  4542501  Espresso, chocolate & chilli cake with coffee ...   
6    94802                              Orange chocolate tart   
7    96335                      Sunken drunken chocolate cake   
8  4599096                      Freaky finger red velvet cake   
9    98097                        Chocolate & Earl Grey torte   

                                   commonIngredients  
0  [caster sugar, egg, kirsch, ground almond, but...  
1  [caster sugar, egg, cocoa powder, ground almon...  
2  [caster sugar, egg, cocoa powder, kirsch, butt...  
3  [caster sugar, egg, cocoa powder, ground almon...  
4   [cast

The query above:

- finds all the ingredients in the seriously rich chocolate cake
- finds other recipes that also contain these ingredients
- returns the recipes that have the most ingredients in common with it

### What other recipes has the author published?

Another type of recommendation query would be to find the other recipes
published by the author of seriously rich chocolate cake. The following
query does this:

In [19]:
# Starting from recipe '97123', go to the Author(s) who WROTE it and return
# id, name and description of the other recipes written by the same author(s).
cypher_query = """
MATCH (rec:Recipe)<-[:WROTE]-(a:Author)-[:WROTE]->(r:Recipe {id:'97123'})
RETURN rec.id, rec.name, rec.description
"""

# Execute the query against the connected database
result = gds.run_cypher(cypher_query)

# Show the result (pandas abbreviates the middle rows of a long frame)
print(result)

      rec.id                                      rec.name  \
0      97925                Prosciutto & rosemary potatoes   
1      98632                           Baked camembert kit   
2     102274                         Gingery treacle tart    
3      98848           Roasted summer veg & pancetta pasta   
4     102142           Chicken with braised celery & cider   
...      ...                                           ...   
3435   96054                          Exotic avocado salad   
3436   95968  Fennel & chestnut loaf with cranberry relish   
3437  102108                  Cheat's chilli prawn noodles   
3438   96969                   American blueberry pancakes   
3439   98273                        Shepherd's pie jackets   

                                        rec.description  
0     These simple canapÃ©s are small but perfectly ...  
1     The ideal gift for a cheese-lover: a box of ca...  
2     If you find traditional treacle tart too sweet...  
3     Roast up courgett

### What can I make with the ingredients in my kitchen?

### Show me the chillis


In [20]:
# Keep recipes linked to the ingredient named 'chilli'; for each one return
# its name plus the names of all its ingredients (pattern comprehension).
cypher_query = """
MATCH (r:Recipe)
WHERE (r)-[:CONTAINS_INGREDIENT]->(:Ingredient {name: 'chilli'})
RETURN r.name AS recipe,
     [(r)-[:CONTAINS_INGREDIENT]->(i) | i.name]
     AS ingredients
"""

# Execute the query against the connected database
result = gds.run_cypher(cypher_query)

# Show the recipes and their ingredient lists
print(result)

                                        recipe  \
0                        Mango with lime sugar   
1                      John's chilli con carne   
2             Tuna steaks with cucumber relish   
3                   Lime, chilli & feta butter   
4       Chicken enchiladas with red mole sauce   
..                                         ...   
237                             Squash gnocchi   
238  Chiang Mai curried noodles (Khao soi gai)   
239                   Crunchy courgette pickle   
240                       Reggae reggae nachos   
241                          Flavoured butters   

                                           ingredients  
0       [lime, mango, chilli, ice cream, caster sugar]  
1    [olive oil, coriander, thyme, smoked paprika, ...  
2    [lemon juice, cucumber, tuna steak, tomato, pa...  
3    [feta, coriander leaves, butter, lime, salt, c...  
4    [chilli, onion, cinnamon, cumin, chicken, blan...  
..                                                 ...  


### What can I make with the ingredients in my kitchen?

### Recipes with multiple ingredients (Part 1)

In [21]:
# Keep recipes linked to both 'chilli' and 'prawn' (one hard-coded pattern
# predicate per ingredient); return each recipe's name and the names of all
# its ingredients, at most 20 rows.
cypher_query = """
MATCH (r:Recipe)
WHERE (r)-[:CONTAINS_INGREDIENT]->(:Ingredient {name: 'chilli'})
AND   (r)-[:CONTAINS_INGREDIENT]->(:Ingredient {name: 'prawn'})
RETURN r.name AS recipe,
     [(r)-[:CONTAINS_INGREDIENT]->(i) | i.name]
     AS ingredients
LIMIT 20
"""

# Execute the query against the connected database
result = gds.run_cypher(cypher_query)

# Show the recipes and their ingredient lists
print(result)

                                              recipe  \
0                     Spinach, avocado & prawn salad   
1                Sweet & hot prawn & pineapple curry   
2                                     Kung po prawns   
3                             Spicy stuffed chillies   
4                         Prawn & avocado escabÃ¨che   
5                                Savoury party bites   
6                              Prawn & coconut laksa   
7                                One-pan prawn pilau   
8                       Hot & sour broth with prawns   
9             Spaghetti with prawns, chilli & rocket   
10           Spicy seafood stew with tomatoes & lime   
11                 Spiced prawns with coriander mayo   
12                      Cheat's chilli prawn noodles   
13  Singapore noodles with shrimps & Chinese cabbage   
14                                 Hot salmon parcel   

                                          ingredients  
0   [spinach, avocado, soy sauce, garlic clove,

### What can I make with the ingredients in my kitchen?

### Recipes with multiple ingredients (Part 2)

In [22]:
# Ingredients that every returned recipe must contain (bound to $ingredients)
ingredients = ["chilli", "prawn"]

# Keep recipes linked to every requested ingredient; return each recipe's name
# and all of its ingredient names, shortest ingredient list first, 20 rows max
cypher_query = """
MATCH (r:Recipe)
WHERE all(i in $ingredients WHERE exists(
     (r)-[:CONTAINS_INGREDIENT]->(:Ingredient {name: i})))
RETURN r.name AS recipe,
     [(r)-[:CONTAINS_INGREDIENT]->(i) | i.name]
     AS ingredients
ORDER BY size(ingredients)
LIMIT 20
"""

# Bind the Python list to the query parameter and execute
query_params = dict(ingredients=ingredients)
result = gds.run_cypher(cypher_query, params=query_params)

# Show the matching recipes
print(result)

                                              recipe  \
0                                Savoury party bites   
1             Spaghetti with prawns, chilli & rocket   
2                     Spinach, avocado & prawn salad   
3                       Hot & sour broth with prawns   
4                                One-pan prawn pilau   
5                       Cheat's chilli prawn noodles   
6                         Prawn & avocado escabÃ¨che   
7                Sweet & hot prawn & pineapple curry   
8                              Prawn & coconut laksa   
9                  Spiced prawns with coriander mayo   
10                                    Kung po prawns   
11                                 Hot salmon parcel   
12                            Spicy stuffed chillies   
13  Singapore noodles with shrimps & Chinese cabbage   
14           Spicy seafood stew with tomatoes & lime   

                                          ingredients  
0             [prawn, caviar, chilli, cheese, s

### Mark’s allergic to all the things

In [23]:
# Ingredients to avoid (bound to $allergens) and ingredients that must be
# present (bound to $ingredients)
allergens = ["egg", "milk"]
ingredients = ["coconut milk", "rice"]

# Keep recipes linked to every wanted ingredient and to none of the allergens;
# return each recipe's name and all of its ingredient names, shortest
# ingredient list first, 20 rows max
cypher_query = """
MATCH (r:Recipe)

WHERE all(i in $ingredients WHERE exists(
     (r)-[:CONTAINS_INGREDIENT]->(:Ingredient {name: i})))
AND none(i in $allergens WHERE exists(
     (r)-[:CONTAINS_INGREDIENT]->(:Ingredient {name: i})))

RETURN r.name AS recipe,
     [(r)-[:CONTAINS_INGREDIENT]->(i) | i.name]
     AS ingredients
ORDER BY size(ingredients)
LIMIT 20
"""

# Bind both Python lists to the query parameters and execute
query_params = dict(allergens=allergens, ingredients=ingredients)
result = gds.run_cypher(cypher_query, params=query_params)

# Show the matching recipes
print(result)

                                        recipe  \
0                         Toasted coconut rice   
1                          Lamb meatball curry   
2               Slow cooker Thai chicken curry   
3   Sticky coconut rice, mango & passion fruit   
4                           Keralan hake curry   
5                       Fruity Caribbean curry   
6                     Thai green chicken curry   
7             Roasted aubergine & tomato curry   
8                       Creamy split pea curry   
9                     Satay sweet potato curry   
10              Brazilian prawn & coconut stew   
11                         Beef massaman curry   
12              Bahia-style Moqueca prawn stew   
13                       Keralan scallop molee   
14                          Coconut fish curry   
15                                 Katsu curry   
16                  One-pan jerk roast chicken   
17              Prawn, butternut & mango curry   
18                       Veggie Thai red curry   
