# Cypher Query Examples
This notebook installs a Neo4j Graph Database, imports data in the form of CSV files, and runs a few example queries.

In [1]:
#@title Check if Notebook is running in Google Colab
# Probe for the google.colab package: the import is expected to succeed only
# inside a Colab runtime, so its outcome decides which setup branch runs later.
try:
    import google.colab  # noqa: F401 -- imported only to detect Colab
    in_colab = True
except ImportError:
    # Catch only the failed import. A bare `except:` would also swallow
    # KeyboardInterrupt/SystemExit and hide unrelated errors.
    in_colab = False

In [2]:
#@title Install software (Google Colab only)
# In Colab: fetch the helper module and the kg-import repository, install Java
# and the Python dependencies, then load the Colab-specific environment file.
# Elsewhere: only fetch the helper module and load the local ../.env file.
if in_colab:
    # enable third party widgets in Colab
    from google.colab import output
    output.enable_custom_widget_manager()
    output.no_vertical_scroll()

    # copy required files
    !wget -q https://raw.githubusercontent.com/pwrose/neo4j-ipycytoscape/master/notebooks/neo4j_utils.py
    import os
    # clone only when the directory is missing, so re-running this cell does not clone again
    if not os.path.exists("kg-import"):
        !git clone --quiet https://github.com/sbl-sdsc/kg-import.git

    # copy the contents of the kg-import notebooks directory into /content
    !cp /content/kg-import/notebooks/* /content

    # install software
    # (all output is redirected to /dev/null to keep the cell quiet)
    # headless OpenJDK 17 runtime -- presumably required by Neo4j; TODO confirm
    !apt -qq install openjdk-17-jre-headless 2>/dev/null > /dev/null
    %pip install -q papermill > /dev/null
    %pip install -q py2neo > /dev/null
    %pip install -q ipycytoscape > /dev/null
    %pip install -q python-dotenv > /dev/null

    # set environment variables
    # (imported here because python-dotenv is installed just above)
    from dotenv import load_dotenv
    load_dotenv("kg-import/.env.colab")
else:
    # download the helper module into the current directory
    !curl -O https://raw.githubusercontent.com/pwrose/neo4j-ipycytoscape/master/notebooks/neo4j_utils.py
    from dotenv import load_dotenv
    # override=True lets values from ../.env replace variables that are already set
    load_dotenv("../.env", override=True)

<IPython.core.display.Javascript object>

In [3]:
#@title Imports
import os
import pandas as pd
from py2neo import Graph
import papermill as pm
import neo4j_utils
import neo4j_bulk_importer

# Show full cell contents in displayed DataFrames instead of truncating long strings.
pd.options.display.max_colwidth = None

### Download and install Neo4j Database
Install the Neo4j Community Edition

In [4]:
# Download the Neo4j Community Edition through the neo4j_utils helper module.
# NOTE(review): the download location and Neo4j version are decided inside
# neo4j_utils and are not visible in this notebook.
neo4j_utils.download_neo4j_community()

### Import example Knowledge Graph
This step uses the CSV data files in example_metadata and example_data to create a Neo4j Knowledge Graph (KG).

In [5]:
# Bulk-import the example CSV files into the Neo4j Community database.
# NOTE(review): the recorded output suggests this executes a notebook and then
# launches the database server -- confirm in neo4j_bulk_importer.
neo4j_bulk_importer.import_from_csv_to_neo4j_community()

Executing:   0%|          | 0/85 [00:00<?, ?cell/s]

Launching server........ running.


### Connect to the local Neo4j Graph database

In [6]:
# Read the connection settings from the environment; a variable that is not set yields None.
database = os.getenv("NEO4J_DATABASE")
username = os.getenv("NEO4J_USERNAME")
password = os.getenv("NEO4J_PASSWORD")
stylesheet = os.getenv("NEO4J_STYLESHEET")

# Connect over Bolt to the Neo4j server on this machine.
graph = Graph(
    "bolt://localhost:7687",
    name=database,
    user=username,
    password=password,
)

### Show Node metadata
The MetaNodes define the properties of the nodes in the KG, together with the type of each property.

In [7]:
# Fetch every MetaNode and tabulate its properties, one row per MetaNode.
query = """
MATCH (n:MetaNode) RETURN n;
"""
df = graph.run(query).to_data_frame()
# Expand the nodes in column "n" into property columns and blank out the
# properties that a given node does not define.
metadata = pd.DataFrame(df["n"].tolist()).fillna("")
metadata

Unnamed: 0,nodeName,synonyms,name,location,id,population,firstName,lastName,smoker,sex,age
0,City,Alternate names of city (string[]),Name of city (string),Latitude and longitude in WGS-84 format (point{crs:WGS-84}),Geonames.org id for location (string),Population (int),,,,,
1,Patient,,,,Unique patient id (string),,First name (string),Last name (string),Patient is a smoker (boolean),Biological sex (string),Age (int)
2,State,Alternate names of state (string[]),Name of state (string),Latitude and longitude in WGS-84 format (point{crs:WGS-84}),Geonames.org id for location (string),Population (int),,,,,
3,Disease,,Name of disease from Human Disease Ontology (string),,Disease id from Human Disease Ontology (string),,,,,,
4,Symptom,,Name of symptom (string),,Symptom id from Symptom Ontology (string),,,,,,


### Visualize the Metagraph
The metagraph shows the node labels and relationship types of the KG. Click on a node to display the node metadata.

In [8]:
# Select every directed relationship between two MetaNode-labelled nodes;
# the matched paths make up the metagraph.
query = """
MATCH p=(:MetaNode)-->(:MetaNode) RETURN p
"""

In [9]:
# Run the metagraph query (the `query` defined in the previous cell) and
# convert the returned paths into a subgraph for drawing.
subgraph1 = graph.run(query).to_subgraph()

In [25]:
# Draw the metagraph as an interactive Cytoscape widget with the "cola" layout.
widget1 = neo4j_utils.draw_graph(subgraph1, stylesheet)
widget1.layout.height = "800px"
widget1.set_layout(
    name="cola",
    padding=40,
    nodeSpacing=65,
    nodeDimensionsIncludeLabels=True,
    unconstrIter=15000,
)
widget1

CytoscapeWidget(cytoscape_layout={'name': 'cola', 'padding': 40, 'nodeSpacing': 65, 'nodeDimensionsIncludeLabe…

In [11]:
# Select every directed relationship whose start node is not labelled
# MetaNode, i.e. the data part of the graph without the metagraph.
query = """
MATCH p=(a)-->(b) WHERE NOT "MetaNode" in LABELS(a) RETURN p
"""

In [12]:
### Visualize the Knowledge Graph

In [13]:
# Run the knowledge-graph query (the most recently defined `query`) and
# convert the returned paths into a subgraph for drawing.
subgraph2 = graph.run(query).to_subgraph()

In [14]:
# Draw the knowledge graph as an interactive Cytoscape widget with the "cola" layout.
widget2 = neo4j_utils.draw_graph(subgraph2, stylesheet)
widget2.layout.height = "1000px"
widget2.set_layout(
    name="cola",
    padding=0,
    nodeSpacing=40,
    nodeDimensionsIncludeLabels=True,
    unconstrIter=15000,
)
widget2

CytoscapeWidget(cytoscape_layout={'name': 'cola', 'padding': 0, 'nodeSpacing': 40, 'nodeDimensionsIncludeLabel…

### Get number of Nodes

In [15]:
# Count all nodes in the database; no label filter is applied, so MetaNodes are included.
query = """
MATCH (n)
RETURN COUNT(n);
"""
n = graph.evaluate(query)
print("Total number of nodes:", n)

Total number of nodes: 19


### Get number of nodes by node label

In [16]:
# Group the nodes by their first label and count each group, largest first.
# Only labels(n)[0] is used, so a node carrying several labels is counted
# once, under its first label.
query = """
MATCH (n) RETURN labels(n)[0] AS Node, COUNT(n) AS Count
ORDER BY Count DESC
"""
graph.run(query).to_data_frame()

Unnamed: 0,Node,Count
0,MetaNode,5
1,Symptom,4
2,Disease,3
3,Patient,3
4,City,3
5,State,1


### Where do the patients live?

In [17]:
# Follow each Patient's LIVES_IN relationship to a City and list the
# patient's first and last name next to the city name.
query = """
MATCH (p:Patient)-[:LIVES_IN]->(c:City)
RETURN p.firstName AS Firstname, p.lastName AS Lastname, c.name AS City;
"""
graph.run(query).to_data_frame()

Unnamed: 0,Firstname,Lastname,City
0,John,Doe,San Francisco
1,Jane,Doe,Los Angeles
2,James,Bond,San Diego


### Which patients smoke?

In [18]:
# Keep only the Patient nodes whose smoker property is true and list their names.
query = """
MATCH (p:Patient)
WHERE p.smoker = True
RETURN p.firstName AS Firstname, p.lastName AS Lastname;
"""
graph.run(query).to_data_frame()

Unnamed: 0,Firstname,Lastname
0,John,Doe
1,James,Bond


### What diseases do the smokers have?

In [19]:
# Restrict to smokers, then follow DIAGNOSED_WITH to the Disease nodes.
# The result has one row per (patient, disease) pair, so a patient with
# several diagnoses appears on several rows.
query = """
MATCH (p:Patient)-[:DIAGNOSED_WITH]->(d:Disease)
WHERE p.smoker = True
RETURN p.firstName AS Firstname, p.lastName AS Lastname, d.name as Disease;
"""
graph.run(query).to_data_frame()

Unnamed: 0,Firstname,Lastname,Disease
0,John,Doe,type 2 diabetes mellitus
1,James,Bond,COVID-19


### Which patients live in California?

In [20]:
# Traverse Patient -> City -> State and keep the paths that end in the state
# named 'California'. There is no ORDER BY, so the row order is not guaranteed.
query = """
MATCH (p:Patient)-[:LIVES_IN]->(c:City)-[:LOCATED_IN]->(s:State)
WHERE s.name = 'California'
RETURN p.firstName AS Firstname, p.lastName AS Lastname, c.name AS City;
"""
graph.run(query).to_data_frame()

Unnamed: 0,Firstname,Lastname,City
0,James,Bond,San Diego
1,Jane,Doe,Los Angeles
2,John,Doe,San Francisco


### How many patients live in California?

In [21]:
# Count the patients reachable via Patient -> City -> State where the state is 'California'.
query = """
MATCH (p:Patient)-[:LIVES_IN]->(:City)-[:LOCATED_IN]->(s:State)
WHERE s.name = 'California'
RETURN COUNT(p)
"""
n = graph.evaluate(query)
print("Patients in California:", n)

Patients in California: 3


### Full text query for Diabetes

In [22]:
# Search the full-text index named 'fulltext' for the $keyword parameter
# (supplied when the query is run) and report each hit's first label, id,
# name, and relevance score. The name is returned under the alias `disease`
# whatever the label of the matching node is.
query = """
CALL db.index.fulltext.queryNodes('fulltext', $keyword) YIELD node, score
RETURN LABELS(node)[0] as type, node.id as id, node.name as disease, score
"""

In [23]:
# The search term is passed as a query parameter (bound to $keyword in the
# query text) rather than being formatted into the Cypher string.
keyword = "Diabetes"
graph.run(query, keyword=keyword).to_data_frame()

Unnamed: 0,type,id,disease,score
0,Disease,DOID:9352,type 2 diabetes mellitus,0.704895


## Shutdown Neo4j before closing this notebook.
If you run this notebook locally, uncomment the last line and run neo4j_utils.stop() to stop the database. Otherwise, the database server will keep running.



In [24]:
#neo4j_utils.stop()