# Cypher Query Examples
This notebook installs a Neo4j Graph Database, imports data in the form of CSV files, and runs a few example queries.

In [1]:
#@title Check if Notebook is running in Google Colab
# Detect the runtime by probing for the `google.colab` package.
# `in_colab` is read by the setup cell that follows to decide which
# installation / configuration branch to run.
try:
    import google.colab  # noqa: F401 -- imported only to test availability
except ImportError:
    # Package not importable: treat as a non-Colab (local) run. Only
    # ImportError is caught so that unrelated failures (KeyboardInterrupt,
    # SystemExit, genuine bugs) are no longer silently swallowed.
    in_colab = False
else:
    in_colab = True

In [2]:
#@title Install software (Google Colab only)
# Prepare the runtime for the rest of the notebook: fetch helper modules and
# load connection settings into the process environment.
# NOTE(review): despite the title, the `else` branch below also runs for
# non-Colab (local) sessions.
if in_colab:
    # enable third party widgets in Colab
    from google.colab import output
    output.enable_custom_widget_manager()
    
    # copy required files
    # neo4j_utils.py is imported by a later cell (`import neo4j_utils`).
    !wget -q https://raw.githubusercontent.com/pwrose/neo4j-ipycytoscape/master/notebooks/neo4j_utils.py
    !git clone https://github.com/sbl-sdsc/kg-import.git
    # Copies everything from the cloned repo's notebooks directory into /content,
    # presumably so that `neo4j_bulk_importer` is importable -- TODO confirm.
    !cp /content/kg-import/notebooks/* /content

    # install software
    # Java runtime -- presumably required by the Neo4j server; confirm the
    # required Java version against the Neo4j release being installed.
    !apt -qq install openjdk-17-jre-headless
    # NOTE(review): package versions are not pinned, so results may change as
    # new releases appear; consider pinning for reproducibility.
    %pip install -q papermill
    %pip install -q py2neo
    %pip install -q ipycytoscape
    %pip install -q python-dotenv

    # set environment variables
    # Imported only after the %pip install above has made python-dotenv available.
    from dotenv import load_dotenv
    # No `override` here: variables already present in the environment keep
    # their existing values (python-dotenv default).
    load_dotenv("kg-import/.env.colab")
else:
    # Local run: only the helper module is downloaded (into the current working
    # directory); no packages are installed in this branch, so they are assumed
    # to be available already.
    !curl -O https://raw.githubusercontent.com/pwrose/neo4j-ipycytoscape/master/notebooks/neo4j_utils.py
    from dotenv import load_dotenv
    # override=True: values from ../.env replace same-named variables that are
    # already set in the environment.
    load_dotenv("../.env", override=True)

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  3600  100  3600    0     0  10344      0 --:--:-- --:--:-- --:--:-- 10315


In [3]:
import os
import pandas as pd
from py2neo import Graph
import papermill as pm
import neo4j_utils
import neo4j_bulk_importer

In [4]:
# Show full cell contents in rendered DataFrames (no column-width truncation),
# so long property descriptions in query results stay readable.
pd.options.display.max_colwidth = None

### Download and install Neo4j Database

In [5]:
# Download and install Neo4j through the neo4j_utils helper module.
# Per the recorded output, this step also changes the password of the 'neo4j'
# user, which only takes effect before the database is started for the first
# time -- so run this cell before anything that starts the server.
# NOTE(review): the helper's implementation is not visible in this notebook.
neo4j_utils.download_neo4j()

Changed password for user 'neo4j'. IMPORTANT: this change will only take effect if performed before the database is started for the first time.


### Import example Knowledge Graph
This step uses the CSV data files in example_metadata and example_data to create a Neo4j knowledge graph (KG).

In [6]:
# Build the example knowledge graph from the CSV files.
# Judging by the recorded output, this executes an 85-cell notebook (progress
# bar in cells/s) and leaves a Neo4j server running on localhost -- confirm
# the details in neo4j_bulk_importer, which is not visible here.
neo4j_bulk_importer.import_from_csv()

Executing:   0%|          | 0/85 [00:00<?, ?cell/s]

Directories in use:
home:         /Users/Peter/work/neo4j-community-5.15.0
config:       /Users/Peter/work/neo4j-community-5.15.0/conf
logs:         /Users/Peter/work/neo4j-community-5.15.0/logs
plugins:      /Users/Peter/work/neo4j-community-5.15.0/plugins
import:       /Users/Peter/work/neo4j-community-5.15.0/import
data:         /Users/Peter/work/neo4j-community-5.15.0/data
certificates: /Users/Peter/work/neo4j-community-5.15.0/certificates
licenses:     /Users/Peter/work/neo4j-community-5.15.0/licenses
run:          /Users/Peter/work/neo4j-community-5.15.0/run
Starting Neo4j.
Started neo4j (pid:14421). It is available at http://localhost:7474
There may be a short delay until the server is ready.
Launching server..... running.


### Connect to the local Neo4j Graph database

In [7]:
# Connection settings are read from the process environment (populated by the
# load_dotenv call in the setup cell); a missing variable yields None.
database, username, password = (
    os.environ.get(key)
    for key in ("NEO4J_DATABASE", "NEO4J_USERNAME", "NEO4J_PASSWORD")
)

# Handle to the local Neo4j instance over Bolt; used by every query cell below.
graph = Graph(
    "bolt://localhost:7687",
    name=database,
    user=username,
    password=password,
)

### Display Node metadata

In [8]:
# Fetch every node labelled MetaNode. Per the recorded output, each MetaNode
# describes one node type: its name plus a description of each property.
query = """
MATCH (n:MetaNode) RETURN n;
"""
# Each returned node becomes one row in the single column `n`.
graph.run(query).to_data_frame()

Unnamed: 0,n
0,"{'nodeName': 'State', 'synonyms': 'Alternate names of state (string[])', 'name': 'Name of state (string)', 'location': 'Latitude and longitude in WGS-84 format (point{crs:WGS-84})', 'id': 'Geonames.org id for location (string)', 'population': 'Population (int)'}"
1,"{'nodeName': 'City', 'synonyms': 'Alternate names of city (string[])', 'name': 'Name of city (string)', 'location': 'Latitude and longitude in WGS-84 format (point{crs:WGS-84})', 'id': 'Geonames.org id for location (string)', 'population': 'Population (int)'}"
2,"{'nodeName': 'Symptom', 'name': 'Name of symptom (string)', 'id': 'Symptom id from Symptom Ontology (string)'}"
3,"{'nodeName': 'Patient', 'firstName': 'First name (string)', 'lastName': 'Last name (string)', 'smoker': 'Patient is a smoker (boolean)', 'sex': 'Biological sex (string)', 'id': 'Unique patient id (string)', 'age': 'Age (int)'}"
4,"{'nodeName': 'Disease', 'name': 'Name of disease from Human Disease Ontology (string)', 'id': 'Disease id from Human Disease Ontology (string)'}"


### Get number of Nodes

In [9]:
# Count all nodes regardless of label (this includes the MetaNode nodes).
query = """
MATCH (n)
RETURN COUNT(n);
"""
# The query returns a single value, so evaluate() is used instead of building
# a DataFrame.
n = graph.evaluate(query)
print(f'Total number of nodes: {n}')

Total number of nodes: 19


### Get number of nodes by node label

In [10]:
# Node counts grouped by label, largest group first.
# labels(n)[0] uses only the first label of each node, so a node carrying
# several labels is counted under that first label alone.
# Rows with equal Count have no defined relative order (no secondary sort key).
query = """
MATCH (n) RETURN labels(n)[0] AS Node, COUNT(n) AS Count
ORDER BY Count DESC
"""
graph.run(query).to_data_frame()

Unnamed: 0,Node,Count
0,MetaNode,5
1,Symptom,4
2,Disease,3
3,Patient,3
4,City,3
5,State,1


### Where do the patients live?

In [11]:
# Follow LIVES_IN relationships from Patient to City and list each patient's
# name with the city. Patients without a LIVES_IN relationship are not returned.
query = """
MATCH (p:Patient)-[:LIVES_IN]->(c:City)
RETURN p.firstName AS Firstname, p.lastName AS Lastname, c.name AS City;
"""
graph.run(query).to_data_frame()

Unnamed: 0,Firstname,Lastname,City
0,John,Doe,San Francisco
1,Jane,Doe,Los Angeles
2,James,Bond,San Diego


### Which patients smoke?

In [12]:
# Filter patients on the boolean `smoker` property; patients whose property is
# false or absent do not match the WHERE clause.
query = """
MATCH (p:Patient)
WHERE p.smoker = True
RETURN p.firstName AS Firstname, p.lastName AS Lastname;
"""
graph.run(query).to_data_frame()

Unnamed: 0,Firstname,Lastname
0,John,Doe
1,James,Bond


### What kind of diseases do the smokers have?

In [13]:
# Combine the smoker filter with a DIAGNOSED_WITH traversal: one row per
# (smoking patient, diagnosed disease) pair. Smokers without a diagnosis
# relationship do not appear.
query = """
MATCH (p:Patient)-[:DIAGNOSED_WITH]->(d:Disease)
WHERE p.smoker = True
RETURN p.firstName AS Firstname, p.lastName AS Lastname, d.name as Disease;
"""
graph.run(query).to_data_frame()

Unnamed: 0,Firstname,Lastname,Disease
0,John,Doe,type 2 diabetes mellitus
1,James,Bond,COVID-19


### Which patients live in California?

In [14]:
# Two-hop traversal Patient -> City -> State, keeping only paths that end in
# the state named 'California'. No ORDER BY is given, so row order is not
# guaranteed.
query = """
MATCH (p:Patient)-[:LIVES_IN]->(c:City)-[:LOCATED_IN]->(s:State)
WHERE s.name = 'California'
RETURN p.firstName AS Firstname, p.lastName AS Lastname, c.name AS City;
"""
graph.run(query).to_data_frame()

Unnamed: 0,Firstname,Lastname,City
0,James,Bond,San Diego
1,Jane,Doe,Los Angeles
2,John,Doe,San Francisco


### How many patients live in California?

In [15]:
# Same Patient -> City -> State traversal as the previous query, but the city
# is left anonymous and only the number of matches is returned.
# COUNT(p) counts matched paths, so a patient would be counted once per
# matching LIVES_IN/LOCATED_IN path.
query = """
MATCH (p:Patient)-[:LIVES_IN]->(:City)-[:LOCATED_IN]->(s:State) 
WHERE s.name = 'California' 
RETURN COUNT(p)
"""
# Single scalar result, hence evaluate() rather than run().to_data_frame().
n = graph.evaluate(query)
print(f'Patients in California: {n}')

Patients in California: 3


### Full-text query for diabetes

In [16]:
# Parameterized full-text search against the index named 'fulltext'.
# $keyword is not interpolated into the string; it is supplied as a query
# parameter when the query is run in the next cell.
# NOTE(review): the 'fulltext' index is presumably created during the import
# step -- confirm, since its definition is not visible in this notebook.
query = """
CALL db.index.fulltext.queryNodes('fulltext', $keyword) YIELD node, score
RETURN node.id as id, node.name as disease, score
"""

In [17]:
# Search term bound to the $keyword parameter of the full-text query defined
# in the previous cell. In the recorded output, "Diabetes" matches
# "type 2 diabetes mellitus", i.e. the match is not case-sensitive there.
keyword = "Diabetes"
graph.run(query, keyword=keyword).to_data_frame()

Unnamed: 0,id,disease,score
0,DOID:9352,type 2 diabetes mellitus,0.704895


In [18]:
# Shut down the local Neo4j server once the examples are done (the recorded
# output shows "Stopping Neo4j ... stopped").
neo4j_utils.stop()

Stopping Neo4j......... stopped.
