# Cypher Query Examples
This notebook installs a Neo4j Graph Database, imports data in the form of CSV files, and runs a few example queries.

In [1]:
#@title Check if Notebook is running in Google Colab
in_colab = False
try:
    import google.colab  
    in_colab = True
except:
    pass

In [2]:
#@title Install software (Google Colab only)
if in_colab:
    # enable third party widgets in Colab
    from google.colab import output
    output.enable_custom_widget_manager()
    
    # copy required files (temporary solution)
    !wget -q https://raw.githubusercontent.com/pwrose/neo4j-ipycytoscape/master/notebooks/neo4j_utils.py
    !wget -q https://raw.githubusercontent.com/sbl-sdsc/kg-import/master/notebooks/neo4j_bulk_importer.py
    !wget -q https://raw.githubusercontent.com/sbl-sdsc/kg-import/master/notebooks/utils.py
    !wget -q https://raw.githubusercontent.com/sbl-sdsc/kg-import/master/notebooks/PrepareNeo4jBulkImport.ipynb

    # install software
    !apt -qq install openjdk-17-jre-headless
    %pip install -q papermill
    %pip install -q py2neo
    %pip install -q ipycytoscape
    %pip install -q python-dotenv

    # set environment variables
    from dotenv import load_dotenv
    load_dotenv("kg-import/.env.colab")
else:
    # copy required files (temporary solution)
    !curl -s -O https://raw.githubusercontent.com/pwrose/neo4j-ipycytoscape/master/notebooks/neo4j_utils.py
    !curl -s -O https://raw.githubusercontent.com/sbl-sdsc/kg-import/master/notebooks/neo4j_bulk_importer.py
    !curl -s -O https://raw.githubusercontent.com/sbl-sdsc/kg-import/master/notebooks/utils.py
    !curl -s -O https://raw.githubusercontent.com/sbl-sdsc/kg-import/master/notebooks/PrepareNeo4jBulkImport.ipynb
    from dotenv import load_dotenv
    load_dotenv("../../.env", override=True)

In [3]:
import os
import pandas as pd
from py2neo import Graph
import papermill as pm
import neo4j_utils
import neo4j_bulk_importer

In [4]:
pd.set_option('display.max_colwidth', None)

### Download and install Neo4j Database

In [5]:
neo4j_utils.download_neo4j()

### Import example Knowledge Graph
It used the CSV data files in the example_metadata and example_data files to create a Neo4J KG

In [6]:
neo4j_bulk_importer.import_from_csv()

Executing:   0%|          | 0/85 [00:00<?, ?cell/s]

Directories in use:
home:         /Users/Peter/work/neo4j-community-5.15.0
config:       /Users/Peter/work/neo4j-community-5.15.0/conf
logs:         /Users/Peter/work/neo4j-community-5.15.0/logs
plugins:      /Users/Peter/work/neo4j-community-5.15.0/plugins
import:       /Users/Peter/work/neo4j-community-5.15.0/import
data:         /Users/Peter/work/neo4j-community-5.15.0/data
certificates: /Users/Peter/work/neo4j-community-5.15.0/certificates
licenses:     /Users/Peter/work/neo4j-community-5.15.0/licenses
run:          /Users/Peter/work/neo4j-community-5.15.0/run
Starting Neo4j.
Started neo4j (pid:3265). It is available at http://localhost:7474
There may be a short delay until the server is ready.
Launching server... running.


### Connect to the local Neo4j Graph database

In [7]:
database = os.environ.get("NEO4J_DATABASE")
username = os.environ.get("NEO4J_USERNAME")
password = os.environ.get("NEO4J_PASSWORD")
stylesheet = os.environ.get("NEO4J_STYLESHEET")

graph = Graph("bolt://localhost:7687", name=database, user=username, password=password)

### Show Node metadata
The MetaNodes define the properties and their types of the Nodes in the KG.

In [8]:
query = """
MATCH (n:MetaNode) RETURN n;
"""
df = graph.run(query).to_data_frame()
metadata = df["n"].tolist()
metadata = pd.DataFrame(metadata)
metadata.fillna("", inplace=True)
metadata

Unnamed: 0,nodeName,country,geonames,ror,city,name,uei,duns,id,state,...,presentationUrl,endDate,eventUrl,eventType,startDate,researchInitiative,narrative,subProject,fundingMechanism,awardCode
0,Organization,Country of the organization (string),Unique identifier for geolocation from https://www.geonames.org/ (string),Research Organization Registry id of the organization from https://ror.org/ (string),City of the organization (string),Name of the organization (string),US government-owned Unique Entity Identifier (UEI) for the organization (https://grants.nih.gov/grants/guide/notice-files/NOT-OD-21-170.html) (string),Data Universal Numbering System (DUNS) by Dun & Bradstreet (string),"Unique entity identifier: ror id if available, otherwise uei id (string)",State of the organization (string),...,,,,,,,,,,
1,Publication,,,,,Title of the publication (string),,,Unique identifier for the publication (string),,...,,,,,,,,,,
2,Researcher,,,,,Name of the researcher (lastname plus initials) (string),,,Unique researcher identifier (orcid preferred or NIH profile id) (string),,...,,,,,,,,,,
3,Patent,,,,,Title of the patent (string),,,Identifier of the patents (string),,...,,,,,,,,,,
4,Dataset,,,,,Name of the dataset (string),,,Unique identifier for the dataset (string),,...,,,,,,,,,,
5,ResearchInitiative,,,,,Abbreviation of the research initiative (string),,,Unique id of the research initiative (string),,...,,,,,,,,,,
6,Presentation,,,,,Title of the presentation (string),,,Unique identifier of the presentation (hash of event id and name) (string),,...,URL of the presentation link (string),,,,,,,,,
7,Software,,,,,Name of the software application or software repository (string),,,Identifier of the software application or software repository (string),,...,,,,,,,,,,
8,Event,Country of event location (string),,,City of event location (string),Name of the event (string),,,Unique identifier of the event (date and event number) (string),State or province of the event location (string),...,,End data of event (date),URL of the event website (string),"Type of event: webinar, virtual conference, in-person conference, hybrid conference, virtual workshop, in-person workshop, hybrid workshop (string)",Start date of event (date),,,,,
9,FundingOpportunity,,,,,Name of the funding opportunity (string),,,Unique identifier for the funding opportunity (Thesaurus:C20021) (string),,...,,,,,,,,,,


### Visualize the Metagraph
The metagraph shows the node labels and relationship types of the KG. Click on a node to display the node metadata.

In [9]:
query = """
MATCH p=(:MetaNode)-->(:MetaNode) RETURN p
"""

In [10]:
subgraph = graph.run(query).to_subgraph()

In [11]:
widget = neo4j_utils.draw_graph(subgraph, stylesheet)
widget.set_layout(name='cola', padding=40, nodeSpacing=65, nodeDimensionsIncludeLabels=True, unconstrIter=5000)
widget

CytoscapeWidget(cytoscape_layout={'name': 'cola', 'padding': 40, 'nodeSpacing': 65, 'nodeDimensionsIncludeLabe…

### Get number of Nodes

In [12]:
query = """
MATCH (n)
RETURN COUNT(n);
"""
n = graph.evaluate(query)
print(f'Total number of nodes: {n}')

Total number of nodes: 4429


### Get number of nodes by node label

In [13]:
query = """
MATCH (n) RETURN labels(n)[0] AS Node, COUNT(n) AS Count
ORDER BY Count DESC
"""
graph.run(query).to_data_frame()

Unnamed: 0,Node,Count
0,Researcher,2256
1,Publication,1861
2,Presentation,87
3,Organization,59
4,Dataset,50
5,Grant,48
6,Patent,17
7,FundingOpportunity,16
8,Event,14
9,MetaNode,11


### Visualize the Knowledge Graph

In [14]:
researcher = "Solo-Gabriele"
query = """
MATCH p=(r:Researcher)--() WHERE r.lastName = $researcher RETURN p
"""
subgraph = graph.run(query, researcher=researcher).to_subgraph()

In [15]:
subgraph = graph.run(query, researcher=researcher).to_subgraph()

In [16]:
widget = neo4j_utils.draw_graph(subgraph, stylesheet)
widget.set_layout(name='cola', padding=0, nodeSpacing=40, nodeDimensionsIncludeLabels=True, unconstrIter=5000)
widget

CytoscapeWidget(cytoscape_layout={'name': 'cola', 'padding': 0, 'nodeSpacing': 40, 'nodeDimensionsIncludeLabel…

### Find researcher from two RADx-rad grants that authored a paper together

In [17]:
query = """
MATCH p=(g1:Grant)<-[:IS_INVESTIGATOR_OF]-(r1:Researcher)-[:AUTHORED]->(:Publication)<-[:AUTHORED]-(r2:Researcher)-[:IS_INVESTIGATOR_OF]-(g2:Grant) WHERE g1 <> g2 RETURN p
"""
subgraph = graph.run(query, researcher=researcher).to_subgraph()

In [18]:
widget = neo4j_utils.draw_graph(subgraph, stylesheet)
widget.set_layout(name='cola', padding=0, nodeSpacing=40, nodeDimensionsIncludeLabels=True, unconstrIter=5000)
widget

CytoscapeWidget(cytoscape_layout={'name': 'cola', 'padding': 0, 'nodeSpacing': 40, 'nodeDimensionsIncludeLabel…

In [19]:
query = """
MATCH (g:Grant) RETURN g.subProject as project
"""
graph.run(query).to_data_frame()

Unnamed: 0,project
0,Automatic Detection & Tracing
1,Exosome
2,Wastewater
3,Novel Biosensing and VOC
4,PreVAIL kIds
5,Chemosensory Testing
6,Novel Biosensing and VOC
7,Novel Biosensing and VOC
8,Exosome
9,Wastewater


### Number of publication by RADx-rad subprojects

In [20]:
query = """
MATCH (:Publication)<-[:AUTHORED]-(r:Researcher)-[:IS_INVESTIGATOR_OF]->(g:Grant) RETURN COUNT(g) AS count, g.subProject AS project
ORDER BY count DESC
"""
graph.run(query).to_data_frame()

Unnamed: 0,count,project
0,88,Wastewater
1,72,PreVAIL kIds
2,44,Multimodal Surveillance
3,25,SCENT
4,23,Automatic Detection & Tracing
5,19,Chemosensory Testing
6,18,Data Coordinating Center
7,6,Exosome


### Number of citations for the primary publications from the RADx-read subprojects

In [21]:
query = """
MATCH (:Publication)-[CITES]-(:Publication)<-[:AUTHORED]-(r:Researcher)-[:IS_INVESTIGATOR_OF]->(g:Grant) RETURN COUNT(g) AS count, g.subProject AS project
ORDER BY count DESC
"""
graph.run(query).to_data_frame()

Unnamed: 0,count,project
0,1411,Wastewater
1,714,PreVAIL kIds
2,322,Multimodal Surveillance
3,215,Automatic Detection & Tracing
4,110,SCENT
5,88,Chemosensory Testing
6,86,Data Coordinating Center
7,13,Exosome


### Full text query for aptamer

In [22]:
keyword = '"aptamer"'

In [23]:
query = """
CALL db.index.fulltext.queryNodes('fulltext', $keyword) YIELD node, score
RETURN node.id as id, LABELS(node)[0] as type, node.name, score
"""
graph.run(query, keyword=keyword).to_data_frame()

Unnamed: 0,id,type,node.name,score
0,doi:10.24425/pjvs.2023.145056,Publication,Application of aptamer-based viral detection in animals.,5.126857
1,doi:10.1021/acs.chemrev.3c00377,Publication,Aptamer-Protein Interactions: From Regulation to Biomolecular Detection.,5.088497
2,doi:10.1101/2023.11.24.568626,Publication,AptaBERT: Predicting aptamer binding interactions,4.959231
3,doi:10.1039/d3sc00439b,Publication,Aptamers 101: aptamer discovery and in vitro applications in biosensors and separations.,4.863132
4,doi:10.3390/s23229139,Publication,Divalent Aptamer-Functionalized Nanochannels for Facile Detection of Cancer Cell-Derived Exosomes,4.659555
5,doi:10.1021/acs.analchem.2c00554,Publication,Aptamer Sandwich Lateral Flow Assay (AptaFlow) for Antibody-Free SARS-CoV-2 Detection.,4.586232
6,doi:10.1002/EXP.20210027,Publication,Aptamer‐functionalized field‐effect transistor biosensors for disease diagnosis and environmental monitoring,4.47856
7,doi:10.3390/ijms242216318,Publication,Structural Insights into Protein–Aptamer Recognitions Emerged from Experimental and Computational Studies,4.3532
8,doi:10.33774/chemrxiv-2021-nd0r2-v2,Publication,Covalent Bonding Aptamer with Enhanced SARS-CoV-2 RBD-ACE2 Blocking and Pseudovirus Neutralization Activities,4.275054
9,doi:10.3390/bios12100848,Publication,Aptamer against Aflatoxin B1 Obtained by SELEX and Applied in Detection,4.131763


In [24]:
neo4j_utils.stop()

Stopping Neo4j......... stopped.
