In [None]:
pip install neo4j==4.4

## 0. Background
#### Title: 
Type 2 diabetic and hyperglycemic pancreatic islets	
#### Summary: 
Analysis of pancreatic islets from type 2 diabetes (T2D) and non-diabetic cadaver donors. Glycemic control (HbA1c) levels also measured from the same individuals (normoglycemic: HbA1c < 6%; hyperglycemic: HbA1c ≥ 6%). Results provided insight into molecular basis of islet dysfunction in T2D.
#### Organism:
Homo Sapiens
#### Platform:
GPL6244: [HuGene-1_0-st] Affymetrix Human Gene 1.0 ST Array [transcript (gene) version]
#### Samples:
63
#### Reference:
[GEO database](https://www.ncbi.nlm.nih.gov/sites/GDSbrowser?acc=GDS4337)

### 1. Database setup

In [None]:
url = "bolt://localhost:7687"
user = "neo4j"
password = "test" 

In [None]:
from neo4j import GraphDatabase
driver = GraphDatabase.driver(url, auth=(user, password))
neo4j = driver.session()

### 2. Loading data
#### File structure:
- Columns Patient
- Rows are the Illumina RNA identifier
- Annotation of diabetes marker HbA1c for 51 of 63 patients

|ID_REF|IDENTIFIER|GSM1313609|GSM1313610|GSM1313611|
|---|---|---|---|---|
|ILMN_1343291|EEF1A1|7.28|0.01|7.9|
|ILMN_1343295|GAPDH|4.76|4.56|8.8|
|ILMN_1651228|RPS28|113.4|111.56|0.0|

In [147]:
filename = '00_load_GDS4337_dataset.cypher'
with open(filename, 'r') as file:
    content = file.read()
# print(content)
commands = content.split(";")
for c in commands:
    if not c == "":
        result = neo4j.run("" + c)

### 3. Node classification pipeline
All commands in one file in the next cell

In [None]:
# all commands in one file
#with open('02_GDS_on_GDS4337_dataset.cypher', 'r') as file:
#    content = file.read()
## print(content)
#commands = content.split(";")
#for c in commands:
#    if not c == "":
#        result = neo4j.run("" + c)

Graph projection of node labels `:Patient` and `:Transcript` and relationship `:MEASURED`

In [151]:
graphProjection1 = """
    CALL gds.graph.project(
        'graph',                                                                                
        ['Patient','Transcript'],
        {MEASURED: {orientation: 'REVERSE'}},
        {relationshipProperties: ['value']}                           
    )
    YIELD graphName, nodeProjection, nodeCount AS nodes, relationshipCount AS rels
    RETURN graphName, nodeProjection.Patient AS patientProjection, nodes, rels;"""
result = neo4j.run(graphProjection1)

Calculate similarity between Patients based on ***nodeSimilarity*** using values as weights

`similarity cutoff = 0.9708` -> 1 standard deviation from mean
> Output is a monopartite graph ``(Patient)-[:SIMILAR{similarityScore}]->(Patient)``


In [152]:
nodeSimilarity ="""
    CALL gds.nodeSimilarity.write('graph', {
        relationshipWeightProperty: 'value',
        similarityCutoff: 0.9708,
        writeRelationshipType: 'SIMILAR',
        writeProperty: 'similarityScore'
    });"""
result = neo4j.run(nodeSimilarity)

Delete symmetric relationships `:SIMILAR`

In [153]:
deleteSymmetric = """
    MATCH (n)-[s:SIMILAR]->(m) WHERE id(n) < id(m) DELETE s;"""
result = neo4j.run(deleteSymmetric)

Graph projection of node labels `:Patient` and relationship `:SIMILAR`


In [154]:
graphProjection2 ="""
    CALL gds.graph.project(
        'graph2',                                                                                
        ['Patient'],
        {SIMILAR: {orientation: 'UNDIRECTED'}},
        {relationshipProperties: ['similarityScore']}                         
    );"""
result = neo4j.run(graphProjection2)

Generate ***Fast Random Projections*** (node embedding) for each `:Patient` to represent each patient as a vector but keeping topology of the graph
> Output is a vector stored as node property ``fastRP__Embed``


In [155]:
fastRandomProjection = """
    CALL gds.fastRP.write('graph2',{
        relationshipTypes:['SIMILAR'],
        embeddingDimension: 128,
        iterationWeights: [1.0],
        normalizationStrength:0.05,
        writeProperty: 'fastRP__Embed',
        relationshipWeightProperty:'similarityScore'
    });"""
result = neo4j.run(fastRandomProjection)

Graph projection of node labels `:Patient` including the newly calculated property `fastRP__Embed`
and relationship `:SIMILAR` including property `similarityScore`


In [156]:
graphProjection3 = """
    CALL gds.graph.project(
        'graph3',   {                                                                               
        Patient: {properties: {
        targetProperty:{property:'targetProperty',defaultValue:0},
        isTrain:{property:'isTrain',defaultValue:0},
        fastRP__Embed:{property:'fastRP__Embed'}
        }
      }  
    },
    {SIMILAR: {orientation: 'UNDIRECTED'}},
    {relationshipProperties: ['similarityScore']});"""
result = neo4j.run(graphProjection3)

Create a new GDS pipeline

In [157]:
createPipeline = """CALL gds.beta.pipeline.nodeClassification.create('pipe')"""
result = neo4j.run(createPipeline)

Select feature `fastRP__Embed` for the pipeline

In [158]:
selectFeatures = """
    CALL gds.beta.pipeline.nodeClassification.selectFeatures(
        'pipe', 
        ['fastRP__Embed']) 
    YIELD name, featureProperties;"""
result = neo4j.run(selectFeatures)

Configure the ratio of the test dataset

In [159]:
configureSplit = """
    CALL gds.beta.pipeline.nodeClassification.configureSplit('pipe', {
        testFraction: 0.2,
        validationFolds: 5
        })
    YIELD splitConfig;"""
result = neo4j.run(configureSplit)

Add model candidate to the pipeline

In [160]:
result = neo4j.run("""CALL gds.beta.pipeline.nodeClassification.addLogisticRegression('pipe', {penalty: 0.0625}) YIELD parameterSpace;""")

Create a subgraph from projection which will be training set `:Patient.isTrain = 1`

In [161]:
trainingSet = """
    CALL gds.beta.graph.project.subgraph(
        'graph-train',
        'graph3',
        'n:Patient AND n.isTrain = 1', '*')
    YIELD graphName, fromGraphName, nodeCount, relationshipCount;"""
result = neo4j.run(trainingSet)

Create a subgraph from projection which will be test set `:Patient.isTrain = 0`

In [162]:
testSet = """
    CALL gds.beta.graph.project.subgraph(
        'graph-test', 
        'graph3', 
        'n:Patient AND n.isTrain = 0', '*')
    YIELD graphName, fromGraphName, nodeCount, relationshipCount;"""
result = neo4j.run(testSet)

Model training writing the target property `targetProperty`

In [163]:
modelTraining = """
    CALL gds.beta.pipeline.nodeClassification.train('graph-train', {
        pipeline: 'pipe',
        targetNodeLabels: ['Patient'],
        modelName: 't2d_FRP',
        targetProperty: 'targetProperty',
        randomSeed: 42,
        metrics: ['F1_WEIGHTED','ACCURACY']
    });"""
result = neo4j.run(modelTraining)

Write prediction into the in-memory graph (not really necessary)

In [164]:
mutateStatement = """
    CALL gds.beta.pipeline.nodeClassification.predict.mutate('graph-test', {
        targetNodeLabels: ['Patient'],
        modelName: 't2d_FRP',
        mutateProperty: 'predicted_t2d'
    });"""
result = neo4j.run(mutateStatement)

Write predicted class as property `:Patient.predicted_t2d` from projection to database

In [165]:
writeResult = """
    CALL gds.graph.writeNodeProperties(
        'graph-test',
        ['predicted_t2d'],
        ['Patient']
    );"""
result = neo4j.run(writeResult)

Check Accuracy 

In [166]:
checkAccuracy = """
    MATCH (p:Patient)
    WHERE p.isTrain = 0
    WITH count(p) AS nbPatient
    MATCH (p:Patient)
    WHERE p.isTrain = 0
    AND p.targetProperty = p.predicted_t2d
    RETURN toFloat(count(p)) / nbPatient AS Ratio;"""
result = neo4j.run(checkAccuracy)
row = result.single()
print('Accuracy is '+ str(row['Ratio']))

Accuracy is 0.9166666666666666


Clean up - graph projections

In [150]:
cleanUp = """
CALL gds.graph.list()
YIELD graphName
WITH graphName as g
CALL gds.graph.drop(g)
YIELD graphName
RETURN graphName"""
result = neo4j.run(cleanUp)

In [143]:
modelDrop = """
        CALL gds.beta.model.drop('t2d_FRP')"""
result = neo4j.run(modelDrop)

In [144]:
cleanPipe = """call gds.beta.pipeline.drop('pipe')"""
result = neo4j.run(cleanPipe)

### Community detection algorithm for Subphenotyping of Diabetic patients

In [167]:
graphProjection = """
CALL gds.graph.project(
    'graph4',                                                                                
    ['Patient']  
    ,
    {SIMILAR: {orientation: 'UNDIRECTED'}},
    {relationshipProperties: ['similarityScore']}                         
);"""
result = neo4j.run(graphProjection)

Run `Louvain` Community Detection Algorithm in streaming mode

In [None]:
louvainCommunityDetection = """
    CALL gds.louvain.stream('graph4', 
    { relationshipWeightProperty: 'similarityScore'})
    YIELD nodeId, communityId, intermediateCommunityIds
    RETURN gds.util.asNode(nodeId).name AS name, communityId, intermediateCommunityIds
    ORDER BY name ASC;"""
result = neo4j.run(louvainCommunityDetection)

Write communities as property to `:Patient` `community`

In [141]:
writeCommunities = """
    CALL gds.louvain.write('graph4', 
    { 
    relationshipWeightProperty: 'similarityScore', 
    includeIntermediateCommunities: false,
    writeProperty:'community'});"""
result = neo4j.run(writeCommunities)