## [Graph Data Science Python Driver](https://github.com/neo4j/graph-data-science-client)

## Imports

In [71]:
import pandas as pd
from neo4j import GraphDatabase
from graphdatascience import GraphDataScience

## Setup (1.8)

In [72]:
# Connection settings for the plain Neo4j driver; `driver` is the object that
# run_cypher() opens its sessions on.
URI = "neo4j://localhost:7687"
creds = ("neo4j", "password")
driver = GraphDatabase.driver(URI, auth=creds)

def run_cypher(cypher, results=False):
    """Run a Cypher statement in a new session on the module-level ``driver``.

    Args:
        cypher: The Cypher text passed to ``session.run``.
        results: When truthy, return the records produced by the statement.

    Returns:
        The value of ``.data()`` on the query result when ``results`` is
        truthy, otherwise ``None``.
    """
    with driver.session() as neo4j_session:
        # Materialise the records before the session is closed.
        records = neo4j_session.run(cypher).data()
    return records if results else None

## Setup (driver)

In [74]:
# Same URI and credentials as the plain-driver setup, this time handed to the
# GraphDataScience client object.
URI = "neo4j://localhost:7687"
creds = ("neo4j", "password")
gds = GraphDataScience(URI, auth=creds)

## Create Link Prediction Pipeline (1.8)

In [75]:
# Drop the model registered as 'pipe' first; the create call below uses the
# same name. (A stray trailing `i` on this line previously made the whole cell
# a SyntaxError.)
run_cypher("CALL gds.beta.model.drop('pipe')", results=True)

# Plain string: the query has no placeholders, so no f-string is needed.
c = '''
CALL gds.alpha.ml.pipeline.linkPrediction.create('pipe')
'''
print(c)
run_cypher(c, results=True)

## Create Link Prediction Pipeline (driver)

In [76]:
# Create the link prediction pipeline named "pipe" through the GDS client.
# The call returns a pair; only the first element (the pipeline object) is kept.
pipe, _ = gds.beta.pipeline.linkPrediction.create("pipe")

## Add Fast RP Embeddings (1.8)

In [77]:
# Cypher that registers a fastRP node-property step (writing to 'embedding')
# on the pipeline named 'pipe'.
fastrp_step_query = """
CALL gds.alpha.ml.pipeline.linkPrediction.addNodeProperty('pipe', 'fastRP', {
  mutateProperty: 'embedding',
  embeddingDimension: 256,
  iterationWeights: [0.8, 1, 1, 1],
  normalizationStrength: 0.5,
  randomSeed: 42
})
"""
print(fastrp_step_query)
run_cypher(fastrp_step_query, results=True)

## Add Fast RP Embeddings (driver)

In [78]:
# Register the fastRP step on the pipeline object; the keyword arguments carry
# the same settings as the Cypher variant of this step.
pipe.addNodeProperty(
    "fastRP",
    mutateProperty="embedding",
    embeddingDimension=256,
    iterationWeights=[0.8, 1, 1, 1],
    normalizationStrength=0.5,
    randomSeed=42,
)

name                                                              pipe
nodePropertySteps    [{'name': 'gds.fastRP.mutate', 'config': {'ran...
featureSteps                                                        []
splitConfig          {'negativeSamplingRatio': 1.0, 'testFraction':...
parameterSpace          {'RandomForest': [], 'LogisticRegression': []}
Name: 0, dtype: object

## Add pageRank (1.8)

In [79]:
# Cypher that registers a pageRank node-property step on the pipeline 'pipe'.
pagerank_step_query = """
CALL gds.alpha.ml.pipeline.linkPrediction.addNodeProperty('pipe', 'pageRank', 
{
  mutateProperty: 'pageRank'
})
"""
print(pagerank_step_query)
run_cypher(pagerank_step_query, results=True)

## Add Betweenness Centrality (1.8)

In [80]:
# Cypher that registers a betweenness node-property step on the pipeline 'pipe'.
betweenness_step_query = """
CALL gds.alpha.ml.pipeline.linkPrediction.addNodeProperty('pipe', 'betweenness', 
{
  mutateProperty: 'betweenness'
})
"""
print(betweenness_step_query)
run_cypher(betweenness_step_query, results=True)

## Add pageRank & Betweenness Centrality (driver)

In [81]:
# Each algorithm writes its result to a node property named after itself.
for algo_name in ("pageRank", "betweenness"):
    pipe.addNodeProperty(algo_name, mutateProperty=algo_name)

## Add features to pipeline (1.8)

In [82]:
# Cypher that adds a 'hadamard' feature step over the three node properties
# and yields the pipeline's featureSteps.
add_feature_query = """
CALL gds.alpha.ml.pipeline.linkPrediction.addFeature('pipe', 
'hadamard', {
  nodeProperties: ['embedding', 'pageRank', 'betweenness']
}) YIELD featureSteps
"""
print(add_feature_query)
run_cypher(add_feature_query, results=True)

## Add features to pipeline (driver)

In [83]:
# Add a "hadamard" feature step over the three node properties.
pipe.addFeature(
    "hadamard",
    nodeProperties=["embedding", "pageRank", "betweenness"],
)

name                                                              pipe
nodePropertySteps    [{'name': 'gds.fastRP.mutate', 'config': {'ran...
featureSteps         [{'name': 'HADAMARD', 'config': {'nodeProperti...
splitConfig          {'negativeSamplingRatio': 1.0, 'testFraction':...
parameterSpace          {'RandomForest': [], 'LogisticRegression': []}
Name: 0, dtype: object

## Split Train/Test (1.8)

In [84]:
# Cypher that sets the split configuration of pipeline 'pipe' and yields it.
configure_split_query = """
CALL gds.alpha.ml.pipeline.linkPrediction.configureSplit('pipe', {
  testFraction: 0.3,
  trainFraction: 0.3,
  validationFolds: 7
  })
YIELD splitConfig
"""
print(configure_split_query)
run_cypher(configure_split_query, results=True)

## Split Train/Test (driver)

In [85]:
# Set the split configuration on the pipeline object.
pipe.configureSplit(
    testFraction=0.3,
    trainFraction=0.3,
    validationFolds=7,
)

name                                                              pipe
nodePropertySteps    [{'name': 'gds.fastRP.mutate', 'config': {'ran...
featureSteps         [{'name': 'HADAMARD', 'config': {'nodeProperti...
splitConfig          {'negativeSamplingRatio': 1.0, 'testFraction':...
parameterSpace          {'RandomForest': [], 'LogisticRegression': []}
Name: 0, dtype: object

## Create graph projection  (1.8)

In [86]:
# Cypher that creates the 'proteins' graph from 'Protein' nodes and
# undirected INTERACTS_WITH relationships. A plain string is used because the
# query interpolates nothing.
project_graph_query = """
CALL gds.graph.create(
    'proteins',
    'Protein',
    {INTERACTS_WITH: 
        {
            orientation: 'UNDIRECTED'
        }
    }
)
"""
run_cypher(project_graph_query, results=True)

## Create graph projection  (driver)

In [87]:
node_projection = ["Protein"]
relationship_projection = {"INTERACTS_WITH": {"orientation": "UNDIRECTED"}}

# Drop an existing "proteins" projection by catalog name rather than through
# `G`: on a fresh kernel `G` is not assigned yet, so the previous `G.drop()`
# raised NameError before the projection below could run.
if gds.graph.exists("proteins")["exists"]:
    gds.graph.get("proteins").drop()

G, _ = gds.graph.project("proteins", node_projection, relationship_projection)

## Train link prediction model  (1.8)

In [88]:
# Drop the model named 'lp-pipeline-model' first; the train call below
# registers a model under the same name.
run_cypher("call gds.beta.model.drop('lp-pipeline-model')", results=True)

# NOTE: Cypher map literals do not accept a trailing comma, so `modelName`
# must be the last entry without one (the previous text had
# `modelName: 'lp-pipeline-model',` followed directly by `}`).
train_query = '''
CALL gds.alpha.ml.pipeline.linkPrediction.train('proteins', {
  pipeline: 'pipe',
  modelName: 'lp-pipeline-model'
}) YIELD modelInfo
RETURN
  modelInfo.bestParameters AS winningModel,
  modelInfo.metrics.AUCPR.outerTrain AS trainGraphScore,
  modelInfo.metrics.AUCPR.test AS testGraphScore
'''
print(train_query)
run_cypher(train_query, results=True)

## Train link prediction model  (driver)

In [90]:
# Name under which the trained model is registered.
model_name = "lp-pipeline-model"

# Add a logistic regression candidate to the pipeline, then train on graph G.
pipe.addLogisticRegression()
trained_pipe_model, res = pipe.train(G, modelName=model_name)

## Stream Results (1.8)

In [91]:
# Cypher that streams the top 5 predicted links (threshold 0.3) from
# 'lp-pipeline-model', resolving node ids to the `id` property of each node.
predict_query = """
CALL gds.alpha.ml.pipeline.linkPrediction.predict.stream('proteins', {
  modelName: 'lp-pipeline-model',
  topN: 5,
  threshold: 0.3
})
 YIELD node1, node2, probability
 RETURN gds.util.asNode(node1).id AS protein1, gds.util.asNode(node2).id AS protein2, probability
 ORDER BY probability DESC, protein1
"""
print(predict_query)
results = run_cypher(predict_query, results=True)

## Stream Results (driver)

In [92]:
# Stream the top 5 predicted links for graph G from the trained model object.
# Note: this rebinds `results`, replacing the value set by the Cypher variant.
results = trained_pipe_model.predict_stream(G, topN=5)

In [93]:
# Bare expression so the notebook renders the streamed predictions.
results

Unnamed: 0,node1,node2,probability
0,12056,14010,0.643261
1,124,1619,0.642388
2,1619,11560,0.641647
3,3023,11346,0.641638
4,930,1523,0.641341


In [None]:
# Intentionally empty: the leftover scratch input `bb` was removed because it
# is an undefined name and raised NameError on "Restart & Run All".