In [1]:
import os

from graphdatascience import GraphDataScience

In [2]:
# Connection settings for the Neo4j instance that hosts the GDS library.
# Each value is read from an environment variable so that real credentials
# never have to be committed with the notebook; the literals are local
# development fallbacks and match the previously hard-coded values.
URI = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
AUTH = (
    os.environ.get("NEO4J_USERNAME", "neo4j"),
    os.environ.get("NEO4J_PASSWORD", "neo4jneo4j"),
)
DB = os.environ.get("NEO4J_DATABASE", "medical")

In [3]:
# Open a client session against the configured database and print the
# version string reported by the GDS library.
gds = GraphDataScience(
    URI,
    AUTH,
    database=DB,
)
print(gds.version())

2.3.1


In [4]:
# Names used for the projected graph, the training pipeline and the trained
# model throughout the rest of the notebook.
graphname, pipelinename, modelname = "mygraph", "mypipe", "mymodel"

In [5]:
# Ask the pipeline catalog whether `pipelinename` is already registered; the
# returned record is shown as this cell's output.
gds.beta.pipeline.exists(pipelinename)

pipelineName                               mypipe
pipelineType    Link prediction training pipeline
exists                                       True
Name: 0, dtype: object

In [6]:
# Remove any graph, pipeline or model left over under the names used by this
# notebook, so the later cells can create them again from scratch.
if gds.graph.exists(graphname)["exists"]:
    stale_graph = gds.graph.get(graphname)
    gds.graph.drop(stale_graph)

if gds.beta.pipeline.exists(pipelinename)["exists"]:
    stale_pipeline = gds.pipeline.get(pipelinename)
    stale_pipeline.drop()

if gds.beta.model.exists(modelname)["exists"]:
    stale_model = gds.model.get(modelname)
    stale_model.drop()

In [7]:
# Create a new link-prediction training pipeline registered under
# `pipelinename`. The call returns two values; only the first (the pipeline
# object) is used later. The second is bound to a named variable instead of
# `_`, because in IPython `_` is the automatic "last output" variable and
# assigning to it shadows that feature for the rest of the session.
pipe, pipe_create_info = gds.beta.pipeline.linkPrediction.create(pipelinename)

In [8]:
# Node-property step: add a fastRP step that writes a 256-dimensional
# `embedding` node property, seeded for reproducibility.
# NOTE(review): no context node labels / relationship types are passed here.
# It is worth confirming which of the projected relationship types this step
# actually sees during training and prediction — TODO confirm against the GDS
# link-prediction pipeline documentation.
pipe.addNodeProperty(
    "fastRP",
    mutateProperty="embedding",
    embeddingDimension=256,
    randomSeed=42,
    iterationWeights=[1.0],
)

name                                                            mypipe
nodePropertySteps    [{'name': 'gds.fastRP.mutate', 'config': {'ran...
featureSteps                                                        []
splitConfig          {'negativeSamplingRatio': 1.0, 'testFraction':...
autoTuningConfig                                     {'maxTrials': 10}
parameterSpace       {'MultilayerPerceptron': [], 'RandomForest': [...
Name: 0, dtype: object

In [9]:
# Feature step: register a COSINE link feature that reads the `embedding`
# node property produced by the node-property step.
pipe.addFeature(
    "COSINE",
    nodeProperties=["embedding"],
)

name                                                            mypipe
nodePropertySteps    [{'name': 'gds.fastRP.mutate', 'config': {'ran...
featureSteps         [{'name': 'COSINE', 'config': {'nodeProperties...
splitConfig          {'negativeSamplingRatio': 1.0, 'testFraction':...
autoTuningConfig                                     {'maxTrials': 10}
parameterSpace       {'MultilayerPerceptron': [], 'RandomForest': [...
Name: 0, dtype: object

In [10]:
# Relationship split used by the pipeline: train and test fractions of 0.2
# each, with 3 validation folds.
pipe.configureSplit(
    trainFraction=0.2,
    testFraction=0.2,
    validationFolds=3,
)

name                                                            mypipe
nodePropertySteps    [{'name': 'gds.fastRP.mutate', 'config': {'ran...
featureSteps         [{'name': 'COSINE', 'config': {'nodeProperties...
splitConfig          {'negativeSamplingRatio': 1.0, 'testFraction':...
autoTuningConfig                                     {'maxTrials': 10}
parameterSpace       {'MultilayerPerceptron': [], 'RandomForest': [...
Name: 0, dtype: object

In [11]:
# Register two logistic-regression model candidates, in this order:
#   1. tolerance given as the pair (0.01, 0.1) — presumably a tuning range,
#      TODO confirm;
#   2. penalty fixed at 1.0.
lr_candidates = (
    {"tolerance": (0.01, 0.1)},
    {"penalty": 1.0},
)
for lr_params in lr_candidates:
    lr_summary = pipe.addLogisticRegression(**lr_params)

# Show the pipeline summary returned by the last registration.
lr_summary

name                                                            mypipe
nodePropertySteps    [{'name': 'gds.fastRP.mutate', 'config': {'ran...
featureSteps         [{'name': 'COSINE', 'config': {'nodeProperties...
splitConfig          {'negativeSamplingRatio': 1.0, 'testFraction':...
autoTuningConfig                                     {'maxTrials': 10}
parameterSpace       {'MultilayerPerceptron': [], 'RandomForest': [...
Name: 0, dtype: object

In [12]:
# Register three multilayer-perceptron model candidates, each overriding a
# single setting: patience, penalty, and the hidden layer sizes.
mlp_candidates = (
    {"patience": 2},
    {"penalty": 1.0},
    {"hiddenLayerSizes": [4, 2]},
)
for mlp_params in mlp_candidates:
    mlp_summary = pipe.addMLP(**mlp_params)

# Show the pipeline summary returned by the last registration.
mlp_summary

name                                                            mypipe
nodePropertySteps    [{'name': 'gds.fastRP.mutate', 'config': {'ran...
featureSteps         [{'name': 'COSINE', 'config': {'nodeProperties...
splitConfig          {'negativeSamplingRatio': 1.0, 'testFraction':...
autoTuningConfig                                     {'maxTrials': 10}
parameterSpace       {'MultilayerPerceptron': [{'maxEpochs': 100, '...
Name: 0, dtype: object

In [13]:
# Project the in-memory graph used for training and prediction.
# Four node labels are included; IHD_RISK is projected as UNDIRECTED, while
# the other two relationship types use the default projection settings.
node_labels = ["Patient", "Diag", "Drug", "IHD"]
relationship_spec = {
    "IHD_RISK": {"orientation": "UNDIRECTED"},
    "DIAGNOSED_WITH": {},
    "PRESCRIBED": {},
}
G, result = gds.graph.project(graphname, node_labels, relationship_spec)

In [14]:
# Train the pipeline on the projected graph and store the winning model under
# `modelname`. Links of type IHD_RISK between Patient (source) and IHD
# (target) nodes are the prediction target.
# NOTE(review): OUT_OF_BAG_ERROR looks like a random-forest-specific metric,
# and no random-forest candidate is added in this notebook — confirm whether
# it is needed here.
model, train_result = pipe.train(
    G,
    modelName=modelname,
    sourceNodeLabel="Patient",
    targetNodeLabel="IHD",
    targetRelationshipType="IHD_RISK",
    metrics=["AUCPR", "OUT_OF_BAG_ERROR"],
    randomSeed=12,
)

In [15]:
# Stream link predictions from the trained model over the projected graph,
# using topN=20 and threshold=0.1 with two concurrent threads requested.
predictions = model.predict_stream(
    G,
    concurrency=2,
    topN=20,
    threshold=0.1,
)

In [16]:
# Display the streamed predictions (columns: node1, node2, probability).
predictions

Unnamed: 0,node1,node2,probability
0,1278,2076,0.5005
1,1280,2076,0.5005
2,1282,2076,0.5005
3,1284,2076,0.5005
4,1286,2076,0.5005
5,1288,2076,0.5005
6,1290,2076,0.5005
7,1292,2076,0.5005
8,1294,2076,0.5005
9,1295,2076,0.5005


In [17]:
# Close the GDS client session held by `gds` now that the notebook is done.
gds.close()