In [1]:
from graphdatascience import GraphDataScience

In [2]:
URI = "bolt://localhost:7687"
AUTH = ("neo4j", "neo4jneo4j")
DB = 'medical'

In [3]:
gds = GraphDataScience(URI, AUTH, database=DB)
print(gds.version())

2.3.1


In [4]:
graphname = "mygraph"
pipelinename = "mypipe"
modelname = "mymodel"

In [5]:
RANDOM_SEED = 42

In [6]:
if gds.graph.exists(graphname).exists:
    gds.graph.drop(gds.graph.get(graphname))
if gds.beta.pipeline.exists(pipelinename).exists:
    gds.pipeline.get(pipelinename).drop()
if gds.beta.model.exists(modelname).exists:
    gds.model.get(modelname).drop()

In [7]:
G, result = gds.graph.project(graphname, 
  ['Patient','Diag','Drug','IHD'],
  {
    'IHD_RISK': {
      'orientation': 'UNDIRECTED'
    },
    'DIAGNOSED_WITH': {},
    'PRESCRIBED': {}
  }
);

In [8]:
pipe, _ = gds.beta.pipeline.linkPrediction.create(pipelinename)

In [9]:
pipe.addNodeProperty("beta.hashgnn",
    mutateProperty="embedding",
    iterations=4,
    heterogeneous=True,
    embeddingDensity=512,
    neighborInfluence=0.7,
    randomSeed=RANDOM_SEED,
    generateFeatures={'dimension': 1,  'densityLevel': 1},
    contextNodeLabels=G.node_labels(),
)

name                                                            mypipe
nodePropertySteps    [{'name': 'gds.beta.hashgnn.mutate', 'config':...
featureSteps                                                        []
splitConfig          {'negativeSamplingRatio': 1.0, 'testFraction':...
autoTuningConfig                                     {'maxTrials': 10}
parameterSpace       {'MultilayerPerceptron': [], 'RandomForest': [...
Name: 0, dtype: object

In [10]:
pipe.addFeature('COSINE', nodeProperties = ['embedding'])

name                                                            mypipe
nodePropertySteps    [{'name': 'gds.beta.hashgnn.mutate', 'config':...
featureSteps         [{'name': 'COSINE', 'config': {'nodeProperties...
splitConfig          {'negativeSamplingRatio': 1.0, 'testFraction':...
autoTuningConfig                                     {'maxTrials': 10}
parameterSpace       {'MultilayerPerceptron': [], 'RandomForest': [...
Name: 0, dtype: object

In [11]:
pipe.configureSplit(trainFraction=0.5, testFraction=0.5, validationFolds=4)

name                                                            mypipe
nodePropertySteps    [{'name': 'gds.beta.hashgnn.mutate', 'config':...
featureSteps         [{'name': 'COSINE', 'config': {'nodeProperties...
splitConfig          {'negativeSamplingRatio': 1.0, 'testFraction':...
autoTuningConfig                                     {'maxTrials': 10}
parameterSpace       {'MultilayerPerceptron': [], 'RandomForest': [...
Name: 0, dtype: object

In [12]:
pipe.addLogisticRegression(tolerance=(0.1, 1.0), penalty=1.0, patience=5)

name                                                            mypipe
nodePropertySteps    [{'name': 'gds.beta.hashgnn.mutate', 'config':...
featureSteps         [{'name': 'COSINE', 'config': {'nodeProperties...
splitConfig          {'negativeSamplingRatio': 1.0, 'testFraction':...
autoTuningConfig                                     {'maxTrials': 10}
parameterSpace       {'MultilayerPerceptron': [], 'RandomForest': [...
Name: 0, dtype: object

In [13]:
pipe.addMLP(patience = 2, penalty = 1.0, hiddenLayerSizes = [4, 2])

name                                                            mypipe
nodePropertySteps    [{'name': 'gds.beta.hashgnn.mutate', 'config':...
featureSteps         [{'name': 'COSINE', 'config': {'nodeProperties...
splitConfig          {'negativeSamplingRatio': 1.0, 'testFraction':...
autoTuningConfig                                     {'maxTrials': 10}
parameterSpace       {'MultilayerPerceptron': [{'maxEpochs': 100, '...
Name: 0, dtype: object

In [14]:
pipe.configureAutoTuning(maxTrials = 4)

name                                                            mypipe
nodePropertySteps    [{'name': 'gds.beta.hashgnn.mutate', 'config':...
featureSteps         [{'name': 'COSINE', 'config': {'nodeProperties...
splitConfig          {'negativeSamplingRatio': 1.0, 'testFraction':...
autoTuningConfig                                      {'maxTrials': 4}
parameterSpace       {'MultilayerPerceptron': [{'maxEpochs': 100, '...
Name: 0, dtype: object

In [15]:
model, train_result = pipe.train(
    G,
    modelName = modelname,
    sourceNodeLabel = 'Patient',
    targetNodeLabel = "IHD",
    targetRelationshipType = 'IHD_RISK',
    metrics = ['AUCPR', 'OUT_OF_BAG_ERROR'],
    randomSeed = RANDOM_SEED
)

In [16]:
predictions = model.predict_stream(G, concurrency=2, topN= 20, threshold= 0.1)

In [17]:
predictions

Unnamed: 0,node1,node2,probability
0,1278,2076,0.246868
1,1280,2076,0.246868
2,1282,2076,0.246868
3,1284,2076,0.246868
4,1286,2076,0.246868
5,1288,2076,0.246868
6,1290,2076,0.246868
7,1292,2076,0.246868
8,1294,2076,0.246868
9,1295,2076,0.246868


In [18]:
pipe.drop()
model.drop()
G.drop()

graphName                                                          mygraph
database                                                           medical
memoryUsage                                                               
sizeInBytes                                                             -1
nodeCount                                                             2008
relationshipCount                                                    13571
configuration            {'relationshipProjection': {'DIAGNOSED_WITH': ...
density                                                           0.003367
creationTime                           2023-03-20T15:18:21.209204100+08:00
modificationTime                       2023-03-20T15:18:24.123448900+08:00
schema                   {'graphProperties': {}, 'relationships': {'DIA...
schemaWithOrientation    {'graphProperties': {}, 'relationships': {'DIA...
Name: 0, dtype: object

In [19]:
gds.close()