In [179]:
import os
from py2neo import Graph, Node, Relationship
from graphdatascience import GraphDataScience

# Get Neo4j client
graph = Graph("bolt://localhost:7690", auth=("neo4j", "exoplanet"))

In [180]:
#create an unknown set of stars to model on. 
#rand value is percent of database as unknowns
create_unknowns = f'''
    MATCH (N)
    WITH N
    WHERE rand() < 0.2
    REMOVE N:star
    SET N:unknownStar
'''

graph.run(create_unknowns)

In [181]:
##drop functions for pipeline, prediction graph
drop_pipeline = f'''
    CALL gds.beta.pipeline.drop('pipe', False)
'''
drop_prediction_graph = f'''
    CALL gds.graph.drop('prediction_graph', False)
'''
drop_model = f'''
    CALL gds.beta.model.drop('nr_model', False)
'''

graph.run(drop_pipeline)
graph.run(drop_prediction_graph)
graph.run(drop_model)

modelInfo,trainConfig,graphSchema,loaded,stored,creationTime,shared
"{modelName: 'nr_model', featureProperties: ['num_planets', 'effective_temperature', 'radius', 'mass', 'metallicity', 'luminosity', 'surface_gravity', 'age', 'density', 'rotational_velocity', 'rotational_period', 'right_ascension', 'declination', 'distance', 'v_magnitude'], modelType: 'NodeRegression', metrics: {MEAN_SQUARED_ERROR: {test: 1.5684650602409635, validation: {min: 1.5236319517709178, max: 1.933657541478127, avg: 1.7286447466245223}, outerTrain: 1.5458285337354136, train: {min: 1.5215270739064903, max: 1.9087409193669873, avg: 1.7151339966367387}}}, pipeline: {featureProperties: [{feature: 'num_planets'}, {feature: 'effective_temperature'}, {feature: 'radius'}, {feature: 'mass'}, {feature: 'metallicity'}, {feature: 'luminosity'}, {feature: 'surface_gravity'}, {feature: 'age'}, {feature: 'density'}, {feature: 'rotational_velocity'}, {feature: 'rotational_period'}, {feature: 'right_ascension'}, {feature: 'declination'}, {feature: 'distance'}, {feature: 'v_magnitude'}], nodePropertySteps: []}, bestParameters: {maxDepth: 2147483647, minSplitSize: 2, minLeafSize: 1, numberOfSamplesRatio: 1.0, methodName: 'RandomForest', numberOfDecisionTrees: 100}, nodePropertySteps: []}","{targetProperty: 'num_planets', randomSeed: 42, jobId: '70501544-7e90-442f-91cf-2287efca8b37', graphName: 'prediction_graph', sudo: false, storeModelToDisk: false, modelName: 'nr_model', logProgress: true, metrics: ['MEAN_SQUARED_ERROR'], pipeline: 'pipe', concurrency: 4, relationshipTypes: ['*'], targetNodeLabels: ['star']}","{graphProperties: {}, nodes: {star: {density: 'Float (DefaultValue(NaN), PERSISTENT)', mass: 'Float (DefaultValue(NaN), PERSISTENT)', v_magnitude: 'Float (DefaultValue(NaN), PERSISTENT)', distance: 'Float (DefaultValue(NaN), PERSISTENT)', num_planets: 'Float (DefaultValue(NaN), PERSISTENT)', right_ascension: 'Float (DefaultValue(NaN), PERSISTENT)', rotational_period: 'Float (DefaultValue(NaN), PERSISTENT)', age: 'Float (DefaultValue(NaN), PERSISTENT)', effective_temperature: 'Float (DefaultValue(NaN), PERSISTENT)', rotational_velocity: 'Float (DefaultValue(NaN), PERSISTENT)', radius: 'Float (DefaultValue(NaN), PERSISTENT)', surface_gravity: 'Float (DefaultValue(NaN), PERSISTENT)', declination: 'Float (DefaultValue(NaN), PERSISTENT)', luminosity: 'Float (DefaultValue(NaN), PERSISTENT)', metallicity: 'Float (DefaultValue(NaN), PERSISTENT)'}}, relationships: {__ALL__: {}}}",True,False,datetime('2024-01-25T19:18:25.275708000-05:00'),False


In [182]:
#blank map
star_properties = '''
{
    star: {properties: ['num_planets', 'effective_temperature', 'radius',
    'mass', 'metallicity', 'luminosity', 'surface_gravity', 'age',
    'density', 'rotational_velocity', 'rotational_period', 'right_ascension',
    'declination', 'distance', 'v_magnitude']},
    unknownStar: {properties: ['num_planets', 'effective_temperature', 'radius',
    'mass', 'metallicity', 'luminosity', 'surface_gravity', 'age',
    'density', 'rotational_velocity', 'rotational_period', 'right_ascension',
    'declination', 'distance', 'v_magnitude']}
}
'''
#create prediction graph
create_prediction_graph = f'''
    CALL gds.graph.project(
    'prediction_graph',
    {star_properties},
    '*'
    )
'''

graph.run(create_prediction_graph)

nodeProjection,relationshipProjection,graphName,nodeCount,relationshipCount,projectMillis
"{unknownStar: {label: 'unknownStar', properties: {density: {property: 'density', defaultValue: null}, mass: {property: 'mass', defaultValue: null}, v_magnitude: {property: 'v_magnitude', defaultValue: null}, distance: {property: 'distance', defaultValue: null}, num_planets: {property: 'num_planets', defaultValue: null}, right_ascension: {property: 'right_ascension', defaultValue: null}, rotational_period: {property: 'rotational_period', defaultValue: null}, age: {property: 'age', defaultValue: null}, effective_temperature: {property: 'effective_temperature', defaultValue: null}, rotational_velocity: {property: 'rotational_velocity', defaultValue: null}, radius: {property: 'radius', defaultValue: null}, surface_gravity: {property: 'surface_gravity', defaultValue: null}, declination: {property: 'declination', defaultValue: null}, luminosity: {property: 'luminosity', defaultValue: null}, metallicity: {property: 'metallicity', defaultValue: null}}}, star: {label: 'star', properties: {density: {property: 'density', defaultValue: null}, mass: {property: 'mass', defaultValue: null}, v_magnitude: {property: 'v_magnitude', defaultValue: null}, distance: {property: 'distance', defaultValue: null}, num_planets: {property: 'num_planets', defaultValue: null}, right_ascension: {property: 'right_ascension', defaultValue: null}, rotational_period: {property: 'rotational_period', defaultValue: null}, age: {property: 'age', defaultValue: null}, effective_temperature: {property: 'effective_temperature', defaultValue: null}, rotational_velocity: {property: 'rotational_velocity', defaultValue: null}, radius: {property: 'radius', defaultValue: null}, surface_gravity: {property: 'surface_gravity', defaultValue: null}, declination: {property: 'declination', defaultValue: null}, luminosity: {property: 'luminosity', defaultValue: null}, metallicity: {property: 'metallicity', defaultValue: null}}}}","{__ALL__: {aggregation: 'DEFAULT', orientation: 'NATURAL', indexInverse: false, properties: {}, type: '*'}}",prediction_graph,4147,0,14


In [183]:
#create/config pipeline
create_pipeline = f'''
    CALL gds.alpha.pipeline.nodeRegression.create('pipe')
'''

graph.run(create_pipeline)

name,nodePropertySteps,featureProperties,splitConfig,autoTuningConfig,parameterSpace
pipe,[],[],"{testFraction: 0.3, validationFolds: 3}",{maxTrials: 10},"{LinearRegression: [], RandomForest: []}"


In [184]:
#split config 
split_config = '''
{
    testFraction: 0.4,
    validationFolds: 10
}
'''

splits = f'''
    CALL gds.alpha.pipeline.nodeRegression.configureSplit('pipe', {split_config})
'''

graph.run(splits)

name,nodePropertySteps,featureProperties,splitConfig,autoTuningConfig,parameterSpace
pipe,[],[],"{testFraction: 0.3, validationFolds: 3}",{maxTrials: 10},"{LinearRegression: [], RandomForest: []}"


In [185]:
#add node properties?

In [186]:
#add features
add_features = f'''
    CALL gds.alpha.pipeline.nodeRegression.selectFeatures('pipe',
    ['num_planets', 'effective_temperature', 'radius',
    'mass', 'metallicity', 'luminosity', 'surface_gravity', 'age',
    'density', 'rotational_velocity', 'rotational_period', 'right_ascension',
    'declination', 'distance', 'v_magnitude'])
'''

graph.run(add_features)

name,nodePropertySteps,featureProperties,splitConfig,autoTuningConfig,parameterSpace
pipe,[],"['num_planets', 'effective_temperature', 'radius', 'mass', 'metallicity', 'luminosity', 'surface_gravity', 'age', 'density', 'rotational_velocity', 'rotational_period', 'right_ascension', 'declination', 'distance', 'v_magnitude']","{testFraction: 0.3, validationFolds: 3}",{maxTrials: 10},"{LinearRegression: [], RandomForest: []}"


In [187]:
#add model candidates

linear_regression = f'''
    CALL gds.alpha.pipeline.nodeRegression.addLinearRegression('pipe')
'''

#graph.run(linear_regression)

empty_brace = '''{}'''

random_forest = f'''
    CALL gds.alpha.pipeline.nodeRegression.addRandomForest('pipe',{empty_brace})
'''

graph.run(random_forest)


name,nodePropertySteps,featureProperties,splitConfig,autoTuningConfig,parameterSpace
pipe,[],"['num_planets', 'effective_temperature', 'radius', 'mass', 'metallicity', 'luminosity', 'surface_gravity', 'age', 'density', 'rotational_velocity', 'rotational_period', 'right_ascension', 'declination', 'distance', 'v_magnitude']","{testFraction: 0.3, validationFolds: 3}",{maxTrials: 10},"{LinearRegression: [], RandomForest: [{maxDepth: 2147483647, minSplitSize: 2, minLeafSize: 1, numberOfSamplesRatio: 1.0, methodName: 'RandomForest', numberOfDecisionTrees: 100}]}"


In [188]:
#training the pipeline
train_config = '''
{
    pipeline: 'pipe',
    targetNodeLabels: ['star'],
    modelName: 'nr_model',
    targetProperty: 'num_planets',
    randomSeed: 42,
    metrics: ['MEAN_SQUARED_ERROR']
}
'''

train_pipeline = f'''
    CALL gds.alpha.pipeline.nodeRegression.train('prediction_graph', {train_config})
    YIELD modelInfo
    RETURN
    modelInfo.bestParameters as winningModel,
    modelInfo.metrics.MEAN_SQUARED_ERROR.train.avg AS avgTrainScore,
    modelInfo.metrics.MEAN_SQUARED_ERROR.outerTrain AS outerTrainScore,
    modelInfo.metrics.MEAN_SQUARED_ERROR.test AS testScore

'''

graph.run(train_pipeline)

winningModel,avgTrainScore,outerTrainScore,testScore
"{maxDepth: 2147483647, minSplitSize: 2, minLeafSize: 1, numberOfSamplesRatio: 1.0, methodName: 'RandomForest', numberOfDecisionTrees: 100}",1.777676763472306,1.7149188662916552,1.7479674066599395


In [189]:
#predict
predict_config = '''
{
    modelName: 'nr_model',
    targetNodeLabels: ['unknownStar']
}
'''

#streams stars with a number designating the amount predicted planets
predict_stream = f'''
    CALL gds.alpha.pipeline.nodeRegression.predict.stream('prediction_graph', {predict_config})
    YIELD nodeId, predictedValue
    WITH gds.util.asNode(nodeId) as node, round(predictedValue) as predicted_num_planets
    RETURN ID(node) AS node_ID, node.id AS star, predicted_num_planets
    ORDER BY predicted_num_planets DESC
'''

#copy paste below if inserting straight to neo4j desktop
#CALL gds.alpha.pipeline.nodeRegression.predict.stream('prediction_graph', {modelName: 'nr_model', targetNodeLabels: ['unknownStar']}) YIELD nodeId, predictedValue WITH gds.util.asNode(nodeId) as node, round(predictedValue) as predicted_num_planets RETURN ID(node) AS node_ID, node.id AS star, predicted_num_planets ORDER BY predicted_num_planets DESC

#returns a correctness score based on how many it modeled correctly
predict_correctness = f'''
    CALL gds.alpha.pipeline.nodeRegression.predict.stream('prediction_graph', {predict_config})
    YIELD nodeId, predictedValue
    WITH gds.util.asNode(nodeId) as node, round(predictedValue) as predicted_num_planets
    WITH node, predicted_num_planets, node.num_planets as actual_num_planets

    WITH 
        ID(node) as node_ID,
        node.id AS star,
        predicted_num_planets,
        actual_num_planets,
        CASE WHEN predicted_num_planets = actual_num_planets THEN 1 ELSE 0 END AS is_correct

    ORDER BY predicted_num_planets DESC
    WITH
        SUM(actual_num_planets) AS total_nodes,
        SUM(predicted_num_planets) AS correct_predictions

    RETURN
        correct_predictions AS num_planets_predicted,
        total_nodes AS actual_num_planets,
        (correct_predictions - total_nodes) / total_nodes * 100.0 AS error
'''

graph.run(predict_stream)
#graph.run(predict_correctness)

node_ID,star,predicted_num_planets
24,55 Cnc,5.0
756,HD 3167,5.0
2080,Kepler-1542,5.0


In [190]:
#Reset nodes to defaults
reset_unknowns = f'''
    MATCH (N:unknownStar)
    REMOVE N:unknownStar
    SET N:star
'''

graph.run(reset_unknowns)