In [179]:
import os

import altair as alt
import numpy as np
import pandas as pd
from graphdatascience import GraphDataScience
from sklearn.manifold import TSNE

graph_name = "testgraph"  # name of the in-memory GDS graph projection

# Connection settings come from the environment so that real credentials do
# not have to be stored in the notebook. The fallbacks are the local
# development values this notebook was originally written against; override
# them with NEO4J_URI / NEO4J_USER / NEO4J_PASSWORD.
neo4j_uri = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
neo4j_user = os.environ.get("NEO4J_USER", "neo4j")
neo4j_password = os.environ.get("NEO4J_PASSWORD", "j4oenj4oen")

gds = GraphDataScience(neo4j_uri, auth=(neo4j_user, neo4j_password))
gds.version()  # last expression of the cell: shows the GDS version string

'2.5.4'

# Construct graph

In [180]:
# euro_roads
# Populate the database by letting APOC execute a remote Cypher script.
cypher_euro_roads = "https://raw.githubusercontent.com/smallcat9603/graph/main/cypher/euro_roads.cypher"

# The URL is handed over as a query parameter instead of being spliced into
# the Cypher text, so quoting inside the value can never alter the statement.
query = """
CALL apoc.cypher.runFile($file)
"""
gds.run_cypher(query, params={"file": cypher_euro_roads})

Unnamed: 0,row,result
0,-1,"{'constraintsRemoved': 0, 'indexesRemoved': 0,..."


# Project graph

In [181]:
# Native projection: :Place nodes joined by undirected :EROAD relationships.
node_projection = ["Place"]
relationship_projection = {"EROAD": {"orientation": "UNDIRECTED"}}

# Open questions / failed experiments, kept for reference:
#   * node_projection = {"Query": {"properties": 'phrase'}, "Article": {"properties": 'phrase'}, "Noun": {}}
#       -> java.lang.UnsupportedOperationException: Loading of values of type
#          StringArray is currently not supported
#   * "CORRELATES": {"orientation": "UNDIRECTED", "properties": ["common"]}
#       -> Unsupported type [TEXT_ARRAY] of value StringArray[DNP].
#          Please use a numeric property.
#   * How should node properties be projected? Draft that was never enabled:
#       node_properties = {
#           "nodeProperties": {
#               "phrase": {"defaultValue": []},
#               "salience": {"defaultValue": []}
#           }
#       }

# Drop a projection with the same name left over from an earlier run, so this
# cell can be re-executed without a name clash.
exists_result = gds.graph.exists(graph_name)
if exists_result["exists"]:
    gds.graph.get(graph_name).drop()

G, result = gds.graph.project(graph_name, node_projection, relationship_projection)
print(G.node_properties("Place"))

[]


In [182]:
# FastRP node embeddings: stream a small preview, then persist the full vectors.
PREVIEW_ROWS = 5     # number of nodes printed as a sanity check
PREVIEW_DIM = 10     # short vectors keep the printed preview readable
EMBEDDING_DIM = 256  # dimension of the embedding written back to the database
FASTRP_SEED = 42     # fixed seed so repeated runs give the same embeddings

result_fastRP_stream = gds.fastRP.stream(
    G,
    randomSeed=FASTRP_SEED,
    embeddingDimension=PREVIEW_DIM,
)
# head() caps the preview at the rows that actually exist, so a projection
# with fewer than PREVIEW_ROWS nodes no longer raises; zip() walks the rows
# positionally instead of relying on the index labels being 0..4.
preview = result_fastRP_stream.head(PREVIEW_ROWS)
for node_id, embedding in zip(preview["nodeId"], preview["embedding"]):
    print(gds.util.asNode(node_id)["name"])
    print(embedding)

gds.fastRP.write(
    G,
    randomSeed=FASTRP_SEED,
    embeddingDimension=EMBEDDING_DIM,
    writeProperty="embedding",
)

# Only :Place nodes are part of the projection, so only they receive the
# `embedding` property; matching every node would also return rows whose
# embedding is null.
query = """
MATCH (x:Place)
RETURN x.name AS name, x.embedding AS embedding
"""
result = gds.run_cypher(query)
print(result.head())

Larne
[0.2804684340953827, 0.11474618315696716, 0.10329365730285645, 0.0798509493470192, -0.1528906226158142, 0.17779949307441711, -0.00774112343788147, -0.009006500244140625, 0.5070340037345886, -0.49096789956092834]
Belfast
[0.11146025359630585, 0.09622876346111298, 0.17428600788116455, 0.08232028037309647, -0.11772624403238297, 0.2665850818157196, -0.2951126992702484, 0.1784774661064148, 0.6177617311477661, -0.30660924315452576]
Dublin
[-0.33103495836257935, -0.38863199949264526, 0.3735765218734741, 0.29905325174331665, 0.27668333053588867, 0.5727735161781311, -0.8394432067871094, 0.2676694393157959, 0.6785457134246826, 0.2582975924015045]
Wexford
[-0.08042359352111816, -0.4900917708873749, 0.5370861291885376, -0.1860542744398117, 0.6389771699905396, 0.5905132293701172, -0.8393781185150146, -0.16729018092155457, 0.4057786166667938, 0.2532113194465637]
Rosslare
[0.09491148591041565, -0.5217186808586121, 0.587990403175354, -0.3561452627182007, 0.5779549479484558, 0.29384273290634155, 

# Query place embeddings for selected countries

In [183]:
# Country codes whose places are fetched; edit this list to change the
# selection without touching the Cypher text.
country_codes = ["E", "GB", "F", "TR", "I", "D", "GR"]

# The list is sent as a query parameter rather than written into the query.
query = """
MATCH (p:Place)-[:IN_COUNTRY]->(country)
WHERE country.code IN $country_codes
RETURN p.name AS place, p.embedding AS embedding, country.code AS country
"""
result = gds.run_cypher(query, params={"country_codes": country_codes})
result.head()

Unnamed: 0,place,embedding,country
0,Colchester,"[0.011932386085391045, 0.028970740735530853, 0...",GB
1,Glasgow,"[0.12930643558502197, 0.03863527253270149, 0.0...",GB
2,Edinburgh,"[0.16224481165409088, 0.0484783798456192, 0.12...",GB
3,Bristol,"[0.017798639833927155, -0.04315311834216118, 0...",GB
4,Immingham,"[-0.010613441467285156, -0.06019572168588638, ...",GB


# t-SNE

In [184]:
# Stack the per-place embedding lists into a single array for scikit-learn.
X = np.array(list(result["embedding"]))

# Project to two dimensions. X is reused here instead of rebuilding the same
# array a second time; random_state pins the layout between runs.
X_embedded = TSNE(n_components=2, random_state=6).fit_transform(X)

places = result["place"]
countries = result["country"]
# Column slices of the (n_places, 2) t-SNE output replace the per-row list
# comprehensions.
tsne_df = pd.DataFrame(data={
    "place": places,
    "country": countries,
    "x": X_embedded[:, 0],
    "y": X_embedded[:, 1],
})
tsne_df.head()

Unnamed: 0,place,country,x,y
0,Colchester,GB,20.793064,-5.216808
1,Glasgow,GB,21.878115,4.537694
2,Edinburgh,GB,19.91025,3.376391
3,Bristol,GB,20.072948,-8.008925
4,Immingham,GB,16.768686,-0.706182


# Plot

In [185]:
# Scatter plot of the 2-D t-SNE coordinates: one circle per place, coloured by
# country, with place and country shown on hover.
tsne_points = alt.Chart(tsne_df).mark_circle(size=60)
tsne_channels = dict(
    x='x',
    y='y',
    color='country',
    tooltip=['place', 'country'],
)
tsne_points.encode(**tsne_channels).properties(width=700, height=400)

# Free up memory

In [186]:
# Remove the in-memory projection if it is still registered under graph_name.
exists_result = gds.graph.exists(graph_name)
if exists_result["exists"]:
    gds.graph.get(graph_name).drop()

# WARNING: the statement below deletes EVERY node (and its relationships) in
# the connected database, not only the data loaded by this notebook.
query = """
MATCH (n) DETACH DELETE n
"""
gds.run_cypher(query)