In [2]:
from graph.settings import NEO4J_URL
from graphdatascience import GraphDataScience
# Graph Data Science client built from the project's NEO4J_URL setting; every later cell
# issues its GDS procedure calls and Cypher queries through this handle.
gds = GraphDataScience(NEO4J_URL)

  from .autonotebook import tqdm as notebook_tqdm


## Lute Graph Projection Memory Estimate 

In [4]:
# Node labels to load into the in-memory graph.
node_projection = ["Album", "Genre", "Artist", "Descriptor", "Language"]

# Every relationship type is projected as undirected.
relationship_projection = {
    rel_type: {"orientation": "UNDIRECTED"}
    for rel_type in ("GENRE", "DESCRIPTOR", "LANGUAGE", "ALBUM_ARTIST", "CREDITED")
}
# GENRE is the only relationship type that also carries a property into the projection.
relationship_projection["GENRE"]["properties"] = "weight"

# Ask GDS how much memory the projection would need, without actually creating it.
result = gds.graph.project.estimate(node_projection, relationship_projection)

print(f"Required memory for native loading: {result['requiredMemory']}")

Required memory for native loading: [78 MiB ... 84 MiB]


## Projection

In [19]:

# Make the cell safe to re-run: projecting under a name that already exists in the graph
# catalog fails, so remove any stale "lute" projection first. failIfMissing=False turns
# the drop into a no-op on a fresh database.
gds.graph.drop("lute", failIfMissing=False)

# Project the labels / relationship types defined above into an in-memory graph named
# "lute". G is the handle the FastRP and KNN cells operate on.
G, result = gds.graph.project("lute", node_projection, relationship_projection)
print(f"The projection took {result['projectMillis']} ms")
print(f"Graph '{G.name()}' node count: {G.node_count()}")
print(f"Graph '{G.name()}' relationship count: {G.relationship_count()}")

The projection took 212 ms
Graph 'lute' node count: 192816
Graph 'lute' relationship count: 3128358


## Creating FastRP Embeddings

### Estimate Required Memory

In [20]:
# Estimate the memory FastRP would need on the projected graph, using the same settings
# as the real run, without computing any embeddings.
result = gds.fastRP.mutate.estimate(
    G,
    embeddingDimension=512,
    mutateProperty="embedding",
    randomSeed=42,
)

# Single quotes inside the f-string: reusing the enclosing double quotes only parses on
# Python 3.12+ and is a SyntaxError on earlier interpreters.
print(f"Required memory for running FastRP: {result['requiredMemory']}")

Required memory for running FastRP: 1147 MiB


### Generate Embeddings

In [21]:
# FastRP settings: 512-dimensional vectors stored on the in-memory graph under the
# "embedding" node property, with a fixed seed.
fastrp_settings = {
    "embeddingDimension": 512,
    "mutateProperty": "embedding",
    "randomSeed": 42,
}

# mutate mode: the call targets the projected graph G (see mutateProperty above).
result = gds.fastRP.mutate(G, **fastrp_settings)

print(f"Number of embedding vectors produced: {result['nodePropertiesWritten']}")

FastRP: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100.0/100 [00:00<00:00, 120.81%/s]

Number of embedding vectors produced: 192816





### Compute KNN Similarity Scores

In [22]:
# KNN over the "embedding" property, restricted to Album nodes, keeping the 10 most
# similar neighbours per node.
# NOTE(review): concurrency=1 together with randomSeed is presumably there to make the
# run reproducible -- confirm against the GDS KNN documentation.
# NOTE(review): write mode presumably persists SIMILAR relationships (with a "score"
# property) to the database, so re-running this cell may add a second set -- verify.
knn_settings = {
    "topK": 10,
    "nodeProperties": ["embedding"],
    "randomSeed": 42,
    "concurrency": 1,
    "sampleRate": 1.0,
    "deltaThreshold": 0.0,
    "writeRelationshipType": "SIMILAR",
    "writeProperty": "score",
    "nodeLabels": ["Album"],
}
result = gds.knn.write(G, **knn_settings)

print(f"Relationships produced: {result['relationshipsWritten']}")
print(f"Nodes compared: {result['nodesCompared']}")
print(f"Mean similarity: {result['similarityDistribution']['mean']}")

K-Nearest Neighbours: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████| 100.0/100 [01:26<00:00,  3.82s/%]

Relationships produced: 631680
Nodes compared: 63168
Mean similarity: 0.8638176306833917


### Explore Similar Albums

In [31]:
# Album whose SIMILAR neighbours we want to inspect; change this to explore another album.
seed_album_file_name = "release/album/liv_e/couldnt-wait-to-tell-you"

# The file name is passed as a Cypher parameter instead of being spliced into the query
# text, so the lookup can be re-pointed without editing (or mis-quoting) the query.
# The person1 / person2 aliases are kept as-is so the result columns are unchanged:
# person1 is the seed album's name, person2 is the similar album's file_name.
gds.run_cypher(
    """
        MATCH (p1:Album {file_name: $file_name})-[r:SIMILAR]->(p2:Album)
        RETURN p1.name AS person1, p2.file_name AS person2, r.score AS similarity
        ORDER BY similarity DESCENDING, person1, person2
    """,
    params={"file_name": seed_album_file_name},
)

Unnamed: 0,person1,person2,similarity
0,Couldn't Wait to Tell You...,release/ep/liv_e/cwtty,0.965559
1,Couldn't Wait to Tell You...,release/album/standing-on-the-corner/standing-...,0.964727
2,Couldn't Wait to Tell You...,release/album/liv_e/githpremixedition,0.961066
3,Couldn't Wait to Tell You...,release/album/flanafi/flanafi,0.958283
4,Couldn't Wait to Tell You...,release/album/liv_e/girl-in-the-half-pearl,0.957136
5,Couldn't Wait to Tell You...,release/album/xenia-franca/em-nome-da-estrela,0.956973
6,Couldn't Wait to Tell You...,release/album/eddie-chacon/sundown,0.955877
7,Couldn't Wait to Tell You...,release/ep/lazygod-x-cartiergod/vapordrip,0.955366
8,Couldn't Wait to Tell You...,release/ep/laurel-halo/ambrosia-ep,0.9533
9,Couldn't Wait to Tell You...,release/album/hefner_f1/residue,0.952982


## Drop Graph

In [18]:
# Drop the "lute" projection by name; as the cell's last expression, the call's return
# value (the dropped graph's catalog entry) is displayed as the cell output.
gds.graph.drop("lute")



graphName                                                             lute
database                                                             neo4j
databaseLocation                                                     local
memoryUsage                                                               
sizeInBytes                                                             -1
nodeCount                                                           192816
relationshipCount                                                  3128358
configuration            {'relationshipProjection': {'ALBUM_ARTIST': {'...
density                                                           0.000084
creationTime                           2024-07-16T04:39:13.239770215+00:00
modificationTime                       2024-07-16T04:39:16.327261509+00:00
schema                   {'graphProperties': {}, 'nodes': {'Descriptor'...
schemaWithOrientation    {'graphProperties': {}, 'nodes': {'Descriptor'...
Name: 0, dtype: object