In [1]:
from graphdatascience import GraphDataScience
import pandas as pd
import umap
import plotly.express as px
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import math
from getpass import getpass

In [2]:
bolt_url = "bolt://44.198.55.91:7687"

In [3]:
gds = GraphDataScience(bolt_url, auth=("neo4j", getpass("Neo4j password")))

In [7]:
g_example, result = gds.graph.cypher.project("""
MATCH p= (:Stream {name:"ogaminglol"})-[s:HAS_SIMILAR_INTERACTORS WHERE s.similarity > 0.065]-{1,2}()
UNWIND relationships(p) AS rel
WITH DISTINCT startNode(rel) AS n1, endNode(rel) AS n2, rel
WHERE n1 < n2
RETURN
gds.graph.project(
    "example",
    n1,
    n2,
    {
        relationshipType:"HAS_SIMILAR_INTERACTORS",
        relationshipProperties: rel{.similarity}
    },
    {
        undirectedRelationshipTypes: ["HAS_SIMILAR_INTERACTORS"]
    }
    )
                                             """)

In [8]:
result

relationshipCount                                                   20
graphName                                                      example
query                \nMATCH p= (:Stream {name:"ogaminglol"})-[s:HA...
projectMillis                                                       13
configuration        {'readConcurrency': 4, 'undirectedRelationship...
nodeCount                                                            9
dtype: object

In [12]:
gds.degree.mutate(g_example, mutateProperty="degree")



In [109]:
name_df = gds.graph.nodeProperties.stream(g_example, ["degree"], "*", separate_property_columns=True, db_node_properties=["name"])

In [115]:
name_colors = name_colors = {"ponce": "#FFE081", 
               "solary":"#C990C0", 
               "ogaminglol": "#F79767", 
               "vetheo": "#57C7E3", 
               "traytonlol": "#F16667", 
               "xari": "#D9C8AE", 
               "nuclearintt": "#8DCC93", 
               "targamas1": "#ECB5C9", 
               "domingo":"#4C8EDA"}

In [116]:
name_df['color'] = name_df['name'].map(name_colors)

In [117]:
name_df

Unnamed: 0,nodeId,degree,name,color
0,16,1.0,ponce,#FFE081
1,110,2.0,solary,#C990C0
2,232,4.0,ogaminglol,#F79767
3,476,3.0,vetheo,#57C7E3
4,1441905,4.0,traytonlol,#F16667
5,1441963,1.0,xari,#D9C8AE
6,1890577,1.0,nuclearintt,#8DCC93
7,1907275,1.0,targamas1,#ECB5C9
8,2940910,3.0,domingo,#4C8EDA


In [124]:
reducer = umap.UMAP(n_components=2, random_state=42)

def visualize_embedding(**kwargs):
    embedding_df = gds.fastRP.stream(g_example, **kwargs)
    embedding_df = embedding_df.merge(name_df, on="nodeId")
    embedding_reduced = reducer.fit_transform(np.array(list(embedding_df['embedding'])))
    df_reduced = pd.DataFrame(embedding_reduced, columns=['umap1', 'umap2'])
    df_reduced = pd.concat([embedding_df, df_reduced], axis=1)
    fig = px.scatter(df_reduced, 
                     x='umap1', 
                     y='umap2', 
                     width=900, 
                     height=900, 
                     hover_name='name', 
                     text='name',
                     color=embedding_df['color'],
                     color_discrete_map="identity")
    fig.update_traces(
        marker=dict(size=20),  # Increase point size
        textposition='top center'  # Position of the text labels
    )   
    fig.update_layout(
        yaxis={"scaleanchor":"x", "scaleratio":1, "showticklabels":False},
        xaxis={"showticklabels":False},
        plot_bgcolor='white',  # Set plot background to white
        paper_bgcolor='white'  # Set paper background to white
    )
    fig.show()
    return df_reduced

In [125]:
start_df = visualize_embedding(embeddingDimension=8, iterationWeights=[0], nodeSelfInfluence=1.0, relationshipWeightProperty="similarity", randomSeed=24)


n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.


n_neighbors is larger than the dataset size; truncating to X.shape[0] - 1



In [126]:
v1_df = visualize_embedding(embeddingDimension=8, iterationWeights=[1], relationshipWeightProperty="similarity", randomSeed=24)


n_neighbors is larger than the dataset size; truncating to X.shape[0] - 1



In [127]:
v2_df = visualize_embedding(embeddingDimension=8, iterationWeights=[1, 1], relationshipWeightProperty="similarity", randomSeed=24)


n_neighbors is larger than the dataset size; truncating to X.shape[0] - 1



In [128]:
v3_df = visualize_embedding(embeddingDimension=8, iterationWeights=[0, 1, 1], relationshipWeightProperty="similarity", randomSeed=24)


n_neighbors is larger than the dataset size; truncating to X.shape[0] - 1



In [130]:
v4_df = visualize_embedding(embeddingDimension=8, iterationWeights=[0, 0, 1, 1], relationshipWeightProperty="similarity", randomSeed=24)



n_neighbors is larger than the dataset size; truncating to X.shape[0] - 1



In [146]:
v4_df = visualize_embedding(embeddingDimension=8, iterationWeights=[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1], relationshipWeightProperty="similarity", randomSeed=24)



n_neighbors is larger than the dataset size; truncating to X.shape[0] - 1



In [90]:
np.array(list(v4_df['embedding']))

array([[-0.53606987, -0.22534773, -0.00757779, -1.28696656,  0.44392735,
        -0.05370855,  0.24917902,  1.15231788],
       [-0.48126012, -0.14608808, -0.03987488, -1.31719685,  0.39047596,
         0.03192626,  0.23080319,  1.1178236 ],
       [-0.53971773, -0.23605034,  0.03004307, -1.21652615,  0.46688581,
         0.00158991,  0.32177967,  1.27361131],
       [-0.44278389, -0.23665845, -0.00586607, -1.27582538,  0.40353203,
         0.03347939,  0.18114866,  1.12337255],
       [-0.42218956, -0.22095205, -0.03358599, -1.28945327,  0.43859914,
         0.02940267,  0.19870323,  1.08655727],
       [-0.64991158, -0.03447327,  0.31455606,  0.77913183, -0.10412266,
        -0.23483376,  0.46279541,  0.63672674],
       [-0.74402481, -0.0425178 ,  0.29086044,  0.52456075,  0.04924276,
        -0.1534787 ,  0.58442068,  1.00664389],
       [-0.64991158, -0.03447327,  0.31455609,  0.77913189, -0.10412268,
        -0.23483378,  0.46279544,  0.63672668],
       [-0.36640596, -0.15449318

In [83]:
v1_df = visualize_embedding(embeddingDimension=8, iterationWeights=[0, 0, 0, 1, 1], relationshipWeightProperty="similarity", randomSeed=24)



n_neighbors is larger than the dataset size; truncating to X.shape[0] - 1

