In [None]:
from neo4j import GraphDatabase
from graphdatascience import GraphDataScience
import pandas as pd
from getpass import getpass

# Display options: show every column, never wrap wide frames across lines,
# and never truncate the contents of a cell.
# Fully qualified option names are used for all three; pandas warns that
# abbreviated names (such as 'max_colwidth') can become ambiguous and break
# once new options with similar names are added.
pd.set_option('display.max_columns', None)
pd.set_option('display.expand_frame_repr', False)
pd.set_option('display.max_colwidth', None)

# Connect to Neo4j
uri = "bolt://localhost:11005"  #change as needed
user = "neo4j"
password = getpass()  # prompted interactively so the password is never stored in the notebook

# `gds` is the client every later cell uses to send Cypher to the database.
gds = GraphDataScience(uri, auth=(user,password))
gds.set_database("recipe")  #change as needed

# Create Projection

In [None]:
# Make this cell re-runnable: creating a projection whose name is already in
# the graph catalog fails, so drop any leftover 'ingredients' graph first
# (e.g. from an earlier or aborted run). The second argument is
# failIfMissing = false, which turns the call into a no-op when no such
# graph exists.
gds.run_cypher("""

CALL gds.graph.drop('ingredients', false)
YIELD graphName
RETURN graphName

""")

# Project the Recipe-[:USES]->Ingredient pairs for recipes whose title contains
# 'cornbread' into an in-memory graph named 'ingredients'.
# - Only nodes that already carry a `pageRank` property are included; because
#   of that filter the coalesce(..., 0.001) fallbacks never actually fire.
# - `pageRank` is copied onto both endpoints as a node property.
# - undirectedRelationshipTypes: ['*'] projects every relationship as undirected.
gds.run_cypher("""

MATCH (source:Recipe)-[:USES]->(target:Ingredient)
WHERE source.Recipe_Title CONTAINS 'cornbread'
    AND source.pageRank IS NOT NULL
    AND target.pageRank IS NOT NULL
WITH gds.graph.project(
  'ingredients',
  source,
  target,
  {
    sourceNodeProperties: source { pageRank: coalesce(source.pageRank, 0.001)},
    targetNodeProperties: target { pageRank: coalesce(target.pageRank, 0.001)}
  },
  {undirectedRelationshipTypes: ['*']}
) as g
RETURN
  g.graphName AS graph, g.nodeCount AS nodes, g.relationshipCount AS rels

""")

### GraphSAGE

In [None]:
# Train a GraphSAGE model named 'graphSageModel' on the 'ingredients'
# projection, using the projected `pageRank` node property as the only feature.
# randomSeed pins the training randomness so repeated runs are comparable,
# matching the fixed random_state used by the UMAP / K-Means cells further down.
# NOTE(review): GDS may still vary results across runs when training uses more
# than one thread - confirm whether `concurrency: 1` is also wanted.
gds.run_cypher("""

CALL gds.beta.graphSage.train(
  'ingredients',
  {
    modelName: 'graphSageModel',
    featureProperties: ['pageRank'],
    aggregator: 'mean',
    epochs: 10,
    searchDepth: 5,
    randomSeed: 42
  }
)
YIELD modelInfo
RETURN modelInfo

""")

In [None]:
# Run the trained 'graphSageModel' over the 'ingredients' projection in write
# mode, storing each node's vector under the `embedding` property.
# The result reports how many node properties were written.
write_embeddings_query = """

CALL gds.beta.graphSage.write(
  'ingredients',
  {
    modelName: 'graphSageModel',
    writeProperty: 'embedding'
  }
)
YIELD nodePropertiesWritten
RETURN nodePropertiesWritten

"""
gds.run_cypher(write_embeddings_query)

##### Visually represent the embeddings

In [None]:
# Pull title, pageRank and embedding for every 'cornbread' recipe, highest
# pageRank first.
# The `embedding IS NOT NULL` filter matters: a recipe can match the title and
# pageRank conditions without having been part of the projection (no USES edge
# to an ingredient with a pageRank), in which case it has no embedding. A null
# in this column would make the downstream np.array(...) conversion ragged and
# break dimensionality reduction.
df = gds.run_cypher("""

MATCH (source:Recipe)
WHERE source.Recipe_Title CONTAINS 'cornbread'
    AND source.pageRank IS NOT NULL
    AND source.embedding IS NOT NULL
RETURN source.Recipe_Title as Recipe_Title
, source.pageRank as pageRank
, source.embedding as embedding
ORDER BY pageRank DESC

""")
df.head(n=2)

In [None]:
# pip install hdbscan


In [None]:
import umap
import numpy as np
import pandas as pd
import plotly.express as px
from sklearn.cluster import KMeans

# Turn the per-recipe embedding lists into one 2-D array
# (assumes every row holds a list of the same length - TODO confirm).
embeddings = np.array(df['embedding'].tolist())

# Reduce the embeddings to two dimensions for plotting; random_state fixes the layout.
reducer = umap.UMAP(n_neighbors=15,  # Try different values for n_neighbors
                    min_dist=0.1,    # Try different values for min_dist
                    random_state=42)
umap_embeddings = reducer.fit_transform(embeddings)

# Cluster in the reduced 2-D space.
n_clusters = 10
kmeans = KMeans(n_clusters=n_clusters, random_state=42)
cluster_labels = kmeans.fit_predict(umap_embeddings)

umap_df = pd.DataFrame(umap_embeddings, columns=['x', 'y'])
# Store the labels as strings: Plotly Express maps a numeric color column to a
# continuous color scale, but cluster ids are categories and should get one
# discrete color (and legend entry) each.
umap_df['cluster'] = cluster_labels.astype(str)

# Plot using Plotly with cluster colors; category_orders keeps the legend in
# numeric cluster order instead of order of first appearance.
fig = px.scatter(
    umap_df,
    x='x',
    y='y',
    color='cluster',
    category_orders={'cluster': [str(i) for i in range(n_clusters)]},
    title='2D UMAP visualization of Recipe Embeddings with K-Means Clusters',
)
fig.update_traces(marker=dict(size=5))
fig.show()


In [None]:
import umap
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.cluster import KMeans

# Static (matplotlib / seaborn) rendering of the UMAP + K-Means view.
# Expects the DataFrame `df` to carry one embedding list per row in its
# 'embedding' column.
embeddings = np.array(df['embedding'].tolist())

# Two-dimensional UMAP layout with a fixed seed.
reducer = umap.UMAP(
    n_neighbors=15,
    min_dist=0.1,
    random_state=42,
)
umap_embeddings = reducer.fit_transform(embeddings)

# K-Means on the reduced coordinates.
n_clusters = 10  # Update this if necessary
kmeans = KMeans(n_clusters=n_clusters, random_state=42)
cluster_labels = kmeans.fit_predict(umap_embeddings)

# Plot-ready frame: the two UMAP coordinates plus the assigned cluster.
plot_df = pd.DataFrame(
    umap_embeddings,
    columns=['UMAP Dimension 1', 'UMAP Dimension 2'],
).assign(Cluster=cluster_labels)

# One hue per cluster.
cluster_palette = sns.color_palette("hsv", n_clusters)

plt.figure(figsize=(10, 8))
scatter_ax = sns.scatterplot(
    data=plot_df,
    x='UMAP Dimension 1',
    y='UMAP Dimension 2',
    hue='Cluster',
    palette=cluster_palette,
    legend="full",
    alpha=0.8,
)
scatter_ax.set_title('2D UMAP visualization of Recipe Embeddings with K-Means Clusters')
plt.show()


# Drop Projection

In [None]:
# Cleanup: remove the in-memory 'ingredients' projection from the graph catalog.
drop_projection_query = """

CALL gds.graph.drop('ingredients')

"""
gds.run_cypher(drop_projection_query)

In [None]:
# Cleanup: remove the trained 'graphSageModel' and show the info of the
# model that was dropped.
drop_model_query = """

CALL gds.beta.model.drop('graphSageModel')
YIELD modelInfo
RETURN modelInfo

"""
gds.run_cypher(drop_model_query)