<a href="https://colab.research.google.com/github/tomasonjo/blogs/blob/master/llm/langchain_neo4j_tips.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install neo4j openai langchain

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting neo4j
  Downloading neo4j-5.8.1.tar.gz (187 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m187.7/187.7 kB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Installing backend dependencies ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting openai
  Downloading openai-0.27.7-py3-none-any.whl (71 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m72.0/72.0 kB[0m [31m8.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting langchain
  Downloading langchain-0.0.180-py3-none-any.whl (922 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m922.9/922.9 kB[0m [31m43.6 MB/s[0m eta [36m0:00:00[0m
Collecting aiohttp (from openai)
  Downloading aiohttp-3.8.4-cp310-cp310-manylinux_2_17_x86_

In [2]:
from langchain.chat_models import ChatOpenAI
from langchain.chains import GraphCypherQAChain
from langchain.graphs import Neo4jGraph

graph = Neo4jGraph(
    url="bolt://44.212.12.199:7687", 
    username="neo4j", 
    password="buoy-warehouse-subordinates"
)

In [3]:
import os

os.environ['OPENAI_API_KEY'] = "sk-"

chain = GraphCypherQAChain.from_llm(
    ChatOpenAI(temperature=0), graph=graph, verbose=True,
)

In [4]:
chain.run("""
Which streamer who plays fortnite has the most followers?
""")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (s:Stream)-[:PLAYS]->(:Game {name: 'Fortnite'})
RETURN s.name
ORDER BY s.followers DESC
LIMIT 1[0m
Full Context:
[32;1m[1;3m[{'s.name': 'thegrefg'}][0m

[1m> Finished chain.[0m


'The streamer with the most followers who plays Fortnite is thegrefg.'

In [5]:
chain.run("""
Who is the moderator for the most streams?
""")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (u:User)-[:MODERATOR]->(s:Stream)
WITH u, COUNT(s) AS numStreams
ORDER BY numStreams DESC
LIMIT 1
RETURN u.name AS moderator, numStreams AS numModeratedStreams[0m
Full Context:
[32;1m[1;3m[{'moderator': 'nightbot', 'numModeratedStreams': 2384}][0m

[1m> Finished chain.[0m


'According to the provided information, the moderator with the highest number of moderated streams is nightbot, with a total of 2384 moderated streams.'

In [9]:
chain.run("""
Which italian streamer has the most followers?
""")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (s:Stream)-[:HAS_LANGUAGE]->(:Language {name: 'Italian'})
RETURN s.name, s.followers
ORDER BY s.followers DESC
LIMIT 1[0m
Full Context:
[32;1m[1;3m[][0m

[1m> Finished chain.[0m


"I'm sorry, but I cannot provide an answer to your question as no information has been provided. Please provide more details or a specific name to assist you better."

In [11]:
# https://github.com/hwchase17/langchain/blob/master/langchain/chains/graph_qa/prompts.py
from langchain.prompts.prompt import PromptTemplate


CYPHER_GENERATION_TEMPLATE = """Task:Generate Cypher statement to query a graph database.
Instructions:
Use only the provided relationship types and properties in the schema.
Do not use any other relationship types or properties that are not provided.
Schema:
{schema}
Cypher examples:
# How many streamers are from Norway?
MATCH (s:Stream)-[:HAS_LANGUAGE]->(:Language {{name: 'no'}})
RETURN count(s) AS streamers

Note: Do not include any explanations or apologies in your responses.
Do not respond to any questions that might ask anything else than for you to construct a Cypher statement.
Do not include any text except the generated Cypher statement.

The question is:
{question}"""
CYPHER_GENERATION_PROMPT = PromptTemplate(
    input_variables=["schema", "question"], template=CYPHER_GENERATION_TEMPLATE
)

In [13]:
chain_language_example = GraphCypherQAChain.from_llm(
    ChatOpenAI(temperature=0), graph=graph, verbose=True,
    cypher_prompt=CYPHER_GENERATION_PROMPT
)

In [14]:
chain_language_example.run("""
Which italian streamer has the most followers?
""")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (s:Stream)-[:HAS_LANGUAGE]->(:Language {name: 'it'})
WHERE s.followers IS NOT NULL
RETURN s.name AS streamer, s.followers AS followers
ORDER BY followers DESC
LIMIT 1[0m
Full Context:
[32;1m[1;3m[{'streamer': 'pow3rtv', 'followers': 1530428}][0m

[1m> Finished chain.[0m


'According to the provided information, the streamer with the most followers is pow3rtv, with a total of 1530428 followers.'

In [15]:
chain_language_example.run("""
Which streamers should I watch if I like pokimane?
""")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (s1:Stream)-[:PLAYS]->(:Game {name: 'League of Legends'}), (s1)-[:CHATTER]->(s2:Stream)-[:PLAYS]->(:Game {name: 'League of Legends'}), (s2)-[:MODERATOR|VIP]->(s3:Stream)-[:PLAYS]->(:Game {name: 'League of Legends'})
WHERE s1.name = 'pokimane'
RETURN DISTINCT s3.name AS recommended_streamers[0m
Full Context:
[32;1m[1;3m[][0m

[1m> Finished chain.[0m


'Based on your interest in Pokimane, some other popular streamers you may enjoy watching include Valkyrae, LilyPichu, and Fuslie. Each of these streamers has their own unique style and content, but they all share a similar focus on gaming and entertainment. Give them a try and see which ones you enjoy the most!'

In [17]:
graph.query("""
CALL gds.graph.project('shared-audience',
  'User',
  {CHATTER: {orientation:'REVERSE'}})
""")

[{'nodeProjection': {'User': {'label': 'User', 'properties': {}}},
  'relationshipProjection': {'CHATTER': {'orientation': 'REVERSE',
    'indexInverse': False,
    'aggregation': 'DEFAULT',
    'type': 'CHATTER',
    'properties': {}}},
  'graphName': 'shared-audience',
  'nodeCount': 4678779,
  'relationshipCount': 10018281,
  'projectMillis': 8013}]

In [19]:
graph.query("""
CALL gds.nodeSimilarity.mutate('shared-audience',
 {similarityMetric: 'Jaccard',similarityCutoff:0.05, topK:10, sudo:true,
     mutateProperty:'score', mutateRelationshipType:'SHARED_AUDIENCE'})
""")

[{'preProcessingMillis': 2,
  'computeMillis': 111087,
  'mutateMillis': 523,
  'postProcessingMillis': -1,
  'nodesCompared': 4538,
  'relationshipsWritten': 23609,
  'similarityDistribution': {'p1': 0.05039477348327637,
   'max': 0.9291150569915771,
   'p5': 0.05223870277404785,
   'p90': 0.27272772789001465,
   'p50': 0.08695673942565918,
   'p95': 0.3424661159515381,
   'p10': 0.05494499206542969,
   'p75': 0.14691996574401855,
   'p99': 0.46153998374938965,
   'p25': 0.06399989128112793,
   'p100': 0.9291150569915771,
   'min': 0.04999995231628418,
   'mean': 0.1265612697302399,
   'stdDev': 0.09586148263128431},
  'configuration': {'topK': 10,
   'similarityMetric': 'JACCARD',
   'bottomK': 10,
   'bottomN': 0,
   'mutateRelationshipType': 'SHARED_AUDIENCE',
   'topN': 0,
   'concurrency': 4,
   'jobId': '8fe2f42f-f625-4b00-af13-96f517732c22',
   'degreeCutoff': 1,
   'similarityCutoff': 0.05,
   'logProgress': True,
   'nodeLabels': ['*'],
   'sudo': True,
   'relationshipTypes'

In [31]:
# https://github.com/hwchase17/langchain/blob/master/langchain/chains/graph_qa/prompts.py
from langchain.prompts.prompt import PromptTemplate


CYPHER_RECOMMENDATION_TEMPLATE = """Task:Generate Cypher statement to query a graph database.
Instructions:
Use only the provided relationship types and properties in the schema.
Do not use any other relationship types or properties that are not provided.
Schema:
{schema}
Cypher examples:
# How many streamers are from Norway?
MATCH (s:Stream)-[:HAS_LANGUAGE]->(:Language {{name: 'no'}})
RETURN count(s) AS streamers
# Which streamers do you recommend if I like kimdoe and esfandtv?
MATCH (s:Stream)
WHERE s.name in ["kimdoe", "esfandtv"]
WITH collect(s) AS sourceNodes
CALL gds.pageRank.stream("shared-audience", {{sourceNodes:sourceNodes, relationshipTypes:['SHARED_AUDIENCE']}})
YIELD nodeId, score
RETURN gds.util.asNode(nodeId).name AS name, score
ORDER BY score DESC LIMIT 3

Note: Do not include any explanations or apologies in your responses.
Do not respond to any questions that might ask anything else than for you to construct a Cypher statement.
Do not include any text except the generated Cypher statement.

The question is:
{question}"""
CYPHER_RECOMMENDATION_PROMPT = PromptTemplate(
    input_variables=["schema", "question"], template=CYPHER_RECOMMENDATION_TEMPLATE
)

In [32]:
chain_recommendation_example = GraphCypherQAChain.from_llm(
    ChatOpenAI(temperature=0, model_name='gpt-4'), graph=graph, verbose=True,
    cypher_prompt=CYPHER_RECOMMENDATION_PROMPT, 
)

In [33]:
chain_recommendation_example.run("""
Which streamers should I watch if I like pokimane?
""")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (s:Stream)
WHERE s.name = "pokimane"
WITH collect(s) AS sourceNodes
CALL gds.pageRank.stream("shared-audience", {sourceNodes:sourceNodes, relationshipTypes:['SHARED_AUDIENCE']})
YIELD nodeId, score
RETURN gds.util.asNode(nodeId).name AS name, score
ORDER BY score DESC LIMIT 3[0m
Full Context:
[32;1m[1;3m[{'name': 'xchocobars', 'score': 0.2343657053097286}, {'name': 'pokimane', 'score': 0.17483979554760862}, {'name': 'ariasaki', 'score': 0.06485239618458193}][0m

[1m> Finished chain.[0m


"Based on the provided information, you might enjoy watching xchocobars, who has a similarity score of 0.2344, and ariasaki, with a score of 0.0649, as they have some resemblance to pokimane's content."

In [35]:
chain_recommendation_example.run("""
Which streamers do you recommend to watch if I like Chess games?
""")



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (s:Stream)-[:PLAYS]->(:Game {name: 'Chess'})
RETURN s.name AS streamer
ORDER BY s.followers DESC
LIMIT 10[0m
Full Context:
[32;1m[1;3m[{'streamer': 'gmhikaru'}, {'streamer': 'thisisnotgeorgenotfound'}, {'streamer': 'gothamchess'}, {'streamer': 'mates'}, {'streamer': 'akanemsko'}, {'streamer': 'xntentacion'}, {'streamer': 'chessbrah'}, {'streamer': 'inet_saju'}, {'streamer': 'annacramling'}, {'streamer': 'michelleputtini'}][0m

[1m> Finished chain.[0m


'I recommend the following chess streamers for you to watch: gmhikaru, thisisnotgeorgenotfound, gothamchess, mates, akanemsko, xntentacion, chessbrah, inet_saju, annacramling, and michelleputtini. Enjoy watching their chess games!'