In [1]:
%pip install --upgrade --quiet langchain langchain-neo4j langchain-openai langgraph


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.1.1[0m[39;49m -> [0m[32;49m25.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip3 install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [2]:
import getpass
import os
from dotenv import load_dotenv

# Load secrets from a .env file without tying the notebook to one machine.
# Set DOTENV_PATH to point at a specific file; when it is unset (or blank),
# load_dotenv receives None and python-dotenv locates a ".env" on its own.
load_dotenv(os.getenv("DOTENV_PATH") or None)

# Prompt when the key is missing *or* blank: an empty OPENAI_API_KEY would
# otherwise pass a plain membership check and only fail at the first API call.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter your OpenAI API key: ")

In [3]:
# Neo4j connection settings, read from the environment.
# Each value is None when the corresponding variable is not set.
neo4j_uri, neo4j_username, neo4j_password = (
    os.environ.get(key)
    for key in ("NEO4J_URI", "NEO4J_USERNAME", "NEO4J_PASSWORD")
)

In [7]:
from langchain_neo4j import Neo4jGraph

# Connect to Neo4j with the credentials read from the environment.
graph = Neo4jGraph(url=neo4j_uri, username=neo4j_username, password=neo4j_password)

# Cypher import script: for each CSV row, MERGE a Movie node keyed by movieId,
# set its release date / title / IMDb rating, then MERGE Person and Genre nodes
# from the '|'-separated director, actor and genre columns and link them to the
# movie via DIRECTED, ACTED_IN and IN_GENRE relationships.
# NOTE(review): the schema printed later in this notebook lists only
# plant-disease labels (Disease, Crop, Pathogen, ...) and no Movie/Person/Genre
# labels -- confirm this movie import is still wanted.
movies_query = """
LOAD CSV WITH HEADERS FROM 
'https://raw.githubusercontent.com/tomasonjo/blog-datasets/main/movies/movies_small.csv'
AS row
MERGE (m:Movie {id:row.movieId})
SET m.released = date(row.released),
    m.title = row.title,
    m.imdbRating = toFloat(row.imdbRating)
FOREACH (director in split(row.director, '|') | 
    MERGE (p:Person {name:trim(director)})
    MERGE (p)-[:DIRECTED]->(m))
FOREACH (actor in split(row.actors, '|') | 
    MERGE (p:Person {name:trim(actor)})
    MERGE (p)-[:ACTED_IN]->(m))
FOREACH (genre in split(row.genres, '|') | 
    MERGE (g:Genre {name:trim(genre)})
    MERGE (m)-[:IN_GENRE]->(g))
"""

# Run the import; the call's return value is shown as the cell output.
graph.query(query=movies_query)

[]

## Graph schema

In [12]:
# Refresh the cached schema on the connection, then print it as plain text.
graph.refresh_schema()
schema_text = graph.schema
print(schema_text)

Node properties:
Article {id: STRING}
Disease {id: STRING}
Crop {id: STRING}
Pathogen {id: STRING}
Symptom {id: STRING}
Variety {id: STRING}
Riskfactor {id: STRING}
Spreadmethod {id: STRING}
Seasonality {id: STRING}
Location {id: STRING}
Treatment {id: STRING}
Prevention {id: STRING}
Relationship properties:

The relationships:
(:Disease)-[:HAS_SYMPTOM]->(:Symptom)
(:Disease)-[:CAUSED_BY]->(:Pathogen)
(:Disease)-[:RISK_FACTOR]->(:Riskfactor)
(:Disease)-[:SPREADS_VIA]->(:Spreadmethod)
(:Disease)-[:PEAKS_DURING]->(:Seasonality)
(:Disease)-[:OCCURS_IN]->(:Location)
(:Disease)-[:MANAGED_BY]->(:Treatment)
(:Disease)-[:PREVENTED_BY]->(:Prevention)
(:Disease)-[:DOCUMENTED_IN]->(:Article)
(:Crop)-[:AFFECTED_BY]->(:Disease)
(:Variety)-[:SUSCEPTIBLE_TO]->(:Disease)


In [14]:
# Second connection with enhanced_schema=True, whose schema text also carries
# sample values / available options for each property (see the printout).
# Credentials are passed explicitly, the same way as for `graph`, so both
# connections are configured consistently rather than one relying on implicit
# environment-variable lookup.
enhanced_graph = Neo4jGraph(
    url=neo4j_uri,
    username=neo4j_username,
    password=neo4j_password,
    enhanced_schema=True,
)
print(enhanced_graph.schema)

Node properties:
- **Article**
  - `id`: STRING Available options: ['Lim & Sangchote (2003)', 'Zeng Et Al. (2022)', 'Thailand’S Department Of Agriculture', 'Udayanga Et Al. (2012)', 'Vietnam’S Plant Protection Department (2023)']
- **Disease**
  - `id`: STRING Available options: ['Phomopsis Leaf Blight', 'Phomopsis Leaf Spot', 'Phomopsis Dieback']
- **Crop**
  - `id`: STRING Available options: ['Durian']
- **Pathogen**
  - `id`: STRING Available options: ['Phomopsis Durionis Syd. & P. Syd.', 'Diaporthe Sp.']
- **Symptom**
  - `id`: STRING Example: "Small, Water-Soaked Lesions"
- **Variety**
  - `id`: STRING Available options: ['D24', 'D99', 'Chanee', 'Monthong', 'Ri6', 'Musang King (D197)']
- **Riskfactor**
  - `id`: STRING Available options: ['High Humidity Above 85%', 'Prolonged Leaf Wetness', 'Canopy Density', 'Poor Orchard Sanitation', 'Unbalanced Fertilization', 'Plant Stress']
- **Spreadmethod**
  - `id`: STRING Available options: ['Rain Splash', 'Wind-Driven Rain', 'Contaminated

In [15]:
from langchain_neo4j import GraphCypherQAChain
from langchain_openai import ChatOpenAI

# Chat model used by the chain; temperature=0 is set on the model.
llm = ChatOpenAI(temperature=0, model="gpt-4o")

# Question-answering chain over the enhanced-schema graph connection.
# verbose=True prints the generated Cypher and the retrieved context.
# NOTE(review): allow_dangerous_requests=True opts in to running
# model-generated Cypher against the database -- presumably the Neo4j user
# should be read-only; confirm.
chain = GraphCypherQAChain.from_llm(
    llm=llm,
    graph=enhanced_graph,
    verbose=True,
    allow_dangerous_requests=True,
)

# Ask a first question; the bare name below displays the result dict.
response = chain.invoke(dict(query="The symptoms of Phomopsis Leaf Blight?"))
response



> Entering new GraphCypherQAChain chain...
Generated Cypher:
cypher
MATCH (d:Disease {id: 'Phomopsis Leaf Blight'})-[:HAS_SYMPTOM]->(s:Symptom)
RETURN s.id

Full Context:
[{'s.id': 'Diminished Fruit Yield And Quality'}, {'s.id': 'Yellowish-Brown Or Greyish-Brown Necrotic Spots'}, {'s.id': 'Dark Or Purplish Borders'}, {'s.id': 'Reduced Photosynthesis'}, {'s.id': 'Cankers'}, {'s.id': 'Weakened Trees'}, {'s.id': 'Dieback Of Shoots'}, {'s.id': 'Yellow Halos'}, {'s.id': 'Blighted Patches'}, {'s.id': 'Small, Water-Soaked Lesions'}]

> Finished chain.


{'query': 'The symptoms of Phomopsis Leaf Blight?',
 'result': 'Diminished fruit yield and quality, yellowish-brown or greyish-brown necrotic spots, dark or purplish borders, reduced photosynthesis, cankers, weakened trees, dieback of shoots, yellow halos, blighted patches, and small, water-soaked lesions are symptoms of Phomopsis Leaf Blight.'}

In [16]:
# Ask a follow-up question with the same chain; `response` is overwritten
# and displayed as the cell output.
response = chain.invoke(dict(query="Phomopsis Leaf Blight caused by?"))
response



> Entering new GraphCypherQAChain chain...
Generated Cypher:
cypher
MATCH (d:Disease {id: 'Phomopsis Leaf Blight'})-[:CAUSED_BY]->(p:Pathogen)
RETURN p.id

Full Context:
[{'p.id': 'Phomopsis Durionis Syd. & P. Syd.'}, {'p.id': 'Diaporthe Sp.'}]

> Finished chain.


{'query': 'Phomopsis Leaf Blight caused by?',
 'result': 'Phomopsis Leaf Blight is caused by Phomopsis Durionis Syd. & P. Syd. and Diaporthe Sp.'}