In [1]:
# Install (or upgrade) the LangChain, LangChain-Neo4j, LangChain-OpenAI,
# LangChain-experimental and Neo4j driver packages into the kernel's environment.
# NOTE(review): no versions are pinned and --upgrade is set, so the versions that
# get installed depend on when this cell is run — consider pinning for reproducibility.
%pip install --upgrade --quiet  langchain langchain-neo4j langchain-openai langchain-experimental neo4j


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.1.1[0m[39;49m -> [0m[32;49m25.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip3 install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [2]:
import getpass
import os
from dotenv import load_dotenv

# Load environment variables from a .env file without hard-coding a
# machine-specific absolute path. Set DOTENV_PATH to point at an explicit file;
# when it is unset or empty, None is passed and python-dotenv falls back to its
# own .env lookup.
# NOTE(review): presumably the .env also carries the NEO4J_* settings read in a
# later cell — confirm against the actual file.
load_dotenv(os.environ.get("DOTENV_PATH") or None)

# Prompt interactively when the key is missing *or* empty (e.g. `OPENAI_API_KEY=`
# left blank in the .env), so an empty value is never used as the key.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter your OpenAI API key: ")

In [3]:
# Neo4j connection settings, read from the process environment.
# Each name is None when the corresponding variable is not set.
neo4j_uri, neo4j_username, neo4j_password = (
    os.environ.get(env_key)
    for env_key in ("NEO4J_URI", "NEO4J_USERNAME", "NEO4J_PASSWORD")
)

In [4]:
from langchain_neo4j import Neo4jGraph

# Build the Neo4jGraph wrapper from the connection settings read from the
# environment (URI, user name and password).
neo4j_connection = {
    "url": neo4j_uri,
    "username": neo4j_username,
    "password": neo4j_password,
}
graph = Neo4jGraph(**neo4j_connection)

In [5]:
from langchain_experimental.graph_transformers import LLMGraphTransformer
from langchain_openai import ChatOpenAI

# Chat model used for graph extraction; temperature 0 to minimise sampling
# randomness between runs.
llm = ChatOpenAI(
    model_name="gpt-4o",
    temperature=0,
)

# Transformer with no schema restrictions: node and relationship types are
# left entirely to the model.
llm_transformer = LLMGraphTransformer(llm=llm)

In [9]:
from langchain_core.documents import Document

# Source passage for graph extraction: an English description of Phomopsis Leaf
# Blight on durian covering its cause, symptoms, cultivar susceptibility,
# favourable conditions, spread, geographic range, management and references.
# It is wrapped in a Document and passed to the graph transformers below.
text = """
Phomopsis Leaf Blight, also known as Phomopsis Leaf Spot or Phomopsis Dieback, is a fungal disease of durian trees caused by Phomopsis durionis Syd. & P. Syd. (teleomorph Diaporthe sp.), confirmed through both morphological and molecular methods. It is recognized in Vietnam as “Bệnh đốm lá Phomopsis” or “Bệnh cháy lá Phomopsis,” and in Thailand as “โรคใบจุดโฟมอปซิส (Rok bai jut Phomopsis).” The disease appears first as small, water-soaked lesions on young or mature leaves, which enlarge into yellowish-brown or greyish-brown necrotic spots with dark or purplish borders, often surrounded by yellow halos. Over time, these lesions merge, causing blighted patches, defoliation, and reduced photosynthesis. Black, flask-shaped fungal fruiting bodies (pycnidia) develop in older lesions, releasing spores that spread the disease. The fungus also infects young twigs, causing cankers and dieback of shoots, which stunts tree growth. Severe infections weaken trees, reduce vigor, and diminish fruit yield and quality. Susceptible cultivars include D24, D99, and Chanee, while Monthong and Ri6 show moderate susceptibility, and Musang King (D197) exhibits somewhat greater tolerance. Favorable conditions for disease development include high humidity above 85%, prolonged leaf wetness from rain or irrigation, canopy density, poor orchard sanitation, unbalanced fertilization (especially excessive nitrogen), and plant stress. The disease spreads primarily via rain splash, wind-driven rain, contaminated tools, infected planting material, and survival of the fungus in debris and cankers. It is most severe during the rainy season, particularly when new leaf flushes occur, as young tissues are highly susceptible. The disease is prevalent across Southeast Asia, including Thailand, Vietnam, Malaysia, the Philippines, and Indonesia. 
Management involves an integrated approach: chemical fungicides (e.g., mancozeb, chlorothalonil, copper compounds, propiconazole, difenoconazole, tebuconazole, azoxystrobin) applied preventively and rotated to avoid resistance; biological controls such as Trichoderma spp. and Bacillus subtilis; and cultural practices like pruning for canopy ventilation, removing infected debris, improving drainage, using drip irrigation, balancing fertilization, sterilizing tools, and ensuring disease-free planting material. Preventive orchard monitoring and sanitation remain the most effective strategies. Key references include works by Lim & Sangchote (2003), Zeng et al. (2022), Thailand’s Department of Agriculture, Udayanga et al. (2012), and Vietnam’s Plant Protection Department (2023).
"""

documents = [
    Document(page_content=text)
]
graph_documents = await llm_transformer.aconvert_to_graph_documents(documents)

print(f"Nodes ({len(graph_documents[0].nodes)}):{graph_documents[0].nodes}")
print(f"Relationships ({len(graph_documents[0].relationships)}):{graph_documents[0].relationships}")

Nodes (32):[Node(id='Phomopsis Leaf Blight', type='Disease', properties={}), Node(id='Phomopsis Leaf Spot', type='Disease', properties={}), Node(id='Phomopsis Dieback', type='Disease', properties={}), Node(id='Phomopsis Durionis Syd. & P. Syd.', type='Fungus', properties={}), Node(id='Diaporthe Sp.', type='Fungus', properties={}), Node(id='Durian Trees', type='Plant', properties={}), Node(id='Vietnam', type='Location', properties={}), Node(id='Thailand', type='Location', properties={}), Node(id='D24', type='Plant', properties={}), Node(id='D99', type='Plant', properties={}), Node(id='Chanee', type='Plant', properties={}), Node(id='Monthong', type='Plant', properties={}), Node(id='Ri6', type='Plant', properties={}), Node(id='Musang King (D197)', type='Plant', properties={}), Node(id='Southeast Asia', type='Location', properties={}), Node(id='Malaysia', type='Location', properties={}), Node(id='Philippines', type='Location', properties={}), Node(id='Indonesia', type='Location', propertie

In [10]:
# List the nodes of the first unconstrained graph document, one per line.
for node_line in map(str, graph_documents[0].nodes):
    print(node_line)

id='Phomopsis Leaf Blight' type='Disease' properties={}
id='Phomopsis Leaf Spot' type='Disease' properties={}
id='Phomopsis Dieback' type='Disease' properties={}
id='Phomopsis Durionis Syd. & P. Syd.' type='Fungus' properties={}
id='Diaporthe Sp.' type='Fungus' properties={}
id='Durian Trees' type='Plant' properties={}
id='Vietnam' type='Location' properties={}
id='Thailand' type='Location' properties={}
id='D24' type='Plant' properties={}
id='D99' type='Plant' properties={}
id='Chanee' type='Plant' properties={}
id='Monthong' type='Plant' properties={}
id='Ri6' type='Plant' properties={}
id='Musang King (D197)' type='Plant' properties={}
id='Southeast Asia' type='Location' properties={}
id='Malaysia' type='Location' properties={}
id='Philippines' type='Location' properties={}
id='Indonesia' type='Location' properties={}
id='Mancozeb' type='Chemical' properties={}
id='Chlorothalonil' type='Chemical' properties={}
id='Copper Compounds' type='Chemical' properties={}
id='Propiconazole' ty

In [None]:
# Node labels passed to the schema-constrained transformer as `allowed_nodes`.
# The comment above each label gives example values.
allowed_nodes = [
    # Durian
    "Crop",
    # Monthong (D159), Musang King (D197)
    "Variety",
    # Phomopsis Leaf Blight
    "Disease",
    # Phomopsis durionis
    "Pathogen",
    # Leaf blight, twig dieback, pycnidia on lesions
    "Symptom",
    # Rainy season (May-Oct), >85% RH
    "Seasonality",
    # Propiconazole, Mancozeb
    "Treatment",
    # Canopy pruning, tool sterilization
    "Prevention",
    # Rain splash, contaminated tools
    "SpreadMethod",
    # Overhead irrigation, dense canopy
    "RiskFactor",
    # Thailand/Eastern provinces; Vietnam/Mekong Delta
    "Location",
    # Bibliographic source
    "Article",
]


# (source label, relationship type, target label) triples passed to the
# schema-constrained transformer as `allowed_relationships`.
allowed_relationships = [
    # Crop-level edges.
    ("Crop", "HAS_VARIETY", "Variety"),
    ("Crop", "AFFECTED_BY", "Disease"),
    # TODO: add a `level` property to SUSCEPTIBLE_TO (low / moderate / high).
    ("Variety", "SUSCEPTIBLE_TO", "Disease"),
    # Every remaining edge starts at a Disease node.
    *(
        ("Disease", rel_type, target_label)
        for rel_type, target_label in (
            ("CAUSED_BY", "Pathogen"),
            ("HAS_SYMPTOM", "Symptom"),
            ("PEAKS_DURING", "Seasonality"),
            ("MANAGED_BY", "Treatment"),
            ("PREVENTED_BY", "Prevention"),
            ("SPREADS_VIA", "SpreadMethod"),
            ("RISK_FACTOR", "RiskFactor"),
            ("OCCURS_IN", "Location"),
            ("DOCUMENTED_IN", "Article"),
        )
    ),
]


llm_transformer_tuple = LLMGraphTransformer(
    llm=llm,
    allowed_nodes=allowed_nodes,
    allowed_relationships=allowed_relationships,
)
graph_documents_filtered = await llm_transformer_tuple.aconvert_to_graph_documents(
    documents
)

Nodes (32):[Node(id='Phomopsis Leaf Blight', type='Disease', properties={}), Node(id='Phomopsis Leaf Spot', type='Disease', properties={}), Node(id='Phomopsis Dieback', type='Disease', properties={}), Node(id='Phomopsis Durionis Syd. & P. Syd.', type='Fungus', properties={}), Node(id='Diaporthe Sp.', type='Fungus', properties={}), Node(id='Durian Trees', type='Plant', properties={}), Node(id='Vietnam', type='Location', properties={}), Node(id='Thailand', type='Location', properties={}), Node(id='D24', type='Plant', properties={}), Node(id='D99', type='Plant', properties={}), Node(id='Chanee', type='Plant', properties={}), Node(id='Monthong', type='Plant', properties={}), Node(id='Ri6', type='Plant', properties={}), Node(id='Musang King (D197)', type='Plant', properties={}), Node(id='Southeast Asia', type='Location', properties={}), Node(id='Malaysia', type='Location', properties={}), Node(id='Philippines', type='Location', properties={}), Node(id='Indonesia', type='Location', propertie

In [13]:
# Summarise the first schema-constrained graph document: counts, then raw lists.
for summary_line in (
    f"Nodes ({len(graph_documents_filtered[0].nodes)}):{graph_documents_filtered[0].nodes}",
    f"Relationships ({len(graph_documents_filtered[0].relationships)}):{graph_documents_filtered[0].relationships}",
):
    print(summary_line)

Nodes (51):[Node(id='Phomopsis Leaf Blight', type='Disease', properties={}), Node(id='Phomopsis Leaf Spot', type='Disease', properties={}), Node(id='Phomopsis Dieback', type='Disease', properties={}), Node(id='Durian', type='Crop', properties={}), Node(id='Phomopsis Durionis Syd. & P. Syd.', type='Pathogen', properties={}), Node(id='Diaporthe Sp.', type='Pathogen', properties={}), Node(id='Small, Water-Soaked Lesions', type='Symptom', properties={}), Node(id='Yellowish-Brown Or Greyish-Brown Necrotic Spots', type='Symptom', properties={}), Node(id='Dark Or Purplish Borders', type='Symptom', properties={}), Node(id='Yellow Halos', type='Symptom', properties={}), Node(id='Blighted Patches', type='Symptom', properties={}), Node(id='Defoliation', type='Symptom', properties={}), Node(id='Reduced Photosynthesis', type='Symptom', properties={}), Node(id='Cankers', type='Symptom', properties={}), Node(id='Dieback Of Shoots', type='Symptom', properties={}), Node(id='Stunted Tree Growth', type='

In [14]:
# List the schema-constrained nodes, one per line.
schema_nodes = graph_documents_filtered[0].nodes
if schema_nodes:  # emit nothing at all when there are no nodes
    print("\n".join(str(schema_node) for schema_node in schema_nodes))

id='Phomopsis Leaf Blight' type='Disease' properties={}
id='Phomopsis Leaf Spot' type='Disease' properties={}
id='Phomopsis Dieback' type='Disease' properties={}
id='Durian' type='Crop' properties={}
id='Phomopsis Durionis Syd. & P. Syd.' type='Pathogen' properties={}
id='Diaporthe Sp.' type='Pathogen' properties={}
id='Small, Water-Soaked Lesions' type='Symptom' properties={}
id='Yellowish-Brown Or Greyish-Brown Necrotic Spots' type='Symptom' properties={}
id='Dark Or Purplish Borders' type='Symptom' properties={}
id='Yellow Halos' type='Symptom' properties={}
id='Blighted Patches' type='Symptom' properties={}
id='Defoliation' type='Symptom' properties={}
id='Reduced Photosynthesis' type='Symptom' properties={}
id='Cankers' type='Symptom' properties={}
id='Dieback Of Shoots' type='Symptom' properties={}
id='Stunted Tree Growth' type='Symptom' properties={}
id='Weakened Trees' type='Symptom' properties={}
id='Reduced Vigor' type='Symptom' properties={}
id='Diminished Fruit Yield And Qu

In [15]:
# List the schema-constrained relationships, one per line.
extracted_edges = graph_documents_filtered[0].relationships
if extracted_edges:  # emit nothing at all when there are no relationships
    print(*extracted_edges, sep="\n")

source=Node(id='Durian', type='Crop', properties={}) target=Node(id='Phomopsis Leaf Blight', type='Disease', properties={}) type='AFFECTED_BY' properties={}
source=Node(id='Phomopsis Leaf Blight', type='Disease', properties={}) target=Node(id='Phomopsis Durionis Syd. & P. Syd.', type='Pathogen', properties={}) type='CAUSED_BY' properties={}
source=Node(id='Phomopsis Leaf Blight', type='Disease', properties={}) target=Node(id='Diaporthe Sp.', type='Pathogen', properties={}) type='CAUSED_BY' properties={}
source=Node(id='Phomopsis Leaf Blight', type='Disease', properties={}) target=Node(id='Small, Water-Soaked Lesions', type='Symptom', properties={}) type='HAS_SYMPTOM' properties={}
source=Node(id='Phomopsis Leaf Blight', type='Disease', properties={}) target=Node(id='Yellowish-Brown Or Greyish-Brown Necrotic Spots', type='Symptom', properties={}) type='HAS_SYMPTOM' properties={}
source=Node(id='Phomopsis Leaf Blight', type='Disease', properties={}) target=Node(id='Dark Or Purplish Borde

In [16]:
# Write the schema-constrained graph documents to the Neo4j graph.
graph.add_graph_documents(graph_documents=graph_documents_filtered)