<a href="https://colab.research.google.com/github/tomasonjo/blogs/blob/master/llm/graphreader.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
!pip install --quiet neo4j langchain-community langchain-core langchain-openai

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/52.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m52.0/52.0 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/362.9 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m362.9/362.9 kB[0m [31m10.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m26.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m318.9/318.9 kB[0m [31m15.0 MB/s[0m eta [36m0:00:00[0m
[?25h

In [29]:
import os
import getpass
from langchain_core.prompts import ChatPromptTemplate

from langchain_openai import ChatOpenAI
from langchain_core.pydantic_v1 import BaseModel, Field

from typing import List

from hashlib import md5


In [None]:
# Ask for the key only when it is not already configured, so re-running the
# notebook (or running it with the variable pre-set) does not prompt again.
# getpass keeps the typed value out of the notebook output.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API Key:")

In [10]:
# System prompt for the extraction step. It instructs the model to pull out
# key elements and atomic facts from a passage and lists the requirements the
# extraction must satisfy. The text is sent to the model verbatim.
construction_system = """
You are now an intelligent assistant tasked with meticulously extracting both key elements and
atomic facts from a long text.
1. Key Elements: The essential nouns (e.g., characters, times, events, places, numbers), verbs (e.g.,
actions), and adjectives (e.g., states, feelings) that are pivotal to the text’s narrative.
2. Atomic Facts: The smallest, indivisible facts, presented as concise sentences. These include
propositions, theories, existences, concepts, and implicit elements like logic, causality, event
sequences, interpersonal relationships, timelines, etc.
Requirements:
#####
1. Ensure that all identified key elements are reflected within the corresponding atomic facts.
2. You should extract key elements and atomic facts comprehensively, especially those that are
important and potentially query-worthy and do not leave out details.
3. Whenever applicable, replace pronouns with their specific noun counterparts (e.g., change I, He,
She to actual names).
4. Ensure that the key elements and atomic facts you extract are presented in the same language as
the original text (e.g., English or Chinese).
"""

In [11]:
# Two-message chat prompt: the extraction instructions as the system message,
# followed by a human message that carries the passage in the {input} slot.
construction_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", construction_system),
        (
            "human",
            "Use the given format to extract information "
            "from the following input: {input}",
        ),
    ]
)

In [18]:
# Structured-output schema for a single extracted fact together with the key
# elements it mentions.
# Documented with comments rather than a class docstring: pydantic copies a
# model's docstring into its generated schema, which would change what the
# LLM receives.
class AtomicFact(BaseModel):
    # Key terms (nouns, verbs, adjectives) that the fact revolves around.
    key_elements: List[str] = Field(
        description=(
            "The essential nouns (e.g., characters, times, events, places, numbers), verbs (e.g.,\n"
            "actions), and adjectives (e.g., states, feelings) that are pivotal to the atomic fact's narrative."
        )
    )
    # The fact itself, phrased as one concise sentence.
    atomic_fact: str = Field(
        description=(
            "The smallest, indivisible facts, presented as concise sentences. These include\n"
            "propositions, theories, existences, concepts, and implicit elements like logic, causality, event\n"
            "sequences, interpersonal relationships, timelines, etc."
        )
    )

# Top-level structured-output schema: every atomic fact extracted from one
# input text. (Comments are used instead of a docstring so the generated
# schema stays unchanged.)
class Extraction(BaseModel):
    atomic_facts: List[AtomicFact] = Field(
        description="List of atomic facts",
    )


# Chat model used for the extraction step.
model = ChatOpenAI(
    model="gpt-4o",
    temperature=0.3,
)
# Bind the Extraction schema so the model's answer comes back as an
# Extraction object instead of free text.
structured_llm = model.with_structured_output(Extraction)

In [19]:
# Extraction chain: the prompt is piped into the structured-output model, so
# invoking the chain with {"input": ...} yields the model's Extraction result.
construction_chain = construction_prompt | structured_llm

In [21]:
# Sample passage (three paragraphs about Joan of Arc) used to try out the
# extraction chain.
text = """Joan of Arc (French: Jeanne d'Arc [ʒan daʁk]; Middle French: Jehanne Darc [ʒəˈãnə ˈdark]; c. 1412 – 30 May 1431) is a patron saint of France, honored as a defender of the French nation for her role in the siege of Orléans and her insistence on the coronation of Charles VII of France during the Hundred Years' War. Claiming to be acting under divine guidance, she became a military leader who transcended gender roles and gained recognition as a savior of France.

Joan was born to a propertied peasant family at Domrémy in northeast France. In 1428, she requested to be taken to Charles VII, later testifying that she was guided by visions from the archangel Michael, Saint Margaret, and Saint Catherine to help him save France from English domination. Convinced of her devotion and purity, Charles sent Joan, who was about seventeen years old, to the siege of Orléans as part of a relief army. She arrived at the city in April 1429, wielding her banner and bringing hope to the demoralized French army. Nine days after her arrival, the English abandoned the siege. Joan encouraged the French to aggressively pursue the English during the Loire Campaign, which culminated in another decisive victory at Patay, opening the way for the French army to advance on Reims unopposed, where Charles was crowned as the King of France with Joan at his side. These victories boosted French morale, paving the way for their final triumph in the Hundred Years' War several decades later.

After Charles's coronation, Joan participated in the unsuccessful siege of Paris in September 1429 and the failed siege of La Charité in November. Her role in these defeats reduced the court's faith in her. In early 1430, Joan organized a company of volunteers to relieve Compiègne, which had been besieged by the Burgundians—French allies of the English. She was captured by Burgundian troops on 23 May. After trying unsuccessfully to escape, she was handed to the English in November. She was put on trial by Bishop Pierre Cauchon on accusations of heresy, which included blaspheming by wearing men's clothes, acting upon visions that were demonic, and refusing to submit her words and deeds to the judgment of the church. She was declared guilty and burned at the stake on 30 May 1431, aged about nineteen."""
# Run the chain on the passage; the prompt's {input} slot receives the text.
result = construction_chain.invoke({"input":text})
# Print the structured result (a list of AtomicFact entries) to inspect it.
print(result)

atomic_facts=[AtomicFact(key_elements=['Joan of Arc', 'French', "Jeanne d'Arc", 'Middle French', 'Jehanne Darc', '1412', '30 May 1431', 'patron saint', 'France', 'defender', 'French nation', 'siege of Orléans', 'coronation', 'Charles VII', "Hundred Years' War"], atomic_fact="Joan of Arc (French: Jeanne d'Arc; Middle French: Jehanne Darc; c. 1412 – 30 May 1431) is a patron saint of France, honored as a defender of the French nation for her role in the siege of Orléans and her insistence on the coronation of Charles VII of France during the Hundred Years' War."), AtomicFact(key_elements=['Joan of Arc', 'divine guidance', 'military leader', 'gender roles', 'savior', 'France'], atomic_fact='Claiming to be acting under divine guidance, Joan of Arc became a military leader who transcended gender roles and gained recognition as a savior of France.'), AtomicFact(key_elements=['Joan of Arc', 'propertied peasant family', 'Domrémy', 'northeast France'], atomic_fact='Joan of Arc was born to a prop

In [23]:
# Show the extraction as plain dicts and lists; this is the shape that is
# later passed to the graph import.
result.dict()

{'atomic_facts': [{'key_elements': ['Joan of Arc',
    'French',
    "Jeanne d'Arc",
    'Middle French',
    'Jehanne Darc',
    '1412',
    '30 May 1431',
    'patron saint',
    'France',
    'defender',
    'French nation',
    'siege of Orléans',
    'coronation',
    'Charles VII',
    "Hundred Years' War"],
   'atomic_fact': "Joan of Arc (French: Jeanne d'Arc; Middle French: Jehanne Darc; c. 1412 – 30 May 1431) is a patron saint of France, honored as a defender of the French nation for her role in the siege of Orléans and her insistence on the coronation of Charles VII of France during the Hundred Years' War."},
  {'key_elements': ['Joan of Arc',
    'divine guidance',
    'military leader',
    'gender roles',
    'savior',
    'France'],
   'atomic_fact': 'Claiming to be acting under divine guidance, Joan of Arc became a military leader who transcended gender roles and gained recognition as a savior of France.'},
  {'key_elements': ['Joan of Arc',
    'propertied peasant famil

In [25]:
from langchain_community.graphs import Neo4jGraph

# Neo4jGraph() is constructed without explicit connection arguments, so the
# connection settings are supplied through these environment variables.
# The URI and username fall back to the demo instance only when they are not
# already set, which lets the notebook be pointed at another database without
# editing this cell.
os.environ.setdefault("NEO4J_URI", "bolt://34.239.226.242:7687")
os.environ.setdefault("NEO4J_USERNAME", "neo4j")
# The password must not live in the notebook: take it from the environment,
# or prompt for it (getpass keeps it out of the cell output).
if not os.environ.get("NEO4J_PASSWORD"):
    os.environ["NEO4J_PASSWORD"] = getpass.getpass("Neo4j password:")

graph = Neo4jGraph(refresh_schema=False)

# One uniqueness constraint per node label on its `id` property.
# IF NOT EXISTS makes the cell safe to re-run.
graph.query("""CREATE CONSTRAINT IF NOT EXISTS FOR (c:Chunk) REQUIRE c.id IS UNIQUE""")
graph.query("""CREATE CONSTRAINT IF NOT EXISTS FOR (c:AtomicFact) REQUIRE c.id IS UNIQUE""")
graph.query("""CREATE CONSTRAINT IF NOT EXISTS FOR (c:KeyElement) REQUIRE c.id IS UNIQUE""")

[]

In [38]:
# Cypher statement that writes one chunk and its extraction to the graph.
# Parameters: $chunk_id, $chunk_text, and $atomic_facts (a list of maps with
# `id`, `atomic_fact` and `key_elements`).
# It merges the Chunk node and sets its text, then for each atomic fact merges
# an AtomicFact node (keyed by id) linked via HAS_ATOMIC_FACT, and for each of
# that fact's key elements merges a KeyElement node (keyed by the element
# string) linked via HAS_KEY_ELEMENT. MERGE is used throughout, so existing
# nodes and relationships are reused rather than duplicated.
import_query = """
MERGE (c:Chunk {id: $chunk_id})
SET c.text = $chunk_text
WITH c
UNWIND $atomic_facts AS af
MERGE (a:AtomicFact {id: af.id})
SET a.text = af.atomic_fact
MERGE (c)-[:HAS_ATOMIC_FACT]->(a)
WITH c, a, af
UNWIND af.key_elements AS ke
MERGE (k:KeyElement {id: ke})
MERGE (a)-[:HAS_KEY_ELEMENT]->(k)
"""

def encode_md5(text):
    """Return the hexadecimal MD5 digest of ``text`` encoded as UTF-8.

    Used to derive stable node ids from text content.
    """
    hasher = md5()
    hasher.update(text.encode("utf-8"))
    return hasher.hexdigest()

def extract_and_store(text):
    """Extract atomic facts from ``text`` and write them to the graph.

    Runs ``construction_chain`` on the text, tags every extracted atomic fact
    with an ``id`` (the MD5 of its sentence), and executes ``import_query``
    with the MD5 of the whole text as the chunk id. Returns ``None``.
    """
    extraction = construction_chain.invoke({"input": text}).dict()
    atomic_facts = extraction["atomic_facts"]
    # Identical sentences hash to the same id, so the MERGE in import_query
    # reuses the existing AtomicFact node for them.
    for fact in atomic_facts:
        fact["id"] = encode_md5(fact["atomic_fact"])
    query_params = {
        "chunk_id": encode_md5(text),
        "chunk_text": text,
        "atomic_facts": atomic_facts,
    }
    graph.query(import_query, params=query_params)

In [39]:
# Extract atomic facts from the sample passage and write them to the graph.
extract_and_store(text)