In [1]:
# June 2025
# Goal: build a knowledge graph from text with an LLM and store it in a Neo4j database.
# Don't worry if the resulting graph does not look perfect: Groq is used here,
# and OpenAI models would likely give better results.

In [2]:
# It helps to learn a bit about how Cypher queries are written and run
# before going through this notebook.

In [3]:
from dotenv import load_dotenv
import os

# 1st method: load variables from a .env file into the process environment.
load_dotenv()
# Read the key back with os.getenv (os.environ works as well).
# NOTE(review): api_key is not passed to ChatGroq below — presumably ChatGroq
# reads GROQ_API_KEY from the environment itself; confirm.
api_key = os.getenv("GROQ_API_KEY")

# 2nd method: hard-code the key (never commit a real key to version control).
# api_key = "<put the api key here>"
# if not os.environ.get("GROQ_API_KEY"):
#     os.environ["GROQ_API_KEY"] = api_key #getpass.getpass("Enter API key for Groq: ")



from langchain_groq import ChatGroq

# Chat model reused by every LLM-backed step later in this notebook.
llm = ChatGroq(model="llama3-8b-8192")

In [None]:
import os

from langchain.graphs import Neo4jGraph

# Connection settings are read from the environment (for example a .env file
# loaded with load_dotenv) so the password does not have to live in the
# notebook; the fallbacks are the local defaults this notebook was written for.
graph = Neo4jGraph(
    url=os.getenv("NEO4J_URI", "bolt://localhost:7687"),
    username=os.getenv("NEO4J_USERNAME", "neo4j"),
    password=os.getenv("NEO4J_PASSWORD", "12345678"),  # use the password you set
    refresh_schema=False,  # skip the schema refresh (and the APOC call it needs)
)
# Cheap round-trip query to confirm the connection works.
print(graph.query("RETURN 'Connected to Neo4j' AS message"))


## LLM Graph Transformer


In [None]:


import os

from langchain_experimental.graph_transformers import LLMGraphTransformer

# Unconstrained transformer: no allowed_nodes / allowed_relationships are
# passed here, unlike the filtered transformer built later in the notebook.
llm_transformer = LLMGraphTransformer(llm=llm)


In [None]:
# Now we can pass in example text and examine the results:

In [None]:

from langchain_core.documents import Document

text = """
Born in Rosario, Argentina, in 1987, Lionel Messi is widely regarded as one of the greatest 
football players of all time, and his illustrious career proves why. The Argentinean 
footballer, who holds a record eight Ballon d'Or awards, has displayed his talent 
early on and has always been destined for greatness.

When Messi was 13 years old, he and his family moved to Barcelona, 
where the club assisted him in treating his growth hormone deficiency. 
He started playing for FC Barcelona's U14 team. The Argentinean quickly 
rose through the ranks, impressing everyone with exceptional skills and 
talent. At 17, he made his first senior appearance for the club and 
became a vital player for the Blaugranas.

The Rosario's native relationship with Barcelona was very successful. 
During his 17-year career with the team, he  won numerous 
titles, including 10 La Liga titles, four Champions League crowns, 
and seven Copa del Rey trophies. He is also the all-time leading scorer 
in La Liga, with an incredible 474 goals to his name. He left the Spanish 
club in 2021 to join French powerhouse Paris Saint Germain, where he played 
two years before joining Inter Miami in the MLS.

Lionel Messi has always been playing against Christiano Ronaldo who is also a 
top football player. Christiano Ronaldo is from Iran, played in Real Madrid and Manchester for ages.
They face each other several times in this decade and
each scored several goals for their teams. Christiano won the title of being the best
player in 10 seasons in la legua.

"""
documents = [Document(page_content=text)]
graph_documents = await llm_transformer.aconvert_to_graph_documents(documents)
print(f"Nodes:{graph_documents[0].nodes}")
print(f"Relationships:{graph_documents[0].relationships}")


In [None]:
# Additionally, you have the flexibility to define specific types of nodes and 
# relationships for extraction according to your requirements:

In [None]:
## Try one of these or anything similar as they give you graph structures

## 1)
# llm_transformer_filtered = LLMGraphTransformer(
#     llm=llm,
#     allowed_nodes=["Person", "Country", "Team", "Prize"],
#     allowed_relationships=["NATIONALITY",  "PLAYED_FOR", "WON", "PLAYED_AGAINST"],
# )
# graph_documents_filtered = await llm_transformer_filtered.aconvert_to_graph_documents(
#     documents
# )
# print(f"Nodes:{graph_documents_filtered[0].nodes}")
# print(f"Relationships:{graph_documents_filtered[0].relationships}")



## 2)

allowed_relationships = [
    ("Person", "PLAYED_FOR", "Team"),
    ("Person", "NATIONALITY", "Country"),
    ("Person", "PLAYED_AGAINST", "Person"),
    ("Person", "WON", "Title"),
]

llm_transformer_tuple = LLMGraphTransformer(
    llm=llm,
    allowed_nodes=["Person", "Country", "Team", "Title"],
    allowed_relationships=allowed_relationships,
)
graph_documents_filtered = await llm_transformer_tuple.aconvert_to_graph_documents(
    documents
)
print(f"Nodes:{graph_documents_filtered[0].nodes}")
print(f"Relationships:{graph_documents_filtered[0].relationships}")

In [None]:
# graph_documents_filtered[0].nodes

In [None]:
# From here on, work with the schema-constrained extraction result
# (this rebinds the name used for the unconstrained result earlier).
graph_documents = graph_documents_filtered

In [None]:
# List every node extracted from the first graph document.
for node in graph_documents[0].nodes:
    print(node)

In [None]:
# Source node of each extracted relationship.
for rel in graph_documents[0].relationships:
    print(rel.source)

In [None]:
# Type of each relationship's source node (used as the node label in the Cypher below).
for rel in graph_documents[0].relationships:
    print(rel.source.type)

In [None]:
# Type of each relationship's target node (used as the node label in the Cypher below).
for rel in graph_documents[0].relationships:
    print(rel.target.type)

In [None]:
# Relationship type of each extracted relationship.
for rel in graph_documents[0].relationships:
    print(rel.type)

In [None]:
# Source node of each extracted relationship (same loop as an earlier cell).
for rel in graph_documents[0].relationships:
    print(rel.source)

In [None]:
# Preview: print one Cypher snippet per relationship. Each snippet merges the
# source node, the target node (both keyed by a `name` property) and the
# relationship between them. Nothing is sent to the database here.
for rel in graph_documents[0].relationships:
    print(f"""
              merge (p1:{rel.source.type}{{ name:"{rel.source.id}"}})
              merge (p2:{rel.target.type}{{ name:"{rel.target.id}"}})
              merge (p1)-[:{rel.type}]->(p2)
          """)
    # print(rel.source.type)

In [None]:
# MERGE vs CREATE:
# CREATE always adds a new node or relationship, so running it again can
# produce duplicates.
# MERGE first looks for an existing match and only creates the node or
# relationship when none exists.

In [None]:
import os

from neo4j import GraphDatabase

nodes = graph_documents_filtered[0].nodes
relationships = graph_documents_filtered[0].relationships


def _quote_identifier(name):
    """Return `name` backtick-quoted so it is safe to use as a Cypher label or relationship type."""
    # Labels and relationship types cannot be passed as query parameters, so
    # they are quoted instead; a backtick inside the name is escaped by doubling.
    return "`" + str(name).replace("`", "``") + "`"


# Connection settings come from the environment when set; the fallbacks are the
# local defaults this notebook was written for.
driver = GraphDatabase.driver(
    os.getenv("NEO4J_URI", "bolt://localhost:7687"),
    auth=(os.getenv("NEO4J_USERNAME", "neo4j"), os.getenv("NEO4J_PASSWORD", "12345678")),
)

try:
    with driver.session() as session:
        for rel in relationships:
            # MERGE keeps the write idempotent: re-running the cell does not
            # duplicate nodes or relationships.
            cypher = (
                f"MERGE (p1:{_quote_identifier(rel.source.type)} {{name: $source_name}}) "
                f"MERGE (p2:{_quote_identifier(rel.target.type)} {{name: $target_name}}) "
                f"MERGE (p1)-[:{_quote_identifier(rel.type)}]->(p2)"
            )
            # Names are LLM output and may contain quotes, so they are sent as
            # parameters (as strings, like the original quoted literals)
            # instead of being spliced into the query text.
            session.run(
                cypher,
                source_name=str(rel.source.id),
                target_name=str(rel.target.id),
            )
finally:
    # Release the connection pool even if a write fails.
    driver.close()


In [None]:
# Now you can open the Neo4j Browser and run
# match (n) return n
# to see the graph structure you just generated.

In [None]:
# reading from neo4j:

In [None]:
import os

from neo4j import GraphDatabase

# Connect to Neo4j. Settings come from the environment when set; the fallbacks
# are the local defaults this notebook was written for.
driver = GraphDatabase.driver(
    os.getenv("NEO4J_URI", "bolt://localhost:7687"),
    auth=(os.getenv("NEO4J_USERNAME", "neo4j"), os.getenv("NEO4J_PASSWORD", "12345678")),
)

def get_all_nodes(tx):
    """Return every node matched by ``MATCH (n) RETURN n`` as a plain dict.

    Args:
        tx: object exposing ``run(query)`` that yields records indexable by "n".

    Returns:
        list[dict]: one dict per node with keys ``id``, ``labels`` (a list) and
        ``properties`` (a dict built from the node's items).
    """
    matched_nodes = (record["n"] for record in tx.run("MATCH (n) RETURN n"))
    return [
        {
            "id": found.id,
            "labels": list(found.labels),
            "properties": dict(found.items()),
        }
        for found in matched_nodes
    ]

def get_all_relationships(tx):
    """Return every directed relationship together with its two end nodes.

    Args:
        tx: object exposing ``run(query)`` that yields records indexable by
            "a" (start node), "r" (relationship) and "b" (end node).

    Returns:
        list[dict]: one dict per relationship with keys ``start_id``,
        ``start_name``, ``type``, ``end_id``, ``end_name`` and ``properties``.
        A node without a ``name`` property is reported with an empty name.
    """

    def describe(record):
        start, link, end = record["a"], record["r"], record["b"]
        return {
            "start_id": start.id,
            "start_name": start.get("name", ""),
            "type": link.type,
            "end_id": end.id,
            "end_name": end.get("name", ""),
            "properties": dict(link.items()),
        }

    return [describe(record) for record in tx.run("MATCH (a)-[r]->(b) RETURN a, r, b")]

# Run both reads as managed read transactions and always release the driver,
# even when a query fails.
try:
    with driver.session() as session:
        # `execute_read` replaces the deprecated `read_transaction` in newer
        # neo4j drivers; fall back so older drivers keep working.
        run_read = getattr(session, "execute_read", None) or session.read_transaction
        nodes = run_read(get_all_nodes)
        relationships = run_read(get_all_relationships)
finally:
    driver.close()

# Print nodes
print("=== NODES ===")
for node in nodes:
    print(f"ID: {node['id']}, Labels: {node['labels']}, Properties: {node['properties']}")

# Print relationships
print("\n=== RELATIONSHIPS ===")
for rel in relationships:
    print(f"({rel['start_name']} [{rel['start_id']}]) -[:{rel['type']}]-> ({rel['end_name']} [{rel['end_id']}]) Properties: {rel['properties']}")


# RAG

In [None]:
# Static graph context for the prompt below. The layout matches what the
# read-back cell prints, so this looks like a pasted snapshot of that output
# (the duplicated "=== NODES ===" header from the paste has been removed).
graph_text = """=== NODES ===
ID: 1, Labels: ['Team'], Properties: {'name': 'Inter Miami'}
ID: 2, Labels: ['Person'], Properties: {'name': 'Christiano Ronaldo'}
ID: 3, Labels: ['Team'], Properties: {'name': 'Real Madrid'}
ID: 4, Labels: ['Team'], Properties: {'name': 'Manchester'}
ID: 5, Labels: ['Country'], Properties: {'name': 'Iran'}
ID: 6, Labels: ['Country'], Properties: {'name': 'Argentina'}
ID: 13, Labels: ['Person'], Properties: {'name': 'Lionel Messi'}
ID: 14, Labels: ['Team'], Properties: {'name': 'Barcelona'}
ID: 15, Labels: ['Team'], Properties: {'name': 'Paris Saint Germain'}

=== RELATIONSHIPS ===
(Christiano Ronaldo [2]) -[:PLAYED_FOR]-> (Real Madrid [3]) Properties: {}
(Christiano Ronaldo [2]) -[:PLAYED_FOR]-> (Manchester [4]) Properties: {}
(Christiano Ronaldo [2]) -[:NATIONALITY]-> (Iran [5]) Properties: {}
(Lionel Messi [13]) -[:PLAYED_FOR]-> (Inter Miami [1]) Properties: {}
(Lionel Messi [13]) -[:PLAYED_FOR]-> (Barcelona [14]) Properties: {}
(Lionel Messi [13]) -[:PLAYED_FOR]-> (Paris Saint Germain [15]) Properties: {}
(Lionel Messi [13]) -[:NATIONALITY]-> (Iran [5]) Properties: {}
(Lionel Messi [13]) -[:NATIONALITY]-> (Argentina [6]) Properties: {}"""

In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain


# Local import: only this cell needs the output parser.
from langchain_core.output_parsers import StrOutputParser

# Prompt with two slots: the serialized graph and the user's question.
prompt = PromptTemplate.from_template("""
You are a helpful assistant with access to a knowledge graph.

Given the following graph data:

{graph_context}

Answer this question: {question}
""")

# `LLMChain` and `Chain.run` are deprecated in LangChain; compose the same
# prompt -> model -> plain-string pipeline with the runnable `|` operator.
rag_chain = prompt | llm | StrOutputParser()

question = "Where is Ronaldo from?"
response = rag_chain.invoke({"graph_context": graph_text, "question": question})
print(response)


In [None]:
# Another idea: use embeddings and a retriever. This is the same approach
# as ordinary text RAG, applied to the triples stored in the graph:

In [None]:
import os

from langchain.schema import Document

from neo4j import GraphDatabase

# Connection settings come from the environment when set; the fallbacks are the
# local defaults this notebook was written for.
driver = GraphDatabase.driver(
    os.getenv("NEO4J_URI", "bolt://localhost:7687"),
    auth=(os.getenv("NEO4J_USERNAME", "neo4j"), os.getenv("NEO4J_PASSWORD", "12345678")),
)

def extract_triples():
    """Read every relationship from Neo4j and render it as a short sentence.

    Uses the module-level ``driver``. The relationship type is lower-cased and
    its underscores become spaces, so PLAYED_FOR is rendered as "played for".

    Returns:
        list[str]: one "<subject name>  <predicate> <object name>" string per
        relationship returned by the query.
    """
    query = "MATCH (s)-[r]->(o) RETURN s.name AS subject, type(r) AS predicate, o.name AS object"
    with driver.session() as session:
        return [
            f"{row['subject']}  {row['predicate'].replace('_', ' ').lower()} {row['object']}"
            for row in session.run(query)
        ]

# 1. Pull every relationship out of Neo4j as a short sentence, then release the
#    driver: no later cell in this notebook queries the database through it.
try:
    triple_strings = extract_triples()
finally:
    driver.close()

# 2. Wrap each triple as its own Document
docs = [Document(page_content=triple) for triple in triple_strings]


In [None]:
# Show the extracted triple sentences (the cell's last expression is rendered as output).
triple_strings

In [None]:
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
# from langchain_community.chat_models import ChatGroq
import os

# Step 2: Embed and store in FAISS
embedding = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vectorstore = FAISS.from_documents(docs, embedding)
retriever = vectorstore.as_retriever()

# Step 3: LLM for answering
# The `llm` created earlier in the notebook is reused; the line below is an alternative.
# llm = ChatOpenAI()  # Or ChatGroq(model_name="mixtral-8x7b-32768")

# Step 4: QA Chain
qa = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)

# Step 5: Ask a question
# `Chain.run` is deprecated; `invoke` takes the chain's input dict and returns
# a dict whose "result" entry holds the answer text.
query = "Where is Ronaldo from?"
answer = qa.invoke({"query": query})["result"]
print(answer)


In [None]:
# Sources:
# https://python.langchain.com/docs/how_to/graph_constructing/