In [7]:
# Import necessary libraries
from langchain_core.runnables import (
    RunnableBranch,
    RunnableLambda,
    RunnableParallel,
    RunnablePassthrough,
)
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.prompts.prompt import PromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field
from typing import Tuple, List, Optional
from langchain_core.messages import AIMessage, HumanMessage
from langchain_core.output_parsers import StrOutputParser
import os
from langchain_community.graphs import Neo4jGraph
from langchain.document_loaders import WikipediaLoader
from langchain.text_splitter import TokenTextSplitter
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_experimental.graph_transformers import LLMGraphTransformer
from neo4j import GraphDatabase
from yfiles_jupyter_graphs import GraphWidget
from langchain_community.vectorstores import Neo4jVector
from langchain_community.vectorstores.neo4j_vector import remove_lucene_chars
from langchain_core.runnables import ConfigurableField, RunnableParallel, RunnablePassthrough
from langchain.chains import GraphCypherQAChain, RetrievalQA
from langchain_core.documents import Document
import pandas as pd

import warnings
warnings.filterwarnings("ignore")

# Load environment variables
from dotenv import load_dotenv
load_dotenv()


# Access the variables
NEO4J_URI = os.getenv("NEO4J_URI")
NEO4J_USERNAME = os.getenv("NEO4J_USERNAME")
NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD")

open_api_key = os.getenv("OPEN_API_KEY")

graph = Neo4jGraph()

In [23]:
# Initialize OpenAI model and embeddings
#llm = ChatOpenAI(model="gpt-3.5-turbo-0125")
llm = ChatOpenAI(model="gpt-4-turbo")
embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")

In [None]:
# Initialize Neo4j graph
os.environ["NEO4J_URI"] = "bolt://localhost:7687"  # Replace with your Neo4j URI
os.environ["NEO4J_USERNAME"] = "neo4j"  # Replace with your Neo4j username
os.environ["NEO4J_PASSWORD"] = "password"  # Replace with your Neo4j password

In [24]:
# Load documents from Wikipedia
raw_documents = WikipediaLoader(query="French Revolution", load_max_docs=1).load()

In [25]:
# Clean metadata
raw_documents = [Document(page_content=doc.page_content, metadata={}) for doc in raw_documents]

In [26]:
# Split documents into chunks
text_splitter = TokenTextSplitter(chunk_size=512, chunk_overlap=24)
documents = text_splitter.split_documents(raw_documents)

In [27]:
documents

[Document(page_content='The French Revolution (French: Révolution française [ʁevɔlysjɔ̃ fʁɑ̃sɛːz]) was a period of political and societal change in France that began with the Estates General of 1789, and ended with the coup of 18 Brumaire in November 1799 and the formation of the French Consulate. Many of its ideas are considered fundamental principles of liberal democracy, while its values and institutions remain central to modern French political discourse.\nThe causes of the revolution were a combination of social, political, and economic factors which the ancien régime ("old regime") proved unable to manage. A financial crisis and widespread social distress led to the convocation of the Estates General in May 1789, its first meeting since 1614. The representatives of the Third Estate broke away, and re-constituted themselves as a National Assembly in June. The Storming of the Bastille in Paris on 14 July was followed by a series of radical measures by the Assembly, among them the a

In [28]:
# Convert documents to graph documents
llm_transformer = LLMGraphTransformer(llm=llm)

In [29]:
from langchain_openai import ChatOpenAI
from langchain_experimental.graph_transformers import LLMGraphTransformer

# Initialize the LLM
#llm = ChatOpenAI(model="gpt-3.5-turbo-0125")
llm = ChatOpenAI(model="gpt-4-turbo")

# Initialize the LLMGraphTransformer with the LLM
llm_transformer = LLMGraphTransformer(llm=llm)

In [30]:
# Print the raw response from the LLM
for doc in documents:
    response = llm.invoke(doc.page_content)
    print(response)

content="leading to increased demand for land, food, and goods. However, economic growth did not keep pace with population growth, contributing to widespread poverty and discontent among the lower classes, particularly the peasants and urban poor. The agrarian crisis of the late 1780s, marked by poor harvests that led to famine and increased bread prices, heightened the economic distress.\n\nSocially, France was divided into three estates: the clergy (First Estate), the nobility (Second Estate), and the commoners (Third Estate). The First and Second Estates were privileged classes enjoying significant economic, social, and political benefits, while the Third Estate, comprising about 98% of the population, shouldered the majority of the tax burdens yet had minimal political power. This stark inequality contributed to resentment and a demand for social change.\n\nPolitically, the French monarchy was absolute, and the ancien régime was characterized by a lack of representative institution

In [31]:

graph_documents = llm_transformer.convert_to_graph_documents(documents)

In [32]:
# Add graph documents to Neo4j
graph.add_graph_documents(
    graph_documents,
    baseEntityLabel=True,
    include_source=True
)

In [33]:
# Visualize the graph
default_cypher = "MATCH (s)-[r:!MENTIONS]->(t) RETURN s,r,t LIMIT 50"

def showGraph(cypher: str = default_cypher):
    driver = GraphDatabase.driver(
        uri=os.environ["NEO4J_URI"],
        auth=(os.environ["NEO4J_USERNAME"], os.environ["NEO4J_PASSWORD"])
    )
    session = driver.session()
    widget = GraphWidget(graph=session.run(cypher).graph())
    widget.node_label_mapping = 'id'
    return widget

showGraph()

GraphWidget(layout=Layout(height='700px', width='100%'))

In [34]:
# Create a vector index from the existing graph
vector_index = Neo4jVector.from_existing_graph(
    embeddings,
    search_type="hybrid",
    node_label="Document",
    text_node_properties=["text"],
    embedding_node_property="embedding"
)

In [35]:
# Query the graph
query = "Who is Dudley?"
results = vector_index.similarity_search(query, k=1)
print(results[0].page_content)


text: 
Mr. and Mrs. Dursley, of number four, Privet Drive, were proud to say
that they were perfectly normal, thank you very much. They were the last
people you'd expect to be involved in anything strange or mysterious,
because they just didn't hold with such nonsense.
Mr. Dursley was the director of a firm called Grunnings, which made
drills. He was a big, beefy man with hardly any neck, although he did
have a very large mustache. Mrs. Dursley was thin and blonde and had
nearly twice the usual amount of neck, which came in very useful as she
spent so much of her time craning over garden fences, spying on the
neighbors. The Dursleys had a small son called Dudley and in their
opinion there was no finer boy anywhere.
The Dursleys had everything they wanted, but they also had a secret, and
their greatest fear was that somebody would discover it. They didn't
think they could bear it if anyone found out about the Potters. Mrs.
Potter was Mrs. Dursley's sister, but they hadn't met for sever

In [37]:
# Define questions for evaluation
questions = [
    "How did the economic policies of the Ancien Régime contribute to the financial crisis that precipitated the French Revolution?",
    "In what ways did the social and political structure of the Estates-General contribute to its transformation into the National Assembly?",
    "What role did economic depression and military defeats play in the radicalization of the French Revolution in 1792?",
    "How did the French Revolutionary Wars affect the internal political landscape of France from 1792 to 1799?",
    "Examine the socio-economic reasons behind the calling of the Estates-General in 1789.",
    "How did Enlightenment ideas influence the legislative reforms of the National Assembly?",
    "What event directly led to the transformation of the Estates-General into the National Assembly in June 1789?",
    "Which radical measure taken by the National Assembly on July 14, 1789, symbolically marked the beginning of the French Revolution?",
    "Which governing body replaced the National Convention after the fall of Robespierre in 1794?",
    "What significant political change occurred in France on 18 Brumaire in 1799?",
    "Considering the financial difficulties faced by the Ancien Régime, how did the complex and inconsistent tax system contribute to the financial instability and eventual calling of the Estates-General?",
    "What role did the socio-economic pressures such as the increase in the population and the widening gap between the rich and the poor play in setting the stage for the French Revolution?",
    "How did the financial crisis, exacerbated by poor harvests and high food prices, lead to the convening of the Estates-General in 1789?",
    "Discuss the immediate political repercussions of the Storming of the Bastille on the French Revolution."
]

In [None]:
from langchain.chains import RetrievalQA

# Initialize the RetrievalQA chain with source documents
chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=vector_index.as_retriever(),
    verbose=True,
    return_source_documents=True  # Include source documents in the output
)

In [42]:
# Evaluate the graph-based answers
graph_results = []
graph_source_documents = []
for q in questions:
    result = chain.invoke({"query": q})
    graph_results.append(result["result"])
    graph_source_documents.append(result["source_documents"])  # Access source documents



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m

In [45]:
# Create a DataFrame to store results
df = pd.DataFrame({
    'questions': questions,
    'graph_results': graph_results,
    'graph_source_documents': graph_source_documents
})

# Save the results to an Excel file
df.to_excel('graphrag_results.xlsx', index=False)