In [6]:
%pip install --upgrade --quiet langchain langchain-community langchain-ollama langchain-experimental neo4j tiktoken yfiles_jupyter_graphs python-dotenv json-repair==0.30.2 langchain-openai langchain_core 



In [13]:
from langchain_core.runnables import  RunnablePassthrough
from langchain_core.prompts import ChatPromptTemplate
from pydantic import BaseModel, Field
from langchain_core.output_parsers import StrOutputParser
from langchain_community.graphs import Neo4jGraph
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.chat_models import ChatOllama
from langchain_experimental.graph_transformers import LLMGraphTransformer
from neo4j import GraphDatabase
from yfiles_jupyter_graphs import GraphWidget
from langchain_community.vectorstores import Neo4jVector
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores.neo4j_vector import remove_lucene_chars
from langchain_ollama import OllamaEmbeddings
import os
from langchain_experimental.llms.ollama_functions import OllamaFunctions
from langchain.embeddings import OpenAIEmbeddings
from neo4j_graphrag.retrievers import VectorCypherRetriever
from neo4j import  Driver

from dotenv import load_dotenv

# Load connection settings and API keys from the local .env file.
# override=True lets values from .env replace stale values already present in
# the process environment, without wiping unrelated variables (PATH, HOME,
# proxy settings, ...) the way os.environ.clear() would.
load_dotenv(override=True)

# Read the required settings; a missing one raises KeyError right here instead
# of failing later with a less obvious connection error.
# Never print these values: notebook outputs are saved with the file.
url = os.environ["NEO4J_URI"]
username = os.environ["NEO4J_USERNAME"]
password = os.environ["NEO4J_PASSWORD"]
openai_api_key = os.environ["OPENAI_API_KEY"]

# Low-level Neo4j driver, used for hand-written Cypher and by the retriever.
driver = GraphDatabase.driver(url, auth=(username, password))

# LangChain graph wrapper, used to store the extracted graph documents.
graph = Neo4jGraph(
    url=url,
    username=username,
    password=password,
)



In [14]:
import asyncio
from concurrent.futures import ThreadPoolExecutor
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_experimental.graph_transformers import LLMGraphTransformer
from langchain_openai import ChatOpenAI

# Step 1: Load the source text.
# The encoding is given explicitly because the file is UTF-8 Chinese text;
# without it TextLoader falls back to the platform locale, which can raise or
# misdecode the text on systems whose default encoding is not UTF-8.
loader = TextLoader(
    file_path="input/hetangyuese_zhuziqing_utf8.txt",
    encoding="utf-8",
)
docs = loader.load()

# Step 2: Split the text into overlapping chunks (a list of Document objects).
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=24)
documents = text_splitter.split_documents(docs)

# Step 3: Build the LLM-backed graph transformer.
# temperature=0 keeps the extraction as repeatable as the model allows.
llm = ChatOpenAI(temperature=0, model_name="gpt-4o-mini")
llm_graph_transformer = LLMGraphTransformer(llm=llm)

# Step 4: Have the LLM extract nodes and relationships from every chunk.
graph_documents = llm_graph_transformer.convert_to_graph_documents(documents)

# Step 5: Persist the extracted graph in Neo4j.
# NOTE(review): include_source=True is expected to also store each source
# chunk as a node and to give it a metadata "id"; the embedding cell below
# reads doc.metadata["id"] and relies on that - confirm against the installed
# langchain-community version.
graph.add_graph_documents(
    graph_documents,
    baseEntityLabel=True,
    include_source=True,
)



In [15]:
# Embedding model used both to index the chunks and to embed incoming queries.
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

# Embed all chunks in one batched call instead of one request per chunk.
chunk_vectors = embeddings.embed_documents([doc.page_content for doc in documents])

with driver.session() as session:
    if chunk_vectors:
        # The retriever below queries an index named "vector". Create it when
        # it does not exist yet so the notebook also runs against a fresh
        # database; IF NOT EXISTS makes this a no-op on later runs. The
        # dimension is taken from the embeddings themselves so it always
        # matches the model in use.
        session.run(
            """
            CREATE VECTOR INDEX `vector` IF NOT EXISTS
            FOR (d:Document) ON (d.embedding)
            OPTIONS {indexConfig: {
                `vector.dimensions`: toInteger($dimensions),
                `vector.similarity_function`: 'cosine'
            }}
            """,
            {"dimensions": len(chunk_vectors[0])},
        ).consume()

    # Attach text and embedding to each chunk's Document node.
    # NOTE(review): doc.metadata["id"] is not set by the text splitter; it is
    # presumably filled in by graph.add_graph_documents(include_source=True)
    # in the previous cell - run that cell first.
    for doc, embedding_vector in zip(documents, chunk_vectors):
        session.run(
            """
            MERGE (d:Document {id: $id}) 
            SET d.text = $text, d.embedding = $embedding
            """,
            {"id": doc.metadata["id"], "text": doc.page_content, "embedding": embedding_vector}
        )

# Retriever that runs a similarity search against the "vector" index.
vector_cypher_retriever = VectorCypherRetriever(
    driver=driver,
    index_name="vector",   # must match the vector index on Document.embedding
    embedder=embeddings,   # turns the query text into a vector at search time
    # Empty: no custom Cypher is supplied to follow graph relationships.
    # NOTE(review): with an empty query the retriever appears to fall back to
    # returning the matched node itself - confirm against neo4j-graphrag docs.
    retrieval_query="",
)

  embeddings = OpenAIEmbeddings(model="text-embedding-3-small")




In [16]:
from langchain.schema.runnable import RunnableLambda

def retrieve_docs(query: str):
    """Search the vector index for content related to ``query``.

    Args:
        query: The natural-language question to look up.

    Returns:
        Whatever ``vector_cypher_retriever.search`` returns for the five
        closest matches (``top_k=5``).
    """
    search_result = vector_cypher_retriever.search(query_text=query, top_k=5)
    return search_result

# Expose the retrieval function as a Runnable so it can be piped in a chain.
retriever_runnable = RunnableLambda(retrieve_docs)

# Prompt: answer strictly from the retrieved context.
template = (
    "Answer the question based only on the following context:\n"
    "{context}\n"
    "\n"
    "Question: {question}\n"
    "Use natural language and be concise.\n"
    "Answer:"
)
prompt = ChatPromptTemplate.from_template(template)

# The same input string goes to both keys: "context" runs it through the
# retriever, "question" passes it through unchanged.
chain_inputs = {
    "context": retriever_runnable,
    "question": RunnablePassthrough(),
}

# retrieve -> fill prompt -> call the LLM -> plain string answer
chain = chain_inputs | prompt | llm | StrOutputParser()



In [18]:
# Ask the chain who wrote the essay and print its answer.
response = chain.invoke(input="文章作者是谁?")
print(response)

文章作者是朱自清。


In [19]:
# Ask the chain in which year the essay was written and print its answer.
response = chain.invoke(input="文章是哪一年写的?")
print(response)

文章是1927年写的。


In [20]:
# Ask the chain how the author got to the lotus pond and print its answer.
response = chain.invoke(input="作者怎样来到荷塘的？")
print(response)

作者在一个满月的晚上，心中感到不宁静，决定披上大衣，悄悄走出家门，沿着一条曲折的小路前往荷塘。


In [21]:
# Ask the chain what the author finds valuable about solitude and print its answer.
response = chain.invoke(input="作者认为独处有哪些妙处？")
print(response)

作者认为独处的妙处在于可以超越平常的自我，享受自由的思考，不必理会白天必须做的事情和说的话。在独处的状态下，可以随意想象和感受，体验无边的荷香和月色，感受到一种宁静和自由。
