In [None]:
!pip install langchain
!pip install neo4j
!pip install -U langchain-openai
!pip install pyvis
!pip install pypdf
!pip install langchain-chroma

In [None]:
import os
import openai
import sys

from langchain.document_loaders import PyPDFLoader
from langchain.docstore.document import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings
from langchain_openai import OpenAI
# from langchain.llms import OpenAI (deprecated)
# from langchain.chat_models import ChatOpenAI (deprecated)
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain.chains.query_constructor.base import AttributeInfo
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor
from langchain.chains import RetrievalQA


In [None]:
# Clone the repository that holds the PDF loaded further down
# (the loader reads from ./KG-RAG-LLM-Polymers/).
!git clone https://github.com/shijiale0609/KG-RAG-LLM-Polymers.git

## Include your OpenAI API key


In [None]:
# Never store the API key in the notebook itself: reuse OPENAI_API_KEY when the
# environment already provides it, otherwise prompt for it without echoing.
from getpass import getpass

if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

# Keep the module-level name and the openai client setting that later cells use.
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
openai.api_key = OPENAI_API_KEY

In [None]:
# Load the roadmap PDF shipped with the cloned repository.
loader = PyPDFLoader(
    "./KG-RAG-LLM-Polymers/gartner-jayaraman-2019-modeling-and-simulations-of-polymers-a-roadmap.pdf"
)
pages = loader.load()

# Split the loaded documents into chunks (chunk_size=1500, chunk_overlap=150);
# `splits` is what gets embedded into the vector store later.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=150)
splits = text_splitter.split_documents(pages)


In [None]:
# Embedding model handed to the Chroma vector store when the chunks are indexed.
embeddings_model = OpenAIEmbeddings(model="text-embedding-3-large")

In [None]:
# Start from an empty Chroma directory so re-running the notebook does not
# append to a previously persisted index. Done in pure Python: no sudo prompt,
# works on any platform, and a missing directory is not an error.
import shutil

shutil.rmtree("./docs/chroma", ignore_errors=True)

In [None]:
# Directory handed to Chroma as its persistence location.
persist_directory = 'docs/chroma/'

# Index the chunks in `splits` with `embeddings_model`.
vectorstore = Chroma.from_documents(documents=splits, embedding=embeddings_model, persist_directory=persist_directory)


In [None]:
# OpenAI LLM wrapper (temperature 0) used by the RetrievalQA chain below.
llm = OpenAI(temperature = 0)

In [None]:
# # (deprecated)
# from langchain.chat_models import ChatOpenAI
# llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)

## Vector RAG

In [None]:
# Retrieval-augmented QA chain: `llm` answers using documents fetched from the
# Chroma vector store's retriever.
qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=vectorstore.as_retriever())

## Test Q/A

In [None]:
# Ask the vector-RAG chain a question and show the answer as the cell output.
question = " What are steps to learn modeling and simulation of polymers?"

# Use `.invoke`: calling the chain object directly is deprecated in LangChain.
result = qa_chain.invoke({"query": question})
result["result"]

In [None]:
# Second test question for the vector-RAG chain; the answer is the cell output.
question = "What are difficulties in the research fields of modeling and simulation of polymers?"

# Use `.invoke`: calling the chain object directly is deprecated in LangChain.
result = qa_chain.invoke({"query": question})
result["result"]

# Neo4j Knowledge Graph (work in progress)

In [None]:
from dotenv import load_dotenv
import os

from langchain_community.graphs import Neo4jGraph

In [None]:
# Connection settings for the Neo4j instance, exposed through the environment.
from getpass import getpass

os.environ["NEO4J_URI"] = "neo4j+s://9115ee59.databases.neo4j.io"
os.environ["NEO4J_USERNAME"] = "neo4j"
os.environ["NEO4J_DATABASE"] = "neo4j"

# The password is a secret and must not be committed with the notebook:
# reuse NEO4J_PASSWORD when it is already set, otherwise prompt without echo.
if not os.environ.get("NEO4J_PASSWORD"):
    os.environ["NEO4J_PASSWORD"] = getpass("Neo4j password: ")

In [None]:
# Read the Neo4j connection settings back from the environment.
# The URI is stored under NEO4J_URI; looking up 'NEO4J_URL' (never set in this
# notebook) would leave NEO4J_URL as None.
NEO4J_URL = os.getenv('NEO4J_URI')
NEO4J_USERNAME = os.getenv('NEO4J_USERNAME')
NEO4J_PASSWORD = os.getenv('NEO4J_PASSWORD')
NEO4J_DATABASE = os.getenv('NEO4J_DATABASE')

In [None]:
# Graph handle for the Neo4j database, built from the settings read above.
kg = Neo4jGraph(url=NEO4J_URL, username=NEO4J_USERNAME, password=NEO4J_PASSWORD, database=NEO4J_DATABASE)

In [None]:
from dotenv import load_dotenv
import os

import textwrap

# Langchain
from langchain_community.graphs import Neo4jGraph
from langchain_community.vectorstores import Neo4jVector
from langchain_openai import OpenAIEmbeddings
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.prompts.prompt import PromptTemplate
from langchain.chains import GraphCypherQAChain
from langchain_openai import ChatOpenAI

# Warning control
# Silences every warning category for the rest of the session, which also
# hides deprecation notices emitted by the libraries used here.
import warnings
warnings.filterwarnings("ignore")

In [None]:
# Re-read the OpenAI key from the environment (None if it was never set).
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
# Global constants
# NOTE(review): none of the VECTOR_* names are referenced in the cells visible
# here, and 'form_10k_chunks' does not look polymer-related — confirm whether
# they belong to a vector index that is created elsewhere.
VECTOR_INDEX_NAME = 'form_10k_chunks'
VECTOR_NODE_LABEL = 'Chunk'
VECTOR_SOURCE_PROPERTY = 'text'
VECTOR_EMBEDDING_PROPERTY = 'textEmbedding'

In [None]:
# Prompt text for turning a natural-language question into a Cypher query.
# It has two placeholders: {schema} and {question}.
# NOTE(review): the worked example inside the template (Manager/Address nodes,
# 'San Francisco') does not obviously relate to the polymer questions asked in
# this notebook — confirm it matches the labels in the actual graph schema.
CYPHER_GENERATION_TEMPLATE = """Task:Generate Cypher statement to
query a graph database.
Instructions:
Use only the provided relationship types and properties in the
schema. Do not use any other relationship types or properties that
are not provided.
Schema:
{schema}
Note: Do not include any explanations or apologies in your responses.
Do not respond to any questions that might ask anything else than
for you to construct a Cypher statement.
Do not include any text except the generated Cypher statement.
Examples: Here are a few examples of generated Cypher
statements for particular questions:

# What investment firms are in San Francisco?
MATCH (mgr:Manager)-[:LOCATED_AT]->(mgrAddress:Address)
    WHERE mgrAddress.city = 'San Francisco'
RETURN mgr.managerName
The question is:
{question}"""

# Wrap the template so its two placeholders are declared as input variables.
CYPHER_GENERATION_PROMPT = PromptTemplate(
    input_variables=["schema", "question"],
    template=CYPHER_GENERATION_TEMPLATE
)

# Question-answering chain over the Neo4j graph `kg`, driven by a ChatOpenAI
# model at temperature 0; verbose=True is passed to the chain.
# The custom CYPHER_GENERATION_PROMPT is currently NOT used: the argument that
# would pass it is commented out below.
# NOTE(review): newer langchain-community releases are reported to require
# `allow_dangerous_requests=True` here — confirm against the installed version.
cypherChain = GraphCypherQAChain.from_llm(
    ChatOpenAI(temperature=0),
    graph=kg,
    verbose=True,
    # cypher_prompt=CYPHER_GENERATION_PROMPT,
)

In [None]:
def prettyCypherChain(question: str) -> None:
    """Run ``question`` through ``cypherChain`` and print the wrapped answer.

    Args:
        question: Natural-language question forwarded to the Cypher QA chain.

    Returns:
        None. The answer is printed, wrapped to 60 columns; nothing is returned
        (the previous ``-> str`` annotation did not match the function body).
    """
    # `.run` is deprecated; `.invoke` takes the input under the chain's "query"
    # key and returns a dict whose "result" entry holds the answer text.
    response = cypherChain.invoke({"query": question})["result"]
    print(textwrap.fill(response, 60))

In [None]:
# Ask the graph (Cypher) chain a question and print the wrapped answer.
question = "What are steps to learn modeling and simulation in polymers?"

prettyCypherChain(question)

In [None]:
# Check the graph schema: refresh it from the database, then print it
# wrapped to 60 columns.
kg.refresh_schema()
print(textwrap.fill(kg.schema, 60))