In [None]:
!pip install langchain langchain-community langchainhub langchain-openai chromadb bs4

In [None]:
!pip install chromadb

In [3]:
import os
import getpass

In [4]:
# Prompt only when the key is not already present, so an exported key (or a
# re-run of this cell) is not overwritten; getpass keeps the typed value out
# of the notebook's saved output.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API key: ")

 ········


In [17]:
from pathlib import Path
from langchain import hub
from operator import itemgetter
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders.directory import DirectoryLoader
from langchain.document_loaders.pdf import PyMuPDFLoader
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

In [6]:
# Directory holding the PDFs to index (relative to the current working directory).
pdf_dir = Path("./data/housing/pdf")

In [20]:
# Load every PDF matching *.pdf in pdf_dir (using PyMuPDFLoader), then chunk
# and index the contents.
loader = DirectoryLoader(pdf_dir, glob="*.pdf", loader_cls=PyMuPDFLoader)
docs = loader.load()

# Split into overlapping chunks (chunk_size=1000, chunk_overlap=200) so text
# near a chunk boundary also appears in the neighbouring chunk.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)
# Embed the chunks with OpenAIEmbeddings and store them in a Chroma vector store
# (presumably calls the OpenAI API with OPENAI_API_KEY -- TODO confirm).
vectorstore = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings())

# Retrieve and generate using the relevant snippets of the PDFs.
retriever = vectorstore.as_retriever()
# Prompt pulled from the LangChain Hub; it takes "context" and "question" inputs.
prompt = hub.pull("rlm/rag-prompt")
# temperature=0 is set explicitly for the chat model.
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)


def format_docs(docs):
    """Join the ``page_content`` of each document into a single string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The page contents in input order, separated by one blank line
        (``"\\n\\n"``); an empty string when ``docs`` yields nothing.
    """
    page_texts = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(page_texts)

In [22]:
from langchain_core.runnables import RunnableParallel

# Answer-generation step: takes a dict with "context" (retrieved documents)
# and "question", replaces "context" with the joined document text from
# format_docs, fills the prompt, calls the LLM and parses the reply with
# StrOutputParser.
rag_chain_from_docs = (
    RunnablePassthrough.assign(context=(lambda x: format_docs(x["context"])))
    | prompt
    | llm
    | StrOutputParser()
)

# Full chain: the input is sent to the retriever (stored under "context") and
# passed through unchanged (stored under "question"); the generated text is
# then added under "answer", so the result keeps the retrieved documents
# alongside the answer.
rag_chain_with_source = RunnableParallel(
    {"context": retriever, "question": RunnablePassthrough()}
).assign(answer=rag_chain_from_docs)

In [39]:
# Debug aid: display the serialized structure of the composed chain.
rag_chain_with_source.to_json()

{'lc': 1,
 'type': 'constructor',
 'id': ['langchain', 'schema', 'runnable', 'RunnableSequence'],
 'kwargs': {'first': {
    context: VectorStoreRetriever(tags=['Chroma', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.chroma.Chroma object at 0x12ad1efa0>),
    question: RunnablePassthrough()
  },
  'middle': [],
  'last': RunnableAssign(mapper={
    answer: RunnableAssign(mapper={
              context: RunnableLambda(lambda x: format_docs(x['context']))
            })
            | ChatPromptTemplate(input_variables=['context', 'question'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"))])
            | ChatOpenAI(c

In [23]:
# Single-question smoke test; the result exposes "context", "question" and "answer".
res = rag_chain_with_source.invoke("what can I do for a broken oven in my apartment")

In [40]:
# Questions to ask; the commented-out entries are alternative examples.
queries = [
    # "what can I do for a broken oven in my apartment?",
    "There are roaches in my apartment",
    # "there is moisture inside the wall",
    # "my carpet was dirty when I moved in"
]

# Ask each question and print the answer with the files it was drawn from.
for q in queries:
    res = rag_chain_with_source.invoke(q)
    print(res["question"])
    print(res["answer"])
    print("Sources:")
    # Several retrieved chunks can come from the same file, so de-duplicate the
    # paths; sort them because set iteration order for strings is not stable
    # across interpreter runs, which made the listing order vary.
    source_paths = sorted({doc.metadata["file_path"].strip() for doc in res["context"]})
    print("\n".join(source_paths))
    print("=" * 100)
    
    

There are roaches in my apartment
If you are the occupant of a single-family dwelling, you are responsible for keeping the premises free from vermin, rodents, and rodent harborage. If you live in a two-family or multiple dwelling and your habitation is the only one infested, you are responsible for exterminating the vermin and rodents. However, if the infestation is caused by the failure of the owner or licensee to maintain the building in a rodent-proof or reasonably insect-proof condition, they are responsible for extermination.
Sources:
data/housing/pdf/14_14-1004_vermin.pdf
data/housing/pdf/14_14-805_extermination.pdf
