In [None]:
pip install -U langchain-community

In [57]:
import os
import getpass

# Prompt for the key only when it is not already set, so Restart & Run All
# does not block on interactive input when the variable is exported.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API key: ")

 ········


In [60]:
from pathlib import Path
from operator import itemgetter
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders.directory import DirectoryLoader
from langchain.document_loaders.pdf import PyMuPDFLoader
from langchain_community.vectorstores import Chroma
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

# Directory scanned for the source PDFs (relative to the notebook's working directory).
pdf_dir = Path("./data/housing/pdf")
# Load every top-level *.pdf in pdf_dir with PyMuPDFLoader.
loader = DirectoryLoader(pdf_dir, glob="*.pdf", loader_cls=PyMuPDFLoader)
docs = loader.load()

# Split the loaded documents into chunks of up to 1000 units with 200 units of
# overlap between neighbours (presumably characters, per the splitter's name).
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)

In [53]:
!pip install faiss-cpu

Collecting faiss-cpu
  Downloading faiss_cpu-1.7.4-cp39-cp39-macosx_11_0_arm64.whl (2.7 MB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.7/2.7 MB[0m [31m532.0 kB/s[0m eta [36m0:00:00[0mm eta [36m0:00:01[0m[36m0:00:01[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.7.4

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.3.1[0m[39;49m -> [0m[32;49m24.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [61]:
# `splits` was already produced by the loading cell with the same splitter
# settings (chunk_size=1000, chunk_overlap=200), so it is indexed directly
# instead of being re-split here.
# Building the index embeds every chunk with OpenAIEmbeddings.
faiss_vectorstore = FAISS.from_documents(
    documents=splits, embedding=OpenAIEmbeddings()
)

In [62]:
# Persist the index to a dated directory; the load cell below reads this same path.
faiss_vectorstore.save_local("./embeddings/title_14_housing_index/2024-02-09")

In [65]:
# Reload the persisted index, rebinding `faiss_vectorstore` to the on-disk copy.
# The embeddings object passed here should match the one used to build the index.
# NOTE(review): newer langchain-community releases are reported to require
# `allow_dangerous_deserialization=True` for load_local — confirm against the
# installed version before adding it, and only load index files you created yourself.
faiss_vectorstore = FAISS.load_local("./embeddings/title_14_housing_index/2024-02-09", OpenAIEmbeddings())

In [66]:
# Retrieve the relevant regulation chunks and generate an answer from them.
retriever = faiss_vectorstore.as_retriever()
# temperature=0 is used here, presumably to keep answers as repeatable as possible.
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)

# System message carries the instructions plus the formatted documents
# ({context}); the human message carries the raw user question ({question}).
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You're a helpful Legal AI assistant for answering user questions. Given a user question and a set of relevant documents from the Washington DC Title 14 Housing regulations, provide an answer. If none of the documents answer the question, just say I don't know.\n\nHere are the documents relevant to the user question: {context}",
        ),
        ("human", "{question}"),
    ]
)
prompt.pretty_print()


You're a helpful Legal AI assistant for answering user questions. Given a user question and some a set of relevant documents from the Washington DC Title 14 Housing regulations provide an answer. If none of the documents answer the question, just say I don't know.

Here are the documents relevant to the user question: [33;1m[1;3m{context}[0m


[33;1m[1;3m{question}[0m


In [67]:
from operator import itemgetter
from typing import List

from langchain_core.documents import Document
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import (
    RunnableLambda,
    RunnableParallel,
    RunnablePassthrough,
)

def format_docs(docs: List[Document]) -> str:
    """Render retrieved documents as one string for the prompt's context slot.

    Each document becomes a three-line entry: its position in ``docs`` as the
    ``Source ID``, its ``title`` metadata, and its page content. Entries are
    separated by a blank line.

    Args:
        docs: Documents to render. Each one is read through ``metadata`` and
            ``page_content``.

    Returns:
        The rendered entries joined by blank lines and prefixed with ``"\\n\\n"``.
        For an empty ``docs`` the result is just ``"\\n\\n"``.
    """
    formatted = [
        # Each field sits on its own line. A document with no "title" metadata
        # falls back to a placeholder instead of raising KeyError.
        f"Source ID: {i}\n"
        f"Document Title: {doc.metadata.get('title', 'Untitled')}\n"
        f"Document Content: {doc.page_content}"
        for i, doc in enumerate(docs)
    ]
    return "\n\n" + "\n\n".join(formatted)




In [68]:
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain.output_parsers.openai_tools import JsonOutputKeyToolsParser


# Answer schema that cites sources by integer ID only (no quotes).
# NOTE(review): only `quoted_answer` is bound to the LLM in the visible cells;
# no use of this class is visible here — confirm whether it is still needed.
# The docstring and Field descriptions are presumably exported as the tool's
# schema text, so treat them as runtime strings rather than ordinary comments.
class cited_answer(BaseModel):
    """Answer the user question based only on the given sources, and cite the sources used."""

    # Free-text answer; required (no default).
    answer: str = Field(
        ...,
        description="The answer to the user question, which is based only on the given sources.",
    )
    # Integer source IDs backing the answer; required (no default).
    citations: List[int] = Field(
        ...,
        description="The integer IDs of the SPECIFIC sources which justify the answer.",
    )


# A single supporting citation: a source ID paired with a quote from that source.
# Used as the element type of `quoted_answer.citations`.
class Citation(BaseModel):
    # Presumably the "Source ID" number that format_docs assigns to each document.
    source_id: int = Field(
        ...,
        description="The integer ID of a SPECIFIC source which justifies the answer.",
    )
    # Quote text supplied by the model; nothing in the visible code checks that it
    # actually appears in the source.
    quote: str = Field(
        ...,
        description="The VERBATIM quote from the specified source that justifies the answer.",
    )


# Answer schema with quoted citations. This is the class bound to the LLM as a
# tool; its lowercase name matches the "quoted_answer" strings used for
# `tool_choice` and for the output parser's `key_name`, so renaming the class
# means updating those strings too.
# The docstring and Field descriptions are presumably exported as the tool's
# schema text, so treat them as runtime strings rather than ordinary comments.
class quoted_answer(BaseModel):
    """Answer the user question based only on the given sources, and cite the sources used."""

    # Free-text answer; required (no default).
    answer: str = Field(
        ...,
        description="The answer to the user question, which is based only on the given sources.",
    )
    # One Citation (source ID + quote) per supporting passage; required (no default).
    citations: List[Citation] = Field(
        ..., description="Citations from the given sources that justify the answer."
    )


# Bind `quoted_answer` as the model's tool and select it via tool_choice.
llm_with_tool = llm.bind_tools([quoted_answer], tool_choice="quoted_answer")
# Parser keyed on the same tool name, used to read that tool call from the response.
output_parser = JsonOutputKeyToolsParser(key_name="quoted_answer")


In [69]:
# Sub-chain that renders the retrieved documents into the prompt's {context}
# string. Named `format_context` so it does not shadow the builtin `format`.
format_context = itemgetter("docs") | RunnableLambda(format_docs)
# Sub-chain for generating the structured answer once retrieval is done.
answer = prompt | llm_with_tool | output_parser
# Complete chain: pass the question through and run the retriever on it ->
# format the docs to a string -> run the answer sub-chain -> return only the
# answer and the retrieved docs.
chain = (
    RunnableParallel(question=RunnablePassthrough(), docs=retriever)
    .assign(context=format_context)
    .assign(quoted_answer=answer)
    .pick(["quoted_answer", "docs"])
)

In [70]:
# Run the full chain on a sample question. The result is a dict with the
# "quoted_answer" and "docs" keys selected by the chain's final pick step.
chain.invoke("what can I do for a broken oven in my apartment?")

{'quoted_answer': [{'answer': 'If the oven in your apartment is broken, you should contact your landlord or property management company to report the issue. They are responsible for the maintenance and repair of appliances provided with the rental unit. You can refer to the Washington DC Title 14 Housing regulations for more information on the responsibilities of tenants and landlords regarding appliances.',
   'citations': [{'source_id': 1,
     'quote': '906.3\nThe operator shall be responsible for the fire-safe installation and maintenance of all heating and cooking appliances furnished by the operator of the housing business.'}]}],
 'docs': [Document(page_content='Properly using and operating all electrical, gas, plumbing, and heating fixtures and \nappliances.\n802.3\nA tenant shall not do or permit any person on the premises with the tenant’s permission to do \nany of the following:\n(a)\nWillfully or wantonly destroy, deface, damage, impair, or remove any part of the \nstructure

In [49]:
# NOTE(review): same call as the previous cell, with a lower execution count
# (49), so this looks like a leftover from an earlier run — consider removing it.
chain.invoke("what can I do for a broken oven in my apartment?")

{'quoted_answer': [{'answer': 'If the oven in your apartment is broken, you should contact your landlord or property management company to report the issue. They are responsible for the maintenance and repair of appliances provided with the rental unit. You can refer to the Washington DC Title 14 Housing regulations for more information on the responsibilities of tenants and landlords regarding appliances.',
   'citations': [{'source_id': 1,
     'quote': '906.3\nThe operator shall be responsible for the fire-safe installation and maintenance of all heating and cooking appliances furnished by the operator of the housing business.'}]}],
 'docs': [Document(page_content='Properly using and operating all electrical, gas, plumbing, and heating fixtures and \nappliances.\n802.3\nA tenant shall not do or permit any person on the premises with the tenant’s permission to do \nany of the following:\n(a)\nWillfully or wantonly destroy, deface, damage, impair, or remove any part of the \nstructure