In [3]:
import os
import re
import time
import pypdf
import pickledb
import pinecone 
import pandas as pd
import streamlit as st
from langchain.llms import OpenAI
from langchain.llms import OpenAIChat
from langchain.chains import VectorDBQA
from langchain.vectorstores import FAISS
from langchain.vectorstores import Pinecone
from langchain import GoogleSearchAPIWrapper
from langchain.agents import initialize_agent, Tool
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.chains.question_answering import load_qa_chain

  from tqdm.autonotebook import tqdm


In [4]:
# Base names (extension stripped) of every PDF found in Documents/.
# os.path.splitext removes only the final ".pdf", so a dotted file name such as
# "sf.building.code.pdf" keeps its full stem instead of being cut at the first
# dot. sorted() makes the listing independent of the arbitrary order in which
# os.listdir returns directory entries.
papers = sorted(
    os.path.splitext(filename)[0]
    for filename in os.listdir("Documents/")
    if filename.endswith('.pdf')
)
papers

['sf-building-code', 'sf-planning-code']

In [6]:
def split_pdf(name,chunk_chars=4000,overlap=50):
    """
    Pre-process a PDF into overlapping text chunks.
    Some code from: https://github.com/whitead/paper-qa/blob/main/paperqa/readers.py

    The extracted text of every page of ``Documents/<name>.pdf`` is
    concatenated and cut into chunks of at most ``chunk_chars`` characters.
    Every chunk after the first starts with the last ``overlap`` characters of
    the chunk before it. Text left over after the last page is returned as a
    final (possibly shorter) chunk, so no part of the document is dropped.

    Args:
        name: PDF file name without the ``.pdf`` extension, looked up in
            ``Documents/``.
        chunk_chars: Maximum number of characters per chunk (must be > 0).
        overlap: Number of characters shared by consecutive chunks
            (must satisfy ``0 <= overlap < chunk_chars``).

    Returns:
        A ``(splits, metadatas)`` tuple of two equally long lists.
        ``splits[i]`` is the text of chunk ``i``. ``metadatas[i]`` is the
        string ``"<name> : <section>"`` where ``<section>`` is the first
        ``SECTION ddd`` match inside ``splits[i]``, or, if the chunk has no
        match, the match carried over from an earlier chunk (empty string
        when none has been seen yet).

    Raises:
        ValueError: If ``chunk_chars`` or ``overlap`` violate the constraints
            above.
    """
    if chunk_chars <= 0 or not 0 <= overlap < chunk_chars:
        # The window advances by chunk_chars - overlap characters per chunk;
        # a non-positive step would never consume the buffer.
        raise ValueError(
            "need chunk_chars > 0 and 0 <= overlap < chunk_chars, got "
            "chunk_chars=%r, overlap=%r" % (chunk_chars, overlap)
        )

    pat = "SECTION"
    # Raw string so that \s and \d reach the regex engine as written.
    section_re = re.compile(r"%s\s[\d]{3}." % pat)

    splits = []
    metadatas = []
    section = ""

    def emit(chunk):
        """Store one chunk together with the section label that describes it."""
        nonlocal section
        found = section_re.findall(chunk)
        if found:
            section = found[0]
        splits.append(chunk)
        metadatas.append(name + " : %s" % section)

    buffer = ""
    # The context manager closes the file even if reading the PDF raises.
    with open("Documents/%s.pdf" % name, "rb") as pdf_file:
        reader = pypdf.PdfReader(pdf_file)
        for page in reader.pages:
            buffer += page.extract_text()
            # `while` rather than `if`: one page may hold several chunks of text.
            while len(buffer) > chunk_chars:
                emit(buffer[:chunk_chars])
                buffer = buffer[chunk_chars - overlap:]

    # Whatever is left after the last page is the final chunk.
    if buffer:
        emit(buffer)
    return splits, metadatas

# Chunk the building code PDF into pieces of up to 4000 characters.
name = "sf-building-code"
docs_building_code, metadatas_building_code = split_pdf(name=name, chunk_chars=4000)

# Same treatment for the planning code; `name` is rebound to the second document.
name = "sf-planning-code"
docs_planning_code, metadatas_planning_code = split_pdf(name=name, chunk_chars=4000)

`index`

* Note: the embeddings used to build an index must be the same as those used to embed the query!
* E.g., a Pinecone index generated with `HuggingFaceEmbeddings()` must also be queried with `HuggingFaceEmbeddings()`.

In [5]:
# Embeddings
# NOTE(review): the embedding model chosen here must be the one the existing
# Pinecone index was built with — confirm before switching the line below.
embeddings = OpenAIEmbeddings()
# embeddings = HuggingFaceEmbeddings()

# Pinecone
# Credentials come from the environment so that no secret is stored in the
# notebook; a missing variable fails right here with a KeyError.
pinecone.init(
    api_key=os.environ["PINECONE_API_KEY"],  # find at app.pinecone.io
    environment=os.environ["PINECONE_ENVIRONMENT"]  # next to api key in console
)
index_name = "sf-building-codes"

# Write DB (populates the index from the building-code chunks; needs docs_building_code)
# docsearch_sf_building_pinecone = Pinecone.from_texts(docs_building_code, embeddings, index_name=index_name)

# Read DB
docsearch_sf_building_pinecone = Pinecone.from_existing_index(index_name=index_name,embedding=embeddings)

In [7]:
# FAISS
# One vector store per document, each built from that document's text chunks.
docsearch_sf_building_faiss = FAISS.from_texts(
    texts=docs_building_code,
    embedding=embeddings,
)
docsearch_sf_planning_faiss = FAISS.from_texts(
    texts=docs_planning_code,
    embedding=embeddings,
)

2023-03-01 19:06:05.979 INFO    faiss.loader: Loading faiss.
2023-03-01 19:06:05.991 INFO    faiss.loader: Successfully loaded faiss.


`building codes`

In [18]:
# Two-part question: the permit threshold, and where the code states it.
query = (
    "At what size do I need a permit for a storage shed in my backyard? "
    "In what section can I find this information in the building code?"
)

In [16]:
print("--Pinecode building code, stuff chain --")
# Question answering over the Pinecone building-code index with the "stuff" chain.
llm = OpenAI(temperature=0)
chain_pinecone_building = VectorDBQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    vectorstore=docsearch_sf_building_pinecone,
)
print(chain_pinecone_building.run(query))

--Pinecode building code, stuff chain --
 You do not need a building permit for a one-story detached accessory building or structure used as a tool and storage shed, playhouse, or similar use, provided the projected roof area does not exceed 100 square feet (9.29 m2). This information can be found in Section 106A.1.10.1 of the Building Code.


In [19]:
print("--Pinecode building code, stuff chain, chat GPT --")
# Same "stuff" chain over the Pinecone index, this time driven by OpenAIChat.
llm = OpenAIChat(temperature=0)
chain_pinecone_building_cgpt = VectorDBQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    vectorstore=docsearch_sf_building_pinecone,
)
print(chain_pinecone_building_cgpt.run(query))

--Pinecode building code, stuff chain, chat GPT --
You would need a permit for a storage shed in your backyard if the projected roof area exceeds 100 square feet (9.29 m2). This information can be found in Section 106A.1.1 of the building code.


`planning codes` - 

In [36]:
print("--FAISS planning, map_reduce chain--")
# Only one question is asked per run. The alternative below used to be assigned
# and then immediately overwritten; it is kept as a comment to swap in by hand.
# query = "What size of greenhouse or shed is permitted in a backyard?"
query = "What are the limits upon the floor area ratio of buildings?"
# Question answering over the FAISS planning-code store with the "map_reduce" chain.
chain_faiss_planning = VectorDBQA.from_chain_type(OpenAI(temperature=0), chain_type="map_reduce", vectorstore=docsearch_sf_planning_faiss)
print(chain_faiss_planning.run(query))

--FAISS planning, refine chain--


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

 The limits upon the floor area ratio of buildings, as defined by this Code, shall be as stated in this Section and Sections 124 through 128.1. The maximum floor area ratio for any building or development shall be equal to the sum of the basic floor area ratio for the district, as set forth in Section 124, plus any premiums and floor area transfers which are applicable to such building or development under Sections 125, 127, 128 and 128.1 and as restricted by the provisions of Sections 123(c) and (d) and 124(b) and (j).


In [38]:
# Fetch the 3 chunks most similar to the question from the planning-code store,
# then run a "map_reduce" question-answering chain over just those chunks.
faiss_simserch = docsearch_sf_planning_faiss.similarity_search(query, k=3)
chain = load_qa_chain(
    llm=OpenAI(temperature=0.0),
    chain_type="map_reduce",
)
print(chain.run(input_documents=faiss_simserch, question=query))

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

 The limits upon the floor area ratio of buildings, as defined by this Code, are stated in Sections 124 through 128.1. The maximum floor area ratio for any building or development shall be equal to the sum of the basic floor area ratio for the district, as set forth in Section 124, plus any premiums and floor area transfers which are applicable to such building or development under Sections 125, 127, 128 and 128.1 and as restricted by the provisions of Sections 123(c) and (d) and 124(b) and (j).


`agent` - 

In [12]:
# Tools offered to the agent: the building-code QA chain and a Google search wrapper.
search = GoogleSearchAPIWrapper()
tools = [
    Tool(
        name="SF Building Codes QA System",
        description="Useful for when you need to answer questions about building anything in San Francisco. Input should be a fully formed question.",
        func=chain_pinecone_building_cgpt.run,
    ),
    Tool(
        name="Google",
        description="Google search is useful if you need to answer a question about products to purchase.",
        func=search.run,
    ),
]

2023-03-01 21:06:04.333 INFO    googleapiclient.discovery_cache: file_cache is only supported with oauth2client<4.0.0


In [13]:
# llm = OpenAI(temperature=0)
llm = OpenAIChat(temperature=0)
# Zero-shot ReAct agent over the tools above; verbose=True prints each step.
agent = initialize_agent(
    tools=tools,
    llm=llm,
    agent="zero-shot-react-description",
    verbose=True,
)

In [14]:
# Combined question: a building-code lookup followed by a product search.
query = (
    "At what size do I need a permit for a storage shed in my backyard? "
    "What sheds can I buy that are smaller than this size?"
)
agent.run(query)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI need to find out the size limit for a storage shed without a permit and then search for sheds that are smaller than that size.
Action: SF Building Codes QA System
Action Input: "What is the size limit for a storage shed without a permit in San Francisco?"
[0m
Observation: [36;1m[1;3mThe size limit for a storage shed without a permit in San Francisco is 100 square feet (9.29 m2).[0m
Thought:[32;1m[1;3mNow that I know the size limit, I can search for sheds that are smaller than 100 square feet.
Action: Google
Action Input: "Storage sheds smaller than 100 square feet"
[0m
Observation: [33;1m[1;3mResults 1 - 24 of 279 ... Get free shipping on qualified Medium ( 36-101 sq. ft.) Sheds products or Buy Online Pick Up in Store today in the Storage ... A building permit is required for a new or replacement utility or storage shed larger than 100 square feet. Projects That Require Clearances or Other Permitting. May 19, 2022 

'The size limit for a storage shed without a permit in San Francisco is 100 square feet. There are many options for backyard storage sheds that are smaller than 100 square feet and do not require a permit, including small sheds under 36 square feet and medium sheds between 37 and 100 square feet.'