<a href="https://colab.research.google.com/github/socd06/databricks-language-hackathon/blob/main/June8_Dolly_3B_Ontario_Building_Code_Expert_Sitemap_Question_Answering_with_Memory.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import locale
locale.getpreferredencoding = lambda: "UTF-8"

In [2]:
!pip install transformers accelerate einops Xformers langchain sentence-transformers chromadb nest_asyncio unstructured pandoc bitsandbytes

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.30.0-py3-none-any.whl (7.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.2/7.2 MB[0m [31m109.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting accelerate
  Downloading accelerate-0.20.2-py3-none-any.whl (227 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m227.5/227.5 kB[0m [31m34.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting einops
  Downloading einops-0.6.1-py3-none-any.whl (42 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.2/42.2 kB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting Xformers
  Downloading xformers-0.0.20-cp310-cp310-manylinux2014_x86_64.whl (109.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m109.1/109.1 MB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting langchain
  Downloading langchain-0.0.194-py3-none-a

In [3]:
# CLEAR CUDA if needed
"""
import torch
import gc
torch.cuda.empty_cache()
gc.collect()
"""

NameError: ignored

In [4]:
import torch
from transformers import pipeline
from transformers import AutoTokenizer

model_name = "databricks/dolly-v2-3b"

tokenizer = AutoTokenizer.from_pretrained(model_name, padding_side="left")

generate_text = pipeline(model=model_name, 
                         torch_dtype=torch.bfloat16,
                         trust_remote_code=True, 
                         device_map="auto", 
                         return_full_text=True,
                         max_new_tokens=256, 
                         top_p=0.95, 
                         top_k=50)

Downloading (…)okenizer_config.json:   0%|          | 0.00/450 [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/228 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/819 [00:00<?, ?B/s]

Downloading (…)instruct_pipeline.py:   0%|          | 0.00/9.16k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/databricks/dolly-v2-3b:
- instruct_pipeline.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


Downloading pytorch_model.bin:   0%|          | 0.00/5.68G [00:00<?, ?B/s]



In [5]:
from langchain import PromptTemplate, LLMChain
from langchain.llms import HuggingFacePipeline
import torch

hf_pipeline = HuggingFacePipeline(pipeline=generate_text)

# template for an instrution with no input
prompt = PromptTemplate(
    input_variables=["instruction"],
    template="{instruction}")

llm_chain = LLMChain(llm=hf_pipeline, prompt=prompt)

# Test LLM Chain

In [6]:
question = 'Who was Dolly the sheep?'
llm_chain.run(question)

'\nDolly the sheep is the first ever human- animal cloned. The sheep was cloned in 1996 using adult skin cells from a young ewe. In 1997, Dolly was born and went on to have a full and happy life.'

In [7]:
question = 'what are the colors of the rainbow?'
llm_chain.run(question)

'\nRed, Orange, Yellow, Green, Blue, Indigo, Violet'

# Prepare Documents


In [8]:
import os
from langchain.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter

from langchain.chains import RetrievalQA
from langchain.document_loaders import TextLoader
from langchain.document_loaders import PyPDFLoader
from langchain.document_loaders import DirectoryLoader
from langchain.document_loaders import UnstructuredPDFLoader

# Load the Ontario Building Code using its Sitemap
from [this link](https://nrc-publications.canada.ca/eng/view/object/?id=515340b5-f4e0-4798-be69-692e4ec423e8) and put in the 'docs' folder

In [90]:
# fixes a bug with asyncio and jupyter
import nest_asyncio

nest_asyncio.apply()     

In [10]:
from langchain.document_loaders.sitemap import SitemapLoader

sitemap_loader = SitemapLoader(web_path="https://www.buildingcode.online/sitemap.xml")
docs = sitemap_loader.load()

Fetching pages: 100%|##########| 2281/2281 [17:48<00:00,  2.13it/s]


In [11]:
# High chunk_overlap to provide more context
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=300)
texts = text_splitter.split_documents(docs)

In [12]:
len(texts)

7465

In [13]:
texts[2]

Document(page_content="Section 1 of the Ontario Building Code\r\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nBuildingCode.Online\n\n\n\nBuilding Codes\n\nHome\nOntario Building Code\nQuebec Building Code\nBritish Columbia Building Code\n\n\nNavigation\n\nSection 1\nSection 3\nSection 4\nSection 5\nSection 6\nSection 7\nSection 8\nSection 9\nSection 10\nSection 11\nSection 12\n\n\n\n\n\n\n\n\n\n\n\n\n\nHome\nSection 1 of the Ontario Building Code\n\n\n\r\nSection 1 of the Ontario Building Code\r\n\t\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nNavigate\n\n\nSection 1\nSection 3\nSection 4\nSection 5\nSection 6\nSection 7\nSection 8\nSection 9\nSection 10\nSection 11\nSection 12\n\n\n\n\n\n\n\n\r\nSection 1 of the Ontario Building Code\r\n\t  \n\n\n1.1.1.1. - Application\n1.1.2.1. - Climatic and Seismic Design Values\n1.1.2.2. - Depth of Frost Penetration\n1.3.1.1. - Effective Date\n1.3.1.2. - Applicable Editions\n1.3.2.1. - Abbreviations of Proper Names\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nThis material 

# Download HuggingFace Embeddings
Check [MTEB English Leaderboard](https://huggingface.co/spaces/mteb/leaderboard) to make sure you download embeddings with good performance

In [14]:
# Choose one of the top performers from the MTEB English Leaderboard

from langchain.embeddings import HuggingFaceEmbeddings, SentenceTransformerEmbeddings

# top #2 when task = Retrieval June 2023 for under ~500 MB
model_name = "intfloat/e5-base-v2" 

hf = HuggingFaceEmbeddings(model_name=model_name)

Downloading (…)47d37/.gitattributes:   0%|          | 0.00/1.48k [00:00<?, ?B/s]

Downloading (…)5d8dc47d37/README.md:   0%|          | 0.00/65.3k [00:00<?, ?B/s]

Downloading (…)8dc47d37/config.json:   0%|          | 0.00/650 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

Downloading (…)47d37/tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/314 [00:00<?, ?B/s]

Downloading (…)5d8dc47d37/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]



# Make a Vector Database

In [15]:
# Embed and store the texts
# Supplying a persist_directory will store the embeddings on disk
persist_directory = 'db'

## Here is the new embeddings being used
embedding = hf 

vectordb = Chroma.from_documents(documents=texts, 
                                 embedding=embedding,
                                 persist_directory=persist_directory)

In [83]:
# Zip vector database so we can resume later without running the command again
! zip -r db.zip /content/db*

updating: content/db/ (stored 0%)
  adding: content/db/index/ (stored 0%)
  adding: content/db/index/index_metadata_957ff0c0-3ebe-4503-9727-d7050739338d.pkl (deflated 14%)
  adding: content/db/index/index_957ff0c0-3ebe-4503-9727-d7050739338d.bin (deflated 26%)
  adding: content/db/index/uuid_to_id_957ff0c0-3ebe-4503-9727-d7050739338d.pkl (deflated 40%)
  adding: content/db/index/id_to_uuid_957ff0c0-3ebe-4503-9727-d7050739338d.pkl (deflated 37%)


# and a retriever

In [16]:
retriever = vectordb.as_retriever(search_kwargs={'k':3})

# Test the basic retriever

In [17]:
docs = retriever.get_relevant_documents("What are the minimum ceiling height requirements for habitable rooms?")

In [18]:
for doc in docs:
  print(doc)

page_content='9.5.3.1. ceiling Heights of Rooms or Spaces   (1) The ceiling heights of rooms or spaces in residential occupancies and live/work units shall conform to Table 9.5.3.1.  (2) Areas in rooms or spaces over which ceiling height is not less than the minimum specified in Table 9.5.3.1. shall be contiguous with the entry or entries to those rooms or spaces. Table 9.5.3.1.   Room ceiling Heights Forming Part of Sentences 9.5.3.1.(1) and (2)    Item   Column 1 Room or Space   Column 2 Minimum Heights(1)     1.   Living room or space, dining room or space, kitchen or kitchen space   2 300 mm over at least 75% of the required floor area with a clear height of 2 100 mm at any point over the required area     2.   Bedroom or bedroom space   2 300 mm over at least 50% of the required area or 2 100 mm over all of the required floor area.  Any part of the floor having a clear height of less than 1 400 mm shall not be considered in computing the required floor area     3.   basement space

# Make a proper Question Retrieval chain

In [39]:
## Cite sources
def process_llm_response(llm_response):
    print(llm_response['result'])
    print('\n\nSources:')
    for source in llm_response["source_documents"]:
      print(source.metadata['source'])
      # This version only has links, not pages
      # print(source.metadata['source'] + ' Page ' + str(source.metadata['page']))

In [23]:
# from langchain.memory import ConversationBufferMemory
from langchain.memory import ConversationBufferWindowMemory

from typing import Dict, Any

# class AnswerConversationBufferMemory(ConversationBufferMemory):
class AnswerConversationBufferMemory(ConversationBufferWindowMemory):
    def save_context(self, inputs: Dict[str, Any], outputs: Dict[str, str]) -> None:
        return super(AnswerConversationBufferMemory, self).save_context(inputs,{'response': outputs['result']})

In [24]:
memory = AnswerConversationBufferMemory(k=3)

qa_chain_with_memory = RetrievalQA.from_chain_type(llm=hf_pipeline, 
                                                   chain_type="stuff", 
                                                   retriever=retriever, 
                                                   return_source_documents=True,
                                                   memory=memory)

In [31]:
# try to set the tone
template = '''
You are the assistant to a tradesperson with knowledge of the Ontario Building Code. You provide specific details using the context given and the users question. 
If you don't know the answer, you truthfully say you don't know and don't try to make up an answer. 
----------------
{context}

Question: {question}
Helpful Answer:'''

In [32]:
qa_chain_with_memory.combine_documents_chain.llm_chain.prompt.template = template
qa_chain_with_memory.combine_documents_chain.llm_chain.prompt.template

"\nYou are the assistant to a tradesperson with knowledge of the Ontario Building Code. You provide specific details using the context given and the users question. \nIf you don't know the answer, you truthfully say you don't know and don't try to make up an answer. \n----------------\n{context}\n\nQuestion: {question}\nHelpful Answer:"

In [48]:
def clean_text(text):
    # Remove excessive whitespace
    cleaned_text = ' '.join(text.split())  
    # Keep max one newline character
    cleaned_text = cleaned_text.replace('\n\n', '\n')  

    return cleaned_text

In [74]:
## Cite sources
def chatbot_llm_response(llm_response):
  text = clean_text(llm_response['result']) + '\nSources:\n'
  for source in llm_response["source_documents"]:
    text += source.metadata['source'] +'\n'
  
  return text

In [40]:
query = "What are the minimum ceiling height requirements for habitable rooms?"
llm_response = qa_chain_with_memory(query)
process_llm_response(llm_response)


The Ontario Building Code | Room and Space Height

	  (1) The unobstructed height in dwelling units and sleeping rooms in Group C occupancies shall conform to Subsection 9.5.3.1.  (2) The minimum ceiling heights for all other habitable rooms shall be 2 100 mm.


Sources:
https://www.buildingcode.online/1267.html
https://www.buildingcode.online/470.html
https://www.buildingcode.online/2255.html


In [76]:
query = "provide information on fire safety regulations for commercial"
llm_response = qa_chain_with_memory(query)
processed_response = chatbot_llm_response(llm_response)
print(processed_response)

6. B6 3.1.8.5.(2) (a) Existing functional and sound doors in existing buildings that are either hollow metal or kalamein and containing wired glass at least 6 mm thick and conforming to Sentence 3.1.8.14.(2) are permitted in lieu of doors not required to exceed 45 min, 7. B7 3.1.8.7. to 3.1.8.9. Fire dampers or fire stop flaps are not required to be installed in existing Functional and sound hollow metal or kalamein doors which carry existing 1.5 h labels are acceptable in lieu of current 1.5 h labels and may contain wired glass panels not exceeding 0.0645 m², at least 6 mm thick and conforming to Sentence 3.1.8.14.(2) Reserved for Fire doors, window assemblies or glass blocks used as a closure in a required fire separation shall be installed in conformance with good engineering practice. fire-resistant construction materials including, but not limited to metal, cement, concrete, brick, concrete block, steel, timber and noncombustible construction materials including,
Sources:
https://

In [77]:
query = "What are the accessibility guidelines for barrier-free design in commercial buildings"
llm_response = qa_chain_with_memory(query)
processed_response = chatbot_llm_response(llm_response)
print(processed_response)

To be barrier-free means having a design that allows unobstructed free movement of people between levels within a building. If a walkway or pedestrian bridge connects two barrier-free storeys in different buildings, the path of travel from one storey to the other storey by means of the walkway or bridge shall be barrier-free. If any part of the path of travel is provided by a ramp, the ramp shall meet the following requirements: It shall lead from the outdoors at sidewalk level or from a sidewalk. Any barrier to passage shall be no more than 12 inches high, measured from the top of the ramp to the top of the barrier. The design of the ramp shall ensure that people using it have unobstructed visibility of the outdoors or a sidewalk at the level of the doorway or floor at which the ramp begins. If a path of travel does not include a sidewalk or ramp that leads from the outdoors, it is not barrier-free.
Sources:
https://www.buildingcode.online/505.html
https://www.buildingcode.online/514.

In [78]:
query = "What are the regulations regarding installation of smoke alarms and carbon monoxide detectors in residential buildings?"
llm_response = qa_chain_with_memory(query)
processed_response = chatbot_llm_response(llm_response)
print(processed_response)


The Ontario Building Code requires that carbon monoxide alarms are wired so that their activation will activate all carbon monoxide alarms within the suite, where located within a suite of residential occupancy. The Code also permits carbon monoxide alarms to be battery operated. Installation of smoke alarms and carbon monoxide detectors require a certificate of compliance issued by the Ontario Fireproofing Certification Office (OFCO). The certificate must accompany the installation, be visible externally, and identify the name and address of the installer. Installers of smoke and carbon monoxide alarm systems must also be members of a local fire protection organization and undergo a fire protection course. The certificate and the names and addresses of the installers must be posted in the suite of the building where the alarms are to be located.
Sources:
https://www.buildingcode.online/884.html
https://www.buildingcode.online/2165.html
https://www.buildingcode.online/884.html



In [92]:
!pip install gradio

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [95]:
!pip install asyncio

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting asyncio
  Downloading asyncio-3.4.3-py3-none-any.whl (101 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m101.8/101.8 kB[0m [31m7.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: asyncio
Successfully installed asyncio-3.4.3


In [94]:
import asyncio

loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)

import gradio as gr
import random
import time

with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.Button("Clear")

    def user(user_message, history):
        return gr.update(value="", interactive=False), history + [[user_message, None]]

    def bot(history):
        llm_response = qa_chain_with_memory(msg)
        processed_response = chatbot_llm_response(llm_response)
        bot_message = processed_response

        bot_message = random.choice(["How are you?", "I love you", "I'm very hungry"])
        history[-1][1] = ""
        for character in bot_message:
            history[-1][1] += character
            time.sleep(0.05)
            yield history

    response = msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, chatbot, chatbot
    )
    response.then(lambda: gr.update(interactive=True), None, [msg], queue=False)
    clear.click(lambda: None, None, chatbot, queue=False)

demo.queue()
if __name__ == "__main__":
    demo.launch()


Exception in thread Thread-41 (run):
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/nest_asyncio.py", line 27, in run
    loop = asyncio.get_event_loop()
  File "/usr/local/lib/python3.10/dist-packages/nest_asyncio.py", line 45, in _get_event_loop
    loop = events.get_event_loop_policy().get_event_loop()
  File "/usr/lib/python3.10/asyncio/events.py", line 656, in get_event_loop
    raise RuntimeError('There is no current event loop in thread %r.'
RuntimeError: There is no current event loop in thread 'Thread-41 (run)'.

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/usr/lib/python3.10/threading.py", line 1016, in _bootstrap_inner
    self.run()
  File "/usr/lib/python3.10/threading.py", line 953, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/local/lib/python3.10/dist-packages/uvicorn/server.py", line 61, in run
    return asyncio.run(self.serve(sockets=sockets))
 

Here we see that it sent us to another document instead of trying to come up with an answer. Exactly the behavior we want!

# Trying out Knowledge Graph Memory
# NEEDS FURTHER TESTING

In [None]:
# workaround again
class AnswerKnowledgeGraphMemory(ConversationKGMemory):
    def save_context(self, inputs: Dict[str, Any], outputs: Dict[str, str]) -> None:
        return super(AnswerKnowledgeGraphMemory, self).save_context(inputs,{'response': outputs['result']})

In [None]:
from langchain.memory import ConversationKGMemory

memory = AnswerKnowledgeGraphMemory(llm=hf_pipeline)

qa_chain_with_kg_memory = RetrievalQA.from_chain_type(llm=hf_pipeline,
                                                      chain_type="stuff", 
                                                      retriever=retriever, 
                                                      return_source_documents=True,                                                      
                                                      memory=memory)

# Notes for next version
- Keep overlap = 300
- Make an Ontario Building Code version (see [this link](https://www.buildingcode.online)) and [sitemap link](https://www.buildingcode.online/sitemap.xml)
- DO NOT Use dolly tokenizer to make vector database ([Reference link](https://python.langchain.com/en/latest/modules/indexes/text_splitters/examples/huggingface_length_function.html)). It ends up giving mismatching tensor errors.

- Figure out how to handle data in databricks using dbfs ([Dolly Example](https://www.dbdemos.ai/demo-notebooks.html?demoName=llm-dolly-chatbot))
- Databricks can do online versions instead of dealing with dbfs!