In [None]:
!pip install transformers accelerate einops Xformers langchain InstructorEmbedding sentence-transformers chromadb

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
import locale

# Force the reported preferred encoding to UTF-8.
# The replacement keeps the optional `do_setlocale` parameter of the real
# function so that callers using `locale.getpreferredencoding(False)` do not
# fail with a TypeError.
locale.getpreferredencoding = lambda do_setlocale=True: "UTF-8"

!pip install unstructured pandoc

In [None]:
# CLEAR CUDA
import gc

import torch  # imported here so this cell also runs on a fresh kernel

# Ask PyTorch to release its cached GPU memory, then run Python's garbage collector.
torch.cuda.empty_cache()
gc.collect()


In [None]:
import torch
from transformers import pipeline
from transformers import AutoTokenizer

# Checkpoint used for both the tokenizer and the text-generation pipeline.
model_name = "databricks/dolly-v2-3b"

# Stand-alone, left-padded tokenizer for the same checkpoint.
# Note: it is not passed to pipeline() below.
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    padding_side="left",
)

# Text-generation pipeline in bfloat16 with automatic device placement.
# trust_remote_code=True allows the checkpoint's own pipeline code to run.
generate_text = pipeline(
    model=model_name,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
    device_map="auto",
    return_full_text=True,
)

In [None]:
from langchain import PromptTemplate, LLMChain
from langchain.llms import HuggingFacePipeline
import torch

# Wrap the transformers pipeline so LangChain can use it as an LLM.
hf_pipeline = HuggingFacePipeline(pipeline=generate_text)

# Template for an instruction with no input: the prompt is the instruction itself.
prompt = PromptTemplate(template="{instruction}", input_variables=["instruction"])

# Chain that formats the prompt and sends it to the wrapped pipeline.
llm_chain = LLMChain(prompt=prompt, llm=hf_pipeline)

In [None]:
# Show the LangChain wrapper's repr as the cell output.
hf_pipeline

In [None]:
# TODO: Check if input is checked somewhere, otherwise check something like this:

# Example text whose token count we want to measure
output_sequence = "This is an example output sequence."

# Encode the text into a PyTorch tensor of token ids
tokenized_output = tokenizer.encode(output_sequence, return_tensors="pt")

# The token count is the size of the tensor's second dimension
num_tokens = tokenized_output.size(1)

print(f"Number of tokens in the output sequence: {num_tokens}")

# Test LLM Chain

In [None]:
# Smoke-test the plain LLM chain; the returned text is shown as the cell output.
question = 'Who was Dolly the sheep?'
llm_chain.run(question)

In [None]:
# Second smoke test of the plain LLM chain (no retrieval involved).
question = 'what are the colors of the rainbow?'
llm_chain.run(question)

# Prepare Documents


In [None]:
!pip install nest_asyncio

In [None]:
# fixes a bug with asyncio and jupyter
# (presumably needed so asyncio-based loading code can run inside the
# notebook's already-running event loop — confirm)
import nest_asyncio

nest_asyncio.apply()

In [None]:
from langchain.document_loaders.sitemap import SitemapLoader


In [None]:
import os
from langchain.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter

from langchain.chains import RetrievalQA
from langchain.document_loaders import TextLoader
from langchain.document_loaders import PyPDFLoader
from langchain.document_loaders import DirectoryLoader

In [None]:
# Build a loader pointed at the site's sitemap.xml.
sitemap_loader = SitemapLoader(web_path="https://www.limestone.on.ca/sitemap.xml")

# Load the documents (presumably one per URL listed in the sitemap — confirm).
docs = sitemap_loader.load()

In [None]:
# Split the loaded documents into overlapping chunks (chunk_size=1000, chunk_overlap=200).
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
texts = text_splitter.split_documents(docs)

In [None]:
# Number of chunks produced by the splitter.
len(texts)

In [None]:
# Inspect one chunk to sanity-check the split.
texts[2]

# Download HuggingFace Embeddings
Check the [MTEB English Leaderboard](https://huggingface.co/spaces/mteb/leaderboard) to make sure you download an embedding model with good performance.

In [None]:
# Choose one of the top performers from the MTEB English Leaderboard

from langchain.embeddings import HuggingFaceEmbeddings, SentenceTransformerEmbeddings

# top #2 when task = Retrieval June 2023 for under ~500 MB
# NOTE: this rebinds `model_name`, which earlier held the LLM checkpoint name.
model_name = "intfloat/e5-base-v2" 

# Embedding model wrapper used to build the vector store.
hf = HuggingFaceEmbeddings(model_name=model_name)

# Make a Vector Database

In [None]:
# Embed and store the texts
# Supplying a persist_directory will store the embeddings on disk
persist_directory = 'db'

## Here is the new embeddings model being used
embedding = hf 

# Build the Chroma vector store from the split documents using that embedding model.
vectordb = Chroma.from_documents(documents=texts, 
                                 embedding=embedding,
                                 persist_directory=persist_directory)

# Make a Retriever

In [None]:
# Retriever over the vector store; search_kwargs k=2 requests two documents per query.
retriever = vectordb.as_retriever(search_kwargs={'k':2})

# Test the basic retriever

In [None]:
# Query the retriever directly.
# NOTE: this rebinds `docs`, which previously held the documents loaded from the sitemap.
docs = retriever.get_relevant_documents("How much is tuition?")

In [None]:
# Show the first retrieved document.
docs[0]

# Make a proper Question Retrieval chain

In [None]:
from langchain.memory import ConversationBufferMemory

from typing import Dict, Any

class AnswerConversationBufferMemory(ConversationBufferMemory):
    """Conversation buffer memory that records only the chain's ``result`` output.

    The chain this is attached to is built with ``return_source_documents=True``;
    presumably the base class cannot choose between several output keys, so this
    subclass forwards just the answer text — confirm against the LangChain version in use.
    """

    def save_context(self, inputs: Dict[str, Any], outputs: Dict[str, str]) -> None:
        """Save one turn, passing ``outputs['result']`` to the parent under the key ``'response'``."""
        answer_only = {'response': outputs['result']}
        return super().save_context(inputs, answer_only)

In [None]:
# Retrieval QA chain using the "stuff" chain type, the retriever built above,
# and the answer-only buffer memory. Source documents are returned with the result.
qa_chain_with_memory = RetrievalQA.from_chain_type(
    llm=hf_pipeline,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
    memory=AnswerConversationBufferMemory(),
)

In [None]:
# try to set the tone to "as a human-like Student Advisor"
# try to set the tone to "as a human-like Student Advisor"
# Bind the text to `template` first: the PromptTemplate below needs this name,
# which was otherwise undefined when the notebook is run top to bottom.
template = '''
You are a prospective student advisor for a university. You are talkative and provide lots of specific details using the context given by the users question. 
If you don't know the answer, you truthfully say you don't know and don't try to make up an answer. You use the information contained in the "Relevant Information" section and You always answer with unbiased, ETHICAL and safe advise.

Relevant Information:
{history}

Conversation:

Question: {question}
Helpful Answer:'''

# Overwrite the template text of the prompt already attached to the chain.
# NOTE(review): only the template string is replaced; the existing prompt's
# input_variables are not updated here, and the template has no placeholder for
# the retrieved documents. Verify that {history} is actually supplied at run time.
qa_chain_with_memory.combine_documents_chain.llm_chain.prompt.template = template

# Stand-alone PromptTemplate with the same text; it is not attached to the chain in this cell.
prompt = PromptTemplate(
    input_variables=["history", "question"], template=template)

In [None]:
def process_llm_response(llm_response):
    """Print the chain's answer followed by the sources of the retrieved documents.

    Defined here because no other cell in the notebook defines it, so the
    calls below would otherwise raise NameError on a fresh kernel.

    Args:
        llm_response: Mapping returned by the retrieval QA chain. The answer is
            read from ``"result"``; retrieved documents, if present, are read
            from ``"source_documents"``.
    """
    print(llm_response["result"])
    print("\nSources:")
    for source_doc in llm_response.get("source_documents", []):
        # Fall back to a placeholder when a document carries no "source" metadata.
        print(source_doc.metadata.get("source", "<unknown source>"))


query = "How much is tuition in engineering?"
llm_response = qa_chain_with_memory(query)
process_llm_response(llm_response)

In [None]:
# Second question to the memory-backed retrieval chain.
query = "How much is tuition in med school?"
llm_response = qa_chain_with_memory(query)
process_llm_response(llm_response)

In [None]:
# Third question, combining the topics of the two previous ones.
query = "I am interested in both medicine and engineering, do you have a program that is a mix of the two?"
llm_response = qa_chain_with_memory(query)
process_llm_response(llm_response)

# Trying out Knowledge Graph Memory

In [None]:
# workaround again
from typing import Any, Dict

# Imported in this cell because the class statement below needs the base class
# at definition time; without it the cell fails on a fresh kernel.
from langchain.memory import ConversationKGMemory


class AnswerKnowledgeGraphMemory(ConversationKGMemory):
    """Knowledge-graph memory that records only the chain's ``result`` output."""

    def save_context(self, inputs: Dict[str, Any], outputs: Dict[str, str]) -> None:
        """Save one turn, passing ``outputs['result']`` to the parent under the key ``'response'``."""
        return super(AnswerKnowledgeGraphMemory, self).save_context(inputs,{'response': outputs['result']})

In [None]:
from langchain.memory import ConversationKGMemory

# Knowledge-graph memory driven by the same LLM wrapper as the chain.
memory = AnswerKnowledgeGraphMemory(llm=hf_pipeline)

# Retrieval QA chain identical in shape to the buffer-memory one, but using the KG memory.
qa_chain_with_kg_memory = RetrievalQA.from_chain_type(
    llm=hf_pipeline,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
    memory=memory,
)

In [None]:
# try to set the tone to "as a human-like Student Advisor"
# The template exposes two placeholders: {history} and {question}.
template = '''
You are a prospective student advisor for a university. You are talkative and provide lots of specific details using the context given by the users question. 
If you don't know the answer, you truthfully say you don't know and don't try to make up an answer. You use the information contained in the "Relevant Information" section and You always answer with unbiased, ETHICAL and safe advise.

Relevant Information:
{history}

Conversation:

Question: {question}
Helpful Answer:'''

# NOTE(review): the template has no placeholder for the retrieved documents;
# verify that the chain supplies {history} and that retrieved context is not lost.
prompt = PromptTemplate(
    input_variables=["history", "question"], template=template)

In [None]:
# Replace the prompt on the chain's inner LLM chain with the custom prompt,
# then show it as the cell output.
qa_chain_with_kg_memory.combine_documents_chain.llm_chain.prompt = prompt
qa_chain_with_kg_memory.combine_documents_chain.llm_chain.prompt

In [None]:
# Show the combine-documents chain's repr to inspect its configuration.
qa_chain_with_kg_memory.combine_documents_chain

In [None]:
# First question to the knowledge-graph-memory chain.
query = "I am interested in both medicine and engineering, do you have a program that is a mix of the two?"
llm_response = qa_chain_with_kg_memory(query)
process_llm_response(llm_response)

In [None]:
# Follow-up question narrowing the topic to biomedical engineering.
query = "I like biomedical engineering, do you offer it as a graduate program?"
llm_response = qa_chain_with_kg_memory(query)
process_llm_response(llm_response)

In [None]:
# Follow-up that refers to "this program" without naming it, so it relies on earlier turns.
query = "How much is tuition for this program?"
llm_response = qa_chain_with_kg_memory(query)
process_llm_response(llm_response)

In [None]:
# Follow-up that refers to "the fee" from the previous answer.
query = "WOw, expensive! is this the fee for international students?"
llm_response = qa_chain_with_kg_memory(query)
process_llm_response(llm_response)

# Notes for next version
- Make a National Building Code chatbot 
- Make a school board chatbot
- See if a 7B model can be handled by the free Databricks tier
- Figure out how to handle data in Databricks using DBFS ([Dolly Example](https://www.dbdemos.ai/demo-notebooks.html?demoName=llm-dolly-chatbot))