<a href="https://colab.research.google.com/github/pdushie/chainlit_llm_chatbot/blob/main/ChainLit_llama2_Chatbot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Install required libraries

In [1]:
!pip install chainlit langchain langchain_community pypdf sentence-transformers faiss-gpu faiss-cpu ctransformers pyngrok

Collecting chainlit
  Downloading chainlit-1.0.502-py3-none-any.whl (4.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.4/4.4 MB[0m [31m18.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting langchain
  Downloading langchain-0.1.15-py3-none-any.whl (814 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m814.5/814.5 kB[0m [31m28.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting langchain_community
  Downloading langchain_community-0.0.32-py3-none-any.whl (1.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m37.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pypdf
  Downloading pypdf-4.2.0-py3-none-any.whl (290 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m290.4/290.4 kB[0m [31m35.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting sentence-transformers
  Downloading sentence_transformers-2.6.1-py3-none-any.whl (163 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m163.3

## Mount Google Drive to hold LLAMA2 model

In [2]:
# Mount Google Drive at /content/drive; the copy cells below read from /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Download llama2-7b model

In [3]:
#!wget https://huggingface.co/localmodels/Llama-2-7B-Chat-ggml/resolve/main/llama-2-7b-chat.ggmlv3.q8_0.bin?download=true

## Copy llama2 model to my Google Drive

In [4]:
#%cp /content/llama-2-7b-chat.ggmlv3.q8_0.bin /content/drive/MyDrive/

In [5]:
#%cp -r /content/data_interview/ /content/drive/MyDrive

## Copy llama2 model from Google Drive to content

In [6]:
%cp /content/drive/MyDrive/llama-2-7b-chat.ggmlv3.q8_0.bin /content/

## Copy the PDF files in data_interview (the source documents to be indexed, not training data) to /content and rename the folder to data

In [7]:
%cp -r /content/drive/MyDrive/data_interview /content/
%mv /content/data_interview /content/data

## Copy my public folder from my Google Drive to /content and rename it from public_llm to public

In [8]:
%cp -r /content/drive/MyDrive/public_llm /content/
%mv /content/public_llm /content/public

# Create and write model.py to file

In [9]:
%%writefile model.py
from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.prompts import PromptTemplate
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_community.llms import CTransformers
from langchain.chains import RetrievalQA
import time
import chainlit as cl

# Directory of the saved FAISS index that qa_bot() loads.
DB_FAISS_PATH = 'vectorstore/'

# Prompt text for the QA chain. {context} and {question} are the two placeholders
# that set_custom_prompt() declares as the template's input variables.
custom_prompt_template = """Use the following information to answer the user's question.
If you don't know the answer, let the user know that.

Context: {context}
Question: {question}

Return a useful answer.
Answer:
"""

def set_custom_prompt():
    """Build the prompt used by the retrieval QA chain.

    Returns:
        A PromptTemplate wrapping ``custom_prompt_template`` with the input
        variables ``context`` and ``question``.
    """
    return PromptTemplate(
        input_variables=['context', 'question'],
        template=custom_prompt_template,
    )


def retrieval_qa_chain(llm, prompt, db):
    """Assemble a 'stuff' RetrievalQA chain over a vector store.

    Args:
        llm: Language model that generates the answer.
        prompt: Prompt handed to the chain through ``chain_type_kwargs``.
        db: Vector store; its retriever is configured with ``k=2``.

    Returns:
        The RetrievalQA chain, configured to also return its source documents.
    """
    top_two_retriever = db.as_retriever(search_kwargs={'k': 2})
    return RetrievalQA.from_chain_type(
        llm=llm,
        chain_type='stuff',
        retriever=top_two_retriever,
        return_source_documents=True,
        chain_type_kwargs={'prompt': prompt},
    )


def load_llm():
    """Create the Llama-2 chat model through the CTransformers wrapper.

    ``model`` is a Hugging Face repo id, not a path to a local file. To use a
    local model file instead, pass its path as ``model``.

    Returns:
        A CTransformers LLM limited to 200 new tokens with temperature 0.5.
    """
    # Generation settings have to be supplied through `config`; CTransformers
    # forwards only that dict to the underlying model, so passing them as
    # top-level keyword arguments does not reach the model.
    generation_config = {
        'max_new_tokens': 200,
        'temperature': 0.5,
    }
    llm = CTransformers(
        model="TheBloke/Llama-2-7B-Chat-GGML",
        model_type="llama",
        config=generation_config,
    )
    return llm


def qa_bot():
    """Build the full question-answering chain.

    Creates the MiniLM sentence-embedding model on the 'cuda' device, loads the
    FAISS index stored at ``DB_FAISS_PATH``, and wires the index, the LLM and
    the custom prompt together.

    Returns:
        The chain produced by ``retrieval_qa_chain``.
    """
    embedder = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2",
        model_kwargs={'device': 'cuda'},
    )
    # allow_dangerous_deserialization opts in to loading a pickled index;
    # only point DB_FAISS_PATH at an index you created yourself.
    vector_store = FAISS.load_local(
        DB_FAISS_PATH,
        embedder,
        allow_dangerous_deserialization=True,
    )
    return retrieval_qa_chain(load_llm(), set_custom_prompt(), vector_store)

def final_result(query):
    """Answer a single query with a freshly built QA chain.

    Args:
        query: The user's question.

    Returns:
        Whatever the chain returns when called with ``{'query': query}``.
    """
    chain = qa_bot()
    return chain({'query': query})

############
# Chainlit #
############
@cl.on_chat_start
async def start():
    """Prepare a new chat session: build the QA chain and greet the user.

    The placeholder message is sent *before* the chain is built, so the user
    gets feedback while the embeddings, FAISS index and LLM are loading. The
    chain is stored in the user session under the key "chain".
    """
    msg = cl.Message(content="Getting things ready...")
    await msg.send()

    # qa_bot() is synchronous and slow; run it off the event loop so the
    # server stays responsive while it loads.
    chain = await cl.make_async(qa_bot)()
    cl.user_session.set("chain", chain)

    msg.content = "Hi, Welcome to the Ace Interview Bot. I know a lot than you can imagine about interviews.  How can I be of assistance today?"
    await msg.update()


def sync_func():
    """Block the calling thread for five seconds, then return a greeting."""
    greeting = "Hello!"
    time.sleep(5)
    return greeting


@cl.on_message
async def main(message: cl.Message):
    """Answer an incoming chat message with the session's QA chain.

    Args:
        message: The user's message; its ``content`` is passed to the chain.
    """
    qa_chain = cl.user_session.get("chain")

    handler = cl.AsyncLangchainCallbackHandler(
        stream_final_answer=True,
        answer_prefix_tokens=["FINAL", "ANSWER"],
    )
    handler.answer_reached = True

    result = await qa_chain.acall(message.content, callbacks=[handler])

    # Only the answer text is sent back. The chain also returns the retrieved
    # documents under "source_documents", which are not appended to the reply.
    reply = cl.Message(content=result["result"])
    await reply.send()




Writing model.py


In [10]:
%%writefile ingest.py

from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS


# Folder scanned for *.pdf files by load_vector_db().
DATA_PATH = "data/"
# Folder the FAISS index is saved to by load_vector_db().
FAISS_PATH = "vectorstore/"


def load_vector_db():
    """Build the FAISS index from the PDFs in ``DATA_PATH`` and save it.

    Loads every ``*.pdf`` in ``DATA_PATH``, splits the documents into chunks of
    500 characters with 50 characters of overlap, embeds the chunks with the
    MiniLM sentence-embedding model on the 'cuda' device, and writes the
    resulting index to ``FAISS_PATH``.
    """
    pdf_loader = DirectoryLoader(DATA_PATH, glob="*.pdf", loader_cls=PyPDFLoader)
    documents = pdf_loader.load()

    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    chunks = splitter.split_documents(documents)

    embedder = HuggingFaceEmbeddings(
        model_name='sentence-transformers/all-MiniLM-L6-v2',
        model_kwargs={'device': 'cuda'},
    )

    FAISS.from_documents(chunks, embedder).save_local(FAISS_PATH)



# Build and save the index only when this file is run as a script, not when it is imported.
if __name__ == '__main__':
    load_vector_db()



Writing ingest.py


## Run python ingest.py

In [11]:
!python ingest.py

modules.json:   0% 0.00/349 [00:00<?, ?B/s]modules.json: 100% 349/349 [00:00<00:00, 2.04MB/s]
config_sentence_transformers.json: 100% 116/116 [00:00<00:00, 411kB/s]
README.md: 100% 10.7k/10.7k [00:00<00:00, 43.1MB/s]
sentence_bert_config.json: 100% 53.0/53.0 [00:00<00:00, 330kB/s]
config.json: 100% 612/612 [00:00<00:00, 4.01MB/s]
model.safetensors: 100% 90.9M/90.9M [00:00<00:00, 270MB/s]
tokenizer_config.json: 100% 350/350 [00:00<00:00, 2.31MB/s]
vocab.txt: 100% 232k/232k [00:00<00:00, 901kB/s]
tokenizer.json: 100% 466k/466k [00:00<00:00, 3.53MB/s]
special_tokens_map.json: 100% 112/112 [00:00<00:00, 734kB/s]
1_Pooling/config.json: 100% 190/190 [00:00<00:00, 1.24MB/s]


## Run python model.py once (this creates the default Chainlit config and translation files)

In [12]:
!python model.py

2024-04-11 02:20:25 - Created default config file at /content/.chainlit/config.toml
2024-04-11 02:20:25 - Created default translation directory at /content/.chainlit/translations
2024-04-11 02:20:25 - Created default translation file at /content/.chainlit/translations/en-US.json


## Start the Chainlit app (model.py) in the background, logging to /content/logs.txt

In [13]:
!chainlit run model.py &>/content/logs.txt &

## Copy customized chainlit.md file from MyDrive to content

In [14]:
!cp /content/drive/MyDrive/chainlit.md /content

## Ngrok setup to enable public access to colab project (Chainlit LLM App)

In [15]:
# Save ngrok authtoken - Go to ngrok and create an account
!ngrok config add-authtoken 2esw16uUBmWdB4r3HtL5B6coxID_3zcxjYgKcQYUqiR6TkCkJ

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


## Create Ngrok tunnel

In [16]:
# Open a public ngrok tunnel to local port 8000 and print its URL.
# NOTE(review): assumes the Chainlit server started above listens on port 8000;
# the port is not set explicitly anywhere in this notebook - confirm.
from pyngrok import ngrok
ngrok_tunnel = ngrok.connect(8000)
print('Public URL:', ngrok_tunnel.public_url)

Public URL: https://37c0-34-16-160-106.ngrok-free.app


In [17]:
#ngrok.kill()