1. Install required packages

In [None]:
! pip install -q transformers==4.41.2
! pip install -q bitsandbytes==0.43.1
! pip install -q accelerate==0.31.0
! pip install -q langchain==0.2.5
! pip install -q langchainhub==0.1.20
! pip install -q langchain-chroma==0.1.1
! pip install -q langchain-community==0.2.5
! pip install -q langchain-openai==0.1.9
! pip install -q langchain_huggingface==0.0.3
! pip install -q chainlit==1.1.304
! pip install -q python-dotenv==1.0.1
! pip install -q pypdf==4.2.0
! npm install -g localtunnel
! pip install -q numpy==1.24.4

[K[?25h/tools/node/bin/lt -> /tools/node/lib/node_modules/localtunnel/bin/lt.js
[K[?25h+ localtunnel@2.0.2
updated 1 package in 0.924s
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.3/17.3 MB[0m [31m36.4 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
chainlit 1.1.304 requires numpy<2.0,>=1.26; python_version >= "3.9", but you have numpy 1.24.4 which is incompatible.
pandas-stubs 2.0.3.230814 requires numpy>=1.25.0; python_version >= "3.9", but you have numpy 1.24.4 which is incompatible.[0m[31m
[0m

2. Import

In [None]:
import chainlit as cl
import torch
from chainlit.types import AskFileResponse

from transformers import BitsAndBytesConfig
from transformers import AutoTokenizer , AutoModelForCausalLM , pipeline
from langchain_huggingface.llms import HuggingFacePipeline

from langchain.memory import ConversationBufferMemory
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain.chains import ConversationalRetrievalChain

from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma
from langchain_community.document_loaders import PyPDFLoader , TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain import hub

3. Define text_splitter and embedding

In [None]:
# Splitter shared by process_file: chunks are capped at chunk_size and
# neighbouring chunks share chunk_overlap units of text.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=100,
)

# Embedding model built with the library defaults (no model name is given here);
# it is reused for every vector store created in this notebook.
embedding = HuggingFaceEmbeddings()

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]



config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

4. Define process file function

In [None]:
def process_file(file:AskFileResponse):
  """Load an uploaded file and split it into chunks.

  Args:
    file: The uploaded file. ``file.type`` (MIME type) selects the loader and
      ``file.path`` is handed to it.

  Returns:
    The chunks produced by ``text_splitter.split_documents``. Each chunk's
    ``metadata["source"]`` is rewritten to ``"<chunk index>.<original source>"``.

  Raises:
    ValueError: If ``file.type`` is neither ``text/plain`` nor ``application/pdf``.
  """
  if file.type == "text/plain":
    loader_cls = TextLoader
  elif file.type == "application/pdf":
    loader_cls = PyPDFLoader
  else:
    # Previously this fell through with no loader bound and failed later with an
    # UnboundLocalError; fail early with an explicit message instead.
    raise ValueError(
        f"Unsupported file type {file.type!r}; expected 'text/plain' or 'application/pdf'"
    )

  documents = loader_cls(file.path).load()
  docs = text_splitter.split_documents(documents)
  # Prefix each chunk's source with its index so chunks can be told apart.
  for i, doc in enumerate(docs):
    doc.metadata["source"] = f"{i}.{doc.metadata['source']}"
  return docs

5. Define Chroma database

In [None]:
def get_vector_db(file:AskFileResponse):
  """Build a Chroma vector store from an uploaded file.

  The file is chunked with ``process_file``, the chunks are stored in the
  Chainlit user session under ``"docs"``, and then embedded with the shared
  ``embedding`` model.

  Args:
    file: The uploaded file to index.

  Returns:
    The ``Chroma`` store holding the chunks of this file only.
  """
  import uuid  # local import: only needed to name the collection

  docs = process_file(file)
  cl.user_session.set("docs",docs)
  # Without an explicit name every call shares the default in-memory collection,
  # so chunks from earlier uploads in the same process would be retrieved too.
  collection_name = f"upload-{uuid.uuid4().hex}"
  db = Chroma.from_documents(docs,embedding,collection_name=collection_name)
  return db

6. Define LLM model

In [None]:
def get_huggingface_llm(model_name:str="lmsys/vicuna-7b-v1.5",max_new_token:int=512):
  """Load a causal LM with 4-bit NF4 quantization and wrap it for LangChain.

  Args:
    model_name: Hugging Face model id passed to ``from_pretrained`` for both
      the model and the tokenizer.
    max_new_token: Forwarded to the pipeline as ``max_new_tokens``.

  Returns:
    A ``HuggingFacePipeline`` wrapping a ``text-generation`` pipeline.
  """
  nf4_config = BitsAndBytesConfig(
      # The keyword is `load_in_4bit`; the misspelled `load_in_4bits` was not
      # recognised, so 4-bit loading was never actually switched on.
      load_in_4bit=True,
      bnb_4bit_quant_type="nf4",
      bnb_4bit_use_double_quant=True,
      # `torch.bffloat16` does not exist (AttributeError); the dtype is bfloat16.
      bnb_4bit_compute_dtype=torch.bfloat16,
  )
  model = AutoModelForCausalLM.from_pretrained(
      model_name,
      quantization_config=nf4_config,
      low_cpu_mem_usage=True,
  )
  tokenizer = AutoTokenizer.from_pretrained(model_name)
  model_pipeline = pipeline(
      "text-generation",
      model=model,
      tokenizer=tokenizer,
      max_new_tokens=max_new_token,
      device_map="auto",
      # Reuse EOS as the padding token id for generation.
      pad_token_id=tokenizer.eos_token_id,
  )
  llm = HuggingFacePipeline(pipeline=model_pipeline)
  return llm

8. Define the on_chat_start function

In [None]:
welcome_message = """Welcome to the PDF QA! To get started :
1. Upload a PDF or text file
2. Ask a question about the file
"""

# Process-wide cache for the LLM: get_huggingface_llm() calls from_pretrained,
# so it should run once per process rather than once per chat session.
_shared_llm = None

def _get_shared_llm():
  """Return the cached LLM, loading it on first use."""
  global _shared_llm
  if _shared_llm is None:
    _shared_llm = get_huggingface_llm()
  return _shared_llm

@cl.on_chat_start
async def on_chat_start():
  """Ask for a file, index it, and store a retrieval chain in the user session.

  The handler re-sends the upload prompt until a file is received, builds the
  vector store for that file, wires it with the LLM and a conversation memory
  into a ``ConversationalRetrievalChain``, and saves the chain in the user
  session under ``"chain"``.
  """
  files = None
  # send() yields None when nothing was uploaded, so prompt again until a file arrives.
  while files is None:
    files = await cl.AskFileMessage(content=welcome_message,accept=["text/plain","application/pdf"],max_size_mb=20,timeout=180).send()
  file = files[0]

  msg = cl.Message(content=f"Processing file {file.name}...",disable_feedback=True)
  await msg.send()

  # Both steps are blocking, so run them through make_async.
  vector_db = await cl.make_async(get_vector_db)(file)
  llm = await cl.make_async(_get_shared_llm)()

  message_history = ChatMessageHistory()
  memory = ConversationBufferMemory(memory_key="chat_history",chat_memory=message_history,output_key="answer",return_messages=True)
  retriever = vector_db.as_retriever(search_type="mmr",search_kwargs={"k":3})
  chain = ConversationalRetrievalChain.from_llm(
      llm=llm,
      chain_type="stuff",
      retriever=retriever,
      memory=memory,
      return_source_documents=True,
  )

  msg.content = f"{file.name} processed. You can now ask questions!"
  await msg.update()
  cl.user_session.set("chain",chain)

9. Define on_message function


In [None]:
@cl.on_message
async def on_message(message:cl.Message):
  """Answer a user message with the session's retrieval chain.

  Reads the chain stored under ``"chain"`` in the user session, runs it on
  ``message.content``, and replies with the answer. Each retrieved source
  document is attached as a ``cl.Text`` element named ``source_<index>``.

  Args:
    message: The incoming Chainlit message.
  """
  chain = cl.user_session.get("chain")
  if chain is None:
    # No chain has been stored in the session yet, so there is nothing to query.
    await cl.Message(content="Please upload a file first.").send()
    return

  # Chainlit exports the handler as AsyncLangchainCallbackHandler (lowercase "c").
  cb = cl.AsyncLangchainCallbackHandler()
  # Callbacks must be supplied through `config`; a bare `callbacks=` keyword
  # passed to ainvoke is not picked up.
  res = await chain.ainvoke(message.content,config={"callbacks":[cb]})
  answer = res["answer"]
  source_documents = res["source_documents"]

  text_elements = [
      cl.Text(content=source_doc.page_content,name=f"source_{source_idx}")
      for source_idx,source_doc in enumerate(source_documents or [])
  ]
  sources_names = [text_el.name for text_el in text_elements]

  # The original tested an undefined `source_names` (NameError) and its
  # "No Sources found" branch could never run; both cases are handled here.
  if sources_names:
    answer += f"\nSources : {', '.join(sources_names)}"
  else:
    answer += "\nNo Sources found"

  msg = cl.Message(content=answer,elements=text_elements)
  await msg.send()

10. Initialize chainlit app

In [None]:
# Start the Chainlit server in the background on port 8000; its stdout and
# stderr are redirected to /content/logs.txt.
# NOTE(review): no cell visible in this notebook writes app.py — confirm the code
# cells above are saved to that file (e.g. with %%writefile) before running this.
!chainlit run app.py --host 0.0.0.0 --port 8000 &>/content/logs.txt &

11. Publish the app (expose it through localtunnel)

In [None]:
# `import urllib` alone does not guarantee the `request` submodule is loaded,
# so import it explicitly.
import urllib.request

# Fetch this machine's public IPv4 address; the message below presents it as the
# password/endpoint IP for localtunnel.
with urllib.request.urlopen('https://ipv4.icanhazip.com') as response:
  public_ip = response.read().decode('utf-8').strip("\n")

print("Password/Enpoint IP for localtunnel is:", public_ip)

Password/Enpoint IP for localtunnel is: 34.135.48.88


In [None]:
# Open a localtunnel to local port 8000, requesting the "aivn-simple-rag" subdomain.
# NOTE(review): the recorded output shows a different URL, so the requested
# subdomain was presumably not granted — use whatever URL is printed below.
!lt --port 8000 --subdomain aivn-simple-rag

your url is: https://polite-dog-94.loca.lt
