<a href="https://colab.research.google.com/github/thaithinhhl/LLM-PDF-QA/blob/main/LLM_PDF_QA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -q transformers==4.41.2
!pip install -q bitsandbytes==0.43.1
!pip install -q accelerate==0.31.0
!pip install -q langchain==0.2.5
!pip install -q langchainhub==0.1.20
!pip install -q langchain-chroma==0.1.1
!pip install -q langchain-community==0.2.5
!pip install -q langchain_huggingface==0.0.3
!pip install -q python-dotenv==1.0.1
!pip install -q pypdf==4.2.0
!pip install -q numpy==1.24.4

## VECTOR DATABASE ##

In [None]:
  import torch

  from transformers import BitsAndBytesConfig
  from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
  from langchain_huggingface import HuggingFaceEmbeddings
  from langchain_huggingface . llms import HuggingFacePipeline

  from langchain . memory import ConversationBufferMemory
  from langchain_community . chat_message_histories import ChatMessageHistory
  from langchain_community . document_loaders import PyPDFLoader, TextLoader
  from langchain . chains import ConversationalRetrievalChain

  from langchain_chroma import Chroma
  from langchain_text_splitters import RecursiveCharacterTextSplitter
  from langchain_core . runnables import RunnablePassthrough
  from langchain_core . output_parsers import StrOutputParser
  from langchain import hub

In [None]:
# Mount Google Drive at /content/drive so the PDF stored under MyDrive can be
# read by the following cells (this import is only available on Colab).
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# LOAD PDF: read the tutorial PDF from the mounted Drive folder.
FILE_PATH = '/content/drive/MyDrive/YOLOv10_Tutorials.pdf'
Loader = PyPDFLoader  # loader class used for this file
loader = Loader(FILE_PATH)
documents = loader.load()

In [None]:
# TEXT SPLITTER: chunk size 1000 with an overlap of 100 between chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=100,
)

In [None]:
# Split the loaded documents into chunks, then show how many chunks were
# produced and what the first one looks like.
docs = text_splitter.split_documents(documents)

print(f'Number of sub documents: {len(docs)}', docs[0], sep='\n')

Number of sub documents: 33
page_content='AI VIET NAM – AI COURSE 2024\nTutorial: Phát hiện đối tượng trong ảnh với\nYOLOv10\nDinh-Thang Duong, Nguyen-Thuan Duong, Minh-Duc Bui và\nQuang-Vinh Dinh\nNgày 20 tháng 6 năm 2024\nI. Giới thiệu\nObject Detection (Tạm dịch: Phát hiện đối tượng) là một bài toán cổ điển thuộc lĩnh vực\nComputer Vision. Mục tiêu của bài toán này là tự động xác định vị trí của các đối tượng trong\nmột tấm ảnh. Tính tới thời điểm hiện tại, đã có rất nhiều phương pháp được phát triển nhằm\ngiải quyết hiệu quả bài toán này. Trong đó, các phương pháp thuộc họ YOLO (You Only Look\nOnce) thu hút được sự chú ý rất lớn từ cộng đồng nghiên cứu bởi độ chính xác và tốc độ thực\nthi mà loại mô hình này mang lại.\nHình 1: Logo của mô hình YOLO. Ảnh: link.\nThời gian vừa qua, Ao Wang và các cộng sự tại Đại học Thanh Hoa (Tsinghua University)\nđã đề xuất mô hình YOLOv10 trong bài báo YOLOv10: Real-Time End-to-End Object\nDetection [10]. Với những cải tiến mới, mô hình đã đạt đượ

In [None]:
# Create the embedding instance used for vectorization (string -> vector).
embedding = HuggingFaceEmbeddings()  # built with its default arguments; used to convert docs to vectors





## CREATE VECTOR DATABASE ##

In [None]:
# Index every chunk in a Chroma vector store using the embedding instance,
# then expose the store through the retriever interface.
vector_db = Chroma.from_documents(
    embedding=embedding,
    documents=docs,
)
retriever = vector_db.as_retriever()

In [None]:
# Sanity check: query the retriever and report how many documents come back.
result = retriever.invoke("What is YOLO?")
print("Number of relevant documents:", len(result))

Number of relevant documents: 4


## CREATE THE LLM (VICUNA) ##

In [None]:
# Quantization settings handed to the model loader below: 4-bit loading with
# the "nf4" quant type, double quantization and bfloat16 as compute dtype.
nf4_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

In [None]:
# Checkpoint to load; the model uses the quantization config defined above.
MODEL_NAME = "lmsys/vicuna-7b-v1.5"

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    quantization_config=nf4_config,
    low_cpu_mem_usage=True,
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

pytorch_model.bin.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

pytorch_model-00001-of-00002.bin:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

pytorch_model-00002-of-00002.bin:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
# Wrap model and tokenizer in a text-generation pipeline (the EOS token id is
# reused as pad token id), then adapt the pipeline to the LangChain interface.
model_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=512,
    pad_token_id=tokenizer.eos_token_id,
    device_map="auto",
)

llm = HuggingFacePipeline(pipeline=model_pipeline)

In [None]:
from langchain.chains import SequentialChain, TransformChain

# Pull the "rlm/rag-prompt" prompt from the LangChain Hub.
# NOTE(review): `prompt` is not referenced by the chain steps defined below in
# this cell, which format their own prompt string — confirm it is still needed.
prompt = hub.pull("rlm/rag-prompt")

# Define steps for the chain
def format_docs(docs):
    """Join the text of retrieved documents into a single context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        The ``page_content`` values separated by a blank line; an empty
        string when ``docs`` is empty.
    """
    return "\n\n".join(doc.page_content for doc in docs)


def retriever_step(inputs):
    """Fetch the documents relevant to the question and build the context.

    Args:
        inputs: Mapping that contains the user question under ``"question"``.

    Returns:
        A dict with the joined document text under ``"context"`` and the
        unchanged question under ``"retrieved_question"``.
    """
    question = inputs['question']
    # `invoke` is the retriever entry point already used elsewhere in this
    # notebook; it replaces the deprecated `get_relevant_documents`.
    retrieved_docs = retriever.invoke(question)
    context = format_docs(retrieved_docs)
    return {"context": context, "retrieved_question": question}

# Cue placed at the end of the prompt so the model continues with its answer.
ANSWER_MARKER = "Answer:"


def _build_prompt(inputs):
    """Render the retrieved context and the question into the LLM prompt."""
    prompt_text = (
        f"Context: {inputs['context']}\n\n"
        f"Question: {inputs['retrieved_question']}\n\n"
        f"{ANSWER_MARKER}"
    )
    return {"prompt_input": prompt_text}


def _run_llm(inputs):
    """Call the LLM and return only the text generated after the prompt."""
    prompt_text = inputs["prompt_input"]
    # `invoke` replaces the deprecated direct call `llm(...)`.
    generated = llm.invoke(prompt_text)
    # The pipeline output may start with an echo of the prompt; drop it so the
    # next steps only see the newly generated text.
    if generated.startswith(prompt_text):
        generated = generated[len(prompt_text):]
    return {"llm_output": generated}


def _parse_output(inputs):
    """Pass the LLM output through the string output parser."""
    return {"parsed_output": StrOutputParser().parse(inputs["llm_output"])}


# Step 1: question -> retrieved context
retriever_chain = TransformChain(
    transform=retriever_step,
    input_variables=["question"],
    output_variables=["context", "retrieved_question"]
)

# Step 2: context + question -> prompt text
prompt_chain = TransformChain(
    transform=_build_prompt,
    input_variables=["context", "retrieved_question"],
    output_variables=["prompt_input"]
)

# Step 3: prompt text -> generated text
llm_chain = TransformChain(
    transform=_run_llm,
    input_variables=["prompt_input"],
    output_variables=["llm_output"]
)

# Step 4: generated text -> parsed string
output_parser_chain = TransformChain(
    transform=_parse_output,
    input_variables=["llm_output"],
    output_variables=["parsed_output"]
)

# Create RAG chain
rag_chain = SequentialChain(
    chains=[retriever_chain, prompt_chain, llm_chain, output_parser_chain],
    input_variables=["question"],
    output_variables=["parsed_output"]
)

USER_QUESTION = "YOLOv10 là gì?"
output = rag_chain.invoke({"question": USER_QUESTION})
# The prompt echo is already removed in the LLM step, so no marker splitting
# (which raised IndexError whenever the marker was missing) is needed here.
answer = output["parsed_output"].strip()
print(answer)

In [None]:
# Tải các gói thư viện
!pip install -q transformers==4.41.2
!pip install -q bitsandbytes==0.43.1
!pip install -q accelerate==0.31.0
!pip install -q langchain==0.2.5
!pip install -q langchainhub==0.1.20
!pip install -q langchain-chroma==0.1.1
!pip install -q langchain-community==0.2.5
!pip install -q langchain-openai==0.1.9
!pip install -q langchain_huggingface==0.0.3
!pip install -q chainlit==1.1.304
!pip install -q python-dotenv==1.0.1
!pip install -q pypdf==4.2.0
!npm install -q localtunnel
!pip install -q numpy==1.24.4


In [None]:
import chainlit as cl
import torch

from chainlit.types import AskFileResponse

from transformers import BitsAndBytesConfig
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from langchain_huggingface.llms import HuggingFacePipeline

from langchain.memory import ConversationBufferMemory
from langchain_community.chat_message_histories import ChatMessageHistory

from langchain.chains import ConversationalRetrievalChain
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma
from langchain_community.document_loaders import PyPDFLoader, TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain import hub


In [None]:
# Re-create the instances defined in the previous file
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=100,
)
embedding = HuggingFaceEmbeddings()

# Build the function that processes the uploaded input file
def process_file(file: AskFileResponse):
    """Load an uploaded text or PDF file and split it into chunks.

    Args:
        file: Uploaded file; its ``type`` selects the loader and its ``path``
            is the location the loader reads from.

    Returns:
        The list of chunks produced by ``text_splitter``; the ``"source"``
        metadata of each chunk is set to ``"source_<index>"``.

    Raises:
        ValueError: If ``file.type`` is neither ``"text/plain"`` nor
            ``"application/pdf"``.
    """
    if file.type == "text/plain":
        Loader = TextLoader
    elif file.type == "application/pdf":
        Loader = PyPDFLoader
    else:
        # Without this branch `Loader` stays unbound and the function fails
        # with an obscure UnboundLocalError a few lines later.
        raise ValueError(f"Unsupported file type: {file.type!r}")

    loader = Loader(file.path)
    documents = loader.load()
    docs = text_splitter.split_documents(documents)
    for i, doc in enumerate(docs):
        doc.metadata["source"] = f"source_{i}"
    return docs

# Build the function that initialises the Chroma database
def get_vector_db(file: AskFileResponse):
    """Create a Chroma vector store from the chunks of an uploaded file.

    The chunks returned by ``process_file`` are also stored in the Chainlit
    user session under the key ``"docs"``.

    Args:
        file: Uploaded file, forwarded unchanged to ``process_file``.

    Returns:
        The result of ``Chroma.from_documents`` for the chunks and the
        module-level ``embedding`` instance.
    """
    chunks = process_file(file)
    cl.user_session.set("docs", chunks)
    return Chroma.from_documents(documents=chunks, embedding=embedding)


In [None]:
def get_huggingface_llm(model_name: str = "lmsys/vicuna-7b-v1.5", max_new_token: int = 512):
    """Load a 4-bit quantized causal LM and wrap it for use with LangChain.

    Args:
        model_name: Checkpoint identifier handed to the model and tokenizer
            loaders.
        max_new_token: Value passed to the pipeline as ``max_new_tokens``.

    Returns:
        A ``HuggingFacePipeline`` wrapping the text-generation pipeline.
    """
    quantization = BitsAndBytesConfig(
        bnb_4bit_compute_dtype=torch.bfloat16,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
        load_in_4bit=True,
    )
    causal_lm = AutoModelForCausalLM.from_pretrained(
        model_name,
        low_cpu_mem_usage=True,
        quantization_config=quantization,
    )
    tok = AutoTokenizer.from_pretrained(model_name)

    # The EOS token id doubles as the pad token id for generation.
    generation_settings = {
        "model": causal_lm,
        "tokenizer": tok,
        "max_new_tokens": max_new_token,
        "pad_token_id": tok.eos_token_id,
        "device_map": "auto",
    }
    text_generator = pipeline("text-generation", **generation_settings)
    return HuggingFacePipeline(pipeline=text_generator)

# Build the LLM once at module level, using the function's default model name
# and token limit.
llm = get_huggingface_llm()


In [None]:
# Initialise the welcome message shown when asking the user for a file
welcome_message = (
    "Welcome to the PDF QA! To get started:\n"
    "1. Upload a PDF or text file\n"
    "2. Ask a question about the file\n"
)


In [None]:
# Initialise the on_chat_start handler
@cl.on_chat_start
async def on_chat_start():
    """Ask for a file, index it and store a retrieval chain in the session.

    The upload prompt is repeated until a file is received. The first file is
    turned into a vector store, and a ``ConversationalRetrievalChain`` with
    buffer memory is saved in the user session under the key ``"chain"``.
    """
    files = None
    # Sending the request again whenever it comes back as None keeps the
    # prompt on screen until the user provides a file.
    while files is None:
        files = await cl.AskFileMessage(
            content=welcome_message,
            accept=["text/plain", "application/pdf"],
            max_size_mb=20,
            timeout=180,
        ).send()
    file = files[0]

    msg = cl.Message(content=f"Processing `{file.name}`...", disable_feedback=True)
    await msg.send()

    # Indexing is synchronous work; `make_async` lets it be awaited here.
    vector_db = await cl.make_async(get_vector_db)(file)

    message_history = ChatMessageHistory()
    memory = ConversationBufferMemory(
        memory_key="chat_history",
        output_key="answer",
        chat_memory=message_history,
        return_messages=True,
    )

    retriever = vector_db.as_retriever(search_type="mmr", search_kwargs={'k': 3})

    chain = ConversationalRetrievalChain.from_llm(
        # The notebook defines the model as `llm`; the former `LLM` was an
        # undefined name and raised NameError when a chat started.
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        memory=memory,
        return_source_documents=True
    )

    msg.content = f"`{file.name}` processed. You can now ask questions!"
    await msg.update()

    cl.user_session.set("chain", chain)


In [None]:
# Initialise the on_message handler
@cl.on_message
async def on_message(message: cl.Message):
    """Answer a user message with the chain stored in the user session.

    The reply contains the chain's answer followed by the names of the
    returned source documents, or by "No sources found" when there are none.
    Every source document is attached to the reply as a text element.

    Args:
        message: Incoming message; its ``content`` is used as the question.
    """
    chain = cl.user_session.get("chain")
    cb = cl.AsyncLangchainCallbackHandler()
    # Callbacks are passed through the runnable config: a bare `callbacks=`
    # keyword is not read by the chain's `ainvoke`.
    res = await chain.ainvoke(message.content, config={"callbacks": [cb]})
    answer = res["answer"]
    source_documents = res["source_documents"]

    text_elements = [
        cl.Text(content=source_doc.page_content, name=f"source_{source_idx}")
        for source_idx, source_doc in enumerate(source_documents or [])
    ]

    if text_elements:
        source_names = [text_el.name for text_el in text_elements]
        answer += f"\nSources: {', '.join(source_names)}"
    else:
        # Previously unreachable: this note is now added when the chain
        # returns no source documents.
        answer += "\nNo sources found"

    await cl.Message(content=answer, elements=text_elements).send()
