In [None]:
from langchain.llms import LlamaCpp

# If you followed the instructions and use the q4_0 model,
# your path should be: [UPDATE_AND_PUT_YOUR_PATH_TO_MODEL_HERE]/llama.cpp/models/llama-2-7b-chat/ggml-model-q4_0.gguf
# The following code uses the q5_0 model.
# Local llama.cpp-backed LLM used to generate the answers.
# Replace [PATH_TO_YOUR_MODEL] with the directory that contains your llama.cpp checkout.
llm = LlamaCpp(
    model_path="[PATH_TO_YOUR_MODEL]/llama.cpp/models/llama-2-7b-chat/ggml-model-q5_0.gguf",
    # Sizing / hardware knobs (semantics per the LangChain LlamaCpp docs).
    n_ctx=2048,        # token context window
    n_batch=512,       # tokens processed in parallel
    n_gpu_layers=2,    # layers loaded into GPU memory
    f16_kv=True,       # half-precision key/value cache
    # Generation behaviour.
    temperature=0.25,  # sampling temperature
    verbose=True,
)


In [None]:
from langchain.embeddings import LlamaCppEmbeddings

# Embedding model used when indexing document chunks in the vector store.
# It points at the same q5_0 GGUF file as the `llm` cell.
embeddings = LlamaCppEmbeddings(
    verbose=False,
    model_path="[PATH_TO_YOUR_MODEL]/llama.cpp/models/llama-2-7b-chat/ggml-model-q5_0.gguf",
)


In [None]:
from langchain.document_loaders import TextLoader
from langchain.document_loaders import PDFPlumberLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma

def init_vectorstore(file_path):
    """Load a text or PDF file, split it into chunks, and index the chunks in Chroma.

    Args:
        file_path: Path to the document. The extension selects the loader:
            ``.pdf`` uses ``PDFPlumberLoader`` and ``.txt`` uses ``TextLoader``.
            The extension match is case-insensitive.

    Returns:
        The ``Chroma`` vector store built from the document chunks, embedded
        with the module-level ``embeddings`` object.

    Raises:
        ValueError: If ``file_path`` does not end in ``.pdf`` or ``.txt``.
    """
    # Compare on a lower-cased copy so "REPORT.PDF" is handled like "report.pdf";
    # the original path is still what gets handed to the loader.
    lowered_path = file_path.lower()
    if lowered_path.endswith('.pdf'):
        contents = PDFPlumberLoader(file_path).load()
    elif lowered_path.endswith('.txt'):
        contents = TextLoader(file_path).load()
    else:
        # Fail fast with a clear message instead of passing None to the
        # splitter and crashing with an unrelated-looking error further down.
        raise ValueError(
            f"Unsupported file type for {file_path!r}: only .txt and .pdf files are supported."
        )

    # length_function=len means chunk_size and chunk_overlap are measured in characters.
    text_spliter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=100,
        length_function=len,
        add_start_index=True
    )

    documents = text_spliter.split_documents(contents)
    chromadb = Chroma.from_documents(documents, embeddings)
    return chromadb

In [None]:
from langchain.prompts import PromptTemplate

# Prompt text for the QA chain. The system instruction sits between <<SYS>> and
# <</SYS>>, and the whole request is wrapped in [INST] ... [/INST] markers
# (presumably the Llama 2 chat format, matching the llama-2-7b-chat model path).
# {context} and {question} are the template placeholders.
prompt_template = """
[INST]<<SYS>> You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.<</SYS>> 

{context} 

Question: {question} 
Answer: [/INST]
"""

# PromptTemplate built from the text above; it is later handed to the QA chain
# through chain_type_kwargs={"prompt": prompt}.
prompt = PromptTemplate.from_template(prompt_template, verbose=True)

In [None]:
import gradio as gr
from langchain.chains import RetrievalQA

# Build the Gradio app; everything created inside this context is bound to `ui`.
with gr.Blocks() as ui:
    # State object initialised to an empty dict. The callbacks below receive it
    # as `user_session` and keep the QA chain in it under the "qa_chain" key.
    user_session = gr.State({})
    # Callback that generates the chat response for a user message
    def predict(message, history, user_session):
        """Answer a chat message with the retrieval QA chain stored in the session.

        Args:
            message: Text the user typed into the chat box.
            history: Conversation history passed in by the chat interface; unused here.
            user_session: Dict holding the QA chain under "qa_chain" once a
                document has been uploaded.

        Returns:
            The chain's "result" string, or a guidance message when no document
            has been uploaded yet or the message is blank.
        """
        qa_chain = user_session.get("qa_chain")
        if qa_chain is None:
            return "Please upload a document in text or PDF format for this sample to work."

        # Treat None and whitespace-only input like an empty message so a blank
        # query never reaches the (slow) LLM call.
        if not message or not message.strip():
            return "Please ask a question related to the document you uploaded."

        resp = qa_chain({"query": message})
        return resp["result"]
    
    # Callback that indexes an uploaded file so the LLM can search it for context
    def upload_file(file, user_session):
        """Index the uploaded file and store a retrieval QA chain in the session.

        Args:
            file: Uploaded file object; its ``name`` attribute is passed to
                ``init_vectorstore`` as the path to load.
            user_session: Session dict; the new chain is saved under "qa_chain".

        Returns:
            A ``(status_message, user_session)`` tuple.
        """
        store = init_vectorstore(file.name)
        # Similarity search with k=5 over the freshly built store.
        doc_retriever = store.as_retriever(
            search_type="similarity",
            search_kwargs={"k": 5},
        )
        user_session["qa_chain"] = RetrievalQA.from_chain_type(
            llm,
            retriever=doc_retriever,
            chain_type_kwargs={"prompt": prompt},
            verbose=True,
        )
        status_message = "File splitted, embeded, and ready to be searched."
        return status_message, user_session
    
    def clear_upload_file():
        """Return an empty string; wired to the file-clear event to blank the "Documents" text box."""
        return str()
    
    with gr.Row():
        # Left column: chat panel. `predict` receives the session state as an
        # additional input; the retry/undo/clear buttons are disabled.
        with gr.Column():
            chatui = gr.ChatInterface(
                predict,
                retry_btn=None,
                undo_btn=None,
                clear_btn=None,
                submit_btn="Send",
                additional_inputs=[user_session])
        # Right column: upload status text box and the file picker.
        with gr.Column():
            ctx_text_box = gr.Textbox(lines=8, label="Documents", placeholder="Only Text or PDF files is supported")
            # Gradio expects file extensions with a leading dot (".txt", ".pdf");
            # entries without a dot are interpreted as type categories
            # such as "image" or "text", not as extensions.
            file = gr.File(file_types=[".txt", ".pdf"], label="Use the click to upload instead of drag and drop. Drag and drop doesn't work here.")
            # On upload: build the QA chain and show the status message.
            file.upload(upload_file, inputs=[file, user_session], outputs=[ctx_text_box, user_session], show_progress=True)
            # On clear: blank the status text box.
            file.clear(clear_upload_file, outputs=[ctx_text_box])

# Start serving the Gradio app built above.
ui.launch()

In [None]:
# Shut down the Gradio app started by ui.launch().
ui.close()