<a href="https://colab.research.google.com/github/smthomas1704/restoration-rag/blob/main/search_basic.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Testing testing

!git clone https://github.com/smthomas1704/restoration-rag.git

In [None]:
!pip3 install -r restoration-rag/requirements.txt

In [None]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

DATA_PATH = 'restoration-rag/data' #Your root data folder path
DB_FAISS_PATH = 'vectorstore/db_faiss' #Folder the FAISS index is saved to

# Load every PDF found under DATA_PATH.
loader = PyPDFDirectoryLoader(DATA_PATH)
documents = loader.load()

# Fail fast with a readable message instead of an IndexError on documents[0]
# when the folder is missing, empty, or contains no readable PDFs.
if not documents:
    raise ValueError(f"No documents were loaded from {DATA_PATH!r}; check that the folder exists and contains PDF files")

# Sanity check: number of loaded documents and the first 100 characters of the first one.
print(len(documents))
print(documents[0].page_content[:100])


38
People and Nature. 2023;5:1415–1429.    | 1415
 wileyonlinelibrary.com/journal/pan31 | INTRODUCTION



In [None]:
# Cut the loaded documents into small, slightly overlapping chunks for embedding.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=10,
)
splits = text_splitter.split_documents(documents)

In [None]:
# Embedding model used to vectorise the chunks; configured to run on the CPU.
embeddings = HuggingFaceEmbeddings(
    model_name='sentence-transformers/all-MiniLM-L6-v2',
    model_kwargs={'device': 'cpu'},
)

.gitattributes:   0%|          | 0.00/1.18k [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

data_config.json:   0%|          | 0.00/39.3k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

train_script.py:   0%|          | 0.00/13.2k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

In [None]:
# Embed every chunk into a FAISS index, then write the index to DB_FAISS_PATH
# so that it can be loaded again later without re-embedding.
db = FAISS.from_documents(documents=splits, embedding=embeddings)
db.save_local(folder_path=DB_FAISS_PATH)

In [None]:
# Ask ipecho.net to echo back the IP address this machine is seen from.
!curl ipecho.net/plain

# Send one small generation request to the text-generation server as a smoke test.
!curl localhost:8080/generate -X POST -H 'Content-Type: application/json' -d '{"inputs": "Tips for reforestation", "parameters": { "max_new_tokens":64}}' #Replace the localhost with the IP visible to the machine running the notebook


34.173.16.28^C


In [None]:
import langchain
from queue import Queue
from typing import Any
from langchain.llms.huggingface_text_gen_inference import HuggingFaceTextGenInference
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.schema import LLMResult
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.prompts.prompt import PromptTemplate
from anyio.from_thread import start_blocking_portal #For model callback streaming

# Turn on LangChain's global debug flag.
langchain.debug = True

# Folder the FAISS index was saved to
DB_FAISS_PATH = 'vectorstore/db_faiss'

# Llama2 TGI endpoints
# Replace the localhost with the IP visible to the machine running the notebook
LLAMA2_7B_HOSTPORT = "http://localhost:8080/"
# Add your own host ports for model switching. You can host another TGI model on same instance on a different port.
LLAMA2_13B_HOSTPORT = "http://localhost:8080/"

# Model name shown in the UI -> endpoint serving that model
model_dict = {"7b-chat": LLAMA2_7B_HOSTPORT, "13b-chat": LLAMA2_13B_HOSTPORT}

# First entry of a freshly initialised message history
system_message = {
    "role": "system",
    "content": "You are a helpful assistant.",
}

In [None]:
# The index has to be loaded with the same embedding model it was built with,
# so the model is instantiated again here before reading the saved index.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs={'device': 'cpu'},
)
db = FAISS.load_local(DB_FAISS_PATH, embeddings)

In [None]:
# Client for the text-generation server hosting the 7B model.
llm = HuggingFaceTextGenInference(
    inference_server_url=LLAMA2_7B_HOSTPORT,
    streaming=True,
    # generation length
    max_new_tokens=512,
    # sampling settings
    do_sample=True,
    temperature=0.6,
    top_k=10,
    top_p=0.9,
    typical_p=0.95,
    repetition_penalty=1,
)

In [None]:
# Prompt sent to the model: retrieved context first, then the user's question.
template = """
[INST]Use the following pieces of context to answer the question. If no context provided, answer like a AI assistant.
{context}
Question: {question} [/INST]
"""

# Retriever over the FAISS index, configured with k=6.
retriever = db.as_retriever(search_kwargs=dict(k=6))

# Retrieval + generation chain using the custom prompt above.
rag_prompt = PromptTemplate(
    template=template,
    input_variables=["context", "question"],
)
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type_kwargs={"prompt": rag_prompt},
)

In [None]:
# NOTE(review): this cell repeats the RetrievalQA construction from the previous
# cell with the same llm, retriever and prompt template; it only rebinds
# qa_chain to an equivalent chain and looks safe to delete.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type_kwargs={
        "prompt": PromptTemplate(
            template=template,
            input_variables=["context", "question"],
        ),
    }
)

In [None]:
# One end-to-end query against the chain as a smoke test.
result = qa_chain(dict(query="How to prioritize areas for ecological restoration"))
print(result)

In [None]:
import gradio as gr

# Unique sentinel pushed onto the token queue to signal that a generation is over.
job_done = object()

class MyStream(StreamingStdOutCallbackHandler):
    """Callback handler that forwards streamed LLM tokens to a queue.

    Every new token is put on ``q``; when the LLM finishes, or fails, the
    ``job_done`` sentinel is put instead so that the consumer reading the
    queue knows it can stop waiting.
    """

    def __init__(self, q) -> None:
        """Store the queue that tokens and the end sentinel are written to."""
        self.q = q

    def on_llm_new_token(self, token: str, **kwargs: Any) -> None:
        """Enqueue one freshly generated token."""
        self.q.put(token)

    def on_llm_end(self, response: LLMResult, **kwargs: Any) -> None:
        """Enqueue the sentinel once the LLM has finished generating."""
        self.q.put(job_done)

    def on_llm_error(self, error: BaseException, **kwargs: Any) -> None:
        """Enqueue the sentinel when the LLM call fails.

        Without this a failed generation never signals completion and a
        consumer blocked on ``q.get()`` would wait forever.
        """
        self.q.put(job_done)



with gr.Blocks() as demo:
    #Configure UI layout
    # Chat transcript on top, then one row holding a settings column (scale=1)
    # and a prompt column (scale=2).
    chatbot = gr.Chatbot(height = 600)
    with gr.Row():
        with gr.Column(scale=1):
            with gr.Row():
                #model selection
                # Choices are the keys of model_dict, i.e. model names, not URLs.
                model_selector = gr.Dropdown(
                    list(model_dict.keys()),
                    value="7b-chat",
                    label="Model",
                    info="Select the model",
                    interactive = True,
                    scale=1
                )
                # Integer input (precision=0) limited to 1..1024.
                max_new_tokens_selector = gr.Number(
                    value=512,
                    precision=0,
                    label="Max new tokens",
                    info="Adjust max_new_tokens",
                    interactive = True,
                    minimum=1,
                    maximum=1024,
                    scale=1
                )
            with gr.Row():
                #hyperparameter selection
                # NOTE(review): the info text says "Range 0-2" but the slider starts at 0.01.
                temperature_selector = gr.Slider(
                    value=0.6,
                    label="Temperature",
                    info="Range 0-2. Controls the creativity of the generated text.",
                    interactive = True,
                    minimum=0.01,
                    maximum=2,
                    step=0.01,
                    scale=1
                )
                # NOTE(review): the info text says "Range 0-1" but the slider covers 0.01-0.99.
                top_p_selector = gr.Slider(
                    value=0.9,
                    label="Top_p",
                    info="Range 0-1. Nucleus sampling.",
                    interactive = True,
                    minimum=0.01,
                    maximum=0.99,
                    step=0.01,
                    scale=1
                )
        with gr.Column(scale=2):
            #user input prompt text field
            user_prompt_message = gr.Textbox(placeholder="Please add user prompt here", label="User prompt")
            with gr.Row():
                clear = gr.Button("Clear Conversation", scale=2)
                submitBtn = gr.Button("Submit", scale=8)


    # Message history (list of role/content dicts) passed to bot and init_history; starts empty.
    state = gr.State([])

    #handle user message
    def user(user_prompt_message, history):
        """Return the chat history extended with a new, still unanswered turn.

        The turn shows the submitted prompt, or a fixed notice when the prompt
        is the empty string. ``history`` itself is left unmodified; a new list
        is returned.
        """
        shown_text = (
            "Invalid prompts - user prompt cannot be empty"
            if user_prompt_message == ""
            else user_prompt_message
        )
        return history + [[shown_text, None]]

    #chatbot logic for configuration, sending the prompts, rendering the streamed back generations etc
    def bot(model_selector, temperature_selector, top_p_selector, max_new_tokens_selector, user_prompt_message, history, messages_history):
        """Stream the chain's answer to the latest prompt into the chat.

        Generator used as an event handler: every yield is a
        ``(history, messages_history)`` pair, emitted once per received token
        and once more when the answer is complete.

        Args:
            model_selector: Model name picked in the dropdown (a key of
                ``model_dict``).
            temperature_selector: Sampling temperature to set on ``llm``.
            top_p_selector: ``top_p`` value to set on ``llm``.
            max_new_tokens_selector: ``max_new_tokens`` value to set on ``llm``.
            user_prompt_message: The prompt text sent to the chain.
            history: Chat transcript as ``[user_text, bot_text]`` pairs; the
                last pair is filled in place as tokens arrive.
            messages_history: List of role/content dicts; the user and
                assistant messages of this turn are appended in place.

        Raises:
            Any exception raised by the chain is re-raised once streaming has
            stopped, instead of being silently dropped.
        """
        bot_message = ""
        history[-1][1] = ""

        messages_history += [{"role": "user", "content": user_prompt_message}]

        #Queue for streamed character rendering
        q = Queue()

        #Update new llama hyperparameters
        # The dropdown yields the model *name*, so translate it to its endpoint
        # URL; a value that is not a known name is passed through unchanged.
        # NOTE(review): `llm` is shared by all requests, and whether a URL set
        # after construction is actually used depends on how the LangChain
        # class builds its HTTP client - confirm for the installed version.
        llm.inference_server_url = model_dict.get(model_selector, model_selector)
        llm.temperature = temperature_selector
        llm.top_p = top_p_selector
        llm.max_new_tokens = max_new_tokens_selector

        #Task run on the portal's thread so the chain does not block this generator
        async def task(prompt):
            try:
                # run() is synchronous and returns the answer string, so it
                # must not be awaited.
                return qa_chain.run(prompt, callbacks=[MyStream(q)])
            finally:
                # Always release the consumer below, even when the chain fails
                # before the LLM callbacks get a chance to post the sentinel.
                q.put(job_done)

        with start_blocking_portal() as portal:
            answer = portal.start_task_soon(task, user_prompt_message)
            while True:
                next_token = q.get(True)
                if next_token is job_done:
                    break
                bot_message += next_token
                history[-1][1] += next_token
                yield history, messages_history
            # Wait for the task to finish and surface any exception it raised.
            answer.result()

        messages_history += [{"role": "assistant", "content": bot_message}]
        # A generator's return value is discarded by the caller, so the final
        # state (also covering an empty answer) is emitted with a last yield.
        yield history, messages_history

    #init the chat history with default system message
    def init_history(messages_history):
        """Return a fresh message history holding only the system message.

        The incoming ``messages_history`` is ignored and not modified.
        """
        return [system_message]

    #clean up the user input text field
    def input_cleanup():
        """Return the empty string that is written back into the prompt textbox."""
        emptied_prompt = ""
        return emptied_prompt

    #pressing Enter in the prompt textbox and clicking the submit button run the
    #same three steps: record the user turn, stream the answer, empty the textbox
    for send_event in (user_prompt_message.submit, submitBtn.click):
        send_event(
            user,
            [user_prompt_message, chatbot],
            [chatbot],
            queue=False
        ).then(
            bot,
            [model_selector, temperature_selector, top_p_selector, max_new_tokens_selector, user_prompt_message, chatbot, state],
            [chatbot, state]
        ).then(
            input_cleanup,
            [],
            [user_prompt_message],
            queue=False
        )

    #the clear button empties the chat, then resets the message history
    clear.click(lambda: None, None, chatbot, queue=False).success(init_history, [state], [state])

In [None]:
# Serve on all interfaces and request a public share link. `share` is a
# boolean flag: the string "True" only worked because any non-empty string is
# truthy (so "False" would have enabled sharing as well).
demo.queue().launch(server_name="0.0.0.0", share=True)

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://1446f9dc98428e4621.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


