In [1]:
import os
import numpy as np
import gradio as gr
import json
import chromadb
from groq import Groq

In [12]:
# Storage path handed to chromadb.PersistentClient in the datastore cell.
DB_PATH = 'embeddings'
# Model identifier passed as `model=` on every Groq chat-completion request.
MODEL_NAME = 'llama3-70b-8192'
# JSON file whose entries are upserted into the vector store as documents.
FILENAME = 'data/un_regulations_157.json'
# Prompt template; the `{context}` and `{question}` placeholders are filled in
# with str.format inside get_completion.
RESPONSE_TEMPLATE = """The context data contains a automotive compliance regulations, most of the time its either a requirement or a definition.
The regulations given in the context are identified by their id located at the beginning and followed by a colon (3.2.1.:, 2.4.: etc...). Include regulation id in your answer whenever necessary.
Use the following pieces of context to answer the query at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. Use three sentences maximum. Keep the answer as concise as possible. Always say "thanks for asking!" at the end of the answer. 
{context}
Question: {question}
Helpful Answer:"""
# Sample query; not referenced by any other cell visible in this notebook.
question = 'Which requirements should be met for the vehicle type submitted for approval?'

# Vector datastore RAG
![Diagram of the vector-datastore RAG pipeline](images/rag_vector_store.png)

## Load model and datastore

In [13]:
# Read the regulation texts. The encoding is stated explicitly so that
# non-ASCII characters in the data decode the same way on every platform
# instead of depending on the locale's default encoding.
with open(FILENAME, encoding='utf-8') as f:
    data = json.load(f)
# The first line of each regulation text serves as its document id.
reg_ids = [regulation.splitlines()[0].strip() for regulation in data]

# Open (or create) the on-disk collection, configured for the cosine space.
client = chromadb.PersistentClient(path=DB_PATH)
collection = client.get_or_create_collection(
    name='current_db',
    metadata={'hnsw:space': 'cosine'})
# upsert (rather than add) so that re-running this cell against an existing
# database does not fail on ids that are already stored.
collection.upsert(
    documents=data,
    ids=reg_ids
    )
# The API key is taken from the environment, never hard-coded in the notebook.
llm_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

In [4]:
# Smoke test: send one fixed user message to the configured model and print
# the text of the first returned choice.
chat_completion = llm_client.chat.completions.create(
    model=MODEL_NAME,
    messages=[{"role": "user", "content": "Explain the meaning of life in 2 sentences"}],
)
print(chat_completion.choices[0].message.content)

The meaning of life is often subjective and can vary greatly from person to person, but at its core, it is the pursuit of happiness, fulfillment, and personal growth through the connections we make, the experiences we have, and the contributions we make to the world around us. Ultimately, the meaning of life is a journey of self-discovery, acceptance, and wisdom, and it is up to each individual to define what gives their life significance and purpose.


In [14]:
def retrieve(msg, top_k=3):
    """Return the documents from the vector store that best match `msg`.

    Args:
        msg: Query text to search the collection with.
        top_k: Maximum number of documents to return. Values coming from a
            UI slider may arrive as floats, so the value is coerced to int
            before it is passed on as `n_results`.

    Returns:
        The list of matching document texts for the single query.
    """
    query_results = collection.query(
        query_texts=[msg],
        n_results=int(top_k)
    )
    # One query text was submitted, so the first entry holds its results.
    return query_results["documents"][0]

def get_completion(msg, context, max_new_tokens, temperature, top_k):
    """Ask the LLM to answer `msg` using the supplied context.

    Args:
        msg: The user's question.
        context: Either a ready-made context string or an iterable of
            retrieved document texts.
        max_new_tokens: Upper bound on generated tokens; coerced to int
            because UI sliders may deliver floats.
        temperature: Sampling temperature forwarded to the API.
        top_k: Unused here; kept so existing callers keep working.

    Returns:
        The text content of the first completion choice.
    """
    # Interpolating a list directly would embed its Python repr (brackets,
    # quotes, escaped "\n") in the prompt, so join the documents into plain
    # text separated by blank lines instead.
    if isinstance(context, str):
        context_text = context
    else:
        context_text = "\n\n".join(str(document) for document in context)
    query = RESPONSE_TEMPLATE.format(context=context_text, question=msg)
    chat_completion = llm_client.chat.completions.create(
        messages=[
            {
                "role": "user",
                "content": query,
            }
        ],
        temperature=temperature,
        max_tokens=int(max_new_tokens),
        model=MODEL_NAME,
    )
    return chat_completion.choices[0].message.content

def generate_qa(msg, max_new_tokens, temperature, top_k):
    """Answer a single question: retrieve context, log it, then query the LLM.

    Returns the completion text produced by get_completion.
    """
    retrieve_result = retrieve(msg, top_k)
    # Echo the retrieved documents so they can be inspected in the cell output.
    print(f'Retrieved result from the vectorstore: {retrieve_result}')
    return get_completion(
        msg,
        retrieve_result,
        max_new_tokens,
        temperature,
        top_k,
    )

def generate_chat(msg, max_new_tokens, temperature, top_k, chat_history):
    """Answer `msg` and append the exchange to the chat history.

    Args:
        msg: The user's message.
        max_new_tokens: Upper bound on generated tokens.
        temperature: Sampling temperature.
        top_k: Number of documents to retrieve as context.
        chat_history: List of (user message, answer) pairs, or None when no
            history exists yet.

    Returns:
        A tuple of an empty string and the updated history; the click handler
        wires these to the prompt box and the chatbot, in that order.
    """
    retrieve_result = retrieve(msg, top_k)
    result = get_completion(msg, retrieve_result, max_new_tokens, temperature, top_k)
    # A missing history would otherwise raise AttributeError on append.
    if chat_history is None:
        chat_history = []
    chat_history.append((msg, result))
    return "", chat_history

## Q&A interface

In [15]:
# Single-shot Q&A form: the inputs map positionally onto
# generate_qa(msg, max_new_tokens, temperature, top_k).
# The integer-valued sliders use step=1; the previous step=int passed the
# `int` type object where a numeric step size is expected.
qa_interface = gr.Interface(
    fn=generate_qa,
    title="QA interface",
    inputs=[
        gr.Textbox(label="Prompt"),
        gr.Slider(label="Max new tokens", value=200, maximum=1024, step=1, minimum=1),
        gr.Slider(label="Temperature", value=0.8, maximum=1.0, minimum=0.0),
        gr.Slider(label="Max documents retrieved", value=3, step=1, maximum=5, minimum=1),
    ],
    outputs=[gr.Textbox(label="Completion")],
)
# Shut down any servers left over from earlier runs before launching.
gr.close_all()
# NOTE(review): share=True publishes a public URL; anyone holding the link can
# trigger requests billed to the configured API key.
qa_interface.launch(share=True)

Closing server running on port: 7860
Closing server running on port: 7860
* Running on local URL:  http://127.0.0.1:7860
* Running on public URL: https://13f42b84f4dc8643ff.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




Retrieved result from the vectorstore: ['10.: Modification of vehicle type and extension of type approval', '10.\nModification of vehicle type and extension of type approval', '10.1.\nEvery modification to an existing vehicle type shall be notified to the Type Approval Authority which approved the vehicle type.\nThe Authority shall then either:\n(a)\nDecide, in consultation with the manufacturer, that a new type-approval is to be granted; or\n(b)\nApply the procedure contained in paragraph 10.1.1 (Revision) and, if applicable, the procedure contained in paragraph 10.1.2 (Extension). 10.1.\nModel of the information provided to users (including expected driver’s tasks within the ODD and when going out of the ODD) …']


## Chatbot interface

In [7]:
# Chat UI: controls in the left column, conversation in the right column.
# The integer-valued sliders use step=1; the previous step=int passed the
# `int` type object where a numeric step size is expected.
with gr.Blocks() as chat_interface:
    title = gr.HTML("<center><h1>Chatbot</h1></center>")
    with gr.Row():
        with gr.Column():
            msg = gr.Textbox(label="Prompt")
            max_new_tokens = gr.Slider(label="Max new tokens", value=200, maximum=1024, step=1, minimum=1)
            temperature = gr.Slider(label="Temperature", value=0.8, maximum=1.0, minimum=0.0)
            max_documents = gr.Slider(label="Max documents retrieved", value=3, step=1, maximum=5, minimum=1)
        with gr.Column():
            chatbot = gr.Chatbot(height=240)
    btn = gr.Button("Query")
    clear = gr.ClearButton(components=[msg, chatbot], value="Clear console")
    # Inputs map positionally onto
    # generate_chat(msg, max_new_tokens, temperature, top_k, chat_history);
    # its two return values go to the prompt box and the chatbot.
    btn.click(
        generate_chat,
        inputs=[
            msg,
            max_new_tokens,
            temperature,
            max_documents,
            chatbot],
        outputs=[msg, chatbot])
# Shut down any servers left over from earlier runs before launching.
gr.close_all()
# NOTE(review): share=True publishes a public URL; anyone holding the link can
# trigger requests billed to the configured API key.
chat_interface.launch(share=True)



Closing server running on port: 7860
* Running on local URL:  http://127.0.0.1:7860
* Running on public URL: https://880b55523f5c107cb2.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


