In [1]:
import os
from typing import Mapping, Text, Any
import torch
import time

# simple hack to support importing modules from the parent directory
import sys
sys.path.append('../')

from rag_llama.core.retrievers import RerankRetriever
from rag_llama.core.generation import Llama, Dialog


Set a proxy for the notebook kernel. Note that this is only needed for my local environment and can be removed.

In [2]:
# Route both HTTP and HTTPS traffic from this kernel through the local proxy.
os.environ.update({
    'http_proxy': "http://127.0.0.1:1081",
    'https_proxy': "http://127.0.0.1:1081",
})

## Define system message and input query format templates. 
Note that these are for single-turn chat, similar to how search works.

In [3]:
# System prompt used as the content of the 'system' turn in every dialog
# built by get_formatted_input_dialog. The text is sent to the model verbatim
# (including its leading and trailing newline), so edits here change model behavior.
SYSTEM_MESSAGE = """
You are an assistant to a Tesla customer support team. 
Your job is to answer customer's questions to the best of your ability.
You will be given several reference documents to help you in answering.
Some of the provided documents may be irrelevant, so ignore these and only use those that appear to be relevant. 
Do not mention or cite documents that are not given to you.
Answer succinctly and make reference to the given documents sections.
You should provide the answer in English.
"""

def get_formatted_input_dialog(query: str, doc_strs: str) -> Dialog:
    """Build the two-turn (system + user) dialog for a single query.

    Args:
        query: The user's question.
        doc_strs: The reference documents, already joined into one string.

    Returns:
        A list with the system turn (SYSTEM_MESSAGE) followed by a user turn
        that contains the question and the documents separated by '####'.
    """
    system_turn = {'role': 'system', 'content': SYSTEM_MESSAGE}
    user_turn = {
        'role': 'user',
        'content': f"Question:\n{query}\n\n####\n\nDocuments:\n{doc_strs}",
    }
    return [system_turn, user_turn]

In [4]:
def format_retrieved_doc(item: Mapping[Text, Any], max_words: int=0) -> str:
    """Render one retrieved item as a single 'document - subject - section - content' string.

    Args:
        item: Mapping describing a retrieved item. The "document", "subject",
            "section" and "content" keys are read; missing or falsy values
            are left out of the result.
        max_words: When greater than 0, the result is cut to at most this many
            whitespace-separated words. 0 (the default) disables truncation.

    Returns:
        The non-empty fields joined with ' - '. When truncation applies, the
        kept words are re-joined with single spaces.
    """
    fields = (item.get(key, "") for key in ("document", "subject", "section", "content"))

    # Values are typed as Any, so coerce to str: a non-string field (e.g. a
    # numeric section) would otherwise make str.join raise TypeError.
    formatted_doc = ' - '.join(str(value) for value in fields if value)

    # Cut to maximum number of words
    if max_words > 0:
        words = formatted_doc.split()
        if len(words) > max_words:
            return ' '.join(words[:max_words])

    return formatted_doc

Create the reranking retrieval instance and the LLaMA 2 chat generator instance.

In [5]:
# Data and checkpoint locations
doc_embed_file = "../data/Tesla_manual_embeddings.pk"
llama_tokenizer_ckpt = os.path.expanduser("~/models/meta_llama2/tokenizer.model")
llama_model_ckpt = os.path.expanduser("~/models/meta_llama2/llama-2-7b-chat/consolidated.pth")

# Use the GPU whenever torch reports one, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# LLM settings
max_seq_len = 2048  # passed to Llama.build
max_gen_len = 1024  # passed to generator.chat_completion
temperature = 0.6  # sampling temperature
top_p = 0.9  # sampling top P

# Retrieval (RAG) settings
top_k = 50  # number of items fetched by the naive retrieval step
top_n = 3  # number of items kept after scoring with the reranking model
max_words = 300  # word cap applied to each retrieved document

In [6]:
# Retriever over the precomputed document embeddings file.
rerank_retriever = RerankRetriever(
    device=device,
    embed_file=doc_embed_file,
)

# Chat generator; a batch size of 1 is enough because chat_completion
# submits a single dialog per call.
generator = Llama.build(
    tokenizer_path=llama_tokenizer_ckpt,
    ckpt_path=llama_model_ckpt,
    device=device,
    max_batch_size=1,
    max_seq_len=max_seq_len,
)

Loading sentence-transformers/all-MiniLM-L6-v2 model and tokenizer from HuggingFace...
Loading cross-encoder/ms-marco-MiniLM-L-6-v2 model and tokenizer from HuggingFace...
Starting to load tokenizer checkpoint '/home/michael/models/meta_llama2/tokenizer.model' ...
Starting to load model checkpoints '/home/michael/models/meta_llama2/llama-2-7b-chat/consolidated.pth' ...
Model checkpoint loaded in 19.65 seconds


## Main logic for retrieval and LLM generation

In [7]:
def chat_completion(query: str):
    """Answer one user query with retrieval-augmented generation and print the result.

    Retrieves reference documents for ``query`` with the module-level
    ``rerank_retriever``, formats them into a single-turn dialog, runs the
    module-level ``generator`` on it, and prints the query, the generated
    answer, the retrieval/generation timings and the retrieved items.

    Args:
        query: The user's question. Must not be None.

    Returns:
        None. All results are printed.

    Raises:
        AssertionError: If ``query`` is None.
    """
    assert query is not None, "query must not be None"

    # perf_counter is a monotonic, high-resolution clock intended for measuring
    # durations; time.time() can jump if the system clock is adjusted.
    retrieval_start = time.perf_counter()
    retrieved_items = rerank_retriever.retrieve(query, top_k, top_n)
    retrieval_end = time.perf_counter()

    # Join multiple documents into a single document string
    ref_doc_strs = "\n\n".join(format_retrieved_doc(d, max_words) for d in retrieved_items)
    # The generator takes a batch of dialogs, so wrap the single dialog in a list
    dialogs = [get_formatted_input_dialog(query, ref_doc_strs)]

    results = generator.chat_completion(
        dialogs,
        max_gen_len=max_gen_len,
        temperature=temperature,
        top_p=top_p,
    )

    generation_end = time.perf_counter()

    # Only one dialog was submitted, so the answer is the first result
    response = results[0]['generation']['content']

    print(f"User: {query}")
    print(f"\n\nAssistant: {response}")
    print(f"\n\nRetrieval time: {retrieval_end-retrieval_start:.4f}, generation time: {generation_end-retrieval_end:.4f}")
    print(f"\n\nReference documents: {retrieved_items}")


## Now we can start asking questions about Tesla cars

In [8]:
chat_completion('Explain to me briefly how can I enable Autopilot on my Tesla Model S car?')

User: Explain to me briefly how can I enable Autopilot on my Tesla Model S car?


Assistant:  To enable Autopilot on your Tesla Model S, you can follow these steps:
1. Touch Controls > Autopilot > Autopilot Features > Autosteer (Beta).
2. After carefully reading and understanding the popup window, touch Yes.
Autosteer confirms activation with an audible chime and briefly displays a message on the instrument cluster reminding you to pay attention to the road and be ready to take over at any time. To indicate that Autosteer is now active, the instrument cluster displays the Autosteer icon in blue. When Autosteer is able to detect lane markings, it displays the edges of the driving lane in blue on the instrument cluster. Whenever Autosteer is active, Traffic-Aware Cruise Control is active as well. In situations where the speed limit cannot be detected when Autosteer is engaged, Autosteer reduces your driving speed and limits the set cruising speed to 45 mph (70 km/h). Although you can man

In [9]:
chat_completion('What are the options to open the door of a Tesla Model X car when the power is very low?')

User: What are the options to open the door of a Tesla Model X car when the power is very low?


Assistant:  Thank you for reaching out to us! If you're experiencing a low power situation with your Tesla Model X, there are a few options you can try to open the door:
1. Check if the door is unlocked: Make sure the door is unlocked before attempting to open it. You can check if the door is unlocked by using the Tesla app or by checking the door handle.
2. Use the emergency door open button: If the door is locked, you can try using the emergency door open button located on the key fob or on the door handle. Pressing the button will unlock the door and allow you to open it manually.
3. Call Tesla Roadside Assistance: If none of the above options work, you can call Tesla Roadside Assistance for help. They will be able to send a technician to assist you in opening the door. Please provide your Vehicle Identification Number (VIN) and exact location when calling.
It's important to note that if

In [10]:
chat_completion('Can I use autopilot in raining or snowing conditions?')

User: Can I use autopilot in raining or snowing conditions?


Assistant:  Thank you for reaching out to us! I'm happy to help you with your question.
To answer your question, Tesla's Autopilot system is designed to operate in a variety of weather conditions, including rain and snow. However, it's important to note that Autopilot's performance may be affected by environmental conditions such as heavy rain, snow, or faded lane markings. If a camera is obstructed or blinded, Autopilot features may not be available.
According to the provided documents, Autopilot is designed to work in a variety of weather conditions, including rain and snow. However, it's important to drive safely and follow the rules of the road, even when using Autopilot.
I hope this helps! Let me know if you have any other questions.


Retrieval time: 0.1222, generation time: 4.6324




In [11]:
chat_completion('Why is my touchscreen not responding?')

User: Why is my touchscreen not responding?


Assistant:  Thank you for reaching out to us regarding your touchscreen issue. Based on the information provided in the owner's manuals, it seems that the touchscreen may be unresponsive or behaving irregularly. To resolve this issue, you can try restarting the touchscreen while the vehicle is in Park and the engine is off.
According to the Model 3 owner's manual (2024.2.3), you can restart the touchscreen by following these steps:
1. Shift into Park.
2. Hold down both scroll buttons on the steering wheel until the touchscreen turns black. Pressing the brake pedal while holding down the scroll buttons does not have any impact and is not required.
3. After a few seconds, the Tesla logo appears. Wait approximately 30 seconds for the touchscreen to restart. If the touchscreen is still unresponsive or demonstrating unusual behavior after a few minutes, try power cycling the vehicle (if possible). See Power Cycling the Vehicle on page 60.
Please

In [12]:
chat_completion('I have never owned an EV before. Do I need to change oil regularly for my Tesla car?')

User: I have never owned an EV before. Do I need to change oil regularly for my Tesla car?


Assistant:  As an assistant to the Tesla customer support team, I can confidently answer your question. According to the provided documents, Tesla recommends checking the brake fluid level every 4 years (or more frequently if heavy brake usage is observed) and replacing it if necessary. Additionally, the A/C desiccant bag should be replaced every 4 years, and the cabin air filter every 3 years. The HEPA filters and carbon filters should also be replaced every 3 years. It is also recommended to clean and lubricate the brake calipers every year or 12,500 miles (20,000 km) if in an area where roads are salted during winter. Finally, rotate the tires every 6,250 miles (10,000 km) or if the tread depth difference is 2/32 in (1.5 mm) or greater.
It is important to note that these intervals are based on typical driving behaviors and scenarios, and may vary depending on individual circumstances such as

In [13]:
chat_completion('What are the recommendations for a first-time Tesla car owner?')

User: What are the recommendations for a first-time Tesla car owner?


Assistant:  As a Tesla customer support assistant, I would recommend the following to a first-time Tesla car owner:
1. Familiarize yourself with the vehicle's features and functionality through the owner's manual and online resources.
2. Set up and use the Tesla app for remote monitoring and control of your vehicle, including charging, location tracking, and vehicle health checks.
3. Regularly check and maintain the vehicle's tires, brakes, and fluids, following the maintenance schedules outlined in the owner's manual.
4. Keep the vehicle's software up to date through the Tesla app or at a Tesla service center.
5. Be aware of your vehicle's battery health and range, and plan your charging stops accordingly.
6. Familiarize yourself with the vehicle's safety features and emergency procedures, including the location of the emergency brake, horn, and other critical components.
7. Take advantage of Tesla's do-it-yourself