In [20]:
!pip install -qU langchain langchain-core langchain-community langchain-openai

In [2]:
!pip install -qU qdrant-client

In [3]:
!pip install -qU tiktoken

In [6]:
!pip install jq

Collecting jq
  Downloading jq-1.7.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.8 kB)
Downloading jq-1.7.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (662 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m662.1/662.1 kB[0m [31m9.3 MB/s[0m eta [36m0:00:00[0m:00:01[0m0:01[0m
[?25hInstalling collected packages: jq
Successfully installed jq-1.7.0


In [2]:
import os
import getpass

# Ask for the key only when it is not already present in the environment, so
# the cell can be re-run (or executed non-interactively) without prompting.
# To switch keys, clear OPENAI_API_KEY first and run this cell again.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API Key:")

In [3]:
import os
import json
import sys
from typing import Iterable
from langchain.schema import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
import tiktoken
from langchain_openai.embeddings import OpenAIEmbeddings
from langchain_community.vectorstores import Qdrant

def load_json_files(directory: str):
    """Load every ``.json`` file found directly inside ``directory``.

    Each matching file is parsed with ``load_docs_from_jsonl``. File names are
    visited in sorted order because ``os.listdir`` returns entries in
    arbitrary order, which would make the result order vary between runs.

    Args:
        directory: Path of the directory to scan (sub-directories are not
            descended into).

    Returns:
        A list with one entry per ``.json`` file; each entry is whatever
        ``load_docs_from_jsonl`` returned for that file.
    """
    return [
        load_docs_from_jsonl(os.path.join(directory, file_name))
        for file_name in sorted(os.listdir(directory))
        if file_name.endswith('.json')
    ]

def load_docs_from_jsonl(file_path: str) -> Iterable[Document]:
    """Read a JSON-lines file and build one ``Document`` per non-blank line.

    Each line is parsed as a JSON object whose keys are passed to
    ``Document`` as keyword arguments.

    Args:
        file_path: Path of the JSON-lines file to read.

    Returns:
        A list of ``Document`` objects in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    documents = []
    # Read as UTF-8 explicitly; the platform default encoding is not UTF-8 everywhere.
    with open(file_path, 'r', encoding='utf-8') as jsonl_file:
        for line in jsonl_file:
            # Blank lines (e.g. extra newlines at the end of the file) carry no record.
            if not line.strip():
                continue
            documents.append(Document(**json.loads(line)))
    return documents

# One entry per JSON file found under ./data/; each entry holds that file's Documents.
all_documents = load_json_files(directory='./data/')

# Split documents into chunks
def tiktoken_len(text):
    """Return how many tokens ``text`` encodes to with the gpt-4-turbo encoding."""
    encoder = tiktoken.encoding_for_model("gpt-4-turbo")
    return len(encoder.encode(text))

# Chunk sizes are measured with tiktoken_len, i.e. in tokens rather than characters.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    length_function=tiktoken_len,
)

# all_documents holds one batch of Documents per file; split each batch and
# flatten the resulting chunks into a single list.
splits = [
    chunk
    for document_batch in all_documents
    for chunk in text_splitter.split_documents(document_batch)
]

embedding_model = OpenAIEmbeddings(model="text-embedding-3-small")

# The collection is persisted on disk under ./data/embeddings. force_recreate
# drops any collection left over from an earlier run before inserting, so
# re-running this cell does not add the same chunks a second time.
qdrant_vectorstore = Qdrant.from_documents(
    splits,
    embedding_model,
    path="./data/embeddings",
    collection_name="my_documents",
    force_recreate=True,
)

# Retriever with the vector store's default search settings.
qdrant_retriever = qdrant_vectorstore.as_retriever()

In [4]:
from langchain_core.prompts import ChatPromptTemplate

# Prompt template with two placeholders, {context} and {question}, which are
# filled in when the template is formatted. The instruction text follows the
# two placeholders.
RAG_PROMPT = """
CONTEXT:
{context}

QUERY:
{question}

your primary mission is to provide precise and contextually relevant answers to queries posed by users. For each query, you must diligently analyze the provided context to determine if it holds the necessary information pertinent to the query. Should the query align with the context, you are to retrieve the appropriate knowledge and generate a concise, accurate response. However, if the query does not pertain to the context given or if the context lacks sufficient information to formulate a reliable answer, you must gracefully decline to respond, indicating that the query falls outside the scope of the provided context. Your responses should uphold the principles of relevance and accuracy, ensuring each answer serves the user's need for specific and contextual information.  If you don't know the answer, respond I DO NOT KNOW THE ANSWER.
"""

rag_prompt = ChatPromptTemplate.from_template(RAG_PROMPT)

In [5]:
from langchain_openai import ChatOpenAI

# Chat model used to answer the formatted RAG prompt.
openai_chat_model = ChatOpenAI(
    model="gpt-4-turbo",
)

In [6]:
from operator import itemgetter
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnablePassthrough

# Invoke with {"question": "<user question>"}; the result is a dict with the
# keys "response" and "context".
retrieval_augmented_qa_chain = (
    # Stage 1: look up the "question" value twice, once piped into the
    # retriever to produce "context" and once forwarded unchanged.
    {
        "context": itemgetter("question") | qdrant_retriever,
        "question": itemgetter("question"),
    }
    # Stage 2: re-assigns "context" to its own value, so the data is unchanged.
    # NOTE(review): this step is also the only non-dict operand between the two
    # dict literals; do not drop it without wrapping one of them in a Runnable,
    # otherwise `dict | dict` would be a plain dictionary merge.
    | RunnablePassthrough.assign(context=itemgetter("context"))
    # Stage 3: "response" is the prompt formatted from the previous stage and
    # sent to the chat model; "context" is passed along so callers can inspect it.
    | {
        "response": rag_prompt | openai_chat_model,
        "context": itemgetter("context"),
    }
)

In [10]:
# Run the chain once for a sample question.
response = retrieval_augmented_qa_chain.invoke(
    {"question": "What are the driving assist options?"}
)

In [11]:
# Display the answer text: the .content of the chain's "response" entry.
response["response"].content

'The driving assist options detailed in the provided documents are:\n\n1. **Highway Driving Assist (HDA)**: This system is designed to adjust the speed of the vehicle automatically based on the current speed limits of the highway the vehicle is traveling on. It operates under certain conditions such as the vehicle traveling below 95 mph, driving on federal highways, Smart Cruise Control (SCC) being active and set to the current highway speed.\n\n2. **Lane Following Assist (LFA)**: This feature helps to ensure that the vehicle stays centered in its lane by monitoring the detected lane markings ahead and adjusting the steering accordingly. It operates only when the vehicle is traveling slower than 120 mph and recognizes both sides of the lane markers.\n\nThese systems are part of the Advanced Driver Assistance Systems (ADAS) and are designed to enhance driving safety and convenience. However, they are not substitutes for attentive driving and have specific operational limitations as desc

In [12]:
# Print every entry of the chain's "context" output, each followed by a separator line.
for context in response["context"]:
    print("Context:", context, "----", sep="\n")

Context:
page_content="ADVANCED DRIVER ASSISTANCE SYSTEMS (ADAS) Highway Driving Assist (HDA) + 4 Highway Driving Assist (HDA) is designed to adjust the speed of the vehicle when driving on limited/controlled access highways/freeways. The system can adjust the vehicle's speed based on available highway speed information. The automatic speed setting mode is designed to set the speed automatically by adjusting to the current speed limits of the highway the vehicle is traveling on. QR CODE Highway Driving Assist (HDA) Video To turn HDA on, go to User Settings # A in the Instrument Cluster or the Vehicle Settings* in the audio head unit. Go to Driver Assistance > Driving Assist > Highway Driving Assist and press the OK button B on the Steering Wheel. ADAS To activate HDA, turn Smart Cruise Control (SCC) on by pressing the Driving Assist CRUISE button on the Steering Wheel controls C. Refer to the inside front cover for more information on QR codes. www.youtube.com/KiaFeatureVideos A MODE C