In [7]:
# Import dependencies
import json, os
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_community.llms import Ollama
from langchain_community.vectorstores import Chroma
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from dotenv import load_dotenv

# Load environment variables (python-dotenv) before any client is constructed.
# NOTE(review): the OpenAI embeddings/chat clients created in later cells
# presumably pick up their API key from the environment -- confirm that
# OPENAI_API_KEY is defined in the local .env file.
load_dotenv()

True

In [8]:
# Read every .txt file below ./car_text_data (recursively) into memory.
# Each element of `all_docs` is the full text of one file.
all_docs = []
for subdir, dirs, files in os.walk('./car_text_data'):
    for file in files:
        if file.endswith('.txt'):
            file_path = os.path.join(subdir, file)
            # Decode explicitly as UTF-8: without `encoding`, open() falls back
            # to the platform's locale encoding, so the same corpus could load
            # differently (or fail) on another machine.
            # NOTE(review): assumes the corpus is UTF-8 encoded -- confirm.
            with open(file_path, encoding="utf-8") as f:
                file_content = f.read()
            all_docs.append(file_content)
print(len(all_docs))

224


In [9]:
# Break the loaded texts into chunks of at most 1000 characters (measured
# with len) that overlap by 20 characters.
splitter_options = dict(
    chunk_size=1000,
    chunk_overlap=20,
    length_function=len,
    is_separator_regex=False,
)
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)

# One Document is produced per chunk, across all input texts.
docs = text_splitter.create_documents(all_docs)
print(f"Number of chunks: {len(docs)}")

Number of chunks: 574


In [10]:
# Generate embeddings & persist locally
PERSIST_DIR = "storage"
embeddings = OpenAIEmbeddings()

if os.path.exists(PERSIST_DIR):
    # A store already exists on disk: reopen it instead of re-embedding.
    # NOTE(review): a "storage" directory left behind by the earlier version
    # of this cell holds an empty collection -- delete it once so the index
    # is rebuilt.
    vectorstore = Chroma(
        persist_directory=PERSIST_DIR,
        embedding_function=embeddings,
    )
else:
    # First run: embed the chunks and write the index into PERSIST_DIR.
    # The directory has to be passed to from_documents itself; previously an
    # empty store was created on disk while the embedded documents went into
    # a separate in-memory store, so nothing was persisted and later runs
    # loaded an empty index.
    # NOTE(review): with chromadb < 0.4 an explicit vectorstore.persist()
    # call may also be required -- confirm against the installed version.
    vectorstore = Chroma.from_documents(
        docs,
        embeddings,
        persist_directory=PERSIST_DIR,
    )

retriever = vectorstore.as_retriever()

In [11]:
# Prompt template
#
# The question and the retrieved context are placed in explicitly labelled
# sections so the model can tell where one ends and the other begins, and the
# trailing "Answer:" cue marks where the completion should start.
prompt_template = """
You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question.
If you don't know the answer, just say that you don't know.

Question: {question}

Context:
{context}

Answer:"""

# Both placeholders must be supplied when the prompt is formatted.
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

In [12]:
# Build the RAG Chain & Invoke


def format_docs(retrieved_docs):
    """Join the text of the retrieved documents into a single context string.

    Args:
        retrieved_docs: iterable of Document objects returned by the retriever.

    Returns:
        The documents' ``page_content`` values separated by blank lines.
    """
    return "\n\n".join(doc.page_content for doc in retrieved_docs)


llm = ChatOpenAI(model_name="gpt-3.5-turbo")
# Alternative local model:
#llm = Ollama(model="mistral")

# The retriever yields a list of Document objects. Passing that list directly
# into the prompt would insert its Python repr (including metadata) rather
# than the passage text, so it is piped through format_docs first.
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

rag_chain.invoke("tell me about creta")

'Hyundai Creta is a 5 seater SUV available in 28 variants with engine options ranging from 1482 to 1497 cc. It has a choice of 2 transmissions: Manual and Automatic, and comes with 6 airbags. Users have reported a mileage of 18 to 20 kmpl for the Hyundai Creta. The price range for the Hyundai Creta is from Rs. 11.00 - 20.15 Lakh. Additionally, there is a Hyundai Creta N Line variant available with a price range of Rs. 16.82 - 20.45 Lakh. There is also a Hyundai Creta EV variant expected to launch in India in September 2024 with an expected price range of Rs. 22.00 - 26.00 Lakh.'