In [21]:
import chromadb
from chromadb import Settings

from langchain_community.vectorstores import Chroma
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

from config import CHROMA_DATA_PATH, CHROMA_TENANT, CHROMA_DATABASE

# Collection and embedding model used for retrieval. Both must match what the
# ingestion cell writes (same collection name, same embedding model); otherwise
# the retriever searches a different collection (LangChain falls back to its
# default collection when no name is given) with query vectors produced by a
# different model than the stored ones.
COLLECTION_NAME = "kkf720f983d2110356c6679e28a2e2423712c191dc4e384840d2a725a46b0b7"
EMBEDDING_MODEL = "text-embedding-3-small"

# Single persistent Chroma client shared by every cell in this notebook.
CHROMA_CLIENT = chromadb.PersistentClient(
    path=CHROMA_DATA_PATH,
    settings=Settings(allow_reset=True, anonymized_telemetry=False),
    tenant=CHROMA_TENANT,
    database=CHROMA_DATABASE,
)

# LangChain wrapper bound to the shared client, the target collection and the
# embedding model used to embed queries.
langchain_chroma = Chroma(
    client=CHROMA_CLIENT,
    collection_name=COLLECTION_NAME,
    embedding_function=OpenAIEmbeddings(model=EMBEDDING_MODEL),
)


In [3]:

from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.document_loaders import TextLoader

# Source document to index.
file_path = r"D:\appChat_demo\temp\docs\toyota.txt"

# Splitter configuration: chunk_size=300 with an overlap of 50 between
# neighbouring chunks. add_start_index=True records each chunk's offset in the
# source text as `start_index` metadata.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=300, chunk_overlap=50, add_start_index=True
)

# Read the file, letting the loader detect the text encoding.
loader = TextLoader(file_path, autodetect_encoding=True)
data = loader.load()

# Chunked documents that get embedded and stored later on.
docs = text_splitter.split_documents(data)

In [29]:
# Embed the chunks and store them in the target collection.
#
# `from_documents` is a classmethod that builds and returns a *new* Chroma
# store; it does not modify the instance it happens to be called on. Therefore:
#   * it is called on the class,
#   * it reuses CHROMA_CLIENT rather than opening a second client on a
#     hardcoded directory, so the data lands in the database the other cells
#     read from,
#   * the returned store is bound to `langchain_chroma`, so the retriever built
#     from it later searches this collection with this embedding model.
langchain_chroma = Chroma.from_documents(
    documents=docs,
    embedding=OpenAIEmbeddings(model="text-embedding-3-small"),
    collection_name="kkf720f983d2110356c6679e28a2e2423712c191dc4e384840d2a725a46b0b7",
    client=CHROMA_CLIENT,
)
langchain_chroma

<langchain_community.vectorstores.chroma.Chroma at 0x26d13380890>

In [31]:
# Fetch the ingested collection straight from the Chroma client and display
# its stored records (ids, metadatas, ...) as a sanity check.
collection_name = "kkf720f983d2110356c6679e28a2e2423712c191dc4e384840d2a725a46b0b7"
collection = CHROMA_CLIENT.get_collection(collection_name)
collection.get()

{'ids': ['0ea8be8c-83e5-4166-b440-1a123da42ce2',
  '2bdc97df-556c-4394-a2d5-cfbbaf16f8b3',
  '4558687c-4751-4784-956d-7ed0f7889527',
  '47ff0b02-ac33-4558-85ba-38d3e92c9aaf',
  '5da58646-1a4f-4746-91aa-8bb7a7467070',
  '7cd81e1a-ef97-4dc5-9b57-99a8b006da4f',
  '7ff26c40-8719-41e8-b8a5-6082d940cc4a',
  '818a18f1-b388-41ad-aedf-c10f9a456175',
  '82720561-58e4-4fe7-8d3c-fb69dd1975b2',
  '880c34e1-1f1b-4de6-b7dd-85445a280c71',
  'a36080b6-d853-44a6-9d9e-3455da24e3ab',
  'e279212b-e388-43d5-8aae-11f29f775624',
  'fbf65f22-cab3-4500-be15-3b4cdcae2d25'],
 'embeddings': None,
 'metadatas': [{'source': 'D:\\appChat_demo\\temp\\docs\\toyota.txt',
   'start_index': 1319},
  {'source': 'D:\\appChat_demo\\temp\\docs\\toyota.txt', 'start_index': 1798},
  {'source': 'D:\\appChat_demo\\temp\\docs\\toyota.txt', 'start_index': 1564},
  {'source': 'D:\\appChat_demo\\temp\\docs\\toyota.txt', 'start_index': 2351},
  {'source': 'D:\\appChat_demo\\temp\\docs\\toyota.txt', 'start_index': 2047},
  {'source': '

In [5]:
from langchain import hub
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

# Question sent through the RAG chain.
query = "summarize the content in 20 words"

# RAG prompt template pulled from the LangChain Hub.
prompt = hub.pull("rlm/rag-prompt")

# Retriever over the Chroma vector store, created with no custom search arguments.
retriever = langchain_chroma.as_retriever()


def format_docs(docs):
    """Return the `page_content` of every document, separated by blank lines."""
    page_texts = [document.page_content for document in docs]
    return "\n\n".join(page_texts)


# Chat model that turns the filled-in prompt into an answer. It was previously
# referenced without being defined anywhere in the notebook, so this cell only
# worked with leftover kernel state. Uses the library's default model; pass
# `model=...` to pin a specific one.
llm = ChatOpenAI()

# RAG pipeline: the incoming question is (a) sent to the retriever, whose
# documents are flattened into one text block by `format_docs`, and (b) passed
# through unchanged as "question". Both feed the prompt, then the chat model,
# and the model output is reduced to a plain string.
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

# NOTE: despite its name, `context` holds the chain's final answer string, not
# the retrieved context. The name is kept because a later cell prints it.
context = rag_chain.invoke(query)



In [6]:
# Show the generated answer; `context` is the string returned by `rag_chain.invoke(query)`.
print(context)

Tesla Motors is an American electric sports car manufacturer known for its Tesla Roadster, which offers high performance and range. The company also planned to offer solar-photovoltaic systems for home charging, making it "energy positive." The Roadster prototypes were introduced in July 2006, with the first production models planned for summer 2007.
