# DeepSeek RAG Demo (LangChain + ChromaDB + DeepSeek API)
This notebook demonstrates a lightweight Retrieval-Augmented Generation (RAG) pipeline using:
- `LangChain` for loading and chunking documents
- `ChromaDB` as the vector store
- `OpenAI-compatible` DeepSeek API for answering queries


In [None]:

#!pip install langchain chromadb openai python-dotenv tiktoken

In [None]:
#!pip install langchain-community

In [15]:
import os
from pathlib import Path
from dotenv import load_dotenv
from openai import OpenAI
import os
from pathlib import Path
from dotenv import load_dotenv, dotenv_values
from openai import OpenAI
from langchain_community.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma


In [18]:


# 1. API Key Setup
load_dotenv()
API_KEY = os.getenv("DEEPSEEK_API_KEY")
if not API_KEY:
    raise ValueError("Set DEEPSEEK_API_KEY in your .env!")

client = OpenAI(api_key=API_KEY, base_url="https://api.deepseek.com")
MODEL = "deepseek-chat"

print(API_KEY)


In [19]:


loader = TextLoader("docs/sample.txt")
docs = loader.load()

splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
chunks = splitter.split_documents(docs)

embedding_model = OpenAIEmbeddings()
vectorstore = Chroma.from_documents(chunks, embedding_model, persist_directory="chroma_store")

query = input("Ask a question based on the document: ")

retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
relevant_docs = retriever.get_relevant_documents(query)
context = "\n\n".join([doc.page_content for doc in relevant_docs])

prompt = f"""You are a helpful assistant. Use the context below to answer the question.

Context:
{context}

Question:
{query}
"""

response = client.chat.completions.create(
    model=MODEL,
    messages=[{"role": "user", "content": prompt}],
    max_tokens=1024
)

print("\nAnswer:")
print(response.choices[0].message.content)

  embedding_model = OpenAIEmbeddings()


ValidationError: 1 validation error for OpenAIEmbeddings
  Value error, Did not find openai_api_key, please add an environment variable `OPENAI_API_KEY` which contains it, or pass `openai_api_key` as a named parameter. [type=value_error, input_value={'model_kwargs': {}, 'cli...20, 'http_client': None}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.10/v/value_error