# STEPS FOR THE DATA STORAGE

In [None]:
# STEP 1: Data Ingestion.
# Load the Wikipedia article on natural language processing; `docs` is the
# loader's output and is the input to the chunking step.
from langchain_community.document_loaders import WebBaseLoader

source_url = "https://en.wikipedia.org/wiki/Natural_language_processing"
loader = WebBaseLoader(source_url)
docs = loader.load()

In [None]:
# STEP 2: Chunking.
# Split the loaded documents into pieces of at most 500 characters, with a
# 50-character overlap between neighbouring pieces.
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
chunks = text_splitter.split_documents(docs)

In [None]:
# STEP 3: Embeddings.
# Import from `langchain_community`, like the other integrations in this file;
# the `langchain.embeddings` re-export is deprecated.
from langchain_community.embeddings import HuggingFaceBgeEmbeddings

embedding_model = HuggingFaceBgeEmbeddings(
    model_name="BAAI/bge-base-en-v1.5",
    model_kwargs={"device": "cpu"},  # or "cuda" if you have GPU
    encode_kwargs={"normalize_embeddings": True},
)

# Embed every chunk once up front as a sanity check of the model setup.
# The vector store computes its own embeddings from `embedding_model`
# when the documents are stored.
embeddings = embedding_model.embed_documents([chunk.page_content for chunk in chunks])
print(f"Embedded {len(embeddings)} chunks.")

In [None]:
# STEP 4: Initialize Qdrant VectorStore
from langchain_community.vectorstores import Qdrant
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams

qdrant = QdrantClient(path="./qdrant_data")  # Local, or use `host` and `port` for remote

# Create collection (if not exists)
collection_name = "rag_demo"

# `get_collections().collections` holds description objects, not strings, so
# the membership test has to be made against their `.name` attribute.
existing_collection_names = {c.name for c in qdrant.get_collections().collections}

if collection_name not in existing_collection_names:
    # The collection's vector size must equal the embedding model's output
    # dimension (768 for BAAI/bge-base-en-v1.5). Measuring it keeps the two
    # in sync if the model is changed.
    vector_size = len(embedding_model.embed_query("dimension probe"))
    # The collection is known to be absent here, so a plain create is enough
    # and nothing is ever dropped.
    qdrant.create_collection(
        collection_name=collection_name,
        vectors_config=VectorParams(size=vector_size, distance=Distance.COSINE),
    )
# NOTE: a collection left on disk by an earlier run with a different vector
# size is not rebuilt here; remove it with `qdrant.delete_collection(...)`.

In [None]:
# STEP 5: Store vectors in Qdrant
# `Qdrant.from_documents` builds its own client from connection arguments
# (url/host/path) and does not reuse a client passed as `client=`. The local
# storage path is already held open by `qdrant`, so wrap that client directly
# and add the chunks to the existing collection.
db = Qdrant(
    client=qdrant,
    collection_name=collection_name,
    embeddings=embedding_model,
)
db.add_documents(chunks)

In [None]:
# Defining the LLM.
# Hosted Mistral-7B-Instruct endpoint with a low sampling temperature and a
# cap of 512 newly generated tokens per call.
from langchain_community.llms import HuggingFaceEndpoint

llm = HuggingFaceEndpoint(repo_id="mistralai/Mistral-7B-Instruct-v0.1", temperature=0.2, max_new_tokens=512)

In [None]:
# ---- RETRIEVER ----
# The vector store created in STEP 5 is bound to `db`; no `qdrant_store` name
# exists in this file. Return the 5 most similar chunks per query.
retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 5})

In [None]:
# STEP 6: Design ChatPrompt Template.
from langchain_core.prompts import ChatPromptTemplate

# Template variables:
#   {context} - filled by the stuff-documents chain with the retrieved chunks.
#   {input}   - the user's question; `create_retrieval_chain` passes the query
#               under the key "input", so the placeholder must use that name.
prompt = ChatPromptTemplate.from_template("""
You are an intelligent assistant helping users based on the following retrieved context.

<context>
{context}
</context>

Answer the following question:
{input}
""")

In [None]:
# Chain Instruction
# Combine the LLM and the prompt into a chain that places the supplied
# documents into the prompt before calling the model.
from langchain.chains.combine_documents import create_stuff_documents_chain

document_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)

In [None]:
# STEP 7: Create Retriever from Qdrant vector store
# Plain similarity search ("mmr" would select Max Marginal Relevance instead),
# returning the top 5 most similar documents for each query.
retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 5})

In [None]:
# Wire the retriever to the document chain: retrieved documents are handed to
# `document_chain`, which produces the final answer.
from langchain.chains import create_retrieval_chain

retrieval_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=document_chain,
)

In [None]:
# The chain must be invoked with a dict. `{input}` was a set literal holding
# the builtin `input` function, not a mapping with a question in it.
# "input" is the key the retrieval chain reads to query the retriever.
# "question" carries the same text so that a prompt template written with a
# {question} placeholder is also satisfied; extra keys are passed through.
query = "What is natural language processing?"
response = retrieval_chain.invoke({"input": query, "question": query})