**SIMPLE RAG**

In [None]:
from pathlib import Path
from langchain_community.document_loaders import PyPDFLoader

In [None]:
# Source PDF, resolved against the current working directory.
pdf_path = Path.cwd().joinpath("nodejs.pdf")

In [None]:
# Read the PDF; by default the loader produces one Document per page.
loader = PyPDFLoader(pdf_path)
docs = loader.load()  # list[Document]
# Debug aid: uncomment to inspect a single page.
# print(docs[45])

In [None]:
# Import the text splitter
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [None]:
# Break the loaded pages into overlapping chunks: chunk_size=1000 with a
# chunk_overlap of 200 so neighbouring chunks share some text.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
split_docs = text_splitter.split_documents(docs)

# Report how many pages went in and how many chunks came out.
print(f"DOCS: {len(docs)}", f"SPLIT_DOCS: {len(split_docs)}", sep="\n")

In [None]:
import os
from dotenv import load_dotenv
from langchain_openai import OpenAIEmbeddings

# Load variables via python-dotenv, then read the OpenAI key from the process
# environment (None when the variable is not set).
load_dotenv()
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

In [None]:
# Embedding client that is handed to the Qdrant vector store.
embedder = OpenAIEmbeddings(api_key=OPENAI_API_KEY, model="text-embedding-3-small")

In [None]:
# Embed split_docs and store the resulting vectors in the vector DB (Qdrant)
from langchain_qdrant import QdrantVectorStore

# qdrant_vector_store = QdrantVectorStore.from_documents(
#     documents=[],
#     url="http://localhost:6333",
#     collection_name="learning_rag_with_langchain",
#     embedding=embedder
# )

# qdrant_vector_store.add_documents(documents=split_docs)
# print(f"---------- INJECTION DONE ----------")

In [None]:
# RETRIEVING FROM THE SAME DB
# (Ideally this would live in a separate file from the ingestion step.)
#
# Despite its name, `retriver` is a QdrantVectorStore handle bound to the
# existing collection; similarity_search is called on it directly.
retriver = QdrantVectorStore.from_existing_collection(
    embedding=embedder,
    collection_name="learning_rag_with_langchain",
    url="http://localhost:6333",
)

# RELEVANT CHUNKS
# relevant_chunks = retriver.similarity_search(
#     query="What is FS Module?"
# )

# print(f"RELEVENT CHUNKS: {relevant_chunks}")

In [None]:
from langchain_openai import ChatOpenAI
from langchain_core.messages import HumanMessage, SystemMessage, AIMessage

In [None]:
# Chat model that answers questions from the retrieved context.
llm = ChatOpenAI(api_key=OPENAI_API_KEY, model="gpt-4o-mini")

In [None]:
# TODO: CHAT WITH THE LLM BASED ON THE RELEVANT CHUNKS
# PASS THE RELEVANT CHUNKS TO THE MODEL AS CONTEXT

# SYSTEM_PROMPT = f"""
# You are a helpfull AI Assistant who give response of the user query based on the available CONTEXT

# CONTEXT
# {relevant_chunks}

# Provide a detailed answer based only on the context. If you cannot answer from the context, say so.
# """

In [None]:
# Running conversation: the chat loop appends one HumanMessage and one
# AIMessage per turn and replays the whole list on every LLM call.
chat_history: list = []

In [None]:
# Interactive RAG chat loop. For every question: fetch the most similar chunks
# from the vector store, place their text in the system prompt, and call the
# LLM with the accumulated conversation history attached.
print("Chatbot is ready! (Type 'exit' to stop)")

while True:
    try:
        # Strip so that " exit " still exits and stray whitespace is not
        # sent to the embedding model.
        user_query = input("Ask: ").strip()
    except (EOFError, KeyboardInterrupt):
        # Closed stdin or an interrupted prompt ends the session without a
        # traceback.
        break

    if user_query.lower() in ("exit", "quit"):
        break
    if not user_query:
        # Nothing to search for: skip the retrieval and LLM round-trip.
        continue

    # 1. Retrieve the chunks most similar to the question.
    relevant_chunks = retriver.similarity_search(query=user_query)

    # 2. Construct the system prompt with the retrieved text as CONTEXT.
    context_text = "\n".join(doc.page_content for doc in relevant_chunks)
    system_prompt_content = f"""
    You are a helpful AI Assistant who gives responses based on the available CONTEXT.
    
    CONTEXT:
    {context_text}
    
    Provide a detailed answer based only on the context. If you cannot answer from the context, say so.
    """

    # 3. Build the message list: system prompt, prior turns, current question.
    messages = [
        SystemMessage(content=system_prompt_content),
        *chat_history,
        HumanMessage(content=user_query),
    ]

    # 4. Get the model's response.
    response = llm.invoke(messages)

    # 5. Update the history. Only the question and the answer are stored; the
    #    retrieved context is rebuilt for each turn.
    chat_history.append(HumanMessage(content=user_query))
    chat_history.append(AIMessage(content=response.content))

    print(f"AI: {response.content}\n")