## Basic RAG FLOW

![basic rag flow image](basic-rag.png "Basic RAG FLOW")

## RAG FLOW

![rag flow image](flow.png "RAG FLOW")

In [2]:
import os
from dotenv import load_dotenv
from pathlib import Path
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_qdrant import QdrantVectorStore
from openai import OpenAI

# Load key/value pairs from a .env file into the process environment so the
# os.getenv(...) lookups in later cells can find the API key.
load_dotenv()

True

In [4]:
# Location of the sample PDF, relative to the notebook's working directory.
file_path = "./data/somatosensory.pdf"

# Build the PDF loader; the documents themselves are produced by loader.load()
# in the next section.
loader = PyPDFLoader(file_path)

## 1. Loading Data

In [8]:
# 1 Data loading
# load() returns a list of Document objects; per the output below, each one
# carries PDF metadata (source, page, total_pages, ...) and text in page_content.
docs = loader.load()
# NOTE(review): this echoes every document in full; docs[:1] or len(docs)
# would keep the saved output short.
docs

[Document(metadata={'producer': 'Prince 20150210 (www.princexml.com)', 'creator': 'PyPDF', 'creationdate': '', 'title': 'Anatomy of the Somatosensory System', 'source': './data/somatosensory.pdf', 'total_pages': 4, 'page': 0, 'page_label': '1'}, page_content='This is a sample document to\nshowcase page-based formatting. It\ncontains a chapter from a Wikibook\ncalled Sensory Systems. None of the\ncontent has been changed in this\narticle, but some content has been\nremoved.\nAnatomy of the Somatosensory System\nFROM WIKIBOOKS1\nOur somatosensory system consists of sensors in the skin\nand sensors in our muscles, tendons, and joints. The re-\nceptors in the skin, the so called cutaneous receptors, tell\nus about temperature (thermoreceptors), pressure and sur-\nface texture (mechano receptors), and pain (nociceptors).\nThe receptors in muscles and joints provide information\nabout muscle length, muscle tension, and joint angles.\nCutaneous receptors\nSensory information from Meissner cor

## 2. Chunking

In [7]:
# Splitter configured for chunks of up to 1000 characters, with up to 200
# characters of overlap between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)

# Apply the splitter to the loaded documents and echo the resulting chunks.
split_docs = text_splitter.split_documents(docs)
split_docs

[Document(metadata={'producer': 'Prince 20150210 (www.princexml.com)', 'creator': 'PyPDF', 'creationdate': '', 'title': 'Anatomy of the Somatosensory System', 'source': './data/somatosensory.pdf', 'total_pages': 4, 'page': 0, 'page_label': '1'}, page_content='This is a sample document to\nshowcase page-based formatting. It\ncontains a chapter from a Wikibook\ncalled Sensory Systems. None of the\ncontent has been changed in this\narticle, but some content has been\nremoved.\nAnatomy of the Somatosensory System\nFROM WIKIBOOKS1\nOur somatosensory system consists of sensors in the skin\nand sensors in our muscles, tendons, and joints. The re-\nceptors in the skin, the so called cutaneous receptors, tell\nus about temperature (thermoreceptors), pressure and sur-\nface texture (mechano receptors), and pain (nociceptors).\nThe receptors in muscles and joints provide information\nabout muscle length, muscle tension, and joint angles.\nCutaneous receptors\nSensory information from Meissner cor

## 3. Embeddings

In [7]:
# Embedder function
# Passed as `embedding=` to the Qdrant vector store cells below.
embedder = OpenAIEmbeddings(
    model="text-embedding-3-large",
    # NOTE(review): the variable is spelled OPEN_API_KEY, not the conventional
    # OPENAI_API_KEY — confirm it matches the name used in the .env file.
    api_key=os.getenv("OPEN_API_KEY")
)

## 4. Setting Vector Store db - Qdrant

### 4.a Adding Data into db

In [None]:
# Connect to the Qdrant instance running locally through docker
# (~ docker compose -f docker-compose.db.yml up) and ingest the chunks.
# from_documents creates the "rag" collection when it does not exist yet and
# then embeds and uploads the documents it is given, so the chunks are passed
# directly instead of building an empty store and adding them afterwards.
# NOTE(review): re-running this cell presumably appends the same chunks again —
# confirm before re-executing against an already populated collection.
vector_store = QdrantVectorStore.from_documents(
    documents=split_docs,  # chunked documents from the Chunking section
    url="http://localhost:6333",
    collection_name="rag",
    embedding=embedder,  # OpenAI embedder
)

print('Injection done')

### 4.b Retrieving the Data via Retriever

In [None]:
# Attach to the "rag" collection that was populated during ingestion.
# from_existing_collection only opens the collection: unlike
# from_documents(documents=[]), it does not create the collection or upload
# anything, so a missing collection surfaces as an error here instead of
# silently yielding an empty store.
# (The variable keeps its original spelling because later cells reference it.)
retriver = QdrantVectorStore.from_existing_collection(
    embedding=embedder,  # must be the same embedder that was used for ingestion
    collection_name="rag",
    url="http://localhost:6333",
)

In [None]:
# Run a similarity search for the user's question against the vector store and
# echo the chunks that come back.
# NOTE(review): the same question text is hard-coded again as `user_query` in
# section 6; keep the two in sync.
relevant_chunk = retriver.similarity_search(query = "What is Rapidly adapting ??")
relevant_chunk

## 5. Creating System Prompt

In [None]:
# Build the context from the text of the retrieved chunks only. Interpolating
# the list itself would paste the Document repr (metadata dicts, escaped
# newlines) into the prompt.
context_text = "\n\n".join(doc.page_content for doc in relevant_chunk)

# Feed the retrieved context to the model through the system prompt.
SYSTEM_PROMPT = f"""
You are a helpful AI Assistant who responds based on the available context.

Context:
{context_text}
"""

## 6. Init OpenAI Client

In [None]:
# OpenAI client, authenticated with the key read from the environment
client = OpenAI(api_key=os.getenv("OPEN_API_KEY"))

user_query = "What is Rapidly adapting ??"

# Conversation: retrieved context as the system message, then the question
chat_messages = [
    {"role": "system", "content": SYSTEM_PROMPT},
    {"role": "user", "content": user_query},
]
response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=chat_messages,
    temperature=0.7,
)
answer = response.choices[0].message.content

# Show the question next to the model's reply
print("\nUser Query:", user_query)
print("\nAssistant Response:", answer)

## E2E Flow

In [None]:
# NOTE(review): OpenAI is already imported in the first cell; this repeated
# import is redundant.
from openai import OpenAI

# Initialize client
# Rebinds `client`, which section 6 already created with the same arguments.
client = OpenAI(api_key=os.getenv("OPEN_API_KEY"))

# Function to get response from LLM
def get_llm_response(system_prompt, user_query, model="gpt-3.5-turbo", temperature=0.7):
    """Send one system + user message pair to the chat model and return the reply.

    Uses the module-level `client` created earlier in this cell.

    Args:
        system_prompt: Instructions plus retrieved context, sent as the system message.
        user_query: The user's question, sent as the user message.
        model: Chat model name. Defaults to "gpt-3.5-turbo", the value that was
            previously hard-coded.
        temperature: Sampling temperature. Defaults to 0.7, the value that was
            previously hard-coded.

    Returns:
        The message content of the first choice in the completion.
    """
    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_query},
        ],
        temperature=temperature,
    )
    return response.choices[0].message.content

# Interactive loop for user queries
def interactive_rag():
    """Run a question/answer loop against the vector store until the user quits.

    For each question, similar chunks are retrieved from `retriver`, their text
    is placed in the system prompt, and the question is sent to the model via
    `get_llm_response`; the reply is printed.

    The loop ends when the user types 'exit' (any case, surrounding whitespace
    ignored) or when input() raises EOFError / KeyboardInterrupt. Blank input
    is skipped without querying the store or the model.
    """
    print("RAG Assistant ready. Type 'exit' to quit.")
    while True:
        try:
            user_input = input("\nYour question: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or an interrupted prompt ends the session cleanly
            # instead of leaving a traceback in the cell output.
            break

        if user_input.lower() == 'exit':
            break
        if not user_input:
            # Nothing to search for; ask again rather than embedding an empty query.
            continue

        # Retrieve the chunks most similar to the question.
        relevant_chunks = retriver.similarity_search(query=user_input)

        # Use only the chunk text as context; interpolating the list itself
        # would paste the Document repr (metadata, escaped newlines) into the prompt.
        context_text = "\n\n".join(doc.page_content for doc in relevant_chunks)

        # Built from plain string pieces so the function's indentation does
        # not leak into the prompt text.
        formatted_system_prompt = (
            "You are a helpful AI Assistant who responds based on the available context.\n"
            "\n"
            "Context:\n"
            f"{context_text}\n"
        )

        # Get response from LLM
        response = get_llm_response(formatted_system_prompt, user_input)
        print("\nAssistant:", response)

# Start the interactive session
# NOTE: this call blocks the cell on input() until the user types 'exit'.
interactive_rag()