## Example 1: A Demo RAG

In [None]:
import os

from dotenv import load_dotenv
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader

# Load environment variables from a local .env file
load_dotenv()

# Read the files under the data directory, showing a progress bar while loading
reader = SimpleDirectoryReader(input_dir="../../data")
documents = reader.load_data(show_progress=True)

# Report how many documents were loaded
print(len(documents))

# Build the in-memory vector index, then derive a query engine from it
index = VectorStoreIndex.from_documents(documents)
query_engine = index.as_query_engine()

In [None]:
# Ask the Example 1 query engine a question and print the answer it returns
response = query_engine.query(
    "What is llama2-chat and what are the variants? Provide citations"
)
print(response)

## Example 2: Demo RAG With More Control

### Step 1: Parse The Document

In [None]:
# Apply the nest_asyncio patch before any async code runs in this notebook.
# NOTE(review): presumably required so the async LlamaParse call below can run
# inside the notebook kernel's already-running event loop — confirm.
import nest_asyncio
nest_asyncio.apply()

In [None]:
import os

from dotenv import load_dotenv
from llama_parse import LlamaParse

load_dotenv()

# Create the parser
parser = LlamaParse(
    api_key=os.environ.get("LLAMA_CLOUD_API_KEY"),
    result_type="markdown"
)

# Async load and parse the data
documents = await parser.aload_data("../../data/docs/lost_in_the_middle.pdf")

In [None]:
# Take the first parsed document, convert it to its dict form,
# and print the available top-level keys
first_document = documents[0]
document = first_document.dict()
print(document.keys())

### Step 2: Index The Documents

In [None]:
from llama_index.core import Settings
from llama_index.core import VectorStoreIndex

# Set the chunking parameters on llama_index's Settings object
# (chunk size first, then overlap)
Settings.chunk_size, Settings.chunk_overlap = 1000, 100

# Build the in-memory vector index from the parsed documents
index = VectorStoreIndex.from_documents(documents)

### Step 3: Create The Query Engine

In [None]:
# Create a query engine from the index with similarity_top_k set to 4
query_engine = index.as_query_engine(similarity_top_k=4)

# Run a sample query; the result is inspected in the following cells
response = query_engine.query(
    "Explain the lost in the middle problem in less than 500 words. Provide references or citations."
)

In [None]:
# Show the response text; as the last expression in the cell it is rendered as the cell output
response.response

In [None]:
# Show the metadata attached to the response; rendered as the cell output
response.metadata

## Example 3: Demo RAG With Qdrant

In [None]:
import os
import qdrant_client

from dotenv import load_dotenv
from llama_index.vector_stores.qdrant import QdrantVectorStore
from llama_index.embeddings.fastembed import FastEmbedEmbedding
from llama_index.core import (
    Settings,
    StorageContext,
    VectorStoreIndex,
    SimpleDirectoryReader
)

In [None]:
# Use a FastEmbed embedding model for every index built after this point
Settings.embed_model = FastEmbedEmbedding(
    model_name="BAAI/bge-base-en-v1.5",
)

In [None]:
# Load environment variables from a local .env file.
# As the last expression in the cell, the call's return value is shown as the cell output.
load_dotenv()

In [None]:
# Point the reader at the single PDF and let llama-index parse it into documents
pdf_reader = SimpleDirectoryReader(
    input_files=["../../data/docs/lost_in_the_middle.pdf"],
)
documents = pdf_reader.load_data()

In [None]:
# Client for the Qdrant server (Docker instance) listening on localhost:6333
client = qdrant_client.QdrantClient(host="localhost", port=6333)

# Vector store bound to that client and to the named collection
vector_store = QdrantVectorStore(
    collection_name="retrieval_augmented_generation",
    client=client,
)

# Storage context that uses the Qdrant vector store
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# Build the vector index from the documents using that storage context
index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)

In [None]:
# Derive a query engine from the Qdrant-backed index
query_engine = index.as_query_engine()

# Submit the question; the result is inspected in the following cells
question = "How does large number of chunks in the retrieval phase affect quality of response?"
response = query_engine.query(question)

In [None]:
# View the response text; as the last expression in the cell it is rendered as the cell output
response.response

In [None]:
# View the metadata attached to the response; rendered as the cell output
response.metadata

In [None]:
# Close the Qdrant client now that this example's queries are finished
client.close()

## Example 4: Demo RAG With Qdrant And HuggingFace

In [1]:
import os
import qdrant_client

from dotenv import load_dotenv
from llama_index.vector_stores.qdrant import QdrantVectorStore
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import (
    Settings,
    StorageContext,
    VectorStoreIndex,
    SimpleDirectoryReader
)

In [2]:
# Load environment variables from a local .env file
load_dotenv()

# Use a HuggingFace embedding model for every index built after this point
Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-base-en-v1.5")

In [3]:
# Point the reader at the single PDF and load it into documents
pdf_reader = SimpleDirectoryReader(
    input_files=["../../data/docs/lost_in_the_middle.pdf"],
)
documents = pdf_reader.load_data()

In [4]:
# Number of documents produced by the reader; rendered as the cell output
len(documents)

18

In [6]:
# Client for the Qdrant server listening on localhost:6333
client = qdrant_client.QdrantClient(host="localhost", port=6333)

# Vector store bound to that client and to the named collection
vector_store = QdrantVectorStore(
    collection_name="sample_collection",
    client=client,
)

# Storage context that uses the Qdrant vector store
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# Build the vector index from the documents using that storage context
index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)

# Persist the index's storage context under ../../db/qdrant
index.storage_context.persist("../../db/qdrant")

# Derive the query engine used by the cells below
query_engine = index.as_query_engine()

In [9]:
# Submit the question; the result is inspected in the following cells
response = query_engine.query(
    "What are the challenges of large number of chunks in RAG according to the author?"
)

In [10]:
# Show the response text; as the last expression in the cell it is rendered as the cell output
response.response

'The challenges of a large number of chunks in RAG, according to the author, include the rapid degradation in model performance when models need to reason over information located in the middle of their input context. The author notes that model performance is highest when relevant information is positioned at the very beginning or end of the input context, and suffers degraded performance when forced to utilize information within the middle of the context.'

In [11]:
# Show the metadata attached to the response; rendered as the cell output
response.metadata

{'6a08aff2-b4d8-49d7-9f5d-d92378a5e25f': {'page_label': '5',
  'file_name': 'lost_in_the_middle.pdf',
  'file_path': '../../data/docs/lost_in_the_middle.pdf',
  'file_type': 'application/pdf',
  'file_size': 747542,
  'creation_date': '2024-07-17',
  'last_modified_date': '2024-07-17'},
 '83f6cfee-29c7-4832-a991-0479375840b5': {'page_label': '3',
  'file_name': 'lost_in_the_middle.pdf',
  'file_path': '../../data/docs/lost_in_the_middle.pdf',
  'file_type': 'application/pdf',
  'file_size': 747542,
  'creation_date': '2024-07-17',
  'last_modified_date': '2024-07-17'}}