# Simple Text RAG System

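This notebook demonstrates a simple Retrieval-Augmented Generation (RAG) system that can run in Google Colab. It relies on two helper modules, `simple_rag` and `download_texts`, that Step 1 does not install; make them importable (for example, by placing them in the notebook's working directory) before running Step 2, or the import cell fails with `ModuleNotFoundError`.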


## Step 1: Install Dependencies


In [None]:
# Install sentence-transformers, requests and numpy. `%pip` (rather than
# `!pip`) installs into the environment of the running kernel.
# NOTE(review): `simple_rag` and `download_texts`, imported in the next code
# cell, are not among these packages — presumably local modules that must
# already be importable from the working directory; confirm.
%pip install sentence-transformers requests numpy
# Show the kernel's current working directory.
%pwd



'/content'

## Step 2: Import and Setup


In [None]:
import os
from simple_rag import SimpleTextRAG
from download_texts import download_sample_texts, create_sample_texts

# Populate the sample corpus. The download is attempted first; any exception
# it raises is reported and the sample files are created locally instead.
texts_target = "texts"  # presumably the output directory — confirm in download_texts

print("Setting up text files...")
try:
    text_files = download_sample_texts(texts_target)
except Exception as download_error:
    print(f"Download failed: {download_error}")
    print("Creating sample files instead...")
    text_files = create_sample_texts(texts_target)

# Summarise what is available for indexing.
print(f"\nReady! Found {len(text_files)} text files:")
for text_path in text_files:
    print(f"  - {text_path}")


ModuleNotFoundError: No module named 'simple_rag'

## Step 3: Initialize RAG System


In [None]:
# Tunable settings for the RAG system, gathered in one place.
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"  # lightweight model suitable for Colab
CHUNK_SIZE = 500     # NOTE(review): unit (characters vs. tokens) is defined by SimpleTextRAG — confirm
CHUNK_OVERLAP = 50   # same unit as CHUNK_SIZE, presumably — confirm

# Create the RAG system, load the text files prepared earlier, then build
# the embedding index that the query cells use.
rag = SimpleTextRAG(embedding_model_name=EMBEDDING_MODEL_NAME)
rag.load_documents(
    text_files,
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)
rag.build_index()


## Step 4: Query the System


In [None]:
# Run one query against the RAG system and show the combined context followed
# by a short preview of each retrieved chunk.
query = "What is Python?"
result = rag.query(query, top_k=3)

# Maximum number of characters of each chunk shown in the preview.
PREVIEW_CHARS = 200

print(f"Query: {result['query']}\n")
print("Retrieved Context:")
print(result['context'])
print("\n" + "="*50)
print("Retrieved Chunks:")
for rank, chunk in enumerate(result['retrieved_chunks'], start=1):
    chunk_text = chunk['text']
    # Append the ellipsis only when the text was actually cut; the previous
    # version added "..." to every chunk, which wrongly suggested truncation
    # for chunks that fit within the preview.
    if len(chunk_text) > PREVIEW_CHARS:
        preview = chunk_text[:PREVIEW_CHARS] + "..."
    else:
        preview = chunk_text
    print(f"\nChunk {rank} (similarity: {chunk['similarity']:.3f}):")
    print(f"From: {chunk['metadata']['file']}")
    print(f"Text: {preview}")


## Step 5: Multiple Queries


In [None]:
# Run several questions in a row and show only the first retrieved chunk for
# each (presumably the best match, as the "Top result" label implies —
# confirm the ordering in SimpleTextRAG.query).
queries = [
    "What is artificial intelligence?",
    "How does machine learning work?",
    "What are the features of Python?",
    "What is web development?"
]

# Maximum number of characters of the top chunk shown per question.
TOP_PREVIEW_CHARS = 300

# The loop uses its own names (`question`, `batch_result`) so that it does not
# overwrite the `query` / `result` variables set by the single-query cell.
for question in queries:
    print(f"\n{'='*60}")
    print(f"Query: {question}")
    print('='*60)
    batch_result = rag.query(question, top_k=2)
    hits = batch_result['retrieved_chunks']
    if not hits:
        # Say so explicitly instead of printing a header with nothing under it.
        print("\nNo chunks retrieved.")
        continue
    top_hit = hits[0]
    top_text = top_hit['text']
    # Add the ellipsis only when the text was actually truncated.
    if len(top_text) > TOP_PREVIEW_CHARS:
        top_preview = top_text[:TOP_PREVIEW_CHARS] + "..."
    else:
        top_preview = top_text
    print(f"\nTop result (similarity: {top_hit['similarity']:.3f}):")
    print(top_preview)
