<a href="https://colab.research.google.com/github/prakh-shetty-cqu/simple_rag/blob/main/Basic_RAG_Streamlit.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# [Optional] Enable T4 GPU in Colab
# Runtime -> Change runtime type -> T4 GPU -> Save
!sudo apt update
!sudo apt install -y pciutils lshw

# Install required packages
!pip install streamlit chromadb ollama PyPDF2 sentence-transformers pycryptodome

# Install and setup Ollama
!curl -fsSL https://ollama.com/install.sh | sh

# Start Ollama service in background
!nohup ollama serve > ollama.log 2>&1 &

# Pull required models
!ollama pull nomic-embed-text
!ollama pull llama3.2

# Install localtunnel for exposing Streamlit
!npm install -g localtunnel

[33m0% [Working][0m            Get:1 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease [3,632 B]
Get:2 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease [1,581 B]
Get:3 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  Packages [1,853 kB]
Get:4 http://security.ubuntu.com/ubuntu jammy-security InRelease [129 kB]
Hit:5 http://archive.ubuntu.com/ubuntu jammy InRelease
Hit:6 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease
Get:7 http://archive.ubuntu.com/ubuntu jammy-updates InRelease [128 kB]
Hit:8 https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu jammy InRelease
Get:9 https://r2u.stat.illinois.edu/ubuntu jammy InRelease [6,555 B]
Hit:10 https://ppa.launchpadcontent.net/ubuntugis/ppa/ubuntu jammy InRelease
Get:11 https://r2u.stat.illinois.edu/ubuntu jammy/main all Packages [9,152 kB]
Get:12 http://security.ubuntu.com/ubuntu jammy-security/universe amd64 Packages [1,269 kB

In [2]:
%%writefile app.py
from io import BytesIO  # Byte streams for file processing, for PDF
import streamlit as st  # Web framework for creating interactive apps
import chromadb         # Vector database for storing embeddings
import ollama           # Local LLM interface
import PyPDF2           # PDF text extraction library
import uuid             # To generate unique identifiers
import time


# Page setup: browser-tab title and icon, full-width layout
st.set_page_config(layout="wide", page_icon="😬", page_title="Simple RAG with Ollama")

# Tunables shared by the chunking, embedding and answer-generation steps
LLM_MODEL = "llama3.2"                      # Ollama language model used to generate answers
EMBEDDING_MODEL = "nomic-embed-text"        # Ollama embedding model [Note: upgrade it]
CHUNK_OVERLAP = 200                         # Characters that may be shared by consecutive chunks
CHUNK_SIZE = 1000                           # Upper bound on characters per text chunk


Writing app.py


In [3]:
%%writefile -a app.py

class SimpleRAGSystem:
    """A simple Retrieval-Augmented Generation (RAG) system using Ollama and ChromaDB.

    Pipeline: extract text from a PDF, split it into overlapping chunks,
    embed the chunks with Ollama, store them in a ChromaDB collection, then
    answer questions by retrieving the closest chunks and passing them to an
    Ollama language model as context.

    Errors inside the helper methods are reported through ``st.error`` and
    signalled to the caller with ``None`` / ``False`` instead of being raised.
    """

    def __init__(self):
        """Open the on-disk ChromaDB store and get (or create) the collection."""
        # Persistent storage on the local disk. On Google Colab that disk is
        # ephemeral, so the data only survives if the path is on Google Drive.
        self.client = chromadb.PersistentClient(path="./chroma_db")

        # Create or get collection
        self.collection_name = "documents"
        try:
            # Try to get existing collection
            self.collection = self.client.get_collection(name=self.collection_name)
            st.success("Connected to existing document collection")
        except Exception:
            # Create new collection if it doesn't exist. `Exception` (rather
            # than a bare `except`) lets KeyboardInterrupt/SystemExit propagate.
            self.collection = self.client.create_collection(name=self.collection_name)
            st.success("Created new document collection")

    def extract_text_from_pdf(self, pdf_file):
        """Extract the text of every page of an uploaded PDF.

        Args:
            pdf_file: File-like object whose ``read()`` returns the PDF bytes.

        Returns:
            One string with each page prefixed by a ``--- Page N ---`` marker,
            or ``None`` if the PDF could not be read (the error is shown via
            ``st.error``).
        """
        try:
            # Wrap the uploaded bytes so PyPDF2 gets a seekable stream
            pdf_bytes = BytesIO(pdf_file.read())
            pdf_reader = PyPDF2.PdfReader(pdf_bytes)

            # Collect the pages in a list and join once at the end
            page_blocks = []
            for page_num, page in enumerate(pdf_reader.pages, start=1):
                # Guard against extract_text() yielding None for a page without
                # a text layer; otherwise the literal "None" would be indexed.
                page_text = page.extract_text() or ""
                page_blocks.append(f"\n--- Page {page_num} ---\n{page_text}\n")

            return "".join(page_blocks)
        except Exception as e:
            st.error(f"Error reading PDF: {str(e)}")
            return None

    def create_chunks(self, text, source_name, chunk_size=None, chunk_overlap=None):
        """Split text into overlapping chunks, preferring sentence boundaries.

        Args:
            text: The text to split.
            source_name: Name of the originating document; used in chunk ids
                and stored as the ``source`` of every chunk.
            chunk_size: Maximum characters per chunk. Defaults to the
                module-level ``CHUNK_SIZE``.
            chunk_overlap: Characters shared by consecutive chunks. Defaults
                to the module-level ``CHUNK_OVERLAP``.

        Returns:
            A list of dicts with keys ``id``, ``text``, ``source`` and
            ``chunk_number``. Chunks that are empty after stripping are
            skipped (their number is still consumed).

        Raises:
            ValueError: If ``chunk_size`` is not positive or ``chunk_overlap``
                is not in ``[0, chunk_size)``.
        """
        # Resolve defaults at call time so the module constants stay the
        # single source of truth for callers that pass nothing.
        if chunk_size is None:
            chunk_size = CHUNK_SIZE
        if chunk_overlap is None:
            chunk_overlap = CHUNK_OVERLAP

        if chunk_size <= 0:
            raise ValueError("chunk_size must be a positive number of characters")
        if not 0 <= chunk_overlap < chunk_size:
            raise ValueError("chunk_overlap must be >= 0 and smaller than chunk_size")

        chunks = []
        if not text:
            return chunks

        text_length = len(text)
        # A sentence break is only used when it keeps at least 70% of the chunk
        min_break_index = chunk_size * 0.7

        start = 0
        chunk_id = 0

        while start < text_length:
            end = min(start + chunk_size, text_length)
            chunk_text = text[start:end]

            # Try to break at sentence boundary if possible
            if end < text_length:
                # Position of the last '.', '!' or '?' in the chunk (-1 if none)
                sentence_end = max(chunk_text.rfind(mark) for mark in '.!?')

                if sentence_end > min_break_index:
                    chunk_text = chunk_text[:sentence_end + 1]
                    end = start + sentence_end + 1

            stripped_text = chunk_text.strip()
            if stripped_text:  # Only add non-empty chunks
                chunks.append({
                    'id': f"{source_name}_{chunk_id}",
                    'text': stripped_text,
                    'source': source_name,
                    'chunk_number': chunk_id
                })

            # The chunk that reaches the end of the text is the last one. Going
            # on would emit a chunk made only of overlap that is already fully
            # contained in the chunk just produced.
            if end >= text_length:
                break

            # Step back by the overlap, but always advance by at least one
            # character so the loop terminates for any valid size/overlap.
            start = max(end - chunk_overlap, start + 1)
            chunk_id += 1

        return chunks

    def generate_embeddings(self, texts):
        """Generate one embedding per text using the Ollama embedding model.

        Args:
            texts: Iterable of strings to embed.

        Returns:
            A list of embeddings in the same order as ``texts``, or ``None``
            if any request fails (the error is shown via ``st.error``).
        """
        try:
            embeddings = []

            # One request per text
            for text in texts:
                response = ollama.embeddings(
                    model=EMBEDDING_MODEL,
                    prompt=text
                )
                embeddings.append(response['embedding'])

            return embeddings
        except Exception as e:
            st.error(f"Error generating embeddings: {str(e)}")
            return None

    def add_documents(self, chunks):
        """Embed document chunks and store them in the ChromaDB collection.

        Args:
            chunks: List of chunk dicts as produced by ``create_chunks``.

        Returns:
            ``True`` if the chunks were stored, ``False`` if there was nothing
            to store, embedding failed, or the database write failed.
        """
        try:
            # Nothing to embed or store; avoid sending empty lists to ChromaDB
            if not chunks:
                st.warning("No text chunks to add to the database")
                return False

            # Extract texts for embedding
            texts = [chunk['text'] for chunk in chunks]

            # Generate embeddings
            with st.spinner("Generating embeddings..."):
                embeddings = self.generate_embeddings(texts)

            if embeddings is None:
                return False

            # Prepare data for ChromaDB
            ids = [chunk['id'] for chunk in chunks]
            metadatas = [
                {
                    'source': chunk['source'],
                    'chunk_number': chunk['chunk_number']
                }
                for chunk in chunks
            ]

            # Chunk ids are derived from the file name, so uploading the same
            # file again produces ids that already exist in the persistent
            # collection. upsert makes that re-upload idempotent.
            self.collection.upsert(
                ids=ids,
                embeddings=embeddings,
                documents=texts,
                metadatas=metadatas
            )

            return True

        except Exception as e:
            st.error(f"Error adding documents to database: {str(e)}")
            return False

    def search_documents(self, query, n_results=3):
        """Search the collection for the chunks closest to a query.

        Args:
            query: The user's question.
            n_results: Number of chunks to request from ChromaDB.

        Returns:
            The result object returned by ``collection.query``, or ``None`` if
            embedding or querying fails (the error is shown via ``st.error``).
        """
        try:
            # Embed the query with the same model used for the chunks
            query_response = ollama.embeddings(
                model=EMBEDDING_MODEL,
                prompt=query
            )
            query_embedding = query_response['embedding']

            # Search in ChromaDB
            results = self.collection.query(
                query_embeddings=[query_embedding],
                n_results=n_results
            )

            return results

        except Exception as e:
            st.error(f"Error searching documents: {str(e)}")
            return None

    def generate_answer(self, query, context):
        """Generate an answer to a question from the retrieved context.

        Args:
            query: The user's question.
            context: Text of the retrieved chunks, inserted into the prompt.

        Returns:
            The model's response text, or ``None`` if generation fails (the
            error is shown via ``st.error``).
        """
        try:
            # Create prompt with context
            prompt = f"""Based on the following context, please answer the question. If the answer is not in the context, say "I don't have enough information to answer this question."

            Context:
            {context}

            Question: {query}

            Answer:"""

            # Generate response using Ollama
            response = ollama.generate(
                model=LLM_MODEL,
                prompt=prompt,
                options={
                    'temperature': 0.7,             # Controls randomness (0=deterministic, 1=creative)
                    #'top_p': 0.9,                  # Nucleus sampling for response quality [generally adjust 1 at a time]
                    # Ollama's option for capping the response length is
                    # `num_predict`; `max_tokens` is not an Ollama option.
                    'num_predict': 1000             # Maximum response length in tokens
                }
            )

            return response['response']

        except Exception as e:
            st.error(f"Error generating answer: {str(e)}")
            return None

Appending to app.py


In [4]:
%%writefile -a app.py


def main():
    """Main Streamlit application.

    Renders a sidebar where a PDF can be uploaded and indexed, and a main area
    where questions are answered from the indexed content. The RAG system and
    the set of already processed file names are kept in ``st.session_state``
    so they survive Streamlit's script re-runs within a session.
    """

    st.title("😬 Simple RAG System")
    st.markdown("Upload PDF documents and ask questions about their content!")

    # Initialize session state
    if 'rag_system' not in st.session_state:
        st.session_state.rag_system = SimpleRAGSystem()

    if 'processed_files' not in st.session_state:
        st.session_state.processed_files = set()

    rag_system = st.session_state.rag_system
    processed_files = st.session_state.processed_files

    # Sidebar for file upload
    with st.sidebar:
        st.header("📁 Document Upload")

        uploaded_file = st.file_uploader(
            "Choose a PDF file",
            type="pdf",
            help="Upload a PDF document to add to the knowledge base"
        )

        if uploaded_file is not None:
            file_name = uploaded_file.name

            # Files are de-duplicated by name within the session
            if file_name not in processed_files:
                with st.spinner(f"Processing {file_name}..."):
                    # Extract text from PDF
                    text = rag_system.extract_text_from_pdf(uploaded_file)

                    if text:
                        # Create chunks
                        chunks = rag_system.create_chunks(text, file_name)
                        st.info(f"Created {len(chunks)} chunks from the document")

                        # Add to database; an empty chunk list is treated as a
                        # failure instead of being sent to the database.
                        if chunks and rag_system.add_documents(chunks):
                            processed_files.add(file_name)
                            st.success(f"✅ Successfully processed {file_name}")
                        else:
                            st.error(f"❌ Failed to process {file_name}")

        # Show processed files (sorted so the list order is stable across re-runs)
        if processed_files:
            st.subheader("📚 Processed Documents")
            for file_name in sorted(processed_files):
                st.text(f"• {file_name}")

    # Main content area
    if processed_files:
        st.header("💬 Ask Questions")

        # Query input
        query = st.text_input(
            "Enter your question:",
            placeholder="What is this document about?"
        )

        if query:
            with st.spinner("🔍 Searching and generating answer..."):
                # Search for relevant documents
                search_results = rag_system.search_documents(query)

                # Query results hold one inner list per query, so the outer
                # list is non-empty even with zero hits. Check the inner list
                # for the single query that was sent.
                documents = []
                metadatas = []
                if search_results:
                    documents = (search_results.get('documents') or [[]])[0]
                    metadatas = (search_results.get('metadatas') or [[]])[0]

                if documents:
                    # Combine retrieved documents as context
                    context = "\n\n".join(documents)

                    # Generate answer
                    answer = rag_system.generate_answer(query, context)

                    if answer:
                        # Display results
                        st.subheader("🎯 Answer")
                        st.write(answer)

                        # Show sources
                        with st.expander("📖 View Sources", expanded=False):
                            for i, (doc, metadata) in enumerate(zip(documents, metadatas)):
                                st.markdown(f"**Source {i+1}:** {metadata['source']}")
                                # Show at most the first 500 characters of each source
                                preview = doc[:500] + "..." if len(doc) > 500 else doc
                                st.text_area(
                                    f"Content {i+1}:",
                                    value=preview,
                                    height=100,
                                    key=f"source_{i}"
                                )
                else:
                    st.warning("No relevant documents found for your query.")
    else:
        # Welcome message
        st.info("👆 Please upload a PDF document using the sidebar to get started!")

if __name__ == "__main__":
    main()

Appending to app.py


In [5]:
# Start Streamlit in background; its stdout and stderr are redirected to /content/logs.txt
# NOTE(review): assumes app.py was written to /content (the notebook's working directory) - confirm.
!streamlit run /content/app.py &>/content/logs.txt &

# Get tunnel password: print the value served by loca.lt
# (the captured output of this cell shows an IP address)
!echo -e "\nPassword:" && curl -s https://loca.lt/mytunnelpassword

# Create tunnel to expose Streamlit
# Port 8501 is presumably the port the Streamlit server listens on - confirm in logs.txt.
# This command runs in the foreground and prints the public URL.
!echo -e "\nLink:" && npx localtunnel --port 8501


Password:
34.87.93.180
Link:
[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K⠧[1G[0K⠇[1G[0K⠏[1G[0K⠋[1G[0Kyour url is: https://happy-pans-fail.loca.lt
/tools/node/lib/node_modules/localtunnel/bin/lt.js:81
    throw err;
    ^

Error: connection refused: localtunnel.me:6993 (check your firewall settings)
    at Socket.<anonymous> (/tools/node/lib/node_modules/[4mlocaltunnel[24m/lib/TunnelCluster.js:52:11)
[90m    at Socket.emit (node:events:524:28)[39m
[90m    at emitErrorNT (node:internal/streams/destroy:169:8)[39m
[90m    at emitErrorCloseNT (node:internal/streams/destroy:128:3)[39m
[90m    at process.processTicksAndRejections (node:internal/process/task_queues:82:21)[39m

Node.js v20.19.0
[1G[0K⠙[1G[0K