# Medical RAG System - Admin Panel

## 🔧 Administrative Interface

Use this interface to manage the medical knowledge base, rebuild indexes, and perform system maintenance.

**⚠️ Access Control**: This panel should only be accessible to administrators.

In [None]:
# System initialization
import warnings
warnings.filterwarnings('ignore')

import ipywidgets as widgets
from IPython.display import display, HTML, clear_output
import os
from pathlib import Path
import json
from datetime import datetime

from rag import config
from rag.cache import save_chunks, save_faiss_index, save_metadata, load_chunks, load_faiss_index

# Confirm that the imports succeeded and echo the configured storage locations.
startup_lines = (
    "‚úÖ Admin panel initialized",
    f"üìÅ Data directory: {config.DATA_DIR}",
    f"üì¶ Cache directory: {config.CACHE_DIR}",
)
print("\n".join(startup_lines))

---

## 📊 System Status

In [None]:
# Status display: output area that holds the rendered status card.
status_output = widgets.Output()

def refresh_status(button=None):
    """Re-render the system status card inside ``status_output``.

    Loads the cached chunks and FAISS index, counts the ``*.pdf`` files in
    ``config.PDF_DIR``, checks which cache files exist in ``config.CACHE_DIR``
    and displays the result as a single HTML card.

    Args:
        button: The clicked widget when used as an ``on_click`` callback.
            Unused; optional so the function can also be called directly.
    """
    with status_output:
        clear_output()

        # Probe the cached artefacts and the PDF folder.
        cached_chunks = load_chunks()
        faiss_index = load_faiss_index()
        n_pdfs = sum(1 for _ in config.PDF_DIR.glob('*.pdf'))

        # (label shown in the card, file name looked up in the cache directory)
        cache_presence = [
            (label, (config.CACHE_DIR / filename).exists())
            for label, filename in (
                ('chunks.pkl', 'chunks.pkl'),
                ('faiss_index.bin', 'faiss_index.bin'),
                ('metadata.json', 'chunk_metadata.json'),
            )
        ]

        n_chunks = len(cached_chunks) if cached_chunks else 0
        index_label = "‚úÖ Built" if faiss_index else "‚ùå Not found"

        # Collect the HTML fragments in order and join them once at the end.
        parts = [
            '<div style="background-color: #f0f9ff; padding: 20px; border-radius: 10px; border-left: 5px solid #0066cc;">',
            '<h3 style="margin-top: 0; color: #0066cc;">üìä System Status</h3>',
            f'<p><strong>PDF Documents:</strong> {n_pdfs}</p>',
            f'<p><strong>Processed Chunks:</strong> {n_chunks}</p>',
            f'<p><strong>FAISS Index:</strong> {index_label}</p>',
            '<p><strong>Cache Files:</strong></p><ul>',
        ]
        parts.extend(
            f'<li>{"‚úÖ" if present else "‚ùå"} {label}</li>'
            for label, present in cache_presence
        )
        parts.append('</ul>')

        # The system counts as ready as soon as any processed chunks exist.
        if cached_chunks:
            parts.append('<p style="color: #28a745; font-weight: bold; margin-top: 15px;">System is operational and ready to serve queries.</p>')
        else:
            parts.append('<p style="color: #dc3545; font-weight: bold; margin-top: 15px;">System needs initialization. Please process documents below.</p>')

        parts.append('</div>')
        display(HTML(''.join(parts)))

# Refresh control: the same callback serves button clicks and the initial render.
refresh_layout = widgets.Layout(width='200px', margin='10px 0')
refresh_button = widgets.Button(
    description='üîÑ Refresh Status',
    button_style='info',
    layout=refresh_layout,
)
refresh_button.on_click(refresh_status)

# Show the button above its output area, then draw the status card once.
display(refresh_button, status_output)
refresh_status()

---

## 📄 Document Management

Upload PDF documents to the system for processing.

In [None]:
# File upload interface: an output area for progress messages and a
# multi-file picker that offers PDF files.
upload_output = widgets.Output()

upload_options = dict(accept='.pdf', multiple=True, description='Upload PDFs')
file_upload = widgets.FileUpload(**upload_options)

def handle_upload(change):
    """Save newly uploaded PDFs into ``config.PDF_DIR`` and report progress.

    Intended as an observer for the ``value`` trait of ``file_upload``.
    Each uploaded file is written under its base name only; entries whose
    name is not a plain ``*.pdf`` file name are skipped, and a failed write
    is reported without aborting the remaining files.

    Args:
        change: Traitlets change dict. ``change['new']`` is iterated as a
            sequence of mappings with ``'name'`` and ``'content'`` keys.
    """
    import html
    from pathlib import PureWindowsPath

    with upload_output:
        clear_output()
        uploaded_files = change['new']

        if not uploaded_files:
            return

        display(HTML(f'<p>üì§ Uploading {len(uploaded_files)} file(s)...</p>'))

        # The target folder may not exist yet on a fresh deployment.
        config.PDF_DIR.mkdir(parents=True, exist_ok=True)

        saved_count = 0
        for file_info in uploaded_files:
            raw_name = str(file_info['name'])
            # The name is supplied by the client: keep only the last path
            # component so "../x.pdf" or an absolute path cannot escape
            # PDF_DIR. PureWindowsPath splits on both "/" and "\".
            filename = PureWindowsPath(raw_name).name

            if filename in ('', '.', '..') or not filename.lower().endswith('.pdf'):
                display(HTML(f'<p style="color: #dc3545;">Skipped (not a valid PDF file name): {html.escape(raw_name)}</p>'))
                continue

            filepath = config.PDF_DIR / filename
            try:
                with open(filepath, 'wb') as f:
                    f.write(file_info['content'])
            except OSError as exc:
                # Report the failure and continue with the remaining files.
                display(HTML(f'<p style="color: #dc3545;">Failed to save {html.escape(filename)}: {html.escape(str(exc))}</p>'))
                continue

            saved_count += 1
            # Escape the name before embedding it in markup.
            display(HTML(f'<p style="color: #28a745;">‚úÖ Uploaded: {html.escape(filename)}</p>'))

        if saved_count:
            display(HTML('<p style="font-weight: bold; margin-top: 15px;">Upload complete! Now run "Process Documents" below.</p>'))
        refresh_status()

# React to every change of the widget's uploaded-file value.
file_upload.observe(handle_upload, names='value')

# Picker first, progress messages underneath.
display(file_upload, upload_output)

---

## ⚙️ Processing Pipeline

Process documents through the complete RAG pipeline: extraction → chunking → header generation → embedding → indexing.

In [None]:
# Processing controls: one shared output area and two action buttons.
process_output = widgets.Output()

# Both buttons use the same dimensions; each still gets its own Layout object.
action_layout_options = dict(width='200px', height='45px', margin='10px 0')

process_button = widgets.Button(
    description='üöÄ Process Documents',
    button_style='success',
    icon='cogs',
    layout=widgets.Layout(**action_layout_options),
)

rebuild_button = widgets.Button(
    description='üî® Rebuild Index',
    button_style='warning',
    icon='refresh',
    layout=widgets.Layout(**action_layout_options),
)

def _show_pipeline_error(exc):
    """Display ``exc`` and the active traceback as HTML-escaped output.

    Must be called from inside an ``except`` block so that
    ``traceback.format_exc()`` can see the exception being handled.
    """
    import html
    import traceback

    display(HTML(f'<p style="color: #dc3545; font-weight: bold;">‚ùå Error: {html.escape(str(exc))}</p>'))
    # Escaping matters: frame names such as "<module>" would otherwise be
    # parsed as HTML tags and disappear from the rendered traceback.
    display(HTML(f'<pre style="background-color: #f8f9fa; padding: 10px; border-radius: 5px; font-size: 11px;">{html.escape(traceback.format_exc())}</pre>'))

def _embed_chunks(chunks):
    """Embed every chunk in batches and return the vectors in chunk order.

    The text sent for each chunk is its ``ctx_header`` and ``raw_chunk``
    joined by a blank line. Progress is displayed after every batch, and
    ``config.EMBED_DELAY_SECONDS`` is slept between batches (not after the
    last one).

    Raises:
        RuntimeError: If a batch does not return exactly one vector per text,
            or if any returned vector consists only of zeros.
    """
    import time
    from rag.embeddings import get_embeddings_batch

    texts_to_embed = [f"{chunk.ctx_header}\n\n{chunk.raw_chunk}" for chunk in chunks]
    batch_size = config.EMBED_BATCH_SIZE
    total_batches = (len(texts_to_embed) + batch_size - 1) // batch_size
    embeddings = []

    for batch_num, start in enumerate(range(0, len(texts_to_embed), batch_size), start=1):
        batch = texts_to_embed[start:start + batch_size]
        batch_embeddings = get_embeddings_batch(batch)

        # A missing vector would silently shift every later row of the index
        # relative to the chunk list, so insist on a one-to-one result.
        if batch_embeddings is None or len(batch_embeddings) != len(batch):
            raise RuntimeError(f"Embedding generation failed for batch {batch_num} (expected {len(batch)} vectors)")

        # Check for zero vectors (failed embeddings). Test the components
        # themselves: a non-zero vector can still sum to zero.
        if any(not any(emb) for emb in batch_embeddings):
            raise RuntimeError(f"Embedding generation failed for batch {batch_num} (returned zero vectors)")

        embeddings.extend(batch_embeddings)
        display(HTML(f'<p>üìä Completed batch {batch_num}/{total_batches}</p>'))

        # Delay between batches (except last)
        if batch_num < total_batches:
            time.sleep(config.EMBED_DELAY_SECONDS)

    return embeddings

def _build_faiss_index(embeddings):
    """Build an inner-product FAISS index over L2-normalised ``embeddings``."""
    import numpy as np
    import faiss

    embeddings_array = np.array(embeddings).astype('float32')
    dimension = embeddings_array.shape[1]
    index = faiss.IndexFlatIP(dimension)  # Inner product for cosine similarity
    faiss.normalize_L2(embeddings_array)  # Normalize for cosine similarity
    index.add(embeddings_array)
    return index

def process_documents(button):
    """Run the full ingestion pipeline and persist the results to the cache.

    Steps: load JSON and PDF documents, chunk them, generate contextual
    headers, embed the chunks, build the FAISS index, then save the chunks,
    the index and the per-chunk metadata. Progress and errors are rendered
    into ``process_output``; nothing is saved when there is nothing to index.

    Args:
        button: The clicked widget (``on_click`` callback argument); unused.
    """
    with process_output:
        clear_output(wait=True)

        display(HTML('<h3>üîÑ Starting Document Processing Pipeline...</h3>'))

        try:
            # Step 1: Load documents from both JSON and PDFs
            display(HTML('<p>üìÑ Step 1/6: Loading documents from JSON and PDFs...</p>'))
            from rag.ingestion import extract_text_from_pdfs, load_json_documents

            # Load JSON documents (web-scraped)
            json_docs = load_json_documents(config.DATA_DIR)
            display(HTML(f'<p style="color: #28a745;">‚úÖ Loaded {len(json_docs)} JSON documents</p>'))

            # Extract PDF documents
            pdf_docs = extract_text_from_pdfs(config.PDF_DIR)
            display(HTML(f'<p style="color: #28a745;">‚úÖ Extracted {len(pdf_docs)} PDF documents</p>'))

            # Combine all documents
            documents = json_docs + pdf_docs
            display(HTML(f'<p style="color: #0066cc; font-weight: bold;">üìö Total: {len(documents)} documents</p>'))

            # Stop early with a clear message instead of failing later on an
            # empty embedding matrix.
            if not documents:
                display(HTML('<p style="color: #dc3545; font-weight: bold;">No documents found. Add JSON documents to the data directory or upload PDFs above, then try again.</p>'))
                return

            # Step 2: Chunk documents
            display(HTML('<p>‚úÇÔ∏è Step 2/6: Chunking documents with semantic boundaries...</p>'))
            from rag.chunking import SemanticChunker
            chunker = SemanticChunker(max_words=config.SEMANTIC_MAX_WORDS)
            chunks = chunker.chunk_documents(documents)
            display(HTML(f'<p style="color: #28a745;">‚úÖ Created {len(chunks)} chunks</p>'))

            if not chunks:
                display(HTML('<p style="color: #dc3545; font-weight: bold;">The loaded documents produced no chunks, so there is nothing to index.</p>'))
                return

            # Step 3: Generate contextual headers
            display(HTML('<p>üè∑Ô∏è Step 3/6: Generating contextual headers...</p>'))
            from rag.headers import ContextualHeaderGenerator
            header_gen = ContextualHeaderGenerator()
            chunks = header_gen.generate_headers_batch(chunks, batch_size=config.BATCH_SIZE)
            display(HTML(f'<p style="color: #28a745;">‚úÖ Generated headers for all chunks</p>'))

            # Step 4: Generate embeddings with batching
            display(HTML('<p>üßÆ Step 4/6: Generating embeddings...</p>'))
            embeddings = _embed_chunks(chunks)
            display(HTML(f'<p style="color: #28a745;">‚úÖ Generated {len(embeddings)} embeddings</p>'))

            # Step 5: Build FAISS index
            display(HTML('<p>üîç Step 5/6: Building FAISS search index...</p>'))
            index = _build_faiss_index(embeddings)
            display(HTML(f'<p style="color: #28a745;">‚úÖ Built FAISS index with {index.ntotal} vectors</p>'))

            # Step 6: Save everything to cache
            display(HTML('<p>üíæ Step 6/6: Saving to cache...</p>'))
            save_chunks(chunks)
            save_faiss_index(index)

            # Build metadata for retrieval
            chunk_records = [
                {
                    'chunk_id': chunk.chunk_id,
                    'doc_title': chunk.doc_title,
                    'source_url': chunk.source_url,
                    'ctx_header': chunk.ctx_header,
                    'chunk_index': chunk.chunk_index,
                }
                for chunk in chunks
            ]
            save_metadata(chunk_records)

            display(HTML('<p style="color: #28a745;">‚úÖ Saved to cache</p>'))

            # Success message
            display(HTML(f'''
                <div style="background-color: #d4edda; border: 1px solid #c3e6cb; color: #155724; padding: 20px; border-radius: 10px; margin-top: 20px;">
                    <h3 style="margin-top: 0;">üéâ Processing Complete!</h3>
                    <ul style="margin-bottom: 0;">
                        <li>JSON documents: {len(json_docs)}</li>
                        <li>PDF documents: {len(pdf_docs)}</li>
                        <li>Total documents processed: {len(documents)}</li>
                        <li>Chunks created: {len(chunks)}</li>
                        <li>Embeddings generated: {len(embeddings)}</li>
                        <li>Index built: {index.ntotal} vectors</li>
                    </ul>
                    <p style="margin-top: 15px; margin-bottom: 0; font-weight: bold;">The system is now ready to serve queries!</p>
                </div>
            '''))

            refresh_status()

        except Exception as e:
            # UI boundary: show the failure in the output area rather than
            # letting the callback raise.
            _show_pipeline_error(e)

def rebuild_index(button):
    """Re-embed the cached chunks and replace the saved FAISS index.

    Only the index is rewritten; the cached chunks and metadata are left as
    they are. Progress and errors are rendered into ``process_output``.

    Args:
        button: The clicked widget (``on_click`` callback argument); unused.
    """
    with process_output:
        clear_output(wait=True)

        display(HTML('<h3>üî® Rebuilding FAISS Index...</h3>'))

        try:
            # Load existing chunks
            chunks = load_chunks()
            if not chunks:
                display(HTML('<p style="color: #dc3545;">‚ùå No chunks found. Please process documents first.</p>'))
                return

            display(HTML(f'<p>üì¶ Loaded {len(chunks)} existing chunks</p>'))

            # Regenerate embeddings with batching
            display(HTML('<p>üßÆ Regenerating embeddings...</p>'))
            embeddings = _embed_chunks(chunks)
            display(HTML(f'<p style="color: #28a745;">‚úÖ Generated {len(embeddings)} embeddings</p>'))

            # Rebuild index
            display(HTML('<p>üîç Building new FAISS index...</p>'))
            index = _build_faiss_index(embeddings)

            # Save
            save_faiss_index(index)

            display(HTML(f'''
                <div style="background-color: #d4edda; border: 1px solid #c3e6cb; color: #155724; padding: 20px; border-radius: 10px; margin-top: 20px;">
                    <h3 style="margin-top: 0;">‚úÖ Index Rebuilt Successfully!</h3>
                    <p style="margin-bottom: 0;">FAISS index updated with {index.ntotal} vectors.</p>
                </div>
            '''))

            refresh_status()

        except Exception as e:
            # UI boundary: show the failure in the output area rather than
            # letting the callback raise.
            _show_pipeline_error(e)

# Attach each pipeline action to its button.
process_button.on_click(process_documents)
rebuild_button.on_click(rebuild_index)

# Buttons side by side, with the shared progress output underneath.
action_row = widgets.HBox([process_button, rebuild_button])
display(action_row, process_output)

---

## 🗑️ Cache Management

In [None]:
# Cache management: output area for deletion messages and the trigger button.
cache_output = widgets.Output()

clear_cache_layout = widgets.Layout(width='200px', margin='10px 0')
clear_cache_button = widgets.Button(
    description='üóëÔ∏è Clear Cache',
    button_style='danger',
    layout=clear_cache_layout,
)

def clear_cache(button):
    """Delete the cached chunk, index and metadata files, then refresh status.

    Each of the three cache files under ``config.CACHE_DIR`` is removed if it
    exists, and every deletion is reported in ``cache_output``.

    Args:
        button: The clicked widget (``on_click`` callback argument); unused.
    """
    with cache_output:
        clear_output()

        display(HTML('<p>‚ö†Ô∏è Clearing cache files...</p>'))

        for cache_name in ('chunks.pkl', 'faiss_index.bin', 'chunk_metadata.json'):
            target = config.CACHE_DIR / cache_name
            # Nothing to delete or report for a file that is not there.
            if not target.exists():
                continue
            target.unlink()
            display(HTML(f'<p style="color: #28a745;">‚úÖ Deleted: {target.name}</p>'))

        display(HTML('<p style="font-weight: bold; margin-top: 15px;">Cache cleared. Run "Process Documents" to rebuild.</p>'))
        refresh_status()

# Run the cache wipe whenever the button is clicked.
clear_cache_button.on_click(clear_cache)

# Button first, deletion messages underneath.
display(clear_cache_button, cache_output)

---

## 📈 System Information

In [None]:
# Display system configuration
# Read-only summary table of the settings this panel runs with, all read from
# ``rag.config``. The f-string is evaluated once, when this cell runs, so the
# cell must be re-run to pick up configuration changes. Values are interpolated
# into the markup as-is (no HTML escaping).
info_html = f'''
<div style="background-color: #f8f9fa; padding: 20px; border-radius: 10px; border: 1px solid #dee2e6;">
    <h3 style="margin-top: 0; color: #495057;">‚öôÔ∏è Configuration</h3>
    <table style="width: 100%; border-collapse: collapse;">
        <tr style="border-bottom: 1px solid #dee2e6;">
            <td style="padding: 8px; font-weight: bold;">Data Directory:</td>
            <td style="padding: 8px;"><code>{config.DATA_DIR}</code></td>
        </tr>
        <tr style="border-bottom: 1px solid #dee2e6;">
            <td style="padding: 8px; font-weight: bold;">PDF Directory:</td>
            <td style="padding: 8px;"><code>{config.PDF_DIR}</code></td>
        </tr>
        <tr style="border-bottom: 1px solid #dee2e6;">
            <td style="padding: 8px; font-weight: bold;">Cache Directory:</td>
            <td style="padding: 8px;"><code>{config.CACHE_DIR}</code></td>
        </tr>
        <tr style="border-bottom: 1px solid #dee2e6;">
            <td style="padding: 8px; font-weight: bold;">Embedding Model:</td>
            <td style="padding: 8px;"><code>{config.AOAI_EMBED_MODEL}</code></td>
        </tr>
        <tr style="border-bottom: 1px solid #dee2e6;">
            <td style="padding: 8px; font-weight: bold;">Chat Model:</td>
            <td style="padding: 8px;"><code>{config.AOAI_CHAT_MODEL}</code></td>
        </tr>
        <tr style="border-bottom: 1px solid #dee2e6;">
            <td style="padding: 8px; font-weight: bold;">Max Chunk Words:</td>
            <td style="padding: 8px;">{config.SEMANTIC_MAX_WORDS}</td>
        </tr>
        <tr>
            <td style="padding: 8px; font-weight: bold;">Embedding Batch Size:</td>
            <td style="padding: 8px;">{config.EMBED_BATCH_SIZE}</td>
        </tr>
    </table>
</div>
'''

# Render the configuration table in the cell output.
display(HTML(info_html))