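# In [None]:  (cell marker left over from the Jupyter notebook export)
# Start the Jupyter server: jupyter notebook --ip=0.0.0.0 --port=8889 --no-browser --allow-root
# SSH tunnel to the EC2 instance (forwards Jupyter's port 8889 and port 11434, the port used by GEMMA_URL): ssh -i /home/seanhegede/myenv/noko.pem -L 10006:localhost:8889 -L 11434:localhost:11434 ubuntu@18.xxx.xx.xx
# Jupyter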

import os
import json
import time
import requests
import numpy as np
import faiss
from sentence_transformers import SentenceTransformer
from typing import List, Dict, Optional, Tuple, Any
import logging
from collections import defaultdict, deque
import re
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from concurrent.futures import ThreadPoolExecutor, as_completed
import hashlib
from functools import lru_cache
import threading
from difflib import SequenceMatcher

# Configuration
# Name handed to SentenceTransformer() to embed incoming queries.
MODEL_NAME = "all-MiniLM-L6-v2"
# JSON file with the pre-computed chunks; each entry needs 'text' and
# 'embedding' (optionally 'page_title', 'url', 'id').
EMBEDDINGS_FILE = "/home/ubuntu/scraper_deep/deep_embeddings.json"
# Ollama generation endpoint and the model requested from it.
GEMMA_URL = "http://localhost:11434/api/generate"
GEMMA_MODEL = "gemma3:27b"

# Performance settings
# Maximum number of chunks returned by retrieval and by each fallback strategy.
TOP_K = 8
# Temporarily lowered by ImprovedFallbackSystem._relaxed_search.
# NOTE(review): FastRAG.retrieve_context filters with a hard-coded 0.08 and
# does not read this value — confirm which code actually consumes it.
MIN_SIMILARITY = 0.12
# Budget, in characters, for the CONTEXT section built by FastRAG._build_prompt.
MAX_CONTEXT_LENGTH = 12000
# NOTE(review): the three hybrid weights are not referenced in the part of the
# file visible here; presumably used by FastHybridRetriever — confirm.
HYBRID_WEIGHT_SEMANTIC = 0.6
HYBRID_WEIGHT_KEYWORD = 0.25
HYBRID_WEIGHT_RERANK = 0.15
# Default number of past exchanges kept by ConversationManager (deque maxlen).
CONVERSATION_HISTORY = 5
# Number of nearest neighbours requested from the FAISS index, i.e. the
# candidate pool handed to the reranker before trimming to TOP_K.
RERANK_TOP_K = 16  # Get more candidates for reranking

# Only WARNING and above is shown; this file logs its progress and fallback
# messages through logger.warning so they are visible at this level.
logging.basicConfig(level=logging.WARNING)
logger = logging.getLogger(__name__)

class LightweightReranker:
    """Re-order retrieved chunks using cheap lexical signals.

    Each chunk's existing ``similarity`` is blended 70/30 with a score built
    from five signals: exact phrase matches, title overlap, query-word
    density, position of the first matching word, and a rough text-quality
    heuristic. No model inference is involved.
    """

    # One tokeniser for queries, titles and chunk text, so punctuation glued
    # to a word ("python,") can never prevent a match.
    _WORD_RE = re.compile(r'\b\w+\b')

    def __init__(self):
        self.stopwords = {'the', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by'}

    def rerank_chunks(self, query: str, chunks: List[Dict]) -> List[Dict]:
        """Score every chunk and sort ``chunks`` by the blended score.

        Args:
            query: The user query.
            chunks: Chunk dicts; ``text``, ``title`` and ``similarity`` are
                read when present.

        Returns:
            The same list object, sorted in place by ``final_score``
            (descending). Every chunk dict gains ``final_score`` and
            ``rerank_signals``. Lists with two or fewer chunks are returned
            untouched (no keys are added).
        """
        if len(chunks) <= 2:
            return chunks

        query_lower = query.lower()
        query_words = set(self._WORD_RE.findall(query_lower)) - self.stopwords

        for chunk in chunks:
            text = chunk.get('text', '').lower()
            title = chunk.get('title', '').lower()

            signals = {
                'exact': self._count_exact_phrases(query_lower, text),
                'title': self._calculate_title_relevance(query_words, title),
                'density': self._calculate_word_density(query_words, text),
                'position': self._calculate_position_score(query_words, text),
                'quality': self._calculate_text_quality(text),
            }

            # Weighted combination; exact phrase hits dominate.
            rerank_score = (
                signals['exact'] * 0.4 +
                signals['title'] * 0.25 +
                signals['density'] * 0.15 +
                signals['position'] * 0.1 +
                signals['quality'] * 0.1
            )

            # The retrieval similarity stays the main driver of the ranking.
            original_sim = chunk.get('similarity', 0)
            chunk['final_score'] = (original_sim * 0.7) + (rerank_score * 0.3)
            chunk['rerank_signals'] = signals

        chunks.sort(key=lambda c: c.get('final_score', 0), reverse=True)
        return chunks

    def _count_exact_phrases(self, query: str, text: str) -> float:
        """Return the fraction of query phrases that occur verbatim in ``text``.

        Queries shorter than 10 characters are tested as a single substring.
        Longer queries are split into 2- and 3-word phrases whose words are
        all longer than two characters. A blank query scores 0.0: the empty
        string is a substring of everything and must not count as a match.
        """
        if not query.strip():
            return 0.0

        if len(query) < 10:
            return 1.0 if query in text else 0.0

        words = query.split()
        phrases = []

        # Two-word phrases first, then three-word phrases.
        for i in range(len(words) - 1):
            if len(words[i]) > 2 and len(words[i + 1]) > 2:
                phrases.append(f"{words[i]} {words[i+1]}")

        for i in range(len(words) - 2):
            if all(len(w) > 2 for w in words[i:i + 3]):
                phrases.append(f"{words[i]} {words[i+1]} {words[i+2]}")

        matches = sum(1 for phrase in phrases if phrase in text)
        return matches / max(len(phrases), 1)

    def _calculate_title_relevance(self, query_words: set, title: str) -> float:
        """Return the share of query words that also appear in the title."""
        if not title or not query_words:
            return 0.0

        title_words = set(self._WORD_RE.findall(title.lower())) - self.stopwords
        if not title_words:
            return 0.0

        overlap = len(query_words.intersection(title_words))
        return overlap / len(query_words)

    def _calculate_word_density(self, query_words: set, text: str) -> float:
        """Return query-word density in ``text``, scaled by 100 and capped at 1.0.

        Texts with fewer than 10 words score 0.0.
        """
        if not query_words or not text:
            return 0.0

        text_words = self._WORD_RE.findall(text.lower())
        if len(text_words) < 10:
            return 0.0

        matches = sum(1 for word in text_words if word in query_words)
        return min(matches / len(text_words) * 100, 1.0)

    def _calculate_position_score(self, query_words: set, text: str) -> float:
        """Score how early the first query word appears in ``text``.

        Only the first 100 words are inspected. A match at word 0 scores 1.0,
        decreasing linearly to 0.0 at word 50 and beyond. Text is tokenised
        with the same word pattern as the query, so trailing punctuation does
        not hide a match.
        """
        if not query_words or not text:
            return 0.0

        for position, word in enumerate(self._WORD_RE.findall(text.lower())[:100]):
            if word in query_words:
                return max(0.0, (50 - position) / 50)

        return 0.0

    def _calculate_text_quality(self, text: str) -> float:
        """Return a rough quality score in {0.5, 0.75, 1.0}.

        Rewards medium length (50-300 whitespace-separated words) and the
        presence of more than two '.'-separated segments.
        """
        if not text:
            return 0.0

        words = text.split()
        sentences = text.split('.')

        length_score = 1.0 if 50 <= len(words) <= 300 else 0.5
        structure_score = 1.0 if len(sentences) > 2 else 0.5

        return (length_score + structure_score) / 2

class ImprovedFallbackSystem:
    """Fallback retrieval strategies used when the primary search is weak.

    Every strategy reformulates the query (or scans the knowledge base
    directly) and delegates to ``rag_system.retrieve_context``. Strategies
    are tried in a fixed order until one yields at least two chunks.
    """

    # Words ignored when picking keywords for _keyword_search.
    _KEYWORD_STOPWORDS = frozenset({'the', 'and', 'are', 'you', 'what', 'how', 'when', 'where', 'why', 'this', 'that', 'with', 'for', 'can', 'could', 'would', 'should', 'will', 'may', 'might', 'must', 'have', 'has', 'had', 'was', 'were', 'been', 'said', 'from', 'they', 'them', 'than', 'only', 'even', 'also', 'back', 'other', 'many', 'then', 'well', 'some', 'like', 'just', 'very', 'more'})

    def __init__(self, rag_system):
        """Store the RAG system and the static query-expansion table.

        Args:
            rag_system: Object exposing ``retrieve_context(query)`` and a
                ``knowledge_base`` list of chunk dicts.
        """
        self.rag_system = rag_system
        self.query_expansions = {
            # Technology terms
            'ai': ['artificial intelligence', 'machine learning', 'neural network', 'algorithm'],
            'ml': ['machine learning', 'artificial intelligence', 'data science', 'algorithm'],
            'api': ['interface', 'endpoint', 'service', 'integration', 'rest'],
            'web': ['website', 'internet', 'online', 'browser', 'http'],
            'data': ['dataset', 'information', 'database', 'analytics', 'statistics'],
            'python': ['programming', 'code', 'script', 'development'],
            'javascript': ['js', 'programming', 'web development', 'frontend'],
            'database': ['db', 'sql', 'storage', 'data', 'table'],

            # Business terms
            'business': ['company', 'enterprise', 'organization', 'commercial'],
            'marketing': ['advertising', 'promotion', 'sales', 'branding'],
            'finance': ['money', 'budget', 'investment', 'financial', 'cost'],
            'strategy': ['plan', 'approach', 'method', 'tactics'],

            # General terms
            'how': ['method', 'way', 'process', 'steps', 'guide'],
            'why': ['reason', 'purpose', 'cause', 'explanation'],
            'what': ['definition', 'meaning', 'explanation', 'description'],
            'best': ['optimal', 'recommended', 'top', 'effective'],
        }

    def execute_fallback(self, query: str, original_chunks: Optional[List[Dict]] = None) -> List[Dict]:
        """Run the fallback strategies in order and return the first good result.

        Args:
            query: The user query.
            original_chunks: Chunks from the primary retrieval. Accepted for
                interface compatibility; not used by any strategy.

        Returns:
            The chunks of the first strategy that produced at least two
            results, each tagged with ``fallback_method``; an empty list if
            every strategy failed or raised.
        """
        logger.warning(f"Executing fallback for query: '{query[:50]}...'")

        strategies = [
            ('Query Expansion', self._query_expansion),
            ('Relaxed Similarity', self._relaxed_search),
            ('Individual Keywords', self._keyword_search),
            ('Fuzzy Matching', self._fuzzy_search),
            ('Partial Content', self._partial_content_search),
        ]

        for strategy_name, strategy_func in strategies:
            try:
                results = strategy_func(query)
            except Exception as e:
                # Best effort: one failing strategy must not block the rest.
                logger.warning(f"Fallback ERROR in {strategy_name}: {e}")
                continue

            if results and len(results) >= 2:  # Require at least 2 results
                logger.warning(f"Fallback SUCCESS: {strategy_name} found {len(results)} results")
                for result in results:
                    result['fallback_method'] = strategy_name
                return results
            if results:
                logger.warning(f"Fallback PARTIAL: {strategy_name} found {len(results)} results (need 2+)")

        logger.warning("All fallback strategies failed")
        return []

    def _query_expansion(self, query: str) -> List[Dict]:
        """Retry retrieval with related terms appended to the query.

        An expansion key must occur as a whole word (an optional plural "s"
        is tolerated). Plain substring tests would fire 'ai' on "explain" or
        'how' on "show" and pull in unrelated expansion terms.
        """
        query_lower = query.lower()
        expanded_terms = []

        for term, expansions in self.query_expansions.items():
            if re.search(rf'\b{re.escape(term)}s?\b', query_lower):
                expanded_terms.extend(expansions[:2])  # Limit expansions

        if not expanded_terms:
            return []

        candidate_queries = [
            f"{query} {' '.join(expanded_terms[:3])}",  # Add terms
            ' '.join(expanded_terms[:4]),  # Just expanded terms
            f"{' '.join(query.split()[:3])} {' '.join(expanded_terms[:2])}"  # Mix
        ]

        for expanded_query in candidate_queries:
            results = self.rag_system.retrieve_context(expanded_query)
            if results and len(results) >= 2:
                return results[:TOP_K]

        return []

    def _relaxed_search(self, query: str) -> List[Dict]:
        """Retry retrieval while the global MIN_SIMILARITY is lowered.

        The global is restored in ``finally`` even if retrieval raises.

        NOTE(review): FastRAG.retrieve_context filters with a hard-coded 0.08
        and does not read MIN_SIMILARITY, so lowering it only matters if code
        outside this view (e.g. the hybrid retriever) reads the global —
        confirm. Mutating a module global is also not thread-safe.
        """
        global MIN_SIMILARITY
        original_min = MIN_SIMILARITY

        try:
            for threshold in (0.08, 0.05, 0.03):
                MIN_SIMILARITY = threshold
                results = self.rag_system.retrieve_context(query)
                if results and len(results) >= 2:
                    return results[:TOP_K]
        finally:
            MIN_SIMILARITY = original_min

        return []

    def _keyword_search(self, query: str) -> List[Dict]:
        """Retry retrieval using only the query's important keywords.

        Tries the first four keywords, then the first two, then the first
        one. Identical candidate queries are issued once only, since each
        retrieval is an embedding plus an index search.
        """
        words = re.findall(r'\b[a-zA-Z]{3,}\b', query.lower())
        important_words = [w for w in words if w not in self._KEYWORD_STOPWORDS and len(w) > 3]

        if not important_words:
            return []

        candidates = [
            ' '.join(important_words[:4]),  # Top 4 keywords
            ' '.join(important_words[:2]),  # Top 2 keywords
            important_words[0],  # Single most important
        ]

        # dict.fromkeys drops duplicates while keeping the original order.
        for keyword_query in dict.fromkeys(candidates):
            results = self.rag_system.retrieve_context(keyword_query)
            if results:
                return results[:TOP_K]

        return []

    def _fuzzy_search(self, query: str) -> List[Dict]:
        """Fuzzy-match the query against titles and the first 200 text characters.

        Only the first 1000 knowledge-base entries are scanned. Matches are
        shallow copies of the chunk with ``similarity`` set to the better of
        the title and text-preview ratios; ratios of 0.3 or less are dropped.
        """
        query_lower = query.lower()
        fuzzy_matches = []

        for chunk in self.rag_system.knowledge_base[:1000]:  # Limit search
            title = chunk.get('title', '').lower()
            text_preview = chunk.get('text', '')[:200].lower()

            title_sim = SequenceMatcher(None, query_lower, title).ratio()
            text_sim = SequenceMatcher(None, query_lower, text_preview).ratio()

            max_sim = max(title_sim, text_sim)
            if max_sim > 0.3:  # Fuzzy threshold
                chunk_copy = chunk.copy()
                chunk_copy['similarity'] = max_sim
                fuzzy_matches.append(chunk_copy)

        fuzzy_matches.sort(key=lambda c: c['similarity'], reverse=True)
        return fuzzy_matches[:TOP_K]

    def _partial_content_search(self, query: str) -> List[Dict]:
        """Split the query on question words/conjunctions and search each part.

        Only fragments longer than five characters are used, and at most the
        first three of them are tried.
        """
        fragments = re.split(r'\b(?:what|how|when|where|why|and|or|but|if|then)\b', query.lower())
        query_parts = [part.strip() for part in fragments if len(part.strip()) > 5]

        for part in query_parts[:3]:  # Limit attempts
            results = self.rag_system.retrieve_context(part)
            if results:
                return results[:TOP_K]

        return []

class FastRAG:
    """RAG pipeline: FAISS retrieval, lexical reranking, fallbacks, LLM answer.

    Call ``setup()`` once; it returns True when the system is ready. Then
    call ``ask(query)`` to print an answer, its sources and running stats.
    """

    def __init__(self):
        self.model = None
        self.index = None
        self.knowledge_base = []
        self.hybrid_retriever = None
        self.conversation = ConversationManager()
        self.cache = FastCache()
        self.fallback_system = None
        self.reranker = LightweightReranker()
        self.ready = False

        # Running counters, updated by ask() and retrieve_context().
        self.stats = {
            'queries': 0,
            'cache_hits': 0,
            'fallbacks': 0,
            'rerank_improvements': 0,
            'avg_response_time': 0,
            'total_response_time': 0
        }

    def setup(self) -> bool:
        """Verify Ollama, load the embedding model and data, build the index.

        Returns:
            True when every step succeeded and the system is ready; False
            (after printing the reason) as soon as one step fails.
        """
        print("🚀 Starting Optimized Fast RAG System...")

        # Check Ollama
        try:
            response = requests.get("http://localhost:11434/api/tags", timeout=3)
            models = response.json().get("models", [])
            if not any(GEMMA_MODEL in model.get('name', '') for model in models):
                print(f"❌ Model {GEMMA_MODEL} not found")
                return False
            print("✅ Ollama connection verified")
        except Exception as e:
            print(f"❌ Ollama not available: {e}")
            return False

        # Load the embedding model
        try:
            print("📦 Loading embedding model...")
            self.model = SentenceTransformer(MODEL_NAME)
            print("✅ Embedding model loaded")
        except Exception as e:
            print(f"❌ Model loading failed: {e}")
            return False

        if not self._load_knowledge_base():
            return False

        if not self._build_index():
            return False

        self.fallback_system = ImprovedFallbackSystem(self)
        self.ready = True

        print("✅ Enhanced Fast RAG Ready!")
        print(f"   📚 {len(self.knowledge_base)} chunks loaded")
        print("   🔍 Hybrid retrieval + lightweight reranking enabled")
        print("   🔄 5-strategy fallback system active")
        return True

    def retrieve_context(self, query: str) -> List[Dict]:
        """Return up to TOP_K chunks for ``query``, reranked when possible.

        Embeds the query, takes up to RERANK_TOP_K neighbours from the FAISS
        index (score above 0.08), merges them through the hybrid retriever
        when one exists, and reranks when more than two chunks remain.

        Returns:
            Chunk dicts, best first; an empty list if anything raised.
        """
        try:
            query_embedding = self.model.encode([query]).astype('float32')
            faiss.normalize_L2(query_embedding)

            scores, indices = self.index.search(query_embedding, min(RERANK_TOP_K, len(self.knowledge_base)))

            # idx is -1 when FAISS has fewer neighbours than requested.
            semantic_results = [
                (idx, float(score))
                for score, idx in zip(scores[0], indices[0])
                if idx != -1 and score > 0.08  # Lower threshold for reranking
            ]

            if self.hybrid_retriever and semantic_results:
                initial_chunks = self.hybrid_retriever.fast_search(query, semantic_results)
            else:
                initial_chunks = []
                for idx, score in semantic_results[:TOP_K]:
                    chunk = self.knowledge_base[idx].copy()
                    chunk['similarity'] = score
                    initial_chunks.append(chunk)

            if len(initial_chunks) > 2:
                return self._apply_reranking(query, initial_chunks)[:TOP_K]

            return initial_chunks[:TOP_K]

        except Exception as e:
            logger.warning(f"Context retrieval failed: {e}")
            return []

    def _apply_reranking(self, query: str, chunks: List[Dict]) -> List[Dict]:
        """Rerank ``chunks`` and count the call if the order changed.

        The order is snapshotted by object identity before the call: the
        reranker may sort the very list it was given, so comparing the
        returned list with ``chunks`` afterwards would compare a list with
        itself and never detect a change.
        """
        order_before = [id(chunk) for chunk in chunks]
        reranked = self.reranker.rerank_chunks(query, chunks)

        if [id(chunk) for chunk in reranked] != order_before:
            self.stats['rerank_improvements'] += 1

        return reranked

    def ask(self, query: str) -> None:
        """Answer ``query`` and print the answer, sources and statistics.

        Serves from the cache when possible. Otherwise retrieves context,
        runs the fallback system when retrieval is weak (fewer than two
        chunks or best similarity below 0.25), queries the LLM, then caches
        and records the exchange.
        """
        if not self.ready:
            print("❌ System not ready")
            return

        self.stats['queries'] += 1
        start_time = time.time()

        # Check cache first
        cache_key = FastCache.hash_query(query)
        cached_response = self.cache.get(cache_key)
        if cached_response:
            self.stats['cache_hits'] += 1
            print(f"\n⚡ CACHED ANSWER\n{'-'*50}")
            print(cached_response)
            print(f"{'-'*50}\nResponse time: <0.01s | From cache")
            return

        conv_context = self.conversation.get_context_for_query(query)
        chunks = self.retrieve_context(query)

        needs_fallback = (
            len(chunks) < 2 or  # Need at least 2 good chunks
            max(c.get('similarity', 0) for c in chunks) < 0.25
        )

        if needs_fallback:
            logger.warning(f"Triggering fallback: chunks={len(chunks)}, max_sim={max([c.get('similarity', 0) for c in chunks], default=0):.3f}")
            fallback_chunks = self.fallback_system.execute_fallback(query, chunks)
            # Only switch when the fallback found at least as many chunks.
            if fallback_chunks and len(fallback_chunks) >= len(chunks):
                chunks = fallback_chunks
                self.stats['fallbacks'] += 1

        prompt = self._build_prompt(query, chunks, conv_context)
        answer = self._query_llm(prompt)

        # Skip caching short answers and anything mentioning "error", which
        # includes the failure strings returned by _query_llm.
        if len(answer) > 50 and "error" not in answer.lower():
            self.cache.set(cache_key, answer)

        self.conversation.add_exchange(query, answer, chunks)

        response_time = time.time() - start_time
        self.stats['total_response_time'] += response_time
        self.stats['avg_response_time'] = self.stats['total_response_time'] / self.stats['queries']

        print(f"\n📝 ANSWER\n{'-'*50}")
        print(answer)

        if chunks:
            print(f"\n📚 SOURCES ({len(chunks)})")
            for i, chunk in enumerate(chunks[:3], 1):
                similarity = chunk.get('final_score', chunk.get('similarity', 0))
                title = chunk.get('title', 'Unknown')
                fallback_method = chunk.get('fallback_method', '')

                info_parts = [f"relevance: {similarity:.3f}"]
                if fallback_method:
                    info_parts.append(f"via {fallback_method}")

                print(f"{i}. {title} ({', '.join(info_parts)})")

        # Rates are relative to the number of queries. retrieve_context can
        # run several times per query during fallbacks, so the rerank rate
        # may exceed 100%.
        cache_rate = (self.stats['cache_hits'] / self.stats['queries']) * 100
        fallback_rate = (self.stats['fallbacks'] / self.stats['queries']) * 100
        rerank_rate = (self.stats['rerank_improvements'] / self.stats['queries']) * 100

        print(f"{'-'*50}")
        print(f"Time: {response_time:.2f}s (avg: {self.stats['avg_response_time']:.2f}s)")
        print(f"Cache: {cache_rate:.0f}% | Fallback: {fallback_rate:.0f}% | Rerank: {rerank_rate:.0f}%")

    def _load_knowledge_base(self) -> bool:
        """Load chunks from EMBEDDINGS_FILE into ``self.knowledge_base``.

        Returns:
            True when at least one usable chunk was loaded; False when the
            file is missing, unreadable, or contains no usable chunk.
        """
        if not os.path.exists(EMBEDDINGS_FILE):
            print(f"❌ File not found: {EMBEDDINGS_FILE}")
            return False

        try:
            with open(EMBEDDINGS_FILE, 'r', encoding='utf-8') as f:
                data = json.load(f)

            self.knowledge_base = self._parse_knowledge_base(data)

            print(f"✅ Loaded {len(self.knowledge_base)} chunks")
            return len(self.knowledge_base) > 0

        except Exception as e:
            print(f"❌ Failed to load knowledge base: {e}")
            return False

    @staticmethod
    def _parse_knowledge_base(data) -> List[Dict]:
        """Convert the decoded JSON payload into a list of chunk dicts.

        Accepts a mapping of ``doc_id -> item`` or a sequence of items (ids
        then come from ``item['id']``, defaulting to ``doc_<index>``). Items
        that are not dicts, lack ``text`` or ``embedding``, or whose stripped
        text is 30 characters or shorter are skipped, so a single malformed
        entry cannot abort the whole load.
        """
        if isinstance(data, dict):
            entries = data.items()
        else:
            entries = (
                (item.get('id', f'doc_{i}') if isinstance(item, dict) else None, item)
                for i, item in enumerate(data)
            )

        knowledge_base = []
        for doc_id, item in entries:
            if not isinstance(item, dict):
                continue
            text = item.get('text')
            if not text or not item.get('embedding') or len(text.strip()) <= 30:
                continue
            knowledge_base.append({
                'id': doc_id,
                'text': item['text'],
                'embedding': np.array(item['embedding']),
                'title': item.get('page_title', 'Unknown'),
                'url': item.get('url', '')
            })
        return knowledge_base

    def _build_index(self) -> bool:
        """Build the FAISS inner-product index and the hybrid retriever.

        Embeddings are L2-normalised before being added, so inner-product
        scores are cosine similarities.

        Returns:
            True on success; False if the knowledge base is empty or
            building failed.
        """
        if not self.knowledge_base:
            return False

        try:
            embeddings = np.vstack([chunk['embedding'] for chunk in self.knowledge_base]).astype('float32')
            faiss.normalize_L2(embeddings)

            # Use simple flat index for speed
            self.index = faiss.IndexFlatIP(embeddings.shape[1])
            self.index.add(embeddings)

            self.hybrid_retriever = FastHybridRetriever(self.knowledge_base)

            print(f"✅ FAISS index built ({embeddings.shape[0]} vectors)")
            return True

        except Exception as e:
            print(f"❌ Index building failed: {e}")
            return False

    def _build_prompt(self, query: str, chunks: List[Dict], context: str = "") -> str:
        """Assemble the LLM prompt from conversation context and chunks.

        Chunks are added in order as ``[Source N: title]`` sections until
        MAX_CONTEXT_LENGTH characters are used. The first chunk that does not
        fit is truncated and included only if more than 200 characters
        remain. With no chunks, a short "no information" message prefixed by
        ``context`` is returned instead.
        """
        if not chunks:
            return f"{context}I don't have specific information to answer: '{query}'. Could you rephrase or ask about a related topic?"

        context_parts = []
        total_length = 0

        for i, chunk in enumerate(chunks):
            if total_length >= MAX_CONTEXT_LENGTH:
                break

            text = chunk['text'].strip()
            source = chunk.get('title', 'Unknown')

            chunk_text = f"[Source {i+1}: {source}]\n{text}\n\n"

            if total_length + len(chunk_text) <= MAX_CONTEXT_LENGTH:
                context_parts.append(chunk_text)
                total_length += len(chunk_text)
            else:
                # Add partial content if there's room
                remaining = MAX_CONTEXT_LENGTH - total_length
                if remaining > 200:
                    partial = text[:remaining-50] + "..."
                    context_parts.append(f"[Source {i+1}: {source}]\n{partial}\n\n")
                break

        full_context = "".join(context_parts)

        return f"""{context}Based on the provided context, answer the question comprehensively. Cite sources using [Source X] format.

CONTEXT:
{full_context}

QUESTION: {query}

ANSWER:"""

    def _query_llm(self, prompt: str) -> str:
        """Send ``prompt`` to the Ollama generate endpoint and return the text.

        A single attempt, no retries. Failures are reported as short message
        strings rather than raised.
        """
        try:
            payload = {
                "model": GEMMA_MODEL,
                "prompt": prompt,
                "stream": False,
                "options": {
                    "temperature": 0.2,
                    "top_p": 0.9,
                    # Ollama's option for the generation length cap is
                    # "num_predict"; "max_tokens" is not a native option.
                    "num_predict": 1000
                }
            }

            # No timeout is set, so this call blocks until Ollama answers.
            response = requests.post(GEMMA_URL, json=payload, timeout=None)
            if response.status_code == 200:
                result = response.json()
                answer = result.get('response', '').strip()
                return answer if answer else "Unable to generate response."
            else:
                return "Error communicating with language model."

        except Exception as e:
            logger.warning(f"LLM query failed: {e}")
            return "Technical error occurred."

# Keep the other classes unchanged (ConversationManager, FastCache, FastTextProcessor, FastHybridRetriever)
class ConversationManager:
    """Keeps a bounded history of exchanges and a set of context keywords."""

    def __init__(self, max_history: int = CONVERSATION_HISTORY):
        # Oldest exchanges are dropped once max_history is reached.
        self.history = deque(maxlen=max_history)
        self.context_keywords = set()
        self.current_topic = None

    def add_exchange(self, query: str, answer: str, chunks: List[Dict]):
        """Record one query/answer pair with its keywords and source titles."""
        source_titles = [chunk.get('title', '') for chunk in chunks[:3]]
        record = {
            'query': query,
            'answer': answer,
            'keywords': self._extract_keywords(query),
            'topics': source_titles,
            'timestamp': time.time()
        }
        self.history.append(record)
        self._update_context(record)

    def _extract_keywords(self, text: str) -> List[str]:
        """Return lowercase alphabetic words (3+ letters) minus stopwords."""
        ignored = {'the', 'and', 'are', 'you', 'what', 'how', 'when', 'where', 'why', 'this', 'that', 'with', 'for', 'can', 'could', 'would', 'should'}
        candidates = re.findall(r'\b[a-zA-Z]{3,}\b', text.lower())
        return [word for word in candidates if word not in ignored]

    def _update_context(self, exchange: Dict):
        """Merge the exchange's first five keywords into the context set.

        Once the set exceeds 20 entries it is rebuilt from the first three
        keywords of each of the last three exchanges.
        """
        self.context_keywords.update(exchange['keywords'][:5])
        if len(self.context_keywords) <= 20:
            return

        latest = list(self.history)[-3:]
        self.context_keywords = {
            keyword
            for past in latest
            for keyword in past['keywords'][:3]
        }

    def get_context_for_query(self, query: str) -> str:
        """Build a context line from up to two related earlier queries.

        Walks the history newest-first and keeps exchanges sharing at least
        one keyword with ``query``. Returns "" when nothing is related.
        """
        if not self.history:
            return ""

        wanted = set(self._extract_keywords(query))
        related = []

        for past in reversed(self.history):
            if wanted & set(past['keywords']):
                related.append(f"Previously asked: {past['query']}")
                if len(related) >= 2:
                    break

        if not related:
            return ""
        return "Conversation context: " + " | ".join(related) + "\n\n"

class FastCache:
    """Small in-memory LRU cache for string values.

    ``cache`` maps key -> value; ``access_order`` lists keys from least to
    most recently used. A ``max_size`` of 0 disables caching: ``set`` stores
    nothing and ``get`` always misses.
    """

    def __init__(self, max_size: int = 100):
        self.cache = {}
        self.access_order = deque(maxlen=max_size)
        self.max_size = max_size

    def get(self, key: str) -> Optional[str]:
        """Return the cached value for ``key`` (marking it recently used) or None."""
        if key not in self.cache:
            return None
        self._mark_used(key)
        return self.cache[key]

    def set(self, key: str, value: str):
        """Store ``value`` under ``key``, evicting the least recently used entry if full."""
        if self.max_size <= 0:
            # Nothing can be stored. Evicting from the empty deque would
            # raise IndexError, so return before touching it.
            return

        if key not in self.cache and len(self.cache) >= self.max_size:
            oldest_key = self.access_order.popleft()
            del self.cache[oldest_key]

        self.cache[key] = value
        self._mark_used(key)

    def _mark_used(self, key: str) -> None:
        """Move ``key`` to the most-recently-used end of ``access_order``."""
        if key in self.access_order:
            self.access_order.remove(key)
        self.access_order.append(key)

    @staticmethod
    def hash_query(query: str) -> str:
        """Return a 12-hex-character cache key for the lowercased, stripped query."""
        return hashlib.md5(query.lower().strip().encode()).hexdigest()[:12]

class FastTextProcessor:
    """Keyword and phrase extraction with memoised results.

    Results are cached in static helpers keyed on ``(text, stopwords)``
    instead of on bound methods. A cache keyed on ``self`` would keep every
    processor instance alive for the life of the class and would go stale
    whenever ``stopwords`` changed.
    """

    def __init__(self):
        self.stopwords = {'the', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by', 'is', 'are', 'was', 'were', 'be', 'been', 'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'could', 'should', 'may', 'might', 'must', 'can', 'this', 'that', 'these', 'those', 'a', 'an'}

    def extract_keywords(self, text: str) -> Tuple[str, ...]:
        """Return up to 15 non-stopword tokens (3+ characters) in text order."""
        return self._cached_keywords(text, frozenset(self.stopwords))

    def extract_phrases(self, text: str) -> Tuple[str, ...]:
        """Return up to 10 two-word phrases of adjacent non-stopwords."""
        return self._cached_phrases(text, frozenset(self.stopwords))

    @staticmethod
    @lru_cache(maxsize=500)
    def _cached_keywords(text: str, stopwords: frozenset) -> Tuple[str, ...]:
        """Memoised worker for extract_keywords."""
        words = re.findall(r'\b[a-zA-Z0-9]{2,}\b', text.lower())
        keywords = tuple(w for w in words if w not in stopwords and len(w) > 2)
        return keywords[:15]  # Limit keywords

    @staticmethod
    @lru_cache(maxsize=300)
    def _cached_phrases(text: str, stopwords: frozenset) -> Tuple[str, ...]:
        """Memoised worker for extract_phrases.

        Words are whitespace-separated and lowercased; both words of a phrase
        must be longer than two characters and not stopwords.
        """
        words = text.lower().split()
        phrases = [
            f"{first} {second}"
            for first, second in zip(words, words[1:])
            if len(first) > 2 and len(second) > 2
            and first not in stopwords and second not in stopwords
        ]
        return tuple(phrases[:10])

class FastHybridRetriever:
    """Fast hybrid retrieval system.

    At construction time three lexical indices are built over
    ``knowledge_base``: keyword -> chunk positions, two-word phrase -> chunk
    positions, and a TF-IDF matrix with one row per chunk. ``fast_search``
    blends their scores with caller-supplied semantic scores.
    """

    def __init__(self, knowledge_base: List[Dict]):
        """Store the knowledge base and build all search indices.

        Args:
            knowledge_base: Chunk dicts. The ``'text'`` entry is indexed and
                the ``'title'`` entry is used for per-source limiting.
        """
        self.knowledge_base = knowledge_base
        self.text_processor = FastTextProcessor()
        self.keyword_index = defaultdict(set)
        self.phrase_index = defaultdict(set)
        self.tfidf_vectorizer = None
        self.tfidf_matrix = None
        self._build_indices()

    def _build_indices(self):
        """Build search indices quickly.

        Fills ``keyword_index`` and ``phrase_index`` and fits the TF-IDF
        vectorizer. If TF-IDF fitting fails, ``tfidf_matrix`` is left as
        ``None`` and the failure is logged; the other indices stay usable.
        """
        logger.warning("Building search indices...")

        documents = []
        for idx, chunk in enumerate(self.knowledge_base):
            text = chunk.get('text', '')
            if not text:
                # Placeholder keeps TF-IDF rows aligned with chunk positions.
                documents.append("")
                continue

            for keyword in self.text_processor.extract_keywords(text):
                self.keyword_index[keyword].add(idx)

            for phrase in self.text_processor.extract_phrases(text):
                self.phrase_index[phrase].add(idx)

            documents.append(text.lower())

        # Build TF-IDF (simplified)
        if documents:
            self.tfidf_vectorizer = TfidfVectorizer(
                max_features=10000,
                ngram_range=(1, 2),
                stop_words='english',
                min_df=2,
                max_df=0.8
            )
            try:
                self.tfidf_matrix = self.tfidf_vectorizer.fit_transform(documents)
            except Exception as exc:
                # Best effort: retrieval still works without TF-IDF, but the
                # reason is reported instead of being silently discarded.
                logger.warning("TF-IDF index unavailable, continuing without it: %s", exc)
                self.tfidf_matrix = None

        logger.warning(f"Indices built: {len(self.keyword_index)} keywords")

    def _keyword_phrase_scores(self, query: str) -> Dict[int, float]:
        """Score chunks by keyword and phrase overlap with ``query``.

        Each matching query keyword adds ``1 / number_of_query_keywords`` and
        each matching query phrase adds ``2 / number_of_query_phrases`` to the
        chunk's score.
        """
        scores = defaultdict(float)

        query_keywords = self.text_processor.extract_keywords(query)
        for keyword in query_keywords:
            for doc_idx in self.keyword_index.get(keyword, set()):
                scores[doc_idx] += 1.0 / len(query_keywords)

        query_phrases = self.text_processor.extract_phrases(query)
        for phrase in query_phrases:
            for doc_idx in self.phrase_index.get(phrase, set()):
                scores[doc_idx] += 2.0 / max(len(query_phrases), 1)

        return scores

    def _tfidf_scores(self, query: str) -> Dict[int, float]:
        """Return TF-IDF cosine similarities above 0.05, keyed by chunk position.

        Returns an empty dict when no TF-IDF index is available or when
        scoring the query fails (the failure is logged).
        """
        if self.tfidf_vectorizer is None or self.tfidf_matrix is None:
            return {}

        try:
            query_vec = self.tfidf_vectorizer.transform([query.lower()])
            similarities = cosine_similarity(query_vec, self.tfidf_matrix).flatten()
        except Exception as exc:
            logger.warning("TF-IDF scoring skipped for this query: %s", exc)
            return {}

        # Vectorised threshold instead of a Python loop over every document.
        hits = np.flatnonzero(similarities > 0.05)
        return {int(i): float(similarities[i]) for i in hits}

    def fast_search(self, query: str, semantic_results: List[Tuple[int, float]]) -> List[Dict]:
        """Fast hybrid search combining all methods.

        Args:
            query: The user query.
            semantic_results: ``(chunk position, score)`` pairs from the
                semantic search.

        Returns:
            At most ``TOP_K`` shallow copies of knowledge-base chunks, best
            first, each with a ``'similarity'`` entry holding the blended
            score. Chunks scoring below ``MIN_SIMILARITY`` are dropped and at
            most three chunks per ``'title'`` are returned.
        """
        keyword_scores = self._keyword_phrase_scores(query)
        tfidf_scores = self._tfidf_scores(query)

        # Combine scores
        final_scores = defaultdict(float)
        all_indices = set()
        kb_size = len(self.knowledge_base)

        # Add semantic results
        for idx, score in semantic_results:
            # Skip positions outside the knowledge base: a negative value
            # would otherwise silently select a chunk from the end of the
            # list. NOTE(review): presumably these come from FAISS, which pads
            # missing neighbours with -1 - confirm against the caller.
            if not 0 <= idx < kb_size:
                continue
            all_indices.add(idx)
            final_scores[idx] = score * HYBRID_WEIGHT_SEMANTIC

        # Lexical signals share HYBRID_WEIGHT_KEYWORD: 70% keyword/phrase, 30% TF-IDF.
        for idx, score in keyword_scores.items():
            all_indices.add(idx)
            final_scores[idx] += score * HYBRID_WEIGHT_KEYWORD * 0.7

        for idx, score in tfidf_scores.items():
            all_indices.add(idx)
            final_scores[idx] += score * HYBRID_WEIGHT_KEYWORD * 0.3

        # Build results
        ranked = sorted(all_indices, key=lambda i: final_scores[i], reverse=True)

        results = []
        source_count = defaultdict(int)
        # Look at twice TOP_K candidates so the per-source cap can skip some.
        for idx in ranked[:TOP_K * 2]:
            if final_scores[idx] < MIN_SIMILARITY:
                continue

            chunk = self.knowledge_base[idx].copy()
            source = chunk.get('title', 'Unknown')

            # Limit per source
            if source_count[source] >= 3:
                continue

            chunk['similarity'] = final_scores[idx]
            results.append(chunk)
            source_count[source] += 1

            if len(results) >= TOP_K:
                break

        return results

def main():
    """Run the interactive console for the RAG system.

    Sets up a ``FastRAG`` instance, prints the banner, then reads queries in a
    loop. The words ``stats``, ``debug`` and ``clear`` are handled as commands;
    ``quit``/``exit``/``q`` (or Ctrl-C / end of input) leave the loop; anything
    else is forwarded to ``rag.ask``. Unexpected errors are logged and the loop
    continues.
    """
    rag = FastRAG()

    if not rag.setup():
        print("❌ Setup failed. Exiting.")
        return

    divider = "=" * 60
    banner_lines = (
        f"\n{divider}",
        "🤖 ENHANCED FAST RAG SYSTEM READY",
        divider,
        "✨ New features:",
        "  • Lightweight reranking for better relevance",
        "  • 5-strategy fallback system",
        "  • Enhanced performance monitoring",
        "  • Improved caching and conversation context",
        "\n💬 Commands:",
        "  • Type questions naturally",
        "  • 'stats' - detailed performance info",
        "  • 'debug' - show system internals",
        "  • 'clear' - clear cache & history",
        "  • 'quit' - exit",
        divider,
    )
    for banner_line in banner_lines:
        print(banner_line)

    def show_stats():
        """Print aggregate performance figures gathered so far."""
        stats = rag.stats
        total = stats['queries']

        if total <= 0:
            print("\n📊 No queries processed yet")
            return

        cache_pct = (stats['cache_hits'] / total) * 100
        fallback_pct = (stats['fallbacks'] / total) * 100
        rerank_pct = (stats['rerank_improvements'] / total) * 100

        print("\n📊 PERFORMANCE STATS")
        print(f"Total queries: {total}")
        print(f"Average response time: {stats['avg_response_time']:.2f}s")
        print(f"Cache hit rate: {cache_pct:.1f}%")
        print(f"Fallback usage: {fallback_pct:.1f}%")
        print(f"Reranking improvements: {rerank_pct:.1f}%")
        print(f"Cache size: {len(rag.cache.cache)}")
        print(f"Conversation history: {len(rag.conversation.history)}")

    def show_debug():
        """Print the readiness of each component and the active settings."""
        print("\n🔧 SYSTEM DEBUG INFO")
        print(f"Knowledge base size: {len(rag.knowledge_base)}")
        component_attrs = (
            ("FAISS index", "index"),
            ("Hybrid retriever", "hybrid_retriever"),
            ("Fallback system", "fallback_system"),
            ("Reranker", "reranker"),
        )
        for label, attr_name in component_attrs:
            state = 'Ready' if getattr(rag, attr_name) else 'Not ready'
            print(f"{label}: {state}")
        print(f"Model: {MODEL_NAME}")
        print(f"Current settings: TOP_K={TOP_K}, MIN_SIM={MIN_SIMILARITY}")

    while True:
        try:
            query = input("\n💬 Ask: ").strip()
            if not query:
                continue

            command = query.lower()
            if command in ('quit', 'exit', 'q'):
                print("\n👋 Goodbye!")
                break

            if command == 'stats':
                show_stats()
            elif command == 'debug':
                show_debug()
            elif command == 'clear':
                # Fresh objects drop all cached answers and prior turns.
                rag.cache = FastCache()
                rag.conversation = ConversationManager()
                print("🧹 Cache and conversation history cleared!")
            else:
                rag.ask(query)

        except (KeyboardInterrupt, EOFError):
            print("\n\n👋 Goodbye!")
            break
        except Exception as e:
            logger.error(f"Error: {e}")
            print("⚠️ An error occurred. Please try again.")

# Start the interactive console only when this code is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

2025-07-20 21:35:23,967 - INFO - [Use pytorch device_name: cpu]
2025-07-20 21:35:23,968 - INFO - [Load pretrained SentenceTransformer: all-MiniLM-L6-v2]


🚀 Starting Optimized Fast RAG System...
✅ Ollama connection verified
📦 Loading embedding model...
✅ Embedding model loaded




✅ Loaded 1615 chunks




✅ FAISS index built (1615 vectors)
✅ Enhanced Fast RAG Ready!
   📚 1615 chunks loaded
   🔍 Hybrid retrieval + lightweight reranking enabled
   🔄 5-strategy fallback system active

🤖 ENHANCED FAST RAG SYSTEM READY
✨ New features:
  • Lightweight reranking for better relevance
  • 5-strategy fallback system
  • Enhanced performance monitoring
  • Improved caching and conversation context

💬 Commands:
  • Type questions naturally
  • 'stats' - detailed performance info
  • 'debug' - show system internals
  • 'clear' - clear cache & history
  • 'quit' - exit



💬 Ask:  What is return on equity?


Batches:   0%|          | 0/1 [00:00<?, ?it/s]


📝 ANSWER
--------------------------------------------------
Return on equity (ROE) is calculated by dividing a company’s net income by its owners’ equity [Source 1]. It represents the return generated for each dollar of shareholder investment. According to the provided text, a higher level of financial leverage (debt) can lead to a greater return on equity, *even if* net income remains the same [Source 1]. For example, XYZ Software with $100 million in equity and $15 million in net income had a 15% ROE. However, if XYZ had only $50 million in equity (and the same $15 million net income), its ROE would increase to 30% [Source 1]. 

In essence, ROE measures how efficiently a company is using shareholders’ investments to generate profits [Source 1].

📚 SOURCES (8)
1. Financial Accounting 101 (relevance: 0.292)
2. Financial Accounting 101 (relevance: 0.284)
3. Financial Accounting 101 (relevance: 0.281)
--------------------------------------------------
Time: 437.34s (avg: 437.34s)
Cache:


💬 Ask:  What is the capital of Moldova?


Batches:   0%|          | 0/1 [00:00<?, ?it/s]



Batches:   0%|          | 0/1 [00:00<?, ?it/s]




📝 ANSWER
--------------------------------------------------
This is a trick question! The provided documents are all about real estate investment and cap rate analysis. They do *not* contain information about the capital of Moldova. 

The capital of Moldova is **Chisinau** (also spelled Chișinău). 

The question is designed to test if you can identify irrelevant information and avoid answering based on the provided context.

📚 SOURCES (3)
1. Selling or Renting 1416 Manchester (relevance: 0.189, via Query Expansion)
2. 2517 Bailey Drive Cap Rate Analysis Feb 2025 (relevance: 0.183, via Query Expansion)
3. 2745 Saxon Drive Cap Rate Analysis  Feb 2025 (relevance: 0.175, via Query Expansion)
--------------------------------------------------
Time: 349.39s (avg: 393.36s)
Cache: 0% | Fallback: 50% | Rerank: 0%



💬 Ask:  What is the internal rate of return and its relationship to return on equity?


Batches:   0%|          | 0/1 [00:00<?, ?it/s]


📝 ANSWER
--------------------------------------------------
## Return on Equity (ROE) and its Relationship to Internal Rate of Return (IRR)

Based on the provided documents, here's a comprehensive explanation of Return on Equity (ROE) and its connection to Internal Rate of Return (IRR):

**What is Return on Equity (ROE)?**

ROE is a profitability ratio that measures how much profit a company generates with the money shareholders have invested.  [Source 1] defines it as net income divided by owners’ equity.  Essentially, it shows how efficiently a company is using shareholder investments to generate earnings. A higher ROE generally indicates better performance.  [Source 1] highlights that increasing debt (financial leverage) can *increase* ROE, but also increases risk.  The example in [Source 1] demonstrates this:

*   **Scenario 1:** $200M assets, $100M liabilities, $100M equity, $15M net income = 15% ROE ($15M / $100M)
*   **Scenario 2:** $200M assets, $150M liabilities, $50M equity,