In [0]:
%pip install streamlit

In [0]:
# Restart the notebook's Python process. Presumably this is needed so the
# streamlit package installed by the %pip cell above becomes importable --
# confirm against the Databricks notebook-scoped library documentation.
dbutils.library.restartPython()

In [0]:
# Advanced Performance Optimization for Professional Chatbot

import hashlib
import json
import time
from datetime import datetime, timedelta
from typing import Dict, List, Optional, Tuple

import numpy as np
import streamlit as st

class PerformanceOptimizedRAG:
    """Retrieval helper with chunking, in-memory caching and query analytics.

    On construction the professional profile is split into small chunks and
    every chunk is embedded once. A query is classified by keyword, scored
    against all chunks with cosine similarity (weighted by chunk importance
    and by how well the chunk type fits the query type), and the best chunks
    that fit a token budget are joined into a context string.

    Relies on names that are not defined in this block and must exist in the
    surrounding notebook: ``get_databricks_client``, ``load_embedding_model``,
    ``load_professional_data``, ``Config`` and ``SentenceTransformer``.
    """

    # How long a cached chat response stays valid.
    RESPONSE_CACHE_TTL = timedelta(minutes=30)
    # Upper bounds that keep the in-memory stores from growing forever.
    MAX_CACHED_RESPONSES = 100
    MAX_CACHED_CONTEXTS = 256
    MAX_ANALYTICS_ENTRIES = 100
    # Rough characters-per-token ratio used for every token estimate.
    CHARS_PER_TOKEN = 4

    # Query types in priority order, paired with their keyword-list key in
    # ``self.query_classifier``. The first type with a matching keyword wins.
    _QUERY_TYPE_PRIORITY = (
        ("databricks", "databricks_keywords"),
        ("project", "project_keywords"),
        ("skill", "skill_keywords"),
        ("experience", "experience_keywords"),
    )

    # Score multipliers indexed as [query type][chunk type]. Built once at
    # class level because the lookup runs for every chunk of every query.
    # Combinations that are not listed fall back to 1.0.
    _TYPE_WEIGHTS = {
        "databricks": {
            "project": 1.2,
            "project_tech": 1.3,
            "skills_category": 1.1,
            "role": 1.0,
            "responsibilities": 1.1,
        },
        "project": {
            "project": 1.3,
            "project_tech": 1.2,
            "responsibilities": 1.0,
            "role": 0.8,
            "skills_category": 0.9,
        },
        "skill": {
            "skills_category": 1.3,
            "project_tech": 1.1,
            "project": 1.0,
            "responsibilities": 0.9,
            "role": 0.7,
        },
        "experience": {
            "role": 1.3,
            "responsibilities": 1.2,
            "project": 1.0,
            "skills_category": 0.8,
            "project_tech": 0.9,
        },
        "general": {
            "role": 1.0,
            "responsibilities": 1.0,
            "project": 1.0,
            "skills_category": 1.0,
            "project_tech": 1.0,
        },
    }

    def __init__(self):
        """Load client, model and profile data, then pre-compute chunks and embeddings."""
        self.client = get_databricks_client()
        self.embedding_model = load_embedding_model()
        self.professional_data = load_professional_data()

        # Performance optimization components
        self.response_cache = {}
        self.embedding_cache = {}
        self.context_cache = {}
        self.query_analytics = []
        # Created lazily, and only if the Databricks embedding call fails.
        self._fallback_model = None

        # Pre-compute optimized document chunks and their embeddings
        self.document_chunks = self._create_optimized_chunks()
        self.embeddings = self._create_cached_embeddings()

        # Initialize query classifier
        self.query_classifier = self._initialize_query_classifier()

    @classmethod
    def _make_chunk(cls, content: str, chunk_type: str, importance: float) -> Dict:
        """Build one chunk record; the token estimate is taken from ``content`` itself."""
        return {
            "content": content,
            "type": chunk_type,
            "importance": importance,
            "tokens": len(content) // cls.CHARS_PER_TOKEN,
        }

    def _create_optimized_chunks(self) -> List[Dict]:
        """Split ``self.professional_data`` into small, typed chunks.

        Reads the ``"experience"``, ``"projects"`` and ``"skills"`` sections.
        Each experience entry yields three chunks (role, responsibilities,
        skills), each project two (project, project_tech) and each skill
        category one (skills_category).

        Returns:
            A list of dicts with keys ``content``, ``type``, ``importance``
            and ``tokens``. ``tokens`` is ``len(content) // 4``, i.e. it is
            estimated from exactly the text that ends up in the context, so
            the budget in ``get_smart_context`` is not under-counted.
        """
        data = self.professional_data
        chunks = []

        # Split experience into granular chunks
        for exp in data["experience"]:
            chunks.append(self._make_chunk(
                f"Role: {exp['role']} at {exp['company']} ({exp['duration']})",
                "role",
                0.9,
            ))
            chunks.append(self._make_chunk(
                f"Responsibilities: {exp['description']}",
                "responsibilities",
                0.8,
            ))
            chunks.append(self._make_chunk(
                f"Skills used: {', '.join(exp['skills'])}",
                "skills",
                0.7,
            ))

        # Project chunks
        for proj in data["projects"]:
            chunks.append(self._make_chunk(
                f"Project: {proj['name']} - {proj['description']}",
                "project",
                0.9,
            ))
            chunks.append(self._make_chunk(
                f"Tech stack: {', '.join(proj['tech_stack'])}. Impact: {proj['impact']}",
                "project_tech",
                0.8,
            ))

        # Skill category chunks
        for category, skills in data["skills"].items():
            chunks.append(self._make_chunk(
                f"{category.replace('_', ' ').title()}: {', '.join(skills)}",
                "skills_category",
                0.6,
            ))

        return chunks

    def _get_fallback_model(self):
        """Return the local fallback embedding model, creating it on first use."""
        if self._fallback_model is None:
            self._fallback_model = SentenceTransformer(Config.FALLBACK_EMBEDDING_MODEL)
        return self._fallback_model

    def _embed(self, texts: List[str]):
        """Embed ``texts`` with the configured backend.

        When ``self.embedding_model`` is the string ``"databricks"`` and a
        client is available, the serving endpoint is called; if that call or
        the parsing of its response fails, the fallback model is used
        instead. Otherwise ``self.embedding_model.encode`` is called.

        Returns:
            One embedding per input text, in input order.
        """
        if self.embedding_model == "databricks" and self.client:
            try:
                response = self.client.predict(
                    endpoint=Config.EMBEDDING_MODEL,
                    inputs={"input": texts},
                )
                return [np.array(emb) for emb in response["data"]]
            except Exception:
                # Best-effort fallback. ``Exception`` (not a bare except) so
                # KeyboardInterrupt/SystemExit still propagate.
                # NOTE(review): if the document embeddings came from the
                # endpoint and only a later query falls back, the two vectors
                # may come from different models -- confirm dimensions match.
                return self._get_fallback_model().encode(texts)
        return self.embedding_model.encode(texts)

    def _create_cached_embeddings(self) -> List[np.ndarray]:
        """Embed every document chunk once and memoize the result.

        The embeddings are stored in ``self.embedding_cache`` under the key
        ``"document_embeddings"`` and returned from there on later calls.
        """
        cache_key = "document_embeddings"

        if cache_key in self.embedding_cache:
            return self.embedding_cache[cache_key]

        documents = [chunk["content"] for chunk in self.document_chunks]
        embeddings = self._embed(documents)

        self.embedding_cache[cache_key] = embeddings
        return embeddings

    def _initialize_query_classifier(self) -> Dict:
        """Return the keyword lists used by ``classify_query``."""
        return {
            "project_keywords": ["project", "built", "developed", "created", "implemented", "designed"],
            "skill_keywords": ["skill", "technology", "tool", "programming", "language", "framework"],
            "experience_keywords": ["experience", "role", "job", "work", "position", "career"],
            "databricks_keywords": ["databricks", "delta", "mlflow", "spark", "lakehouse", "unity"],
        }

    def classify_query(self, query: str) -> str:
        """Classify ``query`` by case-insensitive keyword substring match.

        Returns:
            ``"databricks"``, ``"project"``, ``"skill"`` or ``"experience"``
            (checked in that priority order), or ``"general"`` when no
            keyword occurs in the query.
        """
        query_lower = query.lower()

        for query_type, keywords_key in self._QUERY_TYPE_PRIORITY:
            keywords = self.query_classifier[keywords_key]
            if any(word in query_lower for word in keywords):
                return query_type

        return "general"

    def get_smart_context(self, query: str, max_tokens: int = 1000) -> str:
        """Build the context string for ``query`` within a token budget.

        Args:
            query: The user question.
            max_tokens: Maximum sum of the selected chunks' ``tokens`` values.

        Returns:
            The selected chunk contents joined by blank lines; an empty
            string when not even the best-scoring chunk fits. Results are
            memoized per ``(query, max_tokens)`` in ``self.context_cache``.
        """
        # Check cache first
        cache_key = hashlib.md5(f"{query}_{max_tokens}".encode()).hexdigest()
        cached_context = self.context_cache.get(cache_key)
        if cached_context is not None:
            return cached_context

        query_type = self.classify_query(query)
        query_embedding = np.asarray(self._embed([query])[0])
        # The query norm does not change per chunk, so compute it once.
        query_norm = np.linalg.norm(query_embedding)

        # Calculate similarities with importance and type weighting
        scored_chunks = []
        for chunk, embedding in zip(self.document_chunks, self.embeddings):
            denominator = query_norm * np.linalg.norm(embedding)
            if denominator > 0:
                similarity = float(np.dot(query_embedding, embedding) / denominator)
            else:
                # A zero-length vector has no direction; score it as
                # unrelated instead of letting NaN corrupt the ranking.
                similarity = 0.0

            type_weight = self._get_type_weight(chunk["type"], query_type)
            scored_chunks.append((similarity * chunk["importance"] * type_weight, chunk))

        # Best score first; the sort is stable, so ties keep chunk order.
        scored_chunks.sort(key=lambda pair: pair[0], reverse=True)

        selected_chunks = []
        total_tokens = 0
        for _score, chunk in scored_chunks:
            if total_tokens + chunk["tokens"] > max_tokens:
                # Stop at the first chunk that does not fit, so the context
                # is always an unbroken prefix of the ranking.
                break
            selected_chunks.append(chunk["content"])
            total_tokens += chunk["tokens"]

        context = "\n\n".join(selected_chunks)

        # Cache the result, dropping the oldest entries beyond the limit.
        # Dicts keep insertion order, so the first key is the oldest one.
        self.context_cache[cache_key] = context
        while len(self.context_cache) > self.MAX_CACHED_CONTEXTS:
            del self.context_cache[next(iter(self.context_cache))]

        return context

    def _get_type_weight(self, chunk_type: str, query_type: str) -> float:
        """Return the score multiplier for ``chunk_type`` under ``query_type``.

        Unknown query types and chunk types missing from the table (for
        example ``"skills"``) get the neutral weight 1.0.
        """
        return self._TYPE_WEIGHTS.get(query_type, {}).get(chunk_type, 1.0)

    @staticmethod
    def _response_cache_key(query: str, chat_history: List[Dict]) -> str:
        """Derive the response-cache key from the query and the last two history items."""
        recent_history = (chat_history or [])[-2:]
        return hashlib.md5(f"{query}_{str(recent_history)}".encode()).hexdigest()

    def get_cached_response(self, query: str, chat_history: List[Dict]) -> Optional[str]:
        """Look up a previously cached response.

        Args:
            query: The user question.
            chat_history: Conversation so far; only the last two items
                contribute to the cache key.

        Returns:
            The cached response if one exists and is younger than
            ``RESPONSE_CACHE_TTL``; otherwise ``None``. An expired entry is
            removed from the cache when it is encountered.
        """
        cache_key = self._response_cache_key(query, chat_history)
        entry = self.response_cache.get(cache_key)
        if entry is None:
            return None

        cached_response, timestamp = entry
        if datetime.now() - timestamp < self.RESPONSE_CACHE_TTL:
            return cached_response

        # Expired: evict so stale entries do not occupy cache slots.
        del self.response_cache[cache_key]
        return None

    def cache_response(self, query: str, chat_history: List[Dict], response: str):
        """Store ``response`` with the current time, evicting the oldest entries over the limit."""
        cache_key = self._response_cache_key(query, chat_history)
        self.response_cache[cache_key] = (response, datetime.now())

        # Limit cache size
        while len(self.response_cache) > self.MAX_CACHED_RESPONSES:
            oldest_key = min(
                self.response_cache,
                key=lambda k: self.response_cache[k][1],
            )
            del self.response_cache[oldest_key]

    def track_query_analytics(self, query: str, response_time: float, context_tokens: int):
        """Record one processed query, keeping only the most recent entries."""
        self.query_analytics.append({
            "query": query,
            "response_time": response_time,
            "context_tokens": context_tokens,
            "timestamp": datetime.now(),
        })

        # Keep only the last MAX_ANALYTICS_ENTRIES queries
        if len(self.query_analytics) > self.MAX_ANALYTICS_ENTRIES:
            self.query_analytics = self.query_analytics[-self.MAX_ANALYTICS_ENTRIES:]

class OptimizedChatBot:
    """Chatbot front end that combines response caching, RAG context and an LLM call.

    Relies on names that are not defined in this block and must exist in the
    surrounding notebook: ``get_databricks_client``, ``Config`` and
    ``PerformanceOptimizedRAG``.
    """

    # Token budget handed to the RAG layer when building the context.
    CONTEXT_TOKEN_BUDGET = 1000
    # Generation parameters sent to the foundation model endpoint.
    MAX_RESPONSE_TOKENS = 200
    TEMPERATURE = 0.7
    # Number of trailing chat-history messages forwarded to the model.
    HISTORY_MESSAGES = 2

    def __init__(self):
        """Create the Databricks client and the RAG helper; start with zero cache hits."""
        self.client = get_databricks_client()
        self.rag = PerformanceOptimizedRAG()
        # Number of requests answered straight from the response cache.
        self.cache_hits = 0

    def _build_messages(self, context: str, user_query: str, chat_history: List[Dict]) -> List[Dict]:
        """Assemble the message list: system prompt, recent history, then the user query."""
        system_message = f"""You are a professional AI assistant representing a skilled data scientist and ML engineer.

Context: {context}

Guidelines:
- Be specific and enthusiastic about their experience
- Reference concrete examples from the context
- Keep responses focused and under 150 words
- Highlight unique value propositions"""

        messages = [{"role": "system", "content": system_message}]

        # Only the last exchange is forwarded, to keep the prompt small.
        if chat_history:
            messages.extend(chat_history[-self.HISTORY_MESSAGES:])

        messages.append({"role": "user", "content": user_query})
        return messages

    def generate_response(self, user_query: str, chat_history: List[Dict]) -> str:
        """Answer ``user_query``, using the response cache when possible.

        Args:
            user_query: The user's question.
            chat_history: Prior messages as ``{"role": ..., "content": ...}``
                dicts; only the most recent ones are sent to the model.

        Returns:
            The cached or freshly generated answer. When no client is
            available, or the model call fails, an apology message is
            returned instead of raising.
        """
        start_time = time.time()

        # Check cache first. Compare against None so that a cached empty
        # string still counts as a hit.
        cached_response = self.rag.get_cached_response(user_query, chat_history)
        if cached_response is not None:
            self.cache_hits += 1
            return cached_response

        if not self.client:
            return "I'm sorry, but I'm having trouble connecting to Databricks services right now."

        # Get smart context
        context = self.rag.get_smart_context(user_query, max_tokens=self.CONTEXT_TOKEN_BUDGET)
        context_tokens = len(context) // 4

        messages = self._build_messages(context, user_query, chat_history)

        try:
            response = self.client.predict(
                endpoint=Config.FOUNDATION_MODEL_ENDPOINT,
                inputs={
                    "messages": messages,
                    "max_tokens": self.MAX_RESPONSE_TOKENS,
                    "temperature": self.TEMPERATURE,
                },
            )

            response_text = response["choices"][0]["message"]["content"]

            # Cache the response
            self.rag.cache_response(user_query, chat_history, response_text)

            # Track analytics
            response_time = time.time() - start_time
            self.rag.track_query_analytics(user_query, response_time, context_tokens)

            return response_text

        except Exception as e:
            # Top-level boundary for the UI: report the failure as chat text.
            return f"I apologize, but I'm having trouble processing your request. Error: {str(e)}"

    def get_performance_stats(self) -> Dict:
        """Summarize the tracked query analytics.

        Returns:
            ``{"message": "No queries processed yet"}`` when nothing has been
            tracked; otherwise a dict with ``total_queries``,
            ``avg_response_time``, ``avg_context_tokens``, ``cache_hits``
            (requests served from the response cache, not the number of
            stored entries), ``fastest_response`` and ``slowest_response``.
        """
        analytics = self.rag.query_analytics
        if not analytics:
            return {"message": "No queries processed yet"}

        response_times = [q["response_time"] for q in analytics]
        context_tokens = [q["context_tokens"] for q in analytics]

        return {
            "total_queries": len(analytics),
            # Plain floats rather than NumPy scalars, so the values format
            # and serialize like ordinary numbers.
            "avg_response_time": float(np.mean(response_times)),
            "avg_context_tokens": float(np.mean(context_tokens)),
            "cache_hits": self.cache_hits,
            "fastest_response": min(response_times),
            "slowest_response": max(response_times),
        }

# Usage in Streamlit app
def main_optimized():
    """Render the Streamlit page for the optimized assistant.

    Configures the page, creates the chatbot once per session (stored in
    ``st.session_state.optimized_chatbot``) and shows its performance
    statistics in the sidebar once at least one query has been tracked.
    """
    st.set_page_config(
        layout="wide",
        page_icon="⚡",
        page_title="Optimized Professional AI Assistant",
    )

    st.title("⚡ Performance-Optimized Professional AI Assistant")
    st.markdown("*Powered by Databricks with Smart Context Management*")

    # Build the chatbot only on the first run of this session.
    session = st.session_state
    if 'optimized_chatbot' not in session:
        with st.spinner("Loading optimized knowledge base..."):
            session.optimized_chatbot = OptimizedChatBot()

    # Sidebar metrics: (label, stats key, formatter), rendered in this order.
    metric_rows = (
        ("Total Queries", "total_queries", lambda value: value),
        ("Avg Response Time", "avg_response_time", lambda value: f"{value:.2f}s"),
        ("Avg Context Tokens", "avg_context_tokens", lambda value: f"{value:.0f}"),
        ("Cache Hits", "cache_hits", lambda value: value),
    )

    with st.sidebar:
        st.header("Performance Metrics")
        stats = session.optimized_chatbot.get_performance_stats()

        # The stats dict only carries "total_queries" after a query ran.
        if "total_queries" in stats:
            for label, stats_key, render in metric_rows:
                st.metric(label, render(stats[stats_key]))

    # Rest of the Streamlit app logic...

# Entry point: build the page only when this file is run as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main_optimized()