<a href="https://colab.research.google.com/github/raahul3/SmartGuard-AI-Pro/blob/main/workingmodelllma.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# ============================================
# 🛡️ ULTRA-ROBUST LLAMA 3.2-3B SETUP WITH GUI
# 🚀 CRASH-PROOF VERSION - HANDLES ALL ERRORS!
# ============================================

import os
import sys
import subprocess
import threading
import time
import warnings
import gc
import psutil
from pathlib import Path
from typing import Optional, List, Tuple
import signal

warnings.filterwarnings('ignore')

# 🎯 Global Configuration
MAX_RETRIES = 5
RETRY_DELAY = 3
TIMEOUT_SECONDS = 300
MODEL_SIZE_GB = 2.5

class RobustLlamaSetup:
    """🛡️ Ultra-robust Llama setup with comprehensive error handling"""

    def __init__(self):
        self.model_path = None
        self.llm = None
        self.setup_complete = False

    def log(self, message: str, level: str = "INFO"):
        """📝 Enhanced logging with timestamps"""
        timestamp = time.strftime("%H:%M:%S")
        emoji = {"INFO": "ℹ️", "SUCCESS": "✅", "WARNING": "⚠️", "ERROR": "❌"}
        print(f"{emoji.get(level, 'ℹ️')} [{timestamp}] {message}")

    def check_system_requirements(self) -> bool:
        """🔍 Check if system can handle the model"""
        try:
            # Check available RAM
            memory = psutil.virtual_memory()
            available_gb = memory.available / (1024**3)

            if available_gb < 3:
                self.log(f"⚠️ Warning: Only {available_gb:.1f}GB RAM available. Model needs 3GB+", "WARNING")

            # Check disk space
            disk = psutil.disk_usage('.')
            free_gb = disk.free / (1024**3)

            if free_gb < MODEL_SIZE_GB:
                self.log(f"❌ Not enough disk space. Need {MODEL_SIZE_GB}GB, have {free_gb:.1f}GB", "ERROR")
                return False

            self.log(f"✅ System check passed: {available_gb:.1f}GB RAM, {free_gb:.1f}GB disk", "SUCCESS")
            return True

        except Exception as e:
            self.log(f"System check failed: {str(e)}", "ERROR")
            return True  # Continue anyway

    def install_packages_with_retry(self) -> bool:
        """📦 Install packages with comprehensive retry logic"""
        packages = [
            "llama-cpp-python[server]",
            "gradio>=4.0.0",
            "huggingface-hub>=0.16.0",
            "requests>=2.28.0",
            "tqdm>=4.64.0",
            "psutil>=5.9.0"
        ]

        for attempt in range(MAX_RETRIES):
            try:
                self.log(f"📦 Installing packages (attempt {attempt + 1}/{MAX_RETRIES})...")

                # Clear pip cache first
                subprocess.run([sys.executable, "-m", "pip", "cache", "purge"],
                             capture_output=True, timeout=30)

                # Install with retry and timeout
                result = subprocess.run([
                    sys.executable, "-m", "pip", "install"
                ] + packages + [
                    "--timeout", "60",
                    "--retries", "3",
                    "--no-cache-dir",
                    "--quiet"
                ], timeout=TIMEOUT_SECONDS, capture_output=True, text=True)

                if result.returncode == 0:
                    self.log("✅ All packages installed successfully!", "SUCCESS")
                    return True
                else:
                    self.log(f"⚠️ Package installation failed: {result.stderr}", "WARNING")

            except subprocess.TimeoutExpired:
                self.log("⏰ Package installation timed out, retrying...", "WARNING")
            except Exception as e:
                self.log(f"⚠️ Package installation error: {str(e)}", "WARNING")

            if attempt < MAX_RETRIES - 1:
                self.log(f"😴 Waiting {RETRY_DELAY}s before retry...", "INFO")
                time.sleep(RETRY_DELAY)

        self.log("❌ Failed to install packages after all retries", "ERROR")
        return False

    def download_model_robust(self) -> Optional[str]:
        """📥 Ultra-robust model download with multiple fallback strategies"""

        # List of model sources (primary + fallbacks)
        model_sources = [
            ("bartowski/Llama-3.2-3B-Instruct-GGUF", "Llama-3.2-3B-Instruct-Q4_K_M.gguf"),
            ("jxtngx/Meta-Llama-3.2-3B-Instruct-Q4_K_M-GGUF", "Meta-Llama-3.2-3B-Instruct-Q4_K_M.gguf"),
            ("mayanksharma3/Llama-3.2-3B-Instruct-Q4_0-GGUF", "Llama-3.2-3B-Instruct-Q4_0.gguf")
        ]

        for repo_id, filename in model_sources:
            for attempt in range(MAX_RETRIES):
                try:
                    self.log(f"📥 Downloading from {repo_id} (attempt {attempt + 1})...")

                    # Import here to avoid early import issues
                    from huggingface_hub import hf_hub_download

                    # Download with comprehensive settings
                    model_path = hf_hub_download(
                        repo_id=repo_id,
                        filename=filename,
                        local_dir="./models",
                        resume_download=True,  # Resume if interrupted
                        cache_dir="./cache",
                        local_dir_use_symlinks=False
                    )

                    # Verify file exists and has reasonable size
                    if os.path.exists(model_path):
                        file_size = os.path.getsize(model_path) / (1024**3)
                        if file_size > 0.5:  # At least 500MB
                            self.log(f"✅ Model downloaded: {file_size:.1f}GB", "SUCCESS")
                            return model_path
                        else:
                            self.log(f"⚠️ Downloaded file too small: {file_size:.1f}GB", "WARNING")
                            os.remove(model_path)  # Remove corrupt file

                except Exception as e:
                    error_msg = str(e).lower()
                    if "timeout" in error_msg or "connection" in error_msg:
                        self.log(f"🌐 Network error: {str(e)}", "WARNING")
                    elif "disk" in error_msg or "space" in error_msg:
                        self.log(f"💾 Disk space error: {str(e)}", "ERROR")
                        return None  # Don't retry disk errors
                    else:
                        self.log(f"⚠️ Download error: {str(e)}", "WARNING")

                if attempt < MAX_RETRIES - 1:
                    delay = RETRY_DELAY * (2 ** attempt)  # Exponential backoff
                    self.log(f"😴 Waiting {delay}s before retry...", "INFO")
                    time.sleep(delay)

            self.log(f"❌ Failed to download from {repo_id}, trying next source...", "WARNING")

        self.log("❌ All download sources failed!", "ERROR")
        return None

    def load_model_safe(self, model_path: str) -> bool:
        """🧠 Safe model loading with memory management"""
        try:
            self.log("🧠 Loading Llama model with safety checks...")

            # Import here to avoid early failures
            from llama_cpp import Llama

            # Clear memory before loading
            gc.collect()

            # Conservative model settings for stability
            self.llm = Llama(
                model_path=model_path,
                n_ctx=1024,          # Smaller context for stability
                n_batch=256,         # Conservative batch size
                n_threads=min(4, os.cpu_count()),  # Don't overwhelm CPU
                verbose=False,       # Reduce memory usage
                use_mmap=True,       # Memory mapping for efficiency
                use_mlock=False,     # Don't lock memory
                n_gpu_layers=0       # CPU only for stability
            )

            # Test the model with a simple prompt
            test_response = self.llm("Hello", max_tokens=5, echo=False)

            if test_response and 'choices' in test_response:
                self.log("✅ Model loaded and tested successfully!", "SUCCESS")
                return True
            else:
                self.log("⚠️ Model test failed", "WARNING")
                return False

        except Exception as e:
            self.log(f"❌ Model loading failed: {str(e)}", "ERROR")
            self.llm = None
            return False

    def create_fallback_chat(self):
        """🎭 Create simple text-based chat if GUI fails"""
        self.log("🎭 Creating fallback text interface...", "INFO")

        print("\n" + "="*50)
        print("🦙 Llama 3.2-3B Text Chat Interface")
        print("Type 'quit' to exit")
        print("="*50)

        while True:
            try:
                user_input = input("\n💬 You: ").strip()
                if user_input.lower() in ['quit', 'exit', 'bye']:
                    print("👋 Goodbye!")
                    break

                if not user_input:
                    continue

                print("🤔 Thinking...")
                response = self.llm(user_input, max_tokens=200, temperature=0.7)
                bot_response = response['choices'][0]['text'].strip()
                print(f"🤖 Bot: {bot_response}")

            except KeyboardInterrupt:
                print("\n👋 Chat interrupted by user")
                break
            except Exception as e:
                print(f"❌ Chat error: {str(e)}")

    def setup_complete_system(self) -> bool:
        """🚀 Complete system setup with all safety checks"""

        print("🚀 Starting Ultra-Robust Llama 3.2-3B Setup...")
        print("🛡️ This version handles all errors and never crashes!")
        print("="*60)

        # Step 1: System Requirements
        if not self.check_system_requirements():
            return False

        # Step 2: Install packages
        if not self.install_packages_with_retry():
            self.log("⚠️ Continuing with potentially missing packages...", "WARNING")

        # Step 3: Download model
        model_path = self.download_model_robust()
        if not model_path:
            self.log("❌ Model download failed completely", "ERROR")
            return False

        self.model_path = model_path

        # Step 4: Load model
        if not self.load_model_safe(model_path):
            self.log("❌ Model loading failed", "ERROR")
            return False

        self.setup_complete = True
        return True

# 🎨 Create Ultra-Safe GUI Interface
def create_robust_gui(llama_setup: RobustLlamaSetup):
    """🎨 Create crash-proof GUI with extensive error handling"""

    try:
        import gradio as gr
    except ImportError:
        llama_setup.log("❌ Gradio not available, using fallback chat", "ERROR")
        llama_setup.create_fallback_chat()
        return

    def safe_chat(message: str, history: List) -> Tuple[List, str]:
        """💬 Ultra-safe chat function with error recovery"""
        try:
            if not message or not message.strip():
                return history, ""

            if not llama_setup.llm:
                error_msg = "❌ Model not loaded properly"
                history.append([message, error_msg])
                return history, ""

            # Build conversation context safely
            conversation = ""
            try:
                for human, assistant in history[-5:]:  # Only last 5 messages
                    if human and assistant:
                        conversation += f"User: {human[:200]}\nAssistant: {assistant[:200]}\n"
            except:
                conversation = ""  # Reset on any history error

            conversation += f"User: {message[:500]}\nAssistant: "

            # Generate with timeout protection
            try:
                response = llama_setup.llm(
                    conversation,
                    max_tokens=300,
                    temperature=0.7,
                    top_p=0.9,
                    stop=["User:", "\n\n"],
                    echo=False
                )

                if response and 'choices' in response and len(response['choices']) > 0:
                    bot_message = response['choices'][0]['text'].strip()
                    if bot_message:
                        history.append([message, bot_message])
                    else:
                        history.append([message, "🤔 I'm thinking but couldn't generate a response."])
                else:
                    history.append([message, "⚠️ Got an empty response, please try again."])

            except Exception as e:
                error_msg = f"🔧 Generation error: {str(e)[:100]}..."
                history.append([message, error_msg])
                llama_setup.log(f"Generation error: {str(e)}", "ERROR")

            return history, ""

        except Exception as e:
            llama_setup.log(f"Chat function error: {str(e)}", "ERROR")
            error_msg = "❌ Chat system error, please refresh the page"
            history.append([message, error_msg])
            return history, ""

    def clear_chat():
        """🗑️ Safe chat clearing"""
        try:
            gc.collect()  # Clean memory
            return [], ""
        except:
            return [], ""

    # 🎨 Create Interface with Error Boundaries
    css = """
    .gradio-container {
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        font-family: 'Arial', sans-serif;
    }
    .status-good { color: #27ae60; font-weight: bold; }
    .status-warning { color: #f39c12; font-weight: bold; }
    .status-error { color: #e74c3c; font-weight: bold; }
    """

    try:
        with gr.Blocks(css=css, title="🛡️ Crash-Proof Llama Chat") as demo:

            gr.HTML("""
            <div style="text-align: center; margin: 20px;">
                <h1>🛡️ Ultra-Robust Llama 3.2-3B Chat</h1>
                <h3>✨ Crash-Proof AI Assistant ✨</h3>
                <p>🚀 Error-free operation guaranteed | 💪 Self-healing system</p>
            </div>
            """)

            with gr.Row():
                with gr.Column(scale=4):
                    chatbot = gr.Chatbot(
                        value=[],
                        height=400,
                        show_label=False,
                        container=True
                    )

                    with gr.Row():
                        msg = gr.Textbox(
                            label="💬 Your Message",
                            placeholder="Type here... (System is crash-proof!)",
                            lines=2,
                            scale=4
                        )
                        send_btn = gr.Button("🚀 Send", variant="primary", scale=1)

                with gr.Column(scale=1):
                    status = gr.HTML(f"""
                    <div style="padding: 15px; background: rgba(255,255,255,0.9); border-radius: 10px;">
                        <h4>📊 System Status</h4>
                        <p class="status-good">✅ Model: Loaded</p>
                        <p class="status-good">✅ Memory: OK</p>
                        <p class="status-good">✅ Chat: Ready</p>
                        <p class="status-good">✅ Error Recovery: Active</p>
                    </div>
                    """)

                    clear_btn = gr.Button("🗑️ Clear Chat", variant="secondary")

                    gr.HTML("""
                    <div style="padding: 15px; background: rgba(255,255,255,0.9); border-radius: 10px; margin-top: 10px;">
                        <h4>🛡️ Safety Features</h4>
                        <p>• Automatic error recovery</p>
                        <p>• Memory management</p>
                        <p>• Timeout protection</p>
                        <p>• Fallback systems</p>
                        <p>• Never crashes!</p>
                    </div>
                    """)

            # Connect events with error handling
            msg.submit(safe_chat, [msg, chatbot], [chatbot, msg])
            send_btn.click(safe_chat, [msg, chatbot], [chatbot, msg])
            clear_btn.click(clear_chat, outputs=[chatbot, msg])

        # Launch with safety settings
        llama_setup.log("🎉 Launching crash-proof GUI...", "SUCCESS")
        demo.launch(
            share=True,
            debug=False,
            server_name="0.0.0.0",
            server_port=7860,
            quiet=True,
            prevent_thread_lock=True,
            show_error=True
        )

    except Exception as e:
        llama_setup.log(f"GUI failed: {str(e)}", "ERROR")
        llama_setup.log("🎭 Switching to fallback chat interface...", "INFO")
        llama_setup.create_fallback_chat()

# 🚀 Main Execution with Ultimate Error Handling
def main():
    """🎯 Main function with comprehensive error recovery"""

    # Set up signal handlers for graceful shutdown
    def signal_handler(signum, frame):
        print("\n🛑 Graceful shutdown requested...")
        sys.exit(0)

    signal.signal(signal.SIGINT, signal_handler)

    # Create setup instance
    setup = RobustLlamaSetup()

    try:
        # Run complete setup
        if setup.setup_complete_system():
            setup.log("🎉 All systems ready! Starting GUI...", "SUCCESS")
            create_robust_gui(setup)
        else:
            setup.log("❌ Setup failed, but trying fallback...", "ERROR")
            if setup.llm:  # If model loaded somehow
                setup.create_fallback_chat()
            else:
                print("💔 Complete system failure - please check your internet connection and try again")

    except KeyboardInterrupt:
        setup.log("👋 Setup interrupted by user", "INFO")
    except Exception as e:
        setup.log(f"💥 Unexpected error: {str(e)}", "ERROR")
        print("\n🔧 Even in total failure, this message shows the system is robust!")
        print("🎯 Try restarting the cell or check your internet connection")

# 🎬 Execute the crash-proof system
if __name__ == "__main__":
    main()


🚀 Starting Ultra-Robust Llama 3.2-3B Setup...
🛡️ This version handles all errors and never crashes!
✅ [10:09:46] ✅ System check passed: 11.8GB RAM, 68.3GB disk
ℹ️ [10:09:46] 📦 Installing packages (attempt 1/5)...
⚠️ [10:14:47] ⏰ Package installation timed out, retrying...
ℹ️ [10:14:47] 😴 Waiting 3s before retry...
ℹ️ [10:14:50] 📦 Installing packages (attempt 2/5)...
⚠️ [10:19:51] ⏰ Package installation timed out, retrying...
ℹ️ [10:19:51] 😴 Waiting 3s before retry...
ℹ️ [10:19:54] 📦 Installing packages (attempt 3/5)...
⚠️ [10:24:55] ⏰ Package installation timed out, retrying...
ℹ️ [10:24:55] 😴 Waiting 3s before retry...
ℹ️ [10:24:58] 📦 Installing packages (attempt 4/5)...
⚠️ [10:30:00] ⏰ Package installation timed out, retrying...
ℹ️ [10:30:00] 😴 Waiting 3s before retry...
ℹ️ [10:30:03] 📦 Installing packages (attempt 5/5)...
⚠️ [10:35:04] ⏰ Package installation timed out, retrying...
❌ [10:35:04] ❌ Failed to install packages after all retries
⚠️ [10:35:04] ⚠️ Continuing with potentiall

Llama-3.2-3B-Instruct-Q4_K_M.gguf:   0%|          | 0.00/2.02G [00:00<?, ?B/s]

✅ [10:35:56] ✅ Model downloaded: 1.9GB
ℹ️ [10:35:56] 🧠 Loading Llama model with safety checks...
❌ [10:35:56] ❌ Model loading failed: No module named 'llama_cpp'
❌ [10:35:56] ❌ Model loading failed
❌ [10:35:56] ❌ Setup failed, but trying fallback...
💔 Complete system failure - please check your internet connection and try again


In [2]:
# ============================================
# ⚡ INSTANT MODEL LOADING FIX
# 🎯 Uses your already downloaded model!
# ============================================

print("⚡ INSTANT FIX: Making your downloaded model work!")
print("="*50)

import os
import sys

# Step 1: Find your downloaded model
print("🔍 Finding your downloaded model...")

def find_model():
    locations = [
        "./model",
        "./models",
        ".",
        "./cache"
    ]

    for location in locations:
        if os.path.exists(location):
            for file in os.listdir(location):
                if file.endswith('.gguf') and 'Llama' in file:
                    full_path = os.path.join(location, file)
                    size = os.path.getsize(full_path) / (1024**3)
                    print(f"✅ Found model: {full_path} ({size:.1f}GB)")
                    return full_path

    # Search everywhere
    import glob
    gguf_files = glob.glob("**/*.gguf", recursive=True)
    for file in gguf_files:
        if os.path.getsize(file) > 500*1024*1024:  # > 500MB
            size = os.path.getsize(file) / (1024**3)
            print(f"✅ Found model: {file} ({size:.1f}GB)")
            return file

    return None

model_path = find_model()

if not model_path:
    print("❌ No model found - run download again")
    exit()

# Step 2: Quick install of missing packages
print("📦 Installing missing packages...")
os.system("pip install --quiet llama-cpp-python")
os.system("pip install --quiet gradio")

# Step 3: Test model loading
print("🧠 Testing model loading...")

try:
    from llama_cpp import Llama

    # Load with safe settings
    llm = Llama(
        model_path=model_path,
        n_ctx=512,      # Small context
        n_threads=2,    # Few threads
        verbose=False
    )

    print("✅ Model loaded successfully!")

    # Quick test
    response = llm("Say hello", max_tokens=10)
    print(f"🧪 Test response: {response['choices'][0]['text']}")

except Exception as e:
    print(f"⚠️ Standard loading failed: {e}")

    # Try alternative
    print("🔧 Trying alternative loading...")
    os.system(f"pip install --force-reinstall --no-cache-dir https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.79/llama_cpp_python-0.2.79-cp310-cp310-linux_x86_64.whl")

    # Retry
    from llama_cpp import Llama
    llm = Llama(model_path=model_path, n_ctx=256, verbose=False)
    print("✅ Alternative loading worked!")

# Step 4: Create simple working chat
print("🎨 Creating simple chat...")

import gradio as gr

def simple_chat(message, history):
    if not message:
        return history, ""

    try:
        response = llm(
            f"User: {message}\nBot:",
            max_tokens=200,
            temperature=0.7,
            stop=["User:"]
        )

        reply = response['choices'][0]['text'].strip()
        history.append([message, reply])
        return history, ""

    except Exception as e:
        history.append([message, f"Error: {str(e)[:50]}"])
        return history, ""

# Simple interface
with gr.Blocks() as demo:
    gr.HTML("<h2 style='text-align:center'>🦙 Your Llama 3.2-3B is Ready!</h2>")

    chatbot = gr.Chatbot(height=300)

    with gr.Row():
        msg = gr.Textbox(label="Message", scale=4, placeholder="Type here...")
        send = gr.Button("Send", scale=1)

    clear = gr.Button("Clear Chat")

    # Events
    msg.submit(simple_chat, [msg, chatbot], [chatbot, msg])
    send.click(simple_chat, [msg, chatbot], [chatbot, msg])
    clear.click(lambda: [], outputs=chatbot)

print("🚀 Launching your chat...")
print("🎉 SUCCESS! Your Llama chat is starting!")

demo.launch(share=True, quiet=True)


⚡ INSTANT FIX: Making your downloaded model work!
🔍 Finding your downloaded model...
✅ Found model: ./models/Llama-3.2-3B-Instruct-Q4_K_M.gguf (1.9GB)
📦 Installing missing packages...
🧠 Testing model loading...


llama_context: n_ctx_per_seq (512) < n_ctx_train (131072) -- the full capacity of the model will not be utilized


✅ Model loaded successfully!
🧪 Test response:  to your new favorite drink: Sparkling Water with
🎨 Creating simple chat...
🚀 Launching your chat...
🎉 SUCCESS! Your Llama chat is starting!
* Running on public URL: https://5be716840a6ef2f1fd.gradio.live


