### Quick GPU check

In [1]:
def check_system_resources():
    """Print a one-shot snapshot of CPU, RAM and NVIDIA GPU utilisation.

    CPU and RAM percentages come from ``psutil``; GPU utilisation is read by
    running ``nvidia-smi``. Everything is printed; nothing is returned.

    Raises:
        ImportError: if ``psutil`` is not installed.
    """
    import subprocess

    import psutil

    # Sample over a short blocking interval: without one, the first
    # cpu_percent() call has no previous sample to compare against and psutil
    # documents the returned 0.0 as meaningless.
    print(f"CPU Usage: {psutil.cpu_percent(interval=0.1)}%")
    print(f"RAM Usage: {psutil.virtual_memory().percent}%")

    # Check if running on GPU
    try:
        gpu_info = subprocess.run(
            ['nvidia-smi', '--query-gpu=utilization.gpu', '--format=csv,noheader,nounits'],
            capture_output=True, text=True, timeout=10)
    except FileNotFoundError:
        print("GPU: nvidia-smi not found")
        return
    except (OSError, subprocess.TimeoutExpired) as exc:
        # Only failures of the probe itself are handled here; KeyboardInterrupt
        # and programming errors are allowed to propagate.
        print(f"GPU: nvidia-smi could not be run ({exc})")
        return

    # nvidia-smi prints one utilisation value per line, one line per GPU.
    readings = [line.strip() for line in gpu_info.stdout.splitlines() if line.strip()]
    if gpu_info.returncode != 0 or not readings:
        print("GPU: Not available or not in use")
        return

    usage = ", ".join(f"{value}%" for value in readings)
    print(f"GPU Usage: {usage}")

# Print a CPU / RAM / GPU utilisation snapshot before any processing starts:
check_system_resources()

CPU Usage: 6.0%
RAM Usage: 22.0%
GPU Usage: 0%


In [2]:
# Check current system and Ollama configuration
import subprocess
import os

def check_gpu_availability():
    """Detect an NVIDIA or AMD GPU by probing the vendor command-line tools.

    ``nvidia-smi`` is tried first, then ``rocm-smi``. The first tool that
    exits with status 0 has its output printed and decides the result.

    Returns:
        "nvidia" or "amd" for the first tool that succeeded, otherwise None.
    """
    print("🔍 CHECKING GPU AVAILABILITY")
    print("=" * 40)

    # (executable, banner printed on success, value returned), probed in order.
    probes = (
        ('nvidia-smi', "✅ NVIDIA GPU detected:", "nvidia"),
        ('rocm-smi', "✅ AMD GPU (ROCm) detected:", "amd"),
    )

    for tool, banner, vendor in probes:
        try:
            result = subprocess.run([tool], capture_output=True, text=True)
        except FileNotFoundError:
            print(f"❌ {tool} not found")
            continue

        if result.returncode == 0:
            print(banner)
            print(result.stdout)
            return vendor

        # The executable exists but exited non-zero; say so rather than
        # claiming it is missing, since the remedy is different.
        print(f"❌ {tool} failed (exit code {result.returncode})")

    print("⚠️ No GPU detected or drivers not installed")
    return None

def check_ollama_status():
    """Report whether the ``ollama`` CLI responds and which version it is.

    Prints the model list when ``ollama list`` succeeds, then the version
    string when ``ollama --version`` succeeds. If the executable cannot be
    found, a single "not found in PATH" message is printed instead.
    Nothing is returned.
    """
    print("\n🔍 CHECKING OLLAMA STATUS")
    print("=" * 40)

    def run_ollama(*arguments):
        # Every probe shares the same capture settings.
        return subprocess.run(['ollama', *arguments], capture_output=True, text=True)

    try:
        # A successful `ollama list` is treated as the sign that Ollama is up.
        listing = run_ollama('list')
        if listing.returncode != 0:
            print("❌ Ollama not running or accessible")
        else:
            for text in ("✅ Ollama is running", "Available models:", listing.stdout):
                print(text)

        # The version query runs regardless of how the listing went.
        version = run_ollama('--version')
        if version.returncode == 0:
            print(f"Ollama version: {version.stdout.strip()}")

    except FileNotFoundError:
        print("❌ Ollama not found in PATH")

# Run both checks. gpu_type is "nvidia", "amd" or None, depending on which
# vendor tool (if any) exited successfully.
gpu_type = check_gpu_availability()
check_ollama_status()

🔍 CHECKING GPU AVAILABILITY
✅ NVIDIA GPU detected:
Fri Aug  8 14:54:01 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 566.24                 Driver Version: 566.24         CUDA Version: 12.7     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                  Driver-Model | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA RTX 2000 Ada Gene...  WDDM  |   00000000:01:00.0  On |                  N/A |
| N/A   51C    P8              7W /   45W |     543MiB /   8188MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+---------------------

In [None]:
def test_gpu_ubuntu():
    """Smoke-test Ollama by pulling a small model and running one prompt.

    Pulls ``gemma3:1b`` (5-minute limit), then sends it a single
    "Hello world" prompt (60-second limit) and prints the outcome of each
    step. The function only shows whether inference runs; it does not itself
    inspect GPU utilisation. Nothing is returned.
    """
    print("\n🧪 TESTING GPU ON UBUNTU")
    print("=" * 40)

    # Test with a small model
    try:
        print("Pulling test model...")
        pull = subprocess.run(['ollama', 'pull', 'gemma3:1b'],
                              capture_output=True, text=True, timeout=300)
        if pull.returncode != 0:
            # Report the failure instead of discarding it, but carry on: the
            # inference step below reports its own error if the model is
            # genuinely unavailable.
            print(f"⚠️ Pull failed: {pull.stderr.strip()}")

        print("Testing inference...")
        result = subprocess.run(['ollama', 'run', 'gemma3:1b', 'Hello world'],
                                capture_output=True, text=True, timeout=60)

        if result.returncode == 0:
            print("✅ Test successful!")
            print(f"Response: {result.stdout}")
        else:
            print(f"❌ Test failed: {result.stderr}")

    except subprocess.TimeoutExpired:
        print("⚠️ Test timed out")
    except Exception as e:
        # Includes FileNotFoundError when the ollama executable is missing.
        print(f"Error: {e}")
# This call is live (not commented out): running the cell pulls gemma3:1b and
# sends it one prompt. Comment the line out to skip the test.
test_gpu_ubuntu()

In [None]:
# Diagnose GPU issues on Ubuntu (hardware, driver, CUDA toolkit, environment, Ollama service)
def diagnose_gpu_ubuntu():
    """Comprehensive GPU diagnosis for Ubuntu.

    Prints the findings of five checks, in order:

    1. PCI hardware detection (``lspci``)
    2. NVIDIA driver (``nvidia-smi``)
    3. CUDA toolkit (``nvcc``)
    4. GPU-related environment variables of the current process
    5. Ollama service and process state (``systemctl``, ``pgrep``)

    Returns:
        False as soon as ``lspci`` lists no NVIDIA device or the driver check
        fails; True once all five sections have been printed.
    """
    print("🔧 COMPREHENSIVE GPU DIAGNOSIS (Ubuntu)")
    print("=" * 50)

    # 1. Check hardware detection
    print("1. HARDWARE DETECTION:")
    print("-" * 25)
    try:
        # Run lspci directly and filter in Python instead of piping through
        # grep in a shell: no shell is needed, and a missing lspci can be
        # told apart from "no NVIDIA device present".
        result = subprocess.run(['lspci'], capture_output=True, text=True)
    except FileNotFoundError:
        # Not conclusive either way; the driver check below still runs.
        print("⚠️ lspci not found - skipping hardware detection")
    except Exception as e:
        print(f"Error checking hardware: {e}")
        return False
    else:
        if result.returncode != 0:
            print("⚠️ lspci failed - skipping hardware detection")
        else:
            nvidia_devices = [line for line in result.stdout.splitlines()
                              if 'nvidia' in line.lower()]
            if nvidia_devices:
                print("✅ NVIDIA hardware detected:")
                print('\n'.join(nvidia_devices))
            else:
                print("❌ No NVIDIA hardware found")
                return False

    # 2. Check driver installation
    print("\n2. DRIVER INSTALLATION:")
    print("-" * 25)
    try:
        result = subprocess.run(['nvidia-smi'], capture_output=True, text=True)
    except FileNotFoundError:
        print("❌ nvidia-smi not found - drivers not installed")
        return False
    if result.returncode != 0:
        print("❌ nvidia-smi failed - drivers not properly installed")
        print("Install with: sudo apt install nvidia-driver-535")
        return False
    print("✅ NVIDIA drivers installed and working")
    # Echo the first banner line that carries the driver version.
    for line in result.stdout.split('\n'):
        if 'Driver Version:' in line:
            print(f"Driver version: {line.strip()}")
            break

    # 3. Check CUDA installation (informational only: never aborts)
    print("\n3. CUDA INSTALLATION:")
    print("-" * 25)
    try:
        result = subprocess.run(['nvcc', '--version'], capture_output=True, text=True)
        if result.returncode == 0:
            print("✅ CUDA toolkit installed")
            # Extract CUDA version
            for line in result.stdout.split('\n'):
                if 'release' in line:
                    print(f"CUDA version: {line.strip()}")
                    break
        else:
            print("⚠️ CUDA toolkit not found")
            print("Install with: sudo apt install nvidia-cuda-toolkit")
    except FileNotFoundError:
        print("⚠️ nvcc not found - CUDA toolkit not installed")
        print("This might be needed for Ollama GPU support")

    # 4. Check Ollama GPU environment
    # These are the values seen by this Python process only.
    print("\n4. OLLAMA GPU ENVIRONMENT:")
    print("-" * 25)

    gpu_vars = ['OLLAMA_GPU', 'OLLAMA_GPU_LAYERS', 'CUDA_VISIBLE_DEVICES']
    for var in gpu_vars:
        value = os.environ.get(var, 'Not set')
        print(f"{var}: {value}")

    # 5. Check Ollama service configuration
    print("\n5. OLLAMA SERVICE:")
    print("-" * 25)
    try:
        # Check if running as service
        result = subprocess.run(['systemctl', 'is-active', 'ollama'], capture_output=True, text=True)
        if result.stdout.strip() == 'active':
            print("✅ Ollama running as system service")

            # Check service environment
            result = subprocess.run(['systemctl', 'show', 'ollama', '--property=Environment'],
                                    capture_output=True, text=True)
            print(f"Service environment: {result.stdout.strip()}")
        else:
            print("⚠️ Ollama not running as system service")
    except FileNotFoundError:
        print("⚠️ systemctl not found - cannot check system service")
    except Exception as e:
        print(f"Error checking service: {e}")

    # The process check is independent of systemd, so it gets its own
    # try block and still runs when systemctl is unavailable.
    try:
        result = subprocess.run(['pgrep', '-f', 'ollama'], capture_output=True, text=True)
        if result.returncode == 0:
            print("✅ Ollama process found")
        else:
            print("❌ No Ollama process running")
    except Exception as e:
        print(f"Error checking process: {e}")

    return True

# Run diagnosis (returns False as soon as the hardware or driver check fails, True otherwise)
diagnose_gpu_ubuntu()

In [None]:
# Set GPU-related environment variables for this kernel and print manual setup steps
def configure_ollama_gpu_ubuntu():
    """Apply GPU settings to this process and print persistent-setup steps.

    The only state changed is ``os.environ`` of the running Python process.
    The systemd override and the ``~/.bashrc`` snippet are printed as
    instructions for the user to apply by hand; no file is written.

    Returns:
        dict: the environment variable names and values that were set.
    """
    print("\n🔧 CONFIGURING OLLAMA FOR GPU (Ubuntu)")
    print("=" * 50)

    # Step 1: export the settings into the current process environment.
    print("1. Setting environment variables...")
    gpu_env = dict(
        OLLAMA_GPU='1',
        OLLAMA_GPU_LAYERS='-1',
        OLLAMA_KEEP_ALIVE='5m',
        CUDA_VISIBLE_DEVICES='0',  # Use first GPU
    )
    os.environ.update(gpu_env)
    for name in gpu_env:
        print(f"   {name}={gpu_env[name]}")

    # Step 2: show the systemd drop-in and the commands that would install it.
    print("\n2. Creating systemd service override...")
    service_override = (
        "[Service]\n"
        "Environment=OLLAMA_GPU=1\n"
        "Environment=OLLAMA_GPU_LAYERS=-1\n"
        "Environment=CUDA_VISIBLE_DEVICES=0\n"
    )
    manual_steps = (
        "   Service override content:",
        service_override,
        "   To apply manually, run:",
        "   sudo mkdir -p /etc/systemd/system/ollama.service.d/",
        "   sudo tee /etc/systemd/system/ollama.service.d/gpu.conf << EOF",
        service_override,
        "   EOF",
        "   sudo systemctl daemon-reload",
        "   sudo systemctl restart ollama",
    )
    for step in manual_steps:
        print(step)

    # Step 3: show the shell exports the user can add for future sessions.
    print("\n3. Adding to ~/.bashrc for persistence...")
    bashrc_content = (
        "\n"
        "# Ollama GPU configuration\n"
        "export OLLAMA_GPU=1\n"
        "export OLLAMA_GPU_LAYERS=-1\n"
        "export CUDA_VISIBLE_DEVICES=0\n"
    )
    print("   Add this to ~/.bashrc:")
    print(bashrc_content)

    return gpu_env

# Configure GPU: sets the variables in this kernel's os.environ and keeps the
# applied name/value mapping in gpu_config.
gpu_config = configure_ollama_gpu_ubuntu()

In [None]:
# Restart Ollama as a child process of this kernel with the GPU environment applied
def restart_ollama_with_gpu_ubuntu():
    """Restart Ollama with GPU configuration.

    Signals running Ollama processes with ``pkill -f ollama``, launches
    ``ollama serve`` as a child process with the GPU-related environment
    variables set, then polls ``ollama list`` until the server answers.

    Returns:
        bool: True once ``ollama list`` succeeds while the spawned server is
        still alive; False if the server exits early, never answers within
        the polling window, or cannot be started.
    """
    import time

    # Number of one-second polls granted to the server before giving up.
    startup_attempts = 10

    print("\n🔄 RESTARTING OLLAMA WITH GPU")
    print("=" * 40)

    # Stop any running Ollama processes
    print("1. Stopping existing Ollama processes...")
    try:
        # NOTE: -f matches against the full command line, so every process
        # whose command line mentions "ollama" is signalled.
        stopped = subprocess.run(['pkill', '-f', 'ollama'], capture_output=True)
    except FileNotFoundError:
        print("   ⚠️ pkill not found - could not stop existing processes")
    else:
        # pkill exits 0 when at least one process matched and 1 when none
        # did, so the message has to follow the exit status.
        if stopped.returncode == 0:
            print("   ✅ Stopped existing processes")
        elif stopped.returncode == 1:
            print("   ℹ️ No existing processes found")
        else:
            print(f"   ⚠️ pkill failed (exit code {stopped.returncode})")

    # Wait a moment
    time.sleep(2)

    # Start Ollama manually with GPU environment
    print("2. Starting Ollama with GPU environment...")

    # Set GPU environment variables
    env = os.environ.copy()
    env.update({
        'OLLAMA_GPU': '1',
        'OLLAMA_GPU_LAYERS': '-1',
        'CUDA_VISIBLE_DEVICES': '0'
    })

    try:
        # Start ollama serve in background
        print("   Starting 'ollama serve' with GPU env...")
        proc = subprocess.Popen(['ollama', 'serve'], env=env)

        last_error = ""
        for _ in range(startup_attempts):
            time.sleep(1)

            # If our server already exited (for example because another
            # instance holds the port), a successful `ollama list` would be
            # answered by that other instance, not by the one started here.
            if proc.poll() is not None:
                print(f"   ❌ 'ollama serve' exited early (exit code {proc.returncode})")
                return False

            try:
                result = subprocess.run(['ollama', 'list'],
                                        capture_output=True, text=True, timeout=10)
            except subprocess.TimeoutExpired:
                last_error = "'ollama list' timed out"
                continue

            if result.returncode == 0:
                print("   ✅ Ollama started successfully with GPU environment")
                return True
            last_error = result.stderr

        print(f"   ❌ Ollama not responding: {last_error}")
        return False

    except Exception as e:
        print(f"   ❌ Error starting Ollama: {e}")
        return False

# Restart Ollama. This call is live (not commented out): running the cell
# signals existing ollama processes and starts `ollama serve`. Comment it out to skip.
restart_ollama_with_gpu_ubuntu()

In [None]:
# Detailed GPU test: run one inference while polling nvidia-smi in a background thread
def test_gpu_with_monitoring():
    """Test GPU with real-time monitoring.

    Runs one ``ollama run gemma3:1b`` inference (120-second limit) while a
    background thread samples ``nvidia-smi`` about once per second. Any
    sample reporting utilisation above 0% counts as GPU activity.

    Returns:
        bool: True if the inference succeeded and GPU activity was observed
        during it; False otherwise (including failure or timeout).
    """
    import threading
    import time

    print("\n🧪 TESTING GPU WITH MONITORING")
    print("=" * 40)

    # Start GPU monitoring in background
    stop_monitoring = threading.Event()
    gpu_activity_detected = False

    def parse_int(text):
        # nvidia-smi reports unsupported fields as e.g. "[N/A]"; treat those
        # as unknown instead of letting int() abort the monitor.
        try:
            return int(text)
        except ValueError:
            return None

    def monitor_gpu():
        nonlocal gpu_activity_detected
        print("   📊 Starting GPU monitoring...")

        while not stop_monitoring.is_set():
            try:
                result = subprocess.run([
                    'nvidia-smi', '--query-gpu=utilization.gpu,memory.used,memory.total,temperature.gpu',
                    '--format=csv,noheader,nounits'
                ], capture_output=True, text=True, timeout=2)
            except subprocess.TimeoutExpired:
                # One slow sample should not end monitoring; skip it.
                result = None
            except Exception as e:
                print(f"      ❌ GPU monitoring error: {e}")
                break

            if result is not None and result.returncode == 0:
                for i, line in enumerate(result.stdout.strip().split('\n')):
                    fields = [field.strip() for field in line.split(',')]
                    if len(fields) != 4:
                        continue
                    gpu_util, mem_used, mem_total, temp = fields
                    util_value = parse_int(gpu_util)
                    mem_value = parse_int(mem_used)
                    if util_value is not None and util_value > 0:
                        print(f"      🔥 GPU {i}: {gpu_util}% util, {mem_used}MB/{mem_total}MB, {temp}°C")
                        gpu_activity_detected = True
                    elif mem_value is not None and mem_value > 500:  # Model loaded but not active
                        print(f"      💤 GPU {i}: Model loaded ({mem_used}MB), waiting for inference...")

            # Sleeps up to one second but wakes immediately on stop.
            stop_monitoring.wait(1)

    # Start monitoring thread
    monitor_thread = threading.Thread(target=monitor_gpu, daemon=True)
    monitor_thread.start()

    # Test inference with timing
    try:
        print("\n   🚀 Testing inference with gemma3:1b...")
        start_time = time.perf_counter()

        result = subprocess.run([
            'ollama', 'run', 'gemma3:1b',
            'Extract one key fact from this text: The case was filed in court in 2020.'
        ], capture_output=True, text=True, timeout=120)

        inference_time = time.perf_counter() - start_time

    except subprocess.TimeoutExpired:
        print("   ⚠️ Test timed out - definitely using CPU")
        return False
    except Exception as e:
        print(f"   ❌ Test error: {e}")
        return False
    finally:
        # Single exit path for the monitor: stop it and wait for the sample
        # in flight, so the activity flag is final before it is read below.
        stop_monitoring.set()
        monitor_thread.join(timeout=5)

    if result.returncode != 0:
        print(f"   ❌ Inference failed: {result.stderr}")
        return False

    print(f"\n   ✅ Inference completed in {inference_time:.2f} seconds")
    print(f"   📝 Response: {result.stdout[:200]}...")

    # Determine GPU usage
    if gpu_activity_detected:
        print("   🎯 GPU ACTIVITY DETECTED - GPU acceleration working!")
    elif inference_time < 15:
        print("   ⚡ Fast inference but no GPU activity detected - check GPU monitoring")
    else:
        print("   🐌 Slow inference and no GPU activity - likely using CPU")

    return gpu_activity_detected

# Run GPU test with monitoring; gpu_working is True only when the inference
# succeeded and nvidia-smi reported non-zero utilisation while it ran.
gpu_working = test_gpu_with_monitoring()