# GeoAI Environment Smoke Test

This notebook provides a comprehensive test of your GeoAI course environment. Run all cells to verify that:

- ✅ Python environment and packages are correctly installed
- ✅ GPU/MPS acceleration is working (if available)
- ✅ Jupyter kernel is properly configured
- ✅ Course data and models are accessible
- ✅ Foundation models can be loaded
- ✅ Basic ML workflows function correctly

**Instructions:** 
1. Make sure you're running this notebook with the **GeoAI Course** kernel
2. Run all cells in order
3. Check that every test passes (✅) — any failure (❌) indicates a setup issue
4. If you encounter issues, refer to the troubleshooting section of `GRIT_SETUP.md`

---


## 1. Environment and System Information


In [None]:
import sys
import os
import platform
from datetime import datetime

# Section banner.
print("🔍 Environment Information")
print("=" * 50)

# Collect every reported fact up front, then emit them in one go so the
# order of the report is easy to see at a glance.
env_report = [
    f"📅 Test run: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
    f"🐍 Python version: {sys.version}",
    f"💻 Platform: {platform.platform()}",
    f"🏠 Home directory: {os.path.expanduser('~')}",
    f"📁 Current working directory: {os.getcwd()}",
    f"🔧 Conda environment: {os.environ.get('CONDA_DEFAULT_ENV', 'unknown')}",
    f"📦 Python executable: {sys.executable}",
]
print("\n".join(env_report))

# Compare the active conda environment (as exported by conda in
# CONDA_DEFAULT_ENV) with the one the course expects.
expected_env = "geoAI"
current_env = os.environ.get('CONDA_DEFAULT_ENV', 'unknown')
if current_env != expected_env:
    print(f"❌ Wrong environment! Expected: {expected_env}, Found: {current_env}")
    print("   → Activate the geoAI environment: conda activate geoAI")
else:
    print(f"✅ Running in correct environment: {current_env}")


## 2. Environment Variables and Path Configuration


In [None]:
print("🔍 Course Environment Variables")
print("=" * 50)

# Variables the course relies on, mapped to a human-readable description
# that is echoed next to each result.
critical_env_vars = {
    'GEO_BENCH_DIR': 'GEO-Bench datasets directory',
    'GEOAI_MODELS_DIR': 'Foundation models directory',
    'GEOAI_DATA_DIR': 'Course data directory',
    'HUGGINGFACE_HUB_CACHE': 'HuggingFace models cache',
    'HF_HOME': 'HuggingFace home directory',
}

# Stays True only if every variable is set AND points at an existing path.
all_vars_ok = True
for var, description in critical_env_vars.items():
    value = os.environ.get(var)

    # Unset (or empty) variable: nothing further to check.
    if not value:
        print(f"❌ {var}: NOT SET")
        print(f"   📁 {description} - Variable not configured")
        all_vars_ok = False
        continue

    # Set, but the target is missing on disk.
    if not os.path.exists(value):
        print(f"⚠️  {var}: {value}")
        print(f"   📁 {description} - Path does not exist")
        all_vars_ok = False
        continue

    print(f"✅ {var}: {value}")
    print(f"   📁 {description} - Path exists")

# PYTHONPATH is informational only; it does not affect all_vars_ok.
pythonpath = os.environ.get('PYTHONPATH', '')
print(f"\n📚 PYTHONPATH: {pythonpath}" if pythonpath else "\n⚠️  PYTHONPATH: Not set")

if all_vars_ok:
    env_summary = '✅ All environment variables configured correctly'
else:
    env_summary = '❌ Some environment variables need attention'
print(f"\n{env_summary}")


## 3. Core Package Imports and Versions


In [None]:
import warnings
# Keep the report readable: third-party packages emit UserWarnings on import.
warnings.filterwarnings("ignore", category=UserWarning)

print("🔍 Package Import Test")
print("=" * 50)

# Each table below is a list of (importable module name, display name) pairs.

# General scientific Python stack.
core_packages = [
    ("numpy", "NumPy"),
    ("pandas", "Pandas"),
    ("matplotlib", "Matplotlib"),
    ("seaborn", "Seaborn"),
    ("scipy", "SciPy"),
    ("sklearn", "Scikit-learn"),
]

# Deep-learning and model-hub libraries.
ml_packages = [
    ("torch", "PyTorch"),
    ("torchvision", "TorchVision"),
    ("transformers", "HuggingFace Transformers"),
    ("datasets", "HuggingFace Datasets"),
    ("pytorch_lightning", "PyTorch Lightning"),
    ("timm", "TIMM"),
]

# Raster/vector/geospatial tooling.
geo_packages = [
    ("rasterio", "Rasterio"),
    ("geopandas", "GeoPandas"),
    ("xarray", "XArray"),
    ("folium", "Folium"),
    ("torchgeo", "TorchGeo"),
    ("ee", "Earth Engine API"),
]

# Image-processing and tensor-manipulation helpers.
cv_packages = [
    ("cv2", "OpenCV"),
    ("kornia", "Kornia"),
    ("einops", "Einops"),
    ("PIL", "Pillow"),
]

# Notebook front-ends and kernel support.
jupyter_packages = [
    ("jupyter", "Jupyter"),
    ("jupyterlab", "JupyterLab"),
    ("ipykernel", "IPython Kernel"),
    ("notebook", "Notebook"),
]

def test_package_group(packages, group_name):
    print(f"\n📦 {group_name}")
    print("-" * 30)
    failed_imports = []
    
    for package, display_name in packages:
        try:
            module = __import__(package)
            version = getattr(module, '__version__', 'unknown')
            print(f"✅ {display_name}: {version}")
        except ImportError as e:
            print(f"❌ {display_name}: Failed to import - {e}")
            failed_imports.append(display_name)
    
    return failed_imports

# Run every package table through the import checker, in report order, and
# accumulate the display names of anything that failed.
package_groups = (
    (core_packages, "Core Scientific Packages"),
    (ml_packages, "AI/ML Packages"),
    (geo_packages, "Geospatial Packages"),
    (cv_packages, "Computer Vision Packages"),
    (jupyter_packages, "Jupyter Packages"),
)

all_failed = []
for group_packages, group_title in package_groups:
    all_failed += test_package_group(group_packages, group_title)

# Overall verdict for the import test.
print(f"\n{'=' * 50}")
if all_failed:
    print(f"❌ {len(all_failed)} packages failed to import:")
    for pkg in all_failed:
        print(f"   • {pkg}")
    print("\nRefer to installation guide for troubleshooting.")
else:
    print("✅ All core packages imported successfully!")


## 4. GPU/Acceleration Testing


In [None]:
import torch
import time

print("🔍 GPU/Acceleration Testing")
print("=" * 50)

print(f"🔧 PyTorch version: {torch.__version__}")


def _synchronize(backend):
    """Block until work queued on the given accelerator backend has finished.

    GPU kernels are launched asynchronously, so wall-clock timing is only
    meaningful if the queue is drained before starting and stopping the clock.
    Does nothing for "cpu" or when the backend offers no synchronize call.
    """
    if backend == "cuda":
        torch.cuda.synchronize()
    elif backend == "mps":
        mps_module = getattr(torch, "mps", None)
        # torch.mps.synchronize is not present in older PyTorch releases.
        if mps_module is not None and hasattr(mps_module, "synchronize"):
            mps_module.synchronize()


# Defaults describe the CPU fallback. Invariant kept below:
# gpu_available is True exactly when device_name is not "cpu".
gpu_available = False
device_name = "cpu"

# Check CUDA (NVIDIA GPU)
if torch.cuda.is_available():
    gpu_count = torch.cuda.device_count()
    primary_gpu = torch.cuda.get_device_name(0)
    print(f"✅ CUDA available: {gpu_count} device(s)")
    print(f"   🎯 Primary GPU: {primary_gpu}")
    device_name = "cuda"
    gpu_available = True

    # Test CUDA operations
    try:
        device = torch.device('cuda')
        x = torch.randn(100, 100, device=device)
        y = torch.randn(100, 100, device=device)
        z = torch.mm(x, y)
        print(f"   ✅ CUDA tensor operations successful on {z.device}")
    except Exception as e:
        print(f"   ❌ CUDA tensor operations failed: {e}")
        gpu_available = False
        # Fall back to CPU (same as the MPS branch) so the benchmark header
        # and the final summary do not claim CUDA while running on the CPU.
        device_name = "cpu"

# Check MPS (Apple Silicon)
elif hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
    print("✅ Apple Metal Performance Shaders (MPS) available")
    device_name = "mps"
    gpu_available = True

    # Test MPS operations
    try:
        device = torch.device('mps')
        x = torch.randn(100, 100, device=device)
        y = torch.randn(100, 100, device=device)
        z = torch.mm(x, y)
        print(f"   ✅ MPS tensor operations successful on {z.device}")
    except Exception as e:
        print(f"   ❌ MPS tensor operations failed: {e}")
        gpu_available = False
        device_name = "cpu"

else:
    print("⚠️  No GPU acceleration available - using CPU only")
    print("   💡 This is normal for CPU-only systems")

# Bind the device before the benchmark so it is always defined for the
# summary line below and for later cells, even if the benchmark fails.
device = torch.device(device_name)

# Performance benchmark
print(f"\n🏃 Performance benchmark on {device_name.upper()}:")
try:
    # Warm up (first call pays one-off initialisation costs)
    a = torch.randn(1000, 1000, device=device)
    b = torch.randn(1000, 1000, device=device)
    _ = torch.mm(a, b)
    # Drain the warm-up work so it is not counted in the timed region.
    _synchronize(device_name)

    # Benchmark; perf_counter is monotonic and intended for interval timing.
    start_time = time.perf_counter()
    for _ in range(10):
        c = torch.mm(a, b)
    # Wait for the asynchronous kernels to finish before stopping the clock.
    _synchronize(device_name)
    elapsed = time.perf_counter() - start_time

    print(f"   🎯 10x (1000x1000) matrix multiplications: {elapsed:.3f}s")
    print(f"   📊 Average per operation: {elapsed/10:.3f}s")

except Exception as e:
    print(f"   ❌ Benchmark failed: {e}")

# Recommended device selection code
print("\n💡 Recommended device selection for notebooks:")
print("   device = torch.device('cuda' if torch.cuda.is_available() else")
print("                        ('mps' if torch.backends.mps.is_available() else 'cpu'))")
print(f"   Current best device: {device}")

print(f"\n{'✅ GPU acceleration ready!' if gpu_available else '⚠️  CPU-only mode (still functional)'}")


## 5. Foundation Models and Data Access


In [None]:
import json
from pathlib import Path

print("🔍 Foundation Models and Data Access")
print("=" * 50)

# Check for foundation model registry.
# Honour GEOAI_MODELS_DIR (the variable validated in the environment-variable
# check) and fall back to the historical default location when it is unset.
models_dir = Path(
    os.environ.get('GEOAI_MODELS_DIR') or Path.home() / 'geoAI' / 'models'
).expanduser()
registry_path = models_dir / 'model_registry.json'

print(f"📁 Models directory: {models_dir}")
print(f"📄 Registry path: {registry_path}")

if registry_path.exists():
    # Broad catch is deliberate: this is a best-effort diagnostic and a
    # malformed registry should be reported, not crash the notebook.
    try:
        with open(registry_path, 'r', encoding='utf-8') as f:
            registry = json.load(f)

        print("✅ Model registry found")
        print(f"   📅 Created: {registry.get('created', 'unknown')}")

        models = registry.get('models', {})
        if models:
            print(f"   📦 Available models ({len(models)}):")
            for model_name, model_info in models.items():
                # Report an entry without a path individually instead of
                # letting one bad entry abort the whole listing.
                raw_path = model_info.get('path') if isinstance(model_info, dict) else None
                if not raw_path:
                    print(f"      ❌ {model_name}: no path recorded in registry")
                    continue
                model_path = Path(raw_path)
                if model_path.exists():
                    print(f"      ✅ {model_name}: {model_path}")
                else:
                    print(f"      ❌ {model_name}: {model_path} (not found)")
        else:
            print("   ⚠️  No models registered")

    except Exception as e:
        print(f"❌ Failed to read model registry: {e}")
else:
    print("❌ Model registry not found")
    print("   💡 Run: bash installation/scripts/install_foundation_models.sh")

# Check GEO-Bench data
geobench_dir = os.environ.get('GEO_BENCH_DIR')
if geobench_dir:
    geobench_path = Path(geobench_dir)
    if geobench_path.exists():
        print(f"\n✅ GEO-Bench directory: {geobench_path}")
        # List available datasets (each sub-directory is treated as one dataset)
        try:
            datasets = [d for d in geobench_path.iterdir() if d.is_dir()]
            if datasets:
                print(f"   📊 Available datasets ({len(datasets)}):")
                for dataset in sorted(datasets)[:10]:  # Show first 10
                    print(f"      • {dataset.name}")
                if len(datasets) > 10:
                    print(f"      ... and {len(datasets)-10} more")
            else:
                print("   ⚠️  No datasets found")
        except Exception as e:
            print(f"   ⚠️  Could not list datasets: {e}")
    else:
        print(f"❌ GEO-Bench directory not found: {geobench_path}")
else:
    print("❌ GEO_BENCH_DIR environment variable not set")

# Check course data directory
data_dir = os.environ.get('GEOAI_DATA_DIR')
if data_dir:
    data_path = Path(data_dir)
    if data_path.exists():
        print(f"\n✅ Course data directory: {data_path}")
    else:
        print(f"❌ Course data directory not found: {data_path}")
else:
    print("❌ GEOAI_DATA_DIR environment variable not set")


## 6. Model Loading and Basic ML Workflow Test


In [None]:
print("🔍 Model Loading and ML Workflow Test")
print("=" * 50)

# Names of the sub-tests that raised, so the closing status line reflects
# what actually happened instead of always reporting success.
ml_failures = []

# Test HuggingFace model loading
print("📦 Testing HuggingFace model loading...")
try:
    from transformers import AutoModel, AutoTokenizer

    # Load a small test model
    model_name = "distilbert-base-uncased"
    print(f"   Loading {model_name}...")

    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModel.from_pretrained(model_name)

    # Move to available device
    if gpu_available:
        model = model.to(device)
        print(f"   ✅ Model loaded on {device}")
    else:
        print("   ✅ Model loaded on CPU")

    # Test tokenization and inference
    test_text = "This is a test sentence for the GeoAI course."
    inputs = tokenizer(test_text, return_tensors="pt", padding=True, truncation=True)

    if gpu_available:
        inputs = {k: v.to(device) for k, v in inputs.items()}

    with torch.no_grad():
        outputs = model(**inputs)

    print("   ✅ Model inference successful")
    print(f"   📊 Output shape: {outputs.last_hidden_state.shape}")

except Exception as e:
    print(f"   ❌ HuggingFace model loading failed: {e}")
    ml_failures.append("HuggingFace model loading")

# Test PyTorch model creation with TIMM
print("\n🏗️  Testing TIMM model creation...")
try:
    import timm

    # Create a vision transformer model
    model_name = "vit_tiny_patch16_224"
    print(f"   Creating {model_name}...")

    model = timm.create_model(model_name, pretrained=False, num_classes=10)
    # Inference-only check: switch off training-time layers such as dropout.
    model.eval()

    if gpu_available:
        model = model.to(device)

    # Test forward pass
    batch_size = 2
    test_input = torch.randn(batch_size, 3, 224, 224)

    if gpu_available:
        test_input = test_input.to(device)

    with torch.no_grad():
        output = model(test_input)

    print("   ✅ TIMM model creation and inference successful")
    print(f"   📊 Input shape: {test_input.shape}")
    print(f"   📊 Output shape: {output.shape}")

except Exception as e:
    print(f"   ❌ TIMM model creation failed: {e}")
    ml_failures.append("TIMM model creation")

# Test basic tensor operations with geospatial dimensions
print("\n🌍 Testing geospatial tensor operations...")
try:
    # Simulate satellite image data (batch, channels, height, width)
    batch_size = 4
    channels = 6  # Typical for HLS (6-band)
    height, width = 256, 256

    print(f"   Creating mock satellite data: ({batch_size}, {channels}, {height}, {width})")
    satellite_data = torch.randn(batch_size, channels, height, width)

    if gpu_available:
        satellite_data = satellite_data.to(device)

    # Test common operations
    mean_channels = torch.mean(satellite_data, dim=(2, 3))  # Spatial mean
    normalized = (satellite_data - satellite_data.mean()) / satellite_data.std()
    patches = satellite_data.unfold(2, 64, 64).unfold(3, 64, 64)  # 64x64 patches

    print("   ✅ Geospatial tensor operations successful")
    print(f"   📊 Channel means shape: {mean_channels.shape}")
    print(f"   📊 Patches shape: {patches.shape}")

except Exception as e:
    print(f"   ❌ Geospatial tensor operations failed: {e}")
    ml_failures.append("Geospatial tensor operations")

# Report success only when no sub-test recorded a failure.
if ml_failures:
    ml_status = '❌ Some ML workflow tests failed'
else:
    ml_status = '✅ ML workflow tests completed successfully!'
print(f"\n{ml_status}")


## 7. Final Summary and Next Steps


In [None]:
print("🎯 Environment Smoke Test Summary")
print("=" * 50)

# Collect overall status
print(f"✅ Test completed at: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
print(f"🐍 Python: {sys.version.split()[0]}")
print(f"🔧 Environment: {os.environ.get('CONDA_DEFAULT_ENV', 'unknown')}")
print(f"🎮 Acceleration: {device_name.upper() if gpu_available else 'CPU-only'}")

print("\n📋 Quick checklist for students:")
print("   ✅ Run `conda activate geoAI` before starting Jupyter")
print("   ✅ Use the 'GeoAI Course' kernel in your notebooks")
print("   ✅ Import torch and set device for your computations")
print("   ✅ Check environment variables are set correctly")

print("\n💡 Recommended code snippet for your notebooks:")
print("```python")
print("import torch")
print("import os")
print("")
print("# Set device for computations")
print("device = torch.device('cuda' if torch.cuda.is_available() else")
print("                     ('mps' if torch.backends.mps.is_available() else 'cpu'))")
print("print(f'Using device: {device}')")
print("")
print("# Check environment variables")
print("for var in ['GEO_BENCH_DIR', 'GEOAI_MODELS_DIR', 'GEOAI_DATA_DIR']:")
print("    print(f'{var}: {os.environ.get(var, \"NOT SET\")}')")
print("```")

print("\n🚨 If you encountered any ❌ errors:")
print("   1. Check the GRIT_SETUP.md file for troubleshooting")
print("   2. Ensure you're in the correct conda environment")
print("   3. Verify environment variables are set correctly")
print("   4. Contact course staff if issues persist")

# Base the closing verdict on the results recorded by the earlier cells
# (`all_vars_ok` from the environment-variable check, `all_failed` from the
# package import test) instead of declaring success unconditionally.
# globals().get() keeps this cell runnable even if those cells were skipped;
# a missing result is then simply not counted as a failure.
_recorded = globals()
outstanding_issues = []
if not _recorded.get('all_vars_ok', True):
    outstanding_issues.append("environment variables are missing or point to non-existent paths")
_failed_packages = _recorded.get('all_failed') or []
if _failed_packages:
    outstanding_issues.append(f"{len(_failed_packages)} package(s) failed to import")

if outstanding_issues:
    print("\n⚠️  Your GeoAI environment needs attention before you start the course:")
    for issue in outstanding_issues:
        print(f"   • {issue}")
    print("   Fix the ❌ items reported above, then re-run this notebook.")
else:
    print("\n🎉 Your GeoAI environment appears to be ready for the course!")
    print("   You can now proceed with the course materials and assignments.")
