# 🏔️ Glacier Hack 2025 - Kaggle Training Notebook

This notebook trains an optimized UNet model for glacier segmentation, using advanced training techniques to target a Matthews correlation coefficient (MCC) of **70-75%**.

## Key Features:
- ✅ **Custom UNet Architecture** with proven stability
- ✅ **TverskyLoss** optimized for imbalanced glacier data
- ✅ **Global Normalization** for consistent training
- ✅ **Advanced Training** with automatic mixed precision (AMP), stochastic weight averaging (SWA), and threshold sweeping
- ✅ **Kaggle-Compatible** file saving and submission prep

Expected training time: **2-3 hours** | Target MCC: **70-75%**

In [None]:
# 🚀 Initial Setup and GPU Check
import os
import subprocess
import torch
import numpy as np
from pathlib import Path
import glob

# Report whether a CUDA device is visible and, if so, which one and how much
# memory it has.
cuda_ready = torch.cuda.is_available()
print(f"🔥 CUDA available: {cuda_ready}")
if not cuda_ready:
    print("⚠️ No GPU detected - training will be very slow!")
else:
    device_props = torch.cuda.get_device_properties(0)
    print(f"🎯 GPU: {torch.cuda.get_device_name(0)}")
    print(f"💾 GPU Memory: {device_props.total_memory / 1024**3:.1f} GB")

# Every later cell works relative to this directory.
os.chdir('/kaggle/working')
print(f"📁 Current directory: {os.getcwd()}")

# Print the `df -h` report for the working directory.
result = subprocess.run(
    ['df', '-h', '/kaggle/working'],
    capture_output=True,
    text=True,
)
print(f"💽 Available space:\n{result.stdout}")

In [None]:
# 📦 Clone Repository and Download Data
# Clones the project repository into the current working directory, switches
# into the clone, then downloads and unpacks the training archive there.
# Lines starting with `!` are IPython shell escapes, so this cell only runs
# inside a notebook.
# NOTE(review): re-running this cell will presumably fail at `git clone`
# because the target directory already exists — confirm before re-executing.
print("🔄 Cloning repository...")
!git clone https://github.com/observer04/glacier-hack.git
# Later steps use paths relative to the clone (Train/, train_model.py).
os.chdir('/kaggle/working/glacier-hack')

print("📥 Downloading training data...")
!wget -q https://www.glacier-hack.in/train.zip
!unzip -q train.zip -d ./
# Move the contents of the nested Train/Train/ folder up one level and remove
# the now-empty folder, so the data sits directly in ./Train/.
# presumably the archive unpacks to Train/Train/ — verify if the download changes.
!mv ./Train/Train/* ./Train/
!rmdir ./Train/Train

# Verify data structure
# Counts every entry in Train/ (files and sub-folders alike) and shows the
# first five names as a sanity check.
print("✅ Data structure verification:")
train_files = os.listdir('Train/')
print(f"📊 Total training files: {len(train_files)}")
print(f"📋 Sample files: {train_files[:5]}")

# Clean up zip file to save space
!rm train.zip
print("🧹 Cleaned up zip file")

In [None]:
# 🛠️ Install Dependencies
# Installs the third-party packages with pip, then imports them as a smoke
# test that the environment is usable before training starts.
print("📋 Installing required packages...")
!pip install -q tqdm scikit-learn matplotlib pillow tifffile

# Import and verify installation
# A failure in any of these imports raises immediately and stops the cell.
import tqdm
import sklearn
import matplotlib.pyplot as plt
from PIL import Image
import tifffile
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader

print("✅ All dependencies installed successfully!")

# Verify we can load the training modules
# The clone directory is appended to sys.path so its modules can be imported
# by name. presumably data_utils, models and train_utils live at the top level
# of the cloned repository — confirm against the repo layout.
import sys
sys.path.append('/kaggle/working/glacier-hack')

try:
    from data_utils import GlacierDataset, compute_global_stats
    from models import UNet
    from train_utils import TverskyLoss
    print("✅ All custom modules imported successfully!")
except ImportError as e:
    # The error is reported but not re-raised, so the cell still completes;
    # the directory listing helps diagnose a missing or misplaced module.
    print(f"❌ Import error: {e}")
    print("🔍 Available files:")
    !ls -la

In [None]:
# 🎯 Start Optimized Training (UNet + Tversky)
# Prints a human-readable summary of the run and then launches train_model.py
# through an IPython shell escape.
# NOTE(review): the summary below is hand-written text, not derived from the
# command-line flags further down — keep the two in sync when changing either.
print("🚀 Starting optimized training...")
print("📋 Configuration:")
print("   • Model: UNet (proven stable architecture)")
print("   • Loss: TverskyLoss (α=0.7, β=0.3 for imbalanced data)")
print("   • Batch Size: 2 (memory optimized)")
print("   • Epochs: 80")
print("   • Features: AMP, SWA, Threshold Sweep, Global Normalization")
print("   • Expected MCC: 70-75%")
print("   • Training Time: ~2-3 hours")
print()

# Create models directory
# exist_ok=True makes this safe to re-run.
os.makedirs('/kaggle/working/models', exist_ok=True)

# Run the training
# --data_dir is relative, so the working directory must be the cloned
# repository (set by the clone cell).
# NOTE(review): α/β for the Tversky loss are not passed here; presumably the
# values printed above are train_model.py defaults — confirm.
# NOTE(review): batch_size 2 with gradient_accumulation_steps 4 presumably
# yields an effective batch of 8 — confirm against train_model.py.
!python train_model.py \
    --model_type unet \
    --loss_type tversky \
    --batch_size 2 \
    --epochs 80 \
    --lr 0.001 \
    --save_dir /kaggle/working/models \
    --use_amp \
    --use_swa \
    --threshold_sweep \
    --scheduler plateau \
    --normalize_type global \
    --data_dir Train \
    --patience 15 \
    --gradient_accumulation_steps 4

## 📊 Monitor Training Progress

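**Run the cell below to inspect the latest training log and GPU status.** A notebook kernel executes one cell at a time, so while the training cell above is still running this cell will wait in the queue until training finishes or is interrupted.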

In [None]:
# 📈 Monitor Training Progress
import time
import matplotlib.pyplot as plt
import glob

def monitor_training(models_dir='/kaggle/working/models', num_lines=20):
    """Print the tail of the most recently changed training log.

    Looks for ``<models_dir>/*/training.log``, picks the file with the newest
    ``os.path.getctime`` value and prints its last ``num_lines`` lines with
    surrounding whitespace stripped. When no log exists yet, the entries found
    directly under ``models_dir`` are listed instead.

    Args:
        models_dir: Folder whose immediate sub-folders hold the training logs.
        num_lines: Number of trailing log lines to print; must be >= 0.

    Returns:
        None. All results are written to stdout.
    """
    # Imported locally because nothing else in the notebook imports deque.
    from collections import deque

    log_files = glob.glob(os.path.join(models_dir, '*', 'training.log'))
    if not log_files:
        print("⏳ No training logs found yet...")
        print("🔍 Available directories:")
        for entry in glob.glob(os.path.join(models_dir, '*')):
            print(f"   📁 {entry}")
        return

    latest_log = max(log_files, key=os.path.getctime)
    print(f"📊 Monitoring: {latest_log}")
    print(f"📋 Last {num_lines} lines of training log:")
    print("-" * 50)

    # A bounded deque keeps only the tail in memory instead of the whole log.
    # errors='replace' avoids a decode failure if the log is read while a
    # multi-byte character is only partially written.
    with open(latest_log, 'r', encoding='utf-8', errors='replace') as f:
        tail = deque(f, maxlen=num_lines)
    for line in tail:
        print(line.strip())

def show_gpu_usage():
    """Print the current ``nvidia-smi`` report.

    Runs ``nvidia-smi`` and prints its standard output. If the command cannot
    be started (for example because it is not installed), a short failure
    message is printed instead of raising.

    Returns:
        None. All results are written to stdout.
    """
    try:
        result = subprocess.run(['nvidia-smi'], capture_output=True, text=True)
    except (OSError, subprocess.SubprocessError):
        # Only launch failures are handled here; KeyboardInterrupt and
        # programming errors propagate instead of being silently swallowed.
        print("❌ Could not get GPU status")
        return

    print("🖥️ GPU Status:")
    print(result.stdout)
    # Surface the tool's own diagnostics when it exits with an error.
    if result.returncode != 0 and result.stderr:
        print(result.stderr)

# Monitor training
# Log tail first, then a divider line, then the GPU snapshot.
monitor_training()
divider = "=" * 60
print(f"\n{divider}\n")
show_gpu_usage()

## 🎁 Prepare Submission Files

**Run this after training completes to prepare your competition submission:**

In [None]:
# 🎁 Prepare Final Submission Files
import shutil
import glob

# Collect the newest run's best checkpoint, solution.py and training log into
# /kaggle/working/submission and print a summary of what was gathered.
print("🔍 Searching for trained models...")

# Find the best model
# Only directories count as runs: a stray file directly under the models
# folder must not be picked as the "latest model directory".
models_root = '/kaggle/working/models'
model_dirs = [path for path in glob.glob(f'{models_root}/*') if os.path.isdir(path)]
if model_dirs:
    latest_model_dir = max(model_dirs, key=os.path.getctime)
    print(f"📁 Latest model directory: {latest_model_dir}")

    # Create submission directory
    submission_dir = '/kaggle/working/submission'
    os.makedirs(submission_dir, exist_ok=True)

    # Find and copy best model
    best_model_files = glob.glob(f'{latest_model_dir}/best_model.pth')
    if best_model_files:
        # Copy model as model.pth (competition requirement)
        shutil.copy(best_model_files[0], f'{submission_dir}/model.pth')
        print("✅ Model copied as model.pth")

        # Copy solution.py
        shutil.copy('/kaggle/working/glacier-hack/solution.py', f'{submission_dir}/')
        print("✅ Solution.py copied")

        # Show final files
        # Listed before the log is copied, so only the files required for
        # the submission appear here.
        print("\n🎯 SUBMISSION FILES READY:")
        print("=" * 40)
        files = os.listdir(submission_dir)
        for file in files:
            size = os.path.getsize(f'{submission_dir}/{file}') / (1024*1024)  # MB
            print(f"📄 {file} ({size:.1f} MB)")

        # Copy training summary
        log_files = glob.glob(f'{latest_model_dir}/training.log')
        if log_files:
            shutil.copy(log_files[0], f'{submission_dir}/training_log.txt')

            # Show final training results
            # Only the last 10 log lines are scanned for result markers.
            print("\n📊 FINAL TRAINING RESULTS:")
            print("=" * 40)
            with open(log_files[0], 'r') as f:
                lines = f.readlines()
                for line in lines[-10:]:
                    if 'Best' in line or 'MCC' in line:
                        print(f"🏆 {line.strip()}")

        print(f"\n✅ All files ready in: {submission_dir}")
        print("💡 Download these files from Kaggle's output section!")

    else:
        print("❌ No best_model.pth found!")
        print("🔍 Available files in model directory:")
        available_files = glob.glob(f'{latest_model_dir}/*')
        for file in available_files:
            print(f"   📄 {os.path.basename(file)}")
else:
    print("❌ No model directories found!")
    print("🔍 Available directories:")
    # The models folder may not exist at all (e.g. training never started);
    # listing it unconditionally would raise inside this diagnostic branch.
    dirs = os.listdir(models_root) if os.path.isdir(models_root) else []
    for d in dirs:
        print(f"   📁 {d}")