# DrainageAI Demo - Google Colab Integration with BYOL (Unlabeled Data Focus)

This notebook demonstrates the DrainageAI workflow using Google Colab's GPU acceleration, focusing on the BYOL approach with unlabeled data only.

## Step 1: Check GPU Availability

In [None]:
import torch

# Query CUDA once and report what the runtime offers.
has_gpu = torch.cuda.is_available()
print(f"GPU available: {has_gpu}")

if not has_gpu:
    # CPU-only runtime: everything still works, just slowly.
    print("WARNING: No GPU detected. Processing will be slow.")
else:
    print(f"GPU name: {torch.cuda.get_device_name(0)}")

## Step 2: Install Dependencies

In [None]:
!pip install rasterio geopandas scikit-image matplotlib pytorch-lightning torch-geometric

## Step 3: Clone the DrainageAI Repository

In [None]:
# Clone the repository
!git clone https://github.com/yourusername/DrainageAI.git

%cd DrainageAI

## Step 4: Upload Test Imagery

In [None]:
from google.colab import files

print("Please upload your multispectral imagery file (GeoTIFF format):")
uploaded = files.upload()

# A cancelled upload leaves the mapping empty; fail with a clear message
# instead of an IndexError when picking the first filename.
if not uploaded:
    raise RuntimeError("No file was uploaded. Re-run this cell and select a GeoTIFF file.")

# Get the filename of the (first) uploaded file
imagery_filename = next(iter(uploaded))
print(f"Uploaded file: {imagery_filename}")

## Step 5: Create Output Directory

In [None]:
# Create the directory that later cells pass as their output location.
# -p keeps the command from failing when the directory already exists.
!mkdir -p colab_results

## Step 6: Calculate Spectral Indices

In [None]:
print("\n=== Step 1: Calculate Spectral Indices ===\n")

!python main.py indices --imagery {imagery_filename} --output colab_results/indices.tif --indices ndvi,ndmi,msavi2

## Step 7: Run Drainage Detection

In [None]:
print("\n=== Step 2: Detect Drainage Pipes ===\n")

# Choose one of the following model options:

# Semi-supervised model (default)
!python main.py detect --imagery {imagery_filename} --indices colab_results/indices.tif --output colab_results/drainage_semi.tif --model semi

# Uncomment to use BYOL model (new)
# !python main.py detect --imagery {imagery_filename} --indices colab_results/indices.tif --output colab_results/drainage_byol.tif --model byol

## Step 8: Vectorize Results

In [None]:
print("\n=== Step 3: Vectorize Results ===\n")

# Change the input file if you used a different model
detection_file = "colab_results/drainage_semi.tif"

!python main.py vectorize --input {detection_file} --output colab_results/drainage_lines.shp

## BYOL Workflow for Unlabeled Data

This section demonstrates the BYOL (Bootstrap Your Own Latent) approach using only unlabeled data for pretraining.

### Upload Unlabeled Data

In [None]:
# Create directories for unlabeled data
!mkdir -p data/unlabeled/imagery

# Upload unlabeled imagery (you can upload multiple files)
print("Please upload unlabeled imagery files (GeoTIFF format):")
uploaded_unlabeled = files.upload()

# Save uploaded files to the unlabeled directory
for filename in uploaded_unlabeled.keys():
    with open(f"data/unlabeled/imagery/{filename}", 'wb') as f:
        f.write(uploaded_unlabeled[filename])
    print(f"Saved {filename} to data/unlabeled/imagery/")

### Check Image Channels and Convert if Needed

In [None]:
import os
import shutil

import numpy as np
import rasterio

# Create directory for RGB images if needed.
# Converted 3-band copies of the unlabeled images are written here;
# -p keeps the command from failing when the directory already exists.
!mkdir -p data/unlabeled_rgb

# Function to convert grayscale to RGB
def convert_grayscale_to_rgb(input_path, output_path):
    """Write a 3-band copy of a 1- or 2-band raster.

    A single band is repeated three times; a two-band raster gets an
    all-zero third band appended. Rasters that already have three or more
    bands are skipped and nothing is written.

    Args:
        input_path: Path of the raster to read.
        output_path: Path of the 3-band raster to create.

    Returns:
        True if ``output_path`` was written, False if the input already had
        at least three bands and was skipped.
    """
    with rasterio.open(input_path) as src:
        # Decide from the band count alone, so a raster that is going to be
        # skipped is never loaded into memory.
        band_count = src.count
        if band_count >= 3:
            print(f"Image {input_path} already has {band_count} channels, skipping.")
            return False

        data = src.read()
        profile = src.profile.copy()

        if band_count == 1:
            # Duplicate the grayscale band into three identical bands.
            rgb_data = np.repeat(data, 3, axis=0)
        else:
            # Two bands: pad with an all-zero third band of matching shape/dtype.
            zeros = np.zeros_like(data[0:1])
            rgb_data = np.concatenate([data, zeros], axis=0)

        # Same profile as the source, except for the band count.
        profile.update(count=3)

        with rasterio.open(output_path, 'w', **profile) as dst:
            dst.write(rgb_data)

        return True

# Check and convert all images in the unlabeled directory
need_conversion = False
# (source, destination) pairs for images that already have >= 3 channels
already_multichannel = []
for filename in os.listdir('data/unlabeled/imagery'):
    if filename.endswith(('.tif', '.tiff')):
        input_path = os.path.join('data/unlabeled/imagery', filename)
        output_path = os.path.join('data/unlabeled_rgb', filename)

        # Check number of channels (the handle is closed before any conversion)
        with rasterio.open(input_path) as src:
            num_channels = src.count
        print(f"Image {filename} has {num_channels} channel(s)")

        if num_channels < 3:
            need_conversion = True
            print(f"Converting {filename} to RGB format...")
            convert_grayscale_to_rgb(input_path, output_path)
        else:
            already_multichannel.append((input_path, output_path))
            print(f"Image {filename} already has {num_channels} channels, no conversion needed.")

# Determine which directory to use for training
if need_conversion:
    # Training will read only from data/unlabeled_rgb, so images that needed no
    # conversion are copied there as well; otherwise a mixed upload would
    # silently lose them from the training set.
    for src_path, dst_path in already_multichannel:
        shutil.copy2(src_path, dst_path)
        print(f"Copied {os.path.basename(src_path)} to data/unlabeled_rgb/")

    print("\nUsing converted RGB images for training.")
    optical_dir = "data/unlabeled_rgb"
else:
    print("\nUsing original images for training.")
    optical_dir = "data/unlabeled/imagery"

### BYOL Pretraining

In [None]:
print("\n=== BYOL Pretraining ===\n")

# Run BYOL pretraining
# Passes the image directory selected by the channel-check cell (optical_dir),
# colab_results as the output directory, and 20 as the BYOL epoch count.
# NOTE(review): later cells load colab_results/byol_pretrained.pth, presumably
# written by this run -- confirm against examples/byol_mvp_workflow.py.
!python examples/byol_mvp_workflow.py \
    --optical-dir {optical_dir} \
    --output-dir colab_results \
    --byol-epochs 20

### BYOL Inference with Pretrained Model

In [None]:
print("\n=== BYOL Inference with Pretrained Model ===\n")

# Run inference with the pretrained BYOL model (without fine-tuning)
!python examples/byol_mvp_workflow.py \
    --inference-only \
    --model-path colab_results/byol_pretrained.pth \
    --test-image {imagery_filename} \
    --output-dir colab_results

### Optional: Add Labeled Data for Fine-tuning

In [None]:
print("\n=== Optional: Add Labeled Data for Fine-tuning ===\n")
print("Do you have labeled data for fine-tuning? If yes, upload them now.")
print("If not, you can skip this step and use the pretrained model directly.")

use_labeled_data = input("Do you have labeled data? (yes/no): ")

if use_labeled_data.lower() == 'yes':
    # Create directories for labeled data
    !mkdir -p data/labeled/imagery
    !mkdir -p data/labeled/labels
    
    # Upload labeled imagery
    print("\nPlease upload labeled imagery files (GeoTIFF format):")
    uploaded_labeled_imagery = files.upload()
    
    # Save uploaded files to the labeled imagery directory
    for filename in uploaded_labeled_imagery.keys():
        with open(f"data/labeled/imagery/{filename}", 'wb') as f:
            f.write(uploaded_labeled_imagery[filename])
        print(f"Saved {filename} to data/labeled/imagery/")
    
    # Upload label masks
    print("\nPlease upload label mask files (GeoTIFF format):")
    uploaded_labels = files.upload()
    
    # Save uploaded files to the labeled labels directory
    for filename in uploaded_labels.keys():
        with open(f"data/labeled/labels/{filename}", 'wb') as f:
            f.write(uploaded_labels[filename])
        print(f"Saved {filename} to data/labeled/labels/")
    
    # Check and convert labeled imagery if needed
    !mkdir -p data/labeled_rgb
    
    need_conversion = False
    for filename in os.listdir('data/labeled/imagery'):
        if filename.endswith(('.tif', '.tiff')):
            input_path = os.path.join('data/labeled/imagery', filename)
            output_path = os.path.join('data/labeled_rgb', filename)
            
            # Check number of channels
            with rasterio.open(input_path) as src:
                num_channels = src.count
                print(f"Image {filename} has {num_channels} channel(s)")
                
                if num_channels < 3:
                    need_conversion = True
                    print(f"Converting {filename} to RGB format...")
                    convert_grayscale_to_rgb(input_path, output_path)
                else:
                    print(f"Image {filename} already has {num_channels} channels, no conversion needed.")
    
    # Determine which directory to use for fine-tuning
    if need_conversion:
        print("\nUsing converted RGB images for fine-tuning.")
        labeled_optical_dir = "data/labeled_rgb"
    else:
        print("\nUsing original images for fine-tuning.")
        labeled_optical_dir = "data/labeled/imagery"
    
    # Run BYOL fine-tuning
    print("\n=== BYOL Fine-tuning ===\n")
    !python examples/byol_mvp_workflow.py \
        --optical-dir {labeled_optical_dir} \
        --label-dir data/labeled/labels \
        --output-dir colab_results \
        --model-path colab_results/byol_pretrained.pth \
        --num-labeled 5 \
        --finetune-epochs 10
    
    # Run inference with the fine-tuned BYOL model
    print("\n=== BYOL Inference with Fine-tuned Model ===\n")
    !python examples/byol_mvp_workflow.py \
        --inference-only \
        --model-path colab_results/byol_finetuned.pth \
        --test-image {imagery_filename} \
        --output-dir colab_results
else:
    print("Skipping fine-tuning. Using pretrained model for inference.")

## Optional: SAR Data Integration

In [None]:
print("\n=== Optional: SAR Data Integration ===\n")
print("Do you have SAR imagery to integrate? If yes, upload it now.")
print("If not, you can skip this step.")

use_sar_data = input("Do you have SAR data? (yes/no): ")

if use_sar_data.lower() == 'yes':
    # Upload SAR imagery
    print("Please upload your SAR imagery file (GeoTIFF format):")
    uploaded_sar = files.upload()
    
    # Get the filename of the uploaded file
    sar_filename = list(uploaded_sar.keys())[0]
    print(f"Uploaded SAR file: {sar_filename}")
    
    # Run SAR integration example
    print("\n=== SAR Integration ===\n")
    !python examples/sar_integration_example.py \
        --imagery {imagery_filename} \
        --sar {sar_filename} \
        --output-dir colab_results \
        --visualize
    
    # Combining BYOL and SAR
    print("\n=== BYOL + SAR Integration ===\n")
    # Use the appropriate model path based on whether fine-tuning was done
    model_path = "colab_results/byol_finetuned.pth" if use_labeled_data.lower() == 'yes' else "colab_results/byol_pretrained.pth"
    
    !python examples/byol_mvp_workflow.py \
        --inference-only \
        --model-path {model_path} \
        --test-image {imagery_filename} \
        --test-sar {sar_filename} \
        --output-dir colab_results
else:
    print("Skipping SAR integration.")