# DrainageAI Tutorial

This notebook demonstrates how to use the DrainageAI system to detect drainage pipes in satellite imagery.

## Setup

First, let's import the necessary modules and set up the environment.

In [None]:
import os
import sys
import numpy as np
import matplotlib.pyplot as plt
import torch
import rasterio
from rasterio.plot import show
import geopandas as gpd

# Add parent directory to path for imports.
# NOTE: '__file__' is a quoted string here, not the module attribute, so
# os.path.abspath() resolves it against the current working directory. The two
# dirname() calls then strip the fake file name and one directory level, which
# means the directory appended is the parent of the working directory.
# Run the notebook from its own folder so that this is the project root
# (presumably where `models` and `preprocessing` live -- verify the layout).
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath('__file__'))))

from models import EnsembleModel, CNNModel, GNNModel, SelfSupervisedModel
from preprocessing import DataLoader, ImageProcessor, GraphBuilder, Augmentation

## Load Sample Data

Next, let's load some sample satellite imagery and elevation data. For this tutorial, place the sample files in the `data/samples` directory as `satellite_imagery.tif` and `elevation.tif`; these are the paths the cell below reads.

In [None]:
# Define paths to sample data (relative to the notebook's working directory)
imagery_path = '../data/samples/satellite_imagery.tif'
elevation_path = '../data/samples/elevation.tif'

# Load imagery: every band, plus the dataset metadata (CRS, transform, ...)
# that later cells reuse when writing georeferenced outputs.
with rasterio.open(imagery_path) as src:
    imagery = src.read()
    imagery_meta = src.meta

# Load elevation data
with rasterio.open(elevation_path) as src:
    elevation = src.read(1)  # Assume single band
    elevation_meta = src.meta

# Display imagery.
# An explicit Axes is passed to rasterio's show(): when no `ax` is given,
# show() calls plt.show() itself, so a title or axis setting applied afterwards
# would end up on a new, empty figure instead of the image.
fig, ax = plt.subplots(figsize=(10, 10))
show(imagery, transform=imagery_meta['transform'], ax=ax)
ax.set_title('Satellite Imagery')
ax.set_axis_off()
plt.show()

# Display elevation data
fig, ax = plt.subplots(figsize=(10, 10))
show(elevation, transform=elevation_meta['transform'], ax=ax, cmap='terrain')
ax.set_title('Elevation Data')
# show() draws through ax.imshow(), so the image it added is the last entry in
# ax.images; hand it to colorbar() explicitly rather than relying on pyplot's
# "current image", which is not set by Axes-level drawing.
fig.colorbar(ax.images[-1], ax=ax, label='Elevation (m)')
ax.set_axis_off()
plt.show()

## Preprocess Data

Now, let's preprocess the data for input to the model.

In [None]:
# Build the image processor and run the raw imagery through it.
image_processor = ImageProcessor()
preprocessed_imagery = image_processor.preprocess(imagery)

# Build the graph helper and derive per-node positions and features.
# NOTE(review): _extract_nodes_from_raster is a private (underscore-prefixed)
# method and receives the raw `imagery`, not `preprocessed_imagery` -- confirm
# both are intended, and prefer a public GraphBuilder entry point if one exists.
graph_builder = GraphBuilder()
node_positions, node_features = graph_builder._extract_nodes_from_raster(
    imagery, elevation
)

# Bundle everything the model consumes into a single mapping.
input_data = dict(
    imagery=preprocessed_imagery,
    node_features=node_features,
    node_positions=node_positions,
    elevation=elevation,
)

# Quick sanity report on what was produced.
print(
    f"Preprocessed imagery shape: {preprocessed_imagery.shape}",
    f"Number of nodes: {len(node_positions)}",
    f"Node features shape: {node_features.shape}",
    sep="\n",
)

## Load Model

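Next, let's create the DrainageAI model. For this tutorial, we'll use the ensemble model, which combines CNN, GNN, and self-supervised approaches. The cell below only instantiates the model; if `EnsembleModel()` does not load trained weights on its own, load a trained checkpoint before relying on the predictions.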

In [None]:
# Instantiate the ensemble with its default constructor arguments.
# NOTE(review): no checkpoint is loaded in this cell -- confirm whether
# EnsembleModel() restores trained weights by itself.
model = EnsembleModel()

# Put the model in evaluation mode before running inference.
model.eval()

model_name = type(model).__name__
print(f"Model created: {model_name}")

# Total element count across every parameter tensor the model exposes.
parameter_count = sum(param.numel() for param in model.parameters())
print(f"Number of parameters: {parameter_count}")

## Run Inference

Now, let's run inference on the preprocessed data.

In [None]:
# Run inference with gradient tracking disabled (nothing is being trained).
with torch.no_grad():
    result = model.predict(input_data)

# Convert the prediction to a NumPy array *before* thresholding.
# detach().cpu() makes the conversion safe for tensors that live on a GPU or
# are attached to an autograd graph; a prediction that is already array-like
# is passed through np.asarray unchanged.
if isinstance(result, torch.Tensor):
    scores = result.detach().cpu().numpy()
else:
    scores = np.asarray(result)

# Apply confidence threshold: 1.0 where the score exceeds it, 0.0 elsewhere.
confidence_threshold = 0.5
binary_result = (scores > confidence_threshold).astype(np.float32)

print(f"Result shape: {binary_result.shape}")

# Display result.
# NOTE(review): indexing [0] assumes the first axis is a single band/batch
# entry and the remainder is a 2-D map -- confirm against model.predict().
plt.figure(figsize=(10, 10))
plt.imshow(binary_result[0], cmap='gray')
plt.title('Drainage Detection Result')
plt.axis('off')
plt.show()

## Save Results

Next, let's save the results as a GeoTIFF file.

In [None]:
# Where the raster mask is written; the folder is created on demand.
output_path = '../data/results/drainage_detection.tif'
os.makedirs(os.path.dirname(output_path), exist_ok=True)

# Single-band GeoTIFF description. Size and dtype come from the mask itself;
# CRS and transform are taken from the source imagery so the output lines up
# with it.
geotiff_profile = {
    'driver': 'GTiff',
    'height': binary_result.shape[1],
    'width': binary_result.shape[2],
    'count': 1,
    'dtype': binary_result.dtype,
    'crs': imagery_meta['crs'],
    'transform': imagery_meta['transform'],
}

# Write the first slice of the mask into band 1.
with rasterio.open(output_path, 'w', **geotiff_profile) as dst:
    dst.write(binary_result[0], 1)

print(f"Results saved to {output_path}")

## Vectorize Results

Now, let's vectorize the results to create a shapefile of drainage lines.

In [None]:
from skimage.morphology import skeletonize
from shapely.geometry import LineString


def _group_adjacent_runs(pixels):
    """Split (row, col) pixels into runs of consecutive 8-neighbours.

    A new run starts whenever a pixel is more than one row or one column away
    from the pixel that precedes it in ``pixels``.

    NOTE: adjacency is only tested between *consecutive* entries, so the result
    depends on the input order. With row-major order (as produced by
    np.argwhere) two features crossing the same rows interleave and get
    fragmented; a graph-based tracer would be needed to follow such shapes.
    """
    runs = []
    current_run = []
    for pixel in pixels:
        if current_run:
            previous = current_run[-1]
            if (abs(pixel[0] - previous[0]) > 1 or
                    abs(pixel[1] - previous[1]) > 1):
                # Not adjacent: close the current run and start a new one.
                runs.append(current_run)
                current_run = []
        current_run.append(pixel)
    if current_run:
        runs.append(current_run)
    return runs


def _pixels_to_line(pixels, transform, tolerance):
    """Convert (row, col) pixels to a simplified LineString in world coordinates.

    The affine ``transform`` maps (col, row) to the pixel's upper-left corner,
    so 0.5 is added to both indices to place each vertex at the pixel centre.
    ``tolerance`` is passed to LineString.simplify and is expressed in the
    units of the raster's CRS.
    """
    coords = [transform * (col + 0.5, row + 0.5) for row, col in pixels]
    return LineString(coords).simplify(tolerance)


# Skeletonize the binary result
skeleton = skeletonize(binary_result[0] > 0)

# Display skeleton
plt.figure(figsize=(10, 10))
plt.imshow(skeleton, cmap='gray')
plt.title('Skeletonized Result')
plt.axis('off')
plt.show()

# Find all skeleton pixels as (row, col) pairs in row-major order
skeleton_pixels = np.argwhere(skeleton)

# Simplification tolerance in CRS units.
# NOTE(review): 1.0 suits a metre-based CRS; for a geographic CRS (degrees)
# this would collapse the lines and should be reduced to about one pixel size.
simplify_tolerance = 1.0

# Convert to vector lines; a run of a single pixel cannot form a line.
lines = [
    _pixels_to_line(run, imagery_meta['transform'], simplify_tolerance)
    for run in _group_adjacent_runs(skeleton_pixels)
    if len(run) > 1
]

# Create GeoDataFrame
gdf = gpd.GeoDataFrame(geometry=lines, crs=imagery_meta['crs'])

# Save to shapefile (create the folder so this cell also works on its own)
output_shapefile = '../data/results/drainage_lines.shp'
os.makedirs(os.path.dirname(output_shapefile), exist_ok=True)
gdf.to_file(output_shapefile)

print(f"Vectorized results saved to {output_shapefile}")
print(f"Number of drainage lines: {len(lines)}")

## Visualize Results

Finally, let's visualize the results overlaid on the original imagery.

In [None]:
# Read the vector lines back from the shapefile written earlier.
drainage_lines = gpd.read_file(output_shapefile)

# One shared Axes so the raster and the vectors are drawn on top of each other.
fig, ax = plt.subplots(figsize=(12, 12))

# Base layer: the satellite imagery, placed using its own transform.
basemap_transform = imagery_meta['transform']
show(imagery, ax=ax, transform=basemap_transform)

# Overlay: the detected drainage lines.
drainage_lines.plot(color='red', linewidth=2, ax=ax)

# Title the figure and hide the axis decorations.
ax.set_title('Detected Drainage Lines')
ax.axis('off')

plt.show()

## Conclusion

In this tutorial, we've demonstrated how to use the DrainageAI system to detect drainage pipes in satellite imagery. We've covered the following steps:

1. Loading and preprocessing satellite imagery and elevation data
2. Running inference with the DrainageAI model
3. Saving the results as a GeoTIFF file
4. Vectorizing the results to create a shapefile of drainage lines
5. Visualizing the results overlaid on the original imagery

This workflow can be applied to any satellite imagery to detect drainage pipes in agricultural fields.