In [None]:
from zipfile import ZipFile
import os

# Mount Google Drive if needed
# from google.colab import drive
# drive.mount('/content/drive')

# Upload or copy your zip files into the Colab working directory first.
# Maps each archive name to the folder it is extracted into.
ARCHIVES = {
    "training_patches_64tile.zip": "training_data",
    "New AOI 2018-2024-20251014T043114Z-1-001.zip": "new_aoi",
    "composites-20251014T112925Z-1-001.zip": "composites",
    "RGB Composites-20251014T112925Z-1-001.zip": "rgb_composites",
}

# Pure-Python extraction: re-running the cell overwrites silently instead of
# blocking on unzip's interactive "replace?" prompt, and a missing archive is
# reported explicitly rather than buried in shell output.
for archive, dest in ARCHIVES.items():
    if not os.path.isfile(archive):
        print(f"[WARN] Archive not found, skipping: {archive}")
        continue
    with ZipFile(archive) as zf:
        zf.extractall(dest)

# Inspect the folder structure
if os.path.isdir("training_data"):
    print("\n".join(sorted(os.listdir("training_data"))))



unzip:  cannot find or open New AOI 2018-2024-20251014T043114Z-1-001.zip, New AOI 2018-2024-20251014T043114Z-1-001.zip.zip or New AOI 2018-2024-20251014T043114Z-1-001.zip.ZIP.
training_patches_32tile


In [None]:
# List the extracted composite GeoTIFFs to confirm the archive unpacked as expected.
!ls composites/composites/

00_Landsat7_Composite_C2_1999_2000_summer.tif
01_Landsat7_Composite_C2_2000_2001_summer.tif
02_Landsat7_Composite_C2_2001_2002_summer.tif
03_Landsat7_Composite_C2_2002_2003_summer.tif
04_Landsat5_Composite_C2_2003_2004_summer.tif
05_Landsat5_Composite_C2_2004_2005_summer.tif
06_Landsat5_Composite_C2_2005_2006_summer.tif
07_Landsat5_Composite_C2_2006_2007_summer.tif
08_Landsat5_Composite_C2_2007_2008_summer.tif
09_Landsat5_Composite_C2_2008_2009_summer.tif
10_Landsat5_Composite_C2_2009_2010_summer.tif
11_Landsat5_Composite_C2_2010_2011_summer.tif
12_Landsat5_Composite_C2_2011_2012_summer.tif
14_L8_Composite_2014_2015.tif
15_L8_Composite_2015_2016.tif
16_L8_Composite_2016_2017.tif
17_L8_Composite_2017_2018.tif
18_L8_Composite_2018_2019.tif
19_L8_Composite_2019_2020.tif
20_L8_Composite_2020_2021.tif
21_L8_Composite_2021_2022.tif
22_L8_Composite_2022_2023.tif
23_L8_Composite_2023_2024.tif


In [None]:
# List the extracted RGB composites (the folder name contains a space, hence the backslash escape).
!ls rgb_composites/RGB\ Composites

00_Landsat7_RGB_Composite_C2_1999_2000_summer.tif
01_Landsat7_RGB_Composite_C2_2000_2001_summer.tif
02_Landsat7_RGB_Composite_C2_2001_2002_summer.tif
03_Landsat7_RGB_Composite_C2_2002_2003_summer.tif
04_Landsat5_RGB_Composite_C2_2003_2004_summer.tif
05_Landsat5_RGB_Composite_C2_2004_2005_summer.tif
06_Landsat5_RGB_Composite_C2_2005_2006_summer.tif
07_Landsat5_RGB_Composite_C2_2006_2007_summer.tif
08_Landsat5_RGB_Composite_C2_2007_2008_summer.tif
09_Landsat5_RGB_Composite_C2_2008_2009_summer.tif
10_Landsat5_RGB_Composite_C2_2009_2010_summer.tif
11_Landsat5_RGB_Composite_C2_2010_2011_summer.tif
12_Landsat5_RGB_Composite_C2_2011_2012_summer.tif


In [None]:
!pip install rasterio

Collecting rasterio
  Downloading rasterio-1.4.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.1 kB)
Collecting affine (from rasterio)
  Downloading affine-2.4.0-py3-none-any.whl.metadata (4.0 kB)
Collecting cligj>=0.5 (from rasterio)
  Downloading cligj-0.7.2-py3-none-any.whl.metadata (5.0 kB)
Collecting click-plugins (from rasterio)
  Downloading click_plugins-1.1.1.2-py2.py3-none-any.whl.metadata (6.5 kB)
Downloading rasterio-1.4.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (22.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m22.3/22.3 MB[0m [31m21.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading cligj-0.7.2-py3-none-any.whl (7.1 kB)
Downloading affine-2.4.0-py3-none-any.whl (15 kB)
Downloading click_plugins-1.1.1.2-py2.py3-none-any.whl (11 kB)
Installing collected packages: cligj, click-plugins, affine, rasterio
Successfully installed affine-2.4.0 click-plugins-1.1.1.2 cligj-0.7.2 rasterio-1.4.3


In [None]:
import rasterio
import numpy as np
import glob
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from xgboost import XGBClassifier

# Paths
img_dir = "training_data/training_patches_32tile/images"
lab_dir = "training_data/training_patches_32tile/labels"

# Label value that marks unlabeled pixels, or None to keep every label.
# The previous `lab_1d >= 0` test was always true for the unsigned label
# rasters, so it never removed anything; set this explicitly if the labels
# use a sentinel (e.g. 255).
LABEL_IGNORE_VALUE = None

# List all image tiles; sorting both lists is what pairs image i with label i.
img_files = sorted(glob.glob(f"{img_dir}/*.tif"))
lab_files = sorted(glob.glob(f"{lab_dir}/*.tif"))

print(f"{len(img_files)} image tiles, {len(lab_files)} label tiles")

# zip() would silently drop unmatched tiles, so fail loudly instead.
if not img_files:
    raise FileNotFoundError(f"No .tif tiles found in {img_dir}")
if len(img_files) != len(lab_files):
    raise ValueError(
        f"Tile count mismatch: {len(img_files)} images vs {len(lab_files)} labels"
    )

X_list, y_list = [], []

# Loop through each patch pair
for img_fp, lab_fp in zip(img_files, lab_files):
    with rasterio.open(img_fp) as src:
        img = src.read()  # shape: (bands, height, width)
    with rasterio.open(lab_fp) as src:
        lab = src.read(1)  # single band

    n_bands, h, w = img.shape
    if lab.shape != (h, w):
        raise ValueError(
            f"Shape mismatch: {img_fp} is {(h, w)} but {lab_fp} is {lab.shape}"
        )

    # One row per pixel, one column per band.
    img_2d = img.reshape(n_bands, h * w).T
    lab_1d = lab.ravel()

    # Keep only pixels with finite values in every band (the prediction loop
    # applies the same rule), and optionally drop the unlabeled sentinel.
    mask = np.all(np.isfinite(img_2d), axis=1)
    if LABEL_IGNORE_VALUE is not None:
        mask &= lab_1d != LABEL_IGNORE_VALUE

    X_list.append(img_2d[mask])
    y_list.append(lab_1d[mask])

# Combine all tiles
X = np.vstack(X_list)
y = np.concatenate(y_list)

print("Feature matrix shape:", X.shape)
print("Label vector shape:", y.shape)
np.unique(y, return_counts=True)


71 image tiles, 71 label tiles
Feature matrix shape: (290816, 4)
Label vector shape: (290816,)


(array([0, 1, 2, 3, 4, 5], dtype=uint16),
 array([272750,    480,    542,   7010,   8066,   1968]))

In [None]:
## Train XGBoost Model

# Stratified 80/20 split so every class keeps its proportion in both parts.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# The labels have six classes, so the binary-only `binary:logistic` /
# `logloss` settings are dropped: XGBClassifier selects the objective and
# its default metric from the number of classes seen in `fit`.
# NOTE: class 0 dominates the label counts; consider passing `sample_weight`
# to `fit` if minority-class recall matters.
model = XGBClassifier(
    n_estimators=200,
    learning_rate=0.1,
    max_depth=6,
    subsample=0.8,
    colsample_bytree=0.8,
    random_state=42,  # explicit seed for the row/column subsampling
)
model.fit(X_train, y_train)

# Evaluate on the held-out pixels
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))


              precision    recall  f1-score   support

           0       0.96      1.00      0.98     54551
           1       0.96      0.48      0.64        96
           2       0.93      0.47      0.63       108
           3       0.91      0.16      0.27      1402
           4       0.89      0.36      0.51      1613
           5       0.95      0.48      0.64       394

    accuracy                           0.95     58164
   macro avg       0.93      0.49      0.61     58164
weighted avg       0.95      0.95      0.94     58164

[[54441     2     4    22    72    10]
 [   50    46     0     0     0     0]
 [   57     0    51     0     0     0]
 [ 1180     0     0   222     0     0]
 [ 1038     0     0     0   575     0]
 [  206     0     0     0     0   188]]


In [None]:
import rasterio
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap, BoundaryNorm
import folium
import imageio.v2 as iio
from pathlib import Path

# Step 1: Define the RGB image normalization function
def rgb_normalize(rgb_path, bands=(3, 2, 1), percentiles=(2, 98)):
    """Read three bands from a raster and contrast-stretch each to [0, 1].

    Parameters
    ----------
    rgb_path : str or Path
        Raster file to read.
    bands : sequence of int, optional
        1-based band indexes to use as red, green and blue, in that order.
        Defaults to ``(3, 2, 1)``.
    percentiles : tuple of float, optional
        Lower and upper percentile used for the per-band stretch.
        Defaults to ``(2, 98)``.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(height, width, len(bands))`` with every value
        finite and inside ``[0, 1]``.

    Notes
    -----
    Non-finite pixels are excluded from the percentile computation and are
    written as 0 in the result, so NaN/inf never reach the plotting code.
    A band that is constant or has no finite pixels becomes all zeros.
    """
    with rasterio.open(rgb_path) as src:
        rgb = src.read(list(bands)).astype(float)

    for i in range(rgb.shape[0]):
        band = rgb[i]
        finite = np.isfinite(band)
        stretched = np.zeros_like(band)
        if finite.any():
            lo, hi = np.percentile(band[finite], percentiles)
            if hi > lo:  # a constant band has nothing to stretch
                stretched[finite] = np.clip((band[finite] - lo) / (hi - lo), 0, 1)
        rgb[i] = stretched

    # (bands, H, W) -> (H, W, bands), the layout imshow expects
    return np.moveaxis(rgb, 0, -1)

# Step 2: Define the static overlay function to combine the RGB image with the prediction mask
def make_static_overlay(rgb_path: Path, pred_path: Path, out_png: Path):
    """Render the prediction classes 1 and 2 on top of the RGB image as a PNG.

    Parameters
    ----------
    rgb_path : Path or str
        Raster passed to ``rgb_normalize`` to build the background image.
    pred_path : Path or str
        Single-band class raster; band 1 is read.
    out_png : Path or str
        Destination of the rendered figure.

    Only classes 1 (yellow) and 2 (green) are drawn; every other value is
    masked and therefore transparent.
    """
    with rasterio.open(pred_path) as ds:
        pred = ds.read(1)  # (H, W)

    rgb = rgb_normalize(rgb_path)

    # Nearest-neighbour resample of the class map onto the RGB pixel grid.
    new_h, new_w = rgb.shape[0], rgb.shape[1]
    ys = np.linspace(0, pred.shape[0] - 1, new_h).astype(int)
    xs = np.linspace(0, pred.shape[1] - 1, new_w).astype(int)
    pred_small = pred[ys][:, xs]

    # Mask everything except the two drawn classes: masked cells use the
    # colormap's transparent "bad" colour.
    shown = np.ma.masked_where(~np.isin(pred_small, (1, 2)), pred_small)

    # One bin per colour. The earlier boundaries [0, 1, 2] gave two bins for
    # three colours, which sent class 1 and every value >= 2 to green.
    cmap = ListedColormap([(1, 1, 0, 0.6), (0, 1, 0, 0.6)])  # Yellow, Green
    norm = BoundaryNorm([0.5, 1.5, 2.5], cmap.N)

    fig, ax = plt.subplots(figsize=(9, 9))
    try:
        ax.imshow(rgb)
        # "nearest" keeps class values categorical when the image is rescaled.
        ax.imshow(shown, cmap=cmap, norm=norm, interpolation="nearest")
        ax.set_title("Seagrass prediction (1=sparse, 2=dense)")
        ax.axis("off")
        fig.tight_layout()
        fig.savefig(out_png, dpi=180)
    finally:
        # Always release the figure, even if rendering or saving fails.
        plt.close(fig)
    print(f"Saved static overlay: {out_png}")

# Step 3: Define the Folium map generation function
def make_folium_map(pred_path: Path, html_out: Path):
    """Write an interactive Folium map with the prediction as an image overlay.

    Parameters
    ----------
    pred_path : Path or str
        Single-band class raster; band 1 is read.
    html_out : Path or str
        Destination HTML file. The overlay image is written next to it with
        the suffix ``.overlay.png``.

    Classes 1 (yellow) and 2 (green) are coloured; all other values stay
    fully transparent. If the raster bounds cannot be converted to EPSG:4326
    a hard-coded fallback extent is used and a message is printed.
    """
    # Imported here because the notebook's import cell does not provide it;
    # without it the bounds transform always failed and fell back.
    from rasterio.warp import transform_bounds

    # Accept plain strings as well as Path objects.
    html_out = Path(html_out)
    overlay_png = html_out.with_suffix(".overlay.png")

    # Read everything needed from the dataset while it is still open.
    with rasterio.open(pred_path) as ds:
        pred = ds.read(1)  # Prediction mask (H, W)
        src_crs = ds.crs
        src_bounds = tuple(ds.bounds)

    h, w = pred.shape
    rgba = np.zeros((h, w, 4), dtype=np.uint8)
    rgba[pred == 1] = np.array([255, 255, 0, 140], dtype=np.uint8)  # Sparse (Yellow)
    rgba[pred == 2] = np.array([0, 255, 0, 140], dtype=np.uint8)    # Dense (Green)
    iio.imwrite(overlay_png, rgba)

    try:
        crs_str = src_crs.to_string() if src_crs else "EPSG:4326"
        wgs_minx, wgs_miny, wgs_maxx, wgs_maxy = transform_bounds(
            crs_str, "EPSG:4326", *src_bounds
        )
        # Folium expects [[south, west], [north, east]].
        bounds = [[wgs_miny, wgs_minx], [wgs_maxy, wgs_maxx]]
        center = [(bounds[0][0] + bounds[1][0]) / 2, (bounds[0][1] + bounds[1][1]) / 2]
    except Exception as e:
        print(f"Could not transform bounds, using fallback center. Error: {e}")
        bounds = [[-37.0, 174.0], [-36.0, 175.0]]  # Default center for Auckland
        center = [-36.5, 174.5]

    m = folium.Map(location=center, zoom_start=13, control_scale=True)
    folium.raster_layers.ImageOverlay(
        name="Seagrass Prediction",
        image=str(overlay_png),
        bounds=bounds,
        opacity=0.7,
        interactive=True,
        cross_origin=False,
        zindex=2
    ).add_to(m)
    folium.LayerControl().add_to(m)
    m.save(str(html_out))

    print(f"Interactive map saved: {html_out}")

# Step 4: Apply the above functions to each composite and prediction
composite_dir = "composites/composites"
out_dir = "xgb_predictions"
Path(out_dir).mkdir(parents=True, exist_ok=True)

# Get the list of Landsat 8 composites to classify (kept as strings, sorted by name)
files = sorted(str(p) for p in Path(composite_dir).glob("*_L8_Composite_*.tif"))
print(f"Found {len(files)} Landsat 8 composites")

# Loop through each composite and generate overlays/maps
for composite_path in files:
    # "<nn>_L8_Composite_<start>_<end>.tif" -> "<start>_<end>"
    year_label = "_".join(Path(composite_path).stem.split("_")[-2:])
    print(f"\n🌿 Processing {year_label}...")

    with rasterio.open(composite_path) as src:
        if src.count < 4:
            print(f"[WARN] Skipping {year_label}: only {src.count} band(s), need 4")
            continue
        # NOTE(review): the band selection must match the band order of the
        # training patches — confirm against how those patches were exported.
        img = src.read([4, 3, 2, 5]) if src.count >= 5 else src.read([1, 2, 3, 4])
        profile = src.profile
        h, w = img.shape[1], img.shape[2]

    # One row per pixel; classify only pixels that are finite in every band.
    img_flat = img.reshape(img.shape[0], h * w).T
    mask = np.all(np.isfinite(img_flat), axis=1)

    pred_flat = np.zeros(h * w, dtype=np.uint8)
    if mask.any():  # nothing to predict on a fully invalid composite
        pred_flat[mask] = model.predict(img_flat[mask])
    pred_raster = pred_flat.reshape(h, w)

    out_tif = f"{out_dir}/xgb_pred_{year_label}.tif"
    # Clear the source nodata: it may not be representable as uint8, and
    # 0 is a real class value here.
    profile.update(dtype=rasterio.uint8, count=1, nodata=None)
    with rasterio.open(out_tif, "w", **profile) as dst:
        dst.write(pred_raster, 1)

    print(f"✅ Saved classification: {out_tif}")

    # Save static overlay (best effort: a failure must not stop the loop)
    out_png = f"{out_dir}/{year_label}_overlay.png"
    try:
        make_static_overlay(composite_path, out_tif, out_png)
        print(f"[OK] Static overlay → {out_png}")
    except Exception as e:
        print(f"[WARN] Static overlay failed for {year_label}: {e}")

    # Save Folium interactive map; a Path is passed because the map function
    # derives the overlay file name with Path.with_suffix.
    html_out = Path(out_dir) / f"{year_label}_map.html"
    try:
        make_folium_map(out_tif, html_out)
        print(f"[OK] Map → {html_out}")
    except Exception as e:
        print(f"[WARN] Map failed for {year_label}: {e}")


Found 10 Landsat 8 composites

🌿 Processing 2014_2015...
✅ Saved classification: xgb_predictions/xgb_pred_2014_2015.tif
Saved static overlay: xgb_predictions/2014_2015_overlay.png
[OK] Static overlay → xgb_predictions/2014_2015_overlay.png
[WARN] Map failed for 2014_2015: 'str' object has no attribute 'with_suffix'

🌿 Processing 2015_2016...
✅ Saved classification: xgb_predictions/xgb_pred_2015_2016.tif
Saved static overlay: xgb_predictions/2015_2016_overlay.png
[OK] Static overlay → xgb_predictions/2015_2016_overlay.png
[WARN] Map failed for 2015_2016: 'str' object has no attribute 'with_suffix'

🌿 Processing 2016_2017...
✅ Saved classification: xgb_predictions/xgb_pred_2016_2017.tif
Saved static overlay: xgb_predictions/2016_2017_overlay.png
[OK] Static overlay → xgb_predictions/2016_2017_overlay.png
[WARN] Map failed for 2016_2017: 'str' object has no attribute 'with_suffix'

🌿 Processing 2017_2018...
✅ Saved classification: xgb_predictions/xgb_pred_2017_2018.tif
Saved static overla