# Debugging

In [1]:
# Load IPython's autoreload extension so edits to imported modules are picked up
%load_ext autoreload
# Mode 2: reload all modules before executing each cell
%autoreload 2
# Open the post-mortem debugger automatically whenever a cell raises
%pdb on

Automatic pdb calling has been turned ON


In [2]:
import os, sys
from pathlib import Path
from tqdm import tqdm
from random import *
from datetime import *

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from shapely.geometry import *
import rasterio
import geopandas as gpd

## Potsdam

In [None]:
# Root directory that holds the datasets inspected in this notebook
store = Path("/home/akramzaytar/ssdprivate/akramz_datasets/wtl")
# Report the offending path instead of failing with a bare AssertionError
assert store.exists(), f"Dataset root not found: {store}"

In [None]:
# This section inspects Potsdam (six bands are read per image below), so
# point at the Potsdam folder rather than the Vaihingen one
data_store = store / "potsdam"
assert data_store.exists(), f"Dataset folder not found: {data_store}"

In [None]:
# Collect the GeoTIFF tiles of the training split. glob() yields files in
# arbitrary order, so sort them to keep the index-based selection used for
# plotting reproducible across runs and machines.
imgs_dir = data_store / "train"
imgs = sorted(imgs_dir.glob("*.tif"))
len(imgs)

In our case, we have the following bands:
- RGB (3)
- NIR (1)
- DSM (1)
- Mask (1)

In [None]:
# Set the number of images that you want to plot
N = 10

# Rows show imgs[N] .. imgs[2N - 1], so at least 2 * N files must be available
assert len(imgs) >= 2 * N, f"Need at least {2 * N} images, found {len(imgs)}"

# Init the figure: one row per image, one column per panel
fig, axes = plt.subplots(N, 4, figsize=(4 * 4, N * 4))

for i in range(N):

    # Grab the axs of the current row
    axs = axes[i]

    # Grab the image path (the first N files are skipped)
    img_path = imgs[i + N]

    # Read every band using rasterio; the file is closed when the block exits
    with rasterio.open(img_path) as src:
        img = src.read()

    # Split the band stack: 0-2 RGB, 3 NIR, 4 DSM, 5 mask
    # NOTE(review): the / 255.0 scaling assumes 8-bit values - confirm for NIR/DSM
    rgb = img[:3, ...] / 255.0
    nir = img[3, ...] / 255.0
    dsm = img[4, ...] / 255.0
    mask = img[5, ...]

    # Plot the image in a single row that shows RGB, NIR, DSM, and Mask
    # (the RGB stack is moved from bands-first to bands-last for imshow)
    axs[0].imshow(np.moveaxis(rgb, 0, -1))
    axs[1].imshow(nir, cmap="cividis")
    axs[2].imshow(dsm, cmap="coolwarm")
    axs[3].imshow(mask, cmap="Set1", vmin=0, vmax=5)

    # Add titles
    axs[0].set_title("RGB")
    axs[1].set_title("NIR")
    axs[2].set_title("DSM")
    axs[3].set_title("Mask")

    # Remove axis
    for ax in axs:
        ax.axis("off")

plt.show()

## Vaihingen

In [None]:
# Root directory that holds the datasets inspected in this notebook
store = Path("/home/akramzaytar/ssdprivate/akramz_datasets/wtl")
# Report the offending path instead of failing with a bare AssertionError
assert store.exists(), f"Dataset root not found: {store}"

In [None]:
# Folder of the Vaihingen dataset inside the dataset root
data_store = store / "vaihingen"
assert data_store.exists(), f"Dataset folder not found: {data_store}"

In [None]:
# Collect the GeoTIFF tiles of the test split. glob() yields files in
# arbitrary order, so sort them to keep the index-based selection used for
# plotting reproducible across runs and machines.
imgs_dir = data_store / "test"
imgs = sorted(imgs_dir.glob("*.tif"))
len(imgs)

In our case, we have the following bands:
- NIR-GB (3)
- DSM (1)
- Mask (1)

In [None]:
# Set the number of images that you want to plot
N = 5

# Rows show imgs[N] .. imgs[2N - 1], so at least 2 * N files must be available
assert len(imgs) >= 2 * N, f"Need at least {2 * N} images, found {len(imgs)}"

# Init the figure: one row per image, one column per panel
fig, axes = plt.subplots(N, 3, figsize=(4 * 4, N * 4))

for i in range(N):

    # Grab the axs of the current row
    axs = axes[i]

    # Grab the image path (the first N files are skipped)
    img_path = imgs[i + N]

    # Read every band using rasterio; the file is closed when the block exits
    with rasterio.open(img_path) as src:
        img = src.read()

    # Split the band stack: 0-2 colour composite, 3 DSM, 4 mask
    # NOTE(review): the notes above describe the first three bands as NIR-GB,
    # yet the panel is titled "RGB" - confirm which label is intended
    # NOTE(review): the / 255.0 scaling assumes 8-bit values - confirm for the DSM
    rgb = img[:3, ...] / 255.0
    dsm = img[3, ...] / 255.0
    mask = img[4, ...]

    # Plot the image in a single row that shows the composite, DSM, and Mask
    # (the composite is moved from bands-first to bands-last for imshow)
    axs[0].imshow(np.moveaxis(rgb, 0, -1))
    axs[1].imshow(dsm, cmap="coolwarm")
    axs[2].imshow(mask, cmap="Set1", vmin=0, vmax=5)

    # Add titles
    axs[0].set_title("RGB")
    axs[1].set_title("DSM")
    axs[2].set_title("Mask")

    # Remove axis
    for ax in axs:
        ax.axis("off")

plt.show()

## DFC-22

In [None]:
# Root directory that holds the datasets inspected in this notebook
store = Path("/home/akramzaytar/ssdprivate/akramz_datasets/wtl")
# Report the offending path instead of failing with a bare AssertionError
assert store.exists(), f"Dataset root not found: {store}"

In [None]:
# Folder of the DFC-22 dataset inside the dataset root
data_store = store / "dfc22"
assert data_store.exists(), f"Dataset folder not found: {data_store}"

In [None]:
# Gather every GeoTIFF found directly inside the "test" folder
imgs_dir = data_store / "test"
imgs = [tif_path for tif_path in imgs_dir.glob("*.tif")]
len(imgs)

In our case, we have the following bands:
- NIR-GB (3)
- DSM (1)
- Mask (1)

In [None]:
# How many randomly drawn images to show (one figure row each)
N = 5

# Grid of N rows by 3 panels
fig, axes = plt.subplots(nrows=N, ncols=3, figsize=(16, 4 * N))

# Panel captions, left to right
panel_titles = ("RGB", "DSM", "Mask")

for axs in axes:

    # Draw one file at random and load all of its bands
    with rasterio.open(choice(imgs)) as src:
        img = src.read()

    # Band layout used here: 0-2 colour composite, 3 DSM, 4 mask
    # NOTE(review): the notes above describe the first three bands as NIR-GB,
    # yet the panel is titled "RGB" - confirm which label is intended
    rgb = img[:3, ...] / 255.0
    dsm = img[3, ...] / 255.0
    mask = img[4, ...]

    # (data, imshow keyword arguments) for each panel of the row;
    # the composite goes from bands-first to bands-last for imshow
    panels = (
        (np.moveaxis(rgb, 0, -1), {}),
        (dsm, {"cmap": "coolwarm"}),
        (mask, {"cmap": "tab20", "vmin": 0, "vmax": 16}),
    )

    # Draw, caption, and strip the axis decorations of every panel
    for ax, (data, imshow_kwargs) in zip(axs, panels):
        ax.imshow(data, **imshow_kwargs)
    for ax, title in zip(axs, panel_titles):
        ax.set_title(title)
    for ax in axs:
        ax.axis("off")

plt.show()

---

# Training

We want to make sure the batches are loading correctly.

I have changed the dataset class to accept txt files, so let's test the data loader:

In [None]:
from omegaconf import OmegaConf


# Run parameters for this check
dataset = "vaihingen"
method_name = "vendi_clustering_resnet"
gpu = 0
# NOTE(review): this path is under "ssdshared" while the dataset cells above
# use "ssdprivate" - confirm that is intended
scores_file_path = "/home/akramzaytar/ssdshared/akramz_datasets/wtl/submissions/vaihingen/complexity.txt"
config_file_path = "../config.yaml"

# Load the base config and apply the run-specific overrides
config = OmegaConf.load(config_file_path)
config["evaluation"]["method_name"] = method_name
config["evaluation"]["scores_file"] = Path(scores_file_path)
# A negative gpu index selects -1; otherwise a one-element device list
config["trainer"]["devices"] = -1 if gpu < 0 else [gpu]
config["datamodule"]["root"] = Path(config["datamodule"]["root"]) / dataset
config["datamodule"]["dataset"] = dataset

# Per-dataset learning settings, keyed by dataset name
learning_settings = {
    "vaihingen": {"in_channels": 4, "ignore_index": -1, "num_classes": 6},
    "potsdam": {"in_channels": 5, "ignore_index": -1, "num_classes": 6},
    "dfc22": {"in_channels": 4, "ignore_index": 0, "num_classes": 16},
}
if dataset not in learning_settings:
    raise ValueError("Unknown dataset")
for option, value in learning_settings[dataset].items():
    config["learning"][option] = value

In [None]:
# Size multipliers listed in the config (applied to the row count below)
evaluation_percentages = config["evaluation"]["sizes"]

# Scores file: space-separated, no header line; its row count is the sample count
scores = pd.read_csv(scores_file_path, header=None, sep=" ")
n_samples = scores.shape[0]

# Replace the multipliers with absolute sample counts (int() truncates).
# NOTE: this overwrites the config entry in place, so re-running this cell
# without reloading the config would scale the counts a second time.
config["evaluation"]["sizes"] = [
    int(n_samples * fraction) for fraction in evaluation_percentages
]

In [None]:
# Project data modules, one per supported dataset
from mveo_benchmarks.datamodule import (
    DFC2022DataModule,
    PotsdamDataModule,
    VaihingenDataModule,
)

# Dataset name as recorded in the config
dataset = config["datamodule"]["dataset"]

# Map each dataset name to its data module class (the class, not an instance)
datamodule_classes = {
    "dfc22": DFC2022DataModule,
    "potsdam": PotsdamDataModule,
    "vaihingen": VaihingenDataModule,
}
if dataset not in datamodule_classes:
    raise ValueError("Unknown dataset")
dm = datamodule_classes[dataset]

In [None]:
# Dictionary keyed by sample size; each entry starts as an empty list.
# Note: this rebinds `scores`, which held the scores DataFrame in an earlier cell.
scores = {}

# Mirrors a sizes x runs loop, but both loops exit after their first
# iteration, so only one data module (first size, first run) is built here.
for sample_size in config["evaluation"]["sizes"]:
    # Start an empty list for this sample size
    scores[sample_size] = []

    # One pass per configured run (cut short by the break below)
    for run in range(config["evaluation"]["runs"]):

        # Instantiate the selected data module class with the config's
        # datamodule options plus the training size and scores file.
        # No training happens in this cell.
        datamodule = dm(
            **config.datamodule,
            train_size=sample_size,
            train_scores_file=config["evaluation"]["scores_file"],
        )

        # Stop after the first run
        break
    # Stop after the first sample size
    break

In [None]:
# Run the data module's setup step before requesting any data loader
# (presumably this builds the underlying datasets - confirm in the datamodule code)
datamodule.setup()

In [None]:
# Ask the data module for its training data loader
train_loader = datamodule.train_dataloader()

In [None]:
# Pull the first batch from a fresh iterator over the training loader
batch = next(iter(train_loader))

In [None]:
# Value range of the batch's "image" entry, displayed as a (min, max) tuple
image_batch = batch["image"]
image_batch.min(), image_batch.max()

---