# Python Notebook for remote sensing work
Repository made for Artificial Intelligence & Neural Network course with Prof. Ciarfuglia @ Sapienza 

Import necessary items with this:

In [1]:
# Let's import our own modules
import sys
sys.path.insert(0, './src')
import utils



import albumentations as A
from albumentations.pytorch import ToTensorV2
import torch
import torch.nn as nn
import warnings
import torch.utils.data as data
from rasterio.errors import NotGeoreferencedWarning
warnings.filterwarnings("ignore", category=NotGeoreferencedWarning) # Masks are not georeferences, so we can ignore this warning

from rasterio.plot import show
import matplotlib.pyplot as plt

Let's check that the CUDA platform has been recognized and is being used by PyTorch.

In [2]:
# Project helper that returns the device to run on (judging by its name it also
# sets the random seed — see src/utils.py).
device = utils.set_cuda_and_seed()

# Report the library versions this run is using, one line per component.
version_report = (
    ("PyTorch version: ", torch.__version__),
    ("CUDA version: ", torch.version.cuda),
    ("cuDNN version: ", torch.backends.cudnn.version()),
)
for label, value in version_report:
    print(label, value)

Using PyTorch version: 2.3.0+cu121  Device: cuda
PyTorch version:  2.3.0+cu121
CUDA version:  12.1
cuDNN version:  8902


# Some parameters, used for hyperparameter tuning later

Here we set up some parameters that will be used for hyperparameter tuning once we have a working proof of concept (PoC).

In [3]:
# Per-channel statistics kept for reference (presumably computed from the
# training split; the values appear to be on the 0-255 pixel scale).
# NOTE(review): A.Normalize multiplies mean/std by max_pixel_value, so these
# would likely need dividing by 255 before being used together with
# max_pixel_value=255.0 — confirm before enabling.
# MEAN = [63.02235933, 66.64201154, 60.63862196]
# STD = [55.50368184, 55.35826425, 52.63471437]

# Placeholder normalisation statistics (zero mean, unit std) used for now.
MEAN = [0] * 3
STD = [1.0] * 3

# Optimisation
INITIAL_LR = 1e-3  # learning rate handed to the Adam optimizer
MAX_LR = 1e-2      # max_lr of the OneCycleLR schedule
EPOCHS = 16        # number of passes over the training loader

# Data pipeline
SIZE = 256         # images are resized to SIZE x SIZE
BATCH_SIZE = 16    # samples per DataLoader batch
NUM_WORKERS = 8    # DataLoader worker processes

# When True, model/optimizer/scheduler state is restored from a checkpoint
# before training starts.
LOAD_CHECKPOINT = False

# TODO:
 - dataset analysis
 - mean and standard deviation calculation

# Data analysis

Before starting, we need to do some preliminary analysis on our dataset.
Inside split_dataset.py we already split our images into 3 different categories.
We'll be using:
 - train.txt as a training dataset
 - val.txt to validate that our model correctly predicts masks
 - test.txt to challenge different models one against the other

We should also calculate mean and std values of our train dataset, and we'll apply them to all of our sets


In [4]:
""" mean, std = utils.get_mean_std(path_to_train_data="data/train/AOI_11_Rotterdam/splits/train.txt")
print("Mean: ", mean)
print("Std: ", std) 
 """

' mean, std = utils.get_mean_std(path_to_train_data="data/train/AOI_11_Rotterdam/splits/train.txt")\nprint("Mean: ", mean)\nprint("Std: ", std) \n '

# Create a dataset class

Now we need a `SN6Dataset` class, derived from `data.Dataset`, that loads the data and returns each image together with its label; it implements the `__len__` and `__getitem__` methods.
We'll be using RasterIO

We can see the implementation of the dataset in src/dataset.py

In [5]:
from dataset import SN6Dataset

# Root folder handed to both dataset splits.
DATASET_ROOT = './data/train/AOI_11_Rotterdam'


def _preprocessing_steps():
    """Return fresh instances of the steps shared by both pipelines: normalise, then resize."""
    return [
        A.Normalize(mean=MEAN, std=STD, max_pixel_value=255.0),
        A.Resize(SIZE, SIZE),
    ]


# Training pipeline: shared preprocessing followed by random flips and
# 90-degree rotations, each applied with probability 0.5.
train_transforms = A.Compose(
    _preprocessing_steps()
    + [
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        A.RandomRotate90(p=0.5),
    ]
)

# Evaluation pipeline: shared preprocessing only, no random augmentation.
eval_transforms = A.Compose(_preprocessing_steps())

train_dataset = SN6Dataset(DATASET_ROOT, transform=train_transforms, split='train')
eval_dataset = SN6Dataset(DATASET_ROOT, transform=eval_transforms, split='val')

# Use DataLoader

We'll now import the dataset into a dataloader, and just to see that everything's working we'll show the first image of the batch

Once the images are normalized with the dataset mean and standard deviation, they won't be displayed correctly. That's expected: the pixel values are then centered around zero instead of lying in the displayable range.




In [6]:
# Training batches are drawn in a new random order every epoch.
train_loader = data.DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    pin_memory=True,
    num_workers=NUM_WORKERS,
)

# Evaluation iterates in a fixed order: shuffling brings no benefit when only
# evaluating, and a stable order keeps whatever the evaluation helpers report
# or save tied to the same samples from one epoch to the next.
eval_loader = data.DataLoader(
    eval_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    pin_memory=True,
    num_workers=NUM_WORKERS,
)

# Sanity check: pull a single batch and confirm the tensor shapes.
image, mask = next(iter(train_loader))
print(f"Image shape: {image.shape}, Mask shape: {mask.shape}")
# utils.visualize_image(image[0], mask[0])

Image shape: torch.Size([16, 3, 256, 256]), Mask shape: torch.Size([16, 1, 256, 256])


# Define the training model

We'll be using UNET, a convolutional neural network (CNN), for our task.

UNET is a popular architecture for image segmentation tasks. It is widely used in various domains, including medical imaging, remote sensing, and computer vision.

NOTE: This will throw a warning message about cudnn, this is normal and documented [HERE](https://github.com/pytorch/pytorch/pull/125790)


In [7]:
from model import UNET
from train import train

# 3-channel input, single-channel output.
model = UNET(in_channels = 3, out_channels = 1).to(device)

# BCEWithLogitsLoss applies the sigmoid itself, so it is fed raw model outputs.
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=INITIAL_LR)

# NOTE(review): OneCycleLR sets the optimizer's starting learning rate to
# max_lr / div_factor (div_factor defaults to 25), so INITIAL_LR above is
# effectively overridden — confirm this is intended.
scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=MAX_LR, steps_per_epoch=len(train_loader), epochs=EPOCHS)
scaler = torch.cuda.amp.GradScaler()

if LOAD_CHECKPOINT:
    # map_location remaps the stored tensors onto the device used by this run,
    # so a checkpoint written on a GPU also loads on a machine without CUDA.
    checkpoint = torch.load("checkpoints/checkpoint_16.pth", map_location=device)
    model.load_state_dict(checkpoint["state_dict"])
    optimizer.load_state_dict(checkpoint["optimizer"])
    # NOTE(review): the restored scheduler keeps its step counter; OneCycleLR
    # raises once its total step budget is exceeded, so resuming from the last
    # checkpoint and training EPOCHS more epochs may fail — confirm.
    scheduler.load_state_dict(checkpoint["scheduler"])

# Training model
for epoch in range(EPOCHS):
    print(f"Epoch {epoch+1} of {EPOCHS}")
    train(train_loader, model, optimizer, criterion, scaler, scheduler, device)
    # Save model, optimizer and scheduler state after every epoch
    checkpoint = {
        "state_dict": model.state_dict(),
        "optimizer": optimizer.state_dict(),
        "scheduler": scheduler.state_dict()
    }
    utils.save_checkpoint(checkpoint, filename=f"checkpoints/checkpoint_{epoch+1}.pth")
    # Report metrics on the validation loader and dump its predictions as images
    utils.get_evals(eval_loader, model, device)
    utils.save_predictions_as_image(eval_loader, model, device, "data/eval")

Epoch 1 of 16


Loss: 0.1827: 100%|██████████| 170/170 [01:06<00:00,  2.56it/s]
  return F.conv_transpose2d(


Accuracy: 1349.7325
Dice Score: 0.1136
Epoch 2 of 16


Loss: 0.1261: 100%|██████████| 170/170 [01:04<00:00,  2.62it/s]


Accuracy: 1475.0793
Dice Score: 0.0082
Epoch 3 of 16


Loss: 0.1526: 100%|██████████| 170/170 [01:05<00:00,  2.61it/s]


Accuracy: 1428.2552
Dice Score: 0.0853
Epoch 4 of 16


Loss: 0.1431: 100%|██████████| 170/170 [01:03<00:00,  2.66it/s]


Accuracy: 1460.6746
Dice Score: 0.0460
Epoch 5 of 16


Loss: 0.1470: 100%|██████████| 170/170 [01:05<00:00,  2.59it/s]


Accuracy: 1425.7925
Dice Score: 0.0741
Epoch 6 of 16


Loss: 0.0942: 100%|██████████| 170/170 [01:05<00:00,  2.60it/s]


Accuracy: 1413.0586
Dice Score: 0.0978
Epoch 7 of 16


Loss: 0.0977: 100%|██████████| 170/170 [01:05<00:00,  2.58it/s]


Accuracy: 1430.3217
Dice Score: 0.0792
Epoch 8 of 16


Loss: 0.1545: 100%|██████████| 170/170 [01:05<00:00,  2.59it/s]


Accuracy: 1421.3517
Dice Score: 0.0964
Epoch 9 of 16


Loss: 0.1113: 100%|██████████| 170/170 [01:14<00:00,  2.29it/s]


Accuracy: 1437.5959
Dice Score: 0.0793
Epoch 10 of 16


Loss: 0.1719: 100%|██████████| 170/170 [01:34<00:00,  1.80it/s]


Accuracy: 1419.4114
Dice Score: 0.0979
Epoch 11 of 16


Loss: 0.0832: 100%|██████████| 170/170 [01:05<00:00,  2.60it/s]


Accuracy: 1425.7906
Dice Score: 0.0978
Epoch 12 of 16


Loss: 0.0823: 100%|██████████| 170/170 [01:04<00:00,  2.62it/s]


Accuracy: 1422.1395
Dice Score: 0.0989
Epoch 13 of 16


Loss: 0.0562: 100%|██████████| 170/170 [01:05<00:00,  2.61it/s]


Accuracy: 1416.0118
Dice Score: 0.1075
Epoch 14 of 16


Loss: 0.0860: 100%|██████████| 170/170 [01:05<00:00,  2.61it/s]


Accuracy: 1414.8850
Dice Score: 0.1049
Epoch 15 of 16


Loss: 0.0679: 100%|██████████| 170/170 [01:03<00:00,  2.66it/s]


Accuracy: 1413.6678
Dice Score: 0.1122
Epoch 16 of 16


Loss: 0.1134: 100%|██████████| 170/170 [01:05<00:00,  2.61it/s]


Accuracy: 1411.0724
Dice Score: 0.1077


# Sources

https://www.nature.com/articles/s41598-024-56706-x 
https://medium.com/@nghihuynh_37300/understanding-evaluation-metrics-in-medical-image-segmentation-d289a373a3f