In [None]:
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

## Land Cover Classification: Sentinel-2 with EuroCrops

This tutorial walks you through land cover classification with Sentinel-2 satellite imagery annotated with EuroCrops labels, using the TorchGeo library. We will start by downloading Sentinel-2 data, pair it with EuroCrops for pixel-wise supervised classification, train a segmentation model, and finally run inference to make sense of the Sentinel-2 imagery.

Whether you are a remote sensing enthusiast or just curious about deep learning for geospatial data, this guide has something cool for you!

In [1]:
import os
from urllib.parse import urlparse

import matplotlib.pyplot as plt
import planetary_computer
import pystac
import torch
from torch.utils.data import DataLoader
from torchgeo.datasets.utils import download_url

%matplotlib inline
# Default size (width, height in inches) for every matplotlib figure in this notebook.
plt.rcParams['figure.figsize'] = (12, 12)

### 1- Downloading Sentinel-2 Imagery

Fetch Sentinel-2 imagery using Microsoft Planetary Computer and ensure you have the data you need.

In [None]:
# Local directory that receives the downloaded band files.
root = "/data/sentinel"

# STAC item URLs of the Sentinel-2 L2A scenes to fetch from the Planetary Computer.
item_urls = [
    'https://planetarycomputer.microsoft.com/api/stac/v1/collections/sentinel-2-l2a/items/S2A_MSIL2A_20220827T093601_R036_T34UEA_20220829T151158',
]

# Asset keys of the bands downloaded for every scene.
bands = ('B02', 'B03', 'B04', 'B08')

for item_url in item_urls:
    item = pystac.Item.from_file(item_url)
    # Asset hrefs are read from the signed copy of the item.
    signed_item = planetary_computer.sign(item)
    for band in bands:
        asset_href = signed_item.assets[band].href
        # The last path component of the asset URL becomes the local file name.
        filename = urlparse(asset_href).path.rsplit('/', 1)[-1]
        download_url(asset_href, root, filename)

### 2- Prepare Dataloader

Customize TorchGeo to align the `Sentinel2` and `EuroCrops` datasets, forming an `IntersectionDataset` for a pixel-wise classification task.

In [4]:
# Files matched by the Sentinel-2 dataset.
# NOTE: `datamodule` is only created in the Sentinel2EuroCropsDataModule cell further down.
datamodule.sentinel2.files

['/data/sentinel/T34UEA_20220827T093601_B02_10m.tif']

In [6]:
# Coordinate reference system reported by the Sentinel-2 dataset.
datamodule.sentinel2.crs

CRS.from_epsg(32634)

In [11]:
# Files matched by the Sentinel-2 dataset (the recorded output of this cell is that file list).
datamodule.sentinel2.files

['/data/sentinel/T34UEA_20220827T093601_B02_10m.tif']

In [5]:
# Shapefiles matched by the EuroCrops dataset.
datamodule.eurocrops.files

['/data/datatorchgeo/AT_2021/AT_2021_EC21.shp',
 '/data/datatorchgeo/BE_VLG_2021/BE_VLG_2021_EC21.shp',
 '/data/datatorchgeo/DE_LS_2021/DE_LS_2021_EC21.shp',
 '/data/datatorchgeo/DE_NRW_2021/DE_NRW_2021_EC21.shp',
 '/data/datatorchgeo/DK_2019_EC21.shp',
 '/data/datatorchgeo/EE_2021_EC21.shp',
 '/data/datatorchgeo/FR_2018/FR_2018_EC21.shp',
 '/data/datatorchgeo/HR/HR_2020_EC21.shp',
 '/data/datatorchgeo/LT_2021_EC.shp',
 '/data/datatorchgeo/LV_2021/LV_2021_EC21.shp',
 '/data/datatorchgeo/NA/ES_NA_2020_EC21.shp',
 '/data/datatorchgeo/NL_2020_EC21.shp',
 '/data/datatorchgeo/PT_2021_EC21.shp',
 '/data/datatorchgeo/SE/SE_2021_EC21.shp',
 '/data/datatorchgeo/SI_2021_EC21.shp',
 '/data/datatorchgeo/SK_2021_EC21.shp']

In [7]:
# Coordinate reference system reported by the EuroCrops dataset
# (the recorded output equals the Sentinel-2 CRS, EPSG:32634).
datamodule.eurocrops.crs

CRS.from_epsg(32634)

In [5]:
# Display the datamodule's `dataset` attribute.
datamodule.dataset

In [2]:
from torchgeo.datasets import Sentinel2, RasterDataset

class Sentinel2_Custom(Sentinel2):
    """Sentinel-2 dataset limited to tile T34UEA and the bands B02, B03, B04 and B08.

    Overrides the file-matching attributes and the band lists inherited from
    ``Sentinel2``.
    """

    # Only files whose name starts with the tile id T34UEA are matched.
    # NOTE(review): the ``{}`` placeholder is presumably filled in by the
    # parent class before globbing - TODO confirm.
    filename_glob = 'T34UEA_*_{}*.*'
    # Named groups: tile, date, band and an optional resolution suffix.
    # The doubled braces and the open ``{}`` suggest the pattern is passed
    # through ``str.format`` before being compiled - TODO confirm against
    # the parent class.
    filename_regex = r"""
        ^T(?P<tile>\d{{2}}[A-Z]{{3}})
        _(?P<date>\d{{8}}T\d{{6}})
        _(?P<band>B[018][\dA])
        (?:_(?P<resolution>{}m))?
        \..*$
    """
    # Layout of the ``date`` group, e.g. 20220827T093601.
    date_format = '%Y%m%dT%H%M%S'
    # Bands available to this dataset: the four bands fetched by the download cell.
    all_bands = ('B02', 'B03', 'B04', 'B08')
    # Bands used as red, green and blue, in that order.
    rgb_bands = ('B04', 'B03', 'B02')

# Monkeypatch: replace the Sentinel2 class exported by torchgeo.datasets with
# the customized subclass defined above.
# NOTE(review): this presumably has to run before the datamodule import below
# so that the datamodule picks up the patched class - confirm how
# Sentinel2EuroCropsDataModule resolves `Sentinel2`.
import torchgeo.datasets
torchgeo.datasets.Sentinel2 = Sentinel2_Custom

from torchgeo.datamodules import Sentinel2EuroCropsDataModule

# Build the datamodule that pairs the Sentinel-2 imagery with the EuroCrops labels.
# `sentinel2_paths` is the directory the download cell writes to; `eurocrops_paths`
# is the directory containing the EuroCrops shapefiles.
datamodule = Sentinel2EuroCropsDataModule(
    sentinel2_paths="/data/sentinel",
    eurocrops_paths="/data/datatorchgeo",
    batch_size=1,
    # eurocrops_crs="epsg:4326",
    # sentinel2_crs="epsg:4326",
    patch_size=64,
    num_workers=32,
    # length=8,
)

# Run the datamodule setup for each stage and keep a handle on the resulting dataset.
# NOTE(review): the recorded run of this cell ended with
# "RuntimeError: Datasets have no spatiotemporal intersection". The downloaded
# scene is dated 2022-08-27 while the EuroCrops files are named 2018-2021, so a
# missing temporal overlap is one possible cause - TODO confirm.
datamodule.setup("fit")
train_dataset = datamodule.train_dataset
datamodule.setup("validate")
val_dataset = datamodule.val_dataset
datamodule.setup("test")
test_dataset = datamodule.test_dataset

# datamodule.val_sampler.length = 3
# datamodule.test_sampler.length = 3

Converting EuroCrops CRS from EPSG:4326 to EPSG:32634
Converting EuroCrops res from 1e-05 to 10


RuntimeError: Datasets have no spatiotemporal intersection

In [None]:
# Inspect the `sentinel2_kwargs` attribute of the datamodule
# (presumably the keyword arguments forwarded to the Sentinel-2 dataset - TODO confirm).
datamodule.sentinel2_kwargs

In [None]:
# Check the `is_image` flag on the customized Sentinel-2 class
# (the class is defined as `Sentinel2_Custom`, with an underscore).
Sentinel2_Custom.is_image

In [None]:
# Inspect the class mapping exposed by the EuroCrops dataset.
datamodule.eurocrops.class_map

#### Visualize the Sentinel-2 imagery and EuroCrops Labels

In [None]:
# TODO: plot Sentinel-2 samples alongside their EuroCrops label masks

### 3- Training Semantic Segmentation Model

Train a U-Net model on Sentinel-2 images paired with EuroCrops labels to classify land cover, powered by PyTorch Lightning.

In [None]:
from torchgeo.trainers import SemanticSegmentationTask
from lightning.pytorch import Trainer
import torch

# Hyperparameters of the segmentation task, gathered in one place for easy tuning.
task_kwargs = {
    'model': 'unet',
    'backbone': 'resnet50',
    'weights': None,  # TODO: Add pretrained weights
    'in_channels': 4,  # same count as the four bands in Sentinel2_Custom.all_bands
    # TODO: How many classes are there? All countries in the dataset have
    # different number of classes
    'num_classes': 396,
    'num_filters': 3,
    'loss': 'ce',  # Either the loss or the f-f' are wrong
    'class_weights': None,
    'ignore_index': None,
    'lr': 0.001,
    'patience': 10,
    'freeze_backbone': False,
    'freeze_decoder': False,
}

task = SemanticSegmentationTask(**task_kwargs)

# TODO: Trainer on CPU for now as I could not move 'input' to CUDA
# accelerator = 'gpu' if torch.cuda.is_available() else 'cpu'

# Trainer settings: CPU only, logs every step, runs between 2 and 20 epochs.
trainer_kwargs = {
    'accelerator': 'cpu',
    'default_root_dir': './',
    'fast_dev_run': False,
    'log_every_n_steps': 1,
    'min_epochs': 2,
    'max_epochs': 20,
}
trainer = Trainer(**trainer_kwargs)

# Fit the segmentation task on the data served by the datamodule.
trainer.fit(model=task, datamodule=datamodule)

### 4- Model Evaluation on Test Set

Evaluate the segmentation model over the test set

### 5- Inference over full Imagery

Perform inference by computing predictions over the complete imagery 

In [None]:
# Sizes of the train, validation and test datasets created by the setup calls.
len(train_dataset), len(val_dataset), len(test_dataset)

In [None]:
# Debug: files matched by the Sentinel-2 dataset.
datamodule.sentinel2.files

In [None]:
# Debug: shapefiles matched by the EuroCrops dataset.
datamodule.eurocrops.files

In [None]:
# Debug: bounds of the training dataset.
datamodule.train_dataset.bounds

In [None]:
# Debug: bounds of the Sentinel-2 dataset, to compare against the EuroCrops bounds.
datamodule.sentinel2.bounds

In [None]:
# Debug: bounds of the EuroCrops dataset, to compare against the Sentinel-2 bounds.
datamodule.eurocrops.bounds

In [None]:
# Debug: the `datasets` attribute of the training dataset
# (presumably its constituent datasets - TODO confirm).
datamodule.train_dataset.datasets

In [None]:
# Debug: display the datamodule's `dataset` attribute.
datamodule.dataset

In [None]:
# Debug: display the training dataset object.
datamodule.train_dataset

In [None]:
# Debug: the training dataset together with the train/val/test dataset sizes.
datamodule.train_dataset, len(datamodule.train_dataset), len(datamodule.val_dataset), len(datamodule.test_dataset)

In [None]:
# Debug: `size` attribute of the training batch sampler
# (presumably the sampled patch size - TODO confirm).
datamodule.train_batch_sampler.size

In [None]:
from torchgeo.datasets.utils import BoundingBox

# Print the area of every bounding box stored in the EuroCrops index.
eurocrops_index = datamodule.eurocrops.index
for hit in eurocrops_index.intersection(eurocrops_index.bounds, objects=True):
    print(BoundingBox(*hit.bounds).area)

In [None]:
# Print the area of every bounding box stored in the Sentinel-2 index.
sentinel2_index = datamodule.sentinel2.index
for hit in sentinel2_index.intersection(sentinel2_index.bounds, objects=True):
    print(BoundingBox(*hit.bounds).area)

In [None]:
# Manual check for overlapping entries between the EuroCrops and Sentinel-2
# indexes: count every pair of entries whose bounding boxes overlap and raise
# if there is none.
overlap_count = 0
eurocrops_index = datamodule.eurocrops.index

for hit1 in eurocrops_index.intersection(eurocrops_index.bounds, objects=True):
    print("hit1", hit1)
    for hit2 in datamodule.sentinel2.index.intersection(hit1.bounds, objects=True):
        box1 = BoundingBox(*hit1.bounds)
        box2 = BoundingBox(*hit2.bounds)
        print("box1.area", box1.area)
        print("box2.area", box2.area)
        box3 = box1 & box2
        # A pair counts when the overlap has positive area, or when either
        # input box itself has zero area.
        counts = box3.area > 0 or box1.area == 0 or box2.area == 0
        if not counts:
            continue
        # Counted only; nothing is inserted into an index
        # (was: self.index.insert(i, tuple(box3))).
        overlap_count += 1

if overlap_count == 0:
    raise RuntimeError('Datasets have no spatiotemporal intersection')

In [None]:
# Debug: `size` attribute of the training batch sampler.
datamodule.train_batch_sampler.size

In [None]:
# Debug: training dataset plus the patch size, batch size and length stored on the datamodule.
datamodule.train_dataset, datamodule.patch_size, datamodule.batch_size, datamodule.length

In [None]:
# Debug: `length` stored on the datamodule (the `length=` argument is commented out at construction).
datamodule.length

In [None]:
# Debug: lengths of the train, validation and test samplers.
len(datamodule.train_batch_sampler), len(datamodule.val_sampler), len(datamodule.test_sampler)