In [31]:
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

## Land Cover Classification: Sentinel-2 with EuroCrops

This tutorial will walk you through the process of performing land cover classification with Sentinel-2 satellite imagery, annotated with EuroCrops, using TorchGeo library. We will start with downloading Sentinel-2 data, setting it up with EuroCrops for pixel-wise supervised classification, training a segmentation model, and finally running inference to make sense of Sentinel-2 imagery. 

Whether you are a remote sensing enthusiast or just curious about deep learning for geospatial data, this guide has something cool for you!

In [39]:
import os
from urllib.parse import urlparse

import matplotlib.pyplot as plt
import planetary_computer
import pystac
import torch
from torch.utils.data import DataLoader
from torchgeo.datasets.utils import download_url
from torchgeo.datasets import RasterDataset

%matplotlib inline
plt.rcParams['figure.figsize'] = (12, 12)

### 1- Downloading Sentinel-2 Imagery

Fetch Sentinel-2 imagery using Microsoft Planetary Computer and ensure you have the data you need.

In [40]:
root = "/data/sentinel"
item_urls = [
    'https://planetarycomputer.microsoft.com/api/stac/v1/collections/sentinel-2-l2a/items/S2A_MSIL2A_20220827T093601_R036_T34UEA_20220829T151158',
]
for item_url in item_urls:
    item = pystac.Item.from_file(item_url)
    signed_item = planetary_computer.sign(item)
    for band in ['B02', 'B03', 'B04', 'B08']:
        asset_href = signed_item.assets[band].href
        filename = urlparse(asset_href).path.split('/')[-1]
        download_url(asset_href, root, filename)

Downloading https://sentinel2l2a01.blob.core.windows.net/sentinel2-l2/34/U/EA/2022/08/27/S2A_MSIL2A_20220827T093601_N0400_R036_T34UEA_20220829T151158.SAFE/GRANULE/L2A_T34UEA_A037499_20220827T093559/IMG_DATA/R10m/T34UEA_20220827T093601_B02_10m.tif?st=2024-12-04T20%3A45%3A31Z&se=2024-12-05T21%3A30%3A31Z&sp=rl&sv=2024-05-04&sr=c&skoid=9c8ff44a-6a2c-4dfb-b298-1c9212f64d9a&sktid=72f988bf-86f1-41af-91ab-2d7cd011db47&skt=2024-12-05T14%3A26%3A18Z&ske=2024-12-12T14%3A26%3A18Z&sks=b&skv=2024-05-04&sig=hFT16uqd5VjWtWkBhyMx67OSoIIpCk%2BQ25A7Cq4g7B8%3D to /data/sentinel/T34UEA_20220827T093601_B02_10m.tif


100%|██████████| 209459745/209459745 [00:03<00:00, 64877956.88it/s]


Downloading https://sentinel2l2a01.blob.core.windows.net/sentinel2-l2/34/U/EA/2022/08/27/S2A_MSIL2A_20220827T093601_N0400_R036_T34UEA_20220829T151158.SAFE/GRANULE/L2A_T34UEA_A037499_20220827T093559/IMG_DATA/R10m/T34UEA_20220827T093601_B03_10m.tif?st=2024-12-04T20%3A45%3A31Z&se=2024-12-05T21%3A30%3A31Z&sp=rl&sv=2024-05-04&sr=c&skoid=9c8ff44a-6a2c-4dfb-b298-1c9212f64d9a&sktid=72f988bf-86f1-41af-91ab-2d7cd011db47&skt=2024-12-05T14%3A26%3A18Z&ske=2024-12-12T14%3A26%3A18Z&sks=b&skv=2024-05-04&sig=hFT16uqd5VjWtWkBhyMx67OSoIIpCk%2BQ25A7Cq4g7B8%3D to /data/sentinel/T34UEA_20220827T093601_B03_10m.tif


100%|██████████| 217850772/217850772 [00:03<00:00, 72323220.58it/s]


Downloading https://sentinel2l2a01.blob.core.windows.net/sentinel2-l2/34/U/EA/2022/08/27/S2A_MSIL2A_20220827T093601_N0400_R036_T34UEA_20220829T151158.SAFE/GRANULE/L2A_T34UEA_A037499_20220827T093559/IMG_DATA/R10m/T34UEA_20220827T093601_B04_10m.tif?st=2024-12-04T20%3A45%3A31Z&se=2024-12-05T21%3A30%3A31Z&sp=rl&sv=2024-05-04&sr=c&skoid=9c8ff44a-6a2c-4dfb-b298-1c9212f64d9a&sktid=72f988bf-86f1-41af-91ab-2d7cd011db47&skt=2024-12-05T14%3A26%3A18Z&ske=2024-12-12T14%3A26%3A18Z&sks=b&skv=2024-05-04&sig=hFT16uqd5VjWtWkBhyMx67OSoIIpCk%2BQ25A7Cq4g7B8%3D to /data/sentinel/T34UEA_20220827T093601_B04_10m.tif


100%|██████████| 219170487/219170487 [00:03<00:00, 67522033.41it/s]


Downloading https://sentinel2l2a01.blob.core.windows.net/sentinel2-l2/34/U/EA/2022/08/27/S2A_MSIL2A_20220827T093601_N0400_R036_T34UEA_20220829T151158.SAFE/GRANULE/L2A_T34UEA_A037499_20220827T093559/IMG_DATA/R10m/T34UEA_20220827T093601_B08_10m.tif?st=2024-12-04T20%3A45%3A31Z&se=2024-12-05T21%3A30%3A31Z&sp=rl&sv=2024-05-04&sr=c&skoid=9c8ff44a-6a2c-4dfb-b298-1c9212f64d9a&sktid=72f988bf-86f1-41af-91ab-2d7cd011db47&skt=2024-12-05T14%3A26%3A18Z&ske=2024-12-12T14%3A26%3A18Z&sks=b&skv=2024-05-04&sig=hFT16uqd5VjWtWkBhyMx67OSoIIpCk%2BQ25A7Cq4g7B8%3D to /data/sentinel/T34UEA_20220827T093601_B08_10m.tif


100%|██████████| 232486630/232486630 [00:03<00:00, 65934489.32it/s]


### 2-  Prepare Dataloader

Customize TorchGeo to align `Sentinel2` and `EuroCrops` datasets, forming an `IntersectionDataset` for pixel-wise classification task

In [47]:
from torchgeo.datasets import Sentinel2

class Sentinel2Custom(Sentinel2):
    filename_glob = 'T*_B08_10m.tif'
    filename_regex = r'.*'
    date_format = '%Y%m%dT%H%M%S'
    is_image = True
    separate_files = True
    all_bands = ('B02', 'B03', 'B04', 'B08')
    rgb_bands = ('B04', 'B03', 'B02')

import torchgeo.datasets
torchgeo.datasets.Sentinel2 = Sentinel2Custom

from torchgeo.datamodules import Sentinel2EuroCropsDataModule

datamodule = Sentinel2EuroCropsDataModule(
    eurocrops_paths="/data/datatorchgeo",
    sentinel2_paths="/data/sentinel",
    batch_size=1,
    patch_size=64,
    length=8,
)

datamodule.setup("fit")
train_dataset = datamodule.train_dataset
datamodule.setup("validate")
val_dataset = datamodule.val_dataset
datamodule.setup("test")
test_dataset = datamodule.test_dataset

# datamodule.val_sampler.length = 3
# datamodule.test_sampler.length = 3

Converting EuroCrops CRS from EPSG:4326 to EPSG:32615
Converting EuroCrops res from 1e-05 to 10
Converting EuroCrops CRS from EPSG:4326 to EPSG:32615
Converting EuroCrops res from 1e-05 to 10
Converting EuroCrops CRS from EPSG:4326 to EPSG:32615
Converting EuroCrops res from 1e-05 to 10


### 3- Training Semantic Segmentation Model

Train a UNet model with Sentinel-2 images paired with EuroCrops labels to classify land cover, powered by PyTorch Lightning

In [49]:
from torchgeo.trainers import SemanticSegmentationTask
from lightning.pytorch import Trainer
import torch

task = SemanticSegmentationTask(
    model='unet',
    backbone='resnet50',
    weights=None,  # No pre-trained weights
    in_channels=4,  # CDL dataset may have RGB inputs
    num_classes=134,
    num_filters=3,
    loss='ce',  # CrossEntropyLoss
    class_weights=None,
    ignore_index=None,
    lr=0.001,
    patience=10,
    freeze_backbone=False,
    freeze_decoder=False)

# accelerator = 'gpu' if torch.cuda.is_available() else 'cpu'

trainer = Trainer(
    accelerator='cpu',
    default_root_dir='./',
    fast_dev_run=True,
    log_every_n_steps=1,
    min_epochs=1,
    max_epochs=2,
)

trainer.fit(model=task, datamodule=datamodule)

INFO: GPU available: True (cuda), used: False
INFO:lightning.pytorch.utilities.rank_zero:GPU available: True (cuda), used: False
INFO: TPU available: False, using: 0 TPU cores
INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO: IPU available: False, using: 0 IPUs
INFO:lightning.pytorch.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO: HPU available: False, using: 0 HPUs
INFO:lightning.pytorch.utilities.rank_zero:HPU available: False, using: 0 HPUs
/opt/conda/envs/ood/lib/python3.11/site-packages/lightning/pytorch/trainer/setup.py:187: GPU available but not used. You can set it by doing `Trainer(accelerator='gpu')`.
INFO: Running in `fast_dev_run` mode: will run the requested loop using 1 batch(es). Logging and checkpointing is suppressed.
INFO:lightning.pytorch.utilities.rank_zero:Running in `fast_dev_run` mode: will run the requested loop using 1 batch(es). Logging and checkpointing is suppressed.
INFO: 
  | Name          | Type       

Converting EuroCrops CRS from EPSG:4326 to EPSG:32615
Converting EuroCrops res from 1e-05 to 10


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO: `Trainer.fit` stopped: `max_steps=1` reached.
INFO:lightning.pytorch.utilities.rank_zero:`Trainer.fit` stopped: `max_steps=1` reached.


### 4- Model Evaluation on Test Set

Evaluate the segmentation model over the test set

### 5- Inference over full Imagery

Perform inference by computing predictions over the complete imagery 

In [50]:
len(train_dataset), len(val_dataset), len(test_dataset)

(52, 6, 6)

In [51]:
datamodule.sentinel2.files

['/data/sentinel/T15TWG_20241101T170349_B02_10m.tif',
 '/data/sentinel/T34UEA_20220827T093601_B02_10m.tif']

In [53]:
datamodule.eurocrops.files

['/data/datatorchgeo/AT_2021/AT_2021_EC21.shp',
 '/data/datatorchgeo/BE_VLG_2021/BE_VLG_2021_EC21.shp',
 '/data/datatorchgeo/DE_LS_2021/DE_LS_2021_EC21.shp',
 '/data/datatorchgeo/DE_NRW_2021/DE_NRW_2021_EC21.shp',
 '/data/datatorchgeo/DK_2019_EC21.shp',
 '/data/datatorchgeo/EE_2021_EC21.shp',
 '/data/datatorchgeo/FR_2018/FR_2018_EC21.shp',
 '/data/datatorchgeo/HR/HR_2020_EC21.shp',
 '/data/datatorchgeo/LT_2021_EC.shp',
 '/data/datatorchgeo/LV_2021/LV_2021_EC21.shp',
 '/data/datatorchgeo/NA/ES_NA_2020_EC21.shp',
 '/data/datatorchgeo/NL_2020_EC21.shp',
 '/data/datatorchgeo/PT_2021_EC21.shp',
 '/data/datatorchgeo/SE/SE_2021_EC21.shp',
 '/data/datatorchgeo/SI_2021_EC21.shp',
 '/data/datatorchgeo/SK_2021_EC21.shp']

In [54]:
datamodule.train_dataset.bounds

BoundingBox(minx=4767668.208980892, maxx=4852845.369973785, miny=12059856.293102827, maxy=12244745.151673885, mint=1609459200.0, maxt=1640995199.999999)

In [55]:
datamodule.sentinel2.bounds

BoundingBox(minx=499980.0, maxx=4852845.369973785, miny=4590240.0, maxy=12244745.151673885, mint=0.0, maxt=9.223372036854776e+18)

In [56]:
datamodule.eurocrops.bounds

BoundingBox(minx=2618755.4445662247, maxx=7395571.480706856, miny=9066700.060796153, maxy=12531048.828763735, mint=1514764800.0, maxt=1640995199.999999)

In [57]:
datamodule.train_dataset.datasets

[<__main__.Sentinel2Custom at 0x7fb97c8cfa10>,
 <torchgeo.datasets.eurocrops.EuroCrops at 0x7fb97e677610>]

In [58]:
datamodule.dataset

<torchgeo.datasets.geo.IntersectionDataset at 0x7fb97e673b10>

In [59]:
datamodule.train_dataset

<torchgeo.datasets.geo.IntersectionDataset at 0x7fb97cb20f10>

In [60]:
datamodule.train_dataset, len(datamodule.train_dataset), len(datamodule.val_dataset), len(datamodule.test_dataset)

(<torchgeo.datasets.geo.IntersectionDataset at 0x7fb97cb20f10>, 52, 6, 6)

In [61]:
datamodule.train_batch_sampler.size

(640, 640)

In [62]:
from torchgeo.datasets.utils import BoundingBox

for i in datamodule.eurocrops.index.intersection(datamodule.eurocrops.index.bounds, objects=True):
    box1 = BoundingBox(*i.bounds)
    print(box1.area)

526794950433.0406
51743675069.350876
221599321761.17267
141888810541.07678
348908610014.32117
221392276629.58197
2980960657471.9956
700393908177.45
269523305955.8701
311876677510.05255
52470208769.82907
173922024020.19308
443179357557.58044
2605366275475.8315
127016410374.98985
295224872828.9651


In [63]:
for i in datamodule.sentinel2.index.intersection(datamodule.sentinel2.index.bounds, objects=True):
    box1 = BoundingBox(*i.bounds)
    print(box1.area)

12056040000.0
34183890023.70891


In [64]:
i = 0

for hit1 in datamodule.eurocrops.index.intersection(datamodule.eurocrops.index.bounds, objects=True):
    print("hit1", hit1)
    for hit2 in datamodule.sentinel2.index.intersection(hit1.bounds, objects=True):
        box1 = BoundingBox(*hit1.bounds)
        box2 = BoundingBox(*hit2.bounds)
        print("box1.area", box1.area)
        print("box2.area", box2.area)
        box3 = box1 & box2
        # Skip 0 area overlap (unless 0 area dataset)
        if box3.area > 0 or box1.area == 0 or box2.area == 0:
            # self.index.insert(i, tuple(box3))
            i += 1

if i == 0:
    raise RuntimeError('Datasets have no spatiotemporal intersection')

hit1 <rtree.index.Item object at 0x7fb97b8cfbf0>
hit1 <rtree.index.Item object at 0x7fb97b8abbf0>
hit1 <rtree.index.Item object at 0x7fb97b8cfbf0>
hit1 <rtree.index.Item object at 0x7fb97b8abbf0>
hit1 <rtree.index.Item object at 0x7fb97b8cfbf0>
hit1 <rtree.index.Item object at 0x7fb97b8abbf0>
hit1 <rtree.index.Item object at 0x7fb97b8cfbf0>
hit1 <rtree.index.Item object at 0x7fb97b8abbf0>
hit1 <rtree.index.Item object at 0x7fb97b8cfbf0>
hit1 <rtree.index.Item object at 0x7fb97b8abbf0>
hit1 <rtree.index.Item object at 0x7fb97b8cfbf0>
hit1 <rtree.index.Item object at 0x7fb97b8abbf0>
hit1 <rtree.index.Item object at 0x7fb97b8cfbf0>
hit1 <rtree.index.Item object at 0x7fb97b8abbf0>
hit1 <rtree.index.Item object at 0x7fb97b8cfbf0>
hit1 <rtree.index.Item object at 0x7fb97b8abbf0>
box1.area 295224872828.9651
box2.area 34183890023.70891


In [65]:
datamodule.train_batch_sampler.size

(640, 640)

In [66]:
datamodule.train_dataset, datamodule.patch_size, datamodule.batch_size, datamodule.length

(<torchgeo.datasets.geo.IntersectionDataset at 0x7fb97cb20f10>, 64, 1, 8)

In [67]:
datamodule.length

8

In [68]:
len(datamodule.train_batch_sampler), len(datamodule.val_sampler), len(datamodule.test_sampler)

(8, 3774, 3774)