In [1]:
import os
import numpy as np
import rasterio
from rasterio.windows import Window

In [2]:
def preprocess_mosaic(country: str, year: int, gee_src: str, img_sections_path: str, tile_size: int):
    """Cut a per-band mosaic into square, multi-band GeoTIFF tiles.

    For every identifier in ``bands`` the file
    ``{gee_src}/{country}_{year}_{band}.tif`` is opened. The grid of
    ``tile_size`` x ``tile_size`` windows is computed from the first band's
    width and height and centred on the raster: half of the leftover pixels
    is skipped on each side, so border pixels that do not fill a whole tile
    are discarded. Each window is read from every band (first band of each
    file), stacked, and written to
    ``{img_sections_path}/{country}_{year}_{h_i}_{v_i}.tif`` where ``h_i`` is
    the column index and ``v_i`` the row index of the tile.

    NaN values are replaced by 0 (via ``np.nan_to_num``) and tiles that are
    entirely 0 afterwards are not written.

    Args:
        country: Prefix of the input and output file names.
        year: Year component of the input and output file names.
        gee_src: Directory containing the single-band input rasters.
        img_sections_path: Directory receiving the tiles; created if missing.
        tile_size: Edge length of a tile, in pixels.

    Returns:
        None. The tiles are written to disk as a side effect.
    """
    bands = ["B01", "B02", "B03", "B04", "B05", "B06", "B07", "B08", "B8A", "B09", "B11", "B12"]

    # Make sure the destination exists so the first write does not fail.
    # An empty path means "current directory" and needs no creation.
    if img_sections_path:
        os.makedirs(img_sections_path, exist_ok=True)

    band_datasets = []
    try:
        # Open the bands inside the try block so that a failure half-way
        # through (e.g. a missing file) still closes the ones already opened.
        for band in bands:
            band_path = os.path.join(gee_src, f'{country}_{year}_{band}.tif')
            band_datasets.append(rasterio.open(band_path))

        # The first band defines the tiling grid and the output profile.
        reference = band_datasets[0]

        # Number of horizontal and vertical tiles, and the offsets that
        # centre the grid inside the raster.
        h_tiles = reference.width // tile_size
        v_tiles = reference.height // tile_size
        h_offset = (reference.width % tile_size) // 2
        v_offset = (reference.height % tile_size) // 2

        # Output profile: same as the reference band, but multi-band and
        # tile-sized. The transform is set per tile below.
        gee_profile = reference.profile
        gee_profile.update({
            'count': len(bands),
            'height': tile_size,
            'width': tile_size})

        # Tile buffer, allocated once and overwritten for every tile.
        gee_data = np.empty((len(bands), tile_size, tile_size))

        for h_i in range(h_tiles):
            for v_i in range(v_tiles):

                window = Window(
                    h_i * tile_size + h_offset,
                    v_i * tile_size + v_offset,
                    tile_size,
                    tile_size,
                )

                # Read band 1 explicitly so each file yields a 2-D array
                # matching the shape of its slot in the buffer.
                for i, band_dataset in enumerate(band_datasets):
                    gee_data[i] = band_dataset.read(1, window=window)

                # Replace NaN with 0 in place; avoids copying the whole
                # buffer for every tile.
                np.nan_to_num(gee_data, copy=False)

                # Skip empty tiles
                if not gee_data.any():
                    continue

                # Georeference the tile from its window in the mosaic.
                gee_profile['transform'] = reference.window_transform(window)

                gee_dst_file = f'{country}_{year}_{h_i}_{v_i}.tif'
                gee_dst_path = os.path.join(img_sections_path, gee_dst_file)

                with rasterio.open(gee_dst_path, 'w', **gee_profile) as gee_tile:
                    gee_tile.write(gee_data)
    finally:
        # Always release the input datasets, even when tiling fails.
        for band_dataset in band_datasets:
            band_dataset.close()

Variables

In [3]:
# File-name prefixes of the mosaics to tile.
countries = ['Bornate']
# Year used in the file names and in the data folder layout below.
year = 2019
# Paths are derived from `year` so that changing it in one place keeps the
# file names and the folders in sync.
gee_src = f'../data/Inferences/{year}/Bands/'
img_sections_path = f'../data/Inferences/{year}/Sections/'
# Edge length, in pixels, of the square tiles.
tile_size = 2048

In [4]:
# Tile the mosaic of every configured country with the shared settings.
for country in countries:
    preprocess_mosaic(
        country=country,
        year=year,
        gee_src=gee_src,
        img_sections_path=img_sections_path,
        tile_size=tile_size,
    )

Empty Labels Generation

In [5]:
import rasterio
import os
from pathlib import Path
import numpy as np

In [6]:
# Destination folder of the generated label rasters; derived from `year` so
# it follows the same data folder layout as the image sections.
ann_dst_path = f'../data/Inferences/{year}/Labels/'

In [7]:
# Write an all-zero, single-band int8 raster named `<section stem>_MAP.tif`
# for every image section, reusing the section's raster profile.
os.makedirs(ann_dst_path, exist_ok=True)

# Only GeoTIFF files are treated as sections, so stray entries in the folder
# (checkpoints, sidecar files, ...) do not break the loop. Sorted for a
# reproducible processing order.
sections = sorted(
    name for name in os.listdir(img_sections_path)
    if name.lower().endswith(('.tif', '.tiff'))
)

for section in sections:
    with rasterio.open(os.path.join(img_sections_path, section)) as src:
        profile = src.profile

    # NOTE(review): every other profile entry (including nodata) is inherited
    # from the section; confirm the nodata value is representable as int8.
    profile.update({
        'count': 1,
        'dtype': np.int8
    })

    # Size the zeros from the section itself rather than from `tile_size`,
    # and use the same dtype as the output profile.
    dummy_data = np.zeros((1, profile['height'], profile['width']), dtype=np.int8)

    ann_section = f"{Path(section).stem}_MAP.tif"

    with rasterio.open(os.path.join(ann_dst_path, ann_section), 'w', **profile) as dst:
        dst.write(dummy_data)