# Satellite imagery tile generation

## Imports and setup

In [None]:
import numpy as np
import random
import pandas as pd
import os
import matplotlib.pyplot as plt
import pathlib
import rasterio
import rasterio.plot
import geopandas
import pickle
from collections import Counter

import tensorflow as tf
AUTOTUNE = tf.data.experimental.AUTOTUNE

from modules.helpers import *
from modules.tile_generator import *

# Check GPUs:
# List the GPUs visible to TensorFlow; an empty list skips the whole setup.
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            # Prevent TensorFlow from allocating all memory of all GPUs:
            tf.config.experimental.set_memory_growth(gpu, True)
        # Listed after memory growth has been enabled on every physical GPU.
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        # Best effort: report the problem and carry on with default settings.
        # NOTE(review): per the TensorFlow GPU guide this is raised when memory
        # growth is changed after the GPUs were initialized - confirm.
        print(e)

In [None]:
# Load the pickled image metadata into `meta`.
# NOTE(review): pickle can execute arbitrary code on load; only open
# metadata files that were produced by this project.
with open('metadata_df.pickle', 'rb') as handle:
    meta = pickle.load(handle)
    
# Sensor(s) whose images are used; uncomment one alternative to switch.
SENSORS = ['WV02']
#SENSORS = ['WV03_VNIR']
#SENSORS = ['GE01']

# Path to location where individual satellite images are located
DATA_PATH = 'data/toulon-laspezia/'
DATA_PATH_IS_RELATIVE = True

# Destination for resized images (used by the resizing step).
DATA_PATH_RESIZED = 'data/toulon-laspezia-resized'

# Root directory for the tiles; uncomment one alternative to switch.
DATA_PATH_TILES = 'data/toulon-laspezia-tiles-test/WV02'
#DATA_PATH_TILES = 'data/toulon-laspezia-tiles-large/WV02'
#DATA_PATH_TILES = 'data/toulon-laspezia-tiles/GE01'
#DATA_PATH_TILES = 'data/toulon-laspezia-tiles-noseaclouds/WV02'

# One sub-directory per partition below the tile root.
DATA_PATH_TILES_TRAIN = DATA_PATH_TILES + '/train'
DATA_PATH_TILES_VAL = DATA_PATH_TILES + '/val'
DATA_PATH_TILES_TEST = DATA_PATH_TILES + '/test'

# Areas of interest to keep.
AREAS = ['La_Spezia', 'Toulon']

# Keep only rows for the selected sensors, then only rows for the selected
# areas (same two-step filtering, written as a chain).
meta = (
    meta
    .loc[lambda df: df['sensorVehicle'].isin(SENSORS)]
    .loc[lambda df: df['area_name'].isin(AREAS)]
)

# Number of images left after filtering.
N_IMAGES = len(meta)

# Image counts per partition, evaluated in train / val / test order.
N_IMAGES_TRAIN, N_IMAGES_VAL, N_IMAGES_TEST = (
    count_images_in_partition(meta, partition)
    for partition in ('train', 'val', 'test')
)
print(N_IMAGES_TRAIN, N_IMAGES_VAL, N_IMAGES_TEST)

# Total number of tiles requested per partition.
N_TILES_TRAIN = 50000
#N_TILES_TRAIN = 0
N_TILES_VAL = 10000
N_TILES_TEST = 10000

# Pan tile size in pixels; uncomment one alternative to switch.
#PAN_WIDTH, PAN_HEIGHT = (1024, 1024)
#PAN_WIDTH, PAN_HEIGHT = (384, 384)
PAN_WIDTH = 128
PAN_HEIGHT = 128
# Pan pixel size (0.5 m for WV02 according to the notebook text).
PAN_PIXEL_WIDTH = 0.5
PAN_PIXEL_HEIGHT = 0.5

# Ratio between pan and ms tile sizes; the ms tile size is the pan tile size
# divided by this factor and truncated to an int.
SR_FACTOR = 4
MS_WIDTH = int(PAN_WIDTH / SR_FACTOR)
MS_HEIGHT = int(PAN_HEIGHT / SR_FACTOR)
# Ms pixel size (2 m for WV02 according to the notebook text).
MS_PIXEL_WIDTH = 2.0
MS_PIXEL_HEIGHT = 2.0

# Resizing is off by default; these two settings feed the resizing step.
RESIZE_TO_PIXEL_SIZE = False
RESAMPLING_METHOD = 'nearest'

# Should be derived automatically, but added here as a quick fix
MS_BANDS = 8

BATCH_SIZE = 16

# Resizing images (if necessary)

This is only relevant for images from the GE01 and WV03_VNIR sensors, as they have a varying pixel resolution. The following function resizes the .tif files to a resolution that matches, as closely as possible (with float32 division), the pixel resolution of the WV02 sensor (ms resolution 2 m, pan resolution 0.5 m). By using `rasterio`, the image georeferencing information is preserved through the process.

In [None]:
# Resizing only runs when explicitly enabled in the configuration.
if RESIZE_TO_PIXEL_SIZE:
    resize_all_sat_imgs_to_new_pixel_size(
        meta,
        save_dir=DATA_PATH_RESIZED,
        new_pixel_size_pan=(PAN_PIXEL_WIDTH, PAN_PIXEL_HEIGHT),
        sr_factor=SR_FACTOR,
        resampling=RESAMPLING_METHOD,
    )

# Tile allocation

The number of tiles drawn from each satellite image is decided through weighted sampling, where the satellite image sizes are the weights. (More precisely, the weight of each image is the ratio between its size and the size of the largest image.) Some images only cover parts of the area of interest, and this weighted sampling ensures that these smaller areas are not disproportionately represented during training, validation and/or testing.

In [None]:
# Pass the requested train/val/test tile totals to the allocation helper and
# rebind `meta` to its result.
# NOTE(review): presumably the returned frame is the metadata extended with
# per-image tile counts - confirm against the helper's implementation.
meta = allocating_tiles(meta, N_TILES_TRAIN, N_TILES_VAL, N_TILES_TEST)

In [None]:
# Persist the tile allocation next to the tiles, both as CSV (human readable)
# and as a pickle (for reloading the exact object).
# The tile directory may not exist yet because this runs before any tile is
# generated; without it both writes below fail, so create it first.
os.makedirs(DATA_PATH_TILES, exist_ok=True)
meta.to_csv(DATA_PATH_TILES + '/metadata_tile_allocation.csv')
with open(DATA_PATH_TILES + '/metadata_tile_allocation.pickle', 'wb') as file:
    pickle.dump(meta, file)

# Tile generation to disk

Tiles are uniformly sampled from the satellite images and saved to disk as individual geotiffs, thus preserving the geospatial information. If the sampled tile includes border area pixels (pixels with value 0 in our case) the tile is discarded and a new tile is sampled.

In [None]:
# Generate the tiles for every image in `meta`, with the tile root directory
# passed as the save location.
generate_all_tiles(meta, save_dir=DATA_PATH_TILES)