## Imports and setup

In [None]:
import numpy as np
import random
import pandas as pd
import os
import matplotlib.pyplot as plt
import pathlib
from PIL import Image
import rasterio
from rasterio.plot import reshape_as_image

import tensorflow as tf
AUTOTUNE = tf.data.experimental.AUTOTUNE

from modules.metadata_reader import img_metadata_to_dict, add_names_to_metadata_dict, dict_to_df

# Path to location where individual satellite images are located
DATA_PATH = 'data/toulon-laspezia' 
DATA_PATH_IS_RELATIVE = True
DATA_PATH_NPY = 'data/toulon-laspezia-npy' 
DATA_PATH_TILES = 'data/toulon-laspezia-tiles'

# Name of metadata .xml file
METADATA_NAME = 'DeliveryMetadata.xml'

# Names of areas covered by satellite imagery
AREAS = ['La_Spezia', 'Toulon'] # Spelled like the directory names

# Specify the xmlns URL declared at the top of the metadata .xml file
# (it should be on the second line)
XMLNS = 'http://xsd.digitalglobe.com/xsd/dm'

## Metadata parsing from xml to pandas dataframe

Every satellite image delivery from Maxar contains a `DeliveryMetadata.xml` file with important specifications for both the multispectral and panchromatic images. The following functions find all the `DeliveryMetadata.xml` files contained in all subdirectories of a directory and parse them into the *Pandas DataFrame* format, which will be used for further descriptive statistics of the dataset.

In [None]:
# Parse the delivery metadata into two containers, one for the panchromatic
# and one for the multispectral images.
img_metadata_pan, img_metadata_ms = img_metadata_to_dict(METADATA_NAME, 
                                                         DATA_PATH, XMLNS, 
                                                         path_is_relative = DATA_PATH_IS_RELATIVE)

# Attach area names (taken from AREAS) to each metadata container.
img_metadata_pan = add_names_to_metadata_dict(img_metadata_pan, AREAS)
img_metadata_ms = add_names_to_metadata_dict(img_metadata_ms, AREAS)

# Convert to DataFrames; the rest of the notebook uses the index as the
# string UID and the 'int_uid' column as the integer UID.
img_metadata_pan = dict_to_df(img_metadata_pan)
img_metadata_ms = dict_to_df(img_metadata_ms)

# Checking that string IDs and int IDs are equal in both dataframes (they should)
# The UID lookup helpers below only consult img_metadata_pan, so they rely on this.
assert all(img_metadata_ms.index == img_metadata_pan.index)
assert all(img_metadata_ms['int_uid'] == img_metadata_pan['int_uid'])

In [None]:
img_metadata_pan

### Lookup functions for UIDs

In [None]:
def get_int_uid(string_UIDs):
    """Translate string UID(s) into the matching integer UID(s).

    The lookup is done against the global ``img_metadata_pan`` dataframe,
    whose index holds the string UIDs and whose ``int_uid`` column holds the
    integer UIDs. The result is whatever ``.tolist()`` yields for the
    selected ``int_uid`` values.
    """
    selected_rows = img_metadata_pan.loc[string_UIDs]
    int_uid_values = selected_rows['int_uid']
    return int_uid_values.tolist()

def get_string_uid(int_UIDs):
    """Translate integer UID(s) into the matching string UID(s).

    The lookup is done against the global ``img_metadata_pan`` dataframe
    (string UIDs in the index, integer UIDs in the ``int_uid`` column).

    Args:
        int_UIDs: A single integer (Python ``int`` or NumPy integer) or an
            iterable of integers.

    Returns:
        A single string UID when exactly one UID was requested (also when it
        was passed as a one-element list), otherwise a list of string UIDs in
        the order of the input.

    Raises:
        IndexError: If an integer UID is not present in the metadata.
    """
    # np.integer covers scalars taken straight from an ndarray or a DataFrame
    # column, which are not instances of the builtin int.
    if isinstance(int_UIDs, (int, np.integer)):
        int_UIDs = [int_UIDs]

    string_UIDs = []
    for int_UID in int_UIDs:
        matches = img_metadata_pan[img_metadata_pan['int_uid'] == int_UID].index.tolist()
        if not matches:
            raise IndexError('No image with int_uid {} in metadata'.format(int_UID))
        # If several rows share the UID, the first one wins
        string_UIDs.append(matches[0])

    if len(string_UIDs) == 1:
        return string_UIDs[0]
    return string_UIDs

# Randomly draw 2 images for early trials

In [None]:
# Restrict the candidates to panchromatic images from the WV02 sensor
# covering the Toulon area.
toulon_wv02_pan = img_metadata_pan[(img_metadata_pan['sensorVehicle'] == 'WV02')
                                   & (img_metadata_pan['area_name'] == 'Toulon')]

# Fixed seed plus a sorted starting order, so the draw does not depend on
# the row order of the dataframe.
np.random.seed(1)
img_names = sorted(toulon_wv02_pan.index.values)
np.random.shuffle(img_names)
# Keep the first two string UIDs of the shuffled list
images_for_early_trials = img_names[:2]
images_for_early_trials

In [None]:
# Integer UIDs of the two early-trial images; load_npy_to_dict and
# generate_tiles key the in-memory images by str(int UID).
images_for_early_trials_int_UIDs = get_int_uid(images_for_early_trials)
images_for_early_trials_int_UIDs

In [None]:
# Number of images described by the metadata
N_IMAGES = len(img_metadata_pan.index)

# Panchromatic (high resolution) tile geometry
PAN_WIDTH = 384
PAN_HEIGHT = 384
PAN_BANDS = 1

# Resolution ratio between panchromatic and multispectral tiles
SR_FACTOR = 4

# Multispectral (low resolution) tile geometry, derived from the pan tile
MS_WIDTH = int(PAN_WIDTH / SR_FACTOR)
MS_HEIGHT = int(PAN_HEIGHT / SR_FACTOR)
MS_BANDS = 8

# Convert .tif files to .npy for easier loading later

In [None]:
def tif_to_npy(path_in, filename, save_to_disk = False, path_out = None):
    """Read a .tif raster and return it as an ndarray or save it as .npy.

    Args:
        path_in: Path to the .tif file.
        filename: File name (without directory) used when saving.
        save_to_disk: If True, write the array to ``path_out/filename`` with
            ``np.save`` instead of returning it.
        path_out: Output directory; required when ``save_to_disk`` is True.
            It is created if it does not exist yet.

    Returns:
        True if the array was saved to disk, otherwise the image array as
        returned by ``reshape_as_image``.

    Raises:
        ValueError: If ``save_to_disk`` is True but ``path_out`` is None.
    """
    # Fail before the (slow) raster read instead of with an obscure TypeError
    # from pathlib afterwards.
    if save_to_disk and path_out is None:
        raise ValueError('path_out must be given when save_to_disk is True')

    with rasterio.open(pathlib.Path(path_in), 'r') as ds:
        img = reshape_as_image(ds.read())
    print(img.shape)

    if not save_to_disk:
        return img

    path_out = pathlib.Path(path_out)
    # np.save does not create missing directories
    path_out.mkdir(parents=True, exist_ok=True)
    np.save(pathlib.Path(path_out, filename), img)
    return True

def all_tif_to_npy(metadata_pan, metadata_ms, path_out):
    """Convert every .tif listed in the metadata dataframes to .npy files.

    Panchromatic images are written to ``path_out/pan`` and multispectral
    images to ``path_out/ms``. Each file is named after its dataframe index
    value, and its source is read from the ``tif_path`` column.

    Args:
        metadata_pan: Metadata dataframe of the panchromatic images.
        metadata_ms: Metadata dataframe of the multispectral images.
        path_out: Root output directory; the subdirectories are created if
            they do not exist.
    """
    for metadata, subdir in ((metadata_pan, 'pan'), (metadata_ms, 'ms')):
        path_out_sub = pathlib.Path(path_out, subdir)
        # np.save does not create missing directories
        path_out_sub.mkdir(parents=True, exist_ok=True)
        for filename in metadata.index.values.tolist():
            tif_to_npy(metadata.loc[filename]['tif_path'],
                       filename, save_to_disk = True,
                       path_out = path_out_sub)
            print('Saved', filename, 'in dir', str(path_out_sub))

In [None]:
# Uncomment to actually convert (takes some time):

#all_tif_to_npy(img_metadata_pan, img_metadata_ms, DATA_PATH_NPY)

# Adding paths to .npy files as column in metadata dataframe

The metadata dataframes are kept up to date so that they can be used as a canonical source of information about the images.

In [None]:
def add_npy_paths_to_metadata_df(metadata_pan, metadata_ms, path_to_npy):
    """Append an ``npy_path`` column to both metadata dataframes.

    All ``.npy`` files below ``<cwd>/<path_to_npy>/pan`` and
    ``<cwd>/<path_to_npy>/ms`` are collected recursively. Each file is matched
    to a metadata row through its stem (file name without suffix), which is
    aligned with the dataframe index by a column-wise ``pd.concat``.

    Returns:
        Tuple ``(metadata_pan, metadata_ms)`` with the extra column.
    """
    def _with_npy_paths(metadata, subdir, index_label):
        # Build a one-column frame indexed by file stem and glue it on
        npy_files = list(pathlib.Path(pathlib.Path.cwd(), path_to_npy, subdir).glob('**/*.npy'))
        stems = [npy_file.stem for npy_file in npy_files]
        path_df = pd.DataFrame({index_label: stems, 'npy_path': npy_files})
        path_df = path_df.set_index(index_label)
        return pd.concat([metadata, path_df], axis=1)

    pan_with_paths = _with_npy_paths(metadata_pan, 'pan', 'pan_names')
    ms_with_paths = _with_npy_paths(metadata_ms, 'ms', 'ms_names')
    return pan_with_paths, ms_with_paths

# Rebind both metadata dataframes to the versions that carry 'npy_path'.
# NOTE(review): re-running this cell concatenates a second 'npy_path' column
# onto the already extended dataframes.
img_metadata_pan, img_metadata_ms = add_npy_paths_to_metadata_df(img_metadata_pan, 
                                                                 img_metadata_ms, 
                                                                 DATA_PATH_NPY)

# Loading .npy files into memory

In [None]:
def load_npy_to_dict(metadata_df, int_UID_list = None):
    """Load .npy images into memory, keyed by their integer UID as a string.

    Args:
        metadata_df: Metadata dataframe indexed by string UID, with an
            ``npy_path`` column and an ``int_uid`` column.
        int_UID_list: Integer UIDs of the images to load (list or array).
            Defaults to every UID in ``metadata_df['int_uid']``.

    Returns:
        Dict mapping ``str(int_UID)`` to the loaded ndarray.
    """
    # 'is None' rather than '== None': the latter compares elementwise when an
    # ndarray of UIDs is passed in, and the 'if' then raises.
    if int_UID_list is None:
        int_UID_list = metadata_df['int_uid'].tolist()

    d = {}
    print('Loading .npy files')
    for int_UID in int_UID_list:
        # get_string_uid resolves against img_metadata_pan; this matches
        # metadata_df only while both dataframes share the same UIDs.
        string_UID = get_string_uid(int_UID)
        key = str(int_UID)
        d[key] = np.load(metadata_df.loc[string_UID]['npy_path'])
        print(key, ' - ', string_UID,
              'loaded into memory as ndarray with shape', d[key].shape)
    return d

#def load_npy_to_list(metadata_df, int_UID_list = None):
#    if ID_list == None:
#        ID_list = list(metadata_df.index.tolist())
#    l = []
#    for ID in ID_list:
#        l.append(np.load(metadata_df.loc[ID]['npy_path']))
#        print(ID, 'loaded into memory as ndarray with shape', l[-1].shape)
#    return l

## Load only early trials images

In [None]:
# Both dicts are keyed by str(int UID), so a pan image and its ms counterpart
# share the same key.
imgs_pan = load_npy_to_dict(img_metadata_pan, images_for_early_trials_int_UIDs)
imgs_ms = load_npy_to_dict(img_metadata_ms, images_for_early_trials_int_UIDs)

## Load all images
Keep a watch on available RAM!

In [None]:
#imgs_pan = load_npy_to_dict(img_metadata_pan)
#imgs_ms = load_npy_to_dict(img_metadata_ms)

# Data generation pipeline

In [None]:
def crop(img, yxhwc_box):
    """Cut the region described by ``yxhwc_box`` out of ``img``.

    Only the first four entries of the box are used: top offset, left
    offset, height and width, in that order.
    """
    top, left, height, width = (yxhwc_box[k] for k in range(4))
    return tf.image.crop_to_bounding_box(
        img,
        offset_height = top,
        offset_width = left,
        target_height = height,
        target_width = width)

def get_random_box(img_shape, crop_size, rng = None):
    """Draw a random crop box that lies fully inside an image.

    Args:
        img_shape: Shape of the image; the first two entries are read as
            (height, width).
        crop_size: Sequence whose first two entries are the crop height and
            width. The whole sequence is appended to the returned box.
        rng: Optional ``np.random.Generator`` for reproducible draws. A fresh
            default generator is created when omitted.

    Returns:
        ndarray ``[y, x, *crop_size]`` where (y, x) is the upper-left corner.

    Raises:
        ValueError: If the crop is larger than the image.
    """
    img_height, img_width = img_shape[:2]
    maxval_y = img_height - crop_size[0]
    maxval_x = img_width - crop_size[1]
    if maxval_y < 0 or maxval_x < 0:
        raise ValueError('crop_size {} does not fit into image of shape {}'.format(
            list(crop_size[:2]), list(img_shape[:2])))

    if rng is None:
        rng = np.random.default_rng()
    # endpoint=True makes the upper bound inclusive: the largest valid offset
    # is exactly image size minus crop size, and an image that equals the
    # crop size must yield offset 0 instead of raising.
    upper_left_yx = rng.integers(0, high=[maxval_y, maxval_x],
                                 dtype='int32', endpoint=True)

    #returning in yxhwc format
    return np.concatenate((upper_left_yx, np.array(crop_size)))

def get_hr_box(lr_box, resize_factor, channels):
    """Scale a low-resolution yxhwc box to the high-resolution image.

    Args:
        lr_box: Box ``[y, x, height, width, channels]`` in low-resolution
            pixel coordinates. It is not modified.
        resize_factor: Factor applied to y, x, height and width.
        channels: Channel count written into the last slot of the new box.

    Returns:
        A new ndarray holding the scaled box.
    """
    # Work on a copy: a plain assignment would alias lr_box and silently
    # rescale the caller's low-resolution box as well.
    hr_box = np.array(lr_box)
    hr_box[:4] = hr_box[:4] * resize_factor
    hr_box[4] = channels
    return hr_box

def scale_image(img):
    """Convert ``img`` to ``tf.float32`` via ``tf.image.convert_image_dtype``.

    The conversion is meant to yield floats in the [0,1] range.
    NOTE(review): the scaling presumably follows the input dtype's full range,
    not the sensor's actual bit depth - confirm against the TensorFlow docs.
    """
    return tf.image.convert_image_dtype(img, tf.float32)

def add_imgID(img_array, imgID):
    # NOTE(review): looks like an unfinished stub - `imgID` is never used,
    # nothing is returned, and the last line has no effect. Confirm the
    # intent before calling this anywhere.
    img_array = np.expand_dims(img_array, 0)  # prepend a length-1 axis
    img_array[:,]  # bare indexing expression; its result is discarded

def preprocess_tiles(imgs_pan, imgs_ms, img_IDs, n_tiles):
    """Cut `n_tiles` random, spatially matching ms/pan tiles into two arrays.

    Returns (arr_ms, arr_pan, tile_imgID_map), where tile_imgID_map[i] is the
    entry of `img_IDs` that tile i was drawn from.

    NOTE(review): `imgs_pan` / `imgs_ms` are indexed by the drawn *position*
    (0..len(img_IDs)-1), not by `img_ID`. That fits list-like containers
    ordered like `img_IDs`; the dicts built by `load_npy_to_dict` are keyed by
    str(int UID) and would raise KeyError here. `generate_tiles` is the
    dict-based counterpart - confirm whether this function is still needed.
    """
    n_images = len(img_IDs)
    # Output buffers, one slot per tile (np.zeros default dtype)
    arr_ms = np.zeros((n_tiles, MS_HEIGHT, MS_WIDTH, MS_BANDS))
    arr_pan = np.zeros((n_tiles, PAN_HEIGHT, PAN_WIDTH, PAN_BANDS))
    print(arr_ms.shape)
    tile_imgID_map = []
    
    rng = np.random.default_rng()
    
    for i in range(n_tiles):
        # Draw the position of the source image for this tile
        img_ID_int = rng.integers(0, high=n_images, dtype='int32')
        img_ID = img_IDs[img_ID_int]
        tile_imgID_map.append(img_ID)
        print(img_ID)

        # The whole source image is converted to float32 before cropping
        img_pan = scale_image(imgs_pan[img_ID_int])
        img_ms = scale_image(imgs_ms[img_ID_int])
        
        # Random box in ms coordinates, then the same region in pan coordinates
        box_ms = get_random_box(img_ms.shape, [MS_HEIGHT, MS_WIDTH, MS_BANDS])
        print(box_ms)
        img_ms_cropped = crop(img_ms, box_ms)
        # get_hr_box writes into the array it is given, so box_ms must not be
        # used again after this call
        box_pan = get_hr_box(box_ms, SR_FACTOR, PAN_BANDS)
        img_pan_cropped = crop(img_pan, box_pan)
        arr_ms[i,:,:,:] = img_ms_cropped
        arr_pan[i,:,:,:] = img_pan_cropped
    
    return arr_ms, arr_pan, tile_imgID_map

def generate_tiles(n_tiles, imgs_pan, imgs_ms, int_UIDs):
    """Yield `n_tiles` random, spatially matching ms/pan tile pairs.

    Args:
        n_tiles: Number of tiles to generate.
        imgs_pan: Dict of panchromatic images keyed by ``str(int UID)``.
        imgs_ms: Dict of multispectral images keyed by ``str(int UID)``.
        int_UIDs: Sequence of integer UIDs to draw source images from.

    Yields:
        Tuples ``(int_UID, img_ms_cropped, img_pan_cropped)``. Both crops have
        been passed through ``scale_image``.
    """
    for _ in range(n_tiles):
        # Draw which image to produce tile from
        int_UID = random.choice(int_UIDs)
        img_pan = imgs_pan[str(int_UID)]
        img_ms = imgs_ms[str(int_UID)]

        # Crop first and scale only the tile: the dtype conversion is
        # elementwise, so the result equals scaling the full image, without
        # converting the whole image to float32 for every single tile.
        box_ms = get_random_box(img_ms.shape, [MS_HEIGHT, MS_WIDTH, MS_BANDS])
        img_ms_cropped = scale_image(crop(img_ms, box_ms))

        # The ms crop is taken before box_ms is handed to get_hr_box, so it
        # does not matter whether get_hr_box modifies its argument.
        box_pan = get_hr_box(box_ms, SR_FACTOR, PAN_BANDS)
        img_pan_cropped = scale_image(crop(img_pan, box_pan))

        yield int_UID, img_ms_cropped, img_pan_cropped

In [None]:
def tiles_to_npy(n_tiles, imgs_pan, imgs_ms, int_UIDs):
    """Materialise the output of ``generate_tiles`` as NumPy arrays.

    Returns:
        Tuple ``(arr_ms, arr_pan, tile_img_IDs)``: the ms tiles, the pan
        tiles, and for each tile the integer UID of its source image.
    """
    ms_tiles = np.zeros((n_tiles, MS_HEIGHT, MS_WIDTH, MS_BANDS))
    pan_tiles = np.zeros((n_tiles, PAN_HEIGHT, PAN_WIDTH, PAN_BANDS))
    source_uids = []

    tile_stream = generate_tiles(n_tiles, imgs_pan, imgs_ms, int_UIDs)
    for n_done, (uid, ms_tile, pan_tile) in enumerate(tile_stream, start=1):
        slot = n_done - 1
        ms_tiles[slot] = ms_tile.numpy()
        pan_tiles[slot] = pan_tile.numpy()
        source_uids.append(uid)
        # Progress report after every tenth tile
        if n_done % 10 == 0:
            print('generated', n_done, 'tiles')

    return ms_tiles, pan_tiles, source_uids

# Generate training tiles



In [None]:
#arr_ms, arr_pan, tile_img_IDs = tiles_to_npy(2000, imgs_pan, imgs_ms, images_for_early_trials_int_UIDs)

# NOTE(review): with the tiles_to_npy call above commented out, arr_pan and
# arr_ms are only defined once the load_tiles cell further down has run.
plt.imshow(arr_pan[10,:,:,0], cmap = 'gray')
len(arr_ms)

In [None]:
def save_tiles(arr_ms, arr_pan, tile_img_IDs, path_out, prefix):
    """Save tile arrays and the tile-to-image ID list to disk.

    Writes three files into ``path_out`` (created if missing):
    ``<prefix>-ms-n<N>.npy``, ``<prefix>-pan-n<N>.npy`` and
    ``<prefix>-IDs-n<N>.txt``, where N is the number of tiles. The text file
    has one ID per line.

    Args:
        arr_ms: Array of multispectral tiles.
        arr_pan: Array of panchromatic tiles.
        tile_img_IDs: Iterable with the source image ID of every tile.
        path_out: Output directory.
        prefix: Prefix for the file names.
    """
    path_out = pathlib.Path(path_out)
    # np.save and open() do not create missing directories
    path_out.mkdir(parents=True, exist_ok=True)

    np.save(pathlib.Path(path_out, '{}-ms-n{}'.format(prefix, len(arr_ms))), arr_ms)
    print('Saved ms tiles to .npy')
    np.save(pathlib.Path(path_out, '{}-pan-n{}'.format(prefix, len(arr_pan))), arr_pan)
    print('Saved pan tiles to .npy')

    ids_path = pathlib.Path(path_out, '{}-IDs-n{}.txt'.format(prefix, len(arr_pan)))
    with open(ids_path, 'w') as f:
        f.writelines('{}\n'.format(item) for item in tile_img_IDs)
    print('Saved tile<->image ID list to .txt')
    
#save_tiles(arr_ms, arr_pan, tile_img_IDs, DATA_PATH_TILES, '01')

In [None]:
def load_tiles(dir_path, prefix):
    """Load tile arrays and the tile-to-image ID list written by save_tiles.

    Looks in ``dir_path`` for files matching ``<prefix>-ms*``,
    ``<prefix>-pan*`` and ``<prefix>-ID*``. If several files match a pattern,
    the alphabetically first one is used.

    Args:
        dir_path: Directory containing the tile files.
        prefix: File name prefix of the tile set.

    Returns:
        Tuple ``(arr_ms, arr_pan, tile_img_IDs)``, the IDs being ints.

    Raises:
        FileNotFoundError: If one of the three files cannot be found.
    """
    dir_path = pathlib.Path(dir_path)

    def _first_match(pattern):
        # Sort the matches: glob order is arbitrary, so taking element [0]
        # directly would not be reproducible when several files match.
        matches = sorted(dir_path.glob(pattern))
        if not matches:
            raise FileNotFoundError(
                "No file matching '{}' in {}".format(pattern, dir_path))
        return matches[0]

    ms_path = _first_match(str(prefix) + '-ms*')
    pan_path = _first_match(str(prefix) + '-pan*')
    ID_path = _first_match(str(prefix) + '-ID*')

    arr_ms = np.load(ms_path)
    arr_pan = np.load(pan_path)

    with open(ID_path, 'r') as f:
        # Skip blank lines (e.g. a trailing empty line) instead of failing in int()
        tile_img_IDs = [int(line) for line in f.read().splitlines() if line.strip()]

    return arr_ms, arr_pan, tile_img_IDs

# Load the tile set with prefix '01' (the prefix used in the commented-out
# save_tiles call above).
arr_ms, arr_pan, tile_img_IDs = load_tiles(DATA_PATH_TILES, '01')

In [None]:
arr_ms.shape

# SRCNN

In [None]:
import numpy as np
import cv2
import os
import sys
import matplotlib.pyplot as plt
from pathlib import Path


import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.initializers import RandomNormal

# Check GPUs:
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            # Prevent TensorFlow from allocating all memory of all GPUs:
            tf.config.experimental.set_memory_growth(gpu, True)
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        # Best effort: report the error and carry on
        # NOTE(review): presumably raised when the GPUs are already initialised - confirm
        print(e)

In [None]:
# Training hyperparameters passed to srcnn.fit below
EPOCHS = 100
BATCH_SIZE = 16

In [None]:
def bicubic_upsampling(arr):
    """Resize ``arr`` to (PAN_HEIGHT, PAN_WIDTH) with bicubic interpolation."""
    target_size = [PAN_HEIGHT, PAN_WIDTH]
    return tf.image.resize(arr, target_size, method=tf.image.ResizeMethod.BICUBIC)
arr_ms_upsampled = bicubic_upsampling(arr_ms)
arr_ms_upsampled.shape

In [None]:
def build_srcnn(channels_in, channels_out, learning_rate = 0.0003):
    """Build and compile the three-layer SRCNN model.

    The layers are 9x9 conv (128 filters, ReLU), 1x1 conv (64 filters, ReLU)
    and 5x5 conv (``channels_out`` filters, linear). All use 'same' padding,
    zero biases and N(0, 0.001) kernel initialisation. The model is compiled
    with Adam and mean squared error.

    Args:
        channels_in: Number of channels of the input tiles.
        channels_out: Number of channels of the predicted tiles.
        learning_rate: Learning rate for the Adam optimizer.

    Returns:
        The compiled ``Sequential`` model. The input shape is fixed to
        ``(PAN_HEIGHT, PAN_WIDTH, channels_in)``.
    """
    def _conv(filters, kernel_size, activation, **kwargs):
        # All three layers share the initialisation and padding settings
        return Conv2D(filters=filters, kernel_size=kernel_size,
                      kernel_initializer=RandomNormal(mean=0.0, stddev=0.001, seed=None),
                      bias_initializer='zeros',
                      activation=activation, padding='same', use_bias=True,
                      **kwargs)

    srcnn = Sequential()
    srcnn.add(_conv(128, (9, 9), 'relu',
                    input_shape=(PAN_HEIGHT, PAN_WIDTH, channels_in)))
    srcnn.add(_conv(64, (1, 1), 'relu'))
    srcnn.add(_conv(channels_out, (5, 5), 'linear'))

    # define optimizer
    # `learning_rate` is the current keyword; `lr` is a deprecated alias.
    adam = Adam(learning_rate=learning_rate)

    # compile model
    srcnn.compile(optimizer=adam, loss='mean_squared_error', metrics=['mean_squared_error'])

    return srcnn

srcnn = build_srcnn(channels_in = MS_BANDS, channels_out = PAN_BANDS)
srcnn.summary()

In [None]:
# Train on the bicubically upsampled ms tiles with the pan tiles as target.
# No validation data is passed (the validation_data line is commented out).
history = srcnn.fit(arr_ms_upsampled, arr_pan,
                    epochs = EPOCHS, 
                    batch_size = BATCH_SIZE,
                    #validation_data = (lr_test, hr_test)
                   )
# NOTE(review): presumably requires the 'models' directory to exist - confirm
srcnn.save_weights('models/model1.h5')

In [None]:
srcnn.load_weights('models/model1.h5')

In [None]:
arr_ms_upsampled.numpy()[:,:,:,0].shape

In [None]:
# Plot the training loss, and the validation loss when it was recorded.
plt.plot(history.history['loss'])
legend_labels = ['Train']
# 'val_loss' is missing when fit() ran without validation data, as in the
# training cell above; indexing it unconditionally raises KeyError.
if 'val_loss' in history.history:
    plt.plot(history.history['val_loss'])
    legend_labels.append('Test')
plt.title('Model loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(legend_labels, loc='upper left')
plt.show()

In [None]:
arr_ms_upsampled_np = arr_ms_upsampled.numpy()

In [None]:
def plot_comparison(ms, pan):
    """Show input tile, SRCNN prediction and pan tile side by side.

    ``ms`` is run through the global ``srcnn`` model as a batch of one.
    Channel 0 of each of the three images is drawn in grayscale.
    """
    batch = np.expand_dims(ms, axis = 0)
    sr = srcnn.predict(batch)[0,:,:,:]

    cmap = 'gray'
    fig = plt.figure(figsize = (80,80))

    panels = (('MS', ms), ('SRCNN', sr), ('PAN', pan))
    for position, (title, image) in enumerate(panels, start=1):
        # add_subplot makes the new axes current, so plt.imshow draws into it
        fig.add_subplot(1, 3, position).set_title(title)
        plt.imshow(image[:,:,0], cmap = cmap)

In [None]:
# Plot 30 randomly chosen tiles (indices drawn with replacement).
# Use the ndarray from the previous cell (arr_ms_upsampled_np) rather than
# calling arr_ms_upsampled.numpy() again on every iteration.
random_idxs = np.random.randint(0, len(arr_ms_upsampled_np), size = 30)
for idx in random_idxs:
    ms_tile = arr_ms_upsampled_np[idx,:,:,:]
    print(ms_tile.shape)
    plot_comparison(ms_tile, arr_pan[idx,:,:,:])

In [None]:
# Rebind arr_ms to a scalar, dropping this reference to the tile array
# (presumably to free memory - confirm).
arr_ms = 0

# Scratchpad below: Not runnable

In [None]:
# NOTE(review): scratch attempt. generate_tiles yields 3-tuples
# (int_UID, ms tile, pan tile), while this declares a single float32 output
# of shape [96, 96, 8]. It also passes string UIDs (images_for_early_trials)
# where generate_tiles builds its dict keys from int UIDs.
dataset = tf.data.Dataset.from_generator(generate_tiles, 
                                         args=[3, imgs_pan, imgs_ms, images_for_early_trials], 
                                         output_types= tf.float32, 
                                         output_shapes = tf.TensorShape([96, 96, 8])
                                        )

In [None]:
# NOTE(review): scratch attempt. Two outputs are declared, but
# generate_tiles yields three values per tile, and output_types is given as
# (None, None) rather than actual dtypes.
dataset = tf.data.Dataset.from_generator(generate_tiles, 
                                         args=[3, imgs_pan, imgs_ms, images_for_early_trials], 
                                         output_types=(None , None), 
                                         output_shapes = ((96, 96, 8) , (384, 384, 1) )
                                        )

In [None]:
test = list(dataset.take(10).as_numpy_iterator())

In [None]:
# Scratch loop: print the shape of ten batches of ten elements each.
for count_batch in dataset.repeat().batch(10).take(10):
    print(count_batch.shape)
    print()
    print  # bare name without call parentheses: has no effect

In [None]:
# NOTE(review): scratch cell. preprocess_tiles indexes imgs_pan / imgs_ms by
# position, while both are dicts keyed by str(int UID), so this call is
# expected to fail as written.
arr_ms, arr_pan, tile_imgID_map = preprocess_tiles(imgs_pan, imgs_ms, images_for_early_trials, 10)

fig = plt.figure(figsize = (15,15))
    
# Left panel: band 0 of the first ms tile
ax0 = fig.add_subplot(1,2,1)
ax0.set_title('MS')
ax0 = plt.imshow(arr_ms[0,:,:,0], cmap = 'gray')
    
# Right panel: the first pan tile
ax1 = fig.add_subplot(1,2,2)
ax1.set_title('PAN')
ax1 = plt.imshow(arr_pan[0,:,:,0], cmap = 'gray')

In [None]:

    feature = {
        #'img_ID': _bytes_feature(img_ID),
        'image_pan': _bytes_feature(img_pan_cropped),
        'pan_height': _int64_feature(img_pan_cropped.shape[0]),
        'pan_width': _int64_feature(img_pan_cropped.shape[1]),
        'pan_channels': _int64_feature(img_pan_cropped.shape[2]),
        'image_ms': _bytes_feature(img_ms_cropped),
        'ms_height': _int64_feature(img_ms_cropped.shape[0]),
        'ms_width': _int64_feature(img_ms_cropped.shape[1]),
        'ms_channels': _int64_feature(img_ms_cropped.shape[2])
    }
    
    return tf.train.Example(features=tf.train.Features(feature=feature))