## Imports and setup

In [None]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import pathlib
from PIL import Image
import rasterio
from rasterio.plot import reshape_as_image

import tensorflow as tf
AUTOTUNE = tf.data.experimental.AUTOTUNE

from modules.metadata_reader import img_metadata_to_dict, add_names_to_metadata_dict, dict_to_df

# Path to the directory where the individual satellite images are located
DATA_PATH = 'data/toulon-laspezia' 
# Passed to img_metadata_to_dict as `path_is_relative`
DATA_PATH_IS_RELATIVE = True
# Base directory for the converted .npy arrays ('/pan' and '/ms' are appended below)
DATA_PATH_NPY = 'data/toulon-laspezia-npy' 

# Name of the metadata .xml file
METADATA_NAME = 'DeliveryMetadata.xml'

# Names of the areas covered by the satellite imagery
AREAS = ['La_Spezia', 'Toulon'] # Spelled like the directory names

# Specify the xmlns URL found at the top of the metadata .xml file
# (should be on the second line)
XMLNS = 'http://xsd.digitalglobe.com/xsd/dm'

## Metadata parsing from xml to pandas dataframe

Every satellite image delivery from Maxar contains a `DeliveryMetadata.xml` file with important specifications for both the multispectral and panchromatic images. The following functions find all the `DeliveryMetadata.xml` files contained in all subdirectories of a directory and parse them into a *Pandas DataFrame*, which is used for further descriptive statistics of the dataset.

In [None]:
# Parse every DeliveryMetadata.xml found under DATA_PATH into one dict for the
# panchromatic images and one for the multispectral images.
img_metadata_pan, img_metadata_ms = img_metadata_to_dict(
    METADATA_NAME,
    DATA_PATH,
    XMLNS,
    path_is_relative=DATA_PATH_IS_RELATIVE,
)

# Attach the area names to both dicts (pan first, then ms).
img_metadata_pan, img_metadata_ms = (
    add_names_to_metadata_dict(img_metadata_pan, AREAS),
    add_names_to_metadata_dict(img_metadata_ms, AREAS),
)

# Convert both dicts to DataFrames (pan first, then ms).
img_metadata_pan, img_metadata_ms = (
    dict_to_df(img_metadata_pan),
    dict_to_df(img_metadata_ms),
)

In [None]:
# Keep only the panchromatic rows taken by the WV02 sensor over Toulon.
is_wv02 = img_metadata_pan['sensorVehicle'] == 'WV02'
is_toulon = img_metadata_pan['area_name'] == 'Toulon'
toulon_wv02_pan = img_metadata_pan[is_wv02 & is_toulon]

# Sort the image names so the shuffle starts from a deterministic order, then
# shuffle with a fixed seed so the same two images are selected on every run.
img_names = sorted(toulon_wv02_pan.index.values)
np.random.seed(1)
np.random.shuffle(img_names)

images_for_early_trials = img_names[:2]
images_for_early_trials

In [None]:
# Look up the .tif path of each selected image in both metadata tables.
image_paths_pan = [
    img_metadata_pan.loc[name, 'tif_path'] for name in images_for_early_trials
]
image_paths_ms = [
    img_metadata_ms.loc[name, 'tif_path'] for name in images_for_early_trials
]

In [None]:
def tif_to_npy(path_in, save_to_disk = False, path_out = None):
    """Read a raster file and return it, or save it, as a NumPy array.

    The raster is read with rasterio and passed through ``reshape_as_image``
    before being returned or written.

    Args:
        path_in: Path to the source raster (e.g. a ``.tif`` file).
        save_to_disk: If true, write the array to ``path_out`` instead of
            returning it.
        path_out: Destination directory. Required when ``save_to_disk`` is
            true; it is created (including parents) if it does not exist.
            The file is named after the stem of ``path_in``.

    Returns:
        ``True`` when the array was saved to disk, otherwise the image array.

    Raises:
        TypeError: If ``save_to_disk`` is true but ``path_out`` is ``None``.
    """
    if save_to_disk and path_out is None:
        # Fail before the (potentially large) raster is read. TypeError is
        # what pathlib raised for a None path segment in the old code path.
        raise TypeError("path_out must be given when save_to_disk is True")

    path_in = pathlib.Path(path_in)
    with rasterio.open(path_in, 'r') as ds:
        img = ds.read()
    # Diagnostic output kept so the notebook cell output stays the same.
    print(type(img))
    img = reshape_as_image(img)
    print(img.shape)

    if not save_to_disk:
        return img

    out_dir = pathlib.Path(path_out)
    # np.save does not create missing directories, so make sure it exists.
    out_dir.mkdir(parents=True, exist_ok=True)
    np.save(out_dir / path_in.stem, img)
    return True

In [None]:
# Convert the selected images to .npy files: all panchromatic images go to
# '<DATA_PATH_NPY>/pan' first, then all multispectral images to '<DATA_PATH_NPY>/ms'.
for subdir, tif_paths in (('pan', image_paths_pan), ('ms', image_paths_ms)):
    out_dir = DATA_PATH_NPY + '/' + subdir
    for image_path in tif_paths:
        tif_to_npy(image_path, save_to_disk=True, path_out=out_dir)

In [None]:
def crop(img):
    """Placeholder for image cropping.

    No cropping is implemented yet: ``img`` is ignored and ``True`` is
    always returned.
    """
    return True

def decode_img(img):
    """Load a saved NumPy array and convert it to a float32 tensor.

    Args:
        img: Despite the name, this is the path of the ``.npy`` file to load;
            it is passed directly to ``np.load``.

    Returns:
        The result of ``tf.image.convert_image_dtype(..., tf.float32)``
        applied to the loaded array. Per the TensorFlow documentation,
        integer inputs are rescaled to the [0, 1] range by this conversion.
    """
    pixels = np.load(img)
    return tf.image.convert_image_dtype(pixels, tf.float32)

def process_path(file_path_pan, file_path_ms):
    """Load a panchromatic/multispectral image pair from ``.npy`` files.

    Args:
        file_path_pan: Path to the panchromatic ``.npy`` file.
        file_path_ms: Path to the multispectral ``.npy`` file.

    Returns:
        A ``(img_pan, img_ms)`` tuple holding the two results of
        ``decode_img``, panchromatic first.
    """
    return decode_img(file_path_pan), decode_img(file_path_ms)

In [None]:
# Smoke test: load one converted panchromatic/multispectral pair.
pan_npy_path = DATA_PATH_NPY + '/pan/' + '19AUG04104225-P2AS-011650878010_01_P001.npy'
ms_npy_path = DATA_PATH_NPY + '/ms/' + '19AUG04104225-M2AS-011650878010_01_P001.npy'
test = process_path(pan_npy_path, ms_npy_path)

In [None]:
# Show the (pan, ms) pair returned by process_path.
print(test)