In [20]:
# Helping Michael with remote sensing data

import cv2
import tifffile # for reading tiff files. There are other modules that can do this, but tifffile is most stable on Windows
import numpy as np # for array handling
import matplotlib.pyplot as plt # for QC
import glob # to gather up image filepath lists
from skimage.transform import resize # we're gonna do some rearranging
#import rasterio # let's try to ditch this
import scipy # same
from sklearn.preprocessing import OneHotEncoder
from keras import utils

In [21]:
class Dataset:

    """
    Helpers for loading TIFF satellite images into NumPy arrays for semantic segmentation.

    ``load_list`` collects file paths; the ``load_and_resize_*`` methods read those
    files with ``tifffile`` and resample each one to a common square size with
    ``skimage.transform.resize``.
    """

    def load_list(self, directory, pattern):
        """
        Return the sorted paths of the ``.tif`` files in `directory` whose names start with `pattern`.

        Parameters:
            directory: folder to search (not recursive).
            pattern: file-name prefix; ``*.tif`` is appended to build the glob pattern.

        Returns:
            A list of matching paths in lexicographic order (empty if nothing matches).
        """

        # glob yields matches in arbitrary, filesystem-dependent order. Sorting makes the
        # result reproducible, so lists gathered from parallel folders that share the same
        # numbering line up index-for-index.
        return sorted(glob.glob(f'{directory}/{pattern}*.tif'))

    @staticmethod
    def _load_and_resize(im_list, shape, dims, **resize_kwargs):
        """
        Read every file in `im_list` and resample it to ``(shape, shape, dims)``.

        Shared implementation behind the public loaders. Returns a float64 array of
        shape ``(len(im_list), shape, shape, dims)``.
        """

        stack = np.zeros((len(im_list), shape, shape, dims))

        for i, path in enumerate(im_list):
            stack[i] = resize(tifffile.imread(path), (shape, shape, dims), **resize_kwargs)

        return stack

    @staticmethod
    def load_and_resize_images(im_list, shape, dims, **resize_kwargs):
        """
        Load multi-band images and resample them into a single array for ML tasks.

        Parameters:
            im_list: sequence of TIFF file paths, typically the output of ``load_list``.
            shape: side length, in pixels, of the square output images.
            dims: number of channels in the output.
            **resize_kwargs: optional keyword arguments forwarded unchanged to
                ``skimage.transform.resize`` (for example ``order`` or ``preserve_range``).
                When omitted, ``resize`` runs with its own defaults.

        Returns:
            A float64 array of shape ``(len(im_list), shape, shape, dims)``.
        """

        return Dataset._load_and_resize(im_list, shape, dims, **resize_kwargs)

    @staticmethod
    def load_and_resize_dsm(im_list, shape, dims=1, **resize_kwargs):
        """
        Load single-band rasters (e.g. DSM tiles) and resample them into a single array.

        Parameters:
            im_list: sequence of TIFF file paths, typically the output of ``load_list``.
            shape: side length, in pixels, of the square output rasters.
            dims: size of the trailing channel axis requested from ``resize``. The axis
                is squeezed away before returning, so it must stay 1.
            **resize_kwargs: optional keyword arguments forwarded unchanged to
                ``skimage.transform.resize``. When this loader is used for class-label
                rasters, consider ``order=0, preserve_range=True, anti_aliasing=False``
                so class ids are not blended or rescaled -- see the scikit-image
                ``resize`` documentation.

        Returns:
            A float64 array of shape ``(len(im_list), shape, shape)``.

        Raises:
            ValueError: from ``np.squeeze`` if `dims` is not 1.
        """

        dsm_array = Dataset._load_and_resize(im_list, shape, dims, **resize_kwargs)

        # Drop the length-1 channel axis so each raster is a plain 2-D array.
        return np.squeeze(dsm_array, -1)
    

In [22]:
class Preprocessor:

    """
    The Preprocessor class is available to help prepare data for machine learning.


    top_hat_filter applies a white top-hat filter for removing terrain effect.
    This filter is somewhat sensitive to the kernel size (as is every filter, I suppose),
    and will need some trial and error.  It is assumed that the images are reshaped prior
    to filtering.

    categorical leverages Keras to one-hot encode integer class labels.
    """

    @staticmethod
    def top_hat_filter(dsm_array, filter_size=400):
        """
        Apply a white top-hat morphological filter to every raster in `dsm_array`.

        Parameters:
            dsm_array: array iterated along its first axis; each element is passed to
                ``cv2.morphologyEx`` on its own (assumes a stack of 2-D rasters such as
                ``(n, height, width)`` -- TODO confirm against callers).
            filter_size: side length, in pixels, of the square structuring element.

        Returns:
            An array with the same shape and dtype as `dsm_array` holding the
            filtered rasters.
        """

        # The structuring element is identical for every raster, so build it once.
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (filter_size, filter_size))

        top_hat_array = np.zeros_like(dsm_array)

        for i, dsm in enumerate(dsm_array):
            top_hat_array[i] = cv2.morphologyEx(dsm, cv2.MORPH_TOPHAT, kernel)

        return top_hat_array

    @staticmethod
    def categorical(labels):
        """
        One-hot encode `labels` with ``keras.utils.to_categorical``.

        Parameters:
            labels: array-like of class indices.

        Returns:
            Whatever ``to_categorical`` returns for `labels` (an array with an added
            trailing class axis).
        """

        return utils.to_categorical(labels)

In [23]:
data_loader = Dataset()

In [24]:
imgs = data_loader.load_list('ISPRS/top', 'top_mosaic_09cm_area')

In [25]:
img_array = Dataset.load_and_resize_images(im_list=imgs, shape=512, dims=3)

In [7]:
dsm_list = data_loader.load_list('ISPRS/dsm', 'dsm_09cm_matching_area')

In [8]:
dsm_array = Dataset.load_and_resize_dsm(dsm_list, 512)

In [9]:
dsm_tophat = Preprocessor.top_hat_filter(dsm_array, 75)

In [10]:
labels = data_loader.load_list('ISPRS/gt', 'top_mosaic_09cm_area')

In [11]:
label_array = Dataset.load_and_resize_dsm(labels, 512)

In [12]:
labels = Preprocessor.categorical(label_array)

In [13]:
labels.shape

(33, 512, 512, 2)