In [None]:
from __future__ import print_function, division
import os
import torch
import pandas as pd
from skimage import io, transform
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import rasterio as rio
from rasterio import features
from pathlib import Path
import pathlib
import geopandas as gpd
from descartes import PolygonPatch
from PIL import Image
import itertools
import re

from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils


# Ignore warnings
import warnings
warnings.filterwarnings("ignore")

plt.ion()   # interactive mode

In [None]:
%matplotlib inline 

In [None]:
# Raise the global figure resolution so thin polygon outlines stay visible.
mpl.rcParams.update({'figure.dpi': 300})
mpl.rcParams['figure.dpi']

## Input Paths

In [None]:
# Dataset folders, relative to the notebook's working directory.
# All three live under the same dataset root
# ('../input/spacenet-7-multitemporal-urban-development').
train_dir = Path('../input/spacenet-7-multitemporal-urban-development/SN7_buildings_train')
test_dir = Path('../input/spacenet-7-multitemporal-urban-development/SN7_buildings_test_public')
sample_dir = Path('../input/spacenet-7-multitemporal-urban-development/SN7_buildings_train_sample')

## Output Paths

In [None]:
# Outputs are written below the current working directory.
output_path = Path.cwd()
output_csv_path = output_path.joinpath('output_csvs/')
# Create the CSV folder up front; an already existing folder is not an error.
output_csv_path.mkdir(parents=True, exist_ok=True)

## Test Path and GeoDataFrame

In [None]:
# Example tile used throughout the notebook (taken from the *train sample* folder,
# the "test_" prefix only means "used for trying the helpers out").
# Un-suffixed names refer to the 2018_01 mosaic; the "_24" suffix refers to the
# 2019_12 mosaic of the same tile.
test_raster_path = Path('../input/spacenet-7-multitemporal-urban-development/SN7_buildings_train_sample/sample/L15-0506E-1204N_2027_3374_13/images_masked/global_monthly_2018_01_mosaic_L15-0506E-1204N_2027_3374_13.tif')
test_raster_path_24 = Path('../input/spacenet-7-multitemporal-urban-development/SN7_buildings_train_sample/sample/L15-0506E-1204N_2027_3374_13/images_masked/global_monthly_2019_12_mosaic_L15-0506E-1204N_2027_3374_13.tif')
# Building labels for the same two mosaics, from the labels_match_pix folder.
test_geojson_path = Path('../input/spacenet-7-multitemporal-urban-development/SN7_buildings_train_sample/sample/L15-0506E-1204N_2027_3374_13/labels_match_pix/global_monthly_2018_01_mosaic_L15-0506E-1204N_2027_3374_13_Buildings.geojson')
test_geojson_path_24 = Path('../input/spacenet-7-multitemporal-urban-development/SN7_buildings_train_sample/sample/L15-0506E-1204N_2027_3374_13/labels_match_pix/global_monthly_2019_12_mosaic_L15-0506E-1204N_2027_3374_13_Buildings.geojson')
# Load both label files as GeoDataFrames.
test_gdf = gpd.read_file(test_geojson_path)
test_gdf_24 = gpd.read_file(test_geojson_path_24)

In [None]:
# Index both label frames by building 'Id' and order them by that index.
test_gdf = test_gdf.set_index('Id').sort_index()
test_gdf_24 = test_gdf_24.set_index('Id').sort_index()

Let's write a couple of helper functions to help us visualize our raster and vector files.

## Plotting GeoDataFrame Polygons

In [None]:
def _iter_polygons(geom):
    """Yield every non-empty Polygon contained in ``geom``.

    Multi-part geometries (anything exposing ``.geoms``) are flattened
    recursively. ``None``, empty and non-polygonal geometries yield nothing.
    """
    if geom is None or geom.is_empty:
        return
    if geom.geom_type == 'Polygon':
        yield geom
    elif hasattr(geom, 'geoms'):
        for part in geom.geoms:
            yield from _iter_polygons(part)


def plot_gdf(gdf,fill=False,ax=None,linewidth=0.2):
    """Draw the polygons of a GeoDataFrame on a matplotlib axes.

    Parameters
    ----------
    gdf : mapping with a 'geometry' entry (e.g. a GeoDataFrame)
        Geometries to draw. Missing/empty geometries are skipped and
        multi-part geometries are drawn part by part.
    fill : bool, default False
        If True each polygon is added as a filled 'fuchsia' patch,
        otherwise only its exterior ring is drawn as a line.
    ax : matplotlib axes, optional
        Axes to draw on. A new 3x3 inch figure is created when omitted.
    linewidth : float, default 0.2
        Line width of the outline / patch edge.

    Returns
    -------
    The axes that was drawn on.
    """
    if ax is None:
        _,ax = plt.subplots(1,figsize=(3, 3))

    for geom in gdf['geometry']:
        # A row may hold a MultiPolygon (no ``.exterior``) or no geometry at
        # all, so work on the individual polygons instead of the raw value.
        for polygon in _iter_polygons(geom):
            if fill:
                patch = PolygonPatch(polygon,linewidth=linewidth,color='fuchsia')
                ax.add_patch(patch)
            else:
                ax.plot(*polygon.exterior.xy,linewidth=linewidth)
    return ax

In [None]:
plot_gdf(test_gdf);

## Plotting Satellite Image
The function below takes in a satellite tif image and plots it. It also has the option to use the previous function that we created to overlay the polygons on top of the image.

In [None]:
def plot_sat(path,gdf=None, fill=False,linewidth=0.2):
    """Plot a satellite raster, optionally with building polygons on top.

    Parameters
    ----------
    path : path-like
        Raster file to open with rasterio.
    gdf : GeoDataFrame, optional
        When given, its polygons are overlaid using ``plot_gdf``.
    fill : bool, default False
        Forwarded to ``plot_gdf``: filled patches instead of outlines.
    linewidth : float, default 0.2
        Forwarded to ``plot_gdf``.

    Returns
    -------
    The matplotlib axes holding the image (and overlay).
    """
    f, ax = plt.subplots(1,figsize=(3, 3))
    f.tight_layout()

    # Read inside a context manager so the dataset handle is always closed,
    # matching how the rasterize helpers open their reference raster.
    with rio.open(path) as src:
        bands = src.read()

    # rasterio returns (bands, rows, cols) while imshow expects (rows, cols, bands).
    ax.imshow(bands.transpose((1,2,0)))

    if gdf is not None:
        plot_gdf(gdf,fill=fill,ax=ax,linewidth=linewidth)

    return ax

In [None]:
plot_sat(path=test_raster_path);

In [None]:
plot_sat(path=test_raster_path,gdf=test_gdf);

In [None]:
plot_sat(path=test_raster_path,gdf=test_gdf,fill=True);
plot_sat(path=test_raster_path_24,gdf=test_gdf_24,fill=True);

## Rasterizing Polygons

Below we create a short helper function that rasterizes our geodataframe

In [None]:
def rasterize_geojson(geojson_path,reference_raster_path):
    """Rasterize the polygons stored in a GeoJSON file.

    The file is loaded with geopandas and handed to ``rasterize_gdf`` so the
    rasterization logic lives in a single place.

    Parameters
    ----------
    geojson_path : path-like
        Vector file readable by ``gpd.read_file``.
    reference_raster_path : path-like
        Raster whose height/width define the shape of the output mask.

    Returns
    -------
    numpy.ndarray
        2-D mask with the value 255 burned in where a polygon covers a pixel.
    """
    gdf = gpd.read_file(geojson_path)
    return rasterize_gdf(gdf,reference_raster_path)

In [None]:
def rasterize_gdf(gdf,reference_raster_path):
    """Burn the polygons of a GeoDataFrame into a mask array.

    Parameters
    ----------
    gdf : mapping with a 'geometry' entry (e.g. a GeoDataFrame)
        Polygons to rasterize. Coordinates are used as-is (no transform is
        passed to rasterio), so they are expected to already be in the pixel
        space of the reference raster.
    reference_raster_path : path-like
        Raster whose height/width define the shape of the output mask.

    Returns
    -------
    numpy.ndarray
        uint8 array of shape (height, width): 255 inside polygons, 0 elsewhere.
        An all-zero mask is returned when ``gdf`` holds no usable geometry.
    """
    with rio.open(reference_raster_path) as raster:
        # Only the grid size is needed, so read it from the metadata instead
        # of decoding a whole band.
        out_shape = (raster.height, raster.width)

    shapes = [
        (polygon, 255)
        for polygon in gdf['geometry']
        if polygon is not None and not polygon.is_empty
    ]

    if not shapes:
        # Nothing to burn (e.g. an empty difference between two months).
        # Depending on the rasterio version, rasterize() may raise on an empty
        # shape list, so return the equivalent empty mask directly.
        return np.zeros(out_shape, dtype='uint8')

    # dtype is pinned so the empty and non-empty paths return the same type.
    return features.rasterize(shapes,out_shape=out_shape,dtype='uint8')

In [None]:
# Rasterize the building labels of both sample mosaics; each mask takes its
# shape from the raster of the same month.
test_mask = rasterize_geojson(test_geojson_path,test_raster_path)
test_mask_24 = rasterize_geojson(test_geojson_path_24,test_raster_path_24)

We should now have a numpy array containing only the values 0 and 255. The pixels equal to 255 represent our target building footprints.

In [None]:
np.unique(test_mask)

In [None]:
test_mask_24.shape

Let's visualize our output masks for month 1 and month 24 from our sample images.

In [None]:
mpl.rcParams['figure.dpi'] = 300
_,axs = plt.subplots(1,2,figsize=(10,10))

masks = [test_mask,test_mask_24]
titles = ['month1','month24']

# One panel per month: pair every axes with its title and mask.
for ax,title,mask in zip(axs,titles,masks):
    ax.set_title(title)
    ax.imshow(mask);

## Getting the Differences between 2 Geodataframes
Next we are going to create some helper functions to extract the difference from our image polygons. This function will be given 2 geodataframes and it will return a geodataframe containing the differences between the 2 images.

For more information on getting the geojson difference, check out the following [notebook](https://www.kaggle.com/amerii/augmenting-spacenet-7-sample-data-labels).

In [None]:
def gdf_difference(gdf1, gdf2, get_dates=False):
    """Return the rows that the longer of two frames has beyond the shorter one.

    The comparison is purely positional: the first ``len(shorter)`` rows of
    the longer frame are treated as the common part and everything after them
    is returned. NOTE(review): this is only a true "difference" if both frames
    list the shared buildings first and in the same order (e.g. sorted by Id)
    -- confirm for your data.

    Neither input is modified.

    Parameters
    ----------
    gdf1, gdf2 : DataFrame / GeoDataFrame
        The two frames to compare, in any order.
    get_dates : bool, default False
        Currently unused; kept for backward compatibility.

    Returns
    -------
    A new frame (taken from the longer input) with a fresh 0..n-1 index.
    It is empty when both inputs have the same length.
    """
    # When the lengths are equal the (empty) result is taken from gdf1.
    if len(gdf2) > len(gdf1):
        shorter, longer = gdf1, gdf2
    else:
        shorter, longer = gdf2, gdf1

    # .iloc slices by position, so the callers' indexes (e.g. 'Id') do not
    # need to be reset and are left untouched.
    diff_gdf = longer.iloc[len(shorter):].reset_index(drop=True)

    return diff_gdf

Let's see what kind of output we get, by getting the difference between our 2 previous geodataframes.

In [None]:
len(test_gdf)

In [None]:
# Rows that the longer of the two label frames has beyond the shorter one
# (presumably the footprints added between the two months -- this relies on
# both frames listing the shared buildings first, in the same order).
diff_gdf = gdf_difference(test_gdf,test_gdf_24)
diff_gdf

In [None]:
test_mask_diff = rasterize_gdf(diff_gdf,test_raster_path)

In [None]:
mpl.rcParams['figure.dpi'] = 300
_,axs = plt.subplots(1,3,figsize=(10,10))

masks = [test_mask,test_mask_24,test_mask_diff]
titles = ['month 1', 'month 24', 'difference']

# Side-by-side panels: both monthly masks followed by their difference.
for ax,title,mask in zip(axs,titles,masks):
    ax.set_title(title)
    ax.imshow(mask);

## Creating Chips from our Satellite Imagery
Now that we can extract the difference between 2 satellite images, the next step is to split the satellite image into smaller chips. Another thing to note is that some areas have little to no change in them; that is something we need to consider when training our model. 

One thing we can do, is exclude regions with no change at all, however we will have to experiment whether that will actually improve our results or not.

In [None]:
class chip_creator():
    """Split an image into square, non-overlapping chips.

    Parameters
    ----------
    chip_dimension : int, default 256
        Side length of every chip, in pixels.
    raster : bool, default False
        If True, multi-band chips are returned channel-first
        (channels, rows, columns) instead of (rows, columns, channels).
        Single-band (2-D) chips are never reordered.
    """
    def __init__(self, chip_dimension=256,raster=False):
        self.chip_dimension = chip_dimension
        self.raster = raster

    def create_chips(self,image):
        """Cut ``image`` into chips, scanning row by row.

        Parameters
        ----------
        image : str, pathlib path or numpy.ndarray
            Image file to load, or an array shaped (rows, columns) or
            (rows, columns, channels).

        Returns
        -------
        numpy.ndarray
            Stack of chips ordered left-to-right, top-to-bottom. Chips on the
            right/bottom edge are zero-padded to the full chip size when the
            image size is not a multiple of ``chip_dimension``, so every chip
            has the same shape.

        Raises
        ------
        ValueError
            If ``chip_dimension`` is not positive or ``image`` has an
            unsupported type.
        """
        np_array = self.__read_image(image)
        size = self.chip_dimension
        if size <= 0:
            raise ValueError(f"chip_dimension must be positive, received: {size}")

        # number of chips along the rows / columns (ceiling division)
        n_rows = (np_array.shape[0] - 1) // size + 1
        n_cols = (np_array.shape[1] - 1) // size + 1

        chips = []
        for r in range(n_rows):
            for c in range(n_cols):
                row_start = r*size
                col_start = c*size
                chip = np_array[row_start:row_start + size, col_start:col_start + size]

                # Edge chips come out smaller when the image is not an exact
                # multiple of the chip size; pad them so the chips can be
                # stacked into one regular array.
                if chip.shape[:2] != (size, size):
                    padded = np.zeros((size, size) + chip.shape[2:], dtype=chip.dtype)
                    padded[:chip.shape[0], :chip.shape[1]] = chip
                    chip = padded

                # Only multi-band chips have a channel axis to move; applying
                # moveaxis to a 2-D mask would transpose it.
                if self.raster and chip.ndim == 3:
                    chip = np.moveaxis(chip,-1,0)

                chips.append(chip)

        return np.array(chips)

    def __read_image(self,image):
        """Return ``image`` as a numpy array, loading it from disk if it is a path."""
        # check whether image is a path or array
        if isinstance(image,(pathlib.PurePath,str)):
            with Image.open(image) as img:
                # convert image into np array
                np_array = np.array(img)
            return np_array

        elif isinstance(image,np.ndarray):
            return image
        else:
            raise ValueError(f"Expected Path or Numpy array received: {type(image)}")

In [None]:
chips_256 = chip_creator(raster=True)

Return raster formatted image (channels, rows, columns)

In [None]:
chips_256.create_chips(test_raster_path)[0].shape

Return normally formatted image (rows, columns,channels)

In [None]:
chips_256.raster=False

In [None]:
chips_256.create_chips(test_raster_path)[0].shape

Let's create a helper function to help us plot many images. The raster flag is an option we give to the plotting function in case we are feeding it a list of rasters, in which case we will need to swap the axis of the image channels/bands.

In [None]:
def multiplot_images(list_of_images,ncols=4,dpi=300, raster=False):
    """Show a sequence of images on a grid, titled with their index.

    Parameters
    ----------
    list_of_images : sequence of numpy.ndarray
        Images to show; must contain at least one image.
    ncols : int, default 4
        Number of grid columns. The number of rows is derived from it.
    dpi : int, default 300
        Written to ``mpl.rcParams['figure.dpi']`` (a global setting that
        stays in effect after the call).
    raster : bool, default False
        Set to True when multi-band images are channel-first
        (channels, rows, columns); the channel axis is then moved last
        before plotting. 2-D images are plotted unchanged.

    Raises
    ------
    ValueError
        If ``list_of_images`` is empty.
    """
    n_images = len(list_of_images)
    if n_images == 0:
        raise ValueError("list_of_images must contain at least one image")

    mpl.rcParams['figure.dpi'] = dpi
    nrows = (n_images - 1) // ncols + 1
    # squeeze=False keeps axs two-dimensional even for a single row or column,
    # so the same loop works for every grid shape.
    fig,axs = plt.subplots(nrows,ncols,figsize=(10,10),squeeze=False)

    fig.tight_layout()

    # axs.flat walks the grid row by row, so i matches r*ncols + c.
    for i,ax in enumerate(axs.flat):
        if i >= n_images:
            # the last row may have more cells than remaining images
            ax.set_axis_off()
            continue

        ax.set_title(i)
        image = list_of_images[i]
        # if the image is presented in raster format then move the channel axis
        if raster and np.ndim(image) == 3:
            image = np.moveaxis(image,0,-1)
        ax.imshow(image);


In [None]:
multiplot_images(chips_256.create_chips(test_raster_path));

Let's visualize the image chips for the difference mask that we obtained earlier.

In [None]:
multiplot_images(chips_256.create_chips(test_mask_diff));

# What Next???
Great, we now have some handy helper functions that will make our lives much easier. Let's see how we can use them to create our dataset class in our [next notebook](https://www.kaggle.com/amerii/spacenet-7-pytorch-dataset-class-starter)!