# Generating Training Images and Labels for Forest Classification

This code reads two geotiff files for Sentinel-1 and Labels from Global Forest Watch (GFW) on a local disk, and generates 256 x 256 image chips and labels to be used in training. Sentinel-1 data is already reprojected to GFW grid using the code in `re-project.ipynb`.

Training images and labels are saved to disk as NumPy arrays so they can be read quickly during training later on.

This code is written as a test; ideally there should be no need to write these data to disk and read them back again. Being able to read the source Sentinel-1 imagery (in its native projection), quickly reproject it to the labels' grid, and then generate image chips on the fly is a basic requirement for scaling this training to regional and continental-level data.

In [None]:
%matplotlib inline
import glob
import os
import sys

import matplotlib.pyplot as plt
import numpy as np
from osgeo import gdal


# Make GDAL raise Python exceptions on errors instead of failing silently.
# The `except RuntimeError` handlers around gdal.Open() in the cells below
# depend on this being enabled.
gdal.UseExceptions()

In [None]:
# Directory holding the input GeoTIFFs. File names are appended with plain
# string concatenation below, so the trailing slash is required.
pathData = "/home/ec2-user/data/"

### Read Image Data (Sentinel-1)  

In [None]:
# Sentinel-1 stack, already reprojected onto the GFW label grid.
s1_filename = pathData + "S1_Aug17_GFW_grid.tif"

# With gdal.UseExceptions() enabled, a failed open raises RuntimeError.
try:
    s1_datafile = gdal.Open(s1_filename)
except RuntimeError:
    print('Unable to open {}'.format(s1_filename))
    sys.exit(1)

# Raster size in pixels.
s1_nx = s1_datafile.RasterXSize
s1_ny = s1_datafile.RasterYSize

# Georeferencing. Elements 1 and 5 of the GDAL geotransform are the pixel
# width and pixel height (the latter is negative for north-up rasters).
s1_gt = s1_datafile.GetGeoTransform()
s1_proj = s1_datafile.GetProjection()
s1_xres = s1_gt[1]
s1_yres = s1_gt[5]

# ReadAsArray() puts the band axis first for a multi-band raster; move it to
# the end so the array is indexed as (row, col, band).
s1_data = np.moveaxis(s1_datafile.ReadAsArray(), 0, -1)

# Even-indexed bands are read as VV and odd-indexed bands as VH.
# dataVV / dataVH are views, so the masking below also writes into s1_data.
dataVV = s1_data[:, :, 0::2]
dataVH = s1_data[:, :, 1::2]

# Remove pixels less than NESZ: wherever VH drops below -30, blank the pixel
# in *both* polarizations. The mask is computed once, before VH is modified.
# NOTE(review): assigning NaN assumes the raster has a floating-point dtype.
below_nesz = dataVH < -30
dataVV[below_nesz] = np.nan
dataVH[below_nesz] = np.nan

# Using only one mode of observations (ascending vs descending): average
# every other acquisition, ignoring the masked (NaN) samples.
# NOTE(review): presumably acquisitions alternate between the two modes in
# band order -- confirm against the re-projection notebook.
VV_A = np.nanmean(dataVV[:, :, 0::2], 2)
VH_A = np.nanmean(dataVH[:, :, 0::2], 2)

### Read Labels (Global Forest Watch)

In [None]:
# Global Forest Watch label raster.
labels_filename = pathData + "GFWLabels2017_noNaN.tiff"

# With gdal.UseExceptions() enabled, a failed open raises RuntimeError.
try:
    datafile = gdal.Open(labels_filename)
except RuntimeError:
    # Report the path that was actually attempted (the previous message
    # referenced an undefined name and raised NameError instead).
    print('Unable to open {}'.format(labels_filename))
    sys.exit(1)

# Raster size in pixels.
l_nx = datafile.RasterXSize
l_ny = datafile.RasterYSize

# Georeferencing. Elements 1 and 5 of the GDAL geotransform are the pixel
# width and pixel height (the latter is negative for north-up rasters).
l_gt = datafile.GetGeoTransform()
l_proj = datafile.GetProjection()
l_xres = l_gt[1]
l_yres = l_gt[5]

# Label raster as a NumPy array.
labels = datafile.ReadAsArray()

In [None]:
# Clean existing data: delete every .npy chip left over from an earlier run
# so stale files never mix with freshly generated ones. Directories are
# visited in this order: train images, test images, train labels, test labels.
for pattern in (
    'data/train/image/*.npy',
    'data/test/image/*.npy',
    'data/train/label/*.npy',
    'data/test/label/*.npy',
):
    files = glob.glob(pattern)
    for f in files:
        os.remove(f)

### Generate Image Chips

In [None]:
# Generating 256 x 256 images
#
# Tiles the averaged VV / VH rasters into non-overlapping 256 x 256 chips,
# stacks the two polarizations into a (256, 256, 2) image, and saves each
# image with its matching label window under data/{train,test}/{image,label}/
# as <index>.npy, with indices counted separately per split starting at 0.

# Trim a 10-pixel margin from every edge of the imagery.
# NOTE(review): this rebinds VV_A / VH_A, so re-running the cell trims a
# further 10 pixels each time.
# NOTE(review): `labels` is not trimmed, so label windows are taken 10 pixels
# up/left of the matching image windows unless the label raster was already
# cropped upstream -- confirm.
VV_A = VV_A[10:-10, 10:-10]
VH_A = VH_A[10:-10, 10:-10]

# Chip numbers (row-major order) that go to the test split.
# NOTE(review): 120 is hard-coded; chips numbered 120 and above always land in
# the training split, and fewer than 19 test chips are produced if the grid
# holds fewer than 120 chips.
test_samples = np.random.choice(120, 19, replace=False)

# Counters start at -1 and are incremented before use, so the first file in
# each split is named 0.
n_train = -1
n_test = -1
n_image = -1

for i_row in range(VV_A.shape[0] // 256):
    rows = slice(i_row * 256, (i_row + 1) * 256)
    for i_col in range(VV_A.shape[1] // 256):
        cols = slice(i_col * 256, (i_col + 1) * 256)
        n_image = n_image + 1

        # Both splits must see identically scaled inputs. Previously the test
        # chips were converted with 10 ** (x / 10) while the training chips
        # were divided by -30; the test split now follows the training
        # scaling, since that is what the model is fitted on.
        image_VV = VV_A[rows, cols] / -30
        image_VH = VH_A[rows, cols] / -30
        image = np.dstack((image_VV, image_VH))
        label = labels[rows, cols]

        if n_image in test_samples:
            n_test = n_test + 1
            np.save('data/test/image/' + str(n_test), image)
            np.save('data/test/label/' + str(n_test), label)
        else:
            n_train = n_train + 1
            np.save('data/train/image/' + str(n_train), image)
            np.save('data/train/label/' + str(n_train), label)