In [None]:
import pandas as pd
import geopandas as gpd
import rasterio as rio
import matplotlib.pyplot as plt
from rasterio.features import rasterize
from rasterio.enums import Resampling
import os
import numpy as np
import glob
from rasterio.plot import show
from shapely.geometry import Polygon
import itertools
from rasterio import mask
import re
import random
import time
from tqdm.notebook import tqdm

In [None]:
# set I/O directories
train_poly_DIR = '/project/cper_neon_aop/cper_pdog_uas/train_polys'
train_tiles_DIR = '/project/cper_neon_aop/cper_pdog_uas/train_tiles'
outDIR_imgs = '/project/cper_neon_aop/cper_pdog_uas/cnn_train_images'
outDIR_labs = '/project/cper_neon_aop/cper_pdog_uas/cnn_train_labels'

# create output directories if necessary; makedirs(exist_ok=True) also creates
# missing parent directories and does not fail if the directory already exists
for out_dir in (outDIR_imgs, outDIR_labs):
    os.makedirs(out_dir, exist_ok=True)

# set list of trainers and training tile groups
trainer_list = ['Sean', 'Lauren', 'David']
group_list = ['random', 'group', 'group2']

In [None]:
# loop through all trainers and groups to create concatenated training dataset
train_polys_parts = []
for trainer in trainer_list:
    for group in group_list:
        # for trainers other than Sean only the 'random' group is read
        if group != 'random' and trainer != 'Sean':
            continue
        run_name = '_'.join([trainer, group])
        print(run_name)
        inDIR = os.path.join(train_poly_DIR, run_name)
        poly_flist = [f for f in glob.glob(inDIR + '/delim_' + run_name + '*.shp')
                      if os.path.isfile(f)]
        if not poly_flist:
            print('ERROR: No shapefiles found')
            continue
        # keep the shapefile with the latest ctime (last element after an ascending sort)
        poly_f = sorted(poly_flist, key=os.path.getctime)[-1]
        train_polys_tmp = gpd.read_file(poly_f)
        # remember which tile group each polygon came from
        train_polys_tmp['group'] = group
        train_polys_parts.append(train_polys_tmp)

# fail with a clear message instead of a NameError further down
if not train_polys_parts:
    raise FileNotFoundError('No training shapefiles found under ' + train_poly_DIR)

# DataFrame.append was removed in pandas 2.0; concatenate all parts in one call
train_polys = pd.concat(train_polys_parts)

# set CRS of training dataset and keep each polygon's original per-file index as 'orig_index'
train_polys = train_polys.set_crs(32613).reset_index().rename(columns={'index':'orig_index'})

In [None]:
# remove the polygon David flagged as bad (tile '22W_0', comment 'NoB')
train_polys = train_polys.loc[
    ~(train_polys['Trainer'].eq('David')
      & train_polys['Tile'].eq('22W_0')
      & train_polys['Comment'].eq('NoB'))
].copy()

In [None]:
# get any duplicated tiles from group2
rand_tile_list = train_polys[train_polys['group'] == 'random']['Tile'].unique()
group_tile_list = train_polys[train_polys['group'] == 'group']['Tile'].unique()
group2_tile_list = train_polys[train_polys['group'] == 'group2']['Tile'].unique()
dup_tile_list = [item for item in group2_tile_list if item in rand_tile_list or item in group_tile_list]
print('removing duplicated tiles: ')
display(dup_tile_list)

# drop any tiles from group2 if they also appear in the 'random' or 'group' sets;
# .copy() makes the result an independent frame so later column assignments
# do not write to a slice of the unfiltered data
train_polys = train_polys[~((train_polys['group'] == 'group2') &
                            (train_polys['Tile'].isin(dup_tile_list)))].copy()

In [None]:
# list the tile IDs that remain in the training set
train_polys.Tile.unique()

In [None]:
# read in list of training tiles
df_tiles = pd.read_csv(os.path.join(train_tiles_DIR, 'train_bboxes_all_assigned.csv'))

# get unique id's for each polygon within each tile (0, 1, 2, ... in row order);
# cumcount numbers the rows of each group directly, without transforming every
# column (including geometry) and without depending on the 'Comment' column
train_polys['subID'] = train_polys.groupby('Tile').cumcount()

# save concatenated training polygon dataset
# NOTE(review): relative path, resolved against the current working directory - confirm intended
train_polys.to_csv('train_polys/train_polys_all.csv')

In [None]:
def crop_and_save(rgb_path, label_polys, geom, prefix, win_size):
    """Cut one square window out of a tile and write image, label and ancillary rasters.

    Files are written into the module-level ``outDIR_imgs`` (RGB and ancillary
    layers) and ``outDIR_labs`` (label mask); nothing is returned.

    Parameters
    ----------
    rgb_path : str
        Path to the RGB raster of the tile. The 'ndvi', 'dsm', 'shade' and
        'tpi' rasters are located by replacing 'rgb' in this path.
    label_polys : GeoDataFrame-like
        Object with a ``geometry`` attribute and a length. Its geometries are
        burned into the label mask; when empty an all-zero label is written.
    geom : shapely geometry
        Window footprint in the raster's coordinate system. Its bounding box
        defines the lower-left corner from which the window is indexed.
    prefix : str
        Prepended (followed by '_') to every output file name.
    win_size : int
        Window edge length in pixels.
    """
    # lower-left corner of the window footprint in map coordinates
    min_x, min_y = geom.bounds[0], geom.bounds[1]
    with rio.open(rgb_path) as src_rgb:
        profile_rgb = src_rgb.meta
        rgb_out, transform_out = mask.mask(src_rgb,
                                           [geom],
                                           crop=True)
        # trim the cropped array to exactly win_size x win_size:
        # last win_size rows, first win_size columns
        rgb_out = rgb_out[:, -win_size:, :win_size]

        # (row, col) of the lower-left corner in the full tile
        ll_i = src_rgb.index(min_x, min_y)
        # NOTE(review): a window that starts above the first row yields a negative
        # start index and a wrapped slice - callers are assumed to keep windows inside the tile
        rows = slice(ll_i[0] - win_size, ll_i[0])
        cols = slice(ll_i[1], ll_i[1] + win_size)

        # burn the polygons on the full tile grid, then cut the same window
        if len(label_polys) > 0:
            label = rasterize(label_polys.geometry, out_shape=src_rgb.shape, transform=src_rgb.transform)
        else:
            label = np.zeros(src_rgb.shape)
        label = label[rows, cols]

        profile_rgb.update({'dtype': 'int16',
                            'width': win_size,
                            'height': win_size,
                            'transform': transform_out})
        profile_single = profile_rgb.copy()
        profile_single.update({'count': 1})

        tile_basename_rgb = os.path.basename(rgb_path)
        with rio.open(os.path.join(outDIR_imgs, prefix + '_' + tile_basename_rgb), 'w', **profile_rgb) as dst:
            dst.write(rgb_out)
        with rio.open(os.path.join(outDIR_labs, prefix + '_' + re.sub('_rgb', '_labels', tile_basename_rgb)), 'w', **profile_single) as dst:
            dst.write(label, 1)

        for suffix in ['ndvi', 'dsm', 'shade', 'tpi']:
            with rio.open(re.sub('rgb', suffix, rgb_path)) as src_i:
                profile_i = src_i.profile
                # resample the layer onto the RGB grid; rasterio expects
                # out_shape as (bands, rows, cols) = (count, height, width)
                i_out = src_i.read(out_shape=(
                    src_i.count,
                    src_rgb.height,
                    src_rgb.width),
                                   resampling=Resampling.bilinear)
                i_out = i_out[:, rows, cols]
                profile_i.update({'width': i_out.shape[2],
                                  'height': i_out.shape[1],
                                  'transform': transform_out})
                with rio.open(os.path.join(outDIR_imgs, prefix + '_' + re.sub('rgb', suffix, tile_basename_rgb)), 'w', **profile_i) as dst:
                    dst.write(i_out)

In [None]:
# number of digitized tiles whose trainer is not 'Nick'
len(df_tiles.loc[(df_tiles['Digitize'] == 1) & (df_tiles['trainer'] != 'Nick'), 'ID'])

In [None]:
# last five digitized tiles whose trainer is not 'Nick'
df_tiles.loc[(df_tiles['Digitize'] == 1) & (df_tiles['trainer'] != 'Nick')].iloc[-5:]

In [None]:
# loop through all digitized tiles; for each tile save randomly placed windows
# (at least 5, or as many as there are burrow polygons) and one window centred
# on each burrow polygon
# NOTE(review): random windows come from the `random` module and no seed is set
# in this cell, so the windows differ between runs - confirm that is intended

# window edge length in pixels
win_pix = 32 * 12

tiles_to_crop = df_tiles[(df_tiles['trainer'] != 'Nick') &
                         (df_tiles['Digitize'] == 1)]
for ID in tiles_to_crop['ID']:
    print(ID)

    # look up the tile's row(s) once; the first match is used below
    tile_rows = df_tiles[df_tiles['ID'] == ID]
    # get the path base to the imagery associated with the training tile
    tilePATH = tile_rows['path_pre'].iloc[0]
    # get the x/y coordinates of the bounding box for the training polygon within the tile
    box_coords_x = tile_rows[['min_x', 'max_x']].iloc[0].values
    box_coords_y = tile_rows[['min_y', 'max_y']].iloc[0].values
    # get the box coordinate pairs
    ll, ul, lr, ur = list(itertools.product(box_coords_x, box_coords_y))
    # create a polygon from the coordinate pairs
    tile_geom = Polygon([ll, ul, ur, lr])
    # create the full path to the RGB image of the training tile
    tile_f = os.path.join(os.path.dirname(train_tiles_DIR), tilePATH + 'rgb.tif')

    # pixel size taken from the first transform coefficient of the RGB raster
    with rio.open(tile_f) as src_rgb_samp:
        res_samp = src_rgb_samp.transform[0]

    # window edge length in map units
    win_size = win_pix * res_samp

    fig, ax = plt.subplots()

    # get the subset of training polygons from the tile
    train_polys_sub = train_polys[train_polys['Tile'] == ID]

    # remove any burrow polygons whose centroid is outside the training polygon;
    # the vectorized test also yields a proper (empty) boolean mask when the
    # tile has no polygons, which a row-wise apply does not
    centroids = train_polys_sub.geometry.centroid
    inside_box = (centroids.x.between(ll[0], ur[0]) &
                  centroids.y.between(ll[1], ur[1]))
    train_polys_sub = train_polys_sub[inside_box]

    # outline of the training box, drawn once per figure
    ax.plot(*tile_geom.exterior.xy, c='grey')

    # randomly placed windows that fit inside the training box
    for i in range(max(5, len(train_polys_sub))):
        ll_rand = (random.uniform(ll[0], lr[0] - win_size), random.uniform(ll[1], ul[1] - win_size))
        rand_geom = Polygon([ll_rand,
                             (ll_rand[0], ll_rand[1] + win_size),
                             (ll_rand[0] + win_size, ll_rand[1] + win_size),
                             (ll_rand[0] + win_size, ll_rand[1])])
        crop_and_save(tile_f, train_polys_sub, rand_geom.buffer(0.02), 'rand_' + str(i), win_pix)

        ax.plot(*rand_geom.exterior.xy, c='red')

    # one window centred on the bounding box of each burrow polygon
    for poly_id, poly in train_polys_sub.iterrows():
        poly_coords_x = np.array(poly.geometry.bounds)[[0, 2]]
        poly_coords_y = np.array(poly.geometry.bounds)[[1, 3]]
        # shift the lower edge to half a window below/left of the bbox centre,
        # then set the upper edge exactly one window away
        poly_coords_x[0] = (poly_coords_x[0] + poly_coords_x[1])/2 - win_size/2.0
        poly_coords_y[0] = (poly_coords_y[0] + poly_coords_y[1])/2 - win_size/2.0
        poly_coords_x[1] = poly_coords_x[0] + win_size
        poly_coords_y[1] = poly_coords_y[0] + win_size
        # get the box coordinate pairs
        poly_ll, poly_ul, poly_lr, poly_ur = list(itertools.product(poly_coords_x, poly_coords_y))
        # create a polygon from the coordinate pairs
        poly_geom = Polygon([poly_ll, poly_ul, poly_ur, poly_lr])

        crop_and_save(tile_f, train_polys_sub, poly_geom.buffer(0.05), 'poly_' + str(poly_id), win_pix)

        ax.plot(*poly.geometry.exterior.xy, c='blue')
        ax.plot(*poly_geom.exterior.xy, c='orange')

    # show the figure briefly, then close it so figures do not accumulate
    plt.show(block=False)
    time.sleep(3)
    plt.close(fig)

In [None]:
# marker that the window extraction cell above has finished
print('complete')

In [None]:
# choose a saved window to inspect: tile ID plus the output-file prefix
ID = '22E_1'
prefix = 'poly_81_'
tilePATH = df_tiles.loc[df_tiles['ID'] == ID, 'path_pre'].iloc[0]
tile_f = os.path.join(os.path.dirname(train_tiles_DIR), tilePATH + 'rgb.tif')
tile_basename_rgb = os.path.basename(tile_f)

In [None]:
# RGB window with its label mask drawn semi-transparently on top
fig, ax = plt.subplots(figsize=(4, 4))
rgb_win_f = os.path.join(outDIR_imgs, prefix + tile_basename_rgb)
lab_win_f = os.path.join(outDIR_labs, prefix + tile_basename_rgb.replace('_rgb', '_labels'))
with rio.open(rgb_win_f) as src:
    show(src.read(), ax=ax)
    with rio.open(lab_win_f) as src_lab:
        show(src_lab.read(), ax=ax, alpha=0.3)

In [None]:
# NDVI layer of the selected window
ndvi_win_f = os.path.join(outDIR_imgs, prefix + tile_basename_rgb.replace('_rgb', '_ndvi'))
with rio.open(ndvi_win_f) as src:
    plt.figure(figsize=(4, 4))
    show(src.read())

In [None]:
# DSM layer of the selected window
dsm_win_f = os.path.join(outDIR_imgs, prefix + tile_basename_rgb.replace('_rgb', '_dsm'))
with rio.open(dsm_win_f) as src:
    plt.figure(figsize=(4, 4))
    show(src.read())

In [None]:
# shade layer of the selected window
shade_win_f = os.path.join(outDIR_imgs, prefix + tile_basename_rgb.replace('_rgb', '_shade'))
with rio.open(shade_win_f) as src:
    plt.figure(figsize=(4, 4))
    show(src.read())

In [None]:
# TPI layer of the selected window
tpi_win_f = os.path.join(outDIR_imgs, prefix + tile_basename_rgb.replace('_rgb', '_tpi'))
with rio.open(tpi_win_f) as src:
    plt.figure(figsize=(4, 4))
    show(src.read())

In [None]:
# for every tile that has training polygons, write the full training box as
# 'cnn_'-prefixed RGB, label and ancillary rasters
for ID, train_polys_sub in tqdm(train_polys.groupby('Tile')):
    print(ID)

    # look up the tile's row(s) once; the first match is used below
    tile_rows = df_tiles[df_tiles['ID'] == ID]
    # get the path base to the imagery associated with the training tile
    tilePATH = tile_rows['path_pre'].iloc[0]
    # get the x/y coordinates of the bounding box for the training polygon within the tile
    box_coords_x = tile_rows[['min_x', 'max_x']].iloc[0].values
    box_coords_y = tile_rows[['min_y', 'max_y']].iloc[0].values
    # get the box coordinate pairs
    ll, ul, lr, ur = list(itertools.product(box_coords_x, box_coords_y))
    # create a polygon from the coordinate pairs
    tile_geom = Polygon([ll, ul, ur, lr])
    # create the full path to the RGB image of the training tile
    tile_f = os.path.join(os.path.dirname(train_tiles_DIR), tilePATH + 'rgb.tif')

    with rio.open(tile_f) as src_rgb:
        profile_rgb = src_rgb.meta
        rgb_out, transform_out = mask.mask(src_rgb,
                                           [tile_geom],
                                           crop=True)
        # index() returns (row, col); arrays are indexed [row, col], so rows run
        # from the upper-right row to the lower-left row and columns from the
        # lower-left column to the upper-right column
        ll_i = src_rgb.index(*ll)
        ur_i = src_rgb.index(*ur)
        rows = slice(ur_i[0], ll_i[0])
        cols = slice(ll_i[1], ur_i[1])

        # burn the polygons on the full tile grid, then cut out the training box
        label = rasterize(train_polys_sub.geometry, out_shape=src_rgb.shape, transform=src_rgb.transform)
        label = label[rows, cols]
        # NOTE(review): width/height come from the index-based slice while rgb_out
        # comes from mask(); the two are assumed to have the same shape - confirm
        profile_rgb.update({'dtype': 'int16',
                            'width': label.shape[1],
                            'height': label.shape[0],
                            'transform': transform_out})
        profile_single = profile_rgb.copy()
        profile_single.update({'count': 1})

        tile_basename_rgb = os.path.basename(tile_f)
        with rio.open(os.path.join(outDIR_imgs, 'cnn_' + tile_basename_rgb), 'w', **profile_rgb) as dst:
            dst.write(rgb_out)
        with rio.open(os.path.join(outDIR_labs, 'cnn_' + re.sub('_rgb', '_labels', tile_basename_rgb)), 'w', **profile_single) as dst:
            dst.write(label, 1)

        for suffix in ['ndvi', 'dsm', 'shade', 'tpi']:
            with rio.open(re.sub('rgb', suffix, tile_f)) as src_i:
                profile_i = src_i.profile
                # resample the layer onto the RGB grid; rasterio expects
                # out_shape as (bands, rows, cols) = (count, height, width)
                i_out = src_i.read(out_shape=(
                    src_i.count,
                    src_rgb.height,
                    src_rgb.width),
                                   resampling=Resampling.bilinear)
                i_out = i_out[:, rows, cols]
                profile_i.update({'width': i_out.shape[2],
                                  'height': i_out.shape[1],
                                  'transform': transform_out})
                with rio.open(os.path.join(outDIR_imgs, 'cnn_' + re.sub('rgb', suffix, tile_basename_rgb)), 'w', **profile_i) as dst:
                    dst.write(i_out)

In [None]:
# choose a full-tile export to inspect
ID = 'CN_30'
tilePATH = df_tiles.loc[df_tiles['ID'] == ID, 'path_pre'].iloc[0]
tile_f = os.path.join(os.path.dirname(train_tiles_DIR), tilePATH + 'rgb.tif')
tile_basename_rgb = os.path.basename(tile_f)

In [None]:
# RGB training box with its label mask drawn semi-transparently on top
# (matplotlib.pyplot is already imported as plt in the notebook's import cell)
fig, ax = plt.subplots(figsize=(12, 12))
with rio.open(os.path.join(outDIR_imgs, 'cnn_' + tile_basename_rgb)) as src:
    show(src.read(), ax=ax)
    with rio.open(os.path.join(outDIR_labs, 'cnn_' + re.sub('_rgb', '_labels', tile_basename_rgb))) as src_lab:
        show(src_lab.read(), ax=ax, alpha=0.2)

In [None]:
# NDVI layer of the selected training box
ndvi_tile_f = os.path.join(outDIR_imgs, 'cnn_' + tile_basename_rgb.replace('_rgb', '_ndvi'))
with rio.open(ndvi_tile_f) as src:
    plt.figure(figsize=(12, 12))
    show(src.read())

In [None]:
# DSM layer of the selected training box
dsm_tile_f = os.path.join(outDIR_imgs, 'cnn_' + tile_basename_rgb.replace('_rgb', '_dsm'))
with rio.open(dsm_tile_f) as src:
    plt.figure(figsize=(12, 12))
    show(src.read())

In [None]:
# shade layer of the selected training box
shade_tile_f = os.path.join(outDIR_imgs, 'cnn_' + tile_basename_rgb.replace('_rgb', '_shade'))
with rio.open(shade_tile_f) as src:
    plt.figure(figsize=(12, 12))
    show(src.read())

In [None]:
# TPI layer of the selected training box
tpi_tile_f = os.path.join(outDIR_imgs, 'cnn_' + tile_basename_rgb.replace('_rgb', '_tpi'))
with rio.open(tpi_tile_f) as src:
    plt.figure(figsize=(12, 12))
    show(src.read())