# Run a suite of CNN models and apply them to test data sets and whole tiles

## Setup

In [1]:
%load_ext autoreload
%autoreload 2

import os
import glob
import run_cnn

# Helper object from the project-local run_cnn module; its create_training_lists() method is
# called further down to assemble the training file lists.
train = run_cnn.TrainingData()

Using TensorFlow backend.


In [2]:
"""
Paths to the feature (LiDAR), response (manually labelled buildings) and boundary files, and to
the directory where the output will be written
"""

# Root directory of the project; every input/output location below hangs off it
base_path = '/data/gdcsdata/HawaiiMapping/ProjectFiles/Rachel/'

# One sub-directory per kind of labelled-region input
boundary_path = f'{base_path}labeled_region_boundaries/'
feature_path = f'{base_path}labeled_region_features/'
response_path = f'{base_path}labeled_region_buildings2/'

# Bundle of input directories handed to train.create_training_lists()
paths = dict(boundary=boundary_path, responses=response_path, features=feature_path)

# Directory under which the model runs are written
bfgn_output_path = f'{base_path}bfgn_output_buildings2/'


In [3]:
"""
Define the training and test sets
"""

# Training regions: nickname -> prefix of that region's feature file name
# (the feature files are addressed further down as <prefix>_<data type>.tif)
training_sets = {
    'HBLower': 'HBLower',
    'HOVE1': 'tile031_3125_11250',
    'CC1': 'tile024_10000_3125',
    'MIL1': 'tile030_10000_5625',
    'Hamakua': 'tile016_0_4375',
    'KParadise': 'KParadise',
    'CCTrees': 'tile024_10000_4375',
    'WAI1': 'Waikoloa1',
    'KK1': 'Kukio1',
    'Waimea': 'Waimea',
}

In [4]:
# Test regions: nickname -> prefix of that region's feature file name
# (the feature files are addressed further down as <prefix>_<data type>.tif)
test_sets = {
    'HBTest': 'HBTest',
    'HOVE2': 'tile031_2500_11250',
    'MIL2': 'tile030_9375_5625',
    'CC2': 'tile024_10000_2500',
    'SKona_A': 'SKona_TestA',
    'SKona_B': 'SKona_TestB',
    'Hamakua_A': 'Hamakua_testA',
    'Puako': 'Puako',
    'KonaMauka': 'KonaMauka',
}

## Run the model suite

The input data types, training data and window sizes used across the model suite are:

Data types:
- 1m DSM
- 1m eigenvalues, all 4 bands
- 1m hillshade

Training data:
- all available labelled training regions

Window sizes:
- 16, 32, 64 pixels

In [6]:

# Input data types, in the order in which their parameter combinations are generated
data_types = ['hires_surface', 'eigen4', 'hillshade']

# (window_radius, loss_window_radius) pairs tried for every data type
window_settings = [(16, 8), (32, 16), (64, 32)]

# Every data type is trained on the same, complete list of labelled regions
training_regions = ['HBLower', 'HOVE1', 'MIL1', 'CC1', 'Hamakua', 'KParadise', 'CCTrees', 'WAI1', 'KK1',
                    'Waimea']
training_set = [list(training_regions) for _ in data_types]

training_data = train.create_training_lists(paths, training_sets, training_set)

permutations = dict.fromkeys(['boundary_files', 'feature_files', 'response_files', 'window_radius',
                              'loss_window_radius'])

# Only the first entry of training_data is used; its feature files are the 'hires_surface'
# ones, and the file lists for the other data types are derived from them by renaming.
boundary_files = training_data[0][0]
surface_feature_files = training_data[0][1]
response_files = training_data[0][2]

# Build the combinations data type by data type (DSM, eigenvalues, hillshade), each with the
# 3 window sizes. The nicknames are generated in the same loop so that nicknames[i] always
# describes parameter_combos[i]; a separately hand-written list ordered by window size first
# would not line up with this data-type-first order.
# NOTE(review): assumes run_cnn.Loops pairs nicknames with parameter_combos by position - confirm.
parameter_combos = []
nicknames = []
for kind in data_types:
    if kind == 'hires_surface':
        feature_files = surface_feature_files
    else:
        # Same regions, but pointing at the feature files of this data type
        feature_files = [[x[0].replace('hires_surface', kind)] for x in surface_feature_files]

    for window_radius, loss_window_radius in window_settings:
        parameter_combos.append((boundary_files, feature_files, response_files,
                                 window_radius, loss_window_radius))
        nicknames.append(f'{kind}_{window_radius}')

print(f"There are {len(parameter_combos)} parameter combinations")

iteration_data = {
    'permutations': permutations,
    'parameter_combos': parameter_combos,
    'nicknames': nicknames,
    'out_path': bfgn_output_path+'model_runs/',
    'data_types': data_types,
}

model_runs = run_cnn.Loops(iteration_data, settings_file='settings_buildings.yaml')

There are 9 parameter combinations


In [7]:
%%time
model_runs.loop_over_configs(use_existing=True, rebuild_data=True, fit_model=True)


Working on parameter combination #0:

***Model /data/gdcsdata/HawaiiMapping/ProjectFiles/Rachel/bfgn_output_buildings2/model_runs/combo_0/model.h5 exists; nothing to do here

Working on parameter combination #1:

***Model /data/gdcsdata/HawaiiMapping/ProjectFiles/Rachel/bfgn_output_buildings2/model_runs/combo_1/model.h5 exists; nothing to do here

Working on parameter combination #2:

***Model /data/gdcsdata/HawaiiMapping/ProjectFiles/Rachel/bfgn_output_buildings2/model_runs/combo_2/model.h5 exists; nothing to do here

Working on parameter combination #3:

***Model /data/gdcsdata/HawaiiMapping/ProjectFiles/Rachel/bfgn_output_buildings2/model_runs/combo_3/model.h5 exists; nothing to do here

Working on parameter combination #4:

***Model /data/gdcsdata/HawaiiMapping/ProjectFiles/Rachel/bfgn_output_buildings2/model_runs/combo_4/model.h5 exists; nothing to do here

Working on parameter combination #5:

***Model /data/gdcsdata/HawaiiMapping/ProjectFiles/Rachel/bfgn_output_buildings2/model

## Apply the models to the test regions and the whole (trimmed) tiles

In [8]:
"""
The test regions
"""

#set to False to re-apply every model, even where an applied model file already exists
use_existing = True

#find the paths to the saved model files, one for each parameter combo
#NOTE: the lexicographic sort only matches the order of parameter_combos while there are at
#most 10 combos ('combo_10' would sort before 'combo_2')
model_paths = sorted(glob.glob(model_runs.out_path+'combo_*'))

#for each of the parameter combinations
for model_dir, combo in zip(model_paths, parameter_combos):
    config_file = f'{model_dir}/config.yaml'

    #name the kind of data this parameter combo was trained on (e.g. parameter combo 0 was
    #trained on 1m DSM), based on the name of its first feature file. This is done once per
    #combo, and fails loudly instead of silently reusing the kind found for a previous combo.
    feature_name = combo[1][0][0].split('/')[-1]
    matching_kinds = [kind for kind in data_types if kind in feature_name]
    if not matching_kinds:
        raise ValueError(f'Cannot tell the data type of {feature_name}; expected one of {data_types}')
    replace_with = matching_kinds[-1]

    #for each of the test regions
    for test_data, region_prefix in test_sets.items():

        #name the input data file and the applied model file
        application_data = f'{feature_path}{region_prefix}_{replace_with}.tif'
        outfile = f'{model_dir}/applied_model_{test_data}'

        #if there is already an applied model file and we are allowed to reuse it, skip
        if use_existing and os.path.isfile(outfile+'.tif'):
            continue
        model_runs.apply_model(config_file, application_data, outfile)

In [9]:
"""
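The training regions - the applied models won't be used for calculating any CNN model performance stats, but they
are still generated here for other uses (TODO: the original note was left unfinished - say what they are used for)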
"""

#set to False to re-apply every model, even where an applied model file already exists
use_existing = True

#find the paths to the saved model files, one for each parameter combo
#NOTE: the lexicographic sort only matches the order of parameter_combos while there are at
#most 10 combos ('combo_10' would sort before 'combo_2')
model_paths = sorted(glob.glob(model_runs.out_path+'combo_*'))

#for each of the parameter combinations
for model_dir, combo in zip(model_paths, parameter_combos):
    config_file = f'{model_dir}/config.yaml'

    #name the kind of data this parameter combo was trained on, based on the name of its first
    #feature file. This is done once per combo, and fails loudly instead of silently reusing
    #the kind found for a previous combo.
    feature_name = combo[1][0][0].split('/')[-1]
    matching_kinds = [kind for kind in data_types if kind in feature_name]
    if not matching_kinds:
        raise ValueError(f'Cannot tell the data type of {feature_name}; expected one of {data_types}')
    replace_with = matching_kinds[-1]

    #for each of the training regions
    for region_name, region_prefix in training_sets.items():

        #name the input data file and the applied model file
        application_data = f'{feature_path}{region_prefix}_{replace_with}.tif'
        outfile = f'{model_dir}/applied_model_{region_name}'

        #if there is already an applied model file and we are allowed to reuse it, skip
        if use_existing and os.path.isfile(outfile+'.tif'):
            continue
        model_runs.apply_model(config_file, application_data, outfile)

In [None]:
%%time
"""
Tiles
"""

tiles = ['tile024', 'tile025', 'tile030', 'tile031', 'tile008', 'tile009', 'tile014', 'tile015',
         'tile016', 'tile021', 'tile022', 'tile007', 'tile012',  'tile013', 'tile018', 'tile019', 'tile020']

#set to False to re-apply every model, even where an applied model file already exists
use_existing = True

#find the paths to the saved model files, one for each parameter combo
#NOTE: the lexicographic sort only matches the order of parameter_combos while there are at
#most 10 combos ('combo_10' would sort before 'combo_2')
model_paths = sorted(glob.glob(model_runs.out_path+'combo_*'))

#directory holding one sub-directory per tile with the 1m surface and eigenvalue rasters;
#the hillshade tiles are read from feature_path instead
full_tile_path = '/data/gdcsdata/HawaiiMapping/Full_Backfilled_Tiles/'

for tile in tiles:
    #for each parameter combination
    for model_dir, combo in zip(model_paths, parameter_combos):

        config_file = f'{model_dir}/config.yaml'

        #set up the name of the file the model will be applied to, according to the kind of
        #data named in the combo's first feature file
        first_feature_file = combo[1][0][0]
        if 'hires_surface' in first_feature_file:
            application_data = f'{full_tile_path}{tile}/{tile}_backfilled_surface_1mres.tif'
        elif 'eigen4' in first_feature_file:
            application_data = f'{full_tile_path}{tile}/{tile}_backfilled_surface_1mres_eigen4band_5mwind.tif'
        elif 'hillshade' in first_feature_file:
            application_data = f'{feature_path}{tile}_hillshade.tif'
        else:
            #fail loudly rather than silently reusing the file chosen for the previous combo
            raise ValueError(f'Cannot tell the data type of {first_feature_file}')

        outfile = f'{model_dir}/applied_model_{tile}'

        #if there is already an applied model file and we are allowed to reuse it, skip
        if use_existing and os.path.isfile(outfile+'.tif'):
            continue
        model_runs.apply_model(config_file, application_data, outfile)