In [1]:
import tensorflow as tf
from keras.models import Model, load_model, model_from_json
from keras.layers import Input, Cropping2D
from keras.layers.core import Lambda
from keras.layers.convolutional import Conv2D, Conv2DTranspose
from keras.layers.pooling import MaxPooling2D
from keras.layers.merge import concatenate
from keras.layers.normalization import BatchNormalization
from keras.callbacks import EarlyStopping, ModelCheckpoint, History
from keras import backend as K
from keras.optimizers import Nadam
from keras.utils import multi_gpu_model

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
list_gpus = K.tensorflow_backend._get_available_gpus()
print(list_gpus)

['/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1']


In [3]:
import numpy as np
#from sklearn.model_selection import StratifiedShuffleSplit,ShuffleSplit
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report

In [4]:
import sys
sys.path.insert(0, '/nfs/src')
from features import geopatches

In [5]:
import datetime
import os

In [6]:
import pandas as pd
import h5py

In [7]:
# Setup data seed
np.random.seed(3254)

In [8]:
# https://github.com/ternaus/kaggle_dstl_submission/

smooth = 1e-12

def jaccard_coef(y_true, y_pred):
    intersection = K.sum(y_true * y_pred, axis=[0, -1, -2])
    sum_ = K.sum(y_true + y_pred, axis=[0, -1, -2])

    jac = (intersection + smooth) / (sum_ - intersection + smooth)

    return K.mean(jac)


def jaccard_coef_int(y_true, y_pred):
    y_pred_pos = K.round(K.clip(y_pred, 0, 1))

    intersection = K.sum(y_true * y_pred_pos, axis=[0, -1, -2])
    sum_ = K.sum(y_true + y_pred_pos, axis=[0, -1, -2])

    jac = (intersection + smooth) / (sum_ - intersection + smooth)

    return K.mean(jac)

def read_model(cross,results_dir):
    json_name = 'architecture_' + cross + '.json'
    weight_name = 'model_weights_' + cross + '.h5'
    model = model_from_json(open(os.path.join(results_dir,'cache', json_name)).read())
    model.load_weights(os.path.join(results_dir,'cache', weight_name))
    return model

In [9]:
def encode_binary_data(dict_replace):
    # Encode classes
    print(dict_replace)
    u,inv = np.unique(Y_raw,return_inverse = True)
    Y = np.array([dict_replace.get(x, 0.0) for x in u])[inv].reshape(Y_raw.shape)
    Y = Y[..., np.newaxis]
    X = X_raw
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.20, random_state=3254)
    X_train, X_val, Y_train, Y_val = train_test_split(X_train, Y_train, test_size=0.10, random_state=3254)
    
    for data,name in zip([Y_train,Y_val,Y_test],['Y_train','Y_val','Y_test']):
        num_img = 0.0
        num_pixels = 0.0
        for x in range(data.shape[0]):
            unique, counts = np.unique(data[x], return_counts=True)
            if len(unique) > 1:
                #print(x, dict(zip(unique, counts)))
                num_img += 1
                num_pixels += dict(zip(unique, counts))[1]
        print ('composition with label != 0')
        print (name, num_img/data.shape[0], num_pixels/(data.shape[0] * data.shape[1] * data.shape[2]))
    return X_train, X_val, X_test, Y_train, Y_val, Y_test

In [16]:
if __name__ == '__main__':
    
    results_dir = '/nfs/results/unet/padtest/80_80_24/results_prelim_unet_pl_veg'
#    results_dir = '/nfs/results/unet/padtest/80_80_24/results_prelim_unet_pl_fire'
#    results_dir = '/nfs/results/unet/padtest/80_80_24/results_prelim_unet_pl_landcover'
    
    dict_img = {'width': 80, 'height': 80, 'stride': 80, 'padding': 24}
    dict_label = {'width': 80, 'height': 80, 'stride': 80}
    X_raw, Y_raw = geopatches.create_data(['/nfs/conditioned/planetlabs/1154314_2014-07-23_RE2_3A_Analytic_clipped.tif'],dict_img,\
                                        ['/nfs/conditioned/landfire/US_140EVT_020818_UTM11_30_clipped.tif'],dict_label,\
#                                        ['/nfs/conditioned/landfire/US_140FBFM13_3030_clipped.tif'],dict_label,\
#                                        ['/nfs/conditioned/landfire/escondido_landcover_clipped.tif'],dict_label,\
#                                        extraindexbands=[{'type': 'evi', 'nir':4, 'red': 2, 'blue': 0},\
#                                                         {'type': 'ccci', 'nir':4, 'red': 2, 'rededge': 3},\
#                                                         {'type': 'savi', 'nir':4, 'red': 2, 'L': 0.5}],\
#                                          normalizebands=65535,
                                          repeatlabel=6)

    # Planetlabs needs to be divided by max uint16 = 65535 to be between 0 and 1.0
    X_raw = X_raw / 65535

    # remove classes that are too small
    list_remove = []
    for x in np.asarray(np.unique(Y_raw, return_counts=True)).T:
        if x[1] < 400*36:
            list_remove.append(x[0])
    print('unique labels found:')
    list_unique = np.unique(Y_raw)
    print(list_unique)
    print('labels that do not meet threshold')
    print(list_remove)
    list_final = [x for x in list_unique if x not in list_remove]
    print('final labels')
    print(list_final)
    print('\n') 

    # Variables
    batch_size = 128
    nb_epoch = 50
    steps_per_epoch = 100
    window_size = X_raw.shape[1]
    #nb_classes = 1
    nb_channels = X_raw.shape[3]
    cropping_len = 24
    
    list_pred = []
    list_labels = map(int,[x.split('.')[0].split('_')[-1] for x in os.listdir(results_dir+'/cache') if x.endswith(".json")])
        
    # Build models
    for label in list_labels:
        
        # Encode
        print('label:',label)
        X_train, X_val, X_test, Y_train, Y_val, Y_test = encode_binary_data({label: 1.0})
        print('X_train.shape','X_val.shape','X_test.shape','Y_train.shape','Y_val.shape','Y_test.shape',':')
        print(X_train.shape,X_val.shape,X_test.shape,Y_train.shape,Y_val.shape,Y_test.shape)
        
        # Calculate metrics off saved model
        suffix = label
        
        if len(list_gpus) <= 1:
            model = read_model("{batch}_{step}_{epoch}_{suffix}".format(batch=batch_size, step=steps_per_epoch, epoch=nb_epoch, suffix=suffix),results_dir)
            model_gpu = model
        else:
            with tf.device("/cpu:0"):
                model = read_model("{batch}_{step}_{epoch}_{suffix}".format(batch=batch_size, step=steps_per_epoch, epoch=nb_epoch, suffix=suffix),results_dir)
            model_gpu = multi_gpu_model(model, gpus=len(list_gpus))
            
        Y_predict = model_gpu.predict(X_val)
        list_pred.append(Y_predict)
        #print(accuracy_score(np.rint(Y_test.flatten()), np.rint(Y_predict.flatten())))
        #print(classification_report(np.rint(Y_test.flatten()), np.rint(Y_predict.flatten())))
        K.clear_session()
        
    # Create stacked matrix
    Y_predict_matrix = np.concatenate(list_pred, axis=3)

    # Find highest value
    Y_predict_matrix = np.argmax(Y_predict_matrix, axis=3)

    # Map back to original labels
    dict_replace = {}
    for idx, label in enumerate(map(int,[x.split('.')[0].split('_')[-1] for x in os.listdir(results_dir+'/cache') if x.endswith(".json")])):
        dict_replace[idx] = label
    print(dict_replace)
    u,inv = np.unique(Y_predict_matrix,return_inverse = True)
    Y_predict = np.array([dict_replace.get(x, len(dict_replace)) for x in u])[inv].reshape(Y_predict_matrix.shape)

    X_train, X_test, Y_train, Y_test = train_test_split(X_raw, Y_raw, test_size=0.20, random_state=3254)
    X_train, X_val, Y_train, Y_val = train_test_split(X_train, Y_train, test_size=0.10, random_state=3254)
    
    mask = np.where(np.in1d(Y_val.flatten(), np.array(list_final)))[0]
    print(accuracy_score(Y_val.flatten()[mask], Y_predict.flatten()[mask]))
    print(classification_report(Y_val.flatten()[mask], Y_predict.flatten()[mask]))

unique labels found:
[3014 3092 3096 3097 3099 3110 3118 3129 3130 3152 3155 3181 3182 3183
 3184 3221 3292 3294 3296 3297 3298 3299 3900 3901 3902 3903 3904 3910
 3911 3912 3913 3914 3923 3924 3926 3927 3928 3929 3964 3984]
labels that do not meet threshold
[3014, 3096, 3155, 3181, 3182, 3183, 3221, 3901, 3911, 3923, 3924, 3926, 3927, 3928, 3964, 3984]
final labels
[3092, 3097, 3099, 3110, 3118, 3129, 3130, 3152, 3184, 3292, 3294, 3296, 3297, 3298, 3299, 3900, 3902, 3903, 3904, 3910, 3912, 3913, 3914, 3929]


label: 3092
{3092: 1.0}
composition with label != 0
Y_train 0.5410628019323671 0.1474718196457327
composition with label != 0
Y_val 0.6 0.19891964285714286
composition with label != 0
Y_test 0.5202312138728323 0.12354407514450867
X_train.shape X_val.shape X_test.shape Y_train.shape Y_val.shape Y_test.shape :
(621, 128, 128, 5) (70, 128, 128, 5) (173, 128, 128, 5) (621, 80, 80, 1) (70, 80, 80, 1) (173, 80, 80, 1)
label: 3097
{3097: 1.0}
composition with label != 0
Y_train 0.172302

label: 3904
{3904: 1.0}
composition with label != 0
Y_train 0.17552334943639292 0.003517512077294686
composition with label != 0
Y_val 0.21428571428571427 0.005008928571428571
composition with label != 0
Y_test 0.1791907514450867 0.003717485549132948
X_train.shape X_val.shape X_test.shape Y_train.shape Y_val.shape Y_test.shape :
(621, 128, 128, 5) (70, 128, 128, 5) (173, 128, 128, 5) (621, 80, 80, 1) (70, 80, 80, 1) (173, 80, 80, 1)
label: 3910
{3910: 1.0}
composition with label != 0
Y_train 0.31561996779388085 0.02089573268921095
composition with label != 0
Y_val 0.3 0.012044642857142858
composition with label != 0
Y_test 0.3063583815028902 0.018995664739884392
X_train.shape X_val.shape X_test.shape Y_train.shape Y_val.shape Y_test.shape :
(621, 128, 128, 5) (70, 128, 128, 5) (173, 128, 128, 5) (621, 80, 80, 1) (70, 80, 80, 1) (173, 80, 80, 1)
label: 3912
{3912: 1.0}
composition with label != 0
Y_train 0.5845410628019324 0.021291264090177134
composition with label != 0
Y_val 0.5857142

In [17]:
if __name__ == '__main__':
    
    results_dir = '/nfs/results/unet/padtest/80_80_24/results_prelim_unet_pl_c24pl'
    
    dict_img = {'width': 80, 'height': 80, 'stride': 80, 'padding': 24}
    dict_label = {'width': 80, 'height': 80, 'stride': 80}
    X_raw, Y_raw = geopatches.create_data(['/nfs/conditioned/planetlabs/1154314_2014-07-23_RE2_3A_Analytic_clipped.tif'],dict_img,\
                                        ['/nfs/conditioned/planetlabs/ClusterRaster24_PlanetLabs.tif'],dict_label)
#                                        extraindexbands=[{'type': 'evi', 'nir':4, 'red': 2, 'blue': 0},\
#                                                         {'type': 'ccci', 'nir':4, 'red': 2, 'rededge': 3},\
#                                                         {'type': 'savi', 'nir':4, 'red': 2, 'L': 0.5}],\
#                                          normalizebands=65535)

    # Planetlabs needs to be divided by max uint16 = 65535 to be between 0 and 1.0
    X_raw = X_raw / 65535

    # remove classes that are too small
    list_remove = []
#     for x in np.asarray(np.unique(Y_raw, return_counts=True)).T:
#         if x[1] < 400*36:
#             list_remove.append(x[0])
    print('unique labels found:')
    list_unique = np.unique(Y_raw)
    print(list_unique)
    print('labels that do not meet threshold')
    print(list_remove)
    list_final = [x for x in list_unique if x not in list_remove]
    print('final labels')
    print(list_final)
    print('\n') 

    # Variables
    batch_size = 128
    nb_epoch = 50
    steps_per_epoch = 100
    window_size = X_raw.shape[1]
    #nb_classes = 1
    nb_channels = X_raw.shape[3]
    cropping_len = 24
    
    list_pred = []
    list_labels = map(int,[x.split('.')[0].split('_')[-1] for x in os.listdir(results_dir+'/cache') if x.endswith(".json")])
        
    # Build models
    for label in list_labels:
        
        # Encode
        print('label:',label)
        X_train, X_val, X_test, Y_train, Y_val, Y_test = encode_binary_data({label: 1.0})
        print('X_train.shape','X_val.shape','X_test.shape','Y_train.shape','Y_val.shape','Y_test.shape',':')
        print(X_train.shape,X_val.shape,X_test.shape,Y_train.shape,Y_val.shape,Y_test.shape)
        
        # Calculate metrics off saved model
        suffix = label
        
        if len(list_gpus) <= 1:
            model = read_model("{batch}_{step}_{epoch}_{suffix}".format(batch=batch_size, step=steps_per_epoch, epoch=nb_epoch, suffix=suffix),results_dir)
            model_gpu = model
        else:
            with tf.device("/cpu:0"):
                model = read_model("{batch}_{step}_{epoch}_{suffix}".format(batch=batch_size, step=steps_per_epoch, epoch=nb_epoch, suffix=suffix),results_dir)
            model_gpu = multi_gpu_model(model, gpus=len(list_gpus))
            
        Y_predict = model_gpu.predict(X_val)
        list_pred.append(Y_predict)
        #print(accuracy_score(np.rint(Y_test.flatten()), np.rint(Y_predict.flatten())))
        #print(classification_report(np.rint(Y_test.flatten()), np.rint(Y_predict.flatten())))
        K.clear_session()
        
    # Create stacked matrix
    Y_predict_matrix = np.concatenate(list_pred, axis=3)

    # Find highest value
    Y_predict_matrix = np.argmax(Y_predict_matrix, axis=3)

    # Map back to original labels
    dict_replace = {}
    for idx, label in enumerate(map(int,[x.split('.')[0].split('_')[-1] for x in os.listdir(results_dir+'/cache') if x.endswith(".json")])):
        dict_replace[idx] = label
    print(dict_replace)
    u,inv = np.unique(Y_predict_matrix,return_inverse = True)
    Y_predict = np.array([dict_replace.get(x, len(dict_replace)) for x in u])[inv].reshape(Y_predict_matrix.shape)

    X_train, X_test, Y_train, Y_test = train_test_split(X_raw, Y_raw, test_size=0.20, random_state=3254)
    X_train, X_val, Y_train, Y_val = train_test_split(X_train, Y_train, test_size=0.10, random_state=3254)
    
    mask = np.where(np.in1d(Y_val.flatten(), np.array(list_final)))[0]
    print(accuracy_score(Y_val.flatten()[mask], Y_predict.flatten()[mask]))
    print(classification_report(Y_val.flatten()[mask], Y_predict.flatten()[mask]))

unique labels found:
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23]
labels that do not meet threshold
[]
final labels
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]




FileNotFoundError: [Errno 2] No such file or directory: '/nfs/results/unet/padtest/80_80_24/results_prelim_unet_pl_c24pl/cache'