In [1]:
import os
import pandas as pd
import numpy as np
import random
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.layers import Dense, Dropout, Flatten,Conv2D, MaxPooling2D, AveragePooling2D,Activation, BatchNormalization, GlobalAveragePooling2D
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau

import keras_tuner as kt

import rasterio
from rasterio.plot import reshape_as_image

from sklearn.model_selection import train_test_split
from skimage.transform import resize

#seed set to allow for reproducibility within the results
seed = 2505
random.seed(seed)
np.random.seed(seed)
tf.random.set_seed(seed)


width = 83 #32#83  --16
height= 83 #32#83  --16
layers = 7 #7 sat #6 drone
class_count = 5 #5
input_shape = (width, height, layers)
batch_size = 25 #25
epochs = 100
model_name = "GullyDetector_tuned"

In [2]:
#simple CSV loader
def load_samples(csv_file):
    data = pd.read_csv(os.path.join(csv_file))
    data = data[['FileName','Label','ClassName']]
    file_names = list(data.iloc[:,0])
    #Get labels withing second column
    labels = list(data.iloc[:,1])
    samples=[]
    for samp,lab in zip(file_names,labels):
        samples.append([samp,lab])
    return samples

#ensure size of tile is uniform
def preprocessing(tile,label,class_count,layers,width,height):
    #print(tile.shape,"before")
    #to avoid artifacts, no antialiasing whn rescaling
    tile = resize(tile, (layers,width,height),anti_aliasing=False)
    #print(tile.shape,"resize")
    #Returns the source array reshaped into the order expected by image processing and visualization software (matplotlib, scikit-image, etc) by swapping the axes order from (bands, rows, columns) to (rows, columns, bands)
    tile = reshape_as_image(tile)
    #print(tile.shape,"reshape")
    
    #normalising tile
    tile = tile/255
    
    label = to_categorical(label,class_count)
    return tile,label

#Keras compatible data generator - works with rasterio compatible files (tiff only as written)
def generator(samples,batch_size,width,height,layers,class_count):
    """
    Yields next training batch, checks shape of tile, ensure image format - includes DEM
    """
    num_samples = len(samples)
    while True: # Loop forever so the generator never terminates
        #shuffle(samples)
        # Get index to start each batch: [0, batch_size, 2*batch_size, ..., max multiple of batch_size <= num_samples]
        for offset in range(0, num_samples, batch_size):
            # Get the samples you'll use in this batch
            batch_samples = samples[offset:offset+batch_size]

            # Initialise X_train and y_train arrays for this batch
            X_train = []
            y_train = []

            # For each example
            for batch_sample in batch_samples:
                # Load image (X) and label (y)
                img_name = batch_sample[0]
                label = batch_sample[1]
                #load in file
                #print(os.path.join(data_path,img_name))
                with rasterio.open(os.path.join(img_name)) as ds:
                    tile=ds.read()
                #perform any preprocessing required
                tile,label = preprocessing(tile,label,class_count,layers,width,height)     

                # Add example to arrays
                X_train.append(tile)
                y_train.append(label)

            # Make sure they're numpy arrays (as opposed to lists)
            X_train = np.array(X_train)
            #X_train = np.asarray(X_train).astype('float32')
            y_train = np.array(y_train)

            # The generator-y part: yield the next training batch            
            yield X_train, y_train

In [3]:
def Gullydetector_CNN(input_shape,class_count,units,activation,dropout,lr):
    #drop1 = dropout_rate, drop2 = dropout_rate_in
    model = Sequential()

    model.add(Conv2D(64, (3, 3), padding='same', input_shape=input_shape))
    model.add(BatchNormalization())
    model.add(Activation(activation = activation))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(dropout)) #0.25

    model.add(Flatten())
    model.add(Dense(units = units))
    model.add(BatchNormalization())
    model.add(Activation(activation = activation))
    model.add(Dropout(dropout)) #0.25

    model.add(Dense(class_count))
    model.add(Activation(activation='softmax'))

    #model.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=metrics)
    #opt = tensorflow.keras.optimizers.Adam(learning_rate=0.0001)
    model.compile(loss='categorical_crossentropy',
                  optimizer=keras.optimizers.Adam(learning_rate=lr),
                  metrics=['accuracy'])
    #print(model.summary())
    #plot_model(model, to_file='model_plot.png', show_shapes=True, show_layer_names=True)
    
    return model

In [4]:
def build_model(hp):
    units = hp.Int("units", min_value=64, max_value=512, step=32)
    activation = hp.Choice("activation", ["relu", "tanh","LeakyReLU"]) #hp.Choice("activation", ["relu", "tanh"])
    dropout = hp.Choice("dropout",values=[0.5, 0.6, 0.7,0.8])
    lr = hp.Choice("lr", values=[0.1, 0.01, 0.001, 0.0001,0.00001])
    # call existing model-building code with the hyperparameter values.
    model = Gullydetector_CNN(
        input_shape =input_shape,units=units, activation=activation, dropout=dropout, lr=lr,class_count = class_count
    )
    return model

In [5]:
tuner = kt.Hyperband(
    hypermodel=build_model,
    objective= "val_accuracy",
    overwrite=True,
    directory=r"./Tune",
    project_name=model_name,
    max_epochs = 50,
    factor = 3
)

#tuner = kt.RandomSearch(
#    hypermodel=build_model,
#    objective="val_accuracy",
#    max_trials=5,
#    executions_per_trial=3,
#    overwrite=True,
#    directory=r"./Tune",
#    project_name=model_name,
#)


tuner.search_space_summary()


Search space summary
Default search space size: 4
units (Int)
{'default': None, 'conditions': [], 'min_value': 64, 'max_value': 512, 'step': 32, 'sampling': None}
activation (Choice)
{'default': 'relu', 'conditions': [], 'values': ['relu', 'tanh', 'LeakyReLU'], 'ordered': False}
dropout (Choice)
{'default': 0.5, 'conditions': [], 'values': [0.5, 0.6, 0.7, 0.8], 'ordered': True}
lr (Choice)
{'default': 0.1, 'conditions': [], 'values': [0.1, 0.01, 0.001, 0.0001, 1e-05], 'ordered': True}


In [None]:

build_model(kt.HyperParameters())

data_path = './Dataset'
data_dir_list = os.listdir(data_path)
print ('the data list is: ',data_dir_list)
fulldataset_df = load_samples('Full_drone_dataset.csv')

train_df,test_df = train_test_split(fulldataset_df, test_size=0.2) #0.2
test_df,valid_df = train_test_split(test_df, test_size=0.5)

#counts necessary
valid_count = len(valid_df)
train_count = len(train_df)
test_count = len(test_df)
print(valid_count)
print(train_count)
print(test_count)

#Create the generators 
train_datagen = generator(samples=train_df,batch_size=batch_size,width=width,height=height,layers=layers,class_count=class_count)
test_datagen = generator(samples=test_df,batch_size=batch_size,width=width,height=height,layers=layers,class_count=class_count)
valid_datagen = generator(samples=valid_df,batch_size=batch_size,width=width,height=height,layers=layers,class_count=class_count)

callbacks = [
    EarlyStopping(patience=5,monitor='val_loss', verbose=1),
    #ReduceLROnPlateau(factor=0.1, patience=5, min_lr=0.00001, verbose=1),
    #ModelCheckpoint('model-inprogress-hyperparameter.h5', verbose=1, save_best_only=True, save_weights_only=True),
]

#history = model.fit(train_datagen,steps_per_epoch=train_count // batch_size, verbose=1, epochs=epochs,validation_data=valid_datagen,validation_steps=valid_count // batch_size,callbacks=callbacks)

tuner.search(train_datagen,steps_per_epoch=train_count // batch_size,validation_steps=valid_count // batch_size, epochs=epochs, validation_data=valid_datagen,callbacks=callbacks,verbose=1)


Trial 55 Complete [00h 39m 43s]
val_accuracy: 0.7738271355628967

Best val_accuracy So Far: 0.9525926113128662
Total elapsed time: 1d 02h 54m 28s

Search: Running Trial #56

Value             |Best Value So Far |Hyperparameter
384               |384               |units
tanh              |relu              |activation
0.8               |0.5               |dropout
0.1               |1e-05             |lr
6                 |50                |tuner/epochs
0                 |17                |tuner/initial_epoch
2                 |3                 |tuner/bracket
0                 |3                 |tuner/round

Epoch 1/6
Epoch 2/6
Epoch 3/6

In [None]:
tuner.results_summary()

from contextlib import redirect_stdout

with open('tuning_modelsummary_Gullydetector5.txt', 'w') as f:
    with redirect_stdout(f):
        tuner.results_summary()
5

from tensorflow import keras
best_model = keras.models.load_model('StudyModel32_tuned1')

best_model.build(input_shape=(None, 32, 32))
best_model.summary()

from contextlib import redirect_stdout

with open('modelsummary.txt', 'w') as f:
    with redirect_stdout(f):
        best_model.summary()
    

In [1]:
# Get the top 2 models.
models = tuner.get_best_models(num_models=2)
best_model = models[0]
#save model for later use
best_model.save(model_name + '1')

best_model = models[1]
#save model for later use
best_model.save(model_name + '2')
# Build the model.
# Needed for `Sequential` without specified `input_shape`.
best_model.build(input_shape=(None, 32, 32))
best_model.summary()


NameError: name 'tuner' is not defined

dir_name = 'StudyModel32_tuned2'
output_filename = dir_name
import shutil
shutil.make_archive(output_filename, 'zip', dir_name)