#Configuration Parameters

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import sys
import os
import argparse
import csv

import imageio
import numpy as np

from keras.utils import multi_gpu_model, to_categorical
from keras.layers import Activation,Input
from keras.models import Model
from keras.applications.vgg16 import VGG16
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint, CSVLogger
from tensorflow import convert_to_tensor

sys.path.append("/content/drive/My Drive/tf-keras-SegNet")
from model import segnet

Using TensorFlow backend.


In [0]:
base_dir = "/content/drive/My Drive/"

In [0]:
def create_session_paths(session_name, overwrite=False, base_dir='/content/drive/My Drive/'):
    models_dir = os.path.join(base_dir, "models")
    session_dir = os.path.join(models_dir, session_name)
    # Prevent accidental overwriting of previous sessions
    try:
        os.mkdir(session_dir)
    except FileExistsError:
        if not overwrite:
            print("Set overwrite to True if you wish to continue")
            raise FileExistsError
        print("overwriting session")

    model = os.path.join(session_dir, "model.h5")
    history = os.path.join(session_dir, "history.json")
    training_log = os.path.join(session_dir, "logs.csv")
    training_config = os.path.join(session_dir, "config.json")
    classification_report = os.path.join(session_dir, "classification_report.txt")
    return {"model": model,
            "history": history,
            "logs": training_log,
            "config": training_config,
            "classification_report": classification_report}

In [5]:
session_name = "one_percent_transfer"
session_paths = create_session_paths(session_name, overwrite=True)

overwriting session


In [6]:
for i in session_paths:
    print(session_paths[i])

/content/drive/My Drive/models/one_percent_transfer/model.h5
/content/drive/My Drive/models/one_percent_transfer/history.json
/content/drive/My Drive/models/one_percent_transfer/logs.csv
/content/drive/My Drive/models/one_percent_transfer/config.json
/content/drive/My Drive/models/one_percent_transfer/classification_report.txt


In [0]:
def get_image_list(metadata_file_path):
    with open(metadata_file_path, 'r') as f:
        return [i for i in csv.reader(f) if i]

In [0]:
training_set_path = '/content/drive/My Drive/Metadata/one_percent_train.csv'
val_set_path = '/content/drive/My Drive/Metadata/one_percent_val.csv'
test_set_path = '/content/drive/My Drive/Metadata/one_percent_test.csv'
model_path = session_paths["model"]

n_splits = 1
bands = [2, 3, 4] # number of bands

num_classes = 2
image_shape = (512, 512, len(bands))
padding = ((0, 0), (0, 0))
batch_size = 5
epochs = 50
learning_rate = 0.02

training_set_list = get_image_list(training_set_path)
val_set_list = get_image_list(val_set_path)

training_set_size = len(training_set_list)
val_set_size = len(val_set_list)

loss_function = 'categorical_crossentropy'
metrics = ['accuracy']
callback_metric = "val_accuracy"

# Load segnet and vgg model


In [9]:
segnet_model = segnet(image_shape, num_classes)
vgg_model = VGG16(include_top=False, weights='imagenet', input_shape=image_shape, classes=num_classes)

Build enceder done..
Build decoder done..


In [10]:
seg_layer_names = [i for i in segnet_model.layers if "conv" in i.name]
vgg_layer_names = [i for i in vgg_model.layers if "conv" in i.name]

transferable_layer_names = {}
for i in range(len(vgg_layer_names)):
    transferable_layer_names[seg_layer_names[i].name] = vgg_layer_names[i]

layer_count = 0
for i in segnet_model.layers:
    try:
        i.set_weights(transferable_layer_names[i.name].get_weights())
        layer_count += 1
    except KeyError:
        pass

print(layer_count)

13


# Transfer weights of matching layers from image-net trained vgg16 to segnet

#Training model 


In [0]:
def data_gen(metadata_file_path, bands, batch_size):
    image_list = np.asarray(get_image_list(metadata_file_path))
    np.random.seed(1)
    np.random.shuffle(image_list)

    band_normalization_map = []
    counter = 0

    total_steps = image_list.shape[0] // batch_size
    while True:
        step_start = counter * batch_size
        step_end = step_start + batch_size
        images = []
        masks = []
        for j in range(step_start, step_end):
            images.append(np.load(image_list[j, 0])[:,:,bands])
            masks.append(np.load(image_list[j, 1]))

        y = to_categorical(np.array(masks))
        yield np.array(images), y.reshape((batch_size, y.shape[1] * y.shape[2], y.shape[3]))

        counter +=1

        if counter >= total_steps:
            counter = 0

In [12]:
train_data = data_gen(training_set_path, bands, batch_size)
val_data = data_gen(val_set_path, bands, batch_size)

segnet_model.compile(optimizer=Adam(learning_rate=learning_rate), loss=loss_function, metrics=metrics)

checkpoint = ModelCheckpoint(session_paths["model"],
                             monitor=callback_metric,
                             verbose=1,
                             save_best_only=True,
                             mode='max')

reduce_lr = ReduceLROnPlateau(monitor=callback_metric,
                              factor=0.5,
                              patience=3,
                              verbose=1,
                              mode='max',
                              min_lr=0.0001)

csv_logger = CSVLogger(session_paths["logs"])

early_stopper = EarlyStopping(monitor=callback_metric,
                              patience=9,
                              verbose=1,
                              mode='max')

callbacks_list = [checkpoint, reduce_lr, csv_logger, early_stopper]

try:
    model = multi_gpu_model(model)
except:
    print("single GPU in use")

hist = segnet_model.fit(train_data,
                        steps_per_epoch=training_set_size // batch_size,
                        epochs=epochs,
                        validation_data=val_data,
                        validation_steps=val_set_size // batch_size,
                        verbose=1,
                        callbacks=callbacks_list)

val_loss = hist.history["val_loss"]

single GPU in use
Epoch 1/50

Epoch 00001: val_accuracy improved from -inf to 0.91962, saving model to /content/drive/My Drive/models/one_percent_transfer/model.h5
Epoch 2/50

Epoch 00002: val_accuracy improved from 0.91962 to 0.91963, saving model to /content/drive/My Drive/models/one_percent_transfer/model.h5
Epoch 3/50

Epoch 00003: val_accuracy did not improve from 0.91963
Epoch 4/50

Epoch 00004: val_accuracy did not improve from 0.91963

Epoch 00004: ReduceLROnPlateau reducing learning rate to 0.009999999776482582.
Epoch 5/50

Epoch 00005: val_accuracy did not improve from 0.91963
Epoch 6/50

Epoch 00006: val_accuracy did not improve from 0.91963
Epoch 7/50

Epoch 00007: val_accuracy did not improve from 0.91963

Epoch 00007: ReduceLROnPlateau reducing learning rate to 0.004999999888241291.
Epoch 8/50

Epoch 00008: val_accuracy improved from 0.91963 to 0.91976, saving model to /content/drive/My Drive/models/one_percent_transfer/model.h5
Epoch 9/50

Epoch 00009: val_accuracy did n

#define a list of test image chunks 


In [0]:

test_names=[]

with open(test_set_path,"r") as f:
  test_image_list=[]
  for line in f.readlines():
    arr=[]
    str_array=line.split(" ")
    arr.append(str_array[0]+" "+str_array[1])
    arr.append(str_array[2]+" "+str_array[3][:-1])
    test_image_list.append(arr)
    test_names.append(line)

test_image_list=np.asarray(test_image_list)

IndexError: ignored

#read test chunks in batches using the list defined above 


In [0]:

import imageio
import numpy as np
from keras.utils import to_categorical

def read_test_image_batch(image_list, batch_size, channel_list):
    while True:
        l=len(image_list)
        num_batch=l//batch_size
        if num_batch*batch_size<l:
            num_batch+=1
        for i in range(num_batch):
            batch_set=image_list[batch_size*i:min(batch_size*(i+1),l),:]
            batch_set=[batch_set[bs] for bs in range(len(batch_set))]
            X=np.array([np.load(line[0][0:]) for line in batch_set])
            labels=np.array([np.load(line[1][0:]) for line in batch_set])
            y=to_categorical(labels,num_classes)
            X = X[:, :, :, channel_list]
            yield tuple((X, y))




#testing model


In [0]:
import argparse
from keras.models import Model
from keras.layers import Activation,Input
from scipy.stats import mode
import os


class_labels=[]


for i in os.listdir(model_path):
    if(i==".ipynb_checkpoints"):
        continue
    
    key=0
    #if(key!=0):
    #   continue
    
    print(model_path+i)
    my_model=create_model()
    my_model.compile(optimizer,loss=loss_function,metrics=metrics)
    my_model.load_weights(model_path+i)
    
    test_data=read_test_image_batch(test_image_list,batch_size,set_info[key])
    probs=my_model.predict(test_data,steps=(test_set_size+1)//batch_size)
    print("\nprobs")
    print(probs.shape)
    class_labels.append(probs.argmax(axis=-1))

hist = np.histogram(class_labels[0])
class_labels=np.asarray(class_labels)
u,indices=np.unique(class_labels,return_inverse=True)
final_labels=u[np.argmax(np.apply_along_axis(np.bincount,0,indices.reshape(class_labels.shape),None,np.max(indices)+1),axis=0)]

for i in range(test_set_size):
    line=test_names[i]
    print(line)
    str_array=line.split(" ")
    scene_id=str_array[1][18:40]
    out_str=str_array[1][40:-4]+"_output.npy"
    print(out_str)
    np.save('/content/drive/My Drive/new_test_outputs_key_0/'+scene_id+out_str,final_labels[i])
    

NotADirectoryError: ignored

In [0]:
"""
This script takes a list of scene ids and creates a file that can be used as input for a segnet model
@param string chunk_dir: The abspath base directory where each set of chunks for a scene has its own dir named with its sceneID
@param list scene_ids: A list of sceneIDs that exist in the chunk_dir. The chunks of these scenes will be used in the file.
@param string out_path: The abspath where the resulting file should be saved.
@return int lines_written: the total number of lines (corresponding to a data and label chunk path) in the file.
file format:
/path/to/scene_chunk.npy,/path/to/scene_chunk_label.npy
/path/to/scene_chunk.npy,/path/to/scene_chunk_label.npy
/path/to/scene_chunk.npy,/path/to/scene_chunk_label.npy
...
"""

import os

def make_segnet_input_file(chunk_dir, scene_ids, out_path):
    existing_scenes = [i for i in os.listdir(chunk_dir) if os.path.isdir(os.path.join(chunk_dir, i))]
    # filter out ids that don't exist in the given dir
    scene_ids = [i for i in scene_ids if i in existing_scenes]
    print(scene_ids)

    lines_to_write = []

    for i in scene_ids:
        scene_dir = os.path.join(chunk_dir, i)
        for j in os.listdir(scene_dir):
            if j[-9:] != "label.npy":
                data_path = os.path.join(scene_dir, j)
                file_split = os.path.splitext(j)
                label_path = os.path.join(scene_dir, file_split[0] + "_label" + file_split[1])

                lines_to_write.append("{},{}\n".format(data_path, label_path))

    with open(out_path, 'w+') as output:
        output.writelines(lines_to_write)

    return len(lines_to_write)

# Simple script to convert space-delimited chunk-path files to csv for easier file loading.
## Old file formats are still available in the metadata directory, but .csv equivalents should be used from now on. This script probably shouldn't been needed again.


In [0]:
import csv
metadata_path = "/content/drive/My Drive/Metadata"
image_files = [i for i in os.listdir(metadata_path) if ".txt" in i]
for i in image_files:
    file_name, extension = os.path.splitext(i)
    file_path = os.path.join(metadata_path, i)
    with open(file_path, 'r') as read_file:
        lines = [i[:-1].split(" ") for i in read_file.readlines() if i]
    lines = [[f"{i[0]} {i[1]}", f"{i[2]} {i[3]}"] for i in lines]

    with open(os.path.join(metadata_path, file_name + ".csv"), 'w') as write_file:
        writer = csv.writer(write_file)
        writer.writerows(lines)


In [0]:
metadata_file = [i for i in os.listdir(metadata_path) if ".csv" in i][0]
print(metadata_file)
test_img_path = os.path.join(metadata_path, metadata_file)

with open(test_img_path) as f:
    reader = csv.reader(f)
    data = next(reader)

print(data[0])
print(os.path.isfile(data[0]))

test_img = next(data_loader.image_segmentation_generator(data[0], data[1], 1, 2, 512, 512, 512, 512,))


train.csv
/content/drive/My Drive/uncompressed_stacked_chunks/LC80651102015019LGN00/chunk_13_12.npy
True


NotADirectoryError: ignored