In [0]:
import pandas as pd
import numpy as np
import glob as glob
import re
import random
import os
import pickle
import itertools as it
from sklearn.linear_model import LogisticRegression
from PIL import Image as ig

from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import RandomForestClassifier
from sklearn.externals import joblib

from skimage.io import imread
from skimage.color import rgb2gray
from skimage.transform import resize

#from utils import *
from keras.models import Model, load_model
from keras.layers import Input
from keras.layers.core import Dropout, Lambda
from keras.layers.convolutional import Conv2D, Conv2DTranspose
from keras.layers.pooling import MaxPooling2D
from keras.layers.merge import concatenate
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras import backend as K
from keras.models import Sequential
from keras.layers import Dropout, Flatten, Dense, MaxPooling2D, Conv2D, Activation, UpSampling2D
from keras.losses import categorical_crossentropy
from keras import optimizers

Using TensorFlow backend.


In [0]:
! git clone https://github.com/santitellez/Term_Project_st41_eps2

Cloning into 'Term_Project_st41_eps2'...
remote: Enumerating objects: 54, done.[K
remote: Counting objects: 100% (54/54), done.[K
remote: Compressing objects: 100% (40/40), done.[K
remote: Total 22369 (delta 26), reused 41 (delta 14), pack-reused 22315[K
Receiving objects: 100% (22369/22369), 629.77 MiB | 33.15 MiB/s, done.
Resolving deltas: 100% (1176/1176), done.
Checking out files: 100% (23991/23991), done.


In [0]:
! ls

sample_data  Term_Project_st41_eps2


In [0]:
### Functions

BatchSize = 10

def create_submission(csv_name, predictions, image_ids):
    """
    csv_name -> string for csv ('XXXXXXX.csv')
    predictions -> numpyarray of size (num_examples, height, width)
                In this case (num_examples, 512, 512)
    image_ids -> numpyarray or list of size (num_examples,)
    
    predictions[i] should be the prediciton of road for image_id[i]
    """
    sub = pd.DataFrame()
    sub['ImageId'] = image_ids
    encodings = []
    num_images = len(image_ids)
    for i in range(num_images):
        if (i+1) % (num_images//10) == 0:
            print(i, num_images)
        encodings.append(rle_encoding(predictions[i]))
        
    sub['EncodedPixels'] = encodings
    sub['Height'] = [512]*num_images
    sub['Width'] = [512]*num_images
    sub.to_csv(csv_name, index=False)

# Run-length encoding stolen from https://www.kaggle.com/rakhlin/fast-run-length-encoding-python
def rle_encoding(x):
    """
    x = numpyarray of size (height, width) representing the mask of an image
    if x[i,j] == 0:
        image[i,j] is not a road pixel
    if x[i,j] != 0:
        image[i,j] is a road pixel
    """
    dots = np.where(x.T.flatten() != 0)[0]
    run_lengths = []
    prev = -2
    for b in dots:
        if (b>prev+1): 
            run_lengths.extend((b+1, 0))
        run_lengths[-1] += 1
        prev = b
    return run_lengths

def get_img_id(img_path):
    
    img_basename = os.path.basename(img_path)
    img_id = os.path.splitext(img_basename)[0][:-len('_sat')]
    return img_id

def image_gen(img_paths, img_size=(512, 512)):

    for img_path in img_paths:
        
        img_id = get_img_id(img_path)
        mask_path = os.path.join('Term_Project_st41_eps2/comp-540-spring-2019/train', img_id + '_msk.png')
        
        img = imread(img_path) / 255.
        mask = rgb2gray(imread(mask_path))
        
        img = resize(img, img_size, preserve_range=True)
        mask = resize(mask, img_size, mode='constant', preserve_range=True)
        mask = (mask >= 0.5).astype(float)
        
        yield img, mask

def dice_coef(y_true, y_pred):
    
    y_true_f = K.flatten(y_true)
    y_pred = K.cast(y_pred, 'float32')
    y_pred_f = K.cast(K.greater(K.flatten(y_pred), 0.5), 'float32')
    intersection = y_true_f * y_pred_f
    score = 2. * (K.sum(intersection) + smooth) / (K.sum(y_true_f) + K.sum(y_pred_f) + smooth)
    
    return score

def image_batch_generator(img_paths, batchsize = BatchSize):
    
    while True:
        
        ig = image_gen(img_paths)
        batch_img, batch_mask = [], []
        
        for img, mask in ig:

            batch_img.append(img)
            batch_mask.append(mask)

            if len(batch_img) == batchsize:
                
                yield np.stack(batch_img, axis=0), np.expand_dims(np.stack(batch_mask, axis=0),axis = -1)
                batch_img, batch_mask = [], []
        
        if len(batch_img) != 0:
            yield np.stack(batch_img, axis=0), np.expand_dims(np.stack(batch_mask, axis=0),axis = -1)
            batch_img, batch_mask = [], []
            
def image_batch_generator_model1(img_paths, batchsize = BatchSize):
    
    while True:
        
        ig = image_gen(img_paths)
        batch_img, batch_mask = [], []
        
        for img, mask in ig:

            batch_img.append(img)
            batch_mask.append((np.sum(mask) > 0) * 1)

            if len(batch_img) == batchsize:
                
                yield np.stack(batch_img, axis=0), np.stack(batch_mask, axis=0)
                batch_img, batch_mask = [], []
        
        if len(batch_img) != 0:
            yield np.stack(batch_img, axis=0), np.expand_dims(np.stack(batch_mask, axis=0),axis = -1)
            batch_img, batch_mask = [], []

def calc_steps(data_len, batchsize):
    
    return (data_len + batchsize - 1) // batchsize

In [0]:
### Model Pipeline Parameters

ImgPaths = glob.glob("Term_Project_st41_eps2/comp-540-spring-2019/train/*.jpg")
ImgNums = [re.findall(r'/(\d+)', path)[0] for path in ImgPaths]

M = len(glob.glob("Term_Project_st41_eps2/comp-540-spring-2019/train/*.jpg")) # Number of training images
H = 512 # Image height
W = 512 # Image width
C = 3 # Channels R, G, B

Pt = .8 # Train proportion
Pv = .2 # Validation proportion
P1 = .25 # Proportion of data used in Model 1
P2 = .5 # Proportion of data used in Model 2
P3 = 1 - P1 - P2 # Proportion of data used in Model 3

# Subsetting images for training and validation

random.seed(1)
RandomOrder = np.random.choice(ImgNums, M, replace = False)

ImgNums1 = RandomOrder[:int(M * P1)] # Model 1 images
ImgNums2 = RandomOrder[int(M * P1):int(M * (P1 + P2))] # Model 2 images
ImgNums3 = RandomOrder[int(M * (P1 + P2)):] # Model 3 images

BatchSize = 10

smooth = 1e-9

In [0]:
### Feature engineering Model 1

def Model1FeatureEngineering(imgnums, train_or_val = "train"):
    Data = []

    for num in imgnums:
        img = ig.open("comp-540-spring-2019/" + train_or_val + "/" + num + "_sat.jpg")
        if train_or_val == "train":
            msk = ig.open("comp-540-spring-2019/" + train_or_val + "/" + num + "_msk.png")
        img_mat = np.array(img.getdata())
        img_avg = np.mean(img_mat, axis = 0)
        img_med = np.median(img_mat, axis = 0)
        img_var = np.var(img_mat, axis = 0)
        img_min = np.amin(img_mat, axis = 0)
        img_max = np.amax(img_mat, axis = 0)
        img_ran = np.ptp(img_mat, axis = 0)
        img_25q = np.percentile(img_mat, axis = 0, q = 25)
        img_75q = np.percentile(img_mat, axis = 0, q = 75)
        if train_or_val == "train":
            y = [(np.sum(np.array(msk.getdata())) > 0) * 1]

            img_data = np.concatenate((img_avg, img_med, img_var, img_min, img_max, img_ran, img_25q, img_75q, y))
        if train_or_val == "val":
            img_data = np.concatenate((img_avg, img_med, img_var, img_min, img_max, img_ran, img_25q, img_75q))
        Data.append(img_data)

    Data = np.array(Data)
    return(Data)

In [0]:
### Model for predicting whether image has any roads

Data = Model1FeatureEngineering(ImgNums1, "train")
X_train, X_val, y_train, y_val = train_test_split(Data[:, :(Data.shape[1] - 1)], Data[:, (Data.shape[1] - 1)], train_size = Pt)

ntrees = [50]
mtrys = [2]
max_depth = [35]

results = {}
best_val_acc = 0

for nt in ntrees:
    for mt in mtrys:
        for md in max_depth:
            any_roads_model = RandomForestClassifier(n_estimators = nt, max_depth = md, min_samples_split = mt)
            any_roads_model_fit = any_roads_model.fit(X_train, y_train)
            
            y_pred = np.round(any_roads_model.predict(X_val))
            y_pred = np.round(y_pred)
            val_acc = np.sum(y_pred == y_val) / y_val.shape[0]
            
            results[(nt, mt, md)] = val_acc
            
            if val_acc > best_val_acc:
                best_model = any_roads_model
                best_val_acc = val_acc

y_pred = np.round(best_model.predict(X_val))

conf_mat = confusion_matrix(y_val, y_pred)
print(conf_mat)
print(classification_report(y_val, y_pred))



[[ 29  58]
 [ 11 447]]
              precision    recall  f1-score   support

         0.0       0.72      0.33      0.46        87
         1.0       0.89      0.98      0.93       458

   micro avg       0.87      0.87      0.87       545
   macro avg       0.81      0.65      0.69       545
weighted avg       0.86      0.87      0.85       545



In [0]:
### Images for convolutional net

def images_conv(img_nums):
    
    #X_train = []
    y_train = []
    
    for i in list(img_nums):
        
        #img = ig.open("comp-540-spring-2019/train/" + i + "_sat.jpg")
        msk = ig.open("Term_Project_st41_eps2/comp-540-spring-2019/train/" + i + "_msk.png")
        #img_mat = np.array(img.getdata()).reshape((512, 512, 3))
        y = [pix[0] for pix in list(msk.getdata())]
        
        #X_train.append(img_mat)
        y_train.append((sum(y) > 0) * 1)
        
    #return np.array(X_train), y_train
    return y_train


In [0]:
### Convolutional net for predicting whether image has any roads

model = Sequential()

model.add(Conv2D(8, (4, 4), input_shape = (H, W, C)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(16, (4, 4)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(4, 4)))

model.add(Conv2D(16, (2, 2)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(16, (2, 2)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Dense(200))
model.add(Dropout(.75))
model.add(Activation('relu'))
model.add(Flatten())
model.add(Dense(1))
model.add(Activation('sigmoid'))

model.compile(loss='binary_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

In [0]:
### All images at once

X_train, y_train = images_conv(ImgNums1)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size = Pv)

In [0]:
### Train first model

BatchSize = 20
epochs = 5
earlystopper = EarlyStopping(patience = 2, verbose = 1)
checkpointer = ModelCheckpoint('Model1.h5', verbose = 1, save_best_only = True)

ImgNums1_paths = list(it.compress(ImgPaths, list(np.isin(np.array(ImgNums), ImgNums1))))
ImgNums1_paths_train, ImgNums1_paths_val = train_test_split(ImgNums1_paths, test_size = Pv)
ImgNums1_gen_train = image_batch_generator_model1(ImgNums1_paths_train, batchsize = BatchSize)
ImgNums1_gen_val = image_batch_generator_model1(ImgNums1_paths_val, batchsize = BatchSize)

model.fit_generator(ImgNums1_gen_train,
                    epochs = epochs,
                    verbose = 1,
                    validation_data = ImgNums1_gen_val,
                    steps_per_epoch = calc_steps(len(ImgNums1_paths_train), BatchSize),
                    validation_steps = calc_steps(len(ImgNums1_paths_val), BatchSize),
                    callbacks = [earlystopper, checkpointer])

Epoch 1/5
 23/109 [=====>........................] - ETA: 8:12 - loss: 0.6264 - acc: 0.8152

KeyboardInterrupt: 

(20, 512, 512, 3)

In [0]:
### Best convolutional Model 1

best_conv1 = load_model("Model1.h5")

In [0]:
### Save best Model 1

best_model1 = joblib.dump(best_model, "Model1.sav")

In [0]:
### Model 1 feature engineering for Model 2

ImgNums23 = np.concatenate((ImgNums2, ImgNums3))
Data =  Model1FeatureEngineering(ImgNums23, "train")

y_actual = Data[:, (Data.shape[1] - 1)]
Data = Data[:, :(Data.shape[1] - 1)]

y_pred = np.round(best_model.predict(Data))

conf_mat = confusion_matrix(y_actual, y_pred)
print(conf_mat)
print(classification_report(y_actual, y_pred))

ImgNumsWithRoads = ImgNums23[(y_pred == 1)]
ImgNumsWithoutRoads = ImgNums23[(y_pred != 1)]


[[ 524  825]
 [ 277 6547]]
              precision    recall  f1-score   support

         0.0       0.65      0.39      0.49      1349
         1.0       0.89      0.96      0.92      6824

   micro avg       0.87      0.87      0.87      8173
   macro avg       0.77      0.67      0.70      8173
weighted avg       0.85      0.87      0.85      8173



In [0]:
### Predicting convolutional net to determine if remaining images have roads

BatchSize = 1

ImgNums23 = np.concatenate((ImgNums2, ImgNums3))

ImgNums23_paths = list(it.compress(ImgPaths, list(np.isin(np.array(ImgNums), ImgNums23))))
ImgNums23_gen = image_batch_generator_model1(ImgNums23_paths, batchsize = BatchSize)

y_pred = np.squeeze(best_conv1.predict_generator(ImgNums23_gen, steps = len(ImgNums23_paths))).astype(int)
y_actual = np.array(images_conv(ImgNums23))

conf_mat = confusion_matrix(y_actual, y_pred)
print(conf_mat)
print(classification_report(y_actual, y_pred))

ImgNumsWithRoads = ImgNums23[(y_pred == 1)]
ImgNumsWithoutRoads = ImgNums23[(y_pred != 1)]


[[1365    1]
 [6793   14]]
              precision    recall  f1-score   support

           0       0.17      1.00      0.29      1366
           1       0.93      0.00      0.00      6807

   micro avg       0.17      0.17      0.17      8173
   macro avg       0.55      0.50      0.15      8173
weighted avg       0.81      0.17      0.05      8173



In [0]:
y_pred

NameError: name 'y_pred' is not defined

In [0]:
### Building U-Net for images with roads
# Generally, decrease (x,y) by factor 2 => Increase (z) by factor 2

inputs = Input((H, W, C))
s = Lambda(lambda x: x / 255) (inputs)

n = 1 # can use this as a performance complexity lever

c1 = Conv2D(16, (3, 3), activation='elu', kernel_initializer='he_normal', padding='same') (s)
c1 = Dropout(0.1) (c1)
c1 = Conv2D(16, (3, 3), activation='elu', kernel_initializer='he_normal', padding='same') (c1)
p1 = MaxPooling2D((2, 2)) (c1)

c2 = Conv2D(32, (3, 3), activation='elu', kernel_initializer='he_normal', padding='same') (p1)
c2 = Dropout(0.1) (c2)
c2 = Conv2D(32, (3, 3), activation='elu', kernel_initializer='he_normal', padding='same') (c2)
p2 = MaxPooling2D((2, 2)) (c2)

c3 = Conv2D(64, (3, 3), activation='elu', kernel_initializer='he_normal', padding='same') (p2)
c3 = Dropout(0.2) (c3)
c3 = Conv2D(64, (3, 3), activation='elu', kernel_initializer='he_normal', padding='same') (c3)
p3 = MaxPooling2D((2, 2)) (c3)

c4 = Conv2D(128, (3, 3), activation='elu', kernel_initializer='he_normal', padding='same') (p3)
c4 = Dropout(0.2) (c4)
c4 = Conv2D(128, (3, 3), activation='elu', kernel_initializer='he_normal', padding='same') (c4)
p4 = MaxPooling2D(pool_size=(2, 2)) (c4)

c5 = Conv2D(256, (3, 3), activation='elu', kernel_initializer='he_normal', padding='same') (p4)
c5 = Dropout(0.3) (c5)
c5 = Conv2D(256, (3, 3), activation='elu', kernel_initializer='he_normal', padding='same') (c5)

u6 = Conv2DTranspose(128, (2, 2), strides=(2, 2), padding='same') (c5)
u6 = concatenate([u6, c4])
c6 = Conv2D(128, (3, 3), activation='elu', kernel_initializer='he_normal', padding='same') (u6)
c6 = Dropout(0.2) (c6)
c6 = Conv2D(128, (3, 3), activation='elu', kernel_initializer='he_normal', padding='same') (c6)

u7 = Conv2DTranspose(64, (2, 2), strides=(2, 2), padding='same') (c6)
u7 = concatenate([u7, c3])
c7 = Conv2D(64, (3, 3), activation='elu', kernel_initializer='he_normal', padding='same') (u7)
c7 = Dropout(0.2) (c7)
c7 = Conv2D(64, (3, 3), activation='elu', kernel_initializer='he_normal', padding='same') (c7)

u8 = Conv2DTranspose(32, (2, 2), strides=(2, 2), padding='same') (c7)
u8 = concatenate([u8, c2])
c8 = Conv2D(32, (3, 3), activation='elu', kernel_initializer='he_normal', padding='same') (u8)
c8 = Dropout(0.1) (c8)
c8 = Conv2D(32, (3, 3), activation='elu', kernel_initializer='he_normal', padding='same') (c8)

u9 = Conv2DTranspose(16, (2, 2), strides=(2, 2), padding='same') (c8)
u9 = concatenate([u9, c1], axis=3)
c9 = Conv2D(16, (3, 3), activation='elu', kernel_initializer='he_normal', padding='same') (u9)
c9 = Dropout(0.1) (c9)
c9 = Conv2D(16, (3, 3), activation='elu', kernel_initializer='he_normal', padding='same') (c9)

outputs = Conv2D(1, (1, 1), activation='sigmoid') (c9)

Model2 = Model(inputs=[inputs], outputs=[outputs])
Model2.compile(optimizer='adam', loss='binary_crossentropy', metrics=[dice_coef])
Model2.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_6 (InputLayer)            (None, 512, 512, 3)  0                                            
__________________________________________________________________________________________________
lambda_6 (Lambda)               (None, 512, 512, 3)  0           input_6[0][0]                    
__________________________________________________________________________________________________
conv2d_120 (Conv2D)             (None, 512, 512, 16) 448         lambda_6[0][0]                   
__________________________________________________________________________________________________
dropout_18 (Dropout)            (None, 512, 512, 16) 0           conv2d_120[0][0]                 
__________________________________________________________________________________________________
conv2d_121

In [0]:
### A different U-Net implementation


inputs = Input((H, W, C))
s = Lambda(lambda x: x / 255) (inputs)

n = 1 # can use this as a performance complexity lever

c1 = Conv2D(32, (3, 3), activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(inputs)
c1 = Conv2D(32, (3, 3), activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(c1)
p1 = MaxPooling2D(pool_size=(2, 2))(c1)

c2 = Conv2D(64, (3, 3), activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(p1)
c2 = Conv2D(64, (3, 3), activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(c2)
p2 = MaxPooling2D(pool_size=(2, 2))(c2)

c3 = Conv2D(128, (3, 3), activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(p2)
c3 = Conv2D(128, (3, 3), activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(c3)
p3 = MaxPooling2D(pool_size=(2, 2))(c3)

c4 = Conv2D(256, (3, 3), activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(p3)
c4 = Conv2D(256, (3, 3), activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(c4)
d4 = Dropout(0.5)(c4)
p4 = MaxPooling2D(pool_size=(2, 2))(d4)

c5 = Conv2D(512, (3, 3), activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(p4)
c5 = Conv2D(512, (3, 3), activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(c5)
d5 = Dropout(0.5)(c5)

u6 = Conv2D(128, (2, 2), activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(UpSampling2D(size = (2,2))(d5))
m6 = concatenate([d4,u6], axis = 3)
c6 = Conv2D(128, (3, 3), activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(m6)
c6 = Conv2D(128, (3, 3), activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(c6)

u7 = Conv2D(128, (3, 3), activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(UpSampling2D(size = (2,2))(c6))
m7 = concatenate([c3,u7], axis = 3)
c7 = Conv2D(128, (3, 3), activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(m7)
c7 = Conv2D(128, (3, 3), activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(c7)

u8 = Conv2D(64, (2, 2), activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(UpSampling2D(size = (2,2))(c7))
m8 = concatenate([c2,u8], axis = 3)
c8 = Conv2D(64, (3, 3), activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(m8)
c8 = Conv2D(64, (3, 3), activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(c8)

u9 = Conv2D(32, (2, 2), activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(UpSampling2D(size = (2,2))(c8))
m9 = concatenate([c1,u9], axis = 3)
c9 = Conv2D(32, (3, 3), activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(m9)
c9 = Conv2D(32, (3, 3), activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(c9)
c9 = Conv2D(2, (3, 3), activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(c9)
c10 = Conv2D(1, 1, activation = 'sigmoid')(c9)

outputs = Conv2D(1, (1, 1), activation='sigmoid') (c10)

Model2 = Model(inputs=[inputs], outputs=[outputs])
Model2.compile(optimizer='adam', loss='binary_crossentropy', metrics=[dice_coef])
Model2.summary()


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_4 (InputLayer)            (None, 512, 512, 3)  0                                            
__________________________________________________________________________________________________
conv2d_76 (Conv2D)              (None, 512, 512, 32) 896         input_4[0][0]                    
__________________________________________________________________________________________________
conv2d_77 (Conv2D)              (None, 512, 512, 32) 9248        conv2d_76[0][0]                  
__________________________________________________________________________________________________
max_pooling2d_13 (MaxPooling2D) (None, 256, 256, 32) 0           conv2d_77[0][0]                  
__________________________________________________________________________________________________
conv2d_78 

In [0]:
ImgNumsWithRoads = ImgNums

In [0]:
### Generator for second model

BatchSize = 10

ImgNumsModel2WithRoads = list(set(ImgNums2) & set(ImgNumsWithRoads))
ImgPathsModel2WithRoads = list(it.compress(ImgPaths, list(np.isin(np.array(ImgNums), ImgNumsModel2WithRoads))))

Model2TrainPaths, Model2ValPaths = train_test_split(ImgPathsModel2WithRoads, test_size = Pv)

Model2TrainGen = image_batch_generator(Model2TrainPaths, batchsize = BatchSize)
Model2ValGen = image_batch_generator(Model2ValPaths, batchsize = BatchSize)

Model2TrainSteps = calc_steps(len(Model2TrainPaths), BatchSize)
Model2ValSteps = calc_steps(len(Model2ValPaths), BatchSize)

In [0]:
### Training Model 2

earlystopper = EarlyStopping(patience = 3, verbose = 1)
checkpointer = ModelCheckpoint('Term_Project_st41_eps2/Model2_v3.h5', verbose = 1, save_best_only = True)

Model2Results = Model2.fit_generator(
    
    generator = Model2TrainGen,
    steps_per_epoch = Model2TrainSteps,
    epochs = 25,
    validation_data = Model2ValGen,
    validation_steps = Model2ValSteps,
    verbose = 1,
    callbacks=[earlystopper, checkpointer]
    
)

Epoch 1/25


  warn("The default mode, 'constant', will be changed to 'reflect' in "



Epoch 00001: val_loss improved from inf to 0.15484, saving model to Term_Project_st41_eps2/Model2_v2.h5
Epoch 2/25

Epoch 00002: val_loss improved from 0.15484 to 0.14563, saving model to Term_Project_st41_eps2/Model2_v2.h5
Epoch 3/25

Epoch 00003: val_loss improved from 0.14563 to 0.13897, saving model to Term_Project_st41_eps2/Model2_v2.h5
Epoch 4/25

Epoch 00004: val_loss improved from 0.13897 to 0.12704, saving model to Term_Project_st41_eps2/Model2_v2.h5
Epoch 5/25

Epoch 00005: val_loss improved from 0.12704 to 0.11987, saving model to Term_Project_st41_eps2/Model2_v2.h5
Epoch 6/25

Epoch 00006: val_loss improved from 0.11987 to 0.11450, saving model to Term_Project_st41_eps2/Model2_v2.h5
Epoch 7/25

Epoch 00007: val_loss improved from 0.11450 to 0.10955, saving model to Term_Project_st41_eps2/Model2_v2.h5
Epoch 8/25

Epoch 00008: val_loss improved from 0.10955 to 0.09246, saving model to Term_Project_st41_eps2/Model2_v2.h5
Epoch 9/25

Epoch 00009: val_loss improved from 0.09246

KeyboardInterrupt: ignored

In [0]:
! ls Term_Project_st41_eps2

 comp-540-spring-2019	 Model1_rf.sav		    README.md
 EDA.ipynb		 Model2_diff.h5		    submission_2019-03-29.csv
'Keras Tutorial.ipynb'	 Model2_v2.h5		    submission_2019-03-30.csv
 Keras_U-Net.ipynb	 model-comp540-project.h5   Untitled.ipynb
 Model1			 Pixel_By_Pixel.ipynb	    utils.py
 Model1.h5		 __pycache__		    utils.pyc


In [0]:
from google.colab import files
files.download('Term_Project_st41_eps2/Model2_v2.h5') 

In [0]:
%%time
### Loading images

path_to_train = 'comp-540-spring-2019/train'

glob_train_imgs = os.path.join(path_to_train, '*_sat.jpg')
glob_train_masks = os.path.join(path_to_train, '*_msk.png')

train_img_paths = glob.glob(glob_train_imgs)
train_mask_paths = glob.glob(glob_train_masks)

ig = image_gen(train_img_paths)
train_pixels, train_masks = [], []
neighborhoods = [1 << exponent for exponent in range(1, 9)]

count = 0
for image, mask in ig:
    
    temp_img = pd.DataFrame(np.array(image).reshape((img_width * img_height, 3)))
    temp_img.columns = ["R", "G", "B"]
    temp_img["Lightness"] = (temp_img[["R", "G", "B"]].max(axis = 1) + temp_img[["R", "G", "B"]].max(axis = 1)) / 2
    
    for neighbor in neighborhoods:
        temp_n = img.fromarray(np.uint8(image)).resize((neighbor, neighbor))
        temp_n_low_res = temp_n.resize((img_width, img_height))
        temp_n_mat = pd.DataFrame(list(temp_n_low_res.getdata()))
        temp_n_mat.columns = ["R", "G", "B"]
        temp_img[("R_" + chr(neighbor))] = temp_n_mat["R"]
        temp_img[("G_" + chr(neighbor))] = temp_n_mat["G"]
        temp_img[("B_" + chr(neighbor))] = temp_n_mat["B"]
    
    temp_img['Mask'] = np.array(mask).reshape((img_width * img_height))
    temp_img = temp_img.sample(int((temp_img.shape[0]) * subset_proportion))
    
    train_masks.append(temp_img['Mask'])
    train_pixels.append(temp_img.drop('Mask', axis = 1))
    
    if(count == batch_size):
        break
    count += 1


  warn("The default mode, 'constant', will be changed to 'reflect' in "
  warn("Anti-aliasing will be enabled by default in skimage 0.15 to "


CPU times: user 1min 16s, sys: 7.96 s, total: 1min 24s
Wall time: 47 s


In [0]:
train_masks = np.array(np.concatenate(train_masks, axis = 0))[:, np.newaxis]
train_pixels = np.concatenate(train_pixels, axis = 0)
train_data = pd.DataFrame(np.concatenate([train_pixels, train_masks], axis = 1))

In [0]:
### Building features
count

20

In [0]:
### Splitting into train and validation

train_X, test_X, train_y, test_y = train_test_split(train_data[train_data.columns[0:28]], \
                                                    train_data[train_data.columns[28]], test_size = 0.5)

In [0]:
### Model for road detection in images with roads
### Input: list of images which have been predicted to have roads

rf = RandomForestRegressor(n_estimators = 300, random_state = 123)

rffit = rf.fit(train_X, train_y)


In [0]:
### Evaluation metrics

pred_y = np.round(rf.predict(test_X))

pred_y = np.round(pred_y)
conf_mat = confusion_matrix(test_y, pred_y)
print(conf_mat)
print(classification_report(test_y, pred_y))
print(1 - dice(pred_y, test_y))

[[132612   1163]
 [  3194    655]]
              precision    recall  f1-score   support

         0.0       0.98      0.99      0.98    133775
         1.0       0.36      0.17      0.23      3849

   micro avg       0.97      0.97      0.97    137624
   macro avg       0.67      0.58      0.61    137624
weighted avg       0.96      0.97      0.96    137624

0.23116287277218983


In [0]:
### Model for smoothing mask predictions
### Input: list of masks