In [1]:
%matplotlib inline
import matplotlib.image as mpimg
import numpy as np
import matplotlib.pyplot as plt
# import os,sys
# from PIL import Image

from helpers import *
from sklearn import linear_model
from sklearn.metrics import confusion_matrix

%load_ext autoreload
%autoreload 2

In [2]:
from sklearn import metrics, cross_validation
from sklearn.model_selection import KFold
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score

from plots import cross_validation_visualization



In [3]:
# OpenCV import with an install-on-demand fallback: if the module is missing,
# install `opencv-python` into the environment of the running kernel and retry.
try:
    import cv2
except ImportError:
    # Only a failed import should trigger an install; any other error must surface.
    import subprocess
    import sys

    # `pip.main` is not a supported API (it was moved to `pip._internal` in pip 10).
    # Running pip as a module of the current interpreter is the documented way to
    # install from a program and guarantees the kernel's environment is targeted.
    subprocess.check_call([sys.executable, "-m", "pip", "install", "opencv-python"])
    import cv2

In [4]:
# Load the training satellite images together with their ground-truth masks.
root_dir = "training/"
image_dir = root_dir + "images/"
gt_dir = root_dir + "groundtruth/"

# os.listdir returns entries in arbitrary order; sorting makes image indices
# (and therefore the unshuffled cross-validation folds) reproducible across runs.
files = sorted(os.listdir(image_dir))

n = len(files)
# The same file names are looked up in both directories, so imgs[i] and
# gt_imgs[i] always refer to the same scene.
imgs = [load_image(image_dir + name) for name in files]
gt_imgs = [load_image(gt_dir + name) for name in files]

print("Loading " + str(n) + " satellite + ground truth images")

Loading 100 satellite + ground truth images


In [5]:
# Side length, in pixels, of the square patches cut out of every image
patch_size = 16

# 10-fold splitter applied to the list of images (not to individual patches)
kf = KFold(n_splits = 10)
# Trailing expression: echoes the number of folds as the cell output
kf.get_n_splits(imgs)

10

In [None]:
# Cs = np.arange(1e4, 1e4 + 1, 10000) 
# Cross-validated grid search over the regularisation strength of a logistic
# regression patch classifier, scored with and without post-processing.
#
# The grid is defined on lambda and inverted, because LogisticRegression is
# parameterised by C = 1 / lambda.
lambdas = np.logspace(-5, 0, 15)
Cs = [1 / lambda_ for lambda_ in lambdas]

acc_threshold = []  # not filled by this cell; kept so existing references still resolve

# Mean score over the folds, one entry per value of C
accuracy_train_C = []
f1_score_train_C = []

accuracy_test_C = []
f1_score_test_C = []

for C in Cs:
    print("C = {}".format(C))

    # Per-fold scores for the current C
    accuracy_train_CV = []
    accuracy_test_CV = []

    f1_score_train_CV = []
    f1_score_test_CV = []

    # Per-fold scores after post-processing ("pp"); only the test lists are filled
    accuracy_train_CV_pp = []
    accuracy_test_CV_pp = []

    f1_score_train_CV_pp = []
    f1_score_test_CV_pp = []

    for fold, (train_index, test_index) in enumerate(kf.split(imgs)):

        #
        # Split dataset for Cross Validation
        #
        print("\n{}-th CV".format(fold + 1))

        #
        # Crop images into patches and flatten them into one array per set.
        # Training patches are cropped with step = 4, test patches with
        # img_crop's default step.
        #
        X_train = np.asarray([patch for i in train_index
                              for patch in img_crop(imgs[i], patch_size, patch_size, step = 4)])
        y_train = np.asarray([patch for i in train_index
                              for patch in img_crop(gt_imgs[i], patch_size, patch_size, step = 4)])
        X_test = np.asarray([patch for i in test_index
                             for patch in img_crop(imgs[i], patch_size, patch_size)])
        y_test = np.asarray([patch for i in test_index
                             for patch in img_crop(gt_imgs[i], patch_size, patch_size)])

        # One class label per patch, derived from the mean of its ground-truth patch
        y_train = np.asarray([value_to_class(np.mean(patch)) for patch in y_train])
        y_test = np.asarray([value_to_class(np.mean(patch)) for patch in y_test])
        print("Got all data in arrays!")

        #
        # Feature extraction, augmentation and standardization.
        # Centring uses the per-feature mean (axis = 0) so that it matches the
        # per-feature std used for scaling; a single global mean would leave
        # the individual feature columns un-centred.
        # NOTE(review): each set is standardized with its own statistics; reusing
        # the training mean/std for the test set may be preferable — confirm.
        #
        X_train = np.asarray([extract_features(patch) for patch in X_train])
        X_train = features_augmentation(X_train)
        X_train -= np.mean(X_train, axis = 0)
        X_train /= np.std(X_train, axis = 0)
        print("Train data ready!")

        X_test = np.asarray([extract_features(patch) for patch in X_test])
        X_test = features_augmentation(X_test)
        X_test -= np.mean(X_test, axis = 0)
        X_test /= np.std(X_test, axis = 0)
        print("Test data ready!")

        #
        # Run logistic regression
        # NOTE(review): once the loops finish, `logreg` is the model of the last
        # fold of the last C, which is what later cells of this notebook predict with.
        #
        logreg = linear_model.LogisticRegression(C=C, class_weight="balanced")
        logreg.fit(X_train, y_train)
        print("Model fitted!")
        z_train = logreg.predict(X_train)
        z_test = logreg.predict(X_test)

        #
        # Compute f1 score & accuracy using sklearn functions
        #
        f1_score_train = f1_score(y_train, z_train, average='macro')
        accuracy_score_train = accuracy_score(y_train, z_train)
        f1_score_test = f1_score(y_test, z_test, average='macro')
        accuracy_score_test = accuracy_score(y_test, z_test)

        #
        # Post processing on test dataset
        #
        num_patch_total = len(z_test)
        # Patches per image = total patches / number of held-out images. The
        # number of folds is not the right divisor: it only matched because
        # 100 images split into 10 folds gives 10 test images per fold.
        num_patch_by_img = num_patch_total // len(test_index)
        # Patches per image side, derived from the data instead of the literal
        # 400 // 16 (still assumes square images, as the original reshape did).
        grid_side = imgs[test_index[0]].shape[0] // patch_size

        # Reshape each image's predictions into a grid, post-process it, then
        # flatten it back to the patch order used by y_test.
        z_reshaped = []
        for start in range(0, num_patch_total, num_patch_by_img):
            label_img = np.reshape(z_test[start : start + num_patch_by_img], [grid_side, grid_side])
            label_img = postprocess(label_img)
            z_reshaped.append(np.reshape(label_img, [num_patch_by_img]))

        # Convert list as array
        z_test_pp = np.concatenate(z_reshaped, axis = 0)
        print("Postprocessing done!")

        f1_score_test_pp = f1_score(y_test, z_test_pp, average='macro')
        accuracy_score_test_pp = accuracy_score(y_test, z_test_pp)

        #
        # Store accuracy for train, test and test+PP
        #
        f1_score_train_CV.append(f1_score_train)
        accuracy_train_CV.append(accuracy_score_train)

        f1_score_test_CV.append(f1_score_test)
        accuracy_test_CV.append(accuracy_score_test)

        f1_score_test_CV_pp.append(f1_score_test_pp)
        accuracy_test_CV_pp.append(accuracy_score_test_pp)

    # The spread is computed with np.std, so it is reported as a standard deviation
    print("Average test accuracy: {}".format(np.mean(accuracy_test_CV)))
    print("Std test accuracy: {}".format(np.std(accuracy_test_CV)))
    print("Min test accuracy: {} // Max test accuracy: {}\n".format(np.min(accuracy_test_CV), np.max(accuracy_test_CV)))

    # Post-processed figures must all come from the *_pp list
    print("Average test accuracy PP: {}".format(np.mean(accuracy_test_CV_pp)))
    print("Std test accuracy PP: {}".format(np.std(accuracy_test_CV_pp)))
    print("Min test accuracy PP: {} // Max test accuracy PP: {}\n".format(np.min(accuracy_test_CV_pp), np.max(accuracy_test_CV_pp)))

    accuracy_train_C.append(np.mean(accuracy_train_CV))
    f1_score_train_C.append(np.mean(f1_score_train_CV))

    accuracy_test_C.append(np.mean(accuracy_test_CV))
    f1_score_test_C.append(np.mean(f1_score_test_CV))
        
        
# cross_validation_visualization(Cs, f1_score_train_C, f1_score_test_C)
#     # we create an instance of the classifier and fit the data
#     logreg = linear_model.LogisticRegression(C=1e5, class_weight="balanced")
#     logreg.fit(X, Y)
    
#     # Predict on the training set
#     Z = logreg.predict(X)
    
#     acc = accuracy(labels = Y, predictions = Z)
#     acc_threshold.append(acc)
#     print("Foreground threshold = {}".format(threshold))
#     print("Accuracy post Logistic Regression: {}".format(acc))


C = 99999.99999999999

1-th CV
Got all data in arrays!
Train data ready!
Test data ready!
Model fitted!


  'precision', 'predicted', average, warn_for)


Postprocessing done!

2-th CV
Got all data in arrays!
Train data ready!
Test data ready!
Model fitted!
Postprocessing done!

3-th CV
Got all data in arrays!
Train data ready!
Test data ready!
Model fitted!
Postprocessing done!

4-th CV
Got all data in arrays!
Train data ready!
Test data ready!
Model fitted!
Postprocessing done!

5-th CV
Got all data in arrays!
Train data ready!
Test data ready!
Model fitted!
Postprocessing done!

6-th CV
Got all data in arrays!
Train data ready!
Test data ready!
Model fitted!
Postprocessing done!

7-th CV
Got all data in arrays!
Train data ready!
Test data ready!
Model fitted!
Postprocessing done!

8-th CV
Got all data in arrays!
Train data ready!
Test data ready!
Model fitted!
Postprocessing done!

9-th CV
Got all data in arrays!


In [None]:
# Hand the per-C mean train and test accuracies from the cross-validation cell
# to the plotting helper imported from `plots`.
cross_validation_visualization(Cs, accuracy_train_C, accuracy_test_C)

In [None]:
# Mean cross-validated test accuracy, one entry per value of C
accuracy_test_C

In [None]:
# Data to evaluate
root_testdir = "test_set_images"
test_names = os.listdir(root_testdir)
num_test = len(test_names)

#
# Reorder test data
# (the directory listing is not in numeric order, but we want the images in numeric order)
#
print("Loading test data")
# The image number is the second "_"-separated token of each entry name
order = [int(name.split("_")[1]) for name in test_names]
p = np.argsort(order)
# Each image is read from <root>/<name>/<name>.png, directly in numeric order
imgs_test = [load_image(os.path.join(root_testdir, test_names[i], test_names[i]) + ".png") for i in p]
img_patches_test = [img_crop(img, patch_size, patch_size) for img in imgs_test]

#
# Linearize list of patches
img_patches_test = np.asarray([patch for patches in img_patches_test for patch in patches])

print("Extraction, augmentation and standardization of features")
X_test = np.asarray([extract_features(patch) for patch in img_patches_test])
X_test = features_augmentation(X_test)
# Centre with the per-feature mean (axis = 0) so that it matches the per-feature
# std used for scaling; a single global mean would leave the columns un-centred.
X_test -= np.mean(X_test, axis = 0)
X_test /= np.std(X_test, axis = 0)


print("Let's predict our new data")
# Run prediction
# NOTE(review): `logreg` is not created in this cell; it is whichever model an
# earlier cell last fitted (in the cross-validation cell, the last fold of the
# last C). Confirm that this is the model intended for the submission.
Z_test = logreg.predict(X_test)


In [None]:
""" Postprocessing """

# Reshape prediction: one square grid of patch labels per test image
num_patch_total = len(Z_test)
num_patch_by_img = num_patch_total // num_test
# Patches per image side, derived from the loaded images and patch_size instead
# of the literal 608 // 16 (still assumes square images, as the original did).
grid_side = imgs_test[0].shape[0] // patch_size

Z_reshaped = []
for start in range(0, num_patch_total, num_patch_by_img):
    label_img = np.reshape(Z_test[start : start + num_patch_by_img], [grid_side, grid_side])
    # Run post process, then flatten back to the per-image patch order. The
    # target length is stated explicitly rather than read from a leftover loop variable.
    label_img = postprocess(label_img)
    Z_reshaped.append(np.reshape(label_img, [num_patch_by_img]))

# Convert list as array
result = np.concatenate(Z_reshaped, axis = 0)

# Save prediction
create_submission(result, "submission_postprocess.csv")

In [None]:
import random  # only needed for the optional random pick mentioned below

# Run prediction on the img_idx-th image.
# The previous random pick was immediately overwritten by the fixed index; to
# inspect a random training image use: img_idx = random.randint(0, n - 1)
img_idx = 1

patch_size = 16
Xi = extract_img_features(image_dir + files[img_idx])
Xi = features_augmentation(Xi)
# NOTE(review): the other cells standardize their features before calling
# logreg, while Xi is only augmented here — confirm whether
# extract_img_features already standardizes its output.
Zi = logreg.predict(Xi)

# Display prediction as an image
w = gt_imgs[img_idx].shape[0]
h = gt_imgs[img_idx].shape[1]

predicted_im = label_to_img(w, h, patch_size, patch_size, Zi)
cimg = concatenate_images(imgs[img_idx], predicted_im)
fig1 = plt.figure(figsize=(10, 10))  # 10 x 10 inch figure for the side-by-side view
plt.imshow(cimg, cmap='Greys_r')

# The overlay gets its own figure: drawing it on the current axes would paint
# over the side-by-side image shown above.
new_img = make_img_overlay(imgs[img_idx], predicted_im)
fig2 = plt.figure(figsize=(10, 10))
plt.imshow(new_img)

In [None]:
# Post-process the single-image prediction and overlay it on the satellite image.
# Patches per image side come from the image size (w) and patch_size instead of
# the literal 400 // 16 (still assumes a square image, as the original did).
grid_side = w // patch_size
Zi_reshaped = np.reshape(Zi, [grid_side, grid_side])
postprocess_img = postprocess(Zi_reshaped)
# Flatten back to one label per patch before converting the labels to an image
postprocess_img = np.reshape(postprocess_img, [Zi.shape[0]])
postprocess_img = label_to_img(w, h, patch_size, patch_size, postprocess_img)

fig1 = plt.figure(figsize=(10, 10))
new_img = make_img_overlay(imgs[img_idx], postprocess_img)
plt.imshow(new_img)

In [None]:
# Shape of the flat per-patch prediction vector for the selected image
Zi.shape

In [None]:
# Shape of the same predictions once arranged as a 2-D patch grid
Zi_reshaped.shape

In [None]:
# Cut every training image and its ground-truth mask into square patches
patch_size = 16  # side length of a patch, in pixels

# One list of patches per image; both crops use the same size and step = 4,
# so image patches and ground-truth patches stay aligned index by index.
img_patches = [img_crop(imgs[k], patch_size, patch_size, step = 4) for k in range(n)]
gt_patches = [img_crop(gt_imgs[k], patch_size, patch_size, step = 4) for k in range(n)]

# Flatten the per-image lists into a single array of patches, image after image
img_patches = np.asarray([patch for per_image in img_patches for patch in per_image])
gt_patches = np.asarray([patch for per_image in gt_patches for patch in per_image])


In [None]:
# Total number of patches across all training images
len(img_patches)

In [None]:

#     Y = np.asarray([value_to_class(np.mean(gt_patches[i])) for i in range(len(gt_patches))])

In [None]:
# Extract features: one feature vector per patch
X = np.asarray([extract_features(patch) for patch in img_patches])

print("shape X[i] : {}".format(X[0].shape))

# Features augmentation
X = features_augmentation(X)

# Standardize data. Centring uses the per-feature mean (axis = 0) so that it
# matches the per-feature std used for scaling; subtracting one global mean
# would leave the individual feature columns un-centred.
X -= np.mean(X, axis = 0)
X /= np.std(X, axis = 0)

# Print feature statistics (rows of X are patches, columns are features)
print('Computed ' + str(X.shape[0]) + ' features')
print('Feature dimension = ' + str(X.shape[1]))
