In [1]:
%matplotlib inline
import matplotlib.image as mpimg
import numpy as np
import matplotlib.pyplot as plt
import os,sys
from PIL import Image
from proj2_helpers import *
from sklearn import linear_model
from sklearn.preprocessing import PolynomialFeatures
from helpers_logreg import*

In [2]:
def build_k_indices(y, k_fold, seed):
    """
    Partition shuffled row indices into k_fold folds of equal size.

    The global NumPy random state is seeded with `seed`, the integers
    0 .. y.shape[0]-1 are permuted, and consecutive slices of that permutation
    become the folds. When the number of rows is not a multiple of k_fold,
    the trailing leftover indices are not assigned to any fold.

    Returns an array with one row of indices per fold.
    """
    n_rows = y.shape[0]
    fold_size = int(n_rows / k_fold)
    np.random.seed(seed)
    shuffled = np.random.permutation(n_rows)

    folds = []
    for fold in range(k_fold):
        start = fold * fold_size
        stop = (fold + 1) * fold_size
        folds.append(shuffled[start:stop])
    return np.array(folds)

def get_classification_results(y, y_test):
    """
    Compute the fraction of entries on which two label arrays agree.

    Both arrays are flattened first. An entry counts as correct when the
    element-wise difference between `y` and `y_test` is exactly zero; the
    number of such entries is divided by the number of elements in `y_test`.
    """
    difference = y.reshape(-1) - y_test.reshape(-1)
    n_correct = np.sum(difference == 0)
    return n_correct / y_test.size
def k_fold_cross_validation(Y, X, k_fold, seed):
    """
    Evaluate the logistic-regression pipeline with k-fold cross-validation.

    Every fold serves once as the validation set through
    cross_validation_iteration_logreg. The accuracy of each fold is printed
    as soon as it is available, followed by the array of all accuracies and
    their mean and standard deviation. Nothing is returned.
    """
    np.random.seed(seed)
    folds = build_k_indices(Y, k_fold, seed)
    results = np.zeros(k_fold)
    for fold in range(k_fold):
        results[fold] = cross_validation_iteration_logreg(Y, X, folds, fold)
        print('Accuracy: ' + str(results[fold]))
    print(results)
    mean_accuracy = np.mean(results)
    std_accuracy = np.std(results)
    print('Cross validation accuracy: ' + str(mean_accuracy) + ', std=' + str(std_accuracy))
    
def fast_cross_validation(Y, X, k_fold, seed):
    """
    Run one cross-validation iteration on a fixed validation fold.

    The data is split into `k_fold` folds exactly as in
    k_fold_cross_validation, but only fold 0 is used for validation, so the
    model is trained a single time. The resulting accuracy is printed.

    Parameters:
        Y: label array, indexable by an array of row indices.
        X: feature array with one row per label.
        k_fold: number of folds used to build the split.
        seed: seed for the NumPy random state used to shuffle the rows.
    """
    np.random.seed(seed)
    k_indices = build_k_indices(Y, k_fold, seed)
    # Use the same per-fold routine as k_fold_cross_validation; this function
    # takes no model argument, so the logistic-regression iteration is the
    # one that can actually be evaluated here.
    result = cross_validation_iteration_logreg(Y, X, k_indices, 0)
    print('Cross validation accuracy: ' + str(result))
    
def cross_validation_iteration_logreg(Y, X, k_indices, k):
    """
    Train and score a logistic regression for one cross-validation fold.

    Row `k` of `k_indices` selects the validation samples; all other rows are
    flattened together to select the training samples. A LogisticRegression
    with C=1e5 is fitted on the training part and its predictions on the
    validation part are compared with the true labels.

    Returns the fraction of validation samples that were predicted correctly.
    """
    # Separate the held-out fold from the folds used for training
    fold_ids = np.arange(k_indices.shape[0])
    train_idx = k_indices[fold_ids != k].ravel()
    valid_idx = k_indices[k]

    X_train, y_train = X[train_idx], Y[train_idx]
    X_valid, y_valid = X[valid_idx], Y[valid_idx]

    # Fit on the training folds, then predict the held-out fold
    classifier = linear_model.LogisticRegression(C=1e5)
    classifier.fit(X_train, y_train)
    predicted = classifier.predict(X_valid)

    return get_classification_results(y_valid, predicted)

In [3]:
# Load the training images
root_dir = "Datasets/training"

image_dir = root_dir + "/images/"
# os.listdir returns entries in arbitrary order; sort the names so the same
# images are selected (and end up in the same cross-validation folds) on
# every run and on every machine.
files = sorted(os.listdir(image_dir))
n = min(100, len(files))  # Load at most 100 images
print("Loading " + str(n) + " images")
imgs = [load_image(image_dir + files[i]) for i in range(n)]
print(files[0])

Loading 100 images
satImage_001.png


In [4]:
# Ground-truth images are looked up under the same file names as the inputs
gt_dir = f"{root_dir}/groundtruth/"
print(f"Loading {n} images")
gt_imgs = [
    load_image(gt_dir + files[idx])
    for idx in range(n)
]
print(files[0])

Loading 100 images
satImage_001.png


In [5]:
# Cut every input image and every ground-truth image into patches
patch_size = 16  # passed to img_crop as both patch dimensions (16 x 16 pixels)

img_patches = [
    img_crop(imgs[idx], patch_size, patch_size)
    for idx in range(n)
]
gt_patches = [
    img_crop(gt_imgs[idx], patch_size, patch_size)
    for idx in range(n)
]

In [6]:
# Flatten the per-image patch collections into one array holding every patch
img_patches = np.asarray(
    [patch for per_image in img_patches for patch in per_image]
)
gt_patches = np.asarray(
    [patch for per_image in gt_patches for patch in per_image]
)

In [7]:
# Build one feature vector per image patch and one class label per
# ground-truth patch (the label is derived from the patch's mean value)
X_tra = np.asarray([extract_features(patch) for patch in img_patches])
Y_tra = np.asarray([value_to_class(np.mean(gt_patch)) for gt_patch in gt_patches])

In [8]:
# Transform the patch features with poly_fit, a helper that comes from one of
# the star imports at the top of the notebook (presumably a polynomial
# feature expansion, TODO confirm against the helper's source).
X_tra_poly = poly_fit(X_tra)

In [9]:
# Cross-validation settings: number of folds and seed for the shuffled split
k_fold, seed = 5, 1000
k_fold_cross_validation(Y_tra, X_tra_poly, k_fold=k_fold, seed=seed)



Accuracy: 0.79328




Accuracy: 0.7924




Accuracy: 0.77888




Accuracy: 0.7824




Accuracy: 0.78936
[0.79328 0.7924  0.77888 0.7824  0.78936]
Cross validation accuracy: 0.787264, std=0.005672987220151292


In [9]:
# Fit the final classifier on all training patches (no held-out fold), using
# the same C=1e5 setting as cross_validation_iteration_logreg.
logreg = linear_model.LogisticRegression(C=1e5)
logreg.fit(X_tra_poly, Y_tra)



LogisticRegression(C=100000.0, class_weight=None, dual=False,
                   fit_intercept=True, intercept_scaling=1, l1_ratio=None,
                   max_iter=100, multi_class='warn', n_jobs=None, penalty='l2',
                   random_state=None, solver='warn', tol=0.0001, verbose=0,
                   warm_start=False)

In [13]:
# Predict a mask for every test image and save it as an 8-bit 'L'-mode PNG
TEST_SIZE = 50
test_data_filename = 'Datasets/test_set_images/'
print("Running prediction on test set")
prediction_test_dir = "predictions_test/"
# Create the output directory if needed; exist_ok avoids the separate
# check-then-create step and makes the cell safe to re-run.
os.makedirs(prediction_test_dir, exist_ok=True)
for i in range(1, TEST_SIZE + 1):
    # Test images are stored as <root>/test_<i>/test_<i>.png
    test = f"test_{i}"
    image_filename = f"{test_data_filename}{test}/{test}.png"
    img = mpimg.imread(image_filename)
    pimg = get_prediction_logreg(logreg, img, patch_size)
    pimg8 = img_float_to_uint8(pimg)
    pimg8_L = Image.fromarray(pimg8, 'L')
    pimg8_L.save(f"{prediction_test_dir}prediction_{i}.png")

Running prediction on test set
