In [None]:
import numpy as np
import pandas as pd
import xml.etree.ElementTree as ET
import matplotlib.pyplot as plt
from PIL import Image
import os
import glob
import time
import zipfile
import operator
import collections
from skimage import transform as tform
from sklearn import decomposition, mixture, cluster
from scipy.spatial.distance import pdist, squareform

In [None]:
# Dataset locations: one folder holding all images and one annotation folder per breed.
root_images = "../input/all-dogs/all-dogs/"
root_annots = "../input/annotation/Annotation/"

all_images = os.listdir(root_images)
breeds = glob.glob(root_annots + '*')

# Collect every annotation file of every breed folder.
annotation=[]
for b in breeds:
    annotation+=glob.glob(b+"/*")

# breed index (folder name up to the first "-") -> full breed folder name.
# bounding_box() uses this map to locate the annotation folder of an image.
breed_map={}
for annot in annotation:
    breed=annot.split("/")[-2]
    index=breed.split("-")[0]
    breed_map.setdefault(index,breed)

# breed index -> readable breed name (later used as the class label of each image).
breed_folders = list(breeds)
breed_index_name_map = {}
for breed_folder in breed_folders:
    full_breed_name = breed_folder.split("/")[-1]
    # Split on the FIRST "-" only. Taking the text after the LAST "-" truncated
    # hyphenated breed names (e.g. a folder "<index>-flat-coated_retriever" became
    # "coated_retriever"), which can make two different breeds share one label.
    name_parts = full_breed_name.split("-", 1)
    breed_index  = name_parts[0]
    breed_name   = name_parts[-1]  # falls back to the whole folder name when there is no "-"
    breed_index_name_map[breed_index] = breed_name

num_dog_images = len(all_images)
num_dog_breeds = len(breed_map)
num_avg_images_per_breed = num_dog_images / num_dog_breeds
print('Total %d dog images of %d different breeds (on average %.1f images per breed)' %(num_dog_images,num_dog_breeds,num_avg_images_per_breed))

In [None]:
def bounding_box(image_filename):
    """Return the (xmin, ymin, xmax, ymax) box stored in an image's annotation file.

    The annotation path is built from the module-level ``root_annots`` and
    ``breed_map``: the text before the first "_" of ``image_filename`` selects the
    breed folder, and the text before the first "." names the annotation file.
    When the annotation lists several ``object`` elements, the box of the last
    one is returned (same as before).

    Raises:
        KeyError: the filename prefix is not a key of ``breed_map``.
        ValueError: the annotation contains no ``object`` element (this used to
            surface as an UnboundLocalError at the return statement).
    """
    bpath=root_annots+str(breed_map[image_filename.split("_")[0]])+"/"+str(image_filename.split(".")[0])
    tree = ET.parse(bpath)
    root = tree.getroot()
    objects = root.findall('object')
    if not objects:
        raise ValueError('annotation file %s contains no <object> element' %(bpath))

    bndbox = objects[-1].find('bndbox') # bounding box of the last annotated object
    xmin = int(bndbox.find('xmin').text)
    ymin = int(bndbox.find('ymin').text)
    xmax = int(bndbox.find('xmax').text)
    ymax = int(bndbox.find('ymax').text)

    return (xmin,ymin,xmax,ymax)

def expand_bounding_box(bbox, orig_image, expand_margin_fraction=0.1):
    """Grow a bounding box around its center and clamp it to the image.

    Args:
        bbox: sequence (xmin, ymin, xmax, ymax).
        orig_image: object exposing ``size`` as (width, height).
        expand_margin_fraction: total growth per axis as a fraction of the box
            size; half of it is added on each side.

    Returns:
        list [xmin, ymin, xmax, ymax], limited to [0, width] x [0, height].
    """
    image_w, image_h = orig_image.size
    left, top, right, bottom = bbox[0], bbox[1], bbox[2], bbox[3]

    # padding added on each side of the box
    pad_x = 0.5*expand_margin_fraction*(right - left)
    pad_y = 0.5*expand_margin_fraction*(bottom - top)

    return [
        max(0, left - pad_x),
        max(0, top - pad_y),
        min(image_w, right + pad_x),
        min(image_h, bottom + pad_y),
    ]

# Grid layout of the preview figure.
num_rows = 5
num_cols = 9
num_images_to_show = num_rows*num_cols
selected_images = np.random.choice(all_images, size=num_images_to_show, replace=False)

# Margin added around each annotated box; the dataset-building cell reuses this value.
expand_margin_fraction = 0.125

fig = plt.figure(figsize=(30,16))
plt.subplots_adjust(left=0.02, right=0.98, top=0.94, bottom=0.02, hspace=0.05, wspace=0.05)
plt.suptitle('figure 1: raw samples from dataset', fontsize=30)
for k, image_filename in enumerate(selected_images):
    bbox = bounding_box(image_filename)
    orig_image = Image.open(os.path.join(root_images, image_filename))

    # crop loosely around the annotated dog
    bbox_expanded = expand_bounding_box(bbox, orig_image, expand_margin_fraction=expand_margin_fraction)
    cropped_image = orig_image.crop(bbox_expanded)

    ax = plt.subplot(num_rows, num_cols, k+1)
    ax.imshow(cropped_image)
    ax.axis("off")
fig.savefig('figure_1.png')

In [None]:
# create a small dataset of resized images
num_images_in_dataset = num_dog_images
#num_images_in_dataset = 16384
#num_images_in_dataset = 8192

# extra pixels kept around the final 64x64 window so that crops can be shifted later
expantion_factor = 8

start_time = time.time()

image_dimention = 64 + expantion_factor
resize_shape = (image_dimention, image_dimention)

# sampling without replacement: with the full dataset size this is a random permutation
selected_images = np.random.choice(all_images, size=num_images_in_dataset, replace=False)

# create a matrix to hold all images, laid out as (height, width, channel, image index)
image_dataset_4D_matrix = np.zeros((image_dimention,image_dimention,3,num_images_in_dataset), dtype=np.uint8)
image_breed_label_list = []

# fill up the matrix with images
# NOTE: `expand_margin_fraction` is defined in the figure 1 cell and reused here.
for k, image_filename in enumerate(selected_images):
    bbox = bounding_box(image_filename)
    # the context manager releases the file handle as soon as the crop has been taken
    with Image.open(os.path.join(root_images, image_filename)) as orig_image:
        bbox_expanded = expand_bounding_box(bbox, orig_image, expand_margin_fraction=expand_margin_fraction)
        # Force three channels: a grayscale, palette or RGBA file would otherwise yield
        # an array that cannot be assigned into the (H, W, 3) slot below.
        cropped_image = orig_image.crop(bbox_expanded).convert('RGB')
    resized_image = tform.resize(np.array(cropped_image), resize_shape, preserve_range=True).astype(np.uint8)

    image_dataset_4D_matrix[:,:,:,k] = resized_image
    image_breed_label_list.append(breed_index_name_map[image_filename.split('_')[0]])

# despite its name this holds the dataset collection time, in minutes
training_duration_min = (time.time()-start_time)/60
print('finished collecting dataset. took %.1f minutes' %(training_duration_min))

# Show collected rescaled dataset

In [None]:
# Preview a random sample of the resized dataset together with its breed label.
num_rows = 6
num_cols = 9
num_images_to_show = num_rows*num_cols
selected_inds = np.random.choice(num_images_in_dataset,size=num_images_to_show,replace=False)

fig = plt.figure(figsize=(30,22))
plt.subplots_adjust(left=0.02, right=0.98, top=0.93, bottom=0.02, hspace=0.13, wspace=0.05)
plt.suptitle('figure 2: rescaled loosly cropped images', fontsize=30)
for k, image_ind in enumerate(selected_inds):
    # the image index is the last axis of the dataset matrix
    dog_image = image_dataset_4D_matrix[:,:,:,image_ind]
    dog_breed = image_breed_label_list[image_ind]

    ax = plt.subplot(num_rows, num_cols, k+1)
    ax.imshow(dog_image)
    ax.set_title(dog_breed, fontsize=16)
    ax.axis("off")
fig.savefig('figure_2.png')

In [None]:
from sklearn import preprocessing

# Fit an encoder that maps breed-name strings to integer class ids.
breed_encoder = preprocessing.LabelEncoder().fit(image_breed_label_list)

num_breeds = len(breed_encoder.classes_)
print('total num breeds in dataset is %d' %(num_breeds))

# short test: encode the first few labels and decode them back
test_labels = image_breed_label_list[:7]
breed_index = breed_encoder.transform(test_labels)
print(breed_index, breed_index.shape)
test_labels_hat = list(breed_encoder.inverse_transform(breed_index))
for label_list in (test_labels, test_labels_hat):
    print(label_list)

# Define Autoencoder Architecture and Learning params

In [None]:
import keras
from keras.models import Model
from keras.layers import GlobalMaxPooling2D, GlobalAveragePooling2D
from keras.layers import Input, Dropout, Dense, Conv2D, MaxPooling2D, AveragePooling2D, UpSampling2D, Reshape, LeakyReLU, Flatten
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import LearningRateScheduler
from keras.layers.normalization import BatchNormalization
from keras.layers.noise import GaussianNoise, GaussianDropout
from keras.regularizers import l1,l2,l1_l2
from keras import optimizers

In [None]:
# from keras.applications.vgg19 import VGG19

# vgg_encoder = VGG19(include_top=False, weights='imagenet', input_shape=(64,64,3))

# # Creating dictionary that maps layer names to the layers
# layer_dict = dict([(layer.name, layer) for layer in vgg_encoder.layers])

# # Getting output tensor of the last VGG layer that we want to include
# desired_output = layer_dict['block4_conv2'].output

# # Creating new model. Please note that this is NOT a Sequential() model.
# from keras.models import Model
# custom_model = Model(input=vgg_model.input, output=desired_output)

# # Make sure that the pre-trained bottom layers are not trainable
# for layer in custom_model.layers[:7]:
#     layer.trainable = False

In [None]:
# hyperparams
# L2 weight penalty passed as `kernel_regularizer=l2(kernel_reg)` to the conv layers below
kernel_reg = 1e-7
# L1 activity penalty applied to the bottleneck layer ('encoder_output')
activity_reg = 1e-6

# number of channels of the bottleneck (latent) tensor
encoder_output_channel_size = 256

# noise injected into the latent representation (see the noise layers after the bottleneck)
multiplicative_noise_sigma = 0.2
additive_noise_sigma = 0.02
dropout_noise_rate = 0.02

leaky_relu_slope = 0.33
# activation callable handed to the Conv2D layers; it builds a new LeakyReLU layer on every call
leaky_relu = lambda x: LeakyReLU(alpha=leaky_relu_slope)(x)

# Convert the desired multiplicative sigma into the `rate` argument of GaussianDropout.
# NOTE(review): the Keras docs give the noise std as sqrt(rate / (1 - rate)), which this
# formula inverts (rate = sigma^2 / (1 + sigma^2)) - confirm for the installed Keras version.
multiplicative_gaussian_noise_level = (multiplicative_noise_sigma**2)/(1+multiplicative_noise_sigma**2)
# GaussianNoise receives the additive sigma unchanged
additive_gaussian_noise_level = additive_noise_sigma

# encoder: four stages of (BN -> 3x3 conv -> BN -> 3x3 conv -> 2x2 max-pool),
# followed by a 1x1 bottleneck convolution and three training-time noise layers
input_image = Input(shape=(64, 64, 3), name='input_image')

x = input_image
for encoder_stage, encoder_stage_filters in enumerate([64, 128, 256, 256], start=1):
    for encoder_conv_ind in (1, 2):
        x = BatchNormalization(name='encoder_BN_%d_%d' %(encoder_stage, encoder_conv_ind))(x)
        x = Conv2D(encoder_stage_filters, (3, 3), activation=leaky_relu, padding='same',
                   kernel_regularizer=l2(kernel_reg),
                   name='encoder_conv%d_%d' %(encoder_stage, encoder_conv_ind))(x)
    x = MaxPooling2D((2, 2), padding='same', name='encoder_pool%d' %(encoder_stage))(x)

# bottleneck layer. Try to make it sparse using L1 activity regularization
encoder_output = Conv2D(
    encoder_output_channel_size, (1, 1),
    activation='linear',
    kernel_regularizer=l2(kernel_reg),
    activity_regularizer=l1(activity_reg),
    name='encoder_output',
)(x)

# add some noise during training to force interpolation smoothness in the latent space
# (multiplicative noise, then additive noise, then dropout noise)
for encoder_noise_layer in (
    GaussianDropout(multiplicative_gaussian_noise_level, name='multiplicative_noise'),
    GaussianNoise(additive_gaussian_noise_level, name='additive_noise'),
    Dropout(dropout_noise_rate, name='dropout_noise'),
):
    encoder_output = encoder_noise_layer(encoder_output)

encoder = Model(input_image, encoder_output, name='encoder')

encoder_banner_rule = '---------------------------------------------------------------------------------------------------'
for encoder_banner_line in (encoder_banner_rule, ' Encoder:', encoder_banner_rule):
    print(encoder_banner_line)
encoder.summary()
print(encoder_banner_rule)


# latent auxiliary classifier: predicts the breed from a latent tensor.
# `external_encoder_output` is the stand-alone input placeholder; the decoder reuses it.
external_encoder_output = Input(shape=(4,4,encoder_output_channel_size), name='extenral_image_rep')

# CNN head (the variant in use): two BN + conv layers with heavy dropout in between,
# then global max pooling over the spatial positions
aux_features = BatchNormalization(name='latent_aux_BN_1')(external_encoder_output)
aux_features = Conv2D(256, (3, 3), activation=leaky_relu, padding='same',
                      kernel_regularizer=l2(kernel_reg), name='latent_aux_conv1')(aux_features)
aux_features = Dropout(0.85, name='latent_aux_dropout_1')(aux_features)
aux_features = BatchNormalization(name='latent_aux_BN_2')(aux_features)
aux_features = Conv2D(256, (3, 3), activation=leaky_relu, padding='same',
                      kernel_regularizer=l2(kernel_reg), name='latent_aux_conv2')(aux_features)
aux_features = GlobalMaxPooling2D(name='global_pool')(aux_features)

# FCN head (disabled alternative, kept as an inert string literal)
'''
x = Flatten(name='flatten_encoder_latent')(external_encoder_output)
x = BatchNormalization(name='latent_aux_BN_1')(x)
x = Dense(768, activation=leaky_relu, name='latent_aux_h1', kernel_regularizer=l2(kernel_reg))(x)
x = Dropout(0.7, name='dropout_1')(x)
x = BatchNormalization(name='latent_aux_BN_2')(x)
x = Dense(768, activation=leaky_relu, name='latent_aux_h2', kernel_regularizer=l2(kernel_reg))(x)
x = Dropout(0.7, name='dropout_2')(x)
x = BatchNormalization(name='latent_aux_BN_3')(x)
x = Dense(768, activation=leaky_relu, name='latent_aux_h3', kernel_regularizer=l2(kernel_reg))(x)
x = Dropout(0.7, name='dropout_3')(x)
'''

# softmax over the breeds known to the label encoder
latent_aux_classifier_output = Dense(num_breeds, activation='softmax', name='latent_aux_pred')(aux_features)
latent_aux_classifier = Model(external_encoder_output, latent_aux_classifier_output, name='latent_aux_classifier')

aux_banner_rule = '---------------------------------------------------------------------------------------------------'
for aux_banner_line in (aux_banner_rule, ' Latent Auxilary Classifier:', aux_banner_rule):
    print(aux_banner_line)
latent_aux_classifier.summary()
print(aux_banner_rule)


# decoder: maps the (4, 4, encoder_output_channel_size) latent tensor back to a 64x64x3 image.
# It reads from the same `external_encoder_output` placeholder as the latent auxiliary classifier.
# Each of the first four stages is (BN -> conv -> BN -> conv -> 2x bilinear upsampling),
# so the spatial size grows 4 -> 8 -> 16 -> 32 -> 64; a fifth stage refines at full resolution.
#external_encoder_output = Input(shape=(4,4,encoder_output_channel_size), name='extenral_image_rep')

x = BatchNormalization(name='decoder_BN_1_1')(external_encoder_output)
x = Conv2D(256, (3, 3), activation=leaky_relu, padding='same', kernel_regularizer=l2(kernel_reg), name='decoder_conv1_1')(x)
# FIX: this layer used to be applied to `external_encoder_output` again, which discarded the
# output of 'decoder_BN_1_1' / 'decoder_conv1_1' and left them disconnected from the model.
# NOTE: the decoder now has one more connected BN + conv pair than before, so weights
# saved from the old graph will not match this layer list.
x = BatchNormalization(name='decoder_BN_1_2')(x)
x = Conv2D(256, (3, 3), activation=leaky_relu, padding='same', kernel_regularizer=l2(kernel_reg), name='decoder_conv1_2')(x)
x = UpSampling2D((2, 2), interpolation='bilinear', name='decoder_upsample_1')(x)

x = BatchNormalization(name='decoder_BN_2_1')(x)
x = Conv2D(256, (3, 3), activation=leaky_relu, padding='same', kernel_regularizer=l2(kernel_reg), name='decoder_conv2_1')(x)
x = BatchNormalization(name='decoder_BN_2_2')(x)
x = Conv2D(256, (3, 3), activation=leaky_relu, padding='same', kernel_regularizer=l2(kernel_reg), name='decoder_conv2_2')(x)
x = UpSampling2D((2, 2), interpolation='bilinear', name='decoder_upsample_2')(x)

x = BatchNormalization(name='decoder_BN_3_1')(x)
x = Conv2D(256, (3, 3), activation=leaky_relu, padding='same', kernel_regularizer=l2(kernel_reg), name='decoder_conv3_1')(x)
x = BatchNormalization(name='decoder_BN_3_2')(x)
x = Conv2D(256, (3, 3), activation=leaky_relu, padding='same', kernel_regularizer=l2(kernel_reg), name='decoder_conv3_2')(x)
x = UpSampling2D((2, 2), interpolation='bilinear', name='decoder_upsample_3')(x)

x = BatchNormalization(name='decoder_BN_4_1')(x)
x = Conv2D(256, (3, 3), activation=leaky_relu, padding='same', kernel_regularizer=l2(kernel_reg), name='decoder_conv4_1')(x)
x = BatchNormalization(name='decoder_BN_4_2')(x)
x = Conv2D(256, (3, 3), activation=leaky_relu, padding='same', kernel_regularizer=l2(kernel_reg), name='decoder_conv4_2')(x)
x = UpSampling2D((2, 2), interpolation='bilinear', name='decoder_upsample_4')(x)

x = BatchNormalization(name='decoder_BN_5_1')(x)
x = Conv2D(256, (3, 3), activation=leaky_relu, padding='same', kernel_regularizer=l2(kernel_reg), name='decoder_conv5_1')(x)
x = BatchNormalization(name='decoder_BN_5_2')(x)
x = Conv2D(256, (3, 3), activation=leaky_relu, padding='same', kernel_regularizer=l2(kernel_reg), name='decoder_conv5_2')(x)

# linear 3-channel output: values are not squashed, so callers clip after un-normalizing
output_image = Conv2D(3, (3, 3), activation='linear', padding='same', name='generated_image')(x)

decoder = Model(external_encoder_output, output_image, name='decoder')
print('---------------------------------------------------------------------------------------------------')
print(' Decoder:')
print('---------------------------------------------------------------------------------------------------')
decoder.summary()
print('---------------------------------------------------------------------------------------------------')

# autoencoder
# Chain the encoder and decoder sub-models: image -> latent -> reconstructed image.
encoder_latent = encoder(input_image)
autoencoder = Model(input_image, decoder(encoder_latent))
print('---------------------------------------------------------------------------------------------------')
print(' Autoencoder:')
print('---------------------------------------------------------------------------------------------------')
autoencoder.summary()
print('---------------------------------------------------------------------------------------------------')

# autoencoder with latent aux classifier
# Two outputs computed from the same latent tensor: [reconstructed image, breed prediction].
# The output order must match the [image, labels] targets yielded by generate_batches and
# the order of `losses_to_use` / `loss_weights_to_use` in the training cell.
autoencoder_with_latent_aux_classifier = Model(input_image, outputs=[decoder(encoder_latent), latent_aux_classifier(encoder_latent)])
#autoencoder_with_latent_aux_classifier = Model(input_image, outputs=[decoder(encoder(input_image)), latent_aux_classifier(encoder(input_image))])
print('---------------------------------------------------------------------------------------------------')
print(' Autoencoder with latent auxilary classifer:')
print('---------------------------------------------------------------------------------------------------')
autoencoder_with_latent_aux_classifier.summary()
print('---------------------------------------------------------------------------------------------------')

# Generator function

In [None]:
def normalize_image(orig_scale_images):
    """Map pixel values from [0, 255] to [16/256, 240/256] as float32.

    ``orig_scale_images`` must provide ``astype`` (e.g. a NumPy array).
    """
    as_float = orig_scale_images.astype(np.float32)
    gain = 224/255  # squeezes the 255-wide input range into 224 levels
    return (as_float * gain + 16) / 256

def unnormalize_image(normlized_images):
    """Map values from [16/256, 240/256] back to [0, 255].

    Inputs outside that range map outside [0, 255]; nothing is clipped here.
    """
    low = 16/256
    span = 224/256
    fraction_of_range = (normlized_images - low) / span
    return 255 * fraction_of_range

def generate_batches(possible_inds_list, batch_size=64, random_crops=True, random_flips=True):
    """Endlessly yield training batches for the autoencoder with the auxiliary classifier.

    Reads the module-level ``image_dataset_4D_matrix``, ``image_breed_label_list``,
    ``breed_encoder``, ``num_breeds`` and ``expantion_factor``.

    Args:
        possible_inds_list: dataset indices the batches may draw from.
        batch_size: images per batch; each batch samples indices without replacement.
        random_crops: if True, pick a random 64x64 window inside each stored image;
            otherwise use the window that starts at ``int(expantion_factor/2)``.
        random_flips: if True, mirror each image left-right with probability 0.5.

    Yields:
        ``(images, [images, one_hot_labels])`` where ``images`` has shape
        (batch_size, 64, 64, 3) and holds normalized pixels, and ``one_hot_labels``
        has shape (batch_size, num_breeds).

    Note:
        This is a generator, so the size assertion only runs when the first
        batch is requested.
    """
    num_possible_images = len(possible_inds_list)
    assert(num_possible_images >= batch_size)

    crop_size = 64
    center_offset = int(expantion_factor/2)

    while True:
        curr_batch = np.zeros((batch_size,crop_size,crop_size,3))
        curr_batch_lables_list = []
        selected_images_for_batch = np.random.choice(possible_inds_list,size=batch_size,replace=False)
        for k, selected_image_ind in enumerate(selected_images_for_batch):
            if random_crops:
                # FIX: valid offsets are 0..expantion_factor inclusive (stored images are
                # 64 + expantion_factor pixels wide); randint's upper bound is exclusive,
                # so the last offset was never sampled before.
                h_start = np.random.randint(expantion_factor + 1)
                w_start = np.random.randint(expantion_factor + 1)
            else:
                h_start = center_offset
                w_start = center_offset

            h_end = h_start + crop_size
            w_end = w_start + crop_size

            image = image_dataset_4D_matrix[h_start:h_end,w_start:w_end,:,selected_image_ind]
            # FIX: `random_flips=True` used to mirror EVERY image, so un-mirrored images
            # were never produced; flip only half of the time.
            if random_flips and np.random.rand() < 0.5:
                image = np.fliplr(image)

            curr_batch[k,:,:,:] = normalize_image(image)
            curr_batch_lables_list.append(image_breed_label_list[selected_image_ind])

        # breed names -> integer ids -> one-hot rows
        curr_batch_lables_vec = breed_encoder.transform(curr_batch_lables_list)
        curr_batch_lables = keras.utils.to_categorical(curr_batch_lables_vec, num_breeds)

        # the input image doubles as the reconstruction target
        yield (curr_batch, [curr_batch, curr_batch_lables])

# Train autoencoder

In [None]:
# training schedule
num_epochs = 1600
batch_size = 16
learning_rate = 0.0002

valid_data_fraction = 0.125
train_steps_per_epoch = 96
valid_steps_per_epoch = 16

# the dataset was stored in random order, so the last `valid_data_fraction`
# of the indices serves as the validation split
valid_cutoff = int((1-valid_data_fraction) * num_images_in_dataset)
train_inds = list(range(valid_cutoff))
valid_inds = list(range(valid_cutoff, num_images_in_dataset))

train_data_generator = generate_batches(
    train_inds, batch_size=batch_size, random_crops=True, random_flips=True)
valid_data_generator = generate_batches(
    valid_inds, batch_size=batch_size, random_crops=True, random_flips=True)

# one loss per model output: reconstruction (mae) and breed classification (cross-entropy)
losses_to_use = ['mae','categorical_crossentropy']
loss_weights_to_use = [1.0,0.01]
metrics_to_use = ['accuracy']
optimizer_to_use = optimizers.Nadam(lr=learning_rate)
autoencoder_with_latent_aux_classifier.compile(
    optimizer=optimizer_to_use,
    loss=losses_to_use,
    loss_weights=loss_weights_to_use,
    metrics=metrics_to_use,
)

# `start_time` is read by the next cell to report the training duration
start_time = time.time()
history = autoencoder_with_latent_aux_classifier.fit_generator(
    generator=train_data_generator,
    epochs=num_epochs,
    steps_per_epoch=train_steps_per_epoch,
    validation_data=valid_data_generator,
    validation_steps=valid_steps_per_epoch,
)

In [None]:
# Report the wall-clock training time as whole hours plus the remaining minutes.
training_duration_sec = time.time() - start_time
training_duration_hours = training_duration_sec / 3600
whole_training_hours = int(training_duration_hours)
training_duration_remaining_minutes = (training_duration_hours - whole_training_hours) * 60
print('finished training Autoencoder. took in total %d hours and %d minutes' %(whole_training_hours, training_duration_remaining_minutes))

In [None]:
# history keys to plot, each with a fixed y-axis range
losses_to_show = ['loss','decoder_loss','latent_aux_classifier_loss','latent_aux_classifier_acc']
ylim_ranges = {
    'loss': [0.01,0.27],
    'decoder_loss': [0.01,0.17],
    'latent_aux_classifier_loss': [0.3,5.8],
    'latent_aux_classifier_acc': [0.02,1.01],
}
num_rows_in_subplot = len(losses_to_show)

# show learning curves
epoch_number = np.arange(1,num_epochs+1)
fig = plt.figure(figsize=(min(30,int(5+0.5*num_epochs)),25))
plt.subplots_adjust(left=0.02, right=0.98, top=0.92, bottom=0.02, hspace=0.25, wspace=0.05)
plt.suptitle('figure 3: Auto-Encoder learning curves', fontsize=24)
for k, loss_string in enumerate(losses_to_show):
    train_curve = history.history[loss_string]
    valid_curve = history.history['val_'+loss_string]

    # the title reports the mean over the last 25 epochs
    final_train_loss = np.mean(train_curve[-25:])
    final_valid_loss = np.mean(valid_curve[-25:])

    plt.subplot(num_rows_in_subplot, 1, k+1)
    plt.title('final (train,valid) %s = (%.4f,%.4f)' %(loss_string, final_train_loss, final_valid_loss),fontsize=22)
    plt.plot(epoch_number, train_curve, 'b')
    plt.plot(epoch_number, valid_curve, 'g')
    plt.legend(['train','valid'], fontsize=18)
    plt.xlabel('num iterations', fontsize=16)
    plt.ylabel(loss_string, fontsize=16)
    plt.ylim(*ylim_ranges[loss_string])

fig.savefig('figure_3.png')

# Gather data for GMM learning

In [None]:
all_data_generator = generate_batches(list(range(num_images_in_dataset)), batch_size=1024, random_crops=True, random_flips=True)

# window that starts at half the expansion margin of the stored images
h_start = int(expantion_factor/2)
h_end = h_start + 64
w_start = int(expantion_factor/2)
w_end = w_start + 64

# normalize the center crops and move the image axis to the front: (num_images, 64, 64, 3)
X_normlized_center_crop = np.transpose(normalize_image(image_dataset_4D_matrix[h_start:h_end,w_start:w_end,:,:]),[3,0,1,2])
# mirrored copy of the same crops (axis 2 is the width axis after the transpose)
X_normlized_center_crop_flipped = np.flip(X_normlized_center_crop, axis=2)
#X_normlized_random_crop_random_flip = next(all_data_generator)[0]

# stack: center crops, mirrored center crops, then four generator batches
X_normlized = np.concatenate(
    [X_normlized_center_crop,
     X_normlized_center_crop_flipped,
     *(next(all_data_generator)[0] for _ in range(4))],
    axis=0)

print(X_normlized.shape, X_normlized.min(), X_normlized.max())
del X_normlized_center_crop, X_normlized_center_crop_flipped

In [None]:
# quick verification of flipping and data that will be used for GMM fitting
# top row: center crops, middle row: the same images mirrored, bottom row: generator samples
fig = plt.figure(figsize=(30,12))
plt.subplots_adjust(left=0.02, right=0.98, top=0.94, bottom=0.02, hspace=0.08, wspace=0.05)
plt.suptitle('figure 4: flipping and crops quick check', fontsize=30)
for k in range(8):
    rand_ind_center_crop = np.random.randint(num_images_in_dataset)
    # the mirrored copy of image i sits exactly num_images_in_dataset rows further down
    rand_ind_center_crop_flipped = rand_ind_center_crop + num_images_in_dataset
    rand_ind_random_crop = np.random.randint(2*num_images_in_dataset, X_normlized.shape[0])

    samples_to_show = (rand_ind_center_crop, rand_ind_center_crop_flipped, rand_ind_random_crop)
    for panel_row, sample_ind in enumerate(samples_to_show):
        plt.subplot(3, 8, k+1 + 8*panel_row)
        plt.imshow(unnormalize_image(X_normlized[sample_ind,:,:,:]).astype(np.uint8))
        plt.axis('off')
fig.savefig('figure_4.png')

# Show some Autoencoder reconstructions

In [None]:
# show several model reconstructions
num_rows = 6
num_cols = 9
num_images_to_show = num_rows*num_cols

selected_inds = np.random.choice(X_normlized.shape[0], size=num_images_to_show, replace=False)

# reconstruct, map back to pixel scale and move the image axis to the end
X_rec_autuencoder = np.transpose(unnormalize_image(autoencoder.predict(X_normlized[selected_inds])), [1,2,3,0])
print(X_rec_autuencoder.shape, X_rec_autuencoder.mean(), X_rec_autuencoder.std())

fig = plt.figure(figsize=(30,20))
plt.subplots_adjust(left=0.02, right=0.98, top=0.95, bottom=0.02, hspace=0.05, wspace=0.05)
plt.suptitle('figure 5: AE reconstuctions', fontsize=30)
for k in range(num_images_to_show):
    doglike_image = X_rec_autuencoder[:,:,:,k]
    # clamp in place (this slice is a view into X_rec_autuencoder) before casting to uint8
    np.clip(doglike_image, 0, 255, out=doglike_image)

    plt.subplot(num_rows, num_cols, k+1)
    plt.imshow(doglike_image.astype(np.uint8))
    plt.axis("off")
fig.savefig('figure_5.png')

# Show histograms of encoder latent space units

In [None]:
# encode everything and flatten each latent tensor into one row per sample
X_rep_autoencoder = encoder.predict(X_normlized).reshape((X_normlized.shape[0],-1))
print(X_rep_autoencoder.shape)

fig = plt.figure(figsize=(30,12))
plt.subplots_adjust(left=0.02, right=0.98, top=0.92, bottom=0.02, hspace=0.08, wspace=0.12)
plt.suptitle('figure 6: AE unit activations', fontsize=30)
for k in range(36):
    # histogram of one randomly chosen latent unit, on bins symmetric around zero
    selected_ind = np.random.randint(X_rep_autoencoder.shape[-1])
    unit_activations = X_rep_autoencoder[:,selected_ind]

    lowest_activation = unit_activations.min()
    highest_activation = unit_activations.max()
    range_limit = max(abs(lowest_activation), abs(highest_activation))
    activation_range = np.linspace(-range_limit,range_limit,100)

    plt.subplot(4, 9, k+1)
    plt.hist(unit_activations, bins=activation_range, log=True)
fig.savefig('figure_6.png')

# Add noise perturbations in AE latent space

In [None]:
# per-feature standard deviation of the latent codes, taken across samples (axis 0);
# keepdims leaves a leading axis of length 1 so it broadcasts against a single latent code
ae_latent_rep = encoder.predict(X_normlized)
ae_latent_std = np.std(ae_latent_rep, axis=0, keepdims=True)

print(ae_latent_std.shape, ae_latent_std.mean(), ae_latent_std.std())

# Show distribution of euclidean distances between samples in latent space

In [None]:
# show the distribution of distances between different samples in the latent space
subset_size = 4500 # to limit compute complexity
subset_inds = np.random.choice(ae_latent_rep.shape[0], size=subset_size, replace=False)

# one flattened latent vector per row, then all pairwise distances within the subset
ae_latent_rep_table = ae_latent_rep.reshape((ae_latent_rep.shape[0],-1))
ae_latent_space_distances = pdist(ae_latent_rep_table[subset_inds,:], 'euclidean')

fig = plt.figure(figsize=(20,10))
plt.subplots_adjust(left=0.05, right=0.95, top=0.93, bottom=0.05, hspace=0.15, wspace=0.05)
plt.suptitle('figure 7: euclidean distance distribution in latent space', fontsize=30)
# same histogram twice: linear counts on top, log counts below
for panel_ind, use_log_counts in enumerate((False, True), start=1):
    plt.subplot(2, 1, panel_ind)
    plt.hist(ae_latent_space_distances, bins=200, log=use_log_counts)
plt.xlabel('euclidean distance')
fig.savefig('figure_7.png')

In [None]:
# summary statistics of the pairwise latent distances (1st / 99th percentile, mean, std)
d_min, d_max = np.percentile(ae_latent_space_distances, [1, 99])
d_mean = np.mean(ae_latent_space_distances)
d_std = np.std(ae_latent_space_distances)
print('98%s of euclidian distnaces range between %.4f to %.4f (mean = %.4f, std = %.4f)' %('%', d_min,d_max,d_mean,d_std))

# Show noisy images around an image with various noise levels

In [None]:
# noise strengths, in units of std per latent feature
noise_levels_to_show = [0.2,0.5,0.75,1.0,1.25,1.5,2.0]
num_pertubation_per_noise_level = 4

# one column per noise level, one row per independent noise draw
num_cols = len(noise_levels_to_show)
num_rows = num_pertubation_per_noise_level

# latent code of one randomly chosen sample, with a leading batch axis
selected_image_rep = np.expand_dims(ae_latent_rep[np.random.randint(ae_latent_rep.shape[0])], axis=0)

fig = plt.figure(figsize=(30,18))
plt.subplots_adjust(left=0.02, right=0.98, top=0.92, bottom=0.02, hspace=0.15, wspace=0.05)
plt.suptitle('figure 8: image + random noise in latent space (a)', fontsize=30)
for col, noise_level in enumerate(noise_levels_to_show):
    for row in range(num_pertubation_per_noise_level):
        # gaussian noise scaled per feature by the latent std
        noise_to_add = noise_level * np.random.normal(scale=ae_latent_std, size=ae_latent_std.shape)
        noisy_image_rep = selected_image_rep + noise_to_add

        # decode and clamp to the displayable pixel range
        noisy_image = np.clip(unnormalize_image(decoder.predict(noisy_image_rep)[0]), 0, 255)

        # euclidean distance between the clean and the perturbed latent code
        latent_pair = np.vstack((selected_image_rep.ravel(), noisy_image_rep.ravel()))
        src_to_noisy_dist = pdist(latent_pair)[0]

        plt.subplot(num_rows, num_cols, row*num_cols + col + 1)
        plt.imshow(noisy_image.astype(np.uint8))
        plt.axis("off")
        plt.title('L2 distance = %.3f' %(src_to_noisy_dist), fontsize=18)
fig.savefig('figure_8.png')

# another image with more noise samples

In [None]:
# noise strengths, in units of std per latent feature
noise_levels_to_show = [0.2,0.5,0.75,1.0,1.25,1.5,2.0,2.5]
num_pertubation_per_noise_level = 6

# one column per noise level, one row per independent noise draw
num_cols = len(noise_levels_to_show)
num_rows = num_pertubation_per_noise_level

# latent code of one randomly chosen sample, with a leading batch axis
selected_image_rep = np.expand_dims(ae_latent_rep[np.random.randint(ae_latent_rep.shape[0])], axis=0)

fig = plt.figure(figsize=(30,24))
plt.subplots_adjust(left=0.02, right=0.98, top=0.93, bottom=0.02, hspace=0.15, wspace=0.05)
plt.suptitle('figure 9: image + random noise in latent space (b)', fontsize=30)
for col, noise_level in enumerate(noise_levels_to_show):
    for row in range(num_pertubation_per_noise_level):
        # gaussian noise scaled per feature by the latent std
        noise_to_add = noise_level * np.random.normal(scale=ae_latent_std, size=ae_latent_std.shape)
        noisy_image_rep = selected_image_rep + noise_to_add

        # decode and clamp to the displayable pixel range
        noisy_image = np.clip(unnormalize_image(decoder.predict(noisy_image_rep)[0]), 0, 255)

        # euclidean distance between the clean and the perturbed latent code
        latent_pair = np.vstack((selected_image_rep.ravel(), noisy_image_rep.ravel()))
        src_to_noisy_dist = pdist(latent_pair)[0]

        plt.subplot(num_rows, num_cols, row*num_cols + col + 1)
        plt.imshow(noisy_image.astype(np.uint8))
        plt.axis("off")
        plt.title('L2 distance = %.3f' %(src_to_noisy_dist), fontsize=16)
fig.savefig('figure_9.png')

# another image with more noise samples

In [None]:
# noise strengths, in units of std per latent feature
noise_levels_to_show = [0.2,0.5,0.75,1.0,1.25,1.5,2.0,2.5]
num_pertubation_per_noise_level = 6

# one column per noise level, one row per independent noise draw
num_cols = len(noise_levels_to_show)
num_rows = num_pertubation_per_noise_level

# latent code of one randomly chosen sample, with a leading batch axis
selected_image_rep = np.expand_dims(ae_latent_rep[np.random.randint(ae_latent_rep.shape[0])], axis=0)

fig = plt.figure(figsize=(30,24))
plt.subplots_adjust(left=0.02, right=0.98, top=0.93, bottom=0.02, hspace=0.15, wspace=0.05)
plt.suptitle('figure 10: image + random noise in latent space (c)', fontsize=30)
for col, noise_level in enumerate(noise_levels_to_show):
    for row in range(num_pertubation_per_noise_level):
        # gaussian noise scaled per feature by the latent std
        noise_to_add = noise_level * np.random.normal(scale=ae_latent_std, size=ae_latent_std.shape)
        noisy_image_rep = selected_image_rep + noise_to_add

        # decode and clamp to the displayable pixel range
        noisy_image = np.clip(unnormalize_image(decoder.predict(noisy_image_rep)[0]), 0, 255)

        # euclidean distance between the clean and the perturbed latent code
        latent_pair = np.vstack((selected_image_rep.ravel(), noisy_image_rep.ravel()))
        src_to_noisy_dist = pdist(latent_pair)[0]

        plt.subplot(num_rows, num_cols, row*num_cols + col + 1)
        plt.imshow(noisy_image.astype(np.uint8))
        plt.axis("off")
        plt.title('L2 distance = %.3f' %(src_to_noisy_dist), fontsize=16)
fig.savefig('figure_10.png')

# Show latent interpolations between two images

In [None]:
# blend weights of the second image for the in-between columns
interpulation_weights = [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9]
num_interpulations = len(interpulation_weights)
num_pairs = 5

# each row: first image, the interpolations, second image
num_cols = len(interpulation_weights) + 2
num_rows = num_pairs

fig = plt.figure(figsize=(30,20))
plt.subplots_adjust(left=0.02, right=0.98, top=0.95, bottom=0.02, hspace=0.05, wspace=0.05)
plt.suptitle('figure 11: latent space interpulations', fontsize=30)
for row in range(num_pairs):
    # latent codes of two randomly chosen samples, each with a leading batch axis
    selected_image_1_rep = np.expand_dims(ae_latent_rep[np.random.randint(ae_latent_rep.shape[0])], axis=0)
    selected_image_2_rep = np.expand_dims(ae_latent_rep[np.random.randint(ae_latent_rep.shape[0])], axis=0)

    # decode both endpoints and clamp them to the displayable pixel range
    selected_image_1 = np.clip(unnormalize_image(decoder.predict(selected_image_1_rep)[0]), 0, 255)
    selected_image_2 = np.clip(unnormalize_image(decoder.predict(selected_image_2_rep)[0]), 0, 255)

    # first and last cell of the row hold the two endpoints
    image_ind = 1+row*num_cols
    plt.subplot(num_rows, num_cols, image_ind)
    plt.imshow(selected_image_1.astype(np.uint8))
    plt.axis("off")
    plt.subplot(num_rows, num_cols, (row+1)*num_cols)
    plt.imshow(selected_image_2.astype(np.uint8))
    plt.axis("off")

    for col, weight in enumerate(interpulation_weights):
        # linear blend of the two latent codes, decoded back to an image
        interpulated_image_rep = (1-weight)*selected_image_1_rep + weight*selected_image_2_rep
        interpulated_image = np.clip(unnormalize_image(decoder.predict(interpulated_image_rep)[0]), 0, 255)

        plt.subplot(num_rows, num_cols, col+2+row*num_cols)
        plt.imshow(interpulated_image.astype(np.uint8))
        plt.axis("off")
fig.savefig('figure_11.png')

# Show interpolations side by side along with a linear blend of the pixels

In [None]:
# Figure 12: for every random pair, the first of its two rows blends the latent
# codes and decodes the result, while the second row blends the two decoded
# endpoint images directly in pixel space.
interpulation_weights = [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9]
num_interpulations = len(interpulation_weights)
num_pairs = 10

num_rows = 2*num_pairs             # two rows (latent / pixel) per pair
num_cols = num_interpulations + 2  # one column per weight plus the two endpoints

fig = plt.figure(figsize=(30,50))
plt.subplots_adjust(left=0.02, right=0.98, top=0.96, bottom=0.02, hspace=0.06, wspace=0.05)
plt.suptitle('figure 12: AE interpulations vs pixel space interpulations', fontsize=30)
for pair in range(num_pairs):
    # two independent random draws from the latent table (not forced to differ)
    selected_image_1_rep = ae_latent_rep[np.random.randint(ae_latent_rep.shape[0])][np.newaxis,:,:,:]
    selected_image_2_rep = ae_latent_rep[np.random.randint(ae_latent_rep.shape[0])][np.newaxis,:,:,:]

    # decode both endpoints and clamp them to the displayable [0, 255] range
    selected_image_1 = np.clip(unnormalize_image(decoder.predict(selected_image_1_rep)[0]), 0, 255)
    selected_image_2 = np.clip(unnormalize_image(decoder.predict(selected_image_2_rep)[0]), 0, 255)

    # --- first row of the pair: latent space interpolation ---
    row = 2*pair
    ax = plt.subplot(num_rows, num_cols, 1 + row*num_cols)
    ax.imshow(selected_image_1.astype(np.uint8))
    ax.axis("off")
    ax = plt.subplot(num_rows, num_cols, (row+1)*num_cols)
    ax.imshow(selected_image_2.astype(np.uint8))
    ax.axis("off")

    for col, weight in enumerate(interpulation_weights):
        # blend the latent codes, then decode and clamp the blended code
        interpulated_image_rep = (1-weight)*selected_image_1_rep + weight*selected_image_2_rep
        interpulated_image = np.clip(unnormalize_image(decoder.predict(interpulated_image_rep)[0]), 0, 255)
        ax = plt.subplot(num_rows, num_cols, col+2+row*num_cols)
        ax.imshow(interpulated_image.astype(np.uint8))
        ax.axis("off")

    # --- second row of the pair: pixel space interpolation ---
    row = 2*pair + 1
    ax = plt.subplot(num_rows, num_cols, 1 + row*num_cols)
    ax.imshow(selected_image_1.astype(np.uint8))
    ax.axis("off")
    ax = plt.subplot(num_rows, num_cols, (row+1)*num_cols)
    ax.imshow(selected_image_2.astype(np.uint8))
    ax.axis("off")

    for col, weight in enumerate(interpulation_weights):
        # weighted average of the two (already clamped) decoded images
        interpulated_image = (1-weight)*selected_image_1 + weight*selected_image_2
        ax = plt.subplot(num_rows, num_cols, col+2+row*num_cols)
        ax.imshow(interpulated_image.astype(np.uint8))
        ax.axis("off")
fig.savefig('figure_12.png')

# Apply Kmeans on the latents and show the cluster centers

### First train a PCA to reduce dimensionality so that k-means finishes in a reasonable time

In [None]:
# Fit a whitened PCA on the latent table; k-means is later run on this
# lower-dimensional projection instead of on the raw latents.
num_components_for_kmeans = 384

dog_PCA_for_kmeans = decomposition.PCA(n_components=num_components_for_kmeans, whiten=True)
dog_PCA_for_kmeans.fit(ae_latent_rep_table)

# report how much of the variance the retained components account for
explained_percent_for_kmeans = 100*dog_PCA_for_kmeans.explained_variance_ratio_.sum()
print('total explained percent by %d components - %.1f%s' %(num_components_for_kmeans, explained_percent_for_kmeans, '%'))

In [None]:
# Cluster the PCA-projected latents with k-means and time the fit.
num_clusters = 180

# project the latent table into the PCA space used for clustering
X_for_kmeans = dog_PCA_for_kmeans.transform(ae_latent_rep_table)
dog_Kmeans = cluster.KMeans(n_clusters=num_clusters)

start_time = time.time()
# cluster_inds holds one cluster label per row of X_for_kmeans
cluster_inds = dog_Kmeans.fit_predict(X_for_kmeans)
elapsed_minutes = (time.time()-start_time)/60
print('finished training Kmeans model. took %.1f minutes' %(elapsed_minutes))

In [None]:
# Count how many samples fell into each cluster, then order the cluster ids
# from the least to the most populated one (sorted() is stable, so clusters
# with equal counts keep the Counter's iteration order).
cluster_counter_dict = collections.Counter(cluster_inds)
sorted_cluster_count = sorted(cluster_counter_dict.items(), key=lambda id_and_count: id_and_count[1])
sorted_cluster_inds = [cluster_id for cluster_id, sample_count in sorted_cluster_count]

In [None]:
# Figure 13: decode every k-means centroid, ordered from the most to the least
# populated cluster. Each title reads (rank, number of samples in the cluster).
num_cols = 9

# Size the grid from the clusters that actually received samples instead of
# hard-coding 20 rows: a fixed 20x9 grid only fits exactly 180 clusters and any
# larger count would make plt.subplot raise on an out-of-range index.
num_clusters_to_show = len(sorted_cluster_inds)
num_rows = max(1, -(-num_clusters_to_show // num_cols))  # ceiling division

# 3.5 inches of height per row reproduces the original (30,70) canvas for 20 rows
fig = plt.figure(figsize=(30, 3.5*num_rows));
plt.subplots_adjust(left=0.02, right=0.98, top=0.96, bottom=0.02, hspace=0.17, wspace=0.05);
plt.suptitle('figure 13: Kmeans cluster centers', fontsize=30)

# sorted_cluster_inds is ordered by ascending count, so walk it backwards to
# start with the most frequent cluster
for k, cluster_ind in enumerate(reversed(sorted_cluster_inds)):
    cluster_count = cluster_counter_dict[cluster_ind]

    # map the centroid from the k-means PCA space back to the latent table space,
    # then restore the 4-D layout the decoder is fed with
    cluster_center_rep_row = dog_PCA_for_kmeans.inverse_transform(dog_Kmeans.cluster_centers_[cluster_ind,:][np.newaxis,:])
    cluster_center_rep = np.reshape(cluster_center_rep_row, (1,4,4,encoder_output_channel_size))

    # decode and clamp to the displayable [0, 255] range
    cluster_doglike_image = np.clip(unnormalize_image(decoder.predict(cluster_center_rep)[0]), 0, 255)

    plt.subplot(num_rows,num_cols,k+1); plt.imshow(cluster_doglike_image.astype(np.uint8));
    plt.title('(%d,%d)' %(k+1,cluster_count), fontsize=16); plt.axis("off");
fig.savefig('figure_13.png')

# Interpolate between two images from the same cluster

In [None]:
# Figure 14: like figure 11, but both endpoints of every row are drawn from the
# same k-means cluster (chosen among the most populated clusters).
interpulation_weights = [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9]
num_interpulations = len(interpulation_weights)
num_pairs = 12

most_frequent_clusters_cutoff = min(60,num_clusters)

# Candidate clusters: the most populated ones, restricted to clusters holding at
# least two samples. A pair is drawn without replacement below, which raises for
# a single-member cluster (possible when the cutoff covers every cluster).
candidate_cluster_inds = [cluster_id for cluster_id in sorted_cluster_inds[-most_frequent_clusters_cutoff:]
                          if np.count_nonzero(cluster_inds == cluster_id) >= 2]
if len(candidate_cluster_inds) == 0:
    raise ValueError('no cluster contains at least two samples to interpolate between')

num_cols = len(interpulation_weights) + 2  # one column per weight plus the two endpoints
num_rows = num_pairs

fig = plt.figure(figsize=(30,42));
plt.subplots_adjust(left=0.02, right=0.98, top=0.95, bottom=0.02, hspace=0.05, wspace=0.05);
plt.suptitle('figure 14: same cluster interpulations', fontsize=30)
for row in range(num_pairs):

    # randomly select a cluster among the eligible most frequent clusters
    selected_cluster_ind = np.random.choice(candidate_cluster_inds)

    # randomly select two distinct images that belong to that cluster
    possible_candidates = np.nonzero(cluster_inds == selected_cluster_ind)[0]
    chosen_pair_inds = np.random.choice(possible_candidates, size=2, replace=False)
    selected_image_1_rep = ae_latent_rep[chosen_pair_inds[0]][np.newaxis,:,:,:]
    selected_image_2_rep = ae_latent_rep[chosen_pair_inds[1]][np.newaxis,:,:,:]

    # decode both endpoints and clamp them to the displayable [0, 255] range
    selected_image_1 = np.clip(unnormalize_image(decoder.predict(selected_image_1_rep)[0]), 0, 255)
    selected_image_2 = np.clip(unnormalize_image(decoder.predict(selected_image_2_rep)[0]), 0, 255)

    # endpoints go into the first and the last cell of the current row
    plt.subplot(num_rows,num_cols,1 + row*num_cols); plt.imshow(selected_image_1.astype(np.uint8)); plt.axis("off");
    plt.subplot(num_rows,num_cols,(row+1)*num_cols); plt.imshow(selected_image_2.astype(np.uint8)); plt.axis("off");

    for col, weight in enumerate(interpulation_weights):

        # create a latent interpolation between the two endpoints, decode and clamp it
        interpulated_image_rep = (1-weight)*selected_image_1_rep + weight*selected_image_2_rep
        interpulated_image = np.clip(unnormalize_image(decoder.predict(interpulated_image_rep)[0]), 0, 255)

        plt.subplot(num_rows,num_cols,col+2+row*num_cols); plt.imshow(interpulated_image.astype(np.uint8)); plt.axis("off");
fig.savefig('figure_14.png')

# Train PCA on the encoder representation to sample from

In [None]:
# Fit the whitened PCA that defines the space the sampling models work in,
# and project the encoder representation into it.
num_components = 512

dog_PCA = decomposition.PCA(n_components=num_components, whiten=True)
dog_PCA.fit(X_rep_autoencoder)

print('finished training PCA model')
X_pca = dog_PCA.transform(X_rep_autoencoder)

# Figure 15: cumulative explained variance, with a leading 0 so that the curve
# starts at "zero components"
cumulative_explained_percent = 100*np.concatenate(([0], np.cumsum(dog_PCA.explained_variance_ratio_)))

fig = plt.figure(figsize=(16,10))
plt.title('figure 15: PCA variance explained', fontsize=26)
plt.plot(cumulative_explained_percent)
plt.xlabel('num components', fontsize=16)
plt.ylabel('% variance explained', fontsize=16)
plt.ylim(-1,101)
plt.xlim(-1,num_components+1)
fig.savefig('figure_15.png')

total_explained_percent = 100*dog_PCA.explained_variance_ratio_.sum()
print('total explained percent by %d components - %.1f%s' %(num_components, total_explained_percent, '%'))

# Show some Autoencoder + PCA reconstructions (maximal expected performance)

In [None]:
# Figure 16: push randomly chosen samples through PCA and back, then decode
# the reconstructed representations.
num_rows = 6
num_cols = 9
num_images_to_show = num_rows*num_cols

# PCA round trip of the whole data set, followed by a random subset without repeats
X_rep_autoencoder_rec = dog_PCA.inverse_transform(X_pca)
selected_inds = np.random.choice(X_rep_autoencoder_rec.shape[0], size=num_images_to_show, replace=False)

fig = plt.figure(figsize=(30,20))
plt.subplots_adjust(left=0.02, right=0.98, top=0.95, bottom=0.02, hspace=0.05, wspace=0.05)
plt.suptitle('figure 16: AE + PCA reconstrctions', fontsize=30)
for k, selected_ind in enumerate(selected_inds):
    # restore the 4-D layout fed to the decoder, decode, clamp to [0, 255]
    decoder_input = np.reshape(X_rep_autoencoder_rec[selected_ind,:], (1,4,4,encoder_output_channel_size))
    doglike_image = np.clip(unnormalize_image(decoder.predict(decoder_input)[0]), 0, 255)

    ax = plt.subplot(num_rows, num_cols, k+1)
    ax.imshow(doglike_image.astype(np.uint8))
    ax.axis("off")
fig.savefig('figure_16.png')

### Show some PCA unit activation histograms (should appear approx. gaussian)

In [None]:
# Figure 17: log-scale histograms of 36 randomly chosen PCA components,
# each drawn on bins that are symmetric around zero.
selected_inds = np.random.choice(num_components, size=36, replace=False)

fig = plt.figure(figsize=(30,12))
plt.subplots_adjust(left=0.02, right=0.98, top=0.93, bottom=0.02, hspace=0.13, wspace=0.13)
plt.suptitle('figure 17: PCA unit activations', fontsize=30)
for k, selected_ind in enumerate(selected_inds):
    unit_activations = X_pca[:,selected_ind]

    # largest absolute activation defines the symmetric histogram range
    range_limit = np.abs(unit_activations).max()
    activation_range = np.linspace(-range_limit, range_limit, 100)

    ax = plt.subplot(4, 9, k+1)
    ax.hist(unit_activations, bins=activation_range, log=True)
fig.savefig('figure_17.png')

# Decompose data with ICA (optional)
The goal is to find statistically independent directions in the latent space for more efficient sampling.

In [None]:
# Decomposition used by the sampling cells that follow:
#   'PCA' - work directly in the PCA space (current setting)
#   'ICA' - additionally run FastICA on top of the PCA coordinates
decomposition_method = 'PCA'

if decomposition_method == 'ICA':
    # keep as many independent components as there are PCA components
    num_ICA_components = num_components
    dog_ICA = decomposition.FastICA(n_components=num_ICA_components, algorithm='parallel', whiten=True)

    start_time = time.time()
    X_pca_ica = dog_ICA.fit_transform(X_pca)
    elapsed_minutes = (time.time()-start_time)/60
    print('finished training ICA model. took %.1f minutes' %(elapsed_minutes))

### Show unit activation histograms for the ICA model (should be non-Gaussian)

In [None]:
# Figure 18 (ICA only): log-scale histograms of 36 randomly chosen ICA
# components, each drawn on bins that are symmetric around zero.
if decomposition_method == 'ICA':
    selected_inds = np.random.choice(num_ICA_components, size=36, replace=False)

    fig = plt.figure(figsize=(30,12))
    plt.subplots_adjust(left=0.02, right=0.98, top=0.93, bottom=0.02, hspace=0.13, wspace=0.13)
    plt.suptitle('figure 18: ICA unit activations', fontsize=30)
    for k, selected_ind in enumerate(selected_inds):
        unit_activations = X_pca_ica[:,selected_ind]

        # largest absolute activation defines the symmetric histogram range
        range_limit = np.abs(unit_activations).max()
        activation_range = np.linspace(-range_limit, range_limit, 100)

        ax = plt.subplot(4, 9, k+1)
        ax.hist(unit_activations, bins=activation_range, log=True)
    fig.savefig('figure_18.png')

# Sample from a single Gaussian in the selected decomposition method's space and present the samples

In [None]:
# Figure 19: fit one diagonal-covariance gaussian to the selected decomposition,
# draw samples from it and decode them.
covariance_matrix_regularization = 1e-5 if decomposition_method == 'ICA' else 1e-3

# a "mixture" with a single component is just one gaussian
dog_single_gaussian_model = mixture.GaussianMixture(n_components=1, covariance_type='diag', reg_covar=covariance_matrix_regularization, n_init=5)

# fit in whichever space was selected (only the chosen branch is evaluated,
# so X_pca_ica does not need to exist in PCA mode)
dog_single_gaussian_model.fit(X_pca_ica if decomposition_method == 'ICA' else X_pca)

num_rows = 6
num_cols = 9
num_images_to_show = num_rows*num_cols

# sample() returns a tuple; its first element holds the drawn samples
random_latents = dog_single_gaussian_model.sample(num_images_to_show)[0]
print(random_latents.shape, random_latents.mean(), random_latents.std())

# walk back through the decomposition(s): ICA -> PCA -> encoder representation
pca_space_latents = dog_ICA.inverse_transform(random_latents) if decomposition_method == 'ICA' else random_latents
random_doglike_vectors = dog_PCA.inverse_transform(pca_space_latents)
print(random_doglike_vectors.shape, random_doglike_vectors.mean(), random_doglike_vectors.std())

fig = plt.figure(figsize=(30,20))
plt.subplots_adjust(left=0.02, right=0.98, top=0.95, bottom=0.02, hspace=0.05, wspace=0.05)
plt.suptitle('figure 19: single gaussian samples', fontsize=30)
for k in range(num_images_to_show):
    # restore the 4-D layout fed to the decoder, decode, clamp to [0, 255]
    decoder_input = np.reshape(random_doglike_vectors[k,:], (1,4,4,encoder_output_channel_size))
    doglike_image = np.clip(unnormalize_image(decoder.predict(decoder_input)[0]), 0, 255)

    ax = plt.subplot(num_rows, num_cols, k+1)
    ax.imshow(doglike_image.astype(np.uint8))
    ax.axis("off")
fig.savefig('figure_19.png')

# Find the best number of Gaussians for the GMM

In [None]:
# Figure 20: fit diagonal GMMs with a growing number of components on a random
# 70% split and compare mean log-likelihood on the train and held-out parts.
num_gaussians_to_try = [1,2,3,4,5,7,10,13,17,22,30,40,50,60,100]
covariance_matrix_type = 'diag'
covariance_matrix_regularization = 1e-5 if decomposition_method == 'ICA' else 1e-3

# random train / validation split over the rows of X_pca
valid_fraction = 0.3
valid_cutoff = int((1-valid_fraction)*X_pca.shape[0])
rand_perm = np.random.permutation(X_pca.shape[0])
train_rows = rand_perm[:valid_cutoff]
valid_rows = rand_perm[valid_cutoff:]

X_pca_train = X_pca[train_rows,:]
X_pca_valid = X_pca[valid_rows,:]
if decomposition_method == 'ICA':
    X_pca_ica_train = X_pca_ica[train_rows,:]
    X_pca_ica_valid = X_pca_ica[valid_rows,:]
    gmm_train_data, gmm_valid_data = X_pca_ica_train, X_pca_ica_valid
else:
    gmm_train_data, gmm_valid_data = X_pca_train, X_pca_valid

train_LogLikelihood = []
valid_LogLikelihood = []
for num_gaussians in num_gaussians_to_try:

    curr_dog_GMM = mixture.GaussianMixture(n_components=num_gaussians, covariance_type=covariance_matrix_type, n_init=2,
                                           reg_covar=covariance_matrix_regularization, verbose=0, verbose_interval=1)

    # fit on the training part, score both parts with the mean per-sample log-likelihood
    curr_dog_GMM.fit(gmm_train_data)
    train_LL = curr_dog_GMM.score_samples(gmm_train_data).mean()
    valid_LL = curr_dog_GMM.score_samples(gmm_valid_data).mean()

    print('for %d gaussians: (train,valid) LogLikelihood = (%.5f,%.5f)' %(num_gaussians, train_LL, valid_LL))

    train_LogLikelihood.append(train_LL)
    valid_LogLikelihood.append(valid_LL)

fig = plt.figure(figsize=(20,10))
plt.title('figure 20: GMM LL vs number of gaussians', fontsize=26)
plt.plot(num_gaussians_to_try, train_LogLikelihood, color='b')
plt.plot(num_gaussians_to_try, valid_LogLikelihood, color='g')
plt.legend(['train','valid'], fontsize=16)
plt.ylabel('Log Likelihood', fontsize=16)
plt.xlabel('num gaussians', fontsize=16)
fig.savefig('figure_20.png')

# Train final GMM

In [None]:
# Train the final mixture of gaussians in the selected decomposition space.
#
# The component count that scored best on the validation split is computed, but
# a manual override takes precedence when it is set. Previously the override was
# a bare reassignment that silently discarded the validated value; it is now an
# explicit, switchable setting and both numbers are reported.
best_validated_num_gaussians = num_gaussians_to_try[np.argmax(valid_LogLikelihood)]
num_gaussians_override = 8000  # set to None to use the validation-selected value
if num_gaussians_override is None:
    num_gaussians = best_validated_num_gaussians
else:
    num_gaussians = num_gaussians_override

print('selected number of gaussians is %d' %(num_gaussians))
print('best number of gaussians on the validation split was %d' %(best_validated_num_gaussians))

covariance_matrix_type = 'diag'
if decomposition_method == 'ICA':
    covariance_matrix_regularization = 1e-5
else:
    covariance_matrix_regularization = 1e-3
dog_gaussian_mixture_model = mixture.GaussianMixture(n_components=num_gaussians, covariance_type=covariance_matrix_type, n_init=3, 
                                                     reg_covar=covariance_matrix_regularization, verbose=2, verbose_interval=1)

# NOTE(review): the final model is fit on the training split produced by the
# model-selection cell, not on the full data set - confirm this is intended.
if decomposition_method == 'ICA':
    dog_gaussian_mixture_model.fit(X_pca_ica_train)
else:
    dog_gaussian_mixture_model.fit(X_pca_train)

print('finished training GMM')

# Show GMM samples

In [None]:
# Figure 21: draw samples from the trained mixture model and decode them.
num_rows = 6
num_cols = 9
num_images_to_show = num_rows*num_cols

# sample() returns a tuple; its first element holds the drawn samples
random_latents = dog_gaussian_mixture_model.sample(num_images_to_show)[0]

# walk back through the decomposition(s): ICA -> PCA -> encoder representation
pca_space_latents = dog_ICA.inverse_transform(random_latents) if decomposition_method == 'ICA' else random_latents
random_doglike_vectors = dog_PCA.inverse_transform(pca_space_latents)

fig = plt.figure(figsize=(30,20))
plt.subplots_adjust(left=0.02, right=0.98, top=0.95, bottom=0.02, hspace=0.05, wspace=0.05)
plt.suptitle('figure 21: GMM samples', fontsize=30)
for k in range(num_images_to_show):
    # restore the 4-D layout fed to the decoder, decode, clamp to [0, 255]
    decoder_input = np.reshape(random_doglike_vectors[k,:], (1,4,4,encoder_output_channel_size))
    doglike_image = np.clip(unnormalize_image(decoder.predict(decoder_input)[0]), 0, 255)

    ax = plt.subplot(num_rows, num_cols, k+1)
    ax.imshow(doglike_image.astype(np.uint8))
    ax.axis("off")
fig.savefig('figure_21.png')

# Create a submission

In [None]:
# Create the submission: sample latents from the mixture model, decode each one
# to an image and pack all images into 'images.zip'.
num_images_to_submit = 10000

# sample() returns a tuple; its first element holds the drawn samples
sampled_latents = dog_gaussian_mixture_model.sample(num_images_to_submit)[0]

# walk back through the decomposition(s): ICA -> PCA -> encoder representation
if decomposition_method == 'ICA':
    random_doglike_vectors = dog_PCA.inverse_transform(dog_ICA.inverse_transform(sampled_latents))
else:
    random_doglike_vectors = dog_PCA.inverse_transform(sampled_latents)

# The context manager closes the archive when the loop ends (or fails). Without
# the close the zip central directory is not guaranteed to be written while the
# kernel is alive, which can leave an unreadable 'images.zip'.
with zipfile.ZipFile('images.zip', mode='w') as z:
    for k in range(num_images_to_submit):
        # restore the 4-D layout fed to the decoder, decode, clamp to [0, 255]
        decoder_input = np.reshape(random_doglike_vectors[k,:], (1,4,4,encoder_output_channel_size))
        doglike_image = np.clip(unnormalize_image(decoder.predict(decoder_input)[0]), 0, 255)
        image_to_save = Image.fromarray(doglike_image.astype(np.uint8))

        # write a temporary PNG, add it to the archive, then remove the loose file
        image_filename = '%d.png' %(k)
        image_to_save.save(image_filename,'PNG'); z.write(image_filename); os.remove(image_filename)
print('finished writing "images.zip"')