In [38]:
import tensorflow as tf
import matplotlib.pyplot
import math
import pickle
import matplotlib.pyplot as plt
import numpy as np
import cv2
import time

In [39]:
def PrepareTrainData(path='face_all.pickle', n_samples=21100):
    """Load the face data set and split it into glasses / no-glasses images.

    The pickle is expected to hold a dict with a 'Features' array (one image
    per row, pixel values 0-255) and a 'Labels' array whose first row carries
    one 0/1 label per image (1 = glasses, 0 = no glasses).

    Args:
        path: location of the pickle file.
        n_samples: number of leading samples to use.

    Returns:
        (train_all_glasses, train_all_no_glasses): pixel rows scaled by 1/255
        for the samples labelled 1 and 0 respectively.
    """
    # NOTE(review): pickle.load executes arbitrary code from the file; only
    # use this with a data file from a trusted source.
    with open(path, 'rb') as pickle_file:  # close the handle deterministically
        pickle_obj = pickle.load(pickle_file)

    features = pickle_obj['Features']
    labels = pickle_obj['Labels'][0][0:n_samples]
    train_x = features[0:n_samples] / 255

    train_all_glasses = train_x[np.where(labels == 1)]
    train_all_no_glasses = train_x[np.where(labels == 0)]
    return train_all_glasses, train_all_no_glasses

In [40]:
def _resize_batch(images, src_shape, dst_size):
    """Reshape each flat image to `src_shape`, resize it to `dst_size` and add a channel axis."""
    resized = []
    for flat_image in images:
        im = np.asarray(flat_image).reshape(src_shape)
        im = cv2.resize(im, dst_size)
        # append a trailing channel axis: (H, W) -> (H, W, 1)
        resized.append(im.reshape(im.shape + (1,)))
    return np.asarray(resized)


def resizeImages(celeb_glasses, celeb_no_glasses, src_shape=(54, 44), dst_size=(64, 64)):
    """Resize both image sets to the network input size.

    Args:
        celeb_glasses: iterable of flattened images of people with glasses.
        celeb_no_glasses: iterable of flattened images of people without glasses.
        src_shape: (rows, cols) each flattened image is reshaped to before resizing.
        dst_size: target size handed to cv2.resize.

    Returns:
        Two arrays, one per input set, each holding the resized images with a
        trailing channel axis of length 1.
    """
    return (_resize_batch(celeb_glasses, src_shape, dst_size),
            _resize_batch(celeb_no_glasses, src_shape, dst_size))

In [41]:
# Load both face sets and resize them in one step; the results are read as
# module-level data by the visualisation and training code further down.
celeb_glasses, celeb_no_glasses = resizeImages(*PrepareTrainData())

In [42]:
# Clear the default TensorFlow graph before any layers are built, so that
# re-running the notebook does not try to re-create variables that an
# earlier run already registered under the same tf.get_variable names.
tf.reset_default_graph()

In [43]:
def visualizeSampleImage(index=10, batch_sz=64, ae_no_glasses=None, ae_with_glasses=None):
    """Show a no-glasses face next to its decoding by the glasses auto-encoder.

    A batch of `batch_sz` images starting at `index` is taken from the
    module-level `celeb_no_glasses` array, encoded with the no-glasses model
    and decoded with the glasses model; the first image of the batch and its
    prediction are plotted side by side.

    Args:
        index: start position of the batch inside `celeb_no_glasses`.
        batch_sz: number of images pushed through the models.
        ae_no_glasses: model providing `extract_Z(images, batch_sz)`. When
            omitted, a module-level variable of the same name is used.
        ae_with_glasses: model providing `predict_using_Z(Z, batch_sz)`. When
            omitted, a module-level variable of the same name is used.

    Raises:
        NameError: if a model is neither passed in nor defined at module level.
    """
    module_scope = globals()
    if ae_no_glasses is None:
        ae_no_glasses = module_scope.get('ae_no_glasses')
    if ae_with_glasses is None:
        ae_with_glasses = module_scope.get('ae_with_glasses')
    if ae_no_glasses is None or ae_with_glasses is None:
        raise NameError("ae_no_glasses / ae_with_glasses must be passed in or defined at module level")

    # honour the caller's index instead of a fixed position
    test_images = celeb_no_glasses[index: index+batch_sz]
    Z_no_glasses = ae_no_glasses.extract_Z(test_images, batch_sz)
    predictions_as_if_glasses_present = ae_with_glasses.predict_using_Z(Z_no_glasses, batch_sz)

    # NOTE(review): figsize is in inches, so (64, 64) is a very large canvas — confirm it is intended.
    fig, axes = plt.subplots(nrows = 1, ncols = 2, figsize=(64,64))
    axes[0].imshow(np.squeeze(test_images[0]))
    axes[0].set_title('Input (no glasses)')
    axes[1].imshow(np.squeeze(predictions_as_if_glasses_present[0]))
    axes[1].set_title('Decoded by glasses model')

In [44]:
class ConvLayer():
    """Strided 2-D convolution followed by a bias, an optional normalisation and an activation."""

    def __init__(self, mi, mo, name, apply_batch_norm, filter_sz = 5, stride = 2, f= tf.nn.relu):
        """Create the kernel and bias variables.

        mi / mo are the input / output channel counts, `name` is used as the
        variable-name suffix, `f` is the activation applied in forward().
        """
        kernel_shape = (filter_sz, filter_sz, mi, mo)
        self.W = tf.get_variable(
            "W_%s" % name,
            shape=kernel_shape,
            dtype=tf.float32,
            initializer=tf.truncated_normal_initializer(stddev=0.02),
        )
        self.b = tf.get_variable(
            "b_%s" % name,
            shape=(mo,),
            dtype=tf.float32,
            initializer=tf.zeros_initializer(),
        )

        self.name = name
        self.stride = stride
        self.filter_sz = filter_sz
        self.f = f
        self.apply_batch_norm = apply_batch_norm
        self.params = [self.W, self.b]

    def forward(self, X, reuse, is_training):
        """Apply the layer to X; `reuse` and `is_training` are accepted but not used."""
        # strides follow the NHWC layout: [batch, height, width, channels]
        nhwc_strides = [1, self.stride, self.stride, 1]
        with tf.device('/device:GPU:1'):
            activations = tf.nn.conv2d(X, self.W, strides=nhwc_strides, padding='SAME')

        activations = tf.nn.bias_add(activations, self.b)
        if not self.apply_batch_norm:
            return self.f(activations)

        # normalisation with fixed (not batch-derived) mean and variance
        normalised = tf.nn.batch_normalization(
            activations,
            mean=0.5,
            variance=0.02,
            offset=0,
            scale=1,
            variance_epsilon=1e-5,
            name='batch_morm' + self.name,
        )
        return self.f(normalised)

In [45]:
class DeConvLayer():
    """Transposed 2-D convolution followed by a bias, an optional normalisation and an activation."""

    def __init__(self, mi, mo, name, output_shape, apply_batch_norm, filter_sz =5, stride =2, f = tf.nn.relu):
        """Create the kernel and bias variables.

        mi / mo are the input / output channel counts; `output_shape` is the
        shape handed to conv2d_transpose in forward().
        """
        # conv2d_transpose kernels are laid out as (height, width, out_channels, in_channels)
        kernel_shape = (filter_sz, filter_sz, mo, mi)
        self.W = tf.get_variable(
            "W_%s" % name,
            shape=kernel_shape,
            dtype=tf.float32,
            initializer=tf.truncated_normal_initializer(stddev=0.02),
        )
        self.b = tf.get_variable("b_%s" % name, shape=(mo,), initializer=tf.zeros_initializer())

        self.output_shape = output_shape
        self.name = name
        self.stride = stride
        self.filter_sz = filter_sz
        self.f = f
        self.apply_batch_norm = apply_batch_norm
        self.params = [self.W, self.b]

    def forward(self, X, reuse, is_training):
        """Apply the layer to X; `reuse` and `is_training` are accepted but not used."""
        nhwc_strides = [1, self.stride, self.stride, 1]
        with tf.device('/device:GPU:1'):
            upsampled = tf.nn.conv2d_transpose(
                X,
                filter=self.W,
                output_shape=self.output_shape,
                strides=nhwc_strides,
            )
        upsampled = tf.nn.bias_add(upsampled, self.b)

        if not self.apply_batch_norm:
            return self.f(upsampled)

        # normalisation with fixed (not batch-derived) mean and variance
        normalised = tf.nn.batch_normalization(
            upsampled,
            mean=0.5,
            variance=0.02,
            offset=0,
            scale=1,
            variance_epsilon=1e-5,
            name='batch_morm' + self.name,
        )
        return self.f(normalised)

In [46]:
class HiddenLayer(object):
    """Dense layer holding an encode (M1 -> M2) and a decode (M2 -> M1) weight set."""

    def __init__(self, M1, M2, layer_num, activation=tf.nn.leaky_relu):
        """Create encode/decode weights and biases.

        Args:
            M1: width on the input side of the encoder.
            M2: width on the output side of the encoder.
            layer_num: index used as a suffix in the variable names.
            activation: function applied to the affine output in forward().
        """
        self.activation = activation
        layer_num = str(layer_num)
        self.layer_num = layer_num
        r= math.sqrt(6) / math.sqrt(M1 + M2 + 1)

        self.W1 = tf.Variable(tf.random_normal(shape =(M1, M2), stddev=r), name = 'W1'+layer_num)
        self.b1 = tf.Variable(tf.random_normal(shape =(1, M2), stddev=r), name = 'b1'+ layer_num)
        self.W2 = tf.Variable(tf.random_normal(shape =(M2, M1), stddev=r), name = 'W2'+layer_num)
        # the decode bias gets its own name instead of reusing 'b1...'
        self.b2 = tf.Variable(tf.random_normal(shape =(1, M1), stddev=r), name = 'b2'+ layer_num)
        self.params = [self.W1, self.W2]

    def forward(self, X, encode = False, batch_norm = False):
        """Run X through the encode (encode=True) or decode weights.

        When `batch_norm` is true the affine output is normalised (with the
        fixed mean/variance below) before the activation is applied.
        """
        if encode:
            weights, bias, op_prefix = self.W1, self.b1, 'encode'
        else:
            weights, bias, op_prefix = self.W2, self.b2, 'decode'

        with tf.device('/device:GPU:0'):
            l = tf.matmul(X, weights) + bias
            if batch_norm:
                # feed the normalised tensor to the activation; previously it
                # was stored in a separate variable and silently dropped
                l = tf.nn.batch_normalization(l, mean = 0.5, variance = 0.02, offset=0, scale = 1, variance_epsilon=1e-5, name = op_prefix + self.layer_num)
        return self.activation(l)

    def layer_weight(self, norm = tf.nn.l2_loss):
        """Return norm(W1) + norm(W2), usable as a weight-decay term."""
        return norm(self.W1) + norm(self.W2)


In [47]:
class CSE674AutoEncoder():
    """Convolutional auto-encoder with a fully connected bottleneck.

    Graph layout: ConvLayer stack -> flatten -> HiddenLayer encoders -> Z ->
    the same HiddenLayers run as decoders -> reshape -> DeConvLayer stack ->
    X_hat. The constructor builds the graph, the training op and a dedicated
    tf.Session whose variables it initialises.
    """

    def __init__(self, image_sz, hidden_layer_sizes , conv_layer_sizes, rho=0.01, alpha=0.0001, beta=3, activation=tf.nn.sigmoid, optimizer=None, name ='CNN_'):
        """Build the model.

        Args:
            image_sz: side length of the square, single-channel input images.
            hidden_layer_sizes: widths of the dense encoder layers; the last
                entry is the width of the code Z.
            conv_layer_sizes: one (num_filters, filter_sz, stride,
                apply_batch_norm) tuple per convolution.
            rho, alpha, beta: stored on the instance; the current loss does
                not use them.
            activation: accepted for interface compatibility; not used.
            optimizer: optimizer exposing minimize(); a fresh AdamOptimizer
                is created per instance when omitted.
            name: accepted for interface compatibility; not used.

        Raises:
            ValueError: if conv_layer_sizes is empty.
        """
        if not conv_layer_sizes:
            raise ValueError("conv_layer_sizes must contain at least one layer specification")

        self.hidden_layer_sizes = hidden_layer_sizes
        self.rho=rho  # sparse parameters
        self.alpha =alpha
        self.beta=beta
        # Create the optimizer here rather than in the signature: a default
        # argument is evaluated once and would be shared by every instance.
        self.optimizer = tf.train.AdamOptimizer() if optimizer is None else optimizer
        self.image_sz = image_sz

        self.X = tf.placeholder(tf.float32, shape =(None, image_sz, image_sz, 1), name = 'X')
        self.batch_sz = tf.placeholder(tf.int32, shape=(), name='batch_sz')

        self.encode_layers = []
        self.decode_layers = []
        self.layers_weights = []

        # ---- convolutional encoder -------------------------------------
        cnn_layers = []
        output_shapes = []   # per conv layer: [channels, height, width, apply_batch_norm]
        cuboid_count = 1
        input_channels = 1   # grey-scale input
        dim = image_sz       # spatial size after each convolution
        final_num_units = 0
        for each_cnn_cuboid in conv_layer_sizes:
            num_filters = each_cnn_cuboid[0]
            filter_sz = each_cnn_cuboid[1]
            stride = each_cnn_cuboid[2]
            apply_batch_norm = each_cnn_cuboid[3]

            cnn_layer = ConvLayer(input_channels, num_filters, 'CNN_'+str(cuboid_count), apply_batch_norm, filter_sz=filter_sz, stride = stride)
            cnn_layers.append(cnn_layer)
            input_channels = num_filters
            cuboid_count += 1
            # 'SAME' padding: output size is ceil(input size / stride)
            dim = int(np.ceil(float(dim) / stride))
            output_shapes.append([num_filters, dim, dim, apply_batch_norm])
            final_num_units = num_filters*dim*dim

        output = self.X
        for layer in cnn_layers:
            output = layer.forward(output, reuse =False,  is_training =True)

        # ---- dense encoder / decoder (shared HiddenLayer objects) ------
        layer_num = 0
        M1 = final_num_units
        for M2 in hidden_layer_sizes:
            layer_num += 1
            layer = HiddenLayer(M1, M2 ,layer_num)
            M1 = M2
            self.encode_layers.append(layer)
            self.decode_layers.append(layer)

        # output flattened before passing to the autoencoder
        output = tf.contrib.layers.flatten(output)
        for layer in self.encode_layers:
            output = layer.forward(output, encode=True, batch_norm = False)
            # layer_weight() already covers both the encode and decode
            # matrices, so each shared layer is recorded exactly once
            self.layers_weights.append(layer.layer_weight())

        self.Z = output
        for layer in reversed(self.decode_layers):
            output = layer.forward(output, batch_norm = False)

        # ---- transposed-convolution decoder ----------------------------
        last_conv_shape = output_shapes[-1]
        # kept on the instance so other code can look up the bottleneck geometry
        self.last_conv_shape = last_conv_shape

        # Back to an NHWC cuboid: (batch, height, width, channels), matching
        # the layout that was flattened above and the deconv output shapes.
        output = tf.reshape(output, shape = (self.batch_sz, last_conv_shape[1], last_conv_shape[2], last_conv_shape[0]))

        deconv_layers = []
        # mirror every conv layer except the last; the final deconv back to
        # the image is added separately below
        deconv_conv_layer_sizes = conv_layer_sizes[:-1]
        deconv_output_shapes = output_shapes[:-1]
        for each_output_shape, each_filter_shape in zip(reversed(deconv_output_shapes), reversed(deconv_conv_layer_sizes)):
            output_shape = [self.batch_sz, each_output_shape[1], each_output_shape[2], each_output_shape[0]]
            cnn_layer = DeConvLayer(last_conv_shape[0], each_output_shape[0], 'DE_CNN_'+str(cuboid_count), output_shape=output_shape, apply_batch_norm = last_conv_shape[3], filter_sz=each_filter_shape[1], stride = each_filter_shape[2])
            deconv_layers.append(cnn_layer)
            last_conv_shape = each_output_shape
            cuboid_count += 1

        # assuming gray scale images
        final_output_shape = [self.batch_sz, self.image_sz, self.image_sz, 1]
        final_filter_shape = conv_layer_sizes[0]
        cnn_layer = DeConvLayer(last_conv_shape[0], 1, 'DE_CNN_'+str(cuboid_count), output_shape=final_output_shape, apply_batch_norm = last_conv_shape[3], filter_sz=final_filter_shape[1], stride = final_filter_shape[2])
        deconv_layers.append(cnn_layer)

        self.deconv_layers = deconv_layers
        for layer in deconv_layers:
            output = layer.forward(output, reuse =False, is_training= True)

        self.X_hat = output
        # From here on the instance attribute `loss` is the loss tensor and
        # shadows the method of the same name.
        self.loss = self.loss(self.X)
        self.train_op  = self.optimizer.minimize(self.loss)

        # always initialize global variables after optimizer
        self.init_op = tf.global_variables_initializer()
        self.session = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
        self.session.run(self.init_op)

    def kl_divergence(self, rho, rho_hat):
        """KL divergence between Bernoulli(rho) and Bernoulli(rho_hat), element-wise over rho_hat."""
        return rho * tf.log(rho) - rho * tf.log(rho_hat) + (1 - rho) * tf.log(1 - rho) - (1 - rho) * tf.log(1 - rho_hat)

    def loss(self,X):
        """Reconstruction loss: half the squared error per image, averaged over the batch.

        No sparsity (KL) or weight-decay term is included.
        """
        diff = tf.contrib.layers.flatten(X - self.X_hat)
        cost= 0.5*tf.reduce_mean(tf.reduce_sum(diff**2,axis=1))
        return cost

    def train_each_batch(self, X, batch_sz):
        """Run one optimisation step; returns [loss_value, train_op_result]."""
        return self.session.run([self.loss, self.train_op], feed_dict ={self.X:X, self.batch_sz: batch_sz})

    def predict(self, X, batch_sz):
        """Return the reconstruction X_hat for the images X."""
        return self.session.run(self.X_hat, feed_dict={self.X:X, self.batch_sz: batch_sz})

    def extract_Z(self, X, batch_sz):
        """Return the bottleneck code Z for the images X."""
        return self.session.run(self.Z, feed_dict= {self.X:X, self.batch_sz: batch_sz})

    def predict_using_Z_old(self, X, Z, batch_sz):
        """Decode the supplied code Z (X is fed as well, for interface compatibility).

        Z is injected through feed_dict; the self.Z tensor is left untouched
        so extract_Z keeps working afterwards.
        """
        return self.session.run(self.X_hat, feed_dict= {self.X:X, self.Z: Z, self.batch_sz: batch_sz})

    def predict_using_Z(self, Z, batch_sz):
        """Decode an externally supplied code Z into images.

        The code is fed in place of the self.Z tensor, so the decoder that was
        built in the constructor is reused and no ops are added to the graph
        on each call.
        """
        return self.session.run(self.X_hat, feed_dict={self.Z: Z, self.batch_sz: batch_sz})
    

In [48]:
def _train_autoencoder(autoencoder, images, batch_sz, epochs):
    """Train `autoencoder` on `images` in mini-batches; return (per-batch losses, [[epoch, seconds], ...])."""
    losses = []
    epoch_times = []
    num_batches = images.shape[0] // batch_sz
    for j in range(epochs):
        start_time = time.time()
        for i in range(num_batches):
            batch = images[batch_sz*i: batch_sz*(i+1)]
            # train_each_batch returns [loss_value, train_op_result]; keep
            # only the scalar so the loss history can be plotted
            losses.append(autoencoder.train_each_batch(batch, batch_sz)[0])
        print('epoch:', j)
        epoch_times.append([j, time.time() - start_time])
    return losses, epoch_times


def faceData():
    """Train one auto-encoder per face set, show a sample translation and plot timings and losses.

    Reads the module-level `celeb_glasses` / `celeb_no_glasses` arrays. The
    two models are published as module-level names because
    visualizeSampleImage looks them up there.
    """
    global ae_with_glasses, ae_no_glasses

    hidden_layer_sizes = [500, 100, 40, 10]
    conv_layer_sizes = [(8, 5, 2, False), (16, 5, 2, False)]
    batch_sz = 64
    image_sz = 64
    epochs = 4

    with tf.variable_scope('glasses_AE'):
        ae_with_glasses =   CSE674AutoEncoder(image_sz, hidden_layer_sizes, conv_layer_sizes)
    with tf.variable_scope('No_glasses_AE'):
        ae_no_glasses =   CSE674AutoEncoder(image_sz, hidden_layer_sizes, conv_layer_sizes)

    #first with glasses
    celebs_with_glasses = np.asarray(celeb_glasses)
    losses_with_glasses, times = _train_autoencoder(ae_with_glasses, celebs_with_glasses, batch_sz, epochs)
    print('Training the celeb data with glasses is done!')

    #Now with_out glasses
    celebs_with_no_glasses = np.asarray(celeb_no_glasses)
    losses_no_glasses, _ = _train_autoencoder(ae_no_glasses, celebs_with_no_glasses, batch_sz, epochs)
    print('Training the celeb data celeb without glasses is also done!')

    print('done!')
    index = 90
    visualizeSampleImage(index, batch_sz)

    # seconds per epoch, measured for the glasses model only
    print("Algorithm Performance")
    plt.figure()
    times = np.asarray(times)
    plt.plot(times[:,0], times[:,1])
    plt.xlabel('epoch')
    plt.ylabel('seconds')
    plt.show()

    #plot losses
    print('Losses with glasses')
    plt.figure()
    plt.plot(losses_with_glasses)
    plt.xlabel('batch')
    plt.ylabel('loss')
    plt.show()

    print('Losses without glasses')
    plt.figure()
    plt.plot(losses_no_glasses)
    plt.xlabel('batch')
    plt.ylabel('loss')
    plt.show()
    
# Entry point: run the whole training and visualisation pipeline when this
# code is executed directly (a notebook cell also runs with __name__ set to
# '__main__').
if __name__ == '__main__':
    faceData()
    

epoch: 0
epoch: 1


KeyboardInterrupt: 