# Ladder Network with Convolutional Layers
**Input**: M,N,K specifying the general architecture: INPUT -> [[CONV -> RELU]*N -> POOL]*M -> [FC -> RELU]*K -> FC

The Ladder network typically has the following layer structure: [input size (say 700), 1000, 500, 250, 250, 250, 10]

# Data

In [1]:
!pip install attributedict
import numpy as np
from sklearn.decomposition import PCA
import scipy.io as sio
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
import os
import random
from random import shuffle
from skimage.transform import rotate
import scipy.ndimage
from sklearn.model_selection import train_test_split
import scipy

def load_pavia():
  
  !pip install GoogleDriveDownloader
  from google_drive_downloader import GoogleDriveDownloader as gdd
  gdd.download_file_from_google_drive(file_id='146WN2eZ6Syf-z1KMVRw9GmZdBu_g1JBj',
                                    dest_path='./datasets/paviau.mat', unzip=False)

  gdd.download_file_from_google_drive(file_id='1L9OoAHnLVmPGbfKx8NhEbugxMzE1PG4j',
                                    dest_path='./datasets/paviau_gt.mat', unzip=False)

  X = sio.loadmat('./datasets/paviau.mat')['paviaU']
  y = sio.loadmat('./datasets/paviau_gt.mat')['paviaU_gt']

  return X, y
  
  
def createPatches(X, y, windowSize=5, removeZeroLabels = True):
  """Cut one windowSize x windowSize patch around every pixel of X.

  Args:
    X: array indexed as (row, col, band).
    y: 2-D label map indexed as (row, col).
    windowSize: side length of each square patch.
    removeZeroLabels: when true, patches whose label is not > 0 are dropped
      and the remaining labels are shifted down by one.

  Returns:
    (patchesData, patchesLabels): float arrays of shape
    (n, windowSize, windowSize, bands) and (n,), in row-major pixel order.
  """
  half = int((windowSize - 1) / 2)
  padded = padWithZeros(X, margin=half)
  n_rows, n_cols, n_bands = X.shape[0], X.shape[1], X.shape[2]

  patchesData = np.zeros((n_rows * n_cols, windowSize, windowSize, n_bands))
  patchesLabels = np.zeros((n_rows * n_cols))

  # (row, col) are coordinates in the unpadded image; in the padded image the
  # window centred on that pixel starts at the same (row, col).
  for patch_no, (row, col) in enumerate(np.ndindex(n_rows, n_cols)):
    window = padded[row:row + 2 * half + 1, col:col + 2 * half + 1]
    patchesData[patch_no, :, :, :] = window
    patchesLabels[patch_no] = y[row, col]

  if removeZeroLabels:
    keep = patchesLabels > 0
    patchesData = patchesData[keep, :, :, :]
    patchesLabels = patchesLabels[keep]
    patchesLabels -= 1
  return patchesData, patchesLabels
  
  
def padWithZeros(X, margin=2):
  """Return a float copy of X surrounded by `margin` zeros on both spatial axes.

  Args:
    X: array indexed as (row, col, band).
    margin: number of zero rows/columns added on each side.

  Returns:
    A new array of shape (rows + 2*margin, cols + 2*margin, bands).
  """
  height, width, depth = X.shape[0], X.shape[1], X.shape[2]
  padded = np.zeros((height + 2 * margin, width + 2 * margin, depth))
  # Drop the original data into the centre of the zero canvas.
  padded[margin:margin + height, margin:margin + width, :] = X
  return padded
  
def standartizeData(X):
  """Standardize every band of X with a fitted sklearn StandardScaler.

  Args:
    X: array indexed as (row, col, band).

  Returns:
    (newX, scaler): the scaled array in the original 3-D shape and the
    fitted scaler.
  """
  n_rows, n_cols, n_bands = X.shape[0], X.shape[1], X.shape[2]
  # One row per pixel, one column per band.
  pixels = np.reshape(X, (-1, n_bands))
  scaler = preprocessing.StandardScaler().fit(pixels)
  scaled = np.reshape(scaler.transform(pixels), (n_rows, n_cols, n_bands))
  return scaled, scaler
  
  
def applyPCA(X, numComponents=75):
  """Reduce the band axis of X to `numComponents` whitened PCA components.

  Args:
    X: array indexed as (row, col, band).
    numComponents: number of principal components to keep.

  Returns:
    (newX, pca): array of shape (rows, cols, numComponents) and the fitted
    PCA object.
  """
  n_rows, n_cols = X.shape[0], X.shape[1]
  # One row per pixel, one column per band.
  pixels = np.reshape(X, (-1, X.shape[2]))
  pca = PCA(n_components=numComponents, whiten=True)
  projected = pca.fit_transform(pixels)
  return np.reshape(projected, (n_rows, n_cols, numComponents)), pca
  
  
def diff(first, second):
  """Return the items of `first` (order and duplicates kept) not in `second`."""
  excluded = set(second)
  return list(filter(lambda item: item not in excluded, first))


def splitTrainTestSet(X, y, testRatio=0.10):
  """Stratified train/test split with a fixed random_state of 345.

  Args:
    X: samples.
    y: labels; also used as the stratification key.
    testRatio: passed to sklearn's train_test_split as test_size.

  Returns:
    (X_train, X_test, y_train, y_test).
  """
  parts = train_test_split(X, y, test_size=testRatio, random_state=345, stratify=y)
  X_train, X_test, y_train, y_test = parts
  return X_train, X_test, y_train, y_test
  
  
def AugmentData(X_train):
  """Randomly flip or rotate the first half of the patches, in place.

  For each of the first X_train.shape[0] // 2 patches, one of three
  transforms is chosen uniformly at random (Python's `random` module):
  up/down flip, left/right flip, or a rotation by a multiple of 30 degrees
  drawn from [-180, 150]. The drawn angle may be 0.

  Args:
    X_train: 4-D array indexed as (patch, row, col, band). It is modified
      in place.

  Returns:
    The same array object that was passed in.
  """
  for i in range(X_train.shape[0] // 2):
    patch = X_train[i, :, :, :]
    choice = random.randint(0, 2)
    if choice == 0:
      augmented = np.flipud(patch)
    elif choice == 1:
      augmented = np.fliplr(patch)
    else:
      angle = random.randrange(-180, 180, 30)
      # Use the public scipy.ndimage.rotate; the scipy.ndimage.interpolation
      # namespace is deprecated. Arguments are unchanged from the original
      # call (cubic spline, no prefilter, zero fill outside the patch).
      augmented = scipy.ndimage.rotate(patch, angle, axes=(1, 0),
                                       reshape=False, output=None, order=3,
                                       mode='constant', cval=0.0, prefilter=False)

    X_train[i, :, :, :] = augmented

  return X_train
  
  
def oversampleWeakClasses(X, y):
  """Repeat the samples of rarer classes, then shuffle with a fixed seed.

  Each class is repeated round(max_count / class_count) times. NumPy's
  global random state is re-seeded with 42 before the shuffle.

  Args:
    X: 4-D array of samples, first axis aligned with y.
    y: 1-D label array.

  Returns:
    (newX, newY): the oversampled and shuffled samples and labels.
  """
  classes, counts = np.unique(y, return_counts=True)
  inverse_ratios = np.max(counts) / counts

  # Collect the repeated chunk of every class, in np.unique order.
  x_chunks = []
  y_chunks = []
  for cls, ratio in zip(classes, inverse_ratios):
    times = round(ratio)
    members = y == cls
    x_chunks.append(X[members, :, :, :].repeat(times, axis=0))
    y_chunks.append(y[members].repeat(times, axis=0))
  newX = np.concatenate(x_chunks)
  newY = np.concatenate(y_chunks)

  np.random.seed(seed=42)
  order = np.random.permutation(newY.shape[0])
  return newX[order, :, :, :], newY[order]
  
  
  
def savePreprocessedData(X_trainPatches, X_testPatches, y_trainPatches, y_testPatches, windowSize):
  """Write the four arrays as .npy files into a mounted Google Drive folder.

  Mounts Google Drive at /content/gdrive (Colab only) and saves, in this
  order, Xtrain, Xtest, ytrain and ytest. Each file name ends in
  "WindowSize<windowSize>.npy".
  """
  from google.colab import drive
  drive.mount('/content/gdrive')

  out_dir = "/content/gdrive/My Drive/colab/Ladder-CNN/preprocessedData/"
  suffix = "WindowSize" + str(windowSize) + ".npy"
  named_arrays = (
      ("Xtrain", X_trainPatches),
      ("Xtest", X_testPatches),
      ("ytrain", y_trainPatches),
      ("ytest", y_testPatches),
  )
  for stem, array in named_arrays:
    with open(out_dir + stem + suffix, 'wb') as outfile:
      np.save(outfile, array)
      
      
      
      
# Global Variables
numComponents = 30
windowSize = 5
testRatio = 0.25
saved = True

from google.colab import drive
drive.mount('/content/gdrive')

if saved:
  # Re-use the arrays written by an earlier preprocessing run.
  X_train = np.load("/content/gdrive/My Drive/colab/Ladder-CNN/preprocessedData/XtrainWindowSize" + str(windowSize) + ".npy")
  y_train = np.load("/content/gdrive/My Drive/colab/Ladder-CNN/preprocessedData/ytrainWindowSize" + str(windowSize) + ".npy")
  X_test = np.load("/content/gdrive/My Drive/colab/Ladder-CNN/preprocessedData/XtestWindowSize" + str(windowSize) + ".npy")
  y_test = np.load("/content/gdrive/My Drive/colab/Ladder-CNN/preprocessedData/ytestWindowSize" + str(windowSize) + ".npy")
  print(X_train.shape)

else:
  # Full pipeline: load -> standardize -> PCA -> patches -> split ->
  # oversample -> augment -> save to Drive.
  X, y = load_pavia()
  X,_ = standartizeData(X)
  X,pca = applyPCA(X,numComponents=numComponents)
  XPatches, yPatches = createPatches(X, y, windowSize=windowSize)
  X_train, X_test, y_train, y_test = splitTrainTestSet(XPatches, yPatches, testRatio)
  print(X_train.shape)
  X_train, y_train = oversampleWeakClasses(X_train, y_train)
  X_train = AugmentData(X_train)
  savePreprocessedData(X_train, X_test, y_train, y_test, windowSize = windowSize)
  print(X_train.shape)

Collecting attributedict
  Downloading https://files.pythonhosted.org/packages/f8/74/3b48e2749e1e96ba05fde101618de8f72dcb94cad78247677ee412fb8312/attributedict-0.1.8.tar.gz
Collecting easypackage>=0.1.8 (from attributedict)
  Downloading https://files.pythonhosted.org/packages/94/ae/858e97891b7b27f958a2fbc4fb42aacaa56f8c083e0e8043e713dea9bb5f/easypackage-0.1.8.tar.gz
Collecting deepdiff>=3.3.0 (from attributedict)
  Downloading https://files.pythonhosted.org/packages/50/0b/87df7f45ce7dc02aa576458ffdf146f0b350d541fce373a91e8a81751deb/deepdiff-3.3.0-py3-none-any.whl
Collecting tox>=3.0.0 (from attributedict)
[?25l  Downloading https://files.pythonhosted.org/packages/d0/43/2160a300e0b77a929a980f36ac7427dcef8f4ddac7a8c21e5a8baedad828/tox-3.5.3-py2.py3-none-any.whl (53kB)
[K    100% |████████████████████████████████| 61kB 6.8MB/s 
[?25hCollecting pygments>=2.2.0 (from attributedict)
[?25l  Downloading https://files.pythonhosted.org/packages/02/ee/b6e02dc6529e82b75bb06823ff7d005b141037cb

# CIFAR-10 Data
from [https://github.com/wenxinxu/resnet-in-tensorflow]


In [0]:
import tarfile
from six.moves import urllib
import sys
import numpy as np
import pickle
import os
import cv2
import tensorflow as tf

# Where the CIFAR-10 archive is downloaded/extracted and where it comes from.
data_dir = 'cifar10_data'
# Path prefix of the training batch files; the batch number is appended to it.
full_data_dir = 'cifar10_data/cifar-10-batches-py/data_batch_'
# Path of the single batch file used as validation data.
vali_dir = 'cifar10_data/cifar-10-batches-py/test_batch'
DATA_URL = 'http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz'


# Image geometry and number of classes.
IMG_WIDTH = 32
IMG_HEIGHT = 32
IMG_DEPTH = 3
NUM_CLASS = 10

TRAIN_RANDOM_LABEL = False # Replace the training labels with random labels?
VALI_RANDOM_LABEL = False # Replace the validation labels with random labels?

NUM_TRAIN_BATCH = 5 # How many training batch files to read in (data_batch_1 .. data_batch_<N>)
EPOCH_SIZE = 10000 * NUM_TRAIN_BATCH


def maybe_download_and_extract():
    '''
    Download the CIFAR-10 archive from DATA_URL into data_dir and extract it there.
    Nothing is done when the archive file already exists; in particular an
    existing archive is not re-extracted.
    :return: nothing
    '''
    dest_directory = data_dir
    os.makedirs(dest_directory, exist_ok=True)
    filename = DATA_URL.split('/')[-1]
    filepath = os.path.join(dest_directory, filename)
    if not os.path.exists(filepath):
        def _progress(count, block_size, total_size):
            # urlretrieve reports a non-positive total size when the server
            # does not send one; skip the percentage instead of dividing by it.
            if total_size <= 0:
                return
            sys.stdout.write('\r>> Downloading %s %.1f%%' % (filename, float(count * block_size)
                                                             / float(total_size) * 100.0))
            sys.stdout.flush()
        filepath, _ = urllib.request.urlretrieve(DATA_URL, filepath, _progress)
        print()
        statinfo = os.stat(filepath)
        print('Successfully downloaded', filename, statinfo.st_size, 'bytes.')
        # NOTE(review): the archive is fetched over plain HTTP and extracted
        # without member filtering; only use with a trusted DATA_URL.
        # The context manager closes the archive handle even if extraction fails.
        with tarfile.open(filepath, 'r:gz') as archive:
            archive.extractall(dest_directory)


def _read_one_batch(path, is_random_label):
    '''
    Load one pickled batch file and return its images and labels.
    :param path: path of one pickled batch file; the unpickled object must
    provide the keys 'data' and 'labels'
    :param is_random_label: if truthy, ignore the stored labels and draw one
    uniform random label in [0, 10) per entry of 'data'
    :return: (data, label) -- 'data' exactly as stored in the file, and the
    labels as a numpy array
    '''
    # NOTE(security): pickle.load can execute arbitrary code; only open
    # batch files from a trusted source.
    # The context manager closes the file even when unpickling fails.
    with open(path, 'rb') as fo:
        dicts = pickle.load(fo, encoding='latin1')

    data = dicts['data']
    if not is_random_label:
        label = np.array(dicts['labels'])
    else:
        # One random label per sample, so batches of any size stay aligned
        # (the size used to be hard-coded to 10000).
        label = np.random.randint(low=0, high=10, size=len(data))
    return data, label


def read_in_all_images(address_list, shuffle=True, is_random_label = False):
    """
    Read every batch file in address_list, stack the results, reshape the
    flat rows into images and optionally shuffle images and labels together.
    :param address_list: a list of paths of pickled batch files
    :param shuffle: only the literal True triggers the shuffle
    :param is_random_label: forwarded to _read_one_batch
    :return: (data, label). data is a float32 4D array [num_images,
    image_height, image_width, image_depth]; label is a 1D array [num_images]
    """
    row_len = IMG_WIDTH * IMG_HEIGHT * IMG_DEPTH
    # The empty seed arrays fix the dtype of the concatenated results.
    data_parts = [np.array([]).reshape([0, row_len])]
    label_parts = [np.array([])]

    for address in address_list:
        print('Reading images from ' + address)
        part_data, part_label = _read_one_batch(address, is_random_label)
        data_parts.append(part_data)
        label_parts.append(part_label)

    # Concatenate along axis 0 by default
    data = np.concatenate(data_parts)
    label = np.concatenate(label_parts)
    num_data = len(label)

    # The two-step reshape (Fortran order first) is deliberate; keep the order.
    data = data.reshape((num_data, IMG_HEIGHT * IMG_WIDTH, IMG_DEPTH), order='F')
    data = data.reshape((num_data, IMG_HEIGHT, IMG_WIDTH, IMG_DEPTH))

    if shuffle is True:
        print('Shuffling')
        permutation = np.random.permutation(num_data)
        data = data[permutation, ...]
        label = label[permutation]

    return data.astype(np.float32), label


def horizontal_flip(image, axis):
    '''
    Flip an image with 50% probability, using cv2.flip.
    :param image: a 3 dimensional numpy array representing an image
    :param axis: flip code passed to cv2.flip (0 for vertical flip and 1 for horizontal flip)
    :return: the flipped image, or the input unchanged when no flip is drawn
    '''
    coin = np.random.randint(low=0, high=2)
    return cv2.flip(image, axis) if coin == 0 else image


def whitening_image(image_np):
    '''
    Whiten each image separately: subtract its mean and divide by its
    (floored) standard deviation. The input array is overwritten.
    :param image_np: a 4D numpy array representing a batch of images
    :return: the same array object, whitened in place
    '''
    # Lower bound for the divisor, so a constant image does not divide by zero.
    std_floor = 1.0/np.sqrt(IMG_HEIGHT * IMG_WIDTH * IMG_DEPTH)
    for idx in range(len(image_np)):
        single = image_np[idx, ...]
        mean = np.mean(single)
        std = np.max([np.std(single), std_floor])
        image_np[idx, ...] = (single - mean) / std
    return image_np


def random_crop_and_flip(batch_data, padding_size):
    '''
    Randomly crop every image of a batch to IMG_HEIGHT x IMG_WIDTH, then
    pass each crop through horizontal_flip with axis=1.
    :param padding_size: int. how many layers of 0 padding was added to each side
    (assumes the images are IMG_HEIGHT + 2*padding_size by IMG_WIDTH + 2*padding_size)
    :param batch_data: a 4D batch array
    :return: new 4D array [len(batch_data), IMG_HEIGHT, IMG_WIDTH, IMG_DEPTH]
    with the randomly cropped and flipped images
    '''
    cropped_batch = np.zeros((len(batch_data), IMG_HEIGHT, IMG_WIDTH, IMG_DEPTH))

    # Valid top-left offsets run from 0 to 2 * padding_size inclusive.
    # randint's `high` is exclusive, hence the + 1; without it the last
    # offset is never drawn and padding_size == 0 raises ValueError.
    offset_bound = 2 * padding_size + 1

    for i in range(len(batch_data)):
        x_offset = np.random.randint(low=0, high=offset_bound, size=1)[0]
        y_offset = np.random.randint(low=0, high=offset_bound, size=1)[0]
        cropped_batch[i, ...] = batch_data[i, ...][x_offset:x_offset+IMG_HEIGHT,
                      y_offset:y_offset+IMG_WIDTH, :]

        cropped_batch[i, ...] = horizontal_flip(image=cropped_batch[i, ...], axis=1)

    return cropped_batch


def prepare_train_data(padding_size):
    '''
    Read training batches 1..NUM_TRAIN_BATCH and zero-pad both spatial axes
    of every image by padding_size on each side.
    :param padding_size: int. how many layers of zero pads to add on each side?
    :return: all the train data and corresponding labels
    '''
    path_list = [full_data_dir + str(batch_no) for batch_no in range(1, NUM_TRAIN_BATCH+1)]
    data, label = read_in_all_images(path_list, is_random_label=TRAIN_RANDOM_LABEL)

    # Pad only the two spatial axes; the batch and channel axes are untouched.
    border = (padding_size, padding_size)
    data = np.pad(data, pad_width=((0, 0), border, border, (0, 0)),
                  mode='constant', constant_values=0)

    return data, label


def read_validation_data():
    '''
    Read the validation batch (vali_dir) and whiten every image.
    :return: Validation image data as 4D numpy array. Validation labels as 1D numpy array
    '''
    images, labels = read_in_all_images([vali_dir], is_random_label=VALI_RANDOM_LABEL)
    return whitening_image(images), labels

In [91]:
# Fetch CIFAR-10 if needed, then load the training and validation sets.
maybe_download_and_extract()
# NOTE: this rebinds the module-level names X and y.
X,y = prepare_train_data(padding_size = 0)
X_val, y_val = read_validation_data()

# Sanity check of the loaded array shapes.
print(X.shape)
print(X_val.shape)

Reading images from cifar10_data/cifar-10-batches-py/data_batch_1
Reading images from cifar10_data/cifar-10-batches-py/data_batch_2
Reading images from cifar10_data/cifar-10-batches-py/data_batch_3
Reading images from cifar10_data/cifar-10-batches-py/data_batch_4
Reading images from cifar10_data/cifar-10-batches-py/data_batch_5
Shuffling
Reading images from cifar10_data/cifar-10-batches-py/test_batch
Shuffling
(50000, 32, 32, 3)
(10000, 32, 32, 3)


# Conv. Ladder Net
Architecture: INPUT -> [[CONV -> RELU]*N -> POOL]*M -> [FC -> RELU]*K -> FC

**Params**: N,M,K,filter_size (Array of length N)

**Default**: N=3,M=0,K=1, filter_size=[3*PCA_comp=90, 30, 15]

**For now use only convolution**


In [0]:
import tensorflow as tf
from attributedict.collections import AttributeDict

def train(X,y,X_test=None,y_test=None,N=3,filter_size=[90,30,15],fc=[],kernel_size=5,
          denoising_cost=[10,1,0.1,0.1,0.1],num_epochs=150,batch_size=200,num_labeled=100,noise_std=0.3,lr=0.02,
          decay_after=15):
  """Build a convolutional Ladder network (TF1 graph mode) and train it.

  Layer stack: input -> one conv layer per entry of `filter_size` -> one
  fully connected layer per entry of `fc` -> softmax. A clean and a noisy
  encoder pass share weights; a decoder reconstructs every layer and the
  per-layer reconstruction costs are added to the supervised cost.

  Args:
    X: training samples, 4-D array (examples, WND_SZE, WND_SZE, channels).
      Every sample is also used as unlabeled data.
    y: 1-D training labels; classes are selected with `y == c` for
      c in range(number of distinct test labels).
    X_test, y_test: evaluation data. Required despite the None defaults.
    N: ignored; the number of conv layers is len(filter_size).
    filter_size: output channels of each conv layer.
    fc: sizes of the fully connected layers (may be empty).
    kernel_size: convolution kernel size.
    denoising_cost: weight of the reconstruction cost per layer; needs
      2 + len(filter_size) + len(fc) entries.
    num_epochs: epochs to run (counting restored ones).
    batch_size: doubled internally before use.
    num_labeled: total number of labeled samples, or -1 to use all labels.
    noise_std: standard deviation of the noise in the corrupted encoder.
    lr: Adam learning rate.
    decay_after: currently unused.

  Side effects: resets the default graph, prints progress, writes summaries
  to ./log_ladder and checkpoints to checkpoints/ (restoring from there if a
  checkpoint exists).

  Returns:
    None.
  """
  assert len(denoising_cost) == 2+len(filter_size)+len(fc), "Please specify denoising cost for every Layer. len(denoising_cost) != 2+len(fc)+len(filter_size)"
  # Checked up front: y_test is needed a few lines below.
  assert X_test is not None, "Check if Test data is present in session"

  # Private copies, so the shared default lists can never be mutated.
  filter_size = list(filter_size)
  fc = list(fc)
  denoising_cost = list(denoising_cost)

  tf.reset_default_graph()
  tf.set_random_seed(12345)
  #We double the batch size here. This has the advantage that in case num_labeled is -1 (use all labels) we can use half of the
  #batch size for the clean encoder and the other half for the unsupervised run
  batch_size *= 2

  #Number of convolutions
  N = len(filter_size)
  #Number of fully connected layers
  K = len(fc)

  #Shape of X: (?,WND_SZE,WND_SZE, N_CHANNELS)
  WND_SZE = X.shape[1]
  N_CHANNELS = X.shape[3]
  N_CLASSES = len(np.unique(y))
  N_EXAMPLES = X.shape[0]
  DEPTH = X.shape[-1]

  L = K+N+2 #Input+Convs+Softmax

  #Create list of action,output-shape pairs, e.g. fs=[90,30,15] & fc=[100,50,20] would correspond to
  #{'conv',(?,5,5,90);'conv',(?,5,5,30);'conv',(?,5,5,15);'relu',(?,100);'relu',(?,50);'relu',(?,20)}
  #Implicit: 'flatten' & 'softmax'
  shapes = [('conv',s) for s in filter_size]+[('relu',s) for s in fc]+[('softmax',N_CLASSES)]

  num_labeled_tf = tf.placeholder(tf.int32, shape=())

  n_classes = len(np.unique(y_test))
  n_labeled_per_class = int(num_labeled/n_classes)

  #Create X_labeled and X_unlabeled where X_labeled has num_labeled entries which are balanced w.r.t. the class labels
  indices = np.arange(len(y))
  i_labeled = []
  for c in range(n_classes):
    i_labeled += list(indices[y==c][:n_labeled_per_class])

  X_labeled = X[i_labeled,:,:,:]
  y_labeled = y[i_labeled]

  if num_labeled > batch_size:
    n_labeled_per_class = int(0.5*batch_size/n_classes) #Use half of the batch for the labeled points and the rest for the unlabeled
  else:
    n_labeled_per_class = int(num_labeled/n_classes)

  #Take everything as unlabeled data
  X_unlabeled = X

  #Create dataset from tensor slices
  features_placeholder_labeled = tf.placeholder(X_labeled.dtype, X_labeled.shape)
  features_placeholder = tf.placeholder(X_unlabeled.dtype, X_unlabeled.shape)
  labels_placeholder = tf.placeholder(y.dtype, y.shape) #This is for num_labeled == -1
  labels_placeholder_labeled = tf.placeholder(y_labeled.dtype, y_labeled.shape)

  ds_lab = tf.data.Dataset.from_tensor_slices((features_placeholder_labeled, labels_placeholder_labeled))

  n_unlab_per_batch = batch_size-n_labeled_per_class*n_classes
  ds_unlab = tf.data.Dataset.from_tensor_slices(features_placeholder)
  ds_unlab = ds_unlab.shuffle(buffer_size=10, reshuffle_each_iteration=True).batch(batch_size=n_unlab_per_batch, drop_remainder=True).repeat()
  print("Size unlab batch: %s" % (n_unlab_per_batch))

  ds_full = tf.data.Dataset.from_tensor_slices((features_placeholder,labels_placeholder))
  ds_full = ds_full.shuffle(buffer_size=10, reshuffle_each_iteration=True).batch(batch_size=batch_size, drop_remainder=True).repeat()

  iterator_full = ds_full.make_initializable_iterator()
  iterator_unlab = ds_unlab.make_initializable_iterator()
  #Create datasets for each class
  datasets = [ds_lab.filter(lambda feat,lbl : tf.equal(lbl,lab)) for lab in range(n_classes)]
  iterators = []
  nexts = []

  #Named next_batch (not `next`) so the builtin is not shadowed
  next_batch = ()

  if num_labeled != -1:
    for idx,d in enumerate(datasets):
      datasets[idx] = d.shuffle(buffer_size=10, reshuffle_each_iteration=True).batch(batch_size=n_labeled_per_class, drop_remainder=True).repeat()
      iterators = iterators + [datasets[idx].make_initializable_iterator()]
      nexts = nexts + [iterators[idx].get_next()]

    #Features and labels are shuffled by separate ops that share one seed.
    #NOTE(review): this relies on both ops drawing the same permutation - confirm.
    seed = np.random.randint(100)
    X_out = tf.random.shuffle(tf.concat([x[0] for x in nexts],axis=0),seed = seed)
    y_out = tf.random.shuffle(tf.concat([x[1] for x in nexts],axis=0),seed = seed)
    #Unused; kept so that the sequence of graph ops is unchanged
    y_out_again = tf.random.shuffle(tf.concat([x[1] for x in nexts],axis=0),seed = seed)

    X_out_un = iterator_unlab.get_next()
    #Unlabeled points get the dummy label -1, in the dtype of y so that the concat below type-checks
    y_out_un = tf.constant(shape=([n_unlab_per_batch]), value = -1,dtype=tf.as_dtype(y.dtype))

    next_batch = (tf.concat([X_out,X_out_un],axis=0), tf.concat([y_out,y_out_un],axis=0))
    print(next_batch)
  else:
    next_batch = iterator_full.get_next()


  if num_labeled == -1 or num_labeled > batch_size:
    #Since we doubled the batch size before. Integer division: the value is fed to an int32 placeholder.
    num_labeled = batch_size//2


  features_placeholder_test = tf.placeholder(X_test.dtype, shape=(None,WND_SZE,WND_SZE,N_CHANNELS),name='X_test')
  labels_placeholder_test = tf.placeholder(y_test.dtype, shape=(None,),name='y_test')


  #These two placeholders are not connected to the network below
  inputs =  tf.placeholder(tf.float32, shape=(None,WND_SZE,WND_SZE,N_CHANNELS),name='inputs')
  outputs = tf.placeholder(tf.float32, shape=(None,),name='outputs')
  isTrain = tf.placeholder(tf.bool, shape=())


  #Gamma and beta initialization: Need one gamma (for softmax) and N+K many betas with different shapes.
  #The convolutions use padding='same', so every conv output keeps the spatial size WND_SZE; all
  #per-layer shapes below therefore use WND_SZE (they used kernel_size before, which is only
  #correct when kernel_size == WND_SZE).
  gamma = tf.Variable(tf.ones([N_CLASSES]))
  beta = [tf.Variable(tf.zeros([WND_SZE,WND_SZE,fs])) for fs in filter_size]+[tf.Variable(tf.zeros([s])) for s in fc]
  beta = beta + [tf.Variable(tf.zeros([N_CLASSES]))] #For the last layer

  def usetrain():
    return next_batch[0], next_batch[1]
  def usetest():
    return features_placeholder_test, labels_placeholder_test

  #isTrain selects between the dataset iterators and the fed test placeholders
  net_input, net_output = tf.cond(isTrain, usetrain, usetest)

  #Helper functions
  join = lambda l, u: tf.concat([l, u], axis=0) #Stack along the batch axis (batch, height, w, depth)
  labeled = lambda x: x[:num_labeled_tf] if x is not None else x #Use tf.getitem (implicitly)
  unlabeled = lambda x: x[num_labeled_tf:] if x is not None else x
  split_lu = lambda x: (labeled(x), unlabeled(x))

  #Running average for the clean pass and the labeled points
  ema = tf.train.ExponentialMovingAverage(decay=0.9999)  # to calculate the moving averages of mean and variance
  bn_assigns = []
  #Initialize with shapes (1,WND_SZE, WND_SZE, filter_size); these are statistics, never trained
  running_mean = [tf.Variable(tf.constant(0.0, shape=[1,WND_SZE,WND_SZE,f]), trainable=False) for f in filter_size]+[tf.Variable(tf.constant(0.0, shape=[s]), trainable=False) for s in fc]
  running_mean = running_mean + [tf.Variable(tf.constant(0.0, shape=[N_CLASSES]), trainable=False)]
  running_var = [tf.Variable(tf.constant(1.0, shape=[1,WND_SZE,WND_SZE,f]), trainable=False) for f in filter_size]+[tf.Variable(tf.constant(1.0, shape=[s]), trainable=False) for s in fc]
  running_var = running_var + [tf.Variable(tf.constant(1.0, shape=[N_CLASSES]), trainable=False)]


  def new_activation_dict():
    return AttributeDict({'z': {}, 'h': {}, 's': {}, 'm': {}})

  #Size of the flattened output of the last conv layer
  flat_conv = WND_SZE**2 * filter_size[-1]

  if shapes[-2][0] == 'conv':
    W = tf.Variable(tf.random_normal(shape=[flat_conv,N_CLASSES])) #In case the last layer is a conv layer
    V = tf.Variable(tf.random_normal(shape=[N_CLASSES, flat_conv])) #Matrix for decoder. Takes the softmax layer shape (?,N_CLASSES) -> (?,flat_conv) to then reshape to a tensor
  else:
    W = tf.Variable(tf.random_normal(shape=[shapes[-2][1],N_CLASSES])) #In case the last layer is a fully connected layer.
    V = tf.Variable(tf.random_normal(shape=[N_CLASSES,shapes[-2][1]])) #Matrix for decoder. Takes the softmax layer shape (?,N_CLASSES) -> (?,fully_connected_shape)

  if K>0:
    W_fc = [tf.Variable(tf.random_normal(shape=[flat_conv,fc[0]]))] #The first weight matrix for the fc part.
    V_fc = [tf.Variable(tf.random_normal(shape=[fc[0],flat_conv]))] #Decoder counterpart: maps fc[0] back to the flattened conv output
    if K>1:
      W_fc = W_fc + [tf.Variable(tf.random_normal(shape=[fc[i-1],fc[i]])) for i in range(1,K)]
      V_fc = V_fc + [tf.Variable(tf.random_normal(shape=[fc[i],fc[i-1]])) for i in range(1,K)]
      print(V_fc)


  def g(z_lat, u, size):
    """Denoising function: combine the lateral input z_lat with the signal u from the layer above."""
    shape = tf.shape(u)[1:] #Unused; kept so that the sequence of graph ops is unchanged
    wi = lambda inits, name: tf.Variable(inits * tf.ones(size), name=name)
    a1 = wi(0., 'a1')
    a2 = wi(1., 'a2')
    a3 = wi(0., 'a3')
    a4 = wi(0., 'a4')
    a5 = wi(0., 'a5')
    a6 = wi(0., 'a6')
    a7 = wi(1., 'a7')
    a8 = wi(0., 'a8')
    a9 = wi(0., 'a9')
    a10 = wi(0., 'a10')
    mu = a1 * tf.sigmoid(a2 * u + a3) + a4 * u + a5
    v = a6 * tf.sigmoid(a7 * u + a8) + a9 * u + a10
    z_est = (z_lat - mu) * v + mu
    return z_est


  #Encoder
  def encoder(enc_input, noise_std):
    """Run one encoder pass; returns the softmax output and a dict of per-layer activations/statistics."""
    with tf.variable_scope('encoder', reuse=tf.AUTO_REUSE):
      #Apply noise to the input
      h = tf.cast(enc_input,tf.float32) + tf.random_normal(dtype=tf.float32,shape=tf.shape(enc_input)) * noise_std #Normal noise 0 mean 1 std
      d = AttributeDict() #This is what we will return. It will contain all the information we need
      d.unlabeled = new_activation_dict()
      d.labeled = new_activation_dict()
      d.unlabeled.z[0] = unlabeled(h)
      d.labeled.z[0] = labeled(h)

      for i in range(1,L): #Conv layers first, then the fc layers, then the softmax layer
        d.labeled.h[i-1], d.unlabeled.h[i-1] = split_lu(h)

        operation = shapes[i-1][0]
        output_shape = shapes[i-1][1]

        if operation=='softmax':
          z = tf.layers.flatten(h)
          z = tf.matmul(z,W)
          keep_dims = False
        elif operation=='conv':
          #Compute new z by applying convolution followed by ReLU after normalization
          z = tf.layers.conv2d(h,filters=filter_size[i-1], kernel_size=kernel_size, padding='same')
          keep_dims = True
        else:
          #No need to check for input dim, because flatten preserves batch axis
          z = tf.layers.flatten(h)
          z = tf.matmul(z,W_fc[i-1-N])
          keep_dims = False

        #Shape: (?,WND_SZE,WND_SZE,filter_size) or (?,fc_size)
        #Normalize labeled and unlabeled part separately
        z_lbld, z_unlbld = split_lu(z)

        m_unlbld, s_unlbld = tf.nn.moments(z_unlbld, axes=[0], keep_dims=keep_dims) #Statistics over the batch axis
        m_lbld, s_lbld = tf.nn.moments(z_lbld, axes=[0], keep_dims=keep_dims)
        #Shape: (1,WND_SZE,WND_SZE,filter_size) for conv layers

        if noise_std == 0: #Clean pass
          #Update the running averages and get the mean and variance of the labeled points again
          assign_mean = running_mean[i-1].assign(m_lbld)
          assign_var = running_var[i-1].assign(s_lbld)
          with tf.control_dependencies([assign_mean, assign_var]):
            bn_assigns.append(ema.apply([running_mean[i-1], running_var[i-1]]))
            m_lbld = ema.average(running_mean[i-1])
            s_lbld = ema.average(running_var[i-1])


        z = join(
          (z_lbld-m_lbld) / tf.sqrt(s_lbld + 1e-10),
          (z_unlbld-m_unlbld) / tf.sqrt(s_unlbld + 1e-10))

        if noise_std > 0:
          z += tf.random_normal(tf.shape(z)) * noise_std

        z_lateral = z

        if i==L-1: #We need to apply softmax and multiply with gamma
          z = tf.add(z,beta[i-1])
          z = tf.multiply(z, gamma)
          h = tf.nn.softmax(z)
        else:
          #Now apply activation. But before we apply the activation, add beta.
          #Gamma is not used for ReLU. We apply Gamma for the softmax layer.
          z += beta[i-1] #i starts at 1, but beta starts at 0
          #Apply ReLU
          h = tf.nn.relu(z) #h is split and stored at the beginning of the next iteration

        #Now save the variables: z_lateral, m_unlbld, s_unlbld, h
        d.labeled.z[i], d.unlabeled.z[i] = split_lu(z_lateral) #z itself already has beta (and gamma) applied
        d.unlabeled.s[i] = s_unlbld
        d.unlabeled.m[i] = m_unlbld

      #Get the last h.
      d.labeled.h[i], d.unlabeled.h[i] = split_lu(h)

      return h, d
  #End encoder


  #Clean encoder run
  y_clean, clean = encoder(net_input, noise_std=0.0)

  #Corrupted encoder run
  y_corrupted, corr = encoder(net_input, noise_std=noise_std)

  #Use this to store the z_est etc.
  est = new_activation_dict()

  #Decoder path
  filter_dims = [DEPTH] + filter_size
  cost_recon = []
  for i in reversed(range(L)): #From the top layer L-1 down to the input layer 0
    #Get all the information we need
    z_corr = corr.unlabeled.z[i]
    z_clean = clean.unlabeled.z[i]
    if i != 0:
      z_clean_s = clean.unlabeled.s[i]
      z_clean_m = clean.unlabeled.m[i]

    if i==L-1: #The top level
      #Just normalize the (?,N_CLASSES) output
      ver = corr.unlabeled.h[i]
      size = [N_CLASSES]
      keep_dims = False
    elif i==L-2: #Apply the matrix V
      ver = tf.matmul(est.z.get(i+1), V)
      if K==0: #If we do not have any fully connected layers after this, then reshape
        ver = tf.reshape(ver, shape=[-1,WND_SZE,WND_SZE,filter_size[-1]])
        size = [WND_SZE, WND_SZE, filter_size[-1]]
        keep_dims = True
      else:
        size = [fc[-1]]
        keep_dims = False
    else:
      #Get the corresponding operation:
      operation = shapes[i][0]
      print(operation)
      if operation == 'conv':
        #Deconvolve. This is just a convolution to a new filter size. We leave the kernel size untouched.
        ver = tf.layers.conv2d(est.z.get(i+1),filters=filter_dims[i], kernel_size=kernel_size, padding='same')
        size = [WND_SZE, WND_SZE, filter_dims[i]]
        keep_dims = True
      else: #Operation must be to apply the V_fc matrix
        tmp = tf.layers.flatten(est.z.get(i+1)) #Flatten. Note: This can be optimized by checking if we really need to reshape
        print(tmp)
        print(V_fc[i-N])
        ver = tf.matmul(tmp,V_fc[i-N])
        if (i-N) == 0: #This was the last fully connected layer, now reshape
          ver = tf.reshape(ver, shape=[-1,WND_SZE,WND_SZE,filter_size[-1]])
          size = [WND_SZE, WND_SZE, filter_size[-1]]
          keep_dims = True
        else:
          size = [fc[i-1-N]]
          keep_dims = False

    print(size)
    m, s = tf.nn.moments(ver, axes=[0], keep_dims=keep_dims) #Statistics over the batch axis
    ver = (ver-m) / tf.sqrt(s + 1e-10)

    #Now apply g to get z_est, g(z_corr_from_encoder, ver (u in the paper))
    z_est = g(z_corr, ver, size)

    #Now normalize using the clean mean and clean variance, but only if i != 0
    if i != 0:
      z_est_norm = (z_est - z_clean_m) / tf.sqrt(z_clean_s + 1e-10)
    else:
      z_est_norm = z_est

    #Now compute the cost and append the weighted cost. Weigh by the size of the layer and the denoising cost
    c_tmp = (tf.reduce_mean(tf.reduce_sum(tf.square(z_est_norm - z_clean), 1)) / tf.cast(tf.reduce_prod(tf.shape(z_est)[1:]),dtype=tf.float32)) * denoising_cost[i]
    cost_recon.append(c_tmp)
    est.z[i] = z_est_norm


  y_corrupted = labeled(y_corrupted)
  target = labeled(tf.one_hot(tf.cast(net_output,tf.int32),depth=N_CLASSES))
  target = tf.cast(target, dtype=tf.float32)
  yy = labeled(y_clean)

  with tf.name_scope('supervised_cost'):
    supervised_cost = -tf.reduce_mean(tf.reduce_sum(target*tf.log(y_corrupted), 1), name='supervised_cost')
  supervised_cost_sum = tf.summary.scalar('supervised_cost', supervised_cost)

  with tf.name_scope('unsupervised_cost'):
    #The reconstruction cost only counts while training
    unsupervised_cost = tf.cond(isTrain, lambda: tf.add_n(cost_recon, name='unsupervised_cost'), lambda: tf.constant(0,dtype=tf.float32, shape=()))
  tf.summary.scalar('unsupervised_cost', unsupervised_cost)

  with tf.name_scope('total'):
    loss = supervised_cost + unsupervised_cost
  tf.summary.scalar('total', loss)


  prediction_cost = -tf.reduce_mean(tf.reduce_sum(target*tf.log(yy), 1),name='pred_cost')
  correct_prediction = tf.equal(tf.argmax(yy,1), tf.argmax(target, 1), name='correct_prediction')
  with tf.name_scope('accuracy'):
    accuracy = tf.multiply(tf.reduce_mean(tf.cast(correct_prediction, dtype=tf.float32)),tf.constant(100.0),name='accuracy')
  accuracy_sum = tf.summary.scalar('accuracy', accuracy)


  learning_rate = tf.Variable(lr, trainable=False)
  train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)

  # add the updates of batch normalization statistics to train_step
  bn_updates = tf.group(*bn_assigns)
  with tf.control_dependencies([train_step]):
    train_step = tf.group(bn_updates)

  saver = tf.train.Saver()
  sess = tf.Session()

  merged = tf.summary.merge_all()
  #merged2 can be evaluated with isTrain=False and written to the writers below for periodic summaries
  merged2 = tf.summary.merge([supervised_cost_sum, accuracy_sum])
  train_writer = tf.summary.FileWriter('./log_ladder/train' , sess.graph)
  test_writer = tf.summary.FileWriter('./log_ladder/test')

  sess.run(tf.global_variables_initializer())


  #Initialize the iterators for the data
  sess.run(iterator_full.initializer, feed_dict={features_placeholder: X, labels_placeholder:y })

  sess.run(iterator_unlab.initializer, feed_dict={features_placeholder: X})
  for iterator in iterators:
    sess.run(iterator.initializer, feed_dict={features_placeholder_labeled: X_labeled,
                                            labels_placeholder_labeled: y_labeled})


  #Restore checkpoints, if any
  i_iter = 0
  ckpt = tf.train.get_checkpoint_state('checkpoints/')
  if ckpt and ckpt.model_checkpoint_path:
    print("Found checkpont! Restore...")
    saver.restore(sess, ckpt.model_checkpoint_path)
    #The epoch is the suffix after the last '-', so a '-' elsewhere in the path cannot break parsing
    epoch_n = int(ckpt.model_checkpoint_path.rsplit('-', 1)[-1])
    i_iter = epoch_n+1
    print("Restored Epoch %s" % epoch_n)
  else:
    print("No checkpoint, initialize variables...")
    if not os.path.exists('checkpoints'):
      os.makedirs('checkpoints')
    sess.run(tf.global_variables_initializer()) # initialization
    print(sess.run(tf.report_uninitialized_variables()))



  #For accuracy measures we use the clean encoder and feed the data through the test placeholders.
  #num_labeled_tf is set to the full set size so that nothing is split off as unlabeled.
  def train_acc():
    #Uses the X/y passed to train(), not notebook-level globals
    acc = sess.run(accuracy, feed_dict={isTrain: False, num_labeled_tf:X.shape[0], features_placeholder_test: X, labels_placeholder_test:y})
    return acc

  def test_acc():
    acc = sess.run(accuracy, feed_dict={isTrain: False, num_labeled_tf:X_test.shape[0], features_placeholder_test: X_test, labels_placeholder_test:y_test})
    return acc


  n_iter = int(N_EXAMPLES/batch_size)
  for epoch in range(i_iter,num_epochs):
    for i in range(n_iter):

      #Training step. Set num_labeled to the true num_labeled so that we split the data accordingly.
      sess.run(train_step, feed_dict={isTrain: True, num_labeled_tf:num_labeled, features_placeholder_test: X_test, labels_placeholder_test:y_test})

    print("Epoch: %s Train accuracy: %s Test Accuracy: %s" % (epoch, train_acc(),test_acc()))
    saver.save(sess, 'checkpoints/model.ckpt', epoch)

  print("Final test accuracy is: %s" % test_acc())
  #Flush and release the summary writers before the session goes away
  train_writer.close()
  test_writer.close()
  sess.close()

  tf.reset_default_graph()
  return
        

def delete_checkpoints(checkpoint_dir='checkpoints/'):
  """Recursively delete a checkpoint directory.

  Args:
      checkpoint_dir: path of the directory to remove. Defaults to
          'checkpoints/', so existing zero-argument calls behave as before.

  Does nothing when the path is missing or is not a directory. Errors raised
  by `shutil.rmtree` while deleting are propagated to the caller.
  """
  import os
  import shutil

  # isdir() is already False for a missing path, so a separate exists()
  # check adds nothing.
  if os.path.isdir(checkpoint_dir):
    shutil.rmtree(checkpoint_dir)

In [0]:
# Download the ngrok Linux/amd64 archive and unpack it into the current
# working directory; a later cell launches the extracted binary as ./ngrok.
!wget https://bin.equinox.io/c/4VmDzA7iaHb/ngrok-stable-linux-amd64.zip
!unzip ngrok-stable-linux-amd64.zip

In [32]:
# Directory handed to TensorBoard via --logdir.
LOG_DIR = './log_ladder'
# Start TensorBoard as a background process (trailing '&') listening on
# port 6006 on all interfaces.
get_ipython().system_raw(
    'tensorboard --logdir {} --host 0.0.0.0 --port 6006 &'
    .format(LOG_DIR)
)
# Start ngrok in the background, tunnelling HTTP traffic to port 6006.
get_ipython().system_raw('./ngrok http 6006 &')
# Query the tunnel list served on localhost:4040 and print the public URL of
# the first tunnel.
! curl -s http://localhost:4040/api/tunnels | python3 -c \
    "import sys, json; print(json.load(sys.stdin)['tunnels'][0]['public_url'])"

http://09bb5076.ngrok.io


In [3]:
# Train one model per input-noise standard deviation.
noise_sds = [0.2, 0.3, 0.4]
for ns in noise_sds:
  # Draw a fresh random ordering of the training samples and apply it to the
  # features and labels together so each pair stays aligned.
  indices = np.arange(X_train.shape[0])
  np.random.shuffle(indices)
  X_train, y_train = X_train[indices, :, :, :], y_train[indices]

  # Remove saved checkpoints before the run -- presumably so train() starts
  # from freshly initialised variables instead of restoring the previous
  # noise level's weights.
  delete_checkpoints()
  train(
      X_train, y_train, X_test, y_test,
      num_epochs=10,
      noise_std=ns,
      lr=0.01,
      filter_size=[90, 30, 15],
      fc=[20],
      denoising_cost=[0.1] * 6,
      num_labeled=90,
      batch_size=100,
  )

(128051,)
Size unlab batch: 110
(<tf.Tensor 'concat_3:0' shape=(200, 5, 5, 30) dtype=float64>, <tf.Tensor 'concat_4:0' shape=(200,) dtype=float64>)
[9]
[20]
relu
Tensor("flatten/Reshape:0", shape=(?, 20), dtype=float32)
<tf.Variable 'Variable_19:0' shape=(20, 375) dtype=float32_ref>
[5, 5, 15]
conv
[5, 5, 30]
conv
[5, 5, 90]
conv
[5, 5, 30]
No checkpoint, initialize variables...
[]
Epoch: 0 Train accuracy: 80.21648 Test Accuracy: 69.79614
Epoch: 1 Train accuracy: 86.724815 Test Accuracy: 78.67028
Epoch: 2 Train accuracy: 88.525665 Test Accuracy: 80.92388
Epoch: 3 Train accuracy: 89.96728 Test Accuracy: 90.70507
Epoch: 4 Train accuracy: 91.75797 Test Accuracy: 92.902565
Epoch: 5 Train accuracy: 87.701775 Test Accuracy: 78.89471
Epoch: 6 Train accuracy: 88.05007 Test Accuracy: 81.00804
Epoch: 7 Train accuracy: 88.46007 Test Accuracy: 81.43819
Epoch: 8 Train accuracy: 86.68421 Test Accuracy: 77.64166
Epoch: 9 Train accuracy: 86.20862 Test Accuracy: 81.59715
Final test accuracy is: 81.5971

In [0]:
import os
import shutil

# Wipe the TensorBoard log directory if it is present. isdir() is False for
# a missing path, so it covers the existence check as well.
if os.path.isdir('log_ladder/'):
  shutil.rmtree('log_ladder/')

In [0]:

def unpool_2d(pool, ind, stride=[1, 2, 2, 1], scope='unpool_2d'):
  """Scatter max-pooled values back to the positions recorded in `ind`.

  Unpooling counterpart of `tf.nn.max_pool_with_argmax`
  (see https://arxiv.org/abs/1505.04366).

  Args:
      pool:   4-D max-pooled tensor.
      ind:    argmax indices with the same shape as `pool`. Each entry is
              used as an offset into the flattened (height * width * channels)
              output of its own batch element.
      stride: 4-element sequence; only stride[1] and stride[2] are read, as
              the factors by which dimensions 1 and 2 of `pool` are enlarged.
      scope:  name of the variable scope the ops are created in.

  Returns:
      Tensor of shape [batch, dim1 * stride[1], dim2 * stride[2], channels]
      holding the values of `pool` at the scattered positions; positions that
      receive no value are left at zero by `tf.scatter_nd`.
  """
  with tf.variable_scope(scope):
    # Dynamic shapes: the output enlarges the two middle dimensions.
    in_shape = tf.shape(pool)
    out_shape = [
        in_shape[0],
        in_shape[1] * stride[1],
        in_shape[2] * stride[2],
        in_shape[3],
    ]

    n_values = tf.reduce_prod(in_shape)
    # One flat row per batch element.
    flat_out_shape = [out_shape[0], out_shape[1] * out_shape[2] * out_shape[3]]

    flat_values = tf.reshape(pool, [n_values])

    # Pair every argmax offset with the batch element it belongs to, giving
    # [batch, offset] coordinates into the flattened output.
    batch_ids = tf.range(tf.cast(out_shape[0], tf.int64), dtype=ind.dtype)
    batch_ids = tf.reshape(batch_ids, shape=[in_shape[0], 1, 1, 1])
    batch_grid = tf.ones_like(ind) * batch_ids
    batch_col = tf.reshape(batch_grid, [n_values, 1])
    offset_col = tf.reshape(ind, [n_values, 1])
    coords = tf.concat([batch_col, offset_col], 1)

    dense_shape = tf.cast(flat_out_shape, tf.int64)
    unpooled = tf.scatter_nd(coords, flat_values, shape=dense_shape)
    unpooled = tf.reshape(unpooled, out_shape)

    # Restore the static shape information lost by the dynamic reshape.
    static_in = pool.get_shape()
    static_out = [
        static_in[0],
        static_in[1] * stride[1],
        static_in[2] * stride[2],
        static_in[3],
    ]
    unpooled.set_shape(static_out)
    return unpooled

# Shape check for unpool_2d: build conv -> max-pool-with-argmax -> unpool on a
# placeholder shaped like X_test and print the three resulting shapes.
# NOTE(review): `input` shadows the Python built-in of the same name for the
# rest of the notebook session.
input = tf.placeholder(dtype = X_test.dtype, shape=X_test.shape)

z = tf.layers.conv2d(input,10,[5,5],padding='same') # 10 filters, 5x5 kernel
shape_z = tf.shape(z)
# Max-pool with a 4x4 window, stride 1 and no padding, keeping the argmax
# indices needed for unpooling.
output, argmax = tf.nn.max_pool_with_argmax(z,ksize=[1,4,4,1],strides=[1,1,1,1],padding='VALID')
shape_pooled = tf.shape(output)

# NOTE(review): unpool_2d sizes its output as pooled shape * stride, so with
# stride 1 the unpooled tensor has the pooled shape rather than the shape of
# z -- confirm this is the intended comparison and that the argmax indices
# stay within that smaller output.
unpooled = unpool_2d(output,argmax,stride=[1,1,1,1])
shape_unpooled = tf.shape(unpooled)

# NOTE(review): this session is not closed in the visible part of the cell.
sess = tf.Session()
sess.run(tf.global_variables_initializer())
# Evaluate the three dynamic shapes in a single run over the whole test set.
sz,su,sp = sess.run([shape_z, shape_unpooled,shape_pooled],feed_dict={input:X_test})
# Printed in order: conv output shape, unpooled shape, pooled shape.
print(sz)
print(su)
print(sp)