# TensorFlow__ladder-network

- 論文
  - [Semi-Supervised Learning with Ladder Networks - Antti Rasmus, Harri Valpola, Mikko Honkala, Mathias Berglund, Tapani Raiko](https://arxiv.org/abs/1507.02672)

- code
  - [rinuboney/ladder](https://github.com/rinuboney/ladder)
  - [tarvaina/tensorflow-ladder](https://github.com/tarvaina/tensorflow-ladder)


<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Config" data-toc-modified-id="Config-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Config</a></span><ul class="toc-item"><li><span><a href="#Import" data-toc-modified-id="Import-1.1"><span class="toc-item-num">1.1&nbsp;&nbsp;</span>Import</a></span></li><li><span><a href="#TensorFlow" data-toc-modified-id="TensorFlow-1.2"><span class="toc-item-num">1.2&nbsp;&nbsp;</span>TensorFlow</a></span></li><li><span><a href="#Path" data-toc-modified-id="Path-1.3"><span class="toc-item-num">1.3&nbsp;&nbsp;</span>Path</a></span></li><li><span><a href="#Import-2" data-toc-modified-id="Import-2-1.4"><span class="toc-item-num">1.4&nbsp;&nbsp;</span>Import 2</a></span></li><li><span><a href="#Class" data-toc-modified-id="Class-1.5"><span class="toc-item-num">1.5&nbsp;&nbsp;</span>Class</a></span><ul class="toc-item"><li><span><a href="#batch_normalization" data-toc-modified-id="batch_normalization-1.5.1"><span class="toc-item-num">1.5.1&nbsp;&nbsp;</span>batch_normalization</a></span></li><li><span><a href="#ladder_network" data-toc-modified-id="ladder_network-1.5.2"><span class="toc-item-num">1.5.2&nbsp;&nbsp;</span>ladder_network</a></span></li></ul></li></ul></li><li><span><a href="#Class" data-toc-modified-id="Class-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Class</a></span><ul class="toc-item"><li><span><a href="#class-Session" data-toc-modified-id="class-Session-2.1"><span class="toc-item-num">2.1&nbsp;&nbsp;</span>class Session</a></span></li><li><span><a href="#class-Graph" data-toc-modified-id="class-Graph-2.2"><span class="toc-item-num">2.2&nbsp;&nbsp;</span>class Graph</a></span></li><li><span><a href="#class-_Placeholders" data-toc-modified-id="class-_Placeholders-2.3"><span class="toc-item-num">2.3&nbsp;&nbsp;</span>class _Placeholders</a></span></li><li><span><a href="#class-_ForwardPass" data-toc-modified-id="class-_ForwardPass-2.4"><span class="toc-item-num">2.4&nbsp;&nbsp;</span>class 
_ForwardPass</a></span></li><li><span><a href="#class-_InputLayerWrapper" data-toc-modified-id="class-_InputLayerWrapper-2.5"><span class="toc-item-num">2.5&nbsp;&nbsp;</span>class _InputLayerWrapper</a></span></li><li><span><a href="#class-_EncoderLayer" data-toc-modified-id="class-_EncoderLayer-2.6"><span class="toc-item-num">2.6&nbsp;&nbsp;</span>class _EncoderLayer</a></span></li><li><span><a href="#class-_DecoderLayer" data-toc-modified-id="class-_DecoderLayer-2.7"><span class="toc-item-num">2.7&nbsp;&nbsp;</span>class _DecoderLayer</a></span></li></ul></li><li><span><a href="#input_data" data-toc-modified-id="input_data-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>input_data</a></span></li><li><span><a href="#ladder_on_mnist" data-toc-modified-id="ladder_on_mnist-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>ladder_on_mnist</a></span></li><li><span><a href="#End" data-toc-modified-id="End-5"><span class="toc-item-num">5&nbsp;&nbsp;</span>End</a></span></li></ul></div>

## Config

### Import

In [1]:
# OPTIONAL: Load the "autoreload" extension so that code can change
%load_ext autoreload
# OPTIONAL: always reload modules so that as you change code in src, it gets loaded
%autoreload 2
# With no argument, %aimport lists the modules that will (and will not) be auto-reloaded.
%aimport
# ref: https://ipython.org/ipython-doc/3/config/extensions/autoreload.html

import os,sys
import re
from pathlib import Path
import matplotlib.pyplot as plt
%matplotlib inline

import numpy as np
import sklearn
from sklearn import datasets

# NOTE(review): with seed=None numpy re-seeds itself from an unpredictable
# source, so results differ between runs; use an integer here for repeatability.
seed = None
np.random.seed(seed=seed)

# Record the library versions next to the results.
print("numpy ver: {}".format(np.__version__))
print("scikit-learn ver: {}".format(sklearn.__version__))

Modules to reload:
all-except-skipped

Modules to skip:



  return f(*args, **kwds)
  return f(*args, **kwds)
  return f(*args, **kwds)
  return f(*args, **kwds)


numpy ver: 1.15.0
scikit-learn ver: 0.19.2


### TensorFlow

In [2]:
#____________________________________________________________________________________________________
#  TensorFlow and Keras GPU configuration
##________________________________________________________________________________
##  OPTIONAL : select the GPU that TensorFlow is allowed to see
###____________________________________________________________
###  - https://stackoverflow.com/questions/37893755/tensorflow-set-cuda-visible-devices-within-jupyter
###  These environment variables are set before `import tensorflow` below.
import os
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"]="0"
##________________________________________________________________________________


##________________________________________________________________________________
##  TensorFlow
###____________________________________________________________
import tensorflow as tf
print("tensorflow ver: {}".format(tf.__version__))
###  eager mode (left disabled; the line below is intentionally commented out)
#tf.enable_eager_execution()
print("tf.executing_eagerly(): {}".format(tf.executing_eagerly()))

# You can double check that you have the correct devices visible to TF
#   - https://stackoverflow.com/questions/37893755/tensorflow-set-cuda-visible-devices-within-jupyter
from tensorflow.python.client import device_lib
print("""
________________________________________
Visible GPUs from TensorFlow
________________________________________""")
# Each device description is converted to text and parsed with regular
# expressions (`re` is imported in the first cell of the notebook).
for _device in device_lib.list_local_devices():
    # Extract the three-letter device type and a single-digit device index.
    match = re.search(pattern=r'name: "/device:(?P<name>[A-Z]{3}):(?P<device_num>\d{1})*',
                      string=str(_device))
    if match is None:
        print("Not Match")
        continue
    if match.group("name") == "CPU":
        # CPU entries are reported without a PCI bus id.
        name, device_num = match.group("name", "device_num")
        print()
        print("({}:{})".format(name, device_num))
        continue
    name, device_num = match.group("name", "device_num")
    # Non-CPU entries are additionally searched for their PCI bus id.
    match = re.search(pattern=r'.*pci bus id: (?P<pci_bus_id>\d{4}:\d{2}:\d{2}.\d{1}).*',
                      string=str(_device))
    if match is None:
        print("No GPUs")
        continue
    print("({}:{}: pci_bus_id: {})".format(name, device_num, match.group("pci_bus_id")))
print("________________________________________")

###____________________________________________________________
###  session
# NOTE(review): a `global` statement at module level has no effect, and
# _SESSION is never assigned in this cell because the tf.Session line below is
# commented out.
global _SESSION
# Session options: soft device placement and device-placement logging enabled.
config = tf.ConfigProto(allow_soft_placement=True,
                        log_device_placement=True)
# Let GPU memory allocation grow on demand.
config.gpu_options.allow_growth = True
# NOTE(review): `config` is built here but not passed to any session in this cell.
#_SESSION = tf.Session(config=config)
###____________________________________________________________
##________________________________________________________________________________
#____________________________________________________________________________________________________

  return f(*args, **kwds)
  return f(*args, **kwds)


tensorflow ver: 1.9.0
tf.executing_eagerly(): False

________________________________________
Visible GPUs from TensorFlow
________________________________________

(CPU:0)
(GPU:0: pci_bus_id: 0000:01:00.0)
________________________________________


### Path

In [3]:
# Project root: the parent of the current working directory.
HOME = Path.cwd().parent
print(HOME)

/home/pollenjp/workdir/git/article_script/20180914__semi-supervised-deeplearning-ladder-networks__in_kabuku


In [4]:
# Project directories, all rooted at HOME.
data_Path = HOME / "data"
raw_Path = data_Path / "raw"
plot_images_Path = data_Path / "plot_images"
src_Path = HOME / "src"
path_list = [data_Path, raw_Path, plot_images_Path, src_Path]

# Create the directories that are missing and report the state of every entry.
for _Path in path_list:
    _path = str(_Path)
    if os.path.exists(_path):
        print(os.path.exists(_path), ": ", _path)
        continue
    os.makedirs(name=_path)
    print("make a directory: \n\t", _path)

True :  /home/pollenjp/workdir/git/article_script/20180914__semi-supervised-deeplearning-ladder-networks__in_kabuku/data
True :  /home/pollenjp/workdir/git/article_script/20180914__semi-supervised-deeplearning-ladder-networks__in_kabuku/data/raw
True :  /home/pollenjp/workdir/git/article_script/20180914__semi-supervised-deeplearning-ladder-networks__in_kabuku/data/plot_images
True :  /home/pollenjp/workdir/git/article_script/20180914__semi-supervised-deeplearning-ladder-networks__in_kabuku/src


### Import 2

### Class

In [5]:
import tensorflow as tf
import numpy
from tensorflow.python import control_flow_ops
from time import strftime

#### batch_normalization

In [6]:
## Adapted from http://stackoverflow.com/a/34634291/64979

import tensorflow as tf
from tensorflow.python import control_flow_ops

def batch_norm(inputs, is_training_phase, decay = 0.9, epsilon = 1e-10):
    """
    Batch normalization (without scale and shift) for fully connected layers.

    In the training phase the statistics of the current batch are used and
    folded into exponential moving averages; otherwise the moving averages
    themselves are used.

    Args:
      inputs:            2D Tensor, batch size * layer width
      is_training_phase: scalar boolean Tensor (e.g. a tf.bool placeholder),
                         true indicates training phase
      decay:             decay rate of the exponential moving averages
      epsilon:           small constant added to the variance before the square
                         root so that a zero variance cannot cause a division by zero
    Return:
      normed:            batch-normalized Tensor
      mean:              mean that was used for the normalization
      std:               standard deviation that was used for the normalization
    """
    with tf.name_scope('batch_norm'):
        batch_mean, batch_var = tf.nn.moments(inputs, [0], name = 'moments')
        ema = tf.train.ExponentialMovingAverage(decay = decay)

        def mean_var_with_update():
            # The update op is created inside the branch: operations created
            # outside of a tf.cond branch run regardless of the predicate.
            ema_apply_op = ema.apply([batch_mean, batch_var])
            with tf.control_dependencies([ema_apply_op]):
                return tf.identity(batch_mean), tf.identity(batch_var)

        def mean_var_from_average():
            # Traced after mean_var_with_update, so the averages already exist.
            return ema.average(batch_mean), ema.average(batch_var)

        mean, var = tf.cond(is_training_phase,
                            mean_var_with_update,
                            mean_var_from_average)

        # Normalize with the statistics selected above (previously the batch
        # statistics were always used and the selection was discarded).
        std = tf.sqrt(var + epsilon)
        normed = (inputs - mean) / std

        return normed, mean, std

#### ladder_network

In [7]:
### class Session

class Session:
    """
    Context manager that runs the operations of a `Graph` in a TensorFlow
    session and writes their summaries to a time-stamped directory under `logs/`.
    """

    def __init__(self, graph, config = None):
        """
        Args:
          graph:  object providing the train steps, summaries, placeholders and saver
          config: optional tf.ConfigProto forwarded to tf.Session
                  (None keeps TensorFlow's defaults)
        """
        self.session = tf.Session(config = config)
        self.graph = graph
        # tf.train.SummaryWriter was removed in TensorFlow 1.0; FileWriter replaces it.
        self.writer = tf.summary.FileWriter(logdir = strftime("logs/%Y-%m-%d_%H-%M-%S"),
                                            graph = self.session.graph)

    def __enter__(self):
        """Initialize all variables and return this object."""
        self.session.run(tf.global_variables_initializer())
        return self

    def __exit__(self, type, value, traceback):
        """Close the summary writer and the session."""
        try:
            self.writer.close()
        finally:
            # The session is released even if closing the writer fails.
            self.session.close()

    def train_supervised_batch(self, inputs, labels, step_number):
        """Run one supervised train step and log the supervised summaries."""
        return self._run(self.graph.supervised_train_step,
                         summary_action = self.graph.supervised_summaries,
                         step_number = step_number,
                         inputs = inputs,
                         labels = labels,
                         is_training_phase = True)

    def train_unsupervised_batch(self, inputs, step_number):
        """Run one unsupervised train step (no labels) and log the unsupervised summaries."""
        return self._run(self.graph.unsupervised_train_step,
                         summary_action = self.graph.unsupervised_summaries,
                         step_number = step_number,
                         inputs = inputs,
                         is_training_phase = True)

    def test(self, inputs, labels, step_number):
        """Evaluate the accuracy measure, log the test summaries and flush the writer."""
        result = self._run(self.graph.accuracy_measure,
                           summary_action = self.graph.test_summaries,
                           step_number = step_number,
                           inputs = inputs,
                           labels = labels,
                           is_training_phase = False)
        self.writer.flush()
        return result

    def save(self):
        """Save the session through the graph's saver under the name "checkpoints"."""
        return self.graph.saver.save(self.session, "checkpoints")

    def _run(self, action, summary_action, step_number, inputs, labels = None, is_training_phase = True):
        """
        Run `action` together with `summary_action`, record the summary for
        `step_number` and return the result of `action`.
        """
        variable_placements = self.graph.placeholders.placements(inputs, labels, is_training_phase)
        action_result, summary = self.session.run([action, summary_action], variable_placements)
        self.writer.add_summary(summary, step_number)
        return action_result

### class Graph

class Graph:
    """
    Builds the ladder network operations: forward pass, accuracy measure,
    supervised and unsupervised train steps, their summaries and a saver.
    """

    def __init__(self,
        learning_rate,
        noise_level,
        input_layer_size,
        class_count,
        encoder_layer_definitions,
        denoising_cost_multipliers):
        """
        Args:
          learning_rate:              learning rate given to the Adam optimizers
          noise_level:                standard deviation of the noise in the corrupted encoder
          input_layer_size:           width of the input placeholder
          class_count:                width of the label placeholder; must equal the
                                      size of the last encoder layer
          encoder_layer_definitions:  sequence of (layer size, non-linearity) pairs
          denoising_cost_multipliers: per-layer weights of the denoising costs
        """
        assert class_count == encoder_layer_definitions[-1][0]
        self.learning_rate = learning_rate
        self.denoising_cost_multipliers = denoising_cost_multipliers
        self.placeholders = _Placeholders(input_layer_size, class_count)
        self.output = _ForwardPass(self.placeholders,
                                   noise_level=noise_level,
                                   encoder_layer_definitions=encoder_layer_definitions)
        self.accuracy_measure = self._accuracy_measure(
            self.placeholders, self.output)
        self.supervised_train_step = self._supervised_train_step(
            self.placeholders, self.output)
        self.unsupervised_train_step = self._unsupervised_train_step(
            self.placeholders, self.output)

        # tf.merge_all_summaries was removed in TensorFlow 1.0; each call merges
        # the summaries registered under one collection key.
        self.unsupervised_summaries = tf.summary.merge_all(key = "unsupervised")
        self.supervised_summaries = tf.summary.merge_all(key = "supervised")
        self.test_summaries = tf.summary.merge_all(key = "test")

        self.saver = tf.train.Saver()

    def _accuracy_measure(self, placeholders, output):
        """Fraction of samples whose clean prediction matches the label; logged under "test"."""
        with tf.name_scope("accuracy_measure"):
            actual_labels = tf.argmax(output.clean_label_probabilities, 1)
            expected_labels = tf.argmax(placeholders.labels, 1)
            correct_prediction = tf.equal(actual_labels, expected_labels)
            accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
            tf.summary.histogram("class distribution", actual_labels, collections = ["test"])
            tf.summary.scalar("test accuracy", accuracy, collections = ["test"])
            return accuracy

    def _supervised_train_step(self, placeholders, output):
        """Train step minimizing cross entropy plus the total denoising cost."""
        with tf.name_scope("supervised_training"):
            total_cost = self._total_cost(placeholders, output)
            return self._optimizer(self.learning_rate, total_cost, ["supervised"])

    def _unsupervised_train_step(self, placeholders, output):
        """Train step minimizing the total denoising cost only."""
        with tf.name_scope("unsupervised_training"):
            summary_tags = ["unsupervised"]
            total_denoising_cost, layer_denoising_costs = self._total_denoising_cost(placeholders, output)
            tf.summary.scalar("total denoising cost", total_denoising_cost, collections = summary_tags)
            for index, layer_cost in enumerate(layer_denoising_costs):
                tf.summary.scalar("layer %i denoising cost" % index, layer_cost, collections = summary_tags)
            return self._optimizer(self.learning_rate, total_denoising_cost, summary_tags)

    def _optimizer(self, learning_rate, cost_function, summary_tags):
        """Adam update for `cost_function`, with a histogram summary for every gradient."""
        with tf.name_scope("optimizer"):
            optimizer = tf.train.AdamOptimizer(learning_rate)
            gradients_and_vars = optimizer.compute_gradients(cost_function)
            for (gradient, var) in gradients_and_vars:
                # Variables that do not influence the cost have no gradient.
                if gradient is not None:
                    tf.summary.histogram("gradient for %s" % var.name, gradient, collections = summary_tags)
            return optimizer.apply_gradients(gradients_and_vars)

    def _total_cost(self, placeholders, output):
        """Cross entropy plus total denoising cost; all parts are logged under "supervised"."""
        with tf.name_scope("total_cost"):
            cross_entropy = self._cross_entropy(placeholders, output)
            total_denoising_cost, layer_denoising_costs = self._total_denoising_cost(placeholders, output)
            total_cost = cross_entropy + total_denoising_cost

            self._log_all_costs(total_cost, cross_entropy, total_denoising_cost, layer_denoising_costs, ["supervised"])

        return total_cost

    def _log_all_costs(self,
                       total_cost = None, cross_entropy = None,
                       total_denoising_cost = None, layer_denoising_costs = None,
                       summary_tags = tf.GraphKeys.SUMMARIES):
        """Register scalar summaries for every cost, both absolute and as a percentage of the total."""
        tf.summary.scalar("total cost", total_cost, collections = summary_tags)

        tf.summary.scalar("cross entropy", cross_entropy, collections = summary_tags)
        tf.summary.scalar("cross entropy %", 100 * cross_entropy / total_cost, collections = summary_tags)

        tf.summary.scalar("total denoising cost", total_denoising_cost, collections = summary_tags)
        tf.summary.scalar("total denoising cost %", 100 * total_denoising_cost / total_cost, collections = summary_tags)

        for index, layer_cost in enumerate(layer_denoising_costs):
            tf.summary.scalar("layer %i denoising cost" % index, layer_cost, collections = summary_tags)
            tf.summary.scalar("layer %i denoising cost %%" % index, 100 * layer_cost / total_cost, collections = summary_tags)


    def _cross_entropy(self, placeholders, output):
        """Negative mean of labels * log(corrupted label probabilities)."""
        with tf.name_scope("cross_entropy_cost"):
            cross_entropy = -tf.reduce_mean(placeholders.labels * tf.log(output.corrupted_label_probabilities))
        return cross_entropy

    def _total_denoising_cost(self, placeholders, output):
        """Return (sum of the layer costs, list of the layer costs)."""
        with tf.name_scope("denoising_cost"):
            # Decoder outputs are reversed so that they line up with the encoder layers.
            layer_costs = [self._layer_denoising_cost(*params)
                           for params in zip(output.clean_encoder_outputs,
                                             reversed(output.decoder_outputs),
                                             self.denoising_cost_multipliers)]
            total_denoising_cost = sum(layer_costs)
            return total_denoising_cost, layer_costs

    def _layer_denoising_cost(self, encoder, decoder, cost_multiplier):
        """Weighted squared error between an encoder layer and its decoder reconstruction."""
        return cost_multiplier * self._mean_squared_error(encoder.pre_activation, decoder.post_2nd_normalization)

    def _mean_squared_error(self, expected, actual):
        """Mean of the element-wise squared difference."""
        return tf.reduce_mean(tf.pow(expected - actual, 2))

### class _Placeholders

class _Placeholders:
    def __init__(self, input_layer_size, class_count):
        with tf.name_scope("placeholders") as scope:
            self.inputs = tf.placeholder(tf.float32, [None, input_layer_size], name = 'inputs')
            self.labels = tf.placeholder(tf.float32, [None, class_count], name = 'labels')
            self.is_training_phase = tf.placeholder(tf.bool, name = 'is_training_phase')

    def placements(self, inputs, labels = None, is_training_phase = True):
        if labels is None:
            labels = numpy.zeros([inputs.shape[0], _layer_size(self.labels)])
        return {
            self.inputs: inputs,
            self.labels: labels,
            self.is_training_phase: is_training_phase
        }


### class _ForwardPass

class _ForwardPass:
    def __init__(self, placeholders, encoder_layer_definitions, noise_level):
        with tf.name_scope("clean_encoder") as scope:
            clean_encoder_outputs = self._encoder_layers(input_layer = placeholders.inputs,
                                                         other_layer_definitions = encoder_layer_definitions,
                                                         is_training_phase = placeholders.is_training_phase)

        with tf.name_scope("corrupted_encoder") as scope:
            corrupted_encoder_outputs = self._encoder_layers(input_layer = placeholders.inputs,
                                                             other_layer_definitions = encoder_layer_definitions,
                                                             is_training_phase = placeholders.is_training_phase,
                                                             noise_level = noise_level,
                                                             reuse_variables = clean_encoder_outputs[1:])

        with tf.name_scope("decoder") as scope:
            decoder_outputs = self._decoder_layers(clean_encoder_layers = clean_encoder_outputs,
                                                   corrupted_encoder_layers = corrupted_encoder_outputs,
                                                   is_training_phase = placeholders.is_training_phase)

        self.clean_label_probabilities = clean_encoder_outputs[-1].post_activation
        self.corrupted_label_probabilities = corrupted_encoder_outputs[-1].post_activation
        self.autoencoded_inputs = decoder_outputs[-1]
        self.clean_encoder_outputs = clean_encoder_outputs
        self.corrupted_encoder_outputs = corrupted_encoder_outputs
        self.decoder_outputs = decoder_outputs

    def _encoder_layers(self,
                        input_layer, other_layer_definitions,
                        noise_level = None, is_training_phase = True, reuse_variables = None):
        first_encoder_layer = _InputLayerWrapper(input_layer)
        if reuse_variables is None:
            reuse_variables = [None for layer in other_layer_definitions]
        layer_accumulator = [first_encoder_layer]
        for ((layer_size, non_linearity), reuse_layer) in zip(other_layer_definitions, reuse_variables):
            layer_output = _EncoderLayer(inputs = layer_accumulator[-1].post_activation,
                                         output_size = layer_size,
                                         non_linearity = non_linearity,
                                         noise_level = noise_level,
                                         is_training_phase = is_training_phase,
                                         reuse_variables = reuse_layer)
            layer_accumulator.append(layer_output)
        return layer_accumulator

    def _decoder_layers(self, clean_encoder_layers, corrupted_encoder_layers,is_training_phase):
        # FIXME: Actually the first decoder layer should get the correct label from above
        encoder_layers = reversed(zip(clean_encoder_layers, corrupted_encoder_layers))
        layer_accumulator = [None]
        for clean_layer, corrupted_layer in encoder_layers:
            layer = _DecoderLayer(clean_encoder_layer = clean_layer,
                                  corrupted_encoder_layer = corrupted_layer,
                                  previous_decoder_layer = layer_accumulator[-1],
                                  is_training_phase = is_training_phase)
            layer_accumulator.append(layer)
        return layer_accumulator[1:]

### class _InputLayerWrapper

class _InputLayerWrapper:
    def __init__(self, input_layer):
        self.pre_activation = input_layer
        self.post_activation = input_layer
        self.batch_mean = tf.zeros_like(input_layer)
        self.batch_std = tf.ones_like(input_layer)

### class _EncoderLayer

class _EncoderLayer:
    def __init__(self, inputs, output_size, non_linearity,
                 noise_level, is_training_phase, reuse_variables = None):
        with tf.name_scope("encoder_layer") as scope:
            self._create_or_reuse_variables(reuse_variables, _layer_size(inputs), output_size)
            self.pre_normalization = tf.matmul(inputs, self.weights)
            pre_noise, self.batch_mean, self.batch_std = batch_norm(self.pre_normalization,
                                                                    is_training_phase = is_training_phase)
            self.pre_activation = self._add_noise(pre_noise, noise_level)
            beta_gamma = self.gamma * (self.pre_activation + self.beta)
            self.post_activation = non_linearity(beta_gamma)

    def _create_or_reuse_variables(self, variables, input_size, output_size):
        if variables is None:
            self.weights = _weight_variable([input_size, output_size], name = 'W')
            self.beta = tf.Variable(tf.constant(0.0, shape = [output_size]), name = 'beta')
            self.gamma = tf.Variable(tf.constant(1.0, shape = [output_size]), name = 'gamma')
        else:
            self.weights = variables.weights
            self.beta = variables.beta
            self.gamma = variables.gamma

    def _add_noise(self, tensor, noise_level):
        if noise_level is None:
            return tensor
        else:
            return tensor + tf.random_normal([_layer_size(tensor)], mean = 0.0, stddev = noise_level)

### class _DecoderLayer

class _DecoderLayer:
    def __init__(self, clean_encoder_layer, corrupted_encoder_layer,
                 previous_decoder_layer = None, is_training_phase = True):
        with tf.name_scope("decoder_layer") as scope:
            is_first_decoder_layer = previous_decoder_layer is None
            if is_first_decoder_layer:
                pre_1st_normalization = corrupted_encoder_layer.post_activation
            else:
                input_size = _layer_size(previous_decoder_layer.post_denoising)
                output_size = _layer_size(clean_encoder_layer.post_activation)
                weights = _weight_variable([input_size, output_size], name = 'V')
                pre_1st_normalization = tf.matmul(previous_decoder_layer.post_denoising, weights)

        pre_denoising, _, _ = batch_norm(pre_1st_normalization, is_training_phase = is_training_phase)
        post_denoising = self._denoise(corrupted_encoder_layer.pre_activation, pre_denoising)
        post_2nd_normalization = (post_denoising - clean_encoder_layer.batch_mean) / clean_encoder_layer.batch_std
        self.post_denoising = post_denoising
        self.post_2nd_normalization = post_2nd_normalization

    def _denoise(self, from_left, from_above):
        with tf.name_scope('mu') as scope:
            mu = self._modulate(from_above)
        with tf.name_scope('v') as scope:
            v = self._modulate(from_above)
        return (from_left - mu) * v + mu

    def _modulate(self, u):
        a = [_weight_variable([_layer_size(u)], name = str(i)) for i in xrange(5)]
        return a[0] * tf.nn.sigmoid(a[1] * u + a[2]) + a[3] * u + a[4]

def _weight_variable(shape, name = 'weight'):
    initial = tf.truncated_normal(shape, stddev = 0.1)
    return tf.Variable(initial, name = name)

def _layer_size(layer_output):
    return layer_output.get_shape()[-1].value

## Class

### class Session

In [8]:
class Session:
    """
    Context manager that runs the operations of a `Graph` in a TensorFlow
    session and writes their summaries to a time-stamped directory under `logs/`.
    """

    def __init__(self, graph, config = None):
        """
        Args:
          graph:  object providing the train steps, summaries, placeholders and saver
          config: optional tf.ConfigProto forwarded to tf.Session
                  (None keeps TensorFlow's defaults)
        """
        self.session = tf.Session(config = config)
        self.graph = graph
        # tf.train.SummaryWriter was removed in TensorFlow 1.0; FileWriter replaces it.
        self.writer = tf.summary.FileWriter(logdir = strftime("logs/%Y-%m-%d_%H-%M-%S"),
                                            graph = self.session.graph)

    def __enter__(self):
        """Initialize all variables and return this object."""
        self.session.run(tf.global_variables_initializer())
        return self

    def __exit__(self, type, value, traceback):
        """Close the summary writer and the session."""
        try:
            self.writer.close()
        finally:
            # The session is released even if closing the writer fails.
            self.session.close()

    def train_supervised_batch(self, inputs, labels, step_number):
        """Run one supervised train step and log the supervised summaries."""
        return self._run(self.graph.supervised_train_step,
                         summary_action = self.graph.supervised_summaries,
                         step_number = step_number,
                         inputs = inputs,
                         labels = labels,
                         is_training_phase = True)

    def train_unsupervised_batch(self, inputs, step_number):
        """Run one unsupervised train step (no labels) and log the unsupervised summaries."""
        return self._run(self.graph.unsupervised_train_step,
                         summary_action = self.graph.unsupervised_summaries,
                         step_number = step_number,
                         inputs = inputs,
                         is_training_phase = True)

    def test(self, inputs, labels, step_number):
        """Evaluate the accuracy measure, log the test summaries and flush the writer."""
        result = self._run(self.graph.accuracy_measure,
                           summary_action = self.graph.test_summaries,
                           step_number = step_number,
                           inputs = inputs,
                           labels = labels,
                           is_training_phase = False)
        self.writer.flush()
        return result

    def save(self):
        """Save the session through the graph's saver under the name "checkpoints"."""
        return self.graph.saver.save(self.session, "checkpoints")

    def _run(self, action, summary_action, step_number, inputs, labels = None, is_training_phase = True):
        """
        Run `action` together with `summary_action`, record the summary for
        `step_number` and return the result of `action`.
        """
        variable_placements = self.graph.placeholders.placements(inputs, labels, is_training_phase)
        action_result, summary = self.session.run([action, summary_action], variable_placements)
        self.writer.add_summary(summary, step_number)
        return action_result

### class Graph

In [9]:
class Graph:
    """
    Builds the ladder network operations: forward pass, accuracy measure,
    supervised and unsupervised train steps, their summaries and a saver.
    """

    def __init__(self,
        learning_rate,
        noise_level,
        input_layer_size,
        class_count,
        encoder_layer_definitions,
        denoising_cost_multipliers):
        """
        Args:
          learning_rate:              learning rate given to the Adam optimizers
          noise_level:                standard deviation of the noise in the corrupted encoder
          input_layer_size:           width of the input placeholder
          class_count:                width of the label placeholder; must equal the
                                      size of the last encoder layer
          encoder_layer_definitions:  sequence of (layer size, non-linearity) pairs
          denoising_cost_multipliers: per-layer weights of the denoising costs
        """
        assert class_count == encoder_layer_definitions[-1][0]
        self.learning_rate = learning_rate
        self.denoising_cost_multipliers = denoising_cost_multipliers
        self.placeholders = _Placeholders(input_layer_size, class_count)
        self.output = _ForwardPass(self.placeholders,
                                   noise_level=noise_level,
                                   encoder_layer_definitions=encoder_layer_definitions)
        self.accuracy_measure = self._accuracy_measure(
            self.placeholders, self.output)
        self.supervised_train_step = self._supervised_train_step(
            self.placeholders, self.output)
        self.unsupervised_train_step = self._unsupervised_train_step(
            self.placeholders, self.output)

        # tf.merge_all_summaries was removed in TensorFlow 1.0; each call merges
        # the summaries registered under one collection key.
        self.unsupervised_summaries = tf.summary.merge_all(key = "unsupervised")
        self.supervised_summaries = tf.summary.merge_all(key = "supervised")
        self.test_summaries = tf.summary.merge_all(key = "test")

        self.saver = tf.train.Saver()

    def _accuracy_measure(self, placeholders, output):
        """Fraction of samples whose clean prediction matches the label; logged under "test"."""
        with tf.name_scope("accuracy_measure"):
            actual_labels = tf.argmax(output.clean_label_probabilities, 1)
            expected_labels = tf.argmax(placeholders.labels, 1)
            correct_prediction = tf.equal(actual_labels, expected_labels)
            accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
            tf.summary.histogram("class distribution", actual_labels, collections = ["test"])
            tf.summary.scalar("test accuracy", accuracy, collections = ["test"])
            return accuracy

    def _supervised_train_step(self, placeholders, output):
        """Train step minimizing cross entropy plus the total denoising cost."""
        with tf.name_scope("supervised_training"):
            total_cost = self._total_cost(placeholders, output)
            return self._optimizer(self.learning_rate, total_cost, ["supervised"])

    def _unsupervised_train_step(self, placeholders, output):
        """Train step minimizing the total denoising cost only."""
        with tf.name_scope("unsupervised_training"):
            summary_tags = ["unsupervised"]
            total_denoising_cost, layer_denoising_costs = self._total_denoising_cost(placeholders, output)
            tf.summary.scalar("total denoising cost", total_denoising_cost, collections = summary_tags)
            for index, layer_cost in enumerate(layer_denoising_costs):
                tf.summary.scalar("layer %i denoising cost" % index, layer_cost, collections = summary_tags)
            return self._optimizer(self.learning_rate, total_denoising_cost, summary_tags)

    def _optimizer(self, learning_rate, cost_function, summary_tags):
        """Adam update for `cost_function`, with a histogram summary for every gradient."""
        with tf.name_scope("optimizer"):
            optimizer = tf.train.AdamOptimizer(learning_rate)
            gradients_and_vars = optimizer.compute_gradients(cost_function)
            for (gradient, var) in gradients_and_vars:
                # Variables that do not influence the cost have no gradient.
                if gradient is not None:
                    tf.summary.histogram("gradient for %s" % var.name, gradient, collections = summary_tags)
            return optimizer.apply_gradients(gradients_and_vars)

    def _total_cost(self, placeholders, output):
        """Cross entropy plus total denoising cost; all parts are logged under "supervised"."""
        with tf.name_scope("total_cost"):
            cross_entropy = self._cross_entropy(placeholders, output)
            total_denoising_cost, layer_denoising_costs = self._total_denoising_cost(placeholders, output)
            total_cost = cross_entropy + total_denoising_cost

            self._log_all_costs(total_cost, cross_entropy, total_denoising_cost, layer_denoising_costs, ["supervised"])

        return total_cost

    def _log_all_costs(self,
                       total_cost = None, cross_entropy = None,
                       total_denoising_cost = None, layer_denoising_costs = None,
                       summary_tags = tf.GraphKeys.SUMMARIES):
        """Register scalar summaries for every cost, both absolute and as a percentage of the total."""
        tf.summary.scalar("total cost", total_cost, collections = summary_tags)

        tf.summary.scalar("cross entropy", cross_entropy, collections = summary_tags)
        tf.summary.scalar("cross entropy %", 100 * cross_entropy / total_cost, collections = summary_tags)

        tf.summary.scalar("total denoising cost", total_denoising_cost, collections = summary_tags)
        tf.summary.scalar("total denoising cost %", 100 * total_denoising_cost / total_cost, collections = summary_tags)

        for index, layer_cost in enumerate(layer_denoising_costs):
            tf.summary.scalar("layer %i denoising cost" % index, layer_cost, collections = summary_tags)
            tf.summary.scalar("layer %i denoising cost %%" % index, 100 * layer_cost / total_cost, collections = summary_tags)


    def _cross_entropy(self, placeholders, output):
        """Negative mean of labels * log(corrupted label probabilities)."""
        with tf.name_scope("cross_entropy_cost"):
            cross_entropy = -tf.reduce_mean(placeholders.labels * tf.log(output.corrupted_label_probabilities))
        return cross_entropy

    def _total_denoising_cost(self, placeholders, output):
        """Return (sum of the layer costs, list of the layer costs)."""
        with tf.name_scope("denoising_cost"):
            # Decoder outputs are reversed so that they line up with the encoder layers.
            layer_costs = [self._layer_denoising_cost(*params)
                           for params in zip(output.clean_encoder_outputs,
                                             reversed(output.decoder_outputs),
                                             self.denoising_cost_multipliers)]
            total_denoising_cost = sum(layer_costs)
            return total_denoising_cost, layer_costs

    def _layer_denoising_cost(self, encoder, decoder, cost_multiplier):
        """Weighted squared error between an encoder layer and its decoder reconstruction."""
        return cost_multiplier * self._mean_squared_error(encoder.pre_activation, decoder.post_2nd_normalization)

    def _mean_squared_error(self, expected, actual):
        """Mean of the element-wise squared difference."""
        return tf.reduce_mean(tf.pow(expected - actual, 2))

### class _Placeholders

In [10]:
class _Placeholders:
    def __init__(self, input_layer_size, class_count):
        with tf.name_scope("placeholders") as scope:
            self.inputs = tf.placeholder(tf.float32, [None, input_layer_size], name = 'inputs')
            self.labels = tf.placeholder(tf.float32, [None, class_count], name = 'labels')
            self.is_training_phase = tf.placeholder(tf.bool, name = 'is_training_phase')

    def placements(self, inputs, labels = None, is_training_phase = True):
        if labels is None:
            labels = numpy.zeros([inputs.shape[0], _layer_size(self.labels)])
        return {
            self.inputs: inputs,
            self.labels: labels,
            self.is_training_phase: is_training_phase
        }


### class _ForwardPass

In [11]:
class _ForwardPass:
    def __init__(self, placeholders, encoder_layer_definitions, noise_level):
        with tf.name_scope("clean_encoder") as scope:
            clean_encoder_outputs = self._encoder_layers(input_layer = placeholders.inputs,
                                                         other_layer_definitions = encoder_layer_definitions,
                                                         is_training_phase = placeholders.is_training_phase)

        with tf.name_scope("corrupted_encoder") as scope:
            corrupted_encoder_outputs = self._encoder_layers(input_layer = placeholders.inputs,
                                                             other_layer_definitions = encoder_layer_definitions,
                                                             is_training_phase = placeholders.is_training_phase,
                                                             noise_level = noise_level,
                                                             reuse_variables = clean_encoder_outputs[1:])

        with tf.name_scope("decoder") as scope:
            decoder_outputs = self._decoder_layers(clean_encoder_layers = clean_encoder_outputs,
                                                   corrupted_encoder_layers = corrupted_encoder_outputs,
                                                   is_training_phase = placeholders.is_training_phase)

        self.clean_label_probabilities = clean_encoder_outputs[-1].post_activation
        self.corrupted_label_probabilities = corrupted_encoder_outputs[-1].post_activation
        self.autoencoded_inputs = decoder_outputs[-1]
        self.clean_encoder_outputs = clean_encoder_outputs
        self.corrupted_encoder_outputs = corrupted_encoder_outputs
        self.decoder_outputs = decoder_outputs

    def _encoder_layers(self,
                        input_layer, other_layer_definitions,
                        noise_level = None, is_training_phase = True, reuse_variables = None):
        first_encoder_layer = _InputLayerWrapper(input_layer)
        if reuse_variables is None:
            reuse_variables = [None for layer in other_layer_definitions]
        layer_accumulator = [first_encoder_layer]
        for ((layer_size, non_linearity), reuse_layer) in zip(other_layer_definitions, reuse_variables):
            layer_output = _EncoderLayer(inputs = layer_accumulator[-1].post_activation,
                                         output_size = layer_size,
                                         non_linearity = non_linearity,
                                         noise_level = noise_level,
                                         is_training_phase = is_training_phase,
                                         reuse_variables = reuse_layer)
            layer_accumulator.append(layer_output)
        return layer_accumulator

    def _decoder_layers(self, clean_encoder_layers, corrupted_encoder_layers,is_training_phase):
        # FIXME: Actually the first decoder layer should get the correct label from above
        encoder_layers = reversed(zip(clean_encoder_layers, corrupted_encoder_layers))
        layer_accumulator = [None]
        for clean_layer, corrupted_layer in encoder_layers:
            layer = _DecoderLayer(clean_encoder_layer = clean_layer,
                                  corrupted_encoder_layer = corrupted_layer,
                                  previous_decoder_layer = layer_accumulator[-1],
                                  is_training_phase = is_training_phase)
            layer_accumulator.append(layer)
        return layer_accumulator[1:]

### class _InputLayerWrapper

In [12]:
class _InputLayerWrapper:
    def __init__(self, input_layer):
        self.pre_activation = input_layer
        self.post_activation = input_layer
        self.batch_mean = tf.zeros_like(input_layer)
        self.batch_std = tf.ones_like(input_layer)

### class _EncoderLayer

In [13]:
class _EncoderLayer:
    def __init__(self, inputs, output_size, non_linearity,
                 noise_level, is_training_phase, reuse_variables = None):
        with tf.name_scope("encoder_layer") as scope:
            self._create_or_reuse_variables(reuse_variables, _layer_size(inputs), output_size)
            self.pre_normalization = tf.matmul(inputs, self.weights)
            pre_noise, self.batch_mean, self.batch_std = batch_norm(self.pre_normalization,
                                                                    is_training_phase = is_training_phase)
            self.pre_activation = self._add_noise(pre_noise, noise_level)
            beta_gamma = self.gamma * (self.pre_activation + self.beta)
            self.post_activation = non_linearity(beta_gamma)

    def _create_or_reuse_variables(self, variables, input_size, output_size):
        if variables is None:
            self.weights = _weight_variable([input_size, output_size], name = 'W')
            self.beta = tf.Variable(tf.constant(0.0, shape = [output_size]), name = 'beta')
            self.gamma = tf.Variable(tf.constant(1.0, shape = [output_size]), name = 'gamma')
        else:
            self.weights = variables.weights
            self.beta = variables.beta
            self.gamma = variables.gamma

    def _add_noise(self, tensor, noise_level):
        if noise_level is None:
            return tensor
        else:
            return tensor + tf.random_normal([_layer_size(tensor)], mean = 0.0, stddev = noise_level)

### class _DecoderLayer

In [14]:
class _DecoderLayer:
    def __init__(self, clean_encoder_layer, corrupted_encoder_layer,
                 previous_decoder_layer = None, is_training_phase = True):
        with tf.name_scope("decoder_layer") as scope:
            is_first_decoder_layer = previous_decoder_layer is None
            if is_first_decoder_layer:
                pre_1st_normalization = corrupted_encoder_layer.post_activation
            else:
                input_size = _layer_size(previous_decoder_layer.post_denoising)
                output_size = _layer_size(clean_encoder_layer.post_activation)
                weights = _weight_variable([input_size, output_size], name = 'V')
                pre_1st_normalization = tf.matmul(previous_decoder_layer.post_denoising, weights)

        pre_denoising, _, _ = batch_norm(pre_1st_normalization, is_training_phase = is_training_phase)
        post_denoising = self._denoise(corrupted_encoder_layer.pre_activation, pre_denoising)
        post_2nd_normalization = (post_denoising - clean_encoder_layer.batch_mean) / clean_encoder_layer.batch_std
        self.post_denoising = post_denoising
        self.post_2nd_normalization = post_2nd_normalization

    def _denoise(self, from_left, from_above):
        with tf.name_scope('mu') as scope:
            mu = self._modulate(from_above)
        with tf.name_scope('v') as scope:
            v = self._modulate(from_above)
        return (from_left - mu) * v + mu

    def _modulate(self, u):
        a = [_weight_variable([_layer_size(u)], name = str(i)) for i in xrange(5)]
        return a[0] * tf.nn.sigmoid(a[1] * u + a[2]) + a[3] * u + a[4]

In [15]:
def _weight_variable(shape, name = 'weight'):
    initial = tf.truncated_normal(shape, stddev = 0.1)
    return tf.Variable(initial, name = name)

def _layer_size(layer_output):
    return layer_output.get_shape()[-1].value

## input_data

In [16]:
import input_data

## ladder_on_mnist

In [17]:
# Load MNIST, split into unlabeled / labeled training, validation and test sets.
print("Loading MNIST data")
mnist = input_data.read_data_sets(
    "MNIST_data/",
    one_hot=True,
    labeled_size=5000,
    validation_size=5000,
)

# Report how many examples ended up in each split.
print(mnist.train_unlabeled.num_examples, "unlabeled training examples")
print(mnist.train_labeled.num_examples, "labeled training examples")
print(mnist.validation.num_examples, "validation examples")
print(mnist.test.num_examples, "test examples")


# Keyword arguments for Graph(...).
hyperparameters = dict(
    learning_rate=0.01,
    noise_level=0.2,
    input_layer_size=784,
    class_count=10,
    # (layer size, non-linearity) for every layer above the input layer
    encoder_layer_definitions=[
        (100, tf.nn.relu),   # first hidden layer
        (50, tf.nn.relu),
        (10, tf.nn.softmax), # output layer
    ],
    # one multiplier per layer, input layer included
    denoising_cost_multipliers=[
        1000, # input layer
        0.5,  # first hidden layer
        0.1,
        0.1,  # output layer
    ],
)

Loading MNIST data
Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
50000 unlabeled training examples
5000 labeled training examples
5000 validation examples
10000 test examples


In [18]:
# Build the ladder-network graph, passing every hyperparameter as a keyword argument.
graph = Graph(**hyperparameters)

TypeError: argument to reversed() must be a sequence

In [None]:
# Train for 1000 steps, checkpointing and validating every 200 steps.
# NOTE(review): `Graph` is used unqualified in the cell above while `Session`
# is reached through `ladder_network` - confirm that name is in scope here.
with ladder_network.Session(graph) as session:
    # range() instead of xrange(): xrange does not exist on Python 3.
    for step in range(1000):
        # Every fifth step uses a labeled batch; all other steps use unlabeled images.
        if step % 5 == 0:
            images, labels = mnist.train_labeled.next_batch(100)
            session.train_supervised_batch(images, labels, step)
        else:
            # The labels returned for the unlabeled split are discarded.
            images, _ = mnist.train_unlabeled.next_batch(100)
            session.train_unsupervised_batch(images, step)
        if step % 200 == 0:
            save_path = session.save()
            accuracy = session.test(mnist.validation.images, mnist.validation.labels, step)
            print()
            print("Model saved in file: %s" % save_path)
            print("Accuracy: %f" % accuracy)

## End