This is an implementation of the [FractalNet](http://arxiv.org/abs/1605.07648) paper.

In [4]:
#!/usr/bin/env python

import numpy as np
import tensorflow as tf
import tflearn
from scipy import stats


def noPaths(n):
    """
        Returns the number of paths for C=n.

        The count is 1 for n <= 1 and 1 + noPaths(n-1)**2 otherwise.
    """
    # Unrolled form of the recurrence: start from the base case and apply
    # the "square and add one" step once for every level above 1.
    count = 1
    while n > 1:
        count = 1 + count**2
        n -= 1
    return count

def pmfPaths(n):
    """
        Builds a discrete distribution over the indices 0..n-1 where
        index i has probability proportional to noPaths(i+1).

        Returns a scipy.stats.rv_discrete instance.
    """
    counts = [noPaths(depth) for depth in range(1, n+1)]
    weights = np.array(counts, dtype=np.float32)
    weights /= np.sum(weights) # Scale so the probabilities sum to 1
    support = range(n)
    return stats.rv_discrete(values=(support, weights))

    
class FractalNet():
    """
        Recursively built fractal block F_n.

        A block of depth n consists of
          * one convolution applied directly to the input, and
          * (for n > 1) two nested blocks of depth n-1 chained one after
            the other: the first reads the input, the second reads the
            first one's joined output.
        The block output is a weighted sum ("Join") of its own convolution
        and the column tensors of the second nested block. The weights are
        the non-trainable variables in `is_active`.

        NOTE: written against the pre-1.0 TensorFlow API (e.g. tf.mul),
        like the rest of this file.
    """
    def __init__(self, input, n, f_height = 3, f_width = 3):
        """
            input    - 4-D tensor; its last dimension is used as the
                       channel count
            n        - depth of the block (number of columns joined)
            f_height - convolution filter height
            f_width  - convolution filter width
        """
        _, _, _, c = input.get_shape()
        f_channels = int(c)
        self.n = n
        # Distribution used by gen_drop_path to pick the surviving column
        self.pmf = pmfPaths(n)
        self.children = []
        with tf.name_scope("F%d" % n) as scope:
            # Convolutional layer filter; input and output channel counts
            # are equal so that every column has the same shape.
            self.filter = tf.Variable(tf.truncated_normal(
                [f_height, f_width, f_channels, f_channels], stddev=0.35),
                name="weights")

            # activations in join layer
            # for mean join layer they should be equal and sum to 1
            self.is_active = [
                tf.Variable(1.0/n, trainable=False, name="a%d"%i)
                for i in range(n)
            ]
            # Column 0: a single convolution (stride 1, SAME padding)
            self.__tensors = [
                tf.nn.conv2d(input, self.filter, [1,1,1,1], 'SAME')
            ]
            if n > 1:
                first = FractalNet(input, n - 1, f_height, f_width)
                self.children.append(first)
                second = FractalNet(
                    first.get_tensor(), n - 1, f_height, f_width)
                self.children.append(second)
                # The second nested block contributes its n-1 column
                # tensors, giving n columns in total (one per weight).
                self.__tensors.extend(second.__tensors)

            self.__tensor = tf.add_n(
                [tf.mul(m, x) for m, x in zip(self.is_active, self.__tensors)],
                name="Join"
            )

    def get_tensor(self):
        """
            Returns the joined output tensor of this block
        """
        return self.__tensor

    def gen_drop_path(self):
        """
            Randomly pick path in network and drop all others

            Returns an op that sets the join weight of the chosen column
            to 1 and all others to 0, in this block and recursively in all
            nested blocks. The random choice is made when this method is
            called (not when the op is run), so every call creates new ops
            with a freshly drawn path.
        """
        # Draw the surviving column from self.pmf (weighted by the number
        # of paths, not uniform over columns)
        active_path = self.pmf.rvs()
        assert 0 <= active_path < len(self.is_active) # Sanity check

        # Recursively select only one activation path through the network
        # This does a bit more...since in each pooling layer, only one input
        # is accepted, therefore there would be some useless half-path from
        # source, but not reaching the sink. It's only to simplify the
        # implementation.
        own_ops = [
            var.assign(1.0 if i == active_path else 0.0)
            for i, var in enumerate(self.is_active)
        ]
        child_ops = [fp.gen_drop_path() for fp in self.children]
        return tf.group(*(own_ops + child_ops))

    def set_test(self):
        """
            Kills any droppaths set

            Returns an op that restores the mean-join weights (1/n) in
            this block and recursively in all nested blocks, undoing what
            gen_drop_path assigned.
        """
        own_ops = [
            var.assign(1.0/self.n)
            for var in self.is_active
        ]
        # gen_drop_path also overwrites the nested blocks' weights, so
        # they have to be reset as well.
        child_ops = [fp.set_test() for fp in self.children]
        return tf.group(*(own_ops + child_ops))

# Input geometry and number of output classes
# (presumably MNIST-sized images - confirm against the data feed)
height, width, channels = 28, 28, 1
noClasses = 10

g = tf.Graph()

with g.as_default():
    # X: batch of images, Y: per-class target values for each image
    X = tf.placeholder(tf.float32, [None, height, width, channels])
    Y = tf.placeholder(tf.float32, [None, noClasses])

    F = FractalNet(X, 3)

    net = F.get_tensor()
    net = tflearn.fully_connected(net, noClasses)
    yp = tf.nn.softmax(net)
    # The loss takes the raw logits (not the softmax output) and compares
    # them with the target placeholder Y. Keyword arguments avoid any
    # ambiguity about the argument order.
    loss = tf.nn.softmax_cross_entropy_with_logits(logits=net, labels=Y)

with tf.Session(graph=g) as sess:
    sess.run(tf.initialize_all_variables())
    merged = tf.merge_all_summaries()
    writer = tf.train.SummaryWriter("/tmp/FractalNet", sess.graph)
    # Smoke test: switch to a random drop-path, then back to test mode
    sess.run(F.gen_drop_path())
    sess.run(F.set_test())
    # Flush the graph description to disk and release the writer
    writer.close()