In [2]:
import autograd.numpy as np
from autograd import grad, jacobian

In [47]:
def surf(weights, pos):
    z = np.exp(-(weights[0]*pos[0,:]**2.) - (weights[1]*pos[1, :]**2.))
    return np.array([pos[0, :], pos[1, :], z])

In [48]:
weights = np.array([1., 1.])
f = lambda x: surf(weights, x)
jac = jacobian(f)

In [53]:
pos = np.array([[0.1, 0.1], [0.2, 0.2]])
jac(pos).shape

(3, 2, 2, 2)

In [44]:
def volume_form(weights, pos):
    f = lambda x: surf(weights, x)
    jac = jacobian(f)(pos)
    g = np.dot(jac.T, jac)
    print(g)
    return np.sqrt(np.linalg.det(g))
    

In [46]:
volume_form(weights, np.array([0.12, 0.1]))

[[1.0548566  0.04571384]
 [0.04571384 1.03809486]]


1.0454431915187965

In [51]:
from autograd.scipy.signal import convolve

def squared_diff(X):
    Xa = X[:, np.newaxis, :]
    Xb = X[np.newaxis, :, :]
    return np.mean(np.square((Xa - Xb)), 2)

class WeightsParser(object):
    """A helper class to index into a parameter vector."""
    def __init__(self):
        self.idxs_and_shapes = {}
        self.N = 0

    def add_weights(self, name, shape):
        start = self.N
        self.N += np.prod(shape)
        self.idxs_and_shapes[name] = (slice(start, self.N), shape)

    def get(self, vect, name):
        idxs, shape = self.idxs_and_shapes[name]
        return np.reshape(vect[idxs], shape)

def make_nn_funs(input_shape, encoder_layer_specs, decoder_layer_specs):
    parser = WeightsParser()
    cur_shape = input_shape
    for layer in encoder_layer_specs + decoder_layer_specs:
        N_weights, cur_shape = layer.build_weights_dict(cur_shape)
        parser.add_weights(layer, (N_weights,))
        
    def latent_space(W_vect, inputs):
        cur_units = inputs
        for layer in encoder_layer_specs:
            cur_weights = parser.get(W_vect, layer)
            cur_units = layer.forward_pass(cur_units, cur_weights)
        return cur_units

    def predictions(W_vect, inputs):
        """Outputs normalized log-probabilities.
        shape of inputs : [data, color, y, x]"""
        cur_units = inputs
        for layer in encoder_layer_specs + decoder_layer_specs:
            cur_weights = parser.get(W_vect, layer)
            cur_units = layer.forward_pass(cur_units, cur_weights)
        return cur_units

    def loss(W_vect, X, T):
        recon = predictions(W_vect, X)
        latent = latent_space(W_vect, X)
        recon_loss = np.mean(np.square(recon-X))
        sdx = squared_diff(X)
        sdz = squared_diff(latent)
        sdx = sdx / np.mean(sdx)
        sdz = sdz / np.mean(sdz)
        distance_loss = np.mean(np.square(np.log(1.0 + sdx) - np.log(1.0 + sdz)))
        
        L = recon_loss + latent_loss_weight*distance_loss
        return L

    return parser.N, latent_space, predictions, loss

def relu(x):
    return (np.abs(x) + x)/2

def sigmoid(x):
    return 1.0/(1.0+np.exp(-x))

class conv_layer(object):
    def __init__(self, kernel_shape, num_filters, activation=relu):
        print('Adding conv layer: {} {}'.format(kernel_shape, num_filters))
        self.kernel_shape = kernel_shape
        self.num_filters = num_filters
        self.activation = activation

    def forward_pass(self, inputs, param_vector):
        # Input dimensions:  [data, color_in, y, x]
        # Params dimensions: [color_in, color_out, y, x]
        # Output dimensions: [data, color_out, y, x]
        params = self.parser.get(param_vector, 'params')
        biases = self.parser.get(param_vector, 'biases')
        conv = convolve(inputs, params, axes=([2, 3], [2, 3]), dot_axes = ([1], [0]), mode='same')
        return self.activation(conv + biases)

    def build_weights_dict(self, input_shape):
        # Input shape : [color, y, x] (don't need to know number of data yet)
        self.parser = WeightsParser()
        self.parser.add_weights('params', (input_shape[0], self.num_filters) + self.kernel_shape)
        self.parser.add_weights('biases', (1, self.num_filters, 1, 1))
        output_shape = (self.num_filters,) + self.conv_output_shape(input_shape[1:], self.kernel_shape)
        print('Conv Layer - Input shape: {} Output shape: {}'.format(input_shape, output_shape))
        return self.parser.N, output_shape

    def conv_output_shape(self, A, B):
        return (A[0] - B[0] + 1, A[1] - B[1] + 1)
    
class full_layer(object):
    def __init__(self, size):
        print('Adding Full Layer: {}'.format(size))
        self.size = size

    def build_weights_dict(self, input_shape):
        # Input shape is anything (all flattened)
        input_size = np.prod(input_shape, dtype=int)
        self.parser = WeightsParser()
        self.parser.add_weights('params', (input_size, self.size))
        self.parser.add_weights('biases', (self.size,))
        print('Full Layer - Input shape: {} Output shape: {}'.format(input_shape, self.size))
        return self.parser.N, (self.size,)

    def forward_pass(self, inputs, param_vector):
        params = self.parser.get(param_vector, 'params')
        biases = self.parser.get(param_vector, 'biases')
        if inputs.ndim > 2:
            inputs = inputs.reshape((inputs.shape[0], np.prod(inputs.shape[1:])))
        return self.nonlinearity(np.dot(inputs[:, :], params) + biases)
    
class reshape_layer(object):
    def __init__(self, newshape):
        print('Adding reshape layer: {}'.format(newshape))
        self.newshape = newshape
        
    def build_weights_dict(self, input_shape):
        print('reshape layer - Input shape: {} Output shape: {}'.format(input_shape, self.newshape[1:]))
        return 0, self.newshape[1:]
    
    def forward_pass(self, inputs, param_vector):
        return np.reshape(inputs, self.newshape)

class tanh_layer(full_layer):
    def nonlinearity(self, x):
        return np.tanh(x)
    
class dense_layer(full_layer):
    def nonlinearity(self, x):
        return x
    
class relu_layer(full_layer):
    def nonlinearity(self, x):
        return relu(x)

class ConvAE(object):
    
    def __init__(self, dims, batch_size, encoder_dims, decoder_dims, hidden_size, adam_eps=1.0):

        self.dims = dims
        self.batch_size = batch_size
        self.encoder_dims = encoder_dims
        self.decoder_dims = decoder_dims
        self.hidden_size = hidden_size
        self.batch_num = 0  # how many batches have been trained
        # training loss for SSE and distance

        self.adam_eps = adam_eps
        self.initialize_network()
        
    def initialize_network(self,):
        """ Defines the network architecture
        """
        self.initialization_AE()
        print('Network Initialized')
        
    def initialization_AE(self):
        """ Initializes the network architecture of an autoencoder by
        1) running inputs through the network architecture
        2) calculating the losses for the architecture
        3) applying the losses to different parts of the network
        4) Creating a list of gradients for each GPU (or lack thereof)
        """
        enc_layer_specs = self.encoder()
        dec_layer_specs = self.decoder()
        net_layer_specs = enc_layer_specs + dec_layer_specs
        self.Nweights, self.latent_space, self.predictions, self.loss = make_nn_funs(self.dims, enc_layer_specs, dec_layer_specs)
               
    def encoder(self):
        enc_layer_specs = []
        for layer_num, (filters, kernel_size) in enumerate(self.encoder_dims):
            if kernel_size > 0:
                enc_layer_specs.append(conv_layer((kernel_size, kernel_size), filters))
            else:
                enc_layer_specs.append(relu_layer(filters))
        enc_layer_specs.append(dense_layer(self.hidden_size))
        return enc_layer_specs

    def decoder(self):
        dec_layer_specs = []
        for layer_num, (filters, kernel_size) in enumerate(self.decoder_dims):
            #print('Decoder layer: {} Filters, Kernel: {}'.format(layer_num, (filters, kernel_size)))
            if kernel_size > 0:
                if (layer_num == 0) or (self.decoder_dims[layer_num -1][1] == 0):
                    cur_shape = self.dims[1]
                    dec_layer_specs.append(relu_layer(cur_shape*cur_shape*filters))
                    dec_layer_specs.append(reshape_layer((self.batch_size, filters, cur_shape, cur_shape)))
                else:
                    dec_layer_specs.append(conv_layer((kernel_size, kernel_size), filters, activation=relu))
            else:
                dec_layer_specs.append(relu_layer(filters))
        dec_layer_specs.append(conv_layer((1,1), 1, activation=sigmoid))
        return dec_layer_specs


In [52]:
test = ConvAE([1, 8, 8], 16, [[16, 3], [16, 3], [20, 0]], ([20, 0], [16, 3]), 2)

Adding conv layer: (3, 3) 16
Adding conv layer: (3, 3) 16
Adding Full Layer: 20
Adding Full Layer: 2
Adding Full Layer: 20
Adding Full Layer: 1024
Adding reshape layer: (16, 16, 8, 8)
Adding conv layer: (1, 1) 1
Conv Layer - Input shape: [1, 8, 8] Output shape: (16, 6, 6)
Conv Layer - Input shape: (16, 6, 6) Output shape: (16, 4, 4)
Full Layer - Input shape: (16, 4, 4) Output shape: 20
Full Layer - Input shape: (20,) Output shape: 2
Full Layer - Input shape: (2,) Output shape: 20
Full Layer - Input shape: (20,) Output shape: 1024
reshape layer - Input shape: (1024,) Output shape: (16, 8, 8)
Conv Layer - Input shape: (16, 8, 8) Output shape: (1, 8, 8)
Network Initialized


In [53]:
W = np.random.randn(test.Nweights)

In [54]:
dat = np.random.randn(16,1, 8,8)

In [55]:
test.loss(W, dat, None)

AssertionError: Mode same not yet implemented

In [27]:
loss_grad = grad(test.loss)

In [28]:
loss_grad(W, dat, None)

  return f_raw(*args, **kwargs)


ValueError: operands could not be broadcast together with shapes (16,1,6,6) (16,1,8,8) 