# Sandbox

Herman Kamper, Stellenbosch University, 2018-2019.

## Preliminaries

In [1]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

from os import path
from scipy.spatial.distance import pdist
import matplotlib.pyplot as plt
import numpy as np
import sys
import tensorflow as tf

In [2]:
# Add ../src to the module search path so that the local modules imported
# below can be found (presumably that is where they live -- confirm).
sys.path.append(path.join("..", "src"))

import plotting
import samediff
import tflego
import training

# Shared NumPy/TensorFlow float and integer dtypes defined in `tflego`
from tflego import NP_DTYPE, TF_DTYPE, NP_ITYPE, TF_ITYPE

# NOTE(review): hard-coded absolute path; it is not referenced in the cells
# visible here, so presumably later cells write to it -- confirm before reuse.
output_dir = "/tmp/data-kamperh/"

In [None]:
def build_conv2d_relu(x, filter_shape, stride=1, padding="VALID"):
    """
    Build one 2-D convolutional layer: convolution, bias, then ReLU.

    Parameters
    ----------
    x : Tensor
        Input handed to `tf.nn.conv2d` (default data format).
    filter_shape : list
        Shape of the filter variable "W"; its last entry also sets the size
        of the bias variable "b".
    stride : int
        Stride used for both middle entries of the `strides` argument.
    padding : str
        Padding scheme handed to `tf.nn.conv2d`.

    Returns
    -------
    Tensor
        `tf.nn.relu` applied to the biased convolution output.
    """
    n_filters = filter_shape[-1]
    strides = [1, stride, stride, 1]

    # The variables "W" and "b" are created in whichever variable scope is
    # active when this function is called
    filters = tf.get_variable(
        "W", filter_shape, dtype=TF_DTYPE,
        initializer=tf.contrib.layers.xavier_initializer()
        )
    bias = tf.get_variable(
        "b", [n_filters], dtype=TF_DTYPE,
        initializer=tf.random_normal_initializer()
        )

    conv = tf.nn.conv2d(x, filters, strides=strides, padding=padding)
    return tf.nn.relu(tf.nn.bias_add(conv, bias))


def build_conv2d_linear(x, filter_shape, stride=1, padding="VALID"):
    """
    Build one 2-D convolutional layer with a bias and no non-linearity.

    Parameters
    ----------
    x : Tensor
        Input handed to `tf.nn.conv2d` (default data format).
    filter_shape : list
        Shape of the filter variable "W"; its last entry also sets the size
        of the bias variable "b".
    stride : int
        Stride used for both middle entries of the `strides` argument.
    padding : str
        Padding scheme handed to `tf.nn.conv2d`.

    Returns
    -------
    Tensor
        The convolution output with the bias added.
    """
    # Filter weights use Xavier initialisation, the bias a random normal one
    kernel = tf.get_variable(
        "W", filter_shape, dtype=TF_DTYPE,
        initializer=tf.contrib.layers.xavier_initializer()
        )
    offset = tf.get_variable(
        "b", [filter_shape[-1]], dtype=TF_DTYPE,
        initializer=tf.random_normal_initializer()
        )
    activation = tf.nn.conv2d(
        x, kernel, strides=[1, stride, stride, 1], padding=padding
        )
    return tf.nn.bias_add(activation, offset)


def build_maxpool2d(x, pool_shape, padding="VALID", name=None):
    """
    Max pool over `x` using a `pool_shape` of [pool_height, pool_width].

    Parameters
    ----------
    x : Tensor
        Input handed to `tf.nn.max_pool`.
    pool_shape : list or tuple
        The pooling window as [pool_height, pool_width].
    padding : str
        Padding scheme handed to `tf.nn.max_pool`.
    name : str or None
        Optional name for the pooling operation.

    Returns
    -------
    Tensor
        The output of `tf.nn.max_pool`.
    """
    # `list(...)` accepts tuples and other sequences too; concatenating a
    # list with a tuple directly would raise a TypeError.
    ksize = [1] + list(pool_shape) + [1]
    # The window size doubles as the stride, so windows tile the input
    # without overlapping.
    return tf.nn.max_pool(x, ksize=ksize, strides=ksize, padding=padding, name=name)


def build_cnn(x, input_shape, filter_shapes, pool_shapes, padding="VALID"):
    """
    Build a convolutional neural network (CNN).

    Every layer is a ReLU convolution, optionally followed by max pooling,
    and is built inside its own "cnn_layer_<index>" variable scope. The
    output shape of each layer is printed as the graph is constructed.

    As an example, a CNN with single-channel [28, 28] shaped input with two
    convolutional layers can be constructed using:

        x = tf.placeholder(TF_DTYPE, [None, 28*28])
        input_shape = [-1, 28, 28, 1] # [n_data, height, width, d_in]
        filter_shapes = [
            [5, 5, 1, 32],  # filter shape of first layer
            [5, 5, 32, 64]  # filter shape of second layer
            ]
        pool_shapes = [
            [2, 2],         # pool shape of first layer
            [2, 2]          # pool shape of second layer
            ]
        cnn = build_cnn(x, input_shape, filter_shapes, pool_shapes)

    Parameters
    ----------
    x : Tensor [n_data, n_input]
        Input to the CNN, which is reshaped to match `input_shape`.
    input_shape : list
        The shape of the input to the CNN as [n_data, height, width, d_in].
    filter_shapes : list of list
        The filter shape of each layer as [height, width, d_in, d_out].
    pool_shapes : list of list
        The pool shape of each layer as [height, width]. If the entry for a
        layer is None, then no pooling is applied in that layer.
    padding : str
        Padding scheme used for both the convolutions and the pooling.

    Returns
    -------
    Tensor
        The output of the final layer.
    """
    n_layers = len(filter_shapes)
    assert n_layers == len(pool_shapes)

    cnn = tf.reshape(x, input_shape)
    for i_layer in range(n_layers):
        filter_shape = filter_shapes[i_layer]
        pool_shape = pool_shapes[i_layer]
        scope_name = "cnn_layer_{}".format(i_layer)
        with tf.variable_scope(scope_name):
            cnn = build_conv2d_relu(cnn, filter_shape, padding=padding)
            if pool_shape is not None:
                cnn = build_maxpool2d(cnn, pool_shape, padding=padding)
            layer_shape = cnn.get_shape().as_list()
            print("CNN layer {} shape: {}".format(i_layer, layer_shape))
    return cnn


def np_conv2d(x, filters, padding="valid"):
    """
    Calculate the convolution of `x` using `filters`.

    As in CNN layers, this is implemented as a cross-correlation (the filter
    is not flipped), summed over the input channels.

    A useful tutorial: http://www.robots.ox.ac.uk/~vgg/practicals/cnn/.

    Parameters
    ----------
    x : matrix [n_data, height, width, d_in]
    filters : matrix [filter_height, filter_width, d_in, d_out]
    padding : str
        Passed as the `mode` argument of `scipy.signal.correlate`.

    Returns
    -------
    matrix [n_data, output_height, output_width, d_out]
    """

    import scipy.signal

    # Dimensions
    n_data, _, _, d_in = x.shape
    _, _, filter_d_in, d_out = filters.shape
    assert d_in == filter_d_in, "input channels of `x` and `filters` differ"

    # Loop over data (`range` rather than `xrange`, which only exists in
    # Python 2)
    conv_over_data = []
    for i_data in range(n_data):
        # Loop over output channels
        conv_over_channels = []
        for i_out_channel in range(d_out):
            conv_result = 0.
            # Sum the per-channel correlations over the input channels
            for i_in_channel in range(d_in):
                conv_result += scipy.signal.correlate(
                    x[i_data, :, :, i_in_channel],
                    filters[:, :, i_in_channel, i_out_channel],
                    mode=padding
                    )
            conv_over_channels.append(conv_result)
        # [d_out, height, width] -> [height, width, d_out]
        conv_over_data.append(np.transpose(np.array(conv_over_channels), (1, 2, 0)))

    return np.array(conv_over_data)


def np_maxpool2d(x, pool_shape, ignore_border=False):
    """
    Performs max pooling on `x`.

    Pooling windows do not overlap: the window size is also the step size.

    Parameters
    ----------
    x : matrix [n_data, height, width, d_in]
        Input over which pooling is performed.
    pool_shape : list
        Gives the pooling shape as (pool_height, pool_width).
    ignore_border : bool
        If True, rows and columns left over when the input size is not a
        multiple of the pool size are dropped. If False, they are pooled
        using a smaller window at the border.

    Returns
    -------
    matrix [n_data, output_height, output_width, d_in]
    """

    # Dimensions
    n_data, height, width, d_in = x.shape
    pool_height, pool_width = pool_shape
    if ignore_border:
        output_height = height // pool_height
        output_width = width // pool_width
    else:
        # Ceiling division, so that a partial window at the border counts
        output_height = -(-height // pool_height)
        output_width = -(-width // pool_width)

    # Max pool: each window is reduced for all items and channels at once
    max_pool = np.zeros((n_data, output_height, output_width, d_in))
    for i in range(output_height):
        rows = slice(i*pool_height, (i + 1)*pool_height)
        for j in range(output_width):
            cols = slice(j*pool_width, (j + 1)*pool_width)
            max_pool[:, i, j, :] = x[:, rows, cols, :].max(axis=(1, 2))

    return max_pool


def np_cnn(x, input_shape, weights, biases, pool_shapes):
    """
    Push the input `x` through a CNN using NumPy.

    The arguments `input_shape` and `pool_shapes` match those passed to
    `build_cnn`, while `weights` and `biases` give the parameters of each
    convolutional layer.

    Parameters
    ----------
    x : matrix
        Input, which is reshaped to `input_shape`.
    input_shape : list
        The shape of the input to the CNN as [n_data, height, width, d_in].
    weights : list of matrix
        The filters of each layer, as expected by `np_conv2d`.
    biases : list of vector
        The bias of each layer, added to the convolution output.
    pool_shapes : list of list
        The pool shape of each layer. If the entry for a layer is None, then
        no pooling is applied in that layer.

    Returns
    -------
    matrix
        The output of the final layer.
    """
    # `zip` would silently drop layers if the lists had different lengths
    assert len(weights) == len(biases) == len(pool_shapes)

    cnn = x.reshape(input_shape)
    for W, b, pool_shape in zip(weights, biases, pool_shapes):
        cnn = np_conv2d(cnn, W) + b
        if pool_shape is not None:
            # NOTE(review): pooling uses the default `ignore_border=False`,
            # so leftover border rows/columns are pooled rather than dropped
            # -- confirm this is the intended match for `build_cnn`.
            cnn = np_maxpool2d(cnn, pool_shape)
        # ReLU, applied after pooling as before
        cnn = np.maximum(0., cnn)
    return cnn

## Convolution and deconvolution

In [None]:
# Data

# NumPy archive
npz = np.load(
    path.join("..", "features", "mfcc", "buckeye",
    "devpart1.samediff.dd.npz")
    )

# Data and labels
train_x = []
train_labels = []
train_lengths = []
train_keys = []
for utt_key in sorted(npz):
    # Every access to `npz[...]` reads the array from the archive again, so
    # read each item only once
    utt_features = npz[utt_key]
    train_keys.append(utt_key)
    train_x.append(utt_features)
    # The label is the key without its last two underscore-separated fields
    word = "_".join(utt_key.split("_")[:-2])
    train_labels.append(word)
    # The length of an item is the size of its first axis
    train_lengths.append(utt_features.shape[0])
n_items = len(train_x)
print("No. items:", n_items)
print("E.g. item shape:", train_x[0].shape)

In [None]:
# Show the first item, transposed so that its first axis runs horizontally.
# `origin` only accepts "upper" or "lower"; "lower" puts row 0 at the bottom.
plt.imshow(train_x[0].T, origin="lower");

In [None]:
x = tf.placeholder(TF_DTYPE, [None, d_in])