In [2]:
import capslayer as cl
import tensorflow as tf
import Model_functions as mf
import Dataset

  from ._conv import register_converters as _register_converters


## Data Pipeline

In [41]:
#  Pipeline configuration. Apart from `shuffle`, every value in this first
#  group is passed positionally to Dataset.get_paired_dataset below.
sample_rate=16384
n_fft=1024
fft_hop=256
patch_window=256
patch_hop=128
n_parallel_readers=4
normalise=True
batch_size = 5
#  NOTE(review): `shuffle` is not referenced again in this cell (only
#  `n_shuffle` is passed on) - confirm whether it is still needed.
shuffle=False
n_shuffle = 10

#  NOTE(review): absolute, machine-specific paths - this cell will not run on
#  another machine without editing them.
directory_a = 'C:/Users/Toby/MSc_Project/Test_Audio/GANdatasetsMini/test/Mixed'
directory_b = 'C:/Users/Toby/MSc_Project/Test_Audio/GANdatasetsMini/test/Voice'

#  Create the pipeline
#  The default graph is cleared first, so everything below is built into a
#  fresh graph each time the cell is run.
tf.reset_default_graph()
data = Dataset.zip_files(directory_a, directory_b)
data = Dataset.get_paired_dataset(data,
                                  sample_rate,
                                  n_fft,
                                  fft_hop,
                                  patch_window,
                                  patch_hop,
                                  n_parallel_readers,
                                  batch_size,
                                  n_shuffle,
                                  normalise)

#  Create the iterator
#  Each get_next() element unpacks into four tensors: two spectrograms and two
#  audio tensors (mixture and voice).
mixed_spec, voice_spec, mixed_audio, voice_audio = data.make_one_shot_iterator().get_next()

#  Create the training-flag placeholder and the model input tensors
is_training = tf.placeholder(shape=(), dtype=bool)
#  Axis 2 is sliced from index 1 (its first entry is dropped), the last axis is
#  indexed at 0 or 1, and expand_dims adds back a trailing axis of size 1.
#  Going by the variable names, index 0 is presumably magnitude and index 1
#  phase - TODO confirm against Dataset.
mixed_mag = tf.expand_dims(mixed_spec[:, :, 1:, 0], 3)
mixed_phase = tf.expand_dims(mixed_spec[:, :, 1:, 1], 3)
voice_mag = tf.expand_dims(voice_spec[:, :, 1:, 0], 3)

## CapsNet Definition

In [42]:
class basicCapsNet(object):
    """
    Work-in-progress capsule network built on a magnitude spectrogram.

    Stacks a plain convolution, a primary-capsule layer and a 1x1
    convolutional-capsule layer. The mask reconstruction, cost, optimizer
    and training op are not implemented yet.
    """

    def __init__(self, mixed_mag, voice_mag, is_training, reuse=True, name='basic_caps_net'):
        """
        mixed_mag:    Tensor with shape [batch_size, height, width, channels]; the network input
        voice_mag:    Tensor stored on the instance only; no layer reads it yet
        is_training:  Stored on the instance only; no layer reads it yet
        reuse:        Accepted but currently unused
        name:         Model instance name, used as the outer variable scope
        """
        # Plain attribute bookkeeping; creates no graph ops.
        self.mixed_mag = mixed_mag
        self.voice_mag = voice_mag
        self.is_training = is_training

        with tf.variable_scope(name):
            with tf.variable_scope('Convolution'):
                conv_out = mf.conv(mixed_mag, filters=128, kernel_size=5, stride=(1, 1))
                self.conv1 = conv_out

            with tf.variable_scope('Primary_Caps'):
                primary_out, primary_act = cl.layers.primaryCaps(
                    inputs=conv_out,
                    filters=16,
                    kernel_size=5,
                    strides=(1, 1),
                    out_caps_dims=[8, 1],
                    method='norm',
                )
                self.primary_caps = (primary_out, primary_act)

            with tf.variable_scope('Conv_Caps'):
                caps_out, caps_act = cl.layers.conv2d(
                    inputs=primary_out,
                    activation=primary_act,
                    filters=1,
                    out_caps_dims=[16, 1],
                    kernel_size=1,
                    strides=(1, 1),
                    padding="valid",
                    routing_method="DynamicRouting",
                    reuse=None,
                )
                self.conv_caps = (caps_out, caps_act)

            # TODO: with tf.variable_scope('Reconstruction_Conv'):
            #       output should be (?, 256, 512, 128)
            # TODO: with tf.variable_scope('Mask_Constructor'):
            #       output.shape == mixed_mag.shape ((?, 256, 512, 1))
            # Open question: what does the output of the caps layers need to be?
            #       A mask of shape [input_tensor].

            # Placeholder: the raw Conv_Caps output stands in for the mask.
            self.voice_mask = caps_out
            # TODO: self.gen_voice = self.voice_mask * mixed_mag

            # TODO: self.cost, self.optimizer, self.training_op

In [43]:

# Build the network graph on top of the pipeline tensors.
# NOTE(review): a literal False is passed for is_training rather than the
# `is_training` placeholder created in the pipeline cell - confirm this is intended.
model = basicCapsNet(mixed_mag, voice_mag, is_training=False)

In [12]:
# Inspect the (output, activation) tuple stored by the Conv_Caps layer.
model.conv_caps

(<tf.Tensor 'basic_caps_net/Conv_Caps/conv2d/routing/Squeeze:0' shape=(?, 252, 508, 1, 16, 1) dtype=float32>,
 <tf.Tensor 'basic_caps_net/Conv_Caps/conv2d/clip_by_value:0' shape=(?, 252, 508, 1) dtype=float32>)

Network so far:
- Input: (?, 256, 512, 1)
- Convolution: (?, 252, 508, 128)
- Primary_Caps: (?, 252, 508, 16, 8, 1), (?, 252, 508, 16)
- Conv_Caps: (?, 252, 508, 1, 16, 1), (?, 252, 508, 1)

Up-ConvCaps layer needs to:
- take - 
    - input: [batch size, height, width, channels (capsule layer count), caps dim 1, caps dim 2]
    - activation: [batch size, height, width, channels (capsule layer count)]
    - output dims: [channels (capsule layer count), caps dim 1, caps dim 2]
    - strides
    - padding
- return - 
    - output: [batch size, height, width, channels (capsule layer count), caps dim 1, caps dim 2]

Conv_caps to Conv layer needs to:
- take - 
    - input: 6D Tensor, [batch size, height, width, channels (capsule layer count), caps dim 1, caps dim 2]
    - activation: 4D Tensor, [batch size, height, width, channels (capsule layer count)]
    - output filters: int, channels (convolutional filter count)
    - strides
    - padding
- return - 
    - output: 4D Tensor, [batch size, height, width, channels (convolutional filter count)]

Mask reconstruction layer needs to:
- take - 
    - input: 4D Tensor, [batch size, height, width, channels (convolutional filter count)]
    - activation: [batch size, height, width, channels (capsule layer count)]
    - output filters: int, channels (depth of original input, generally one, but flexibility would be good)
    - strides
    - padding
- return - 
    - output: 4D Tensor, [batch size, height, width, channels (depth of original input, generally one, but flexibility would be good)]

Can this be a normal `tf.nn.conv2d_transpose`?
- Args -
    - value: 4D Tensor, [batch, height, width, in_channels]
    - filter: 4D Tensor, [height, width, output_channels, in_channels]
    - output_shape: 1-D Tensor representing the output shape of the deconvolution op
    - strides
    - padding
- Returns -
    - A Tensor with the same type as value

In [29]:
# Inspect the static shape of the network input tensor.
mixed_mag

<tf.Tensor 'ExpandDims:0' shape=(?, 256, 512, 1) dtype=float32>

In [None]:
# Experiment: reshape mixed_mag to [-1, dim1, dim2, dim3] using its own static
# dimensions, i.e. only the leading dimension is left for tf.reshape to infer.
test = tf.reshape(mixed_mag, shape=[-1, mixed_mag.shape[1], mixed_mag.shape[2], mixed_mag.shape[3]])

In [None]:
# Inspect the current value of `test`.
test

In [None]:
# Inspect the activation tensor (second tuple element) of the Primary_Caps layer.
model.primary_caps[1]

In [19]:
# NOTE(review): `transforming` is not defined or imported in any visible cell,
# so this line raises NameError on a fresh kernel - confirm where it comes from.
test = transforming(model.voice_mask,1, [16,1])

In [20]:
# Inspect the tensor returned by `transforming`.
test

<tf.Tensor 'transforming/Sum:0' shape=(?, 252, 508, 16, 1, 16, 1) dtype=float32>

In [43]:
# Repeat the model's Conv_Caps layer call on the primary-capsule outputs, this
# time outside the model's variable scopes and with name=None. The recorded
# output has the same shapes as model.conv_caps:
# (?, 252, 508, 1, 16, 1) and (?, 252, 508, 1).
test2 = cl.layers.conv2d(inputs=model.primary_caps[0],
                         activation=model.primary_caps[1],
                         filters=1,
                         out_caps_dims=[16,1],
                         kernel_size=1,
                         strides=(1,1),
                         padding="valid",
                         routing_method="DynamicRouting",
                         name=None,
                         reuse=None)
test2

(<tf.Tensor 'conv2d/routing/Squeeze:0' shape=(?, 252, 508, 1, 16, 1) dtype=float32>,
 <tf.Tensor 'conv2d/clip_by_value:0' shape=(?, 252, 508, 1) dtype=float32>)

In [62]:
# Lists compare lexicographically: the first unequal pair (1 vs 2) decides the
# result, so the second pair (6 vs 4) is never consulted.
a, b = 1, 6
c, d = 2, 4

[a, b] < [c, d]

True

In [15]:
# Toy lists for trying out list concatenation.
spatial_shape = list(range(1, 5))
input_shape = [5] * 3

In [16]:
# -1 first, then every spatial entry, then input_shape without its first entry.
[-1, *spatial_shape, *input_shape[1:]]

[-1, 1, 2, 3, 4, 5, 5]

In [None]:
def space_to_batch_nd_v1(inputs, kernel_size, strides, name=None):
    """Gather sliding windows along axes 1, 2 and 3 of `inputs`.

    Original note - for convCapsNet model: memory 4719M, speed 0.169 sec/step

    Args:
        inputs: Tensor whose dimensions 1-3 (height, width, depth) are
            statically known; they are fed to `range` below, so they must
            be plain integers.
        kernel_size: Sequence of three ints, window size per axis.
        strides: Sequence of three ints, window step per axis.
        name: Optional name scope; defaults to "space_to_batch_nd".

    Returns:
        The gathered tensor. Each of the three windowed axes is replaced by
        a (window count, window size) pair of axes. No transpose or reshape
        is applied here.
    """
    def window_offsets(size, kernel, stride):
        # One list of consecutive indices per window start position.
        return [list(range(start, start + kernel))
                for start in range(0, size + 1 - kernel, stride)]

    name = "space_to_batch_nd" if name is None else name
    with tf.name_scope(name):
        height, width, depth = cl.shape(inputs)[1:4]
        h_offsets = window_offsets(height, kernel_size[0], strides[0])
        w_offsets = window_offsets(width, kernel_size[1], strides[1])
        d_offsets = window_offsets(depth, kernel_size[2], strides[2])
        # Each gather with a 2-D index list adds one axis, which is why the
        # width axis is addressed as 3 and the depth axis as 5.
        patched = tf.gather(inputs, h_offsets, axis=1)
        patched = tf.gather(patched, w_offsets, axis=3)
        patched = tf.gather(patched, d_offsets, axis=5)
    # The result used to be discarded, so every caller received None.
    return patched

In [46]:
# Open a session and run the global-variables initializer.
# NOTE(review): the session is never closed in the visible cells.
sess = tf.Session()
sess.run(tf.global_variables_initializer())
# Tensor to be windowed by hand: the first element of the Primary_Caps tuple.
inputs = model.primary_caps[0]
# Dimensions 1-3 of that tensor; the offsets cell feeds them to range(), so
# they must be plain Python ints.
height, width, depth = cl.shape(inputs)[1:4]
# Window size and step per axis, ordered (height, width, depth).
kernel_size = [5,5,1]
strides = [2,2,1]

In [47]:
# One list of consecutive indices per window start, for each of the three axes.
h_offsets = [list(range(start, start + kernel_size[0]))
             for start in range(0, height + 1 - kernel_size[0], strides[0])]
w_offsets = [list(range(start, start + kernel_size[1]))
             for start in range(0, width + 1 - kernel_size[1], strides[1])]
d_offsets = [list(range(start, start + kernel_size[2]))
             for start in range(0, depth + 1 - kernel_size[2], strides[2])]

In [48]:
# Gather the height windows: axis 1 becomes (window count, window size),
# giving (?, 124, 5, 508, 16, 8, 1) in the recorded run.
patched = tf.gather(inputs, h_offsets, axis=1)
patched

<tf.Tensor 'GatherV2:0' shape=(?, 124, 5, 508, 16, 8, 1) dtype=float32>

In [49]:
# Gather the width windows. Width sits at axis 3 because the previous gather
# added an axis. NOTE: overwrites `patched`, so re-running this cell alone
# gathers again on the already-gathered tensor.
patched = tf.gather(patched, w_offsets, axis=3)
patched

<tf.Tensor 'GatherV2_1:0' shape=(?, 124, 5, 252, 5, 16, 8, 1) dtype=float32>

In [50]:
# Gather the depth windows. Depth sits at axis 5 after the two earlier
# gathers. NOTE: overwrites `patched`, so this cell is not idempotent.
patched = tf.gather(patched, d_offsets, axis=5)
patched

<tf.Tensor 'GatherV2_2:0' shape=(?, 124, 5, 252, 5, 16, 1, 8, 1) dtype=float32>

In [52]:
# Move the three window-count axes (1, 3, 5) ahead of the three in-window axes
# (2, 4, 6); on a rank-9 tensor the two trailing axes (7, 8) stay in place.
perm = [0, 1, 3, 5, 2, 4, 6] if len(patched.shape) == 7 else [0, 1, 3, 5, 2, 4, 6, 7, 8]
# NOTE: overwrites `patched`, so this cell is not safe to run twice in a row.
patched = tf.transpose(patched, perm=perm)
shape = cl.shape(patched)
shape

[<tf.Tensor 'shape_2/strided_slice:0' shape=() dtype=int32>,
 124,
 252,
 16,
 5,
 5,
 1,
 8,
 1]

In [53]:
import numpy as np

# Collapse the per-window axes into one. For a rank-9 tensor the last two axes
# are kept as they are; otherwise everything from axis 3 onward is folded.
# NOTE: overwrites `shape`, so this cell is not safe to run twice in a row.
if len(patched.shape) == 9:
    shape = shape[:3] + [np.prod(shape[3:-2])] + shape[-2:]
else:
    shape = shape[:3] + [np.prod(shape[3:])]
shape

[<tf.Tensor 'shape_2/strided_slice:0' shape=() dtype=int32>,
 124,
 252,
 400,
 8,
 1]

In [54]:
# Apply the collapsed shape, giving (?, 124, 252, 400, 8, 1) in the recorded
# run. NOTE: overwrites `patched` once more.
patched = tf.reshape(patched, shape=shape)
patched

<tf.Tensor 'Reshape:0' shape=(?, 124, 252, 400, 8, 1) dtype=float32>