## CV_TasNet - Model Parts

In [None]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import glob
import time
import datetime
import numpy as np
%config Completer.use_jedi = False

import tensorflow as tf
import tensorflow.keras as keras

# Workaround noted by the author for an "UnknownError ... cuDNN ..." failure:
# enable memory growth on every visible GPU.
gpus = tf.config.experimental.list_physical_devices('GPU')
try:
    # With no GPU present the list is empty and the loop body never runs.
    # Memory growth currently has to be set identically on all GPUs.
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
except RuntimeError as e:
    # Memory growth must be configured before the GPUs are initialized.
    print(e)

In [None]:
# Report the TensorFlow version in use.
print('tf.__version__:', tf.__version__)

In [None]:
import sys
import random
import soundfile as sf
# sys.getsizeof

In [None]:
# Fixed batch size; used below for the model input and for explicit tensor shapes.
BATCH_SIZE = 8

In [None]:
# tf.__version__ 2.5.0 provides keras.layers.DepthwiseConv2D but no 1-D version,
# so this layer is a hand-written substitute.
class DepthConv1D(keras.layers.Layer):
    """Depthwise 1-D convolution assembled from one ``Conv1D`` per input channel.

    The input is expected to be channels-last: ``[batch, timesteps, channels]``.
    Every input channel is convolved independently with ``filters`` kernels and
    the per-channel results are laid out channel-major on the last axis, giving
    an output of ``[batch, timesteps_out, channels * filters]``.

    Args:
        filters: number of output filters produced for each input channel.
        kernel_size: kernel size of each per-channel convolution.
        strides: stride of each per-channel convolution.
        padding: padding mode forwarded to ``Conv1D`` (default ``'causal'``).
        dilation_rate: dilation rate forwarded to ``Conv1D``.
        bias_initializer: initializer for the optional shared bias vector.
        use_bias: whether to add a bias of shape ``(filters * channels,)``.
        **kwargs: forwarded to ``keras.layers.Layer``.
    """
    def __init__(self, filters, kernel_size, strides=1, padding='causal',
                 dilation_rate=1, bias_initializer='zeros', use_bias=True, **kwargs):
        super(DepthConv1D, self).__init__(**kwargs)
        self.filters = filters
        self.kernel_size = kernel_size
        self.strides = strides
        self.padding = padding
        self.dilation_rate = dilation_rate
        self.bias_initializer = keras.initializers.get(bias_initializer)
        self.use_bias = use_bias

    def build(self, batch_input_shape):
        """Create one bias-free ``Conv1D`` per input channel plus the optional bias."""
        # Kept for backward compatibility only; may be None for dynamic batches
        # and is no longer used by call().
        self.batch_size = batch_input_shape[0]
        self.channels = batch_input_shape[-1] # channels_last
        if self.channels is None:
            raise ValueError('DepthConv1D requires a defined channel (last) dimension.')
        self.layers = [
            keras.layers.Conv1D(self.filters, self.kernel_size, self.strides,
                                padding=self.padding, dilation_rate=self.dilation_rate,
                                use_bias=False, name='depth_conv{}'.format(i))
            for i in range(self.channels)
        ]
        if self.use_bias:
            # A single bias vector is shared by the concatenated output.
            self.b = self.add_weight(name='conv1d_bias',
                        shape=(self.filters*self.channels, ),
                        initializer=self.bias_initializer, trainable=True)
        super(DepthConv1D, self).build(batch_input_shape)

    def call(self, inputs):
        """Convolve each channel separately and join the results on the last axis."""
        # Slice with i:i+1 so every sub-convolution still sees a rank-3 tensor.
        per_channel = [conv(inputs[:, :, i:i+1]) for i, conv in enumerate(self.layers)]
        # Concatenating [batch, time, filters] tensors on the last axis gives the
        # same channel-major layout as stacking on axis 2 and reshaping, but does
        # not need a static batch size.
        outputs = tf.concat(per_channel, axis=-1)
        if self.use_bias:
            outputs = outputs + self.b
        return outputs

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super(DepthConv1D, self).get_config()
        config.update({
            'filters': self.filters,
            'kernel_size': self.kernel_size,
            'strides': self.strides,
            'padding': self.padding,
            'dilation_rate': self.dilation_rate,
            'bias_initializer': keras.initializers.serialize(self.bias_initializer),
            'use_bias': self.use_bias,
        })
        return config

In [None]:
# Built after the 1-D Conv block design in Figure 1 (C) of the paper.
class ConvBlock(keras.layers.Layer):
    """1-D convolutional block producing a skip-connection and a residual output.

    Pipeline: 1x1 conv -> PReLU -> LayerNorm -> depthwise conv -> PReLU ->
    LayerNorm, followed by two parallel linear 1x1 convs (skip-connection and
    residual). The residual branch is added back to the block input, so
    ``res_filters`` has to equal the number of input channels.
    """
    def __init__(self, h_filters, kernel_size, dilation_rate,
                 sc_filters, res_filters, **kwargs):
        '''
        h_filters: num of channels in convolutional blocks
        kernel_size: kernel size in convolutional blocks
        dilation_rate: dilation rate of the depthwise convolution
        sc_filters: num of channels of 1x1-conv skip-connection
        res_filters: num of channels of 1x1-conv residual path
        '''
        super(ConvBlock, self).__init__(**kwargs)
        # The 1x1 conv and the depthwise conv are each followed by PReLU and then normalization.
        # Pointwise conv to begin with; a dilation rate is meaningless for a 1x1 conv.
        self.point_conv = keras.layers.Conv1D(h_filters, kernel_size=1,
                                use_bias=False, name='point_conv')
        self.prelu1 = keras.layers.PReLU(shared_axes=-1, name='1st_prelu')
        self.norm1 = keras.layers.LayerNormalization(axis=-1, name='1st_norm')

        # Depthwise conv: one filter per channel, so the channel count stays h_filters.
        self.depth_conv = DepthConv1D(1, kernel_size=kernel_size, strides=1, padding='causal',
                            dilation_rate=dilation_rate, use_bias=False, name='depth_conv')
        self.prelu2 = keras.layers.PReLU(shared_axes=-1, name='2nd_prelu')
        self.norm2 = keras.layers.LayerNormalization(axis=-1, name='2nd_norm')

        # Linear 1x1 conv for the skip-connection path.
        self.point_conv2 = keras.layers.Conv1D(sc_filters, kernel_size=1,
                                        use_bias=False, name='sc_conv')
        # Linear 1x1 conv for the residual path.
        self.point_conv3 = keras.layers.Conv1D(res_filters, kernel_size=1,
                                        use_bias=False, name='res_conv')

    def call(self, inputs):
        """Return ``(skip_connection, inputs + residual)`` for one block."""
        x = self.point_conv(inputs)
        x = self.norm1(self.prelu1(x)) # PReLU, then normalization, after the 1x1 conv
        x = self.depth_conv(x)
        x = self.norm2(self.prelu2(x)) # PReLU, then normalization, after the depthwise conv
        skip_connection = self.point_conv2(x)
        residual = self.point_conv3(x)
        # No activation on either output. The skip-connection is meant to be summed
        # across blocks; the residual sum is the input of the next block.
        return skip_connection, inputs + residual

In [None]:
# # test1
# num_channels = 128 # this equals number of residual filters
# x = tf.random.normal((64, 16, num_channels)) # 64 samples, 16 timesteps, num_channels (=128) channels
# cblock3 = ConvBlock(h_filters=512, kernel_size=3, dilation_rate=1, sc_filters=128, res_filters=num_channels)
# sc_output3, res_output3 = cblock3(x)
# print(sc_output3.shape)
# print(res_output3.shape)

In [None]:
# C_Blocks: a stack of ConvBlocks whose dilation rate changes from block to block.
def C_Blocks(num_of_blocks, h_filters, kernel_size, sc_filters, res_filters):
    """Build ``num_of_blocks`` ConvBlocks with dilation rates 1, 2, 4, ..., 2**(n-1).

    All blocks share ``h_filters``, ``kernel_size``, ``sc_filters`` and
    ``res_filters``; only the dilation rate differs. Returns them as a list in
    order of increasing dilation.
    """
    return [
        ConvBlock(h_filters, kernel_size, 2 ** block_idx, sc_filters, res_filters)
        for block_idx in range(num_of_blocks)
    ]

In [None]:
# Hyperparameters (labelled "best" by the author)
num_spks = 2 # number of speakers: one mask and one decoder per speaker
n_filters = 512 # enc_dim: number of encoder filters
length = 16 # encoder/decoder kernel size; the stride is length//2
b_filters = 128 # feature_dim: bottleneck channels, also the residual-path channels
h_filters = 512 # channels inside each convolutional block
sc_filters = 128 # channels of the skip-connection path
p = 3 # kernel size inside the convolutional blocks
x = 8 # number of convolutional blocks per repeat
r = 3 # number of repeats

In [None]:
# Symbolic model input: 16000-sample, single-channel (monophonic) segments with a fixed batch size.
# The author describes these as 2-second segments, which presumably implies 8 kHz audio — TODO confirm.
sample_inputs = keras.Input((16000, 1), batch_size=BATCH_SIZE)

In [None]:
# Encoder: expand the waveform to n_filters channels (the number of basis signals, 512).
# Each frame spans `length` samples and frames advance by length//2 samples.
encoder = keras.layers.Conv1D(n_filters, kernel_size=length, strides=length//2, 
                              use_bias=False, activation='sigmoid')
sample_encoder_outputs = encoder(sample_inputs)
print('Sample encoder outputs shape:', sample_encoder_outputs.shape)

In [None]:
# Layer normalization followed by a pointwise (1x1) bottleneck conv that
# reduces the encoder output to b_filters channels.
layernorm = keras.layers.LayerNormalization()
bottleneck_layer = keras.layers.Conv1D(b_filters, kernel_size=1,
                                use_bias=False, name='bottleneck_layer')

before_blocks = layernorm(sample_encoder_outputs)
before_blocks = bottleneck_layer(before_blocks)
print('Before blocks shape:', before_blocks.shape) # everything up to here precedes the separation module's 1-D Conv blocks

In [None]:
# Repeat the stack of x dilated ConvBlocks r times and flatten into one list.
blocks = []
for _ in range(r):
    blocks.extend(C_Blocks(x, h_filters, p, sc_filters, b_filters))
print('Total block numbers:', len(blocks))

In [None]:
# Run the separation module's 1-D Conv blocks in sequence.
# Each block returns (skip-connection output, residual output): the skip outputs
# are summed up into after_blocks, the residual output is the next block's input.
res_output = before_blocks
after_blocks = tf.zeros((BATCH_SIZE, sample_encoder_outputs.shape[1], sc_filters))
for i, block in enumerate(blocks):
    sc_output, res_output = block(res_output)
    after_blocks += sc_output
    print('Number {} block done'.format(i), end="\t")
print()
print('After blocks outputs shape:', after_blocks.shape) # sum of all skip connections: [BATCH_SIZE, frames, sc_filters]

In [None]:
# Mask-estimation head: PReLU followed by a pointwise (1x1) Conv1D.
# The mask non-linearity itself is not applied in this cell.
prelu_layer = keras.layers.PReLU(shared_axes=-1)
# One group of n_filters channels per speaker, hence n_filters * num_spks output
# channels (previously hard-coded as n_filters*2, i.e. valid only for two speakers).
bottleneck_later = keras.layers.Conv1D(n_filters*num_spks, kernel_size=1, use_bias=False,
                                      name='bottleneck_layer2')
after_separation = prelu_layer(after_blocks)
after_separation = bottleneck_later(after_separation)
print(after_separation.shape) # [BATCH_SIZE, frames, n_filters*num_spks]

In [None]:
# Split the channel axis into (speaker, filter) and move the speaker axis to the front.
# Reshaping [batch, frames, num_spks*n_filters] straight to
# [batch, num_spks, frames, n_filters] would reinterpret memory and mix time frames
# across speakers, so reshape next to the channel axis first and then transpose.
reshaped = tf.reshape(after_separation, shape=(BATCH_SIZE, -1, num_spks, n_filters))
reshaped = tf.transpose(reshaped, perm=[0, 2, 1, 3])
# Softmax over the speaker axis makes the masks sum to 1 across speakers for every
# (frame, filter) position, which is the unit-summation constraint.
masks = keras.activations.softmax(reshaped, axis=1)
print('masks.shape:', masks.shape) # (BATCH_SIZE, num_spks, frames, n_filters)

# Element-wise multiplication of the encoder outputs with each speaker's mask.
# NOTE(review): relies on the Multiply layer broadcasting the rank-3 encoder output
# over the speaker axis of the rank-4 masks — confirm on the TF version in use.
separation_outputs = keras.layers.Multiply()([sample_encoder_outputs, masks])
print('Separation_outputs.shape:', separation_outputs.shape)

In [None]:
# One masked encoder representation and one transposed-conv decoder per speaker.
sep = [separation_outputs[:, spk, :, :] for spk in range(num_spks)]
decoders = [
    keras.layers.Conv1DTranspose(1, kernel_size=length, strides=length//2,
                                 use_bias=False, name='Decoder_Num{}'.format(spk))
    for spk in range(num_spks)
]

# Decode every speaker's masked representation with its own decoder.
decoder_outputs = [decoder(masked) for decoder, masked in zip(decoders, sep)]

# Join the first two decoded signals along the last axis.
outputs1, outputs2 = decoder_outputs[0], decoder_outputs[1]
outputs = tf.concat([outputs1, outputs2], axis=-1)
print('Final outputs shape:', outputs.shape)

In [None]:
# Wrap the graph from the waveform input to the concatenated decoder outputs
# into a Keras model and print its layer summary.
cv_tasnet = keras.Model(sample_inputs, outputs)
cv_tasnet.summary()

In [None]:
# Adam with learning rate 0.001 and gradient-norm clipping at 5.
# NOTE(review): the optimizer is attached as a plain attribute instead of via
# compile(); presumably a custom training loop elsewhere reads it — confirm.
cv_tasnet.optimizer = keras.optimizers.Adam(0.001, clipnorm=5.)

In [None]:
# Count model parameters by summing the element counts of all variables.
trainable_lists = cv_tasnet.trainable_variables
non_trainable_lists = cv_tasnet.non_trainable_variables
trainable_vars = sum(np.prod(var.shape) for var in trainable_lists)
non_trainable_vars = sum(np.prod(var.shape) for var in non_trainable_lists)
total_vars = trainable_vars + non_trainable_vars
print('Total parameters:', total_vars)