### Import modules

In [1]:
import tensorflow as tf

# from https://medium.com/ibm-data-ai/memory-hygiene-with-tensorflow-during-model-training-and-deployment-for-inference-45cf49a15688
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    for gpu in gpus:
        print(str(gpu))
        tf.config.experimental.set_virtual_device_configuration(gpu,[tf.config.experimental.VirtualDeviceConfiguration(memory_limit=4096)])


import tensorflow.keras as keras
import numpy as np
from dataset import *

2022-12-31 23:04:01.478556: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-12-31 23:04:02.084255: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2022-12-31 23:04:03.901343: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory
2022-12-31 23:04:03.901512: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or 

PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')


#### GPU config

### Create dataset from test audio file


In [2]:
xf, X = create_dataset('audio/training_audio.wav', 2, 0.5)
print(X.shape)

(116, 1024, 169)


### Define some 'blocks' of NN code

In [3]:
def residual_block(inputs, input_shape, channels, block_name='ResBlock'):
    
    conv2d_1 = keras.layers.Conv2D(channels, kernel_size=(10, 1), padding='same', input_shape=input_shape)(inputs)

    batch_norm_1 = keras.layers.BatchNormalization()(conv2d_1)
    activation_1 = keras.layers.Activation('relu')(batch_norm_1)

    conv2d_2 = keras.layers.Conv2D(channels, kernel_size=10, padding='same')(activation_1)
    batch_norm_2 = keras.layers.BatchNormalization()(conv2d_2)

    add = keras.layers.Add()([batch_norm_2, inputs])
    activation_2 = keras.layers.Activation('relu')(add)
    return activation_2

def downscale_block(inputs, num_filters, block_name='DownscaleBlock'):

    downscale = keras.layers.Conv2D(num_filters, kernel_size=(10, 1), padding='same')(inputs)
    batch_norm = keras.layers.BatchNormalization()(downscale)
    activation = keras.layers.Activation('relu')(batch_norm)
    
    return activation

def upscale_block(inputs, target_size, num_filters, block_name='UpscaleBlock'):
    
    _, height, timesteps, channels = inputs.shape
    upscale = keras.layers.Conv2DTranspose(num_filters, (target_size - height + 1, 1))(inputs)
    batch_norm = keras.layers.BatchNormalization()(upscale)
    activation = keras.layers.Activation('relu')(batch_norm)

    return activation

### Define and train model

In [5]:
def encoder_lstm_decoder(spec_shape, start_filters):
    input_length =  spec_shape[0]
    time_steps = spec_shape[1]
    input_shape = (input_length, time_steps, 1)

    # Encoder
    inputs = keras.layers.Input(input_shape)                                                                    # input_length is power of two
    print(input_shape)

    res_block_1 = residual_block(inputs, input_shape, 1, 'ResBlock1')
    max_pool_1 = keras.layers.MaxPool2D(pool_size=(2,1))(res_block_1)                 # (input_length/2,)
    down_scale_1 = downscale_block(max_pool_1, start_filters, 'DownscaleBlock1')
    print(down_scale_1.shape)

    res_block_2 = residual_block(down_scale_1, (input_length//2, time_steps, start_filters), start_filters, 'ResBlock2')
    max_pool_2 = keras.layers.MaxPool2D(pool_size=(2,1))(res_block_2)                 # (input_length/2,)
    down_scale_2 = downscale_block(max_pool_2, start_filters * 2, 'DownscaleBlock2')
    print(down_scale_2.shape)

    res_block_3 = residual_block(down_scale_2, (input_length//4, time_steps, start_filters*2), start_filters * 2, 'ResBlock3')
    max_pool_3 = keras.layers.MaxPool2D(pool_size=(2,1))(res_block_2)                 # (input_length/2,)
    down_scale_3 = downscale_block(max_pool_3, start_filters * 4, 'DownscaleBlock3')
    print(down_scale_3.shape)

    res_block_4 = residual_block(down_scale_3, (input_length//8, time_steps, start_filters*4), start_filters * 4, 'ResBlock4')
    max_pool_4 = keras.layers.MaxPool2D(pool_size=(2,1))(res_block_4)                 # (input_length/2,)
    down_scale_4 = downscale_block(max_pool_4, start_filters * 8, 'DownscaleBlock4')
    print(down_scale_4.shape)

    # Decoder
    _, height, timesteps, channels = down_scale_4.shape
    # down_scale_4_flat = tf.reshape(down_scale_4, (-1, timesteps, height * channels))
    down_scale_4_flat = keras.layers.Reshape((timesteps, height * channels))(down_scale_4)
    print(f"down_scale_4 {down_scale_4_flat.shape}")
    lstm = keras.layers.LSTM(height * channels, return_sequences=True)(down_scale_4_flat)    # todo CHANGE N UNITS

    print(f"LSTM: {lstm.shape}")

    # lstm_unflatten = tf.reshape(lstm, (-1, height, timesteps, channels))
    lstm_unflatten = keras.layers.Reshape((height, timesteps, channels))(lstm)
    print(f"Unflattened LSTM: {lstm_unflatten.shape}")

    upscale_1 = upscale_block(lstm_unflatten, input_length//8, start_filters * 4, 'Upscale1')
    # print(upscale_1.shape)
    upscale_2 = upscale_block(upscale_1, input_length//4, start_filters * 2, 'Upscale2')
    # print(upscale_2.shape)
    upscale_3 = upscale_block(upscale_2, input_length//2, start_filters, 'Upscale3')
    # print(upscale_3.shape)
    upscale_4 = upscale_block(upscale_3, input_length, 1, 'Upscale4')
    # print(upscale_4.shape)

    _, height, timesteps, channels = upscale_4.shape
    # outputs = tf.reshape(upscale_4, (-1, height, timesteps))
    outputs = keras.layers.Reshape((height, timesteps))(upscale_4)
    # print(outputs.shape)
    
    # outputs = keras.layers.Dense(input_length, activation='sigmoid')(upscale_4_flatten)
    
    model = keras.Model(inputs=inputs, outputs=outputs)
    return model

In [6]:
X_fit = X[:2]

print("==================")
print(X_fit.shape)
print("==================")
model = encoder_lstm_decoder(X_fit[0].shape, 8)
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
              loss=tf.keras.losses.BinaryCrossentropy(),
              metrics=[tf.keras.metrics.BinaryAccuracy(),
                       tf.keras.metrics.FalseNegatives()])
print("COMPILATION FINISHED")
print(model.summary())
# model.fit(x=X, y=X, epochs=1)


(2, 1024, 169)
(1024, 169, 1)


2022-12-31 23:05:14.319859: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-12-31 23:05:14.325499: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:966] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2022-12-31 23:05:14.326647: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:966] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2022-12-31 23:05:14.327648: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:966] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built witho

(None, 512, 169, 8)
(None, 256, 169, 16)
(None, 256, 169, 32)
(None, 128, 169, 64)
down_scale_4 (None, 169, 8192)


2022-12-31 23:05:28.165593: W tensorflow/core/common_runtime/bfc_allocator.cc:479] Allocator (GPU_0_bfc) ran out of memory trying to allocate 1.00GiB (rounded to 1073741824)requested by op AddV2
If the cause is memory fragmentation maybe the environment variable 'TF_GPU_ALLOCATOR=cuda_malloc_async' will improve the situation. 
Current allocation summary follows.
Current allocation summary follows.
2022-12-31 23:05:28.165767: I tensorflow/core/common_runtime/bfc_allocator.cc:1033] BFCAllocator dump for GPU_0_bfc
2022-12-31 23:05:28.165789: I tensorflow/core/common_runtime/bfc_allocator.cc:1040] Bin (256): 	Total Chunks: 93, Chunks in use: 93. 23.2KiB allocated for chunks. 23.2KiB in use in bin. 4.8KiB client-requested in use in bin.
2022-12-31 23:05:28.165798: I tensorflow/core/common_runtime/bfc_allocator.cc:1040] Bin (512): 	Total Chunks: 2, Chunks in use: 2. 1.0KiB allocated for chunks. 1.0KiB in use in bin. 720B client-requested in use in bin.
2022-12-31 23:05:28.165805: I tensorflo

ResourceExhaustedError: {{function_node __wrapped__AddV2_device_/job:localhost/replica:0/task:0/device:GPU:0}} failed to allocate memory [Op:AddV2]