### Import modules

In [2]:
import tensorflow as tf

# Cap how much memory TensorFlow may claim on each visible GPU. This has to run
# before anything initialises the GPUs.
# from https://medium.com/ibm-data-ai/memory-hygiene-with-tensorflow-during-model-training-and-deployment-for-inference-45cf49a15688
GPU_MEMORY_LIMIT_MB = 4096

gpus = tf.config.list_physical_devices('GPU')
for gpu in gpus:  # an empty list simply skips the loop
    print(str(gpu))
    # Stable aliases of the experimental virtual-device API.
    capped_device = tf.config.LogicalDeviceConfiguration(memory_limit=GPU_MEMORY_LIMIT_MB)
    tf.config.set_logical_device_configuration(gpu, [capped_device])


import tensorflow.keras as keras
from keras import layers
import numpy as np
from dataset import *

2023-01-04 15:33:44.030418: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-01-04 15:33:44.277439: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-01-04 15:33:45.283946: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory
2023-01-04 15:33:45.284099: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or 

PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')


#### GPU config

### Create dataset from the training audio file


In [None]:
# Build the spectrogram dataset from the training recording.
# NOTE(review): the meaning of the two numeric arguments (2, 0.5) is defined by
# dataset.create_dataset and is not visible here — confirm before changing them.
dataset_args = ('audio/training_audio.wav', 2, 0.5)
xf, X, spec_params = create_dataset(*dataset_args)

# Keep only element-wise absolute values for the model.
X = np.absolute(X)
print(X.shape)

## Define model

In [10]:
# Convolutional encoder -> LSTM -> convolutional decoder autoencoder.
#
# Input and output are single-channel (N_ROWS, N_COLS, 1) images. Every
# convolution uses a (1, 3) kernel and every pool / upsample uses a (1, 2)
# window, so only the N_COLS axis is ever convolved or resized; each of the
# N_ROWS rows is processed independently until the LSTM, which treats the rows
# as its sequence axis.
POOLS = 5
N_ROWS, N_COLS = 256, 1024

# Each pooling stage halves the column axis, so it must divide evenly.
assert N_COLS % 2**POOLS == 0, "N_COLS must be divisible by 2**POOLS"

inputs = keras.Input((N_ROWS, N_COLS, 1))

# Encoder - Conv2D gradually down to LSTM.
# Conv1D didn't work as expected, so Conv2D is used with a kernel that is
# 1 row high and 3 columns wide (i.e. a per-row 1-D convolution).
encoded = inputs
for i in range(POOLS):
    normed = layers.BatchNormalization()(encoded)
    conv = layers.Conv2D(2**i, (1, 3), (1, 1), padding='same', activation='relu')(normed)
    # NOTE(review): the original cell carried an "add" marker here; its intent
    # is not shown (possibly a planned skip connection) — confirm.
    pooled = layers.MaxPool2D((1, 2), (1, 2))(conv)
    encoded = layers.Conv2D(2**(i + 1), (1, 3), (1, 1), padding='same', activation='relu')(pooled)

# After POOLS stages: (N_ROWS, N_COLS / 2**POOLS, 2**POOLS). Flattening the
# last two axes gives back N_COLS features per row.
lstm_input = layers.Reshape((N_ROWS, N_COLS))(encoded)

lstm = layers.LSTM(512, return_sequences=True)(lstm_input)
dense = layers.Dense(N_COLS)(lstm)

# Decoder - mirror of the encoder: unflatten, then repeatedly halve the
# channel count and double the column axis.
decoded = layers.Reshape((N_ROWS, N_COLS // 2**POOLS, 2**POOLS))(dense)
for i in range(POOLS):
    n_filters = 2**(POOLS - i - 1)
    is_last_stage = i == POOLS - 1

    normed = layers.BatchNormalization()(decoded)
    reduced = layers.Conv2D(n_filters, (1, 3), (1, 1), padding='same', activation='relu')(normed)
    upsampled = layers.UpSampling2D((1, 2))(reduced)
    # The final convolution is left linear. A ReLU here would hand only
    # non-negative values to the sigmoid below and pin every output to >= 0.5,
    # which cannot reproduce targets scaled to [0, 1].
    # Weight shapes are unchanged, but weights trained with the old ReLU head
    # should be retrained.
    decoded = layers.Conv2D(
        n_filters, (1, 3), (1, 1), padding='same',
        activation=None if is_last_stage else 'relu',
    )(upsampled)

outputs = layers.Activation('sigmoid')(decoded)
model = keras.Model(inputs=inputs, outputs=outputs, name="conv1d-lstm")

# Sanity check: shapes of the last ten layers, ending at the input shape.
for layer in model.layers[-10:]:
    print(layer.output_shape)


(None, 256, 256, 4)
(None, 256, 256, 4)
(None, 256, 256, 2)
(None, 256, 512, 2)
(None, 256, 512, 2)
(None, 256, 512, 2)
(None, 256, 512, 1)
(None, 256, 1024, 1)
(None, 256, 1024, 1)
(None, 256, 1024, 1)


In [None]:
# Adam with per-element binary cross-entropy against the sigmoid output.
compile_options = {"optimizer": 'adam', "loss": 'binary_crossentropy'}
model.compile(**compile_options)
model.summary()

In [None]:
# Min-max scale the whole dataset into [0, 1] so it matches the range of the
# model's sigmoid output. The extrema are taken over the entire array.
x_min = np.min(X)
x_max = np.max(X)
if x_max == x_min:
    # A constant array would give 0/0 and silently fill the training data with NaN.
    raise ValueError("Cannot min-max normalise X: all values are identical")
X_fit = (X - x_min) / (x_max - x_min)

print("==================")
print(X_fit.shape)
print("==================")


In [None]:
# Warm-start from previously stored weights.
# NOTE(review): this path differs from the "cnn_autoencoder" path used by
# model.save further down — confirm that is intentional.
weights_path = "lstm_autoencoder"
model.load_weights(weights_path)

In [None]:
# Autoencoder training: the normalised data is both the input and the target.
fit_options = dict(epochs=200, shuffle=True)
model.fit(X_fit, X_fit, **fit_options)

### COMPARE SPECTROGRAMS IN IMAGES


In [None]:
import matplotlib.pyplot as plt

In [None]:
# Run one example through the model and save input/output as images.
# The example is taken from X_fit rather than raw X: the model is fitted on the
# min-max normalised data, so inference must see the same scaling.
example_spec = X_fit[0]
plt.imsave("INPUT_EXAMPLE.png", example_spec)

# Add the batch and channel axes the model input expects: (1, rows, cols, 1).
x_example = example_spec[np.newaxis, :, :, np.newaxis]
print(x_example.shape)

prediction = model.predict(x_example)
plt.imsave("OUTPUT_EXAMPLE.png", prediction[0, :, :, 0])

In [None]:
# Persist the full model.
# NOTE(review): weights are loaded from "lstm_autoencoder" earlier in the
# notebook but saved here under a different name — confirm that is intentional.
saved_model_path = "cnn_autoencoder"
model.save(saved_model_path)

### Save to audio file

In [None]:
import importlib
import postprocessing

In [None]:
# Pick up edits to postprocessing.py without restarting the kernel.
importlib.reload(postprocessing)

# Turn the predicted spectrogram back into a waveform using the parameters
# returned by create_dataset.
# NOTE(review): `prediction` is the sigmoid output in [0, 1]; nothing here
# undoes the min-max scaling applied before training — confirm
# reverse_spectrogram is meant to receive the scaled values.
out_samp, out_win, out_stride = spec_params
predicted_spec = prediction[0, :, :, 0]
waveform = postprocessing.reverse_spectrogram(predicted_spec, out_samp, out_win, out_stride)

# Column vector: one sample per row.
audio = np.reshape(waveform, (-1, 1))

# NOTE(review): 44100 is hard-coded while out_samp is available, and f_out is
# never visibly closed — check the open_write/write helpers.
f_out = open_write("test_output_cnn.wav", 1, 2, 44100)
write(f_out, audio)