# Load TFRecords, define the model, and train

In [None]:
# Dependencies

 # TensorFlow and tf.keras
import tensorflow as tf
print('Tensorflow Version:', tf.__version__)
from tensorflow import keras
from keras import layers
from keras import models

# Helper libraries
import os
import os.path
import glob
import librosa
import librosa.display
import json
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
import IPython.display as pd
import pprint

# Report whether TensorFlow can see a GPU; without one, training will take a very long time.
gpu_devices = tf.config.list_physical_devices("GPU")
print("GPU", "available (YESS!!!!)" if gpu_devices else "not available :(")



In [None]:
# Read the shared settings from MA_CONFIG.json into the `config` dictionary.
with open('./MA_CONFIG.json', 'r') as fp:
    config = json.load(fp)

# Add (or overwrite) the settings this notebook needs for training.
config.update({
    'batch_size': 64,
    'shuffle_buffer_size': 300,
    'n_epochs': 5,
    'train_dataset_path': '/Users/marius/Documents/Uni/TU_Berlin_Master/Masterarbeit/Dataset/train.tfrecord',
    'test_dataset_path': '/Users/marius/Documents/Uni/TU_Berlin_Master/Masterarbeit/Dataset/test.tfrecord',
})

# Show the resulting configuration.
print(json.dumps(config, indent=4))

# Write the extended configuration back to the same file (keys sorted).
with open('./MA_CONFIG.json', 'w+') as fp:
    json.dump(config, fp, sort_keys=True, indent=4)


# Load datasets

In [None]:
# Load the train and test datasets from disk.
# Every element is a pair of float32 tensors of shape (441000, 1); the same
# spec is therefore shared by both tensors and both datasets.
# NOTE(review): newer TensorFlow releases deprecate tf.data.experimental.load
# in favor of tf.data.Dataset.load -- confirm against the installed version.
waveform_spec = tf.TensorSpec(shape=(441000, 1), dtype=tf.float32, name=None)
element_spec = (waveform_spec, waveform_spec)

train_dataset = tf.data.experimental.load(config['train_dataset_path'],
                                          element_spec,
                                          compression='GZIP')

test_dataset = tf.data.experimental.load(config['test_dataset_path'],
                                         element_spec,
                                         compression='GZIP')

# Count the elements of each dataset (one full pass each, done before
# batching so single examples are counted) and keep the counts in the
# in-memory config so later cells can use them.
config['n_train_elements'] = sum(1 for _ in train_dataset)
print(f"Number of elements in train-dataset: {config['n_train_elements']}")

config['n_test_elements'] = sum(1 for _ in test_dataset)
print(f"Number of elements in test-dataset: {config['n_test_elements']}")

# Shuffle first, then batch, so that every batch mixes different examples.
# NOTE(review): shuffling the test set does not affect aggregate metrics; it
# is kept so downstream cells see the same pipeline as before.
train_dataset = train_dataset.shuffle(config['shuffle_buffer_size']).batch(config['batch_size'])
test_dataset = test_dataset.shuffle(config['shuffle_buffer_size']).batch(config['batch_size'])




In [None]:
# Sanity check: print the shapes of both tensors in the first batch of the
# train dataset, then of the test dataset.
for dataset in (train_dataset, test_dataset):
    for first_tensor, second_tensor in dataset.take(1):
        print(first_tensor.shape)
        print(second_tensor.shape)



# Look at the data

In [None]:
# Preview a few examples from the train dataset: plot both waveforms of each
# example on top of each other, then offer them for playback.
wavs = train_dataset.unbatch().as_numpy_iterator()
noisy = []
gt = []

# One subplot per previewed example.
nrows, ncols = 2, 2
n_previews = nrows * ncols
fig, ax = plt.subplots(nrows=nrows, ncols=ncols, sharex=True, figsize=(16, 9))

for index, (wave, ground_truth) in enumerate(wavs):

    # Fill the grid row by row.
    row, col = divmod(index, ncols)
    panel = ax[row][col]

    # Drop the size-1 axes so the signals can be plotted and played back.
    wave_flat = np.squeeze(wave)
    ground_truth_flat = np.squeeze(ground_truth)

    # Overlay the second waveform semi-transparently on the first.
    librosa.display.waveshow(wave_flat, x_axis='time', sr=config['sr'], ax=panel, label='test_file')
    librosa.display.waveshow(ground_truth_flat, alpha=0.3, x_axis='time', sr=config['sr'], ax=panel, label='ground_truth')
    panel.legend()
    panel.axis('on')
    panel.set_title('10s speech')

    # Remember the signals for the playback loop below.
    noisy.append(wave_flat)
    gt.append(ground_truth_flat)

    # Stop once every subplot has been filled.
    if index + 1 == n_previews:
        break

# Adjust the whitespace between the subplots.
plt.subplots_adjust(hspace=0.25, wspace=0.15)
plt.show()


# Playback widgets for every previewed pair.
# (`pd` is this notebook's alias for IPython.display, not pandas.)
for number, (noisy_wave, gt_wave) in enumerate(zip(noisy, gt), start=1):
    print(f'----------- {number}. speechsnippet ---------------')
    print('')
    print('Voicefixer file')
    pd.display(pd.Audio(noisy_wave.T, rate=config['sr']))
    print('corresponding produced file')
    pd.display(pd.Audio(gt_wave.T, rate=config['sr']))
    print('')

## Model architecture

In [None]:
input_shape = (441000, 1)
output_channels = 1


def build_model(input_shape, *, output_channels=output_channels,
                n_hidden_layers=12, filters=128, kernel_size=32):
    """Build the sequential Conv1D network named 'PostNet_Conv1D'.

    The network is a stack of `n_hidden_layers` identical Conv1D + tanh
    blocks followed by a kernel-size-1 Conv1D + tanh output block. All
    convolutions use 'same' padding, so the length axis of the input is
    preserved. With the default arguments the architecture is 12 hidden
    blocks of 128 filters with kernel size 32, plus the output block.

    Args:
        input_shape: Shape of one input example without the batch axis,
            passed to `keras.Input`.
        output_channels: Number of filters of the final Conv1D layer.
            Defaults to the notebook-level `output_channels` constant as it
            is when this function is defined.
        n_hidden_layers: Number of Conv1D + tanh blocks before the output
            block.
        filters: Number of filters in each hidden Conv1D layer.
        kernel_size: Kernel size of each hidden Conv1D layer.

    Returns:
        The uncompiled `keras.Sequential` model.

    Raises:
        ValueError: If `n_hidden_layers` is negative.
    """
    if n_hidden_layers < 0:
        raise ValueError(f'n_hidden_layers must be >= 0, got {n_hidden_layers}')

    model = keras.Sequential(name='PostNet_Conv1D')
    model.add(keras.Input(shape=input_shape))

    # Hidden stack: identical Conv1D + tanh blocks.
    for _ in range(n_hidden_layers):
        model.add(keras.layers.Conv1D(filters=filters, kernel_size=kernel_size, padding='same'))
        model.add(keras.layers.Activation('tanh'))

    # Output block: pointwise convolution down to `output_channels`; the
    # tanh keeps the output within [-1, 1].
    model.add(keras.layers.Conv1D(filters=output_channels, kernel_size=1, padding='same'))
    model.add(keras.layers.Activation('tanh'))

    return model

In [None]:
# get model
model = build_model(input_shape=input_shape)

# compile model
# The network maps a waveform to a waveform with a single output channel,
# i.e. this is a regression task. Categorical cross-entropy normalizes the
# predictions across the last axis, which has size 1 here, so it provides
# essentially no training signal, and 'accuracy' is not meaningful for
# continuous targets. Use mean squared error as the loss and report the mean
# absolute error instead.
model.compile(optimizer='adam',
              loss='mean_squared_error',
              metrics=['mean_absolute_error'])

# print an overview of the layers and parameter counts
model.summary()

# fit model
model.fit(train_dataset, epochs=config['n_epochs'])

#model.evaluate(test_dataset)