# Load tfrecords, define model and train

In [310]:
# Dependencies

 # TensorFlow and tf.keras
import tensorflow as tf
print('Tensorflow Version:', tf.__version__)
from tensorflow import keras
from keras import layers
from keras import models

#Tensorboard
%load_ext tensorboard


# Helper libraries
import os
import os.path
import glob
import librosa
import librosa.display
import json
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
import IPython.display as pd
import pprint
import datetime        
from scipy.io.wavfile import write


# Check if the GPU is available (otherwise computing will take a looooonnnnggggg time)
print("GPU", "available (YESS!!!!)" if tf.config.list_physical_devices("GPU") else "not available :(")



Tensorflow Version: 2.9.1
The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard
GPU not available :(


In [311]:
# # Clear any logs from previous runs
# !rm -rf ./logs/

In [329]:
# load global settings in config-dictionary
with open('./MA_CONFIG.json', 'r') as fp:
  config = json.load(fp)

# define some extra values
config['batch_size'] = 16
config['shuffle_buffer_size'] = 300
config['n_epochs'] = 3
config['filter_size'] = 8
config['kernel_size'] = 2

# print config
print(json.dumps(config, indent=4))

# save config to disk
with open('./MA_CONFIG.json', 'w+') as fp:
    json.dump(config, fp, sort_keys=True, indent=4)


{
    "batch_size": 16,
    "filter_size": 8,
    "fps_noisy": "/Users/marius/Documents/Uni/TU_Berlin_Master/Masterarbeit/Data/allFiles/noisySpeech",
    "fps_produced": "/Users/marius/Documents/Uni/TU_Berlin_Master/Masterarbeit/Data/allFiles/producedSpeech",
    "fps_voicefixer": "/Users/marius/Documents/Uni/TU_Berlin_Master/Masterarbeit/Data/allFiles/voicefixerOutput",
    "hop_length": 64,
    "input_shape": [
        441000,
        1
    ],
    "kernel_size": 2,
    "n_epochs": 3,
    "n_fft": 512,
    "n_mels": 16,
    "offset": 6,
    "sample_length": 20,
    "shuffle_buffer_size": 300,
    "sr": 44100,
    "test_dataset_path": "../Dataset/test.tfrecord",
    "train_dataset_path": "../Dataset/train.tfrecord",
    "win_length": 512
}


# load tfrecords

### func for decoding tfrecords

In [328]:
# autotune for performance
AUTOTUNE = tf.data.experimental.AUTOTUNE

# decode tfrecords
def decode_tf_records(seralized_example):
    feature_description = {
        "voicefixer": tf.io.FixedLenFeature([], tf.string),
        "produced": tf.io.FixedLenFeature([], tf.string),
    }
    example = tf.io.parse_single_example(seralized_example, feature_description)

    voicefixer, _ = tf.audio.decode_wav(example["voicefixer"], desired_channels=-1)
    produced, _ = tf.audio.decode_wav(example["produced"], desired_channels=-1)

    # voicefixer, produced = (
    #     tf.squeeze(voicefixer, 1),
    #     tf.squeeze(produced, 1),
    # )
    
    return voicefixer, produced

In [326]:
# load train tfrecords
tfrecords_paths = glob.glob('/Users/marius/Documents/Uni/TU_Berlin_Master/Masterarbeit/Dataset/train_tfrecords/*.tfrecords')
train_dataset = tf.data.TFRecordDataset(tfrecords_paths[:2])
train_dataset = train_dataset.map(decode_tf_records, num_parallel_calls=AUTOTUNE)

# count elements in train_dataset
print(f'Number of elements in train_dataset: {len([d for d in train_dataset])}')

# batching and shuffling
train_dataset = train_dataset.shuffle(config['shuffle_buffer_size']).batch(config['batch_size'])


Number of elements in train_dataset: 120


In [327]:
# load test tfrecords
tfrecords_paths = glob.glob('/Users/marius/Documents/Uni/TU_Berlin_Master/Masterarbeit/Dataset/test_tfrecords/*.tfrecords')
test_dataset = tf.data.TFRecordDataset(tfrecords_paths[:1])
test_dataset = test_dataset.map(decode_tf_records, num_parallel_calls=AUTOTUNE)

# count elements in test_dataset
print(f'Number of elements in test_dataset: {len([d for d in test_dataset])}')

# batching and shuffling
test_dataset = test_dataset.shuffle(config['shuffle_buffer_size']).batch(config['batch_size'])


Number of elements in test_dataset: 60


In [317]:
# check if datasets are loaded correctly
for d in train_dataset:
    print(d[0].shape)
    print(d[1].shape)
    break   

for d in test_dataset:
    print(d[0].shape)
    print(d[1].shape)
    break

(16, 441000, 1)
(16, 441000, 1)
(16, 441000, 1)
(16, 441000, 1)


# look at data

In [318]:
# # look at some example data from train dataset
# wavs = train_dataset.unbatch().as_numpy_iterator()
# noisy = []
# gt = []

# # Setup Subplot
# nrows, ncols = 2, 2
# fig, ax = plt.subplots(nrows=nrows, ncols=ncols, sharex=True, figsize=(16, 9))


# # iterate over dataset
# for i, sample in enumerate(wavs):
    
#     # get the column and row by modulo and remainder
#     j = i % ncols
#     k = int(i / ncols)
    
#     # extract noisy and produced speech file from tensors
#     wave = sample[0]
#     ground_truth = sample[1]
        
#     # plot files
#     librosa.display.waveshow(np.squeeze(wave), x_axis='time', sr=config['sr'], ax=ax[k][j], label='test_file')
#     librosa.display.waveshow(np.squeeze(ground_truth), alpha=0.3, x_axis='time', sr=config['sr'], ax=ax[k][j], label='ground_truth')
#     ax[k][j].legend()
#     ax[k][j].axis('on')
#     ax[k][j].set_title('10s speech')  

#     # save speech to arrays
#     noisy.append(np.squeeze(wave))
#     gt.append(np.squeeze(ground_truth))
    
#     if i+1 == ncols*nrows:
#         break
    
# # adjust whitespace in between subplots        
# plt.subplots_adjust(hspace=0.25, wspace=0.15)
# plt.show()


# # listen to the audio samples
# for i in range(len(gt)):
#     print(f'----------- {i+1}. speechsnippet ---------------')
#     print('')
#     print(f'Voicefixer file')
#     pd.display(pd.Audio(noisy[i].T, rate=config['sr']))
#     print(f'corresponding produced file')
#     pd.display(pd.Audio(gt[i].T, rate=config['sr']))
#     print('')

## Model architecture

In [319]:
# some values for the model
input_shape = (441000, 1)
output_channels = 1
filter_size = config['filter_size']
kernel_size = config['kernel_size']


# build model with 12 layers
def build_model(input_shape):

    # define model
    model = keras.Sequential(name='PostNet_Conv1D')
    model.add(keras.Input(shape=input_shape))

    # add layer 
    model.add(keras.layers.Conv1D(filters=filter_size, kernel_size=kernel_size, padding='same'))
    model.add(keras.layers.Activation('tanh'))

    # Add the remaining Conv1D layers
    for _ in range(11):
        model.add(keras.layers.Conv1D(filters=filter_size, kernel_size=kernel_size, padding='same'))
        model.add(keras.layers.Activation('tanh'))

    # Add the final Conv1D layer
    model.add(keras.layers.Conv1D(filters=output_channels, kernel_size=1, padding='same'))
    model.add(keras.layers.Activation('tanh'))

    return model

In [330]:

# get model
model = build_model(input_shape = input_shape)


# define callbacks

log_dir = "./logs/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")



save_callback = keras.callbacks.ModelCheckpoint(
    filepath='./model_checkpoint',
    save_best_only=False,
    monitor='val_loss',
    verbose=0)

tensorboard_callback = keras.callbacks.TensorBoard(
    log_dir= log_dir,
    histogram_freq=1,
    write_graph=True,
    write_images=True,
    write_steps_per_second=True,
    update_freq='epoch',
    profile_batch=2,
    embeddings_freq=1)



# set speechfile for prediction
dataset = test_dataset.unbatch().as_numpy_iterator()
speech_for_predicition = []
for i, sample in enumerate(dataset):
    speech_for_predicition.append(sample[0])
    break
speech_for_predicition = speech_for_predicition[0]


# define custom callback
class CustomCallback(keras.callbacks.Callback):

    def on_epoch_end(self, epoch, logs=None):
        
        # print(logs)
        print('---------------------')
        print(f'Epoch {epoch+1} Metrics:')
        print(logs)
        print('---------------------')
        print('')
        

        #save audio file to disk

        # get audio file from model prediciton
        audio = self.model.predict(speech_for_predicition)
        
        if not tf.is_tensor(audio):
          audio = tf.convert_to_tensor(audio)

        # change shape to (441000, 1)
        audio = tf.squeeze(audio, axis=-1).numpy()

        # normalize audio with numpy
        audio = librosa.util.normalize(audio)

        # save plot    
        plt.figure(figsize=(8, 4))
        plt.plot(audio)
        plt.savefig(log_dir + '_audiofile_epoch' + str(epoch+1) + '.png')
        plt.close()

        # write audio file to disk
        write(log_dir + '_audiofile_epoch' + str(epoch+1) + '.wav', 44100, audio)
 

        # # convert audio back to tensor
        # audio = tf.convert_to_tensor(audio)
        # # write audio file to disk
        # audio = tf.audio.encode_wav(audio, sample_rate=tf.constant(44100, dtype=tf.int32))
        # # write audio file to disk
        # tf.io.write_file(log_dir + '_audiofile_epoch' + str(epoch+1) + '.wav', audio, name=None)



        #------------------------------------
        # # save audio file to tf.summary.audio and event files for tensorboard
        # path = log_dir + '/audiosummary'
        # writer = tf.summary.create_file_writer(path)
        # with writer.as_default():

        #     # get audio file from model
        #     audio = self.model.output

                
            # # check if audio is keras tensor
            # if tf.keras.backend.is_keras_tensor(audio) == True:
            #     print('is keras tensor')


        #     # audio.shape=(None, 441000, 1) should be (1, 441000, 1)
        #     # change shape to (1, 441000, 1)
        #     audio = tf.squeeze(audio, axis=0)
        #     audio = tf.expand_dims(audio, axis=0)

        #     # # print audio tensor
        #     # print(audio)
        #     # print(audio.dtype)
        #     # print(audio.shape)

        #     # write audio file to tf.summary.audio
        #     tf.summary.audio('audio_' + str(epoch), audio, int(config['sr']), step=epoch, max_outputs=1)
        #     writer.flush()




          

# early_stopping_callback = keras.callbacks.EarlyStopping(
#     monitor='val_loss',
#     patience=5,
#     verbose=1)

# keras.callbacks.LambdaCallback(
#     on_epoch_end = logging_wav)





# compile model
model.compile(optimizer = keras.optimizers.Adam(learning_rate=0.01),
              loss = tf.keras.losses.MeanAbsolutePercentageError(),
              metrics = tf.keras.losses.MeanSquaredError())

model.summary()



Model: "PostNet_Conv1D"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d_1235 (Conv1D)        (None, 441000, 8)         24        
                                                                 
 activation_1235 (Activation  (None, 441000, 8)        0         
 )                                                               
                                                                 
 conv1d_1236 (Conv1D)        (None, 441000, 8)         136       
                                                                 
 activation_1236 (Activation  (None, 441000, 8)        0         


2023-06-09 12:50:35.406715: I tensorflow/core/profiler/lib/profiler_session.cc:99] Profiler session initializing.
2023-06-09 12:50:35.406947: I tensorflow/core/profiler/lib/profiler_session.cc:114] Profiler session started.
2023-06-09 12:50:35.408549: I tensorflow/core/profiler/lib/profiler_session.cc:126] Profiler session tear down.


 )                                                               
                                                                 
 conv1d_1237 (Conv1D)        (None, 441000, 8)         136       
                                                                 
 activation_1237 (Activation  (None, 441000, 8)        0         
 )                                                               
                                                                 
 conv1d_1238 (Conv1D)        (None, 441000, 8)         136       
                                                                 
 activation_1238 (Activation  (None, 441000, 8)        0         
 )                                                               
                                                                 
 conv1d_1239 (Conv1D)        (None, 441000, 8)         136       
                                                                 
 activation_1239 (Activation  (None, 441000, 8)        0         
 )        

In [324]:
# Clear any logs from previous runs
!rm -rf ./logs/

# fit model
history = model.fit(train_dataset,
                    epochs=config['n_epochs'],
                    validation_data=test_dataset,
                    callbacks=[save_callback, tensorboard_callback, CustomCallback()])

# # save model
# model.save('./model.h5')

# # save history
# with open('./history.json', 'w+') as fp:
#     json.dump(history.history, fp, sort_keys=True, indent=4)


# %tensorboard --logdir logs



Epoch 1/5
      1/Unknown - 5s 5s/step - loss: 67158.9141 - mean_squared_error: 0.0217

2023-06-08 18:44:33.090329: I tensorflow/core/profiler/lib/profiler_session.cc:99] Profiler session initializing.
2023-06-08 18:44:33.090368: I tensorflow/core/profiler/lib/profiler_session.cc:114] Profiler session started.


      2/Unknown - 10s 5s/step - loss: 71871.2266 - mean_squared_error: 0.0219

2023-06-08 18:44:37.598538: I tensorflow/core/profiler/lib/profiler_session.cc:66] Profiler session collecting data.
2023-06-08 18:44:37.601315: I tensorflow/core/profiler/lib/profiler_session.cc:126] Profiler session tear down.
2023-06-08 18:44:37.603770: I tensorflow/core/profiler/rpc/client/save_profile.cc:136] Creating directory: ./logs/20230608-184054/plugins/profile/2023_06_08_18_44_37

2023-06-08 18:44:37.604899: I tensorflow/core/profiler/rpc/client/save_profile.cc:142] Dumped gzipped tool data for trace.json.gz to ./logs/20230608-184054/plugins/profile/2023_06_08_18_44_37/Mariuss-MBP.trace.json.gz
2023-06-08 18:44:37.608106: I tensorflow/core/profiler/rpc/client/save_profile.cc:136] Creating directory: ./logs/20230608-184054/plugins/profile/2023_06_08_18_44_37

2023-06-08 18:44:37.608207: I tensorflow/core/profiler/rpc/client/save_profile.cc:142] Dumped gzipped tool data for memory_profile.json.gz to ./logs/20230608-184054/plugins/profile/2023_06_08_18_44_37/Mariuss-MBP.memory

     75/Unknown - 262s 3s/step - loss: 84088.8984 - mean_squared_error: 0.0210



INFO:tensorflow:Assets written to: ./model_checkpoint/assets


INFO:tensorflow:Assets written to: ./model_checkpoint/assets


---------------------
Epoch 1 Metrics:
{'loss': 84088.8984375, 'mean_squared_error': 0.020989101380109787, 'val_loss': 25717.22265625, 'val_mean_squared_error': 0.014231903478503227}
---------------------

Epoch 2/5



INFO:tensorflow:Assets written to: ./model_checkpoint/assets


INFO:tensorflow:Assets written to: ./model_checkpoint/assets


---------------------
Epoch 2 Metrics:
{'loss': 59297.75390625, 'mean_squared_error': 0.020926039665937424, 'val_loss': 103271.3984375, 'val_mean_squared_error': 0.014425699599087238}
---------------------

Epoch 3/5



INFO:tensorflow:Assets written to: ./model_checkpoint/assets


INFO:tensorflow:Assets written to: ./model_checkpoint/assets


---------------------
Epoch 3 Metrics:
{'loss': 41076.8671875, 'mean_squared_error': 0.02091315947473049, 'val_loss': 31994.3046875, 'val_mean_squared_error': 0.01437667291611433}
---------------------

Epoch 4/5



INFO:tensorflow:Assets written to: ./model_checkpoint/assets


INFO:tensorflow:Assets written to: ./model_checkpoint/assets


---------------------
Epoch 4 Metrics:
{'loss': 57964.75390625, 'mean_squared_error': 0.020927349105477333, 'val_loss': 25870.04296875, 'val_mean_squared_error': 0.014391820877790451}
---------------------

Epoch 5/5



INFO:tensorflow:Assets written to: ./model_checkpoint/assets


INFO:tensorflow:Assets written to: ./model_checkpoint/assets


---------------------
Epoch 5 Metrics:
{'loss': 24355.744140625, 'mean_squared_error': 0.020896360278129578, 'val_loss': 20530.623046875, 'val_mean_squared_error': 0.014158929698169231}
---------------------



In [None]:
%tensorboard --logdir logs

In [None]:
# plot mse
train_mse = history.history['mean_squared_error']
eval_mse = history.history['val_mean_squared_error']

fig2 = plt.figure()
plt.plot(range(config['n_epochs']), train_mse, label='train')
plt.plot(range(config['n_epochs']), eval_mse, label='test')
plt.legend()
plt.grid(True)
plt.xlabel('Epochs')
plt.ylabel('mse')
plt.title('Training with ' 
                           + str(config['n_epochs'])
                           + ' epochs \n batch-size: '
                           + str(config['batch_size']))
                     
plt.show()  

In [None]:
# reconstructed_model = keras.models.load_model('./model.h5')
