# Training Notebook
This notebook encompasses model training.
This notebook uses TensorFlow.

Be careful: the libraries used here are very sensitive to each other's versions. Here are the versions of the packages I used:
- Python == 3.10.10
- tensorflow == 2.11.0
- tensorflow-io == 0.31.0
- Numpy == 1.23.5
- Matplotlib == 3.7.1
- Jupyter Notebook == 6.5.3

## Fix parameters

In [1]:
import tensorflow as tf
from model import *
from tensorflow.keras import layers, losses

# - Modify the parameters at will -------------------------

# Dataset
nb_samples = 300    # Total number of samples in the dataset
key = "P"           # Key the data was generated with. Datapoint files are named "key_i.npy", i = 0 .. nb_samples - 1.
dataset_dir = f"dataset_vector_{key}"
duration = 45       # Duration of the total audio segment, in seconds

# Training run
nb_epochs = 45              # Number of training epochs
training_index = 1          # Index that tells this training run apart from the others in the logs
weight_save_frequency = 1   # Save frequency for the model weights, in epochs

# Model and train/validation split
batch_size = 32
latent_dim = 64                 # Number of dimensions of the latent space
cut = int(nb_samples * 0.8)     # Index of the train/validation cut (80 % train / 20 % validation)

## I. Prepare callbacks and dataset for training. Load and build the model.

In [3]:
# -- Dataset --------------------------------------
first = np.load("{}/{}_0.npy".format(dataset_dir, key))  # Load the first datapoint to retrieve the shape
data_shape = first.shape    # Shape for the whole dataset
print("Shape of the data points: ", data_shape)

time_stamps = np.load("miscellaneous/time_stamps_{}.npy".format(key))
spec_indices = np.load("miscellaneous/spectrogram_indices_{}.npy".format(key))

dataset = np.zeros((nb_samples, data_shape[0], data_shape[1]))  # Create the empty vector that will hold the full data
dataset[0] = first

for i in range(1, nb_samples):
    dataset[i] = np.load("{}/{}_{}.npy".format(dataset_dir, key, i))

dataset = dataset.reshape((nb_samples, data_shape[0], data_shape[1], 1))


# -- Callbacks -------------------------------------
checkpoint_path = "logs/training_{training_index}/cp-{epoch:04d}.ckpt"

cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
                                                 save_weights_only=True,
                                                 verbose=1,
                                                 save_freq="epoch")

%load_ext tensorboard
import datetime

log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S") + "_training_{training_index}"
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=weight_save_frequency)


# -- Build the Model --------------------------------
autoencoder = Autoencoder(data_shape, latent_dim)
autoencoder.compile(optimizer="adam",
                    loss=losses.MeanSquaredError())
autoencoder.build((None, data_shape[0], data_shape[1], 1))

autoencoder.summary()

Shape of the data points:  (2504, 48)
The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard
Model: "autoencoder_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 encoder_1 (Encoder)         multiple                  6445568   
                                                                 
 decoder_1 (Decoder)         multiple                  7905089   
                                                                 
Total params: 14,350,657
Trainable params: 14,350,657
Non-trainable params: 0
_________________________________________________________________


## II. Fit the model

In [None]:
# An autoencoder is trained to reproduce its input, so inputs and targets are the same array.
train_data = dataset[:cut]   # first `cut` samples
val_data = dataset[cut:]     # remaining samples, held out for validation

autoencoder.fit(
    train_data,
    train_data,
    batch_size=batch_size,
    epochs=nb_epochs,
    shuffle=True,
    validation_data=(val_data, val_data),
    callbacks=[tensorboard_callback, cp_callback],
)

## III. Visualize training in Tensorboard

In [None]:
# Open TensorBoard on logs/fit, the parent directory of the run folders written by the TensorBoard callback.
# If the extension needs to be reloaded first, uncomment the next line:
#%reload_ext tensorboard
%tensorboard --logdir logs/fit