In [1]:
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras import models
# import tensorflow_io as tfio  
import matplotlib.pyplot as plt
import time


# Shared pipeline state, all unset until the pipeline runs.
# `run()` assigns the datasets, the example batch and the label names;
# the model-building and training steps assign `model` and `history`.
train_spectrogram_ds = val_spectrogram_ds = test_spectrogram_ds = None
example_spectrograms = label_names = None

model = history = None

Add `Dataset.cache` and `Dataset.prefetch` operations to reduce read latency while training the model (the training set is additionally shuffled after caching):


In [2]:
def init(shuffle_buffer_size=10000):
    """Wrap the three global spectrogram datasets with caching and prefetching.

    Each module-level dataset is replaced by a cached, prefetched version of
    itself; the training dataset is additionally shuffled between the cache
    and prefetch stages. Calling this again on already-wrapped datasets
    stacks another cache/prefetch layer on top, so call it once per set of
    freshly assigned datasets.

    Args:
        shuffle_buffer_size: Buffer size passed to ``Dataset.shuffle`` for
            the training dataset. Defaults to 10000.

    Raises:
        RuntimeError: If any of the three datasets is still ``None``. The
            check runs before anything is reassigned, so a failure never
            leaves the globals partially wrapped.
    """
    global train_spectrogram_ds, val_spectrogram_ds, test_spectrogram_ds

    # Fail early with a readable message instead of an AttributeError on None.
    unset = [
        name
        for name, dataset in (
            ("train_spectrogram_ds", train_spectrogram_ds),
            ("val_spectrogram_ds", val_spectrogram_ds),
            ("test_spectrogram_ds", test_spectrogram_ds),
        )
        if dataset is None
    ]
    if unset:
        raise RuntimeError(
            "init() requires these datasets to be assigned first: " + ", ".join(unset)
        )

    autotune = tf.data.AUTOTUNE
    train_spectrogram_ds = (
        train_spectrogram_ds.cache().shuffle(shuffle_buffer_size).prefetch(autotune)
    )
    val_spectrogram_ds = val_spectrogram_ds.cache().prefetch(autotune)
    test_spectrogram_ds = test_spectrogram_ds.cache().prefetch(autotune)

For the model, you'll use a simple convolutional neural network (CNN), since you have transformed the audio files into spectrogram images.

Your `tf.keras.Sequential` model will use the following Keras preprocessing layers:

- `tf.keras.layers.Resizing`: to downsample the input to enable the model to train faster.
- `tf.keras.layers.Normalization`: to normalize each pixel in the image based on its mean and standard deviation.

For the `Normalization` layer, its `adapt` method would first need to be called on the training data in order to compute aggregate statistics (that is, the mean and the standard deviation).


In [3]:
@tf.autograph.experimental.do_not_convert
def build_model():
    """Build the baseline CNN and store it in the global ``model``.

    The input shape is taken from ``example_spectrograms`` (everything after
    its first dimension) and the size of the output layer from
    ``label_names``. A ``Normalization`` layer is adapted on the spectrograms
    of ``train_spectrogram_ds`` (labels dropped) before the network is
    assembled. The last ``Dense`` layer has no activation. The model summary
    is printed at the end.
    """
    global model

    spectrogram_shape = example_spectrograms.shape[1:]
    print('Input shape:', spectrogram_shape)
    class_count = len(label_names)

    # The normalizer learns its statistics from the training spectrograms
    # only, so the labels are stripped from each (spectrogram, label) pair.
    normalizer = layers.Normalization()
    spectrograms_only = train_spectrogram_ds.map(
        map_func=lambda spectrogram, _label: spectrogram
    )
    normalizer.adapt(data=spectrograms_only)

    stack = [
        layers.Input(shape=spectrogram_shape),
        layers.Resizing(32, 32),  # downsample the input
        normalizer,
        layers.Conv2D(32, 3, activation='relu'),
        layers.Conv2D(64, 3, activation='relu'),
        layers.MaxPooling2D(),
        layers.Dropout(0.25),
        layers.Flatten(),
        layers.Dense(128, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(class_count),
    ]
    model = models.Sequential(stack)

    model.summary()

In [None]:
def build_model_GPT():
    """Build the deeper CNN variant and store it in the global ``model``.

    Compared with ``build_model`` this stack adds a ``BatchNormalization``
    layer after every convolution, a third convolution block (128 filters)
    with its own pooling and dropout, and a wider 256-unit dense layer.
    Input shape, label count and the adapted ``Normalization`` layer are
    derived from the module-level globals in the same way. The model summary
    is printed at the end.
    """
    global model

    spectrogram_shape = example_spectrograms.shape[1:]
    print('Input shape:', spectrogram_shape)
    class_count = len(label_names)

    # Adapt the normalizer on the training spectrograms with labels dropped.
    normalizer = layers.Normalization()
    spectrograms_only = train_spectrogram_ds.map(
        map_func=lambda spectrogram, _label: spectrogram
    )
    normalizer.adapt(data=spectrograms_only)

    preprocessing = [
        layers.Input(shape=spectrogram_shape),
        layers.Resizing(32, 32),
        normalizer,
    ]
    first_conv_block = [
        layers.Conv2D(32, 3, activation='relu'),
        layers.BatchNormalization(),
        layers.Conv2D(64, 3, activation='relu'),
        layers.BatchNormalization(),
        layers.MaxPooling2D(),
        layers.Dropout(0.25),
    ]
    second_conv_block = [
        layers.Conv2D(128, 3, activation='relu'),
        layers.BatchNormalization(),
        layers.MaxPooling2D(),
        layers.Dropout(0.25),
    ]
    classifier_head = [
        layers.Flatten(),
        layers.Dense(256, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(class_count),
    ]
    model = models.Sequential(
        preprocessing + first_conv_block + second_conv_block + classifier_head
    )

    model.summary()

    # # Compile the model
    # model.compile(
    #     optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),  # Adjusted learning rate
    #     loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    #     metrics=['accuracy'],
    # )

Configure the Keras model with the Adam optimizer and the cross-entropy loss:


In [4]:
def compile():
    """Compile the global ``model`` for training on integer labels.

    Uses the Adam optimizer with its default arguments, sparse categorical
    cross-entropy computed from logits, and accuracy as the tracked metric.

    Note: this function has the same name as Python's built-in ``compile``
    and therefore hides the built-in within this module.
    """
    adam = tf.keras.optimizers.Adam()
    logits_loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    model.compile(optimizer=adam, loss=logits_loss, metrics=['accuracy'])

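Train the model for up to 500 epochs. Early stopping (patience of 20 epochs on `val_loss`, restoring the best weights) ends training sooner, and the learning rate is reduced whenever `val_loss` plateaus: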


In [5]:
def train(epochs=500):
    """Fit the global ``model`` and store the result of ``fit`` in ``history``.

    Before training, the number of individual examples in the training and
    validation datasets is counted (by un-batching and iterating each dataset
    once) and printed. Training then runs with three callbacks: a
    remaining-time estimator, early stopping on ``val_loss`` (patience 20,
    best weights restored) and learning-rate reduction on ``val_loss``
    plateaus (factor 0.2, patience 10, floor 1e-6).

    Args:
        epochs: Maximum number of epochs passed to ``model.fit``. Defaults to
            500; early stopping may end training before that.
    """
    global history

    # Count examples rather than batches, hence the unbatch().
    num_train_files = sum(1 for _ in train_spectrogram_ds.unbatch())
    num_val_files = sum(1 for _ in val_spectrogram_ds.unbatch())

    print(f"Number of training files: {num_train_files}")
    print(f"Number of validation files: {num_val_files}")

    class TimeHistory(tf.keras.callbacks.Callback):
        """Prints an estimate of the remaining training time after each epoch.

        The estimate is the mean duration of the epochs completed so far
        multiplied by the number of epochs still scheduled. It does not
        account for early stopping.
        """

        def on_train_begin(self, logs=None):
            self.epoch_times = []  # duration of every finished epoch, in seconds

        def on_epoch_begin(self, epoch, logs=None):
            # perf_counter is monotonic, so system clock adjustments cannot
            # produce negative or inflated epoch durations.
            self.epoch_start_time = time.perf_counter()

        def on_epoch_end(self, epoch, logs=None):
            self.epoch_times.append(time.perf_counter() - self.epoch_start_time)

            avg_epoch_time = sum(self.epoch_times) / len(self.epoch_times)
            remaining_epochs = self.params['epochs'] - (epoch + 1)
            estimated_remaining_time = remaining_epochs * avg_epoch_time

            hours, rem = divmod(estimated_remaining_time, 3600)
            minutes, seconds = divmod(rem, 60)

            print(f"\nEpoch {epoch + 1}/{self.params['epochs']} - Estimated time until finished: "
                f"{int(hours)} hours, {int(minutes)} minutes, {int(seconds)} seconds")

    time_callback = TimeHistory()
    early_stopping = tf.keras.callbacks.EarlyStopping(
        monitor='val_loss', patience=20, restore_best_weights=True
    )
    reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
        monitor='val_loss', factor=0.2, patience=10, min_lr=1e-6
    )

    history = model.fit(
        train_spectrogram_ds,
        validation_data=val_spectrogram_ds,
        epochs=epochs,
        callbacks=[time_callback, early_stopping, reduce_lr],
    )
    

In [6]:
def run(_train_spectrogram_ds, _val_spectrogram_ds, _test_spectrogram_ds, _example_spectrograms, _label_names):
    """Run the whole pipeline: prepare datasets, build, compile and train.

    The arguments are stored in the module-level globals of the same name
    (without the leading underscore), after which ``init``, ``build_model``,
    ``compile`` and ``train`` are executed in that order.

    Returns:
        A 7-tuple ``(train_spectrogram_ds, val_spectrogram_ds,
        test_spectrogram_ds, example_spectrograms, label_names, model,
        history)`` holding the module-level values as they stand after
        training.
    """
    global train_spectrogram_ds, val_spectrogram_ds, test_spectrogram_ds
    global example_spectrograms, label_names, model, history

    train_spectrogram_ds, val_spectrogram_ds, test_spectrogram_ds = (
        _train_spectrogram_ds,
        _val_spectrogram_ds,
        _test_spectrogram_ds,
    )
    example_spectrograms, label_names = _example_spectrograms, _label_names

    # To train the deeper variant, put build_model_GPT in place of build_model.
    for step in (init, build_model, compile, train):
        step()

    return (
        train_spectrogram_ds,
        val_spectrogram_ds,
        test_spectrogram_ds,
        example_spectrograms,
        label_names,
        model,
        history,
    )