In [None]:
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras import models
from tensorflow.keras.utils import plot_model
# import tensorflow_io as tfio  
import matplotlib.pyplot as plt
import time
import io
import sys

from IPython.display import SVG
from tensorflow.keras.utils import model_to_dot
from types import SimpleNamespace
from tensorflow.keras.optimizers.schedules import LearningRateSchedule



# Module-level state shared by the functions in this notebook.
# Everything starts out unset; run() fills in the datasets, label names and
# training settings, build_model() sets `model`, and train() sets `history`
# and `session`.

# Spectrogram datasets (train / validation / test).
train_spectrogram_ds = val_spectrogram_ds = test_spectrogram_ds = None
# First batch of training spectrograms; use_model() reads its shape.
example_spectrograms = None
# Class labels; use_model() uses len(label_names) for the output layer size.
label_names = None

# Training artefacts.
model = history = session = None

# Training configuration: epochs per run and maximum number of training runs.
EPOCHS = RUNS_FIND_GOOD_AI = None


In [None]:
def use_model(number=1):
    """Build one of the predefined CNN classifiers for the spectrogram data.

    Reads module-level state: ``example_spectrograms`` (the input shape is
    ``example_spectrograms.shape[1:]``), ``label_names`` (one output unit per
    label) and ``train_spectrogram_ds`` (used to adapt the normalization
    layer).

    Args:
        number: Architecture selector.
            ``1``: four Conv2D/BatchNormalization stages (the first one
            without pooling), then Flatten and Dense(128).
            ``2``: three Conv2D/BatchNormalization/MaxPooling stages, then
            GlobalAveragePooling2D and Dense(64).

    Returns:
        An uncompiled ``Sequential`` model whose last layer is a softmax
        over ``len(label_names)`` classes.

    Raises:
        ValueError: If ``number`` is not a known architecture.
    """
    # Fail fast: reject an unknown selector before the adapt() call below,
    # which iterates over the whole training dataset.
    if number not in (1, 2):
        raise ValueError(f"Unbekanntes Modell Nummer: {number}")

    input_shape = example_spectrograms.shape[1:]
    print('Input shape:', input_shape)
    num_labels = len(label_names)
    print(f"num_labels: {num_labels}")

    # Instantiate the `tf.keras.layers.Normalization` layer and fit its state
    # to the training spectrograms with `Normalization.adapt`.
    norm_layer = layers.Normalization()
    norm_layer.adapt(data=train_spectrogram_ds.map(map_func=lambda spec, label: spec))

    def l2_penalty():
        # A fresh regularizer object per layer, as in the original definition.
        return tf.keras.regularizers.l2(0.001)

    def conv_block(filters, pool=True):
        # Conv2D(3x3, relu, L2) -> BatchNormalization [-> MaxPooling2D]
        block = [
            layers.Conv2D(filters, 3, activation='relu', kernel_regularizer=l2_penalty()),
            layers.BatchNormalization(),
        ]
        if pool:
            block.append(layers.MaxPooling2D())
        return block

    # Shared front end: downsample to 32x32, then normalize.
    stem = [
        layers.Input(shape=input_shape),
        layers.Resizing(32, 32),
        norm_layer,
    ]

    if number == 1:
        body = [
            *conv_block(8, pool=False),
            *conv_block(16),
            *conv_block(32),
            *conv_block(64),
            layers.Flatten(),
            layers.Dense(128, activation='relu', kernel_regularizer=l2_penalty()),
        ]
    else:  # number == 2
        body = [
            *conv_block(16),
            *conv_block(32),
            *conv_block(64),
            layers.GlobalAveragePooling2D(),
            layers.Dense(64, activation='relu', kernel_regularizer=l2_penalty()),
        ]

    # Shared classifier head; softmax output means the loss must be used
    # with from_logits=False.
    head = [
        layers.Dropout(0.5),
        layers.Dense(num_labels, activation='softmax'),
    ]

    return models.Sequential(stem + body + head)


Add `Dataset.cache` and `Dataset.prefetch` operations to reduce read latency while training the model:


In [None]:
def init():
    """Add caching and prefetching to the three module-level datasets.

    Rebinds the globals ``train_spectrogram_ds``, ``val_spectrogram_ds`` and
    ``test_spectrogram_ds``. The training dataset is additionally shuffled
    (buffer size 10000) after the cache, so the cached elements are reordered
    on every pass.
    """
    global train_spectrogram_ds, val_spectrogram_ds, test_spectrogram_ds

    autotune = tf.data.AUTOTUNE

    # Training: cache -> shuffle -> prefetch.
    cached_train = train_spectrogram_ds.cache()
    train_spectrogram_ds = cached_train.shuffle(10000).prefetch(autotune)

    # Validation / test: cache -> prefetch (no shuffling).
    val_spectrogram_ds = val_spectrogram_ds.cache().prefetch(autotune)
    test_spectrogram_ds = test_spectrogram_ds.cache().prefetch(autotune)


For the model, you'll use a simple convolutional neural network (CNN), since you have transformed the audio files into spectrogram images.

Your `tf.keras.Sequential` model will use the following Keras preprocessing layers:

- `tf.keras.layers.Resizing`: to downsample the input to enable the model to train faster.
- `tf.keras.layers.Normalization`: to normalize each pixel in the image based on its mean and standard deviation.

For the `Normalization` layer, its `adapt` method would first need to be called on the training data in order to compute aggregate statistics (that is, the mean and the standard deviation).


In [None]:
@tf.autograph.experimental.do_not_convert
def build_model():
    """Create architecture 1 via use_model(), store it in the global
    ``model`` and print its layer summary."""
    global model

    architecture_number = 1
    model = use_model(architecture_number)
    model.summary()

    # Optional architecture diagram:
    # plot_model(model, show_shapes=True, show_layer_names=True)

Configure the Keras model with the Adam optimizer and the cross-entropy loss:


In [None]:
def compile(learning_rate=1e-4, warmup_steps=None):
    """Compile the global ``model`` with Adam and sparse categorical cross-entropy.

    Called without arguments, this compiles with a constant learning rate of
    1e-4. Passing ``warmup_steps`` enables a linear warm-up schedule.

    Note: this function shadows the built-in ``compile`` inside this
    notebook; the name is kept because ``run()`` calls it.

    Args:
        learning_rate: Target learning rate for Adam.
        warmup_steps: If truthy, the learning rate rises linearly from 0 to
            ``learning_rate`` over this many optimizer steps and stays
            constant afterwards. If ``None`` or 0, no warm-up is used.
    """

    class WarmUpSchedule(LearningRateSchedule):
        """Linear warm-up to ``initial_lr`` over ``warmup_steps`` steps."""

        def __init__(self, initial_lr, warmup_steps):
            super().__init__()
            self.initial_lr = initial_lr
            self.warmup_steps = warmup_steps

        def __call__(self, step):
            # `step` may be a symbolic integer tensor, so use tensor ops
            # instead of a Python `if` to clamp the ramp at 1.0.
            progress = tf.cast(step, tf.float32) / float(self.warmup_steps)
            return self.initial_lr * tf.minimum(progress, 1.0)

        def get_config(self):
            # Lets Keras serialize the schedule with the optimizer.
            return {
                "initial_lr": self.initial_lr,
                "warmup_steps": self.warmup_steps,
            }

    if warmup_steps:
        lr = WarmUpSchedule(initial_lr=learning_rate, warmup_steps=warmup_steps)
    else:
        lr = learning_rate

    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
        # Important: from_logits=False, because the models built by
        # use_model() end in a softmax layer and already output probabilities.
        loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
        metrics=['accuracy']
    )

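Train the model for up to `EPOCHS` epochs per run. The callbacks defined in the following cells stop training early and print an estimate of the remaining training time: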


In [None]:
class EarlyStoppingMultipleMetrics(tf.keras.callbacks.Callback):
    """Early stopping that watches several metrics at once.

    An epoch counts as an improvement when at least one monitored metric
    reaches a new best value. Metrics whose name contains ``acc`` or ``auc``
    are maximized; all others (e.g. losses) are minimized. Training stops
    after ``patience`` consecutive epochs without any improvement, and the
    weights from the last improving epoch are restored.

    Args:
        patience: Number of consecutive non-improving epochs tolerated
            before training is stopped.
        monitor_metrics: Names of the entries in ``logs`` to watch.

    Attributes:
        best_values: Best value seen so far for each monitored metric.
        best_weights: Model weights from the last improving epoch, or
            ``None`` if no improvement has been recorded yet.
        best_epoch: Zero-based index of the last improving epoch.
        wait: Number of consecutive epochs without improvement.
    """

    def __init__(self, patience, monitor_metrics=('val_loss', 'val_accuracy')):
        super().__init__()
        self.patience = patience
        # Copy so that the caller's sequence is never shared or mutated.
        self.monitor_metrics = list(monitor_metrics)
        self._reset_state()

    @staticmethod
    def _higher_is_better(metric):
        """Return True for metrics that should be maximized."""
        return 'acc' in metric or 'auc' in metric

    def _reset_state(self):
        """Forget everything learned during a previous training run."""
        self.best_values = {
            metric: float('-inf') if self._higher_is_better(metric) else float('inf')
            for metric in self.monitor_metrics
        }
        self.best_weights = None
        self.best_epoch = 0
        self.wait = 0

    def on_train_begin(self, logs=None):
        # The same callback instance may be passed to several fit() calls.
        self._reset_state()

    def on_epoch_end(self, epoch, logs=None):
        logs = logs or {}
        any_metric_seen = False
        improved = False

        for metric in self.monitor_metrics:
            current_value = logs.get(metric)
            if current_value is None:
                continue
            any_metric_seen = True
            best_value = self.best_values[metric]
            if self._higher_is_better(metric):
                is_better = current_value > best_value
            else:
                is_better = current_value < best_value
            if is_better:
                self.best_values[metric] = current_value
                improved = True

        if not any_metric_seen:
            # Nothing to judge this epoch by; do not count it against patience.
            return

        if improved:
            self.best_epoch = epoch
            self.wait = 0
            self.best_weights = self.model.get_weights()
            return

        # Count each non-improving epoch exactly once, however many metrics
        # are monitored.
        self.wait += 1
        if self.wait >= self.patience:
            self.model.stop_training = True
            # Restore the best weights, if any were recorded.
            if self.best_weights is not None:
                self.model.set_weights(self.best_weights)



In [None]:
class TimeHistory(tf.keras.callbacks.Callback):
    """Record per-epoch wall-clock durations and print a remaining-time estimate.

    After every epoch the estimate is (mean epoch duration so far) multiplied
    by (number of epochs still to run according to ``self.params['epochs']``).
    """

    def on_train_begin(self, logs=None):
        # Start of the whole training run, plus a fresh list of epoch durations.
        self.start_time = time.time()
        self.epoch_times = []

    def on_epoch_begin(self, epoch, logs=None):
        self.epoch_start_time = time.time()

    def on_epoch_end(self, epoch, logs=None):
        self.epoch_times.append(time.time() - self.epoch_start_time)

        mean_duration = sum(self.epoch_times) / len(self.epoch_times)
        epochs_left = self.params['epochs'] - (epoch + 1)

        # Break the estimate (in seconds) down into h / min / s.
        hours, leftover = divmod(epochs_left * mean_duration, 3600)
        minutes, seconds = divmod(leftover, 60)

        print(f"\nEpoch {epoch + 1}/{self.params['epochs']} - Estimated time until finished: "
            f"{int(hours)} hours, {int(minutes)} minutes, {int(seconds)} seconds")

In [None]:
@tf.autograph.experimental.do_not_convert
def train():
    """Train the global ``model`` until it meets the quality thresholds.

    Runs ``model.fit`` up to ``RUNS_FIND_GOOD_AI`` times with ``EPOCHS``
    epochs each, using early stopping on ``val_accuracy`` (best weights are
    restored). After each run the model is evaluated on the validation and
    training datasets; the loop ends as soon as all four thresholds
    (accuracy > 0.8 and loss < 0.2 on both datasets) are met.

    Note: the model is not re-initialized between runs; every run continues
    from the weights left by the previous one.

    Side effects: sets the globals ``history`` (result of the last ``fit``)
    and ``session`` (a ``SimpleNamespace`` bundling model, history, epochs,
    callbacks, evaluation values and batch size).
    """
    global history
    global session

    import math

    num_train_files = sum(1 for _ in train_spectrogram_ds.unbatch())
    num_val_files = sum(1 for _ in val_spectrogram_ds.unbatch())

    print(f"Number of training files: {num_train_files}")
    print(f"Number of validation files: {num_val_files}")

    # Quality thresholds: accuracies must exceed their minimum, losses must
    # stay below their maximum.
    min_val_accuracy = 0.8
    max_val_loss = 0.2
    min_train_accuracy = 0.8
    max_train_loss = 0.2

    max_runs = RUNS_FIND_GOOD_AI
    # Named run_index so it does not shadow the module-level run() function.
    run_index = 0

    time_callback = TimeHistory()
    # EarlyStopping expects a whole number of epochs. Rounding EPOCHS / 4 up
    # stops at the same epoch as the fractional value would.
    patience = math.ceil(EPOCHS / 4)
    early_stopping = tf.keras.callbacks.EarlyStopping(
        monitor='val_accuracy', patience=patience, restore_best_weights=True
    )
    callbacks = [time_callback, early_stopping]

    while run_index < max_runs:
        print(f"Start of run {run_index+1}/{max_runs}")
        history = model.fit(
            train_spectrogram_ds,
            validation_data=val_spectrogram_ds,
            epochs=EPOCHS,
            callbacks=list(callbacks),
        )

        trained_epochs = len(history.epoch)
        print(f"Das Training wurde nach {trained_epochs} Epochen gestoppt.")
        best_epoch = early_stopping.best_epoch
        print(f"Das beste Modell wurde in Epoche {best_epoch + 1} gefunden.")

        val_loss, val_accuracy = model.evaluate(val_spectrogram_ds, verbose=0)
        train_loss, train_accuracy = model.evaluate(train_spectrogram_ds, verbose=0)

        # Leading dimension of the first training element's features.
        batch_size = None
        for element in train_spectrogram_ds.take(1):
            batch_size = element[0].shape[0]
            break

        session = SimpleNamespace(
            model=model,
            history=history,
            epochs=EPOCHS,
            callbacks=callbacks,
            model_values=[val_loss, val_accuracy, train_loss, train_accuracy],
            model_batch_size=batch_size
        )

        print(f"Evaluated model with best weights: val_loss={val_loss}, val_accuracy={val_accuracy}")

        thresholds_met = (
            train_accuracy > min_train_accuracy
            and train_loss < max_train_loss
            and val_loss < max_val_loss
            and val_accuracy > min_val_accuracy
        )
        if thresholds_met:
            print(f"Run {run_index+1} successful with val_loss={val_loss} and val_accuracy={val_accuracy}")
            break

        print(f"Run {run_index+1} not successful. val_loss={val_loss}, val_accuracy={val_accuracy} - Restarting training...")
        run_index += 1
    else:
        # Reached only when the loop ends without a successful run.
        print("Maximum number of runs reached. Best model from the last run will be used.")

In [None]:
def run(_train_spectrogram_ds, _val_spectrogram_ds, _test_spectrogram_ds, _label_names, _EPOCHS, _RUNS_FIND_GOOD_AI):
    """Entry point: store the inputs in module globals and run the pipeline.

    Steps, in order: init() -> build_model() -> compile() -> train().

    Args:
        _train_spectrogram_ds: Training dataset; its elements are unpacked
            as (spectrograms, labels) pairs.
        _val_spectrogram_ds: Validation dataset.
        _test_spectrogram_ds: Test dataset.
        _label_names: Class labels; their count sets the output layer size.
        _EPOCHS: Number of epochs per training run.
        _RUNS_FIND_GOOD_AI: Maximum number of training runs.

    Returns:
        Tuple ``(train_spectrogram_ds, val_spectrogram_ds,
        test_spectrogram_ds, session)``: the datasets as rebound by init()
        and the global ``session`` object.
    """
    global train_spectrogram_ds, val_spectrogram_ds, test_spectrogram_ds
    global example_spectrograms, label_names, model, history
    global EPOCHS, RUNS_FIND_GOOD_AI

    train_spectrogram_ds, val_spectrogram_ds, test_spectrogram_ds = (
        _train_spectrogram_ds,
        _val_spectrogram_ds,
        _test_spectrogram_ds,
    )
    label_names = _label_names
    EPOCHS, RUNS_FIND_GOOD_AI = _EPOCHS, _RUNS_FIND_GOOD_AI

    # Keep the first training batch; use_model() derives the input shape from it.
    for example_spectrograms, _ in train_spectrogram_ds.take(1):
        break

    for pipeline_step in (init, build_model, compile, train):
        pipeline_step()

    return train_spectrogram_ds, val_spectrogram_ds, test_spectrogram_ds, session