### Trains audio models, stores benchmarks

In [1]:
import torchaudio

In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import tensorflow as tf

import random, glob, os
import numpy as np

from pydub import AudioSegment
from pydub import effects
from utils.refactored_common import *
# from utils.refactored_common import unision_shuffled_copies
from tqdm.notebook import tqdm
import pydub
import librosa
try :
    from keras.utils import Sequence #   sequence =  keras.utils.Sequence
except:
    from keras.utils.all_utils import Sequence


# import tensorflow_io as tfio

import soundfile as sf
import audioflux
from scipy import signal

import matplotlib.pyplot as plt

In [4]:
from generators import base_generator_audio as BASE
from  curricula import selection
from models import base_cnn, transformer_classifier, wavenet

In [4]:
class AutoGen(BASE.BaseClassificationGenerator):
  def __init__(self, *args,**kwargs):
    super().__init__(*args, **kwargs)

  def __getitem__(self, index):
    a, b = super().__getitem__(index)
    return a, a

#### Run Params

Probably can move these to a cfg file, but ehhh

In [7]:
def return_checkpoints(target_path, log_path, early_stopping = False):
    checkpoint_filepath = target_path
    model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
        filepath=checkpoint_filepath,
        monitor='accuracy',
        mode='max',
        save_best_only=True)

    import datetime
    tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=f"{log_path}_{datetime.datetime.now().strftime('%Y%m%d-%H%M%S')}", histogram_freq=1)
    os.makedirs(f"{log_path}", exist_ok=True)
    
    csv_callback = tf.keras.callbacks.CSVLogger(f"{log_path}_{datetime.datetime.now().strftime('%Y%m%d-%H%M%S')}.csv", append=True)


    if early_stopping:
        early_stopping_callback = tf.keras.callbacks.EarlyStopping(
            monitor='accuracy',
            min_delta=0.01,
            patience=3,
            verbose=0,
            mode='max',
            baseline=None,
            restore_best_weights=True
        )
        return [model_checkpoint_callback, csv_callback, early_stopping_callback]
    return [model_checkpoint_callback, csv_callback]

In [13]:
run_name = "urban_sound_CNN_CNN"   #! Convention: "dataset__irred_model__target_model__curriculum"
irred_chkpt = f"results/{run_name}_control.keras"
control_chkpt = f"results/{run_name}_irred.keras"
target_chkpt = f"results/{run_name}_target.keras"

irred_log = f"results/{run_name}_control"
control_log = f"results/{run_name}_irred"
target_log = f"results/{run_name}_target"

width = 37
height = 128
num_classes = 35
epochs = 9
cfg = "cfg.yaml"
base_dir = "data/audio/speech_commands/"
minibatch_size = 0.5
batch_size = 32
ext = 'wav'
return_spec = True
return_fft = False



params = yaml_load(cfg)
run_params = {
    "run_name": run_name,
    "irred_chkpt": irred_chkpt,
    "control_chkpt": control_chkpt,
    "target_chkpt": target_chkpt,
    "irred_log": irred_log,
    "control_log": control_log,
    "target_log": target_log,
    "width": width,
    "height": height,
    "num_classes": num_classes,
    "epochs": epochs,
    "cfg": cfg,
    "base_dir": base_dir,
    "minibatch_size": minibatch_size,
    "batch_size": batch_size,
    "ext": ext,
    "return_spec": return_spec,
}



#### Loading Dataloaders

In [15]:
#! Basic dataloaders
train_gen = BASE.BaseClassificationGenerator(params, base_dir, batch_size, gentype='train', return_spec=return_spec, return_fft=return_fft, ext=ext)

holdout_gen = BASE.BaseClassificationGenerator(params, base_dir, batch_size, gentype='val', return_spec=return_spec, return_fft=return_fft, ext=ext)

test_gen = BASE.BaseClassificationGenerator(params, base_dir, batch_size, gentype='test', return_spec=return_spec, return_fft=return_fft, ext=ext)

# train_auto = AutoGen(params, base_dir, batch_size, gentype='train', return_spec=return_spec, return_fft=return_fft, ext=ext)

# test_auto = AutoGen(params, base_dir, batch_size, gentype='test', return_spec=return_spec, return_fft=return_fft, ext=ext)

# holdout_auto = AutoGen(params, base_dir, batch_size, gentype='val', return_spec=return_spec, return_fft=return_fft, ext=ext)


base_class_params = train_gen.toJSON()

run_params["base_dataloader_params"] = base_class_params



In [16]:
a, b = train_gen[0]

In [17]:
a.shape

(32, 37, 128)

#### Calculating Baseline

In [19]:
control_model = model = wavenet.WavenetClassifier(
    height, width, kernel_size=4, hidden_channels = [64, 48, 32, 24, 16, 8, 4, 2]
)

control_model.compile(optimizer='adam', loss='mse', metrics=['mse'])

control_model.summary()

callbacks = return_checkpoints(control_chkpt, control_log)

In [20]:
control_model.fit(train_gen, validation_data=test_gen, epochs=30, callbacks=callbacks)

Epoch 1/30


ValueError: Input 0 of layer "functional_3" is incompatible with the layer: expected shape=(None, 25, 128), found shape=(None, 37, 128)

In [24]:
new_model = tf.keras.Model(control_model[0].input, model[0].get_layer('emb_bl').output)
new_model.trainable = False

input_ = tf.keras.layers.Input(shape=(width, height))
x = new_model(input_)
x = tf.keras.layers.Dense(128, activation='relu')(x)
x = tf.keras.layers.Dense(128, activation='relu')(x)
x = tf.keras.layers.Dense(num_classes, activation='softmax')(x)

trainey_model_control = tf.keras.Model(inputs=input_, outputs=x)


trainey_model_control.compile(
    loss='categorical_crossentropy',
    optimizer=tf.keras.optimizers.Adam(),
    metrics=['accuracy']
)


TypeError: 'Functional' object is not subscriptable

In [None]:
trainey_model_control.fit(train_gen, validation_data=holdout_gen, epochs=30, callbacks=callbacks)

#### Irreducibe Model Training

In [None]:
irred_model = wavenet.AutoEncoderWavenetBase(
    height, width, kernel_size=4, hidden_channels = [16, 8, 4]
)[0]

irred_model.compile(optimizer='adam', loss='mse', metrics=['mse'])

irred_model.summary()
callbacks = return_checkpoints(irred_chkpt, irred_log, True)



In [11]:
irred_model.fit(holdout_auto, validation_data=test_auto, epochs=epochs*2) #! lightweight

Epoch 1/20


[1m187/187[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 128ms/step - accuracy: 0.0972 - loss: 2.5628 - val_accuracy: 0.1003 - val_loss: 2.3196
Epoch 2/20
[1m187/187[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 130ms/step - accuracy: 0.1766 - loss: 2.2454 - val_accuracy: 0.1707 - val_loss: 2.2366
Epoch 3/20
[1m187/187[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 129ms/step - accuracy: 0.2080 - loss: 2.1183 - val_accuracy: 0.2064 - val_loss: 2.1489
Epoch 4/20
[1m187/187[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 130ms/step - accuracy: 0.2572 - loss: 1.9445 - val_accuracy: 0.2195 - val_loss: 2.2603
Epoch 5/20
[1m187/187[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 131ms/step - accuracy: 0.3283 - loss: 1.8041 - val_accuracy: 0.2373 - val_loss: 2.0888
Epoch 6/20
[1m187/187[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 127ms/step - accuracy: 0.3646 - loss: 1.7096 - val_accuracy: 0.2557 - val_loss: 2.0344
Epoch 7/20
[1m187/18

<keras.src.callbacks.history.History at 0x7f25800e20d0>

In [19]:
new_model_irred = tf.keras.Model(irred_model[0].input, irred_model[0].get_layer('emb_bl').output)
new_model_irred.trainable = False

input_ = tf.keras.layers.Input(shape=(width, height))
x = new_model_irred(input_)
x = tf.keras.layers.Dense(128, activation='relu')(x)
x = tf.keras.layers.Dense(128, activation='relu')(x)
x = tf.keras.layers.Dense(num_classes, activation='softmax')(x)

trainey_model_irred = tf.keras.Model(inputs=input_, outputs=x)


trainey_model_irred.compile(
    loss='categorical_crossentropy',
    optimizer=tf.keras.optimizers.Adam(),
    metrics=['accuracy']
)


TypeError: 'Functional' object is not subscriptable

#### RHO-LOSS Training

In [12]:
target_model = transformer_classifier.BaseTransformerClassifier(width, height, num_classes)

target_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

target_model.summary()

callbacks = return_checkpoints(target_chkpt, target_log)

In [13]:
irred_model = tf.keras.models.load_model(irred_chkpt)

train_rho_gen = BASE.rho_generator_audio(params, base_dir, batch_size, gentype='train', return_spec=return_spec, return_fft=return_fft, ext=ext, selector=selection.irreducible_loss_selector, irred_model=irred_model, target_model=target_model, epoch_cutoff=0)

In [14]:
target_model.fit(train_rho_gen, validation_data=test_gen, epochs=epochs, callbacks=callbacks)

Epoch 1/10
[1m468/468[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 154ms/step - accuracy: 0.2126 - loss: 2.1764((19, 25, 128), (19, 10))


[1m468/468[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m88s[0m 178ms/step - accuracy: 0.2128 - loss: 2.1757 - val_accuracy: 0.3166 - val_loss: 2.0141
Epoch 2/10
[1m468/468[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 159ms/step - accuracy: 0.4999 - loss: 1.2286((19, 25, 128), (19, 10))


[1m468/468[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m149s[0m 194ms/step - accuracy: 0.4999 - loss: 1.2284 - val_accuracy: 0.2527 - val_loss: 2.3225
Epoch 3/10
[1m467/468[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 163ms/step - accuracy: 0.6706 - loss: 0.8340((19, 25, 128), (19, 10))


[1m468/468[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m89s[0m 188ms/step - accuracy: 0.6707 - loss: 0.8337 - val_accuracy: 0.2479 - val_loss: 2.7805
Epoch 4/10
[1m468/468[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 153ms/step - 

<keras.src.callbacks.history.History at 0x7f2576044670>

#### Saving Params

In [15]:
import json
with open(f"results/{run_name}/runparams.json", 'w+') as f:
    json.dump(run_params, f, indent=4)