# Setup 

In [2]:
import os
import scipy
import librosa
import numpy as np
import matplotlib.pyplot as plt
import tensorflow.keras as keras
from skimage.transform import resize
from IPython.display import clear_output
from matplotlib.ticker import MaxNLocator
from keras.layers import Dense, Dropout, Flatten, Normalization, Conv1D, MaxPool1D, LSTM, Conv2D, MaxPool2D, GRU, BatchNormalization, SimpleRNN
import sklearn
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import cross_val_score
from tqdm import tqdm
import tensorflow as tf
tf.config.run_functions_eagerly(True)
from tcn import TCN, tcn_full_summary

# our usual useful functions

class PlotLossAccuracy(keras.callbacks.Callback):
    def on_train_begin(self, logs={}):
        self.i = 0
        self.x = []
        self.acc = []
        self.losses = []
        self.val_losses = []
        self.val_acc = []
        self.logs = []

    def on_epoch_end(self, epoch, logs={}):

        self.logs.append(logs)
        self.x.append(int(self.i))
        self.losses.append(logs.get('loss'))
        self.val_losses.append(logs.get('val_loss'))
        self.acc.append(logs.get('accuracy'))
        self.val_acc.append(logs.get('val_accuracy'))

        self.i += 1

        clear_output(wait=True)
        plt.figure(figsize=(16, 6))
        plt.plot([1, 2])
        plt.subplot(121)
        plt.plot(self.x, self.losses, label="train loss")
        plt.plot(self.x, self.val_losses, label="validation loss")
        plt.gca().xaxis.set_major_locator(MaxNLocator(integer=True))
        plt.ylabel('loss')
        plt.xlabel('epoch')
        plt.title('Model Loss')
        plt.legend()
        plt.subplot(122)
        plt.plot(self.x, self.acc, label="training accuracy")
        plt.plot(self.x, self.val_acc, label="validation accuracy")
        plt.legend()
        plt.ylabel('accuracy')
        plt.xlabel('epoch')
        plt.title('Model Accuracy')
        plt.gca().xaxis.set_major_locator(MaxNLocator(integer=True))
        plt.show();


def save_model_to_disk(model, filename_base):
    # save model and weights (don't change the filenames)
    model_json = model.to_json()
    with open(filename_base + ".json", "w") as json_file:
        json_file.write(model_json)
    # serialize weights to HDF5
    model.save_weights(f"{filename_base}.h5")
    print(f"Saved model to {filename_base}.json and weights to {filename_base}.h5")

x_d = np.load('xtrain.npy')
y_num = np.load('ytrain_num.npy')
y_gender = np.load('ytrain_gender.npy')
x_s = np.load('x_spectrum_train.npy')

def prepare_data(X,Y):
    X_train, X_validation, Y_train, Y_validation = sklearn.model_selection.train_test_split(X, Y, test_size=.1)
    one_hot_encoder = OneHotEncoder(sparse_output=False)
    Y_train = one_hot_encoder.fit_transform(Y_train.reshape(-1, 1))
    Y_validation = one_hot_encoder.fit_transform(Y_validation.reshape(-1, 1))
    X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 128, 1)
    X_validation = X_validation.reshape(X_validation.shape[0], X_validation.shape[1], 128, 1)
    return X_train, X_validation, Y_train, Y_validation

def scheduler(epoch, lr):
    if epoch < 10:
        return lr
    else:
        return lr * tf.math.exp(-0.1)
    
c_schedule = tf.keras.callbacks.LearningRateScheduler(scheduler)

#X_train, X_validation, Y_train, Y_validation = prepare_data(x_d, y_gender)
X_train_spectrum, X_validation_spectrum, Y_train, Y_validation = prepare_data(x_s, y_num)



2023-11-13 22:27:27.109624: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-11-13 22:27:27.851481: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-11-13 22:27:27.851563: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-11-13 22:27:27.855045: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-11-13 22:27:28.192599: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-11-13 22:27:28.195473: I tensorflow/core/platform/cpu_feature_guard.cc:182] This Tens

# Build this shit

## TCN

In [7]:
inputs = keras.layers.Input(shape=(8000,1,))
x = inputs
#x = BatchNormalization()
x = TCN(input_shape=(8000, 1), nb_filters=64, return_sequences=True, dilations=[1, 2, 4, 8, 16, 32])(x)
x = TCN(input_shape=x.shape, nb_filters=32, return_sequences=True, dilations=[1, 2, 4, 8, 16, 32])(x)
x = TCN(input_shape=x.shape, nb_filters=16, return_sequences=False, dilations=[1, 2, 4, 8, 16, 32])(x)


#x = Flatten(x)
predictions = Dense(10, activation='softmax')(x)

model = keras.models.Model(inputs=inputs, outputs=predictions)
opt = keras.optimizers.Adam(learning_rate=0.001)
model.compile(optimizer=opt,
                    loss='categorical_crossentropy',
                    metrics=['accuracy'])
model.summary()

Model: "model_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_5 (InputLayer)        [(None, 8000, 1)]         0         
                                                                 
 tcn_7 (TCN)                 (None, 8000, 64)          136256    
                                                                 
 tcn_8 (TCN)                 (None, 8000, 32)          42400     
                                                                 
 tcn_9 (TCN)                 (None, 16)                10704     
                                                                 
 dense_7 (Dense)             (None, 10)                170       
                                                                 
Total params: 189530 (740.35 KB)
Trainable params: 189530 (740.35 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


## LSTM

In [11]:
inputs = keras.layers.Input(shape=(8000,1,))
x = inputs

x = BatchNormalization()(x)
#x = LSTM(64, return_sequences=True)(x)
#x = GRU(64, return_sequences=True)(x)
x = SimpleRNN(64)(x)

x = Flatten()(x)

#x = Dropout(0.25)(x)
x = Dense(16, activation='relu')(x)
x = Dense(8, activation='relu')(x)
x = Dense(4, activation='relu')(x)


predictions = Dense(10, activation='softmax')(x)

model = keras.models.Model(inputs=inputs, outputs=predictions)
opt = keras.optimizers.Adam(learning_rate=0.001)
model.compile(optimizer=opt,
                    loss='categorical_crossentropy',
                    metrics=['accuracy'])
model.summary()

Model: "model_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_8 (InputLayer)        [(None, 8000, 1)]         0         
                                                                 
 batch_normalization_4 (Bat  (None, 8000, 1)           4         
 chNormalization)                                                
                                                                 
 simple_rnn_4 (SimpleRNN)    (None, 64)                4224      
                                                                 
 flatten_5 (Flatten)         (None, 64)                0         
                                                                 
 dense_17 (Dense)            (None, 16)                1040      
                                                                 
 dense_18 (Dense)            (None, 8)                 136       
                                                           

## Transformer

In [4]:
inputs = keras.layers.Input(shape=(8000, 1))
x = inputs

# Transformer Encoder Block
num_heads = 8
ff_dim = 64

# Multi-head self-attention
x = keras.layers.MultiHeadAttention(num_heads=num_heads, key_dim=ff_dim)(x, x)
x = Dropout(0.1)(x)
residual = x

# Feed-forward layer
x = Conv1D(filters=ff_dim, kernel_size=1, activation="relu")(x)
x = Dropout(0.1)(x)
x = Conv1D(filters=1, kernel_size=1)(x)

# Add and normalize
x = tf.keras.layers.Add()([residual, x])
x = keras.layers.LayerNormalization(epsilon=1e-6)(x)

# Max pooling
x = MaxPool1D(pool_size=4)(x)

# Flatten and dense layers
x = Flatten()(x)
x = Dense(24, activation='relu')(x)
x = Dropout(0.3)(x)
x = Dense(16, activation='relu')(x)
x = Dropout(0.25)(x)
x = Dense(16, activation='relu')(x)

# Output layer
predictions = Dense(10, activation='softmax')(x)

# Build model
model = keras.models.Model(inputs=inputs, outputs=predictions)

# Compile model
opt = keras.optimizers.Adam(learning_rate=0.0001)
model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])

# Display model summary
model.summary()

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_2 (InputLayer)        [(None, 8000, 1)]            0         []                            
                                                                                                  
 multi_head_attention (Mult  (None, 8000, 1)              3585      ['input_2[0][0]',             
 iHeadAttention)                                                     'input_2[0][0]']             
                                                                                                  
 dropout (Dropout)           (None, 8000, 1)              0         ['multi_head_attention[0][0]']
                                                                                                  
 conv1d (Conv1D)             (None, 8000, 64)             128       ['dropout[0][0]']       

# Training

In [17]:
num_epochs = 300
pltCallBack = PlotLossAccuracy()
model.fit(X_train_spectrum, Y_train,
        batch_size=64, epochs=num_epochs,
        validation_data=(X_validation_spectrum, Y_validation),
        callbacks=[pltCallBack, c_schedule])


In [None]:
model_1A =  # Figure this out
save_model_to_disk(model_1A, 'waveform_gender_model')

In [None]:
model_1B = # Figure this out
save_model_to_disk(model_1B, 'waveform_digit_model')

In [None]:
model_2A =  # Figure this out
save_model_to_disk(model_2A, 'spectrogram_gender_model')

Saved model to spectrogram_gender_model.json and weights to spectrogram_gender_model.h5


In [None]:
model_2B = model# Figure this out
save_model_to_disk(model_2B, 'spectrogram_digit_model')

Saved model to spectrogram_digit_model.json and weights to spectrogram_digit_model.h5
