In [129]:
import glob, os
import math
import datetime
import shutil
import numpy as np
from pathlib import Path
import matplotlib.pyplot as plt

import librosa
import pandas as pd
import tensorflow as tf
from tensorflow.keras import backend as bk

from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, TensorBoard 
from tensorflow.keras.callbacks import LearningRateScheduler
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, BatchNormalization
from tensorflow.keras.layers import Convolution2D, MaxPooling2D, SeparableConv2D
from tensorflow.keras.regularizers import l2

from librosa.feature import melspectrogram
from librosa.display import specshow

'''
'''

#print("Num GPUs Available: ",len(tf.config.experimental.list_physical_devices('GPU')))


#TODO: export requirements

# STFT / mel-spectrogram parameters used when turning audio into features.
audio_settings = dict(
    sr=44100,
    n_fft=2205,
    hop_length=441,
    win_length=442,
    n_mels=128,
    fmin=10,
    fmax=22050,
)

# Settings handed to the training run: an independent copy with the same values.
r_settings = dict(audio_settings)

# Training hyper-parameters, plus the feature parameters repeated under the
# key names the model code uses.
# NOTE(review): 'frames' is 500 while build_model defaults to frames=501 - confirm.
# NOTE(review): a Nesterov momentum of 0.09 is unusually small (0.9 is common) - confirm.
model_settings = dict(
    samplerate=audio_settings["sr"],
    n_mels=audio_settings["n_mels"],
    fmin=audio_settings["fmin"],
    fmax=audio_settings["fmax"],
    n_fft=audio_settings["n_fft"],
    hop_length=audio_settings["hop_length"],
    frames=500,
    batch=10,
    epochs=80,
    train_samples=1600,
    val_samples=400,
    lr=0.01,
    nesterov_momentum=0.09,
)

In [130]:
# Load dataset.
# According to the ESC-50 documentation the rooster class has target index 1.

# One timestamp shared by both output directories, so the checkpoints and the
# TensorBoard logs of a run always carry the same identifier (two separate
# datetime.now() calls can straddle a second boundary).
RUN_STAMP = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
MODEL_DIR = Path('model') / RUN_STAMP
TBOARD_LOGS = Path('tb_logs') / RUN_STAMP

# 50-class sound dataset: audio clips plus the metadata table.
AUDIO_DS_PATH = Path("Data/Dataset/audio")
DF_PATH = Path("Data/Dataset/esc50.csv")

# Metadata as a pandas DataFrame.
sdf = pd.read_csv(DF_PATH)

print("Number of entries in Data frame: ", len(sdf.index))

# Rows with a NaN in any column (vectorised instead of a row-wise apply).
seriesObj = sdf.isnull().any(axis=1)
numOfRows = int(seriesObj.sum())
print('Number of Rows in dataframe which contain NaN in any column : ', numOfRows)

# Number of rows that belong to fold 1.
seriesObj = sdf['fold'] == 1
numOfRows = int(seriesObj.sum())

# Integer (non one-hot) class indexes, sorted.
classes = sorted(sdf.target.unique())

print('Number of folds: ', len(sdf.fold.unique()))
print('Count per fold: ', numOfRows)
print('Classes indexes in non categorical index: ', classes )

# Drop the metadata columns the training code does not read.
sdf = sdf.drop(['take','src_file', 'category','esc10','fold'], axis=1)
sdf.head()

Number of entries in Data frame:  2000
Number of Rows in dataframe which contain NaN in any column :  0
Number of folds:  5
Count per fold:  400
Classes indexes in non categorical index:  [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]


Unnamed: 0,filename,target
0,1-100032-A-0.wav,0
1,1-100038-A-14.wav,14
2,1-100210-A-36.wav,36
3,1-100210-B-36.wav,36
4,1-101296-A-19.wav,19


In [131]:
# Train/validation splitting. The dataset's predefined `fold` column is not used yet.
def split_data(dataframe, test_size=0.2, random_state=None):
    """
    Randomly split ``dataframe`` into a train part and a test part.

    Parameters
    ----------
    dataframe : the rows to split.
    test_size : value forwarded to ``train_test_split`` (default 0.2, the
        fraction that was previously hard-coded).
    random_state : seed forwarded to ``train_test_split``; pass an int to get
        the same split on every run. ``None`` keeps the previous unseeded
        behaviour.

    Returns
    -------
    (train, test) tuple.
    """
    train, test = train_test_split(dataframe, test_size=test_size,
                                   random_state=random_state)
    return train, test
    
def get_fold_from(dataframe):
    """
    Return the (train, validation) rows of the first split produced by a
    shuffled 5-fold ``KFold`` with ``random_state=1``.
    """
    splitter = KFold(n_splits=5, shuffle=True, random_state=1)
    first_split = next(splitter.split(dataframe), None)
    train_idx, val_idx = first_split[0], first_split[1]
    return dataframe.iloc[train_idx], dataframe.iloc[val_idx]

def get_audiop(audio_fn:str):
    """Return the path of ``audio_fn`` inside the dataset audio directory."""
    return AUDIO_DS_PATH.joinpath(audio_fn)

def pre_process_stem(audio_path:Path, sr:int=44100):
    """
    Load an audio file and return its samples.

    Parameters
    ----------
    audio_path : file to load.
    sr : sample rate requested from ``librosa.load`` (default 44100, the value
        that was previously hard-coded).

    Returns
    -------
    The sample array returned by ``librosa.load``; the sample rate it also
    returns is discarded.
    """
    # ``sr`` must be passed by keyword: librosa >= 0.10 rejects it positionally.
    y, _ = librosa.load(audio_path, sr=sr)
    return y

def apply_log_db(windows):
    """Convert ``windows`` to decibels with ``librosa.amplitude_to_db``, using ``np.max`` as reference."""
    return librosa.amplitude_to_db(windows, ref=np.max)

def compute_windows(audio_stem, audio_settings):
    """
    Return the magnitude of the STFT of ``audio_stem``.

    ``audio_settings`` supplies 'n_fft' and 'hop_length'.
    """
    spectrum = librosa.stft(
        audio_stem,
        n_fft=audio_settings['n_fft'],
        hop_length=audio_settings['hop_length'],
    )
    return np.abs(spectrum)

# The normalised log-mel spectrogram is the input of the network.
# NOTE: despite its name, load_audio_windows returns mel features, not raw STFT windows.
def load_audio_windows(audio_path:Path, audio_settings, reshape: bool):
    """
    Load ``audio_path`` and return its mel features.

    When ``reshape`` is truthy a trailing axis of length 1 is appended to the
    features; otherwise they are returned as computed.
    """
    features = compute_melspect_for(pre_process_stem(audio_path), audio_settings)
    return np.expand_dims(features, axis=-1) if reshape else features


In [132]:
# Log-mel spectrogram computed with a single librosa call
def compute_melspect_for(audio_stem, audio_settings):
    """
    Compute a normalised log-mel spectrogram of ``audio_stem``.

    Parameters
    ----------
    audio_stem : audio samples.
    audio_settings : dict supplying 'sr', 'n_fft', 'hop_length' and 'n_mels'.

    Returns
    -------
    ``librosa.util.normalize`` applied to ``log(S + 1e-9)``, where ``S`` is
    the mel spectrogram.
    """
    # The signal must be passed as ``y=``: librosa >= 0.10 made the arguments
    # of melspectrogram keyword-only.
    # NOTE(review): 'fmin', 'fmax' and 'win_length' from the settings are not
    # forwarded, so librosa's defaults apply - confirm this is intended.
    S = librosa.feature.melspectrogram(y=audio_stem, sr=audio_settings['sr'],
                                       n_fft=audio_settings['n_fft'],
                                       hop_length=audio_settings['hop_length'],
                                       n_mels=audio_settings['n_mels'])
    # The small offset keeps log() finite where the spectrogram is zero.
    mellog = np.log(S + 1e-9)
    melnormalized = librosa.util.normalize(mellog)
    return melnormalized

### Define Model

In [141]:



def build_model(frames=501, bands=128, channels=1, num_labels=50,
                conv_size=(5,5), conv_block='conv',
                downsample_size=(4,2),
                fully_connected=64,
                n_stages=None, n_blocks_per_stage=None,
                filters=24, kernels_growth=2,
                dropout=0.5,
                use_strides=False):
    """
    Build the SB-CNN model described in
    "Deep Convolutional Neural Networks and Data Augmentation for
    Environmental Sound Classification", Salamon and Bello, 2016.
    https://arxiv.org/pdf/1608.04363.pdf

    Based on https://gist.github.com/jaron/5b17c9f37f351780744aefc74f93d3ae,
    with the parameters set back to those of the paper's authors and Batch
    Normalization added.

    The network is three convolutional blocks followed by two dense layers
    with dropout; the input shape is ``(bands, frames, channels)``. The first
    block always uses ``Convolution2D``; ``conv_block`` selects the layer type
    of the second and third block. With ``use_strides`` the down-sampling is
    done by the convolution strides, otherwise by max-pooling.
    ``n_stages`` and ``n_blocks_per_stage`` are accepted but not used.

    Returns an un-compiled ``Sequential`` model.
    """
    print('Building Model')
    assert conv_block in ('conv', 'depthwise_separable')
    later_conv = SeparableConv2D if conv_block == 'depthwise_separable' else Convolution2D

    # Down-sample either through the convolution stride or through pooling.
    strides, pool = (downsample_size, (1, 1)) if use_strides else ((1, 1), downsample_size)
    wide_filters = filters*kernels_growth

    stack = [
        # --- block 1 ---
        Convolution2D(filters, conv_size, padding='same', strides=strides,
                      data_format='channels_last',
                      input_shape=(bands, frames, channels)),
        BatchNormalization(),
        MaxPooling2D(pool_size=pool),
        Activation('relu'),
        # --- block 2 ---
        later_conv(wide_filters, conv_size, padding='same', strides=strides),
        BatchNormalization(),
        MaxPooling2D(pool_size=pool),
        Activation('relu'),
        # --- block 3 (no pooling) ---
        later_conv(wide_filters, conv_size, padding='valid', strides=strides),
        BatchNormalization(),
        Activation('relu'),
        # --- classifier head ---
        Flatten(),
        Dropout(dropout),
        Dense(fully_connected, kernel_regularizer=l2(0.001)),
        Activation('relu'),
        Dropout(dropout),
        Dense(num_labels, kernel_regularizer=l2(0.001)),
        Activation('softmax'),
    ]
    return Sequential(stack)

In [142]:
def dataframe_generator(data,audio_settings, batchsize, num_classes=50):
    """
    Keras generator that lazily loads batches described by a pandas.DataFrame.

    Every pass over ``data`` uses a fresh random permutation of the rows and
    yields consecutive, non-overlapping batches, so each row is used at most
    once per pass. Rows left over when ``len(data)`` is not a multiple of
    ``batchsize`` are skipped for that pass, which keeps all batches the same
    size. (Previously every batch was an independent random draw, so a
    training "epoch" did not cover the data and the validation set changed
    from step to step.)

    Parameters
    ----------
    data : DataFrame with the columns 'filename' and 'target'.
    audio_settings : forwarded to ``load_audio_windows``.
    batchsize : number of rows per batch.
    num_classes : width of the one-hot targets (default 50, the value that was
        previously hard-coded).

    Yields
    ------
    (mels, categorical_targets): the stacked features of the batch and the
    matching one-hot targets.

    Raises
    ------
    ValueError
        On first iteration, if ``batchsize`` is smaller than 1 or larger than
        ``len(data)``.
    """
    n_rows = len(data)
    if batchsize < 1 or batchsize > n_rows:
        # Without this guard the loop below would spin forever yielding nothing.
        raise ValueError(
            "batchsize must be between 1 and len(data)=%d, got %r" % (n_rows, batchsize)
        )

    last_start = n_rows - batchsize
    while True:
        order = np.random.permutation(n_rows)
        for start in range(0, last_start + 1, batchsize):
            batch = data.iloc[order[start:start + batchsize]]
            mels = np.asarray([
                load_audio_windows(get_audiop(filename), audio_settings, True)
                for filename in batch['filename']
            ])
            targets = batch['target'].tolist()
            categorical_targets = tf.keras.utils.to_categorical(targets, num_classes=num_classes)
            yield mels, categorical_targets
        
        

In [143]:
def check_dir(p: Path):
    """
    Make sure directory ``p`` exists and return it.

    Missing parent directories are created as well, so a nested path such as
    ``model/<timestamp>`` works even when ``model`` does not exist yet
    (a plain ``mkdir`` raises FileNotFoundError in that case).
    """
    if p.is_dir():
        print('models directory exists')
    else:
        p.mkdir(parents=True, exist_ok=True)
    return p

def train_model(out_dir,logs_dir, train, val, model, model_settings, audio_settings):
    """
    Compile ``model``, train it on lazily loaded batches and save the history.

    Parameters
    ----------
    out_dir : directory that receives the checkpoints and ``history.csv``.
    logs_dir : directory for the TensorBoard logs.
    train, val : DataFrames handed to ``dataframe_generator``.
    model : the Keras model to compile and fit.
    model_settings : dict read for 'epochs', 'batch', 'lr' and
        'nesterov_momentum'.
    audio_settings : forwarded to ``dataframe_generator``.

    Returns
    -------
    The object returned by ``model.fit``.
    """
    epochs = model_settings['epochs']
    batch_size = model_settings['batch']

    # ``learning_rate`` is the supported keyword; ``lr`` is a deprecated alias.
    optimizer = tf.keras.optimizers.SGD(learning_rate=model_settings['lr'],
                                        momentum=model_settings['nesterov_momentum'],
                                        nesterov=True)

    model.compile(loss="categorical_crossentropy",
                  optimizer=optimizer,
                  metrics=['accuracy'])

    # Keras fills the placeholders with the epoch number and the metric value.
    model_path = './' + str(out_dir) + "/{epoch:02d}-{val_accuracy:.2f}.hdf5"
    print('model_path: ',model_path)
    # The deprecated ``period=1`` argument is dropped; the default
    # ``save_freq='epoch'`` checks for a new best model after every epoch.
    checkpoint = ModelCheckpoint(
        model_path,
        monitor='val_loss',
        mode="auto",
        verbose=1,
        save_best_only=True,
    )
    earlystop_callback = EarlyStopping(monitor='val_loss',
                                       patience=7,
                                       verbose=1,
                                       mode='auto')
    logs_path = './' + str(logs_dir)
    tensorboard_callback = TensorBoard(log_dir=logs_path,
                                       update_freq='epoch',
                                       write_graph=True,
                                       profile_batch=100000000)
    lr_callback = LearningRateScheduler(lr_scheduler)
    callbacks_list = [checkpoint, earlystop_callback, tensorboard_callback, lr_callback]

    train_tfds = dataframe_generator(train, audio_settings, batch_size)
    val_tfds = dataframe_generator(val, audio_settings, batch_size)

    # The generators never end, so the length of an epoch is set explicitly.
    steps_per_epoch = len(train) // batch_size
    validation_steps = len(val) // batch_size

    # ``shuffle`` is not passed: Keras ignores it for generator input.
    hist = model.fit(x=train_tfds,
                     epochs=epochs, verbose=1,
                     callbacks=callbacks_list,
                     steps_per_epoch=steps_per_epoch,
                     validation_data=val_tfds,
                     validation_steps=validation_steps)

    history_path = os.path.join(out_dir, "history.csv")
    history_dataframe(hist).to_csv(history_path)

    return hist
    
def history_dataframe(h):
    """
    Turn a training-history object into a DataFrame.

    The frame has an "epoch" column taken from ``h.epoch`` followed by one
    column per entry of ``h.history``.
    """
    columns = {"epoch": h.epoch}
    columns.update(h.history)
    return pd.DataFrame(columns)

def predict_model(sample_path, audio_settings, model, method="mean"):
    """
    Classify a long recording window by window.

    The features of ``sample_path`` are cut into overlapping windows along the
    frame axis (hop of 250 frames) and all windows are passed to
    ``model.predict`` as one batch. The last window is aligned with the end of
    the track, so the tail is covered without padding.

    Parameters
    ----------
    sample_path : audio file to analyse.
    audio_settings : forwarded to ``load_audio_windows``.
    model : object with a ``predict`` method; when it exposes a 4-element
        ``input_shape`` the window length is read from its frame axis,
        otherwise 501 frames (the ``build_model`` default) are used.
    method : "mean" returns the predicted scores of every window;
        "majority" returns a one-hot vote (arg-max class) for every window.

    Returns
    -------
    Array of shape (n_windows, n_classes).

    Raises
    ------
    ValueError
        If ``method`` is unknown or the track is shorter than one window.
    """
    if method not in ("mean", "majority"):
        raise ValueError("method must be 'mean' or 'majority', got %r" % (method,))

    # Features with a trailing channel axis; the frame axis is axis 1.
    track = load_audio_windows(sample_path, audio_settings, True)
    n_frames = track.shape[1]

    input_shape = getattr(model, "input_shape", None)
    if input_shape is not None and len(input_shape) == 4 and input_shape[2]:
        window = int(input_shape[2])
    else:
        window = 501
    hop = 250

    if n_frames < window:
        raise ValueError(
            "track has %d frames, fewer than one window of %d" % (n_frames, window)
        )

    starts = list(range(0, n_frames - window + 1, hop))
    if starts[-1] != n_frames - window:
        # Extra window flush with the end of the track.
        starts.append(n_frames - window)

    # Batch axis first: (n_windows, bands, window, 1).
    batch = np.stack([track[:, s:s + window, :] for s in starts])
    predictions = np.asarray(model.predict(batch))

    if method == "mean":
        ret = predictions
    else:
        votes = np.argmax(predictions, axis=1)
        ret = np.eye(predictions.shape[1])[votes]

    assert len(ret.shape) == 2, ret.shape
    assert ret.shape[0] == len(starts), ret.shape

    return ret

def lr_scheduler(epoch, lr):
    """
    Learning-rate schedule for ``LearningRateScheduler``.

    The rate is left unchanged for the first 10 epochs and multiplied by
    ``exp(-0.1)`` on every later epoch.

    Returns a plain Python float: ``tf.math.exp`` would return a tensor, which
    newer Keras versions reject as the output of a schedule function.
    """
    if epoch < 10:
        return float(lr)
    return float(lr * math.exp(-0.1))

def main():
    """
    Run one training session: create the output directories, split the data,
    print the setup, build the model and train it.
    """
    run_name = "vh_challenge"
    checkpoints_dir = check_dir(MODEL_DIR)
    tensorboard_dir = check_dir(TBOARD_LOGS)

    train_rows, val_rows = split_data(sdf)
    print('Setup:')
    print('Train size: ', len(train_rows))
    print('Validation size: ', len (val_rows))
    print('Epochs: ', model_settings['epochs'])

    network = build_model()
    network.summary()
    print("Training model", run_name)

    # The returned history is not needed here; train_model writes it to disk.
    train_model(checkpoints_dir, tensorboard_dir, train_rows, val_rows,
                network, model_settings, r_settings)
        
        

In [None]:
# Run the pipeline: create the output directories, split the data, build and train the model.
main()

Setup:
Train size:  1600
Validation size:  400
Epochs:  80
Building Model
Model: "sequential_11"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_33 (Conv2D)           (None, 128, 501, 24)      624       
_________________________________________________________________
batch_normalization_33 (Batc (None, 128, 501, 24)      96        
_________________________________________________________________
max_pooling2d_22 (MaxPooling (None, 32, 250, 24)       0         
_________________________________________________________________
activation_55 (Activation)   (None, 32, 250, 24)       0         
_________________________________________________________________
conv2d_34 (Conv2D)           (None, 32, 250, 48)       28848     
_________________________________________________________________
batch_normalization_34 (Batc (None, 32, 250, 48)       192       
_____________________________________________

In [28]:
# Run a trained checkpoint over the competition recording.
competition_track_path = Path('rooster_competition.wav')
# Checkpoints are written below the relative ./model/<timestamp>/ directory,
# so the path must not start at the filesystem root.
model_path = Path('model/20200928-013914/43-0.41.hdf5')
# Only `tf` is imported in this notebook; a bare `keras` name is undefined.
loaded_model = tf.keras.models.load_model(str(model_path))
prediction = predict_model(competition_track_path, audio_settings, loaded_model, method="mean")


UsageError: Line magic function `%kill` not found.
