In [1]:
import random
import os
import shutil
import glob
import gc
import sys
import h5py
import time
import datetime
import pickle
import librosa
import warnings
import matplotlib.pyplot as plt

import numpy as np
from numpy import array
import pandas as pd
from pandas.plotting import parallel_coordinates
from pydub import AudioSegment

#imported for testing
import wave
import contextlib

# for outputing file
from scipy.cluster.vq import vq, kmeans, whiten
import scipy.stats.stats as st

from sklearn.cluster import KMeans
from sklearn.datasets import make_classification
from sklearn.datasets.samples_generator import make_blobs
from sklearn.decomposition import PCA as sklearnPCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import normalize
from sklearn.metrics import auc, confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
from sklearn import mixture
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.utils import class_weight

import keras

from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.constraints import maxnorm
from keras.layers import Convolution1D, Dense, MaxPooling1D, Flatten, Add, Dropout, Input, Activation
from keras.layers import TimeDistributed, Bidirectional, LSTM, LeakyReLU
from keras.models import Sequential
from keras import optimizers, regularizers
from keras.utils import np_utils, to_categorical

#from colorama import Fore, Back, Style

from IPython.display import clear_output


#warnings.filterwarnings('ignore')
#os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

# confirm TensorFlow sees the GPU
from tensorflow.python.client import device_lib
# assert 'GPU' in str(device_lib.list_local_devices())

# confirm Keras sees the GPU
from keras import backend
# print(len(backend.tensorflow_backend._get_available_gpus()) > 0)

#warnings.filterwarnings('ignore')
#os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

sys.path.insert(1, '..//components//')
import load_feat_directories

Using TensorFlow backend.


In [2]:
sample_rate = 44100
frame_number = 48
hop_length = 441  # frame size= 2 * hop
segment_length = int(sample_rate * 0.2)  # 0.2
segment_pad = int(sample_rate * 0.02)     # 0.02
overlapping = int(sample_rate * 0.1)   # 0.1

classes = 4
NumofFeaturetoUse = 272
n_neurons = 1024 * 4
dense_layers = 10
num_layers = 3
fillength = 3
nbindex = 1024 * 3
dropout = 0.2
n_batch = 128
n_epoch = 1000

In [3]:
def update_progress(progress):
    bar_length = 100
    if isinstance(progress, int):
        progress = float(progress)
    if not isinstance(progress, float):
        progress = 0
    if progress < 0:
        progress = 0
    if progress >= 1:
        progress = 1

    block = int(round(bar_length * progress))
    clear_output(wait = True)
    
    text = "Progress: [{0}] {1:.1f}%".format( "#" * block + "-" * (bar_length - block), progress * 100)
    print(text)

In [4]:
# allnoised_npy[0, 1, 2, 3, 4] ==> H, A, N, S, O
# homenoised_npy[0, 1, 2, 3, 4] ==> H, A, N, S, O
all_noised_npy = load_feat_directories.allnoised_npy
all_noised_npy_test = load_feat_directories.allnoised_npy_test
home_noised_npy = load_feat_directories.homenoised_npy
home_noised_npy_test = load_feat_directories.homenoised_npy_test

for index in range(0, 5):
    #x = os.path.exists(all_noised_npy[index])
    #y = os.path.exists(home_noised_npy[index])
    if not os.path.exists(all_noised_npy[index]):
        print(all_noised_npy[index] + ' does not exist. Breaking the loop... ')
        break

    if not os.path.exists(home_noised_npy[index]):
        print(home_noised_npy[index] + 'does not exist. Breaking the loop... ')
        break

In [5]:
def comprise_vector(path):
    vec_to_return = []
    for fname in os.listdir(path):
        current_vec = np.load(path + fname)
        vec_to_return.append(current_vec)

    vec_to_return = np.array(vec_to_return)
    return vec_to_return

def comprise_label(feature_vector, label):
    label_vec_to_ret = []
    length = len(list(feature_vector))
    for index in range(0, length):
        current_label = [label]
        label_vec_to_ret.append(current_label)
    label_vec_to_ret = np.array(label_vec_to_ret)

    return label_vec_to_ret

def float_compatible(input_np):

    x = np.where(input_np >= np.finfo(np.float32).max)
    for index in range(0, len(x[0])):
        x_position = x[0][index]
        y_position = x[1][index]
        input_np[x_position, y_position] = 0.0
    input_np = np.nan_to_num(input_np)

    return input_np

In [6]:
for index in [0, 1, 2, 3]:
    if not os.path.exists(home_noised_npy[index]):
        print(home_noised_npy[index] + 'does not exist.')
    else:
        path = home_noised_npy[index]
        print(path)
        if index == 0:
            h_feature_vector_home = comprise_vector(path)
            h_label_vector_home = comprise_label(h_feature_vector_home, index)
        elif index == 1:
            a_feature_vector_home = comprise_vector(path)
            a_label_vector_home = comprise_label(a_feature_vector_home, index)
        elif index == 2:
            n_feature_vector_home = comprise_vector(path)
            n_label_vector_home = comprise_label(n_feature_vector_home, index)
        elif index == 3:
            s_feature_vector_home = comprise_vector(path)
            s_label_vector_home = comprise_label(s_feature_vector_home, index)
        else:
            o_feature_vector_home = comprise_vector(path)
            o_label_vector_home = comprise_label(o_feature_vector_home, index)

for index in [0, 1, 2, 3]:
    
    if not os.path.exists(home_noised_npy_test[index]):
        print(home_noised_npy_test[index] + 'does not exist.')
    else:
        path = home_noised_npy_test[index]
        print(path)
        if index == 0:
            h_feature_vector_home_test = comprise_vector(path)
            h_label_vector_home_test = comprise_label(h_feature_vector_home_test, index)
        elif index == 1:
            a_feature_vector_home_test = comprise_vector(path)
            a_label_vector_home_test = comprise_label(a_feature_vector_home_test, index)
        elif index == 2:
            n_feature_vector_home_test = comprise_vector(path)
            n_label_vector_home_test = comprise_label(n_feature_vector_home_test, index)
        elif index == 3:
            s_feature_vector_home_test = comprise_vector(path)
            s_label_vector_home_test = comprise_label(s_feature_vector_home_test, index)
        else:
            o_feature_vector_home_test = comprise_vector(path)
            o_label_vector_home_test = comprise_label(o_feature_vector_home_test, index)


C://Users//yg9ca//Documents//Datasets//padded_deamplified_homenoised_reverberated//npy//Happy_npy//
C://Users//yg9ca//Documents//Datasets//padded_deamplified_homenoised_reverberated//npy//Angry_npy//
C://Users//yg9ca//Documents//Datasets//padded_deamplified_homenoised_reverberated//npy//Neutral_npy//
C://Users//yg9ca//Documents//Datasets//padded_deamplified_homenoised_reverberated//npy//Sad_npy//
C://Users//yg9ca//Documents//Datasets//padded_deamplified_homenoised_reverberated//npy//Happy_npy_test//
C://Users//yg9ca//Documents//Datasets//padded_deamplified_homenoised_reverberated//npy//Angry_npy_test//
C://Users//yg9ca//Documents//Datasets//padded_deamplified_homenoised_reverberated//npy//Neutral_npy_test//
C://Users//yg9ca//Documents//Datasets//padded_deamplified_homenoised_reverberated//npy//Sad_npy_test//


In [7]:
# Load training npy files
featureSet = float_compatible(np.vstack((h_feature_vector_home, a_feature_vector_home, n_feature_vector_home, s_feature_vector_home)))
Label = (np.vstack((h_label_vector_home, a_label_vector_home, n_label_vector_home, s_label_vector_home)))

Label[Label == 0] = 0
Label[Label == 1] = 1
Label[Label == 2] = 2
Label[Label == 3] = 3
#Label[Label == 4] = 4

Label = to_categorical(Label)
#featureSet = np.split(featureSet, np.array([NumofFeaturetoUse]), axis = 2)[0]
print('training data: ' + str(featureSet.shape))
print('training label: ' + str(Label.shape))

# Load testing npy files
featureSet_val = float_compatible(np.vstack((h_feature_vector_home_test, a_feature_vector_home_test, n_feature_vector_home_test, s_feature_vector_home_test)))
Label_val = (np.vstack((h_label_vector_home_test, a_label_vector_home_test, n_label_vector_home_test, s_label_vector_home_test)))

Label_val[Label_val == 0] = 0
Label_val[Label_val == 1] = 1
Label_val[Label_val == 2] = 2
Label_val[Label_val == 3] = 3
#Label_val[Label_val == 4] = 4

Label_val = to_categorical(Label_val)
#featureSet_val = np.split(featureSet_val, np.array([NumofFeaturetoUse]), axis = 2)[0]
print('evaluation data: ' + str(featureSet_val.shape))
print('evaluation label: ' + str(Label_val.shape))

training data: (34155, 48, 272)
training label: (34155, 4)
evaluation data: (7551, 48, 272)
evaluation label: (7551, 4)


In [8]:
adam = optimizers.Adam(lr = 1e-4, beta_1 = 0.9, beta_2 = 0.999, epsilon = None, decay = 0, amsgrad = True)
sgd = optimizers.SGD(lr = 0.01, decay = 1e-6, momentum = 0.9, nesterov = True)
rmsprop = optimizers.RMSprop(lr = 0.0001, rho = 0.9, epsilon = None, decay = 0.0)
adagrad = optimizers.Adagrad(lr = 0.01, epsilon = None, decay = 0.0)
adadelta = optimizers.Adadelta(lr = 1.0, rho = 0.95, epsilon = None, decay = 0.0)
adamax = optimizers.Adamax(lr = 0.002, beta_1 = 0.9, beta_2 = 0.999, epsilon = None, decay = 0.0)
nadam = optimizers.Nadam(lr = 0.002, beta_1 = 0.9, beta_2 = 0.999, epsilon = None, schedule_decay = 0.004)

In [9]:
def record(str_message, log_file):
    str_message = str_message + '\n'
    file = open(log_file, 'a')
    file.write(str_message)
    file.close()

def create_cnn(title, num_layers, n_neurons, n_batch, nbindex, dropout, classes, dense_layers):

    model = Sequential()

    model.add(Convolution1D(nb_filter=nbindex, filter_length=fillength, activation = 'relu',
                            input_shape=(featureSet.shape[1], featureSet.shape[2]), kernel_constraint=maxnorm(3)))
    model.add(MaxPooling1D(pool_size=2, strides=2, padding='valid'))
    model.add(Dropout(dropout))

    model.add(Convolution1D(nb_filter=nbindex*2, filter_length=fillength, activation = 'relu',
                            kernel_constraint=maxnorm(3)))
    model.add(MaxPooling1D(pool_size=2, strides=2, padding='valid'))
    model.add(Dropout(dropout))

    model.add(Convolution1D(nb_filter=nbindex*2, filter_length=fillength, activation = 'relu',
                            kernel_constraint=maxnorm(3)))
    model.add(MaxPooling1D(pool_size=2, strides=2, padding='valid'))
    model.add(Dropout(dropout))

    model.add(Convolution1D(nb_filter=nbindex, filter_length=fillength, activation = 'relu',
                            kernel_constraint=maxnorm(3)))  
    model.add(MaxPooling1D(pool_size=2, strides=2, padding='valid'))
    model.add(Dropout(dropout))
    
    model.add(Flatten())
    
    for i in range(0, dense_layers):
        model.add(Dense(n_neurons, activation = 'relu'))
        model.add(Dropout(dropout))

       
    #tf.Print(model.layers[len(layers)-2].output)
    
    model.add(Dense(classes, activation='softmax'))

    model.compile(loss='categorical_crossentropy', optimizer=adam,
                  metrics=['accuracy'])

    #model.summary()

    return model

In [10]:
def train_cnn(prefix, source):
    
    save_to_path = prefix

    checkpoint_filepath = prefix + "Checkpoint_" + source + title + ".hdf5"
    final_filepath = prefix + "Final_" + source + title + ".hdf5"

    if not os.path.exists(save_to_path):
        os.mkdir(save_to_path)

    X, X_test, Y, Y_test= train_test_split(featureSet, Label, test_size = 0.25, shuffle = True)
    model = create_cnn(title, num_layers, n_neurons, n_batch, nbindex, dropout, classes, dense_layers)
    checkpoint = ModelCheckpoint(checkpoint_filepath, monitor = 'val_acc', verbose = 0, save_best_only = True, mode = 'auto')
    early_stopping_monitor = EarlyStopping(patience = 50)
    callbacks_list = [checkpoint, early_stopping_monitor]
    model.fit(X, Y, nb_epoch = n_epoch, batch_size = n_batch,  callbacks = callbacks_list, validation_data = (X_test, Y_test), verbose = 1)
    model.save_weights(final_filepath)
    model.load(checkpoint_filepath)
    
    return model

In [11]:
title = 'Four_neurons_home_' + str(n_neurons) + '_filters_' + str(nbindex) + '_dropout_' + str(dropout) + '_epoch_' + str(n_epoch) + '_dense_' + str(dense_layers)

prefix = '..//..//modules//'
source = 'home_'

if not os.path.exists(prefix):
    os.mkdir(prefix)

final_filepath = prefix + "Checkpoint_" + source + title + ".hdf5"
model = train_cnn(prefix, source)

  if sys.path[0] == '':
  app.launch_new_instance()


ValueError: None values not supported.

In [None]:
def max_index(a): 
    a = list(a)
    # inbuilt function to find the position of minimum  
    minpos = a.index(min(a))  
    # inbuilt function to find the position of maximum  
    maxpos = a.index(max(a))    
    return maxpos

def predict_cnn(model):
    y_pred = []
    y_true = []

    for item in list(Label_val):
            y_true.append(max_index(item))

    for item in list(model.predict(featureSet_val)):
            y_pred.append(max_index(item))

    print('Accuracy: ' + str(accuracy_score(y_true, y_pred)))
    #print('Precision: ' + str(precision_score(y_true, y_pred)))
    #print('Recall: ' + str(recall_score(y_true, y_pred)))
    #print('f1 score: ' + str(f1_score(y_true, y_pred)))

    #tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
    
    # Plot non-normalized confusion matrix
    #plot_confusion_matrix(y_true, y_pred, classes=[0, 1, 2, 3, 4],
    #                  title='Confusion matrix, without normalization')


In [None]:
model = keras.models.load_model(prefix + "Checkpoint_" + source + title + ".hdf5")
predict_cnn(model)