In [17]:
import os
import shutil
import sys
import h5py

import librosa

import numpy as np
from numpy import array

from sklearn.metrics import auc, confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split

import tensorflow as tf


In [3]:
def mil_squared_error(y_true, y_pred):
    return tf.keras.backend.square(tf.keras.backend.max(y_pred) - tf.keras.backend.max(y_true))

adam = tf.keras.optimizers.Adam(learning_rate=1e-5)

In [5]:
# load training and testing ... 

condition = 'all'

new_train = '..//train//' + condition + '//'
new_test = '..//test//' + condition + '//'
new_val = '..//val//' + condition + '//'

def load_vectors(path):
    files = sorted(os.listdir(path))
    X = np.expand_dims(np.zeros((48, 272)), axis=0)
    y = []
    for npy in files:
        if 'Calm' in npy or 'Other' in npy:
            continue
        
        current = np.load(path+npy)
        X = np.vstack((X, current))
        label = [files.index(npy)]*len(current)       
        y = y + label       
    X = X[1:]
    y = to_categorical(y)    
    print(X.shape)
    print(y.shape)    
    return X, y
    
X_train, y_train = load_vectors(new_train)
X_test, y_test = load_vectors(new_test)

(34155, 48, 272)
(34155, 4)
(7551, 48, 272)
(7551, 4)


In [8]:
from tensorflow import keras
model = keras.models.load_model('..//models//4_class_models_acc_0.8718_f1_0.8713_cnn.hdf5', custom_objects={'mil_squared_error': mil_squared_error})

In [26]:
def split_model(model):
    first_half_model = keras.Sequential()
    second_half_model = keras.Sequential()
    for i in range(0, len(model.layers)):
        
        if i < len(model.layers) - 1:
            first_half_model.add(model.layers[i])
        else:
            second_half_model.add(model.layers[i])
            
    print('the original model has ' + str(len(model.layers)) + ' layers.')
    print('the penultimate (a.k.a. first half) model has ' + str(len(first_half_model.layers)) + ' layers.')
    print('the penultimate (a.k.a. second half) model has ' + str(len(second_half_model.layers)) + ' layers.')
    return first_half_model, second_half_model

In [28]:
first_half_model, second_half_model = split_model(model)

the original model has 34 layers.
the penultimate (a.k.a. first half) model has 33 layers.
the penultimate (a.k.a. second half) model has 1 layers.


In [39]:
# now, get emp_mean for each class

emp_vals = [[], [], [], []]

for X, y in zip(first_half_model.predict(X_train), y_train):
    emp_vals[np.argmax(y)].append(X)
    
emp_vals = np.asarray(emp_vals)

In [49]:
def get_emp_mean(emp_val):
    result = np.zeros(emp_val[0].shape).tolist()
    
    for penult_vector in emp_val:
        #penult_vector has size (1024, 0)
        for index in range(0, len(penult_vector)):
            result[index] = penult_vector[index] + result[index]
            
    for index in range(0, len(result)):
        result[index] = result[index]/len(result)       
    result = np.expand_dims(np.asarray(result), axis=0)
    print(result.shape)
    
    return result

In [50]:
emp_means = [get_emp_mean(emp_vals[0]), get_emp_mean(emp_vals[1]), \
            get_emp_mean(emp_vals[2]), get_emp_mean(emp_vals[3])]

(1, 1024)
(1, 1024)
(1, 1024)
(1, 1024)


In [54]:
# get emprical covariance
def get_emp_covar():
    
    flag = 0
    
    for X, y in zip(first_half_model.predict(X_train), y_train):
        X = np.expand_dims(X, axis=0)
        diff = X - emp_means[np.argmax(y)]
        transpose = np.transpose(diff)
        result = diff @ transpose
        
        if flag == 0:
            emp_covar = result
            flag = 1
        else:
            emp_covar = np.add(emp_covar, result)
    
    #division_vec = np.zeros(emp_covar.shape)
    #division_vec = division_vec + len(y_train)
            
    emp_covar = emp_covar/len(y_train)
    print(emp_covar.shape)
    
    return emp_covar

emp_covar = get_emp_covar()

(1, 1)


In [61]:
inv_emp_covar = np.linalg.inv(emp_covar)

def get_emp_mahalanobis(y_pred, c):
    emp_mean = emp_means[c]
    diff = y_pred - emp_mean
    transpose = np.transpose(diff)
    
    emp_mahalanobis = np.linalg.norm(transpose @ inv_emp_covar @ diff)
    
    #print(emp_mahalanobis.shape)
    
    return emp_mahalanobis

In [62]:
emp_mahalanobis_all_classes = [[], [], [], []]

for index in range(0, len(emp_vals)):
    for y_pred in emp_vals[index]:
        emp_m = get_emp_mahalanobis(y_pred, index)
        emp_mahalanobis_all_classes[index].append(emp_m)
        
np.shape(np.asarray(emp_mahalanobis_all_classes))

(4,)

In [65]:
emp_mahalanobis_all_classes_stds = []
emp_mahalanobis_all_classes_means = []

for index in range(0, len(emp_mahalanobis_all_classes)):
    mean = np.mean(emp_mahalanobis_all_classes[index])
    std = np.std(emp_mahalanobis_all_classes[index])
    
    emp_mahalanobis_all_classes_means.append(mean)
    emp_mahalanobis_all_classes_stds.append(std)    

In [88]:
def get_mahalanobis_coeff(m_mean, m_std, m_dists, threshold):    
    for i in np.linspace(0, 5, 500):
        count = 0
        for m_dist in m_dists:
            
            if m_mean - i*m_std < m_dist and m_mean + i*m_std > m_dist:
                count += 1
                
        if count/len(m_dists) > threshold:            
            #print(count/len(m_dists))
            return i

In [89]:
emp_mahalanobis_all_classes_coeffs = []

for m_mean, m_std, m_dists in zip(emp_mahalanobis_all_classes_means, \
                                 emp_mahalanobis_all_classes_stds, emp_mahalanobis_all_classes):
    coeff = get_mahalanobis_coeff(m_mean, m_std, m_dists, threshold=0.75)
    emp_mahalanobis_all_classes_coeffs.append(coeff)
    
print(emp_mahalanobis_all_classes_coeffs)

0.7519918051445481
0.753516333938294
0.7512270731077241
0.7516740438088753
[1.0521042084168337, 1.1623246492985972, 1.1122244488977955, 1.0220440881763526]


In [92]:
def check_if_in_distribution(y_pred, c):
    m_dist = get_emp_mahalanobis(y_pred, c)
    std = emp_mahalanobis_all_classes_stds[c]
    mean = emp_mahalanobis_all_classes_means[c]
    coeff = emp_mahalanobis_all_classes_coeffs[c]
    
    if mean - coeff*m_std < m_dist and mean + coeff*m_std > m_dist:
        return True
    else:
        return False

False