In [1]:
import tensorflow as tf

from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.constraints import maxnorm
from keras.layers import Convolution1D, Dense, MaxPooling1D, Flatten, Add, Dropout, Input, Activation
from keras.layers import TimeDistributed, Bidirectional, LSTM, LeakyReLU
from keras.models import Sequential
from keras import optimizers, regularizers
from keras.utils import np_utils, to_categorical
from keras.models import Model, load_model, Sequential
from keras.regularizers import l2

import keras

from IPython.display import clear_output
from tensorflow.python.client import device_lib
from tensorflow.python.keras import backend
import tensorflow as tf

from extract_feat import extract_feats_single_wav
import os
import numpy as np

Using TensorFlow backend.


Number of features are unspecified. Defaut is set to = 272.




In [12]:
def load_emp_miu(y):
    """Load the stored ``emp_miu`` array for class ``y``.

    Parameters
    ----------
    y : class identifier; ``str(y)`` is spliced into the file name.

    Returns
    -------
    Whatever ``np.load`` returns for ``..//models//emp_miu_class_<y>.npy``
    (path is relative to the current working directory).
    """
    file_name = ''.join(('..//models//emp_miu_class_', str(y), '.npy'))
    loaded = np.load(file_name)
    return loaded

def load_inv_emp_covar():
    """Load the array stored in ``..//models//inv_emp_sigma.npy``.

    The path is relative to the current working directory; the value is
    returned exactly as ``np.load`` produces it.
    """
    return np.load('..//models//inv_emp_sigma.npy')

def load_mahalanobis_mean(y):
    """Load the stored ``mahalanobis_mean`` array for class ``y``.

    Parameters
    ----------
    y : class identifier; ``str(y)`` is spliced into the file name.

    Returns
    -------
    Whatever ``np.load`` returns for
    ``..//models//mahalanobis_mean_class_<y>.npy`` (relative to the current
    working directory).
    """
    file_name = ''.join(('..//models//mahalanobis_mean_class_', str(y), '.npy'))
    loaded = np.load(file_name)
    return loaded

def load_mahalanobis_std(y):
    """Load the stored ``mahalanobis_std`` array for class ``y``.

    Parameters
    ----------
    y : class identifier; ``str(y)`` is spliced into the file name.

    Returns
    -------
    Whatever ``np.load`` returns for
    ``..//models//mahalanobis_std_class_<y>.npy`` (relative to the current
    working directory).
    """
    file_name = ''.join(('..//models//mahalanobis_std_class_', str(y), '.npy'))
    loaded = np.load(file_name)
    return loaded

def load_mahalanobis_coeff(y):
    """Load the stored threshold coefficient array for class ``y``.

    Parameters
    ----------
    y : class identifier; ``str(y)`` is spliced into the file name.

    Returns
    -------
    Whatever ``np.load`` returns for
    ``..//models//mahalanobis_threshold_coefficient_class_<y>.npy`` (relative
    to the current working directory).
    """
    file_name = ''.join(
        ('..//models//mahalanobis_threshold_coefficient_class_', str(y), '.npy')
    )
    loaded = np.load(file_name)
    return loaded

def is_in_distribution(x, predicted_y):
    """Decide whether ``x`` falls inside the stored distance band of its class.

    The per-class statistics (``emp_miu``, Mahalanobis mean/std and threshold
    coefficient) and the shared inverse covariance are read from disk through
    the ``load_*`` helpers on every call.

    Parameters
    ----------
    x : array-like
        Vector to test. Must broadcast against the stored ``emp_miu`` array.
    predicted_y : int
        Predicted class; must be 0 or 1.

    Returns
    -------
    bool
        True when ``norm(mean - coeff*std) < m < norm(mean + coeff*std)``,
        where ``m`` is the scalar distance statistic computed below.

    Raises
    ------
    AssertionError
        If ``predicted_y`` is neither 0 nor 1.
    """
    assert(predicted_y == 0 or predicted_y == 1)

    emp_miu = load_emp_miu(predicted_y)
    inv_emp_sigma = load_inv_emp_covar()
    m_mean = load_mahalanobis_mean(predicted_y)
    m_std = load_mahalanobis_std(predicted_y)
    coeff = load_mahalanobis_coeff(predicted_y)

    upper = m_mean + coeff*m_std
    lower = m_mean - coeff*m_std

    diff = x - emp_miu
    try:
        # Quadratic form diff^T . Sigma^-1 . diff.
        m = np.transpose(diff) @ inv_emp_sigma @ diff
    except ValueError:
        # matmul shape mismatch (e.g. `x` is a (1, k) row vector): keep the
        # original element-wise fallback.
        # NOTE(review): this fallback is not a true Mahalanobis distance;
        # confirm it matches how the stored mean/std were calibrated.
        m = diff * inv_emp_sigma * diff

    # Reduce to a scalar on both paths. Previously only the fallback assigned
    # `m`, so a successful matmul ended in a NameError at the comparison.
    m = np.linalg.norm(m)

    return bool(np.linalg.norm(lower) < m < np.linalg.norm(upper))

In [3]:
# Report how many GPU devices TensorFlow can see on this machine.
print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))
# Start from a clean slate before loading the model below: clear the Keras
# session and reset the TF1-compat default graph.
tf.keras.backend.clear_session()
tf.compat.v1.reset_default_graph()

def mil_squared_error(y_true, y_pred):
    """Squared difference between the maxima of the prediction and the target.

    Both maxima are taken over the whole tensor (no axis is given), so the
    result is ``(max(y_pred) - max(y_true)) ** 2``.
    """
    K = tf.keras.backend
    gap = K.max(y_pred) - K.max(y_true)
    return K.square(gap)

# Optimizer instance passed to load_model through custom_objects under the key 'adam'.
adam = tf.keras.optimizers.Adam(learning_rate=1e-5)

# Load the saved CNN (path relative to the working directory). The custom loss
# `mil_squared_error` is supplied via custom_objects; `model` is then used as
# a notebook-level global by `identify`.
model = tf.keras.models.load_model('..//models//cnn.hdf5', custom_objects={'mil_squared_error': mil_squared_error, 'adam': adam})

Num GPUs Available:  0


In [4]:
def extract_features_for_all_wavs(dest, label, extractor=None):
    """Extract features for every file in ``dest`` and build matching labels.

    Files whose feature array does not have shape ``(48, 272)`` are skipped.
    Directory entries are processed in sorted order so the row order of the
    result is reproducible.

    Parameters
    ----------
    dest : str
        Directory to scan. A trailing path separator is optional.
    label :
        Label value repeated once per kept file.
    extractor : callable, optional
        ``extractor(path) -> ndarray``. Defaults to the notebook's
        ``extract_feats_single_wav``.

    Returns
    -------
    (result, labels)
        ``result`` has shape ``(n_kept, 48, 272)`` and ``labels`` has shape
        ``(n_kept, 1)``; both are empty along axis 0 when nothing is kept.
    """
    if extractor is None:
        extractor = extract_feats_single_wav

    expected_shape = (48, 272)
    kept = []
    for wav in sorted(os.listdir(dest)):
        vec = extractor(os.path.join(dest, wav))
        if vec.shape != expected_shape:
            continue
        kept.append(np.expand_dims(vec, axis=0))

    # A single concatenate instead of a vstack per file avoids re-copying the
    # growing array. The zero-length float64 seed keeps the original dtype
    # promotion and makes the empty-directory case work.
    result = np.concatenate([np.zeros((0,) + expected_shape)] + kept, axis=0)
    labels = np.expand_dims(np.asarray([label] * len(result)), axis=1)
    print(result.shape)
    print(labels.shape)

    return result, labels

In [18]:
# This part assumes the model has already been trained and loaded.
# Directory holding the .wav files to evaluate.

# NOTE(review): hard-coded absolute Windows path — adjust for your machine.
test_dir = 'D://0123//'

def identify(test_dir, threshold):
    """Run the model on every ``wav`` file in ``test_dir`` and count OOD hits.

    For each file whose name ends in ``wav``, features are extracted, the
    notebook-level ``model`` predicts on them, the prediction is printed, and
    ``is_in_distribution`` is asked whether the prediction lies in the stored
    band for the arg-max class.

    Parameters
    ----------
    test_dir : str
        Directory to scan. A trailing path separator is optional.
    threshold :
        Currently unused; kept so existing calls keep working.

    Returns
    -------
    (in_dist, out_dist) : tuple of int
        Number of files judged in-distribution and out-of-distribution.
        (Previously the counts were computed but never returned.)
    """
    in_dist = 0
    out_dist = 0

    for wav in os.listdir(test_dir):
        if not wav.endswith('wav'):
            continue

        wav_path = os.path.join(test_dir, wav)
        feat_vec = np.expand_dims(extract_feats_single_wav(wav_path), axis=0)
        softmax = np.squeeze(model.predict(feat_vec))
        predicted_label = np.argmax(softmax)

        print(softmax)

        if is_in_distribution(softmax, predicted_label):
            in_dist += 1
        else:
            out_dist += 1

    return in_dist, out_dist


In [19]:
# Run the in-/out-of-distribution check over the .wav files in test_dir
# (0.8 is passed as `threshold`).
identify(test_dir, 0.8)

[[0.88159144 0.11840851]]
[[0.80359304 0.19640695]]
[[0.6059731  0.39402696]]
[[0.55613446 0.44386554]]
[[0.83264595 0.16735403]]
[[0.8734568  0.12654324]]
[[0.6386586 0.3613414]]
[[0.662006   0.33799398]]
[[0.8345549  0.16544507]]
[[0.696338   0.30366197]]
[[0.43510932 0.5648907 ]]
[[0.5365558  0.46344423]]
[[0.5182986  0.48170134]]
[[0.4044948  0.59550524]]
[[0.3742899  0.62571007]]
[[0.26235574 0.73764426]]


KeyboardInterrupt: 