In [1]:
from __future__ import print_function

import os
import sys
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn.preprocessing
import tensorflow.keras
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.layers import Dense, Dropout, Flatten
from tensorflow.keras.models import Sequential
%load_ext tensorboard


### Extract train features

In [2]:
X_train_bonafide_list = []
X_train_spoof_list = []
max_lpc_order = 50
lpc_lengths = np.arange(1, max_lpc_order + 1)

# Per-utterance statistics read from every pickle; this order fixes the
# column layout of the training matrix (16 columns per LPC order).
feature_columns = ['lpc_res_mean', 'lpc_res_max', 'lpc_res_min', 'lpc_res_var',
                   'lpc_gain_max', 'lpc_gain_min', 'lpc_gain_mean', 'lpc_gain_var',
                   'ltp_res_mean', 'ltp_res_max', 'ltp_res_min', 'ltp_res_var',
                   'ltp_gain_max', 'ltp_gain_min', 'ltp_gain_mean', 'ltp_gain_var']

for l in lpc_lengths:
    train_features = pd.read_pickle('/nas/home/cborrelli/bot_speech/features/train_LPC_' + str(l) +'.pkl')
    train_features = train_features.dropna()

    train_bonafide_features = train_features[train_features['label']=='bonafide']
    train_spoof_features = train_features[train_features['label']=='spoof']

    if l==1:
        # Balance the classes: pick as many spoof files as there are bonafide
        # ones. The seed is fixed so the subset is the same on every run.
        selected_files = train_spoof_features['audio_filename'].sample(
            n=train_bonafide_features.shape[0], random_state=0)

    # Filter with isin() for every order, the first included, so spoof rows
    # always come out in file order. Using the shuffled sample directly for
    # order 1 would misalign its rows with those of orders 2..50.
    # NOTE(review): assumes every pickle lists the utterances in the same order - confirm.
    train_spoof_features = train_spoof_features[train_spoof_features['audio_filename'].isin(selected_files)]

    for column in feature_columns:
        X_train_bonafide_list.append(np.array(train_bonafide_features[column]).reshape((-1, 1)))
        X_train_spoof_list.append(np.array(train_spoof_features[column]).reshape((-1, 1)))





In [3]:
# Put the per-column arrays side by side (one row per utterance), then stack
# the bonafide rows on top of the spoof rows.
X_train_bonafide = np.hstack(X_train_bonafide_list)
X_train_spoof = np.hstack(X_train_spoof_list)
X_train = np.vstack((X_train_bonafide, X_train_spoof))

# Labels follow the same row order: 1 for bonafide, 0 for spoof.
y_train_bonafide = np.ones(X_train_bonafide.shape[0])
y_train_spoof = np.zeros(X_train_spoof.shape[0])
y_train = np.hstack((y_train_bonafide, y_train_spoof))

### Extract dev features

In [4]:
X_dev_bonafide_list = []
X_dev_spoof_list = []

# Per-utterance statistics read from every pickle; this order fixes the
# column layout of the dev matrix (16 columns per LPC order).
feature_columns = ['lpc_res_mean', 'lpc_res_max', 'lpc_res_min', 'lpc_res_var',
                   'lpc_gain_max', 'lpc_gain_min', 'lpc_gain_mean', 'lpc_gain_var',
                   'ltp_res_mean', 'ltp_res_max', 'ltp_res_min', 'ltp_res_var',
                   'ltp_gain_max', 'ltp_gain_min', 'ltp_gain_mean', 'ltp_gain_var']

for l in lpc_lengths:
    dev_features = pd.read_pickle('/nas/home/cborrelli/bot_speech/features/dev_LPC_' + str(l) +'.pkl')
    dev_features = dev_features.dropna()

    dev_bonafide_features = dev_features[dev_features['label']=='bonafide']
    dev_spoof_features = dev_features[dev_features['label']=='spoof']

    if l==1:
        # Balance the classes: pick as many spoof files as there are bonafide
        # ones. The seed is fixed so the subset is the same on every run.
        selected_files = dev_spoof_features['audio_filename'].sample(
            n=dev_bonafide_features.shape[0], random_state=0)

    # Filter with isin() for every order, the first included, so spoof rows
    # always come out in file order. Using the shuffled sample directly for
    # order 1 would misalign its rows with those of orders 2..50.
    # NOTE(review): assumes every pickle lists the utterances in the same order - confirm.
    dev_spoof_features = dev_spoof_features[dev_spoof_features['audio_filename'].isin(selected_files)]

    for column in feature_columns:
        X_dev_bonafide_list.append(np.array(dev_bonafide_features[column]).reshape((-1, 1)))
        X_dev_spoof_list.append(np.array(dev_spoof_features[column]).reshape((-1, 1)))

In [5]:
# Put the per-column arrays side by side (one row per utterance), then stack
# the bonafide rows on top of the spoof rows.
X_dev_bonafide = np.hstack(X_dev_bonafide_list)
X_dev_spoof = np.hstack(X_dev_spoof_list)
X_dev = np.vstack((X_dev_bonafide, X_dev_spoof))

# Labels follow the same row order: 1 for bonafide, 0 for spoof.
y_dev_bonafide = np.ones(X_dev_bonafide.shape[0])
y_dev_spoof = np.zeros(X_dev_spoof.shape[0])
y_dev = np.hstack((y_dev_bonafide, y_dev_spoof))

In [6]:
def load_features(dataset, max_lpc_order = 50,
                  features_root='/nas/home/cborrelli/bot_speech/features/',
                  random_state=None):
    """
    Load the class-balanced feature matrix and labels of one dataset split.

    One pickle per LPC order is read from
    ``<features_root>/<dataset>_LPC_<order>.pkl``. Rows containing NaN are
    dropped, all bonafide rows are kept, and an equally sized random subset of
    spoof files (chosen once, on the first order) is kept for every order.

    :param dataset: split name used in the file name ("train", "dev" or "eval")
    :param max_lpc_order: maximum order used in the LPC analysis; orders
        1..max_lpc_order are loaded
    :param features_root: folder containing the pickles
    :param random_state: seed forwarded to pandas ``sample`` for the spoof
        subset; None (default) keeps the selection random
    :returns: ``(X, y)``. X has one row per utterance (bonafide rows first)
        and 16 columns per LPC order, order-major; y is 1 for bonafide and
        0 for spoof
    :raises ValueError: if max_lpc_order is smaller than 1
    """
    if max_lpc_order < 1:
        raise ValueError('max_lpc_order must be at least 1')

    # Column order inside each block of 16; downstream reshaping relies on it.
    feature_columns = ['lpc_res_mean', 'lpc_res_max', 'lpc_res_min', 'lpc_res_var',
                       'lpc_gain_max', 'lpc_gain_min', 'lpc_gain_mean', 'lpc_gain_var',
                       'ltp_res_mean', 'ltp_res_max', 'ltp_res_min', 'ltp_res_var',
                       'ltp_gain_max', 'ltp_gain_min', 'ltp_gain_mean', 'ltp_gain_var']

    X_bonafide_list = []
    X_spoof_list = []
    selected_files = None

    for order in range(1, max_lpc_order + 1):
        path = os.path.join(features_root, '{}_LPC_{}.pkl'.format(dataset, order))
        features = pd.read_pickle(path).dropna()

        bonafide_features = features[features['label'] == 'bonafide']
        spoof_features = features[features['label'] == 'spoof']

        if selected_files is None:
            # Choose the balanced spoof subset once; later orders reuse it.
            selected_files = spoof_features['audio_filename'].sample(
                n=bonafide_features.shape[0], random_state=random_state)

        # Filter with isin() for every order, the first included, so spoof
        # rows come out in file order each time. Taking the shuffled sample
        # itself for order 1 would misalign its rows with the other orders.
        # NOTE(review): assumes all pickles list utterances in the same order - confirm.
        spoof_features = spoof_features[spoof_features['audio_filename'].isin(selected_files)]

        X_bonafide_list.append(np.asarray(bonafide_features[feature_columns]))
        X_spoof_list.append(np.asarray(spoof_features[feature_columns]))

    X_bonafide = np.concatenate(X_bonafide_list, axis=1)
    X_spoof = np.concatenate(X_spoof_list, axis=1)
    X = np.concatenate((X_bonafide, X_spoof), axis=0)

    y_bonafide = np.ones((X_bonafide.shape[0],))
    y_spoof = np.zeros((X_spoof.shape[0],))
    y = np.concatenate((y_bonafide, y_spoof), axis=0)

    return X, y

In [7]:
load_features('train')

(array([[ 0.00219729,  0.07756035, -0.10167341, ...,  1.06835803,
          1.26645163,  0.13137526],
        [ 0.00218541,  0.05919245, -0.08369585, ...,  1.04170821,
          1.22022052,  0.06855069],
        [ 0.00217857,  0.07167769, -0.09272506, ...,  1.07978179,
          1.24696034,  0.13072254],
        ...,
        [ 0.00153933,  0.09240214, -0.08909928, ...,  1.06779537,
          1.23711053,  0.05616819],
        [ 0.00342163,  0.12579175, -0.14826402, ...,  1.07997341,
          1.41756716,  0.18395814],
        [ 0.0020967 ,  0.14522463, -0.1185302 , ...,  1.05986068,
          1.23770532,  0.0584935 ]]), array([1., 1., 1., ..., 0., 0., 0.]))

In [8]:
X_train

array([[ 0.00219729,  0.07756035, -0.10167341, ...,  1.06835803,
         1.26645163,  0.13137526],
       [ 0.00218541,  0.05919245, -0.08369585, ...,  1.04170821,
         1.22022052,  0.06855069],
       [ 0.00217857,  0.07167769, -0.09272506, ...,  1.07978179,
         1.24696034,  0.13072254],
       ...,
       [ 0.00208819,  0.09397179, -0.08237127, ...,  1.0489835 ,
         1.21630223,  0.03587556],
       [ 0.00222016,  0.11238885, -0.15232602, ...,  1.06512453,
         1.31859065,  0.18185512],
       [ 0.00182159,  0.08232194, -0.08001406, ...,  1.05746939,
         1.27963544,  0.07774284]])

### Extract eval features

In [9]:
X_eval_bonafide_list = []
X_eval_spoof_list = []

# Per-utterance statistics read from every pickle; this order fixes the
# column layout of the eval matrix (16 columns per LPC order).
feature_columns = ['lpc_res_mean', 'lpc_res_max', 'lpc_res_min', 'lpc_res_var',
                   'lpc_gain_max', 'lpc_gain_min', 'lpc_gain_mean', 'lpc_gain_var',
                   'ltp_res_mean', 'ltp_res_max', 'ltp_res_min', 'ltp_res_var',
                   'ltp_gain_max', 'ltp_gain_min', 'ltp_gain_mean', 'ltp_gain_var']

for l in lpc_lengths:
    eval_features = pd.read_pickle('/nas/home/cborrelli/bot_speech/features/eval_LPC_' + str(l) +'.pkl')
    eval_features = eval_features.dropna()

    eval_bonafide_features = eval_features[eval_features['label']=='bonafide']
    eval_spoof_features = eval_features[eval_features['label']=='spoof']

    if l==1:
        # Balance the classes: pick as many spoof files as there are bonafide
        # ones. The seed is fixed so the subset is the same on every run.
        selected_files = eval_spoof_features['audio_filename'].sample(
            n=eval_bonafide_features.shape[0], random_state=0)

    # Filter with isin() for every order, the first included, so spoof rows
    # always come out in file order. Using the shuffled sample directly for
    # order 1 would misalign its rows with those of orders 2..50.
    # NOTE(review): assumes every pickle lists the utterances in the same order - confirm.
    eval_spoof_features = eval_spoof_features[eval_spoof_features['audio_filename'].isin(selected_files)]

    for column in feature_columns:
        X_eval_bonafide_list.append(np.array(eval_bonafide_features[column]).reshape((-1, 1)))
        X_eval_spoof_list.append(np.array(eval_spoof_features[column]).reshape((-1, 1)))

In [10]:
# Put the per-column arrays side by side (one row per utterance), then stack
# the bonafide rows on top of the spoof rows.
X_eval_bonafide = np.hstack(X_eval_bonafide_list)
X_eval_spoof = np.hstack(X_eval_spoof_list)
X_eval = np.vstack((X_eval_bonafide, X_eval_spoof))

# Labels follow the same row order: 1 for bonafide, 0 for spoof.
y_eval_bonafide = np.ones(X_eval_bonafide.shape[0])
y_eval_spoof = np.zeros(X_eval_spoof.shape[0])
y_eval = np.hstack((y_eval_bonafide, y_eval_spoof))

In [11]:
X_train_spoof.shape

(2518, 800)

## Reshape data for CNN

In [12]:
n_features = 16
lpc_lenght = 50
# The columns of X_train are order-major (16 statistics for order 1, then 16
# for order 2, ...). Viewing each row as (order, statistic) and swapping the
# two axes gives (sample, statistic, order) without a Python-level loop.
reshaped_X_train = np.array(
    X_train.reshape(X_train.shape[0], lpc_lenght, n_features).transpose(0, 2, 1),
    dtype=np.float64, order='C')

print(reshaped_X_train.shape)

(5036, 16, 50)


In [13]:
n_features = 16
lpc_lenght = 50
# The columns of X_eval are order-major (16 statistics for order 1, then 16
# for order 2, ...). Viewing each row as (order, statistic) and swapping the
# two axes gives (sample, statistic, order) without a Python-level loop.
reshaped_X_eval = np.array(
    X_eval.reshape(X_eval.shape[0], lpc_lenght, n_features).transpose(0, 2, 1),
    dtype=np.float64, order='C')

print(reshaped_X_eval.shape)

(14080, 16, 50)


In [14]:
n_features = 16
lpc_lenght = 50
# The columns of X_dev are order-major (16 statistics for order 1, then 16
# for order 2, ...). Viewing each row as (order, statistic) and swapping the
# two axes gives (sample, statistic, order) without a Python-level loop.
reshaped_X_dev = np.array(
    X_dev.reshape(X_dev.shape[0], lpc_lenght, n_features).transpose(0, 2, 1),
    dtype=np.float64, order='C')

print(reshaped_X_dev.shape)

(4834, 16, 50)


## Normalize features

In [187]:
def pippo(**kwargs):
    """
    Print the parameters of the normalization selected by keyword.

    :param kwargs: either ``minmax=(x_min, x_max)`` or ``zscore=(x_mu, x_std)``;
        ``minmax`` wins when both are given
    :raises NotImplementedError: if neither keyword is present
    """
    if 'minmax' in kwargs:
        lower, upper = kwargs['minmax']
        print('minmax', lower, upper)
        return

    if 'zscore' in kwargs:
        mean, deviation = kwargs['zscore']
        print('zscore', mean, deviation)
        return

    raise NotImplementedError('Normalization not defined')


In [188]:
kwargs = {'zscore': [0, 1]}
kwargs = {'minmax': [5, 10]}

In [189]:
pippo(aa=[5, 19])

NotImplementedError: Normalization not defined

In [231]:
preprocessing_root = '/nas/home/cborrelli/bot_speech/preprocessing/'



def preprocess_features(X, p_axis=(0), **kwargs):
    """
    Normalize a feature tensor with the method selected by keyword.

    Exactly one of the following keywords is expected; if several are given
    the first one in this list wins:

    * ``minmax_scaler=[feat_min, feat_max]``: ``(X - min) / (max - min)``
    * ``z_score=[feat_mu, feat_std]``: ``(X - mu) / std``
    * ``l2_norm``, ``l1_norm``, ``max_norm``: for every index along axis 1,
      each row of ``X[:, index, :]`` is scaled by
      ``sklearn.preprocessing.normalize`` with the matching norm (the keyword
      value is ignored)
    * ``no_norm``: X is returned as is (the same object, not a copy)

    For ``minmax_scaler`` and ``z_score`` the two statistics are estimated
    from X, reducing over ``p_axis`` with the reduced axes kept, when either
    of them is None or when both are all-zero. Otherwise the given values are
    applied unchanged, which is how statistics fitted on the training split
    are reused on another split. Entries whose range or standard deviation is
    zero are divided by 1 instead, so constant features map to 0 rather than
    NaN or inf.

    :param X: feature array; the existing callers pass
        N_samples x n_features x LPC_length
    :param p_axis: axis (or axes) over which min/max or mean/std are
        estimated; only used by ``minmax_scaler`` and ``z_score``
    :param kwargs: the method keyword described above
    :return: list ``[norm_X, params]`` where ``params`` is a one-entry dict
        that can be passed back as ``**params`` to apply the same transform
    :raises NotImplementedError: if no known method keyword is given
    """
    print(kwargs)

    def _needs_fit(first, second):
        # None, or the all-zero placeholder, means "estimate from X".
        if first is None or second is None:
            return True
        return not np.any(first) and not np.any(second)

    def _safe_divisor(values):
        # Replace zeros by 1 so constant features do not divide by zero.
        values = np.asarray(values, dtype=float)
        return np.where(values == 0, 1.0, values)

    if 'minmax_scaler' in kwargs:
        feat_min, feat_max = kwargs['minmax_scaler']
        if _needs_fit(feat_min, feat_max):
            feat_min = X.min(axis=p_axis, keepdims=True)
            feat_max = X.max(axis=p_axis, keepdims=True)
        norm_X = (X - feat_min) / _safe_divisor(feat_max - feat_min)
        return [norm_X, {'minmax_scaler': [feat_min, feat_max]}]

    if 'z_score' in kwargs:
        feat_mu, feat_std = kwargs['z_score']
        if _needs_fit(feat_mu, feat_std):
            feat_mu = X.mean(axis=p_axis, keepdims=True)
            feat_std = X.std(axis=p_axis, keepdims=True)
        norm_X = (X - feat_mu) / _safe_divisor(feat_std)
        return [norm_X, {'z_score': [feat_mu, feat_std]}]

    # The three vector norms differ only in the `norm` argument.
    for method, norm in (('l2_norm', 'l2'), ('l1_norm', 'l1'), ('max_norm', 'max')):
        if method in kwargs:
            norm_X = np.zeros(X.shape)
            for feature_index in range(X.shape[1]):
                norm_X[:, feature_index, :] = sklearn.preprocessing.normalize(
                    X[:, feature_index, :], norm=norm)
            return [norm_X, {method: [None, None]}]

    if 'no_norm' in kwargs:
        return [X, {'no_norm': [None, None]}]

    raise NotImplementedError('Normalization not defined')


In [232]:
# Normalise the training tensor; `preproc_params` echoes the chosen method
# (plus any statistics it estimated) so other splits get the same transform.
norm_X_train, preproc_params= preprocess_features(reshaped_X_train, l1_norm=[None, None])
# Apply the same method to the dev split; the returned params are not needed again.
norm_X_dev, _ = preprocess_features(reshaped_X_dev, **preproc_params)
# NOTE(review): the disabled call below passes `preprocessing=`, which preprocess_features
# does not recognise (it dispatches on keywords such as l2_norm=...), and it would also
# need to unpack the returned [norm_X, params] pair.
#norm_X_eval = preprocess_features(reshaped_X_eval, preprocessing='l2_norm')

{'l1_norm': [None, None]}
{'l1_norm': [None, None]}


In [216]:
norm_X_train

array([[[ 4.41832664e-01,  4.43495058e-02,  2.09250123e-01, ...,
          2.01883962e-02,  2.36358609e-02,  8.85847078e-03],
        [-7.44347497e-01, -6.30830026e-01, -5.97645148e-01, ...,
         -7.07798210e-01, -7.23996949e-01, -7.31439465e-01],
        [ 3.70013134e-01,  3.14563699e-01,  2.69235488e-01, ...,
          4.18612522e-01,  4.21640454e-01,  4.25432472e-01],
        ...,
        [-2.20658137e-01,  6.68286323e-01,  5.48243405e-01, ...,
         -9.10625303e-01,  4.33646517e-01,  4.29338402e-01],
        [-1.48171172e-02, -1.40928974e-02, -1.42207832e-02, ...,
         -4.46443328e-01, -4.50374977e-01, -5.12594095e-01],
        [-1.42739372e-02, -1.40928965e-02, -1.40932361e-02, ...,
         -1.61427725e-01, -1.15970655e-01, -2.33603734e-01]],

       [[ 4.27781692e-01, -1.36128069e-02,  8.28449097e-02, ...,
         -5.51260245e-01, -5.62117534e-01, -5.69387440e-01],
        [-1.44782406e+00, -1.29716015e+00, -1.19433733e+00, ...,
         -1.23788243e+00, -1.23175176e

In [214]:
params

{'z_score': [array([[[ 1.82346414e-03,  7.91197841e-04,  5.22054687e-04,
            3.73302435e-04,  3.28627445e-04,  2.89115955e-04,
            2.71244668e-04,  2.51850839e-04,  2.41585068e-04,
            2.29644965e-04,  2.21841257e-04,  2.12799936e-04,
            2.03965661e-04,  1.97994002e-04,  1.92284998e-04,
            1.87206455e-04,  1.83578903e-04,  1.80223385e-04,
            1.77610953e-04,  1.74885422e-04,  1.72939053e-04,
            1.70803847e-04,  1.69152809e-04,  1.67186912e-04,
            1.65589376e-04,  1.63733693e-04,  1.62356077e-04,
            1.60749766e-04,  1.59484659e-04,  1.57945272e-04,
            1.56746477e-04,  1.55316157e-04,  1.54247269e-04,
            1.52966047e-04,  1.52004247e-04,  1.50832168e-04,
            1.49930735e-04,  1.48763891e-04,  1.47832963e-04,
            1.46678213e-04,  1.45746391e-04,  1.44544474e-04,
            1.43581246e-04,  1.42379448e-04,  1.41418285e-04,
            1.40289921e-04,  1.39355452e-04,  1.38240159e-0

In [48]:
a.shape

(16, 251800)

In [None]:

feature_list = ['lpc_res_mean', 'lpc_res_max', 'lpc_res_min', 
                'lpc_res_var', 'lpc_gain_max', 'lpc_gain_min', 'lpc_gain_mean', 'lpc_gain_var',
                'ltp_res_mean', 'ltp_res_max', 'ltp_res_min', 
                'ltp_res_var', 'ltp_gain_max', 'ltp_gain_min', 'ltp_gain_mean', 'ltp_gain_var']

n_features = 16
lpc_lenght = 50

# One heat-map per statistic: rows are utterances, columns are LPC orders.
for feature_index in range(n_features):
    plt.figure(figsize=(16, 8))
    plt.title(feature_list[feature_index])
    plt.imshow(norm_X_train[:, feature_index, :], aspect='auto')
    plt.colorbar()

In [None]:
# One heat-map for every 500th utterance: rows are statistics, columns are LPC orders.
for sample_index in range(0, norm_X_train.shape[0], 500):
    plt.figure(figsize=(16, 8))
    plt.imshow(norm_X_train[sample_index], aspect='auto')
    plt.colorbar()

## Train tutorial CNN

In [None]:
logdir = "logs/scalars/model1_" + datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tensorflow.keras.callbacks.TensorBoard(log_dir=logdir)

In [None]:
batch_size = 128
num_classes = 2
epochs = 12


# convert class vectors to binary class matrices
y_train_cat = tensorflow.keras.utils.to_categorical(y_train, num_classes)
y_dev_cat = tensorflow.keras.utils.to_categorical(y_dev, num_classes)



In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten, Input, Activation
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Conv2D, MaxPooling2D

In [None]:
num_features = 16
max_lpc_order = 50

x = Input(shape=[num_features, max_lpc_order, 1])

c1 = Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=[num_features, max_lpc_order, 1], name='conv1')(x)
c2 = Conv2D(64, (3, 3), activation='relu', name='conv2')(c1)
mp1 = MaxPooling2D(pool_size=(2, 2), name='mp1')(c2)
dp1 = Dropout(0.25, name='dp1')(mp1)
fl1 = Flatten(name='fl1')(dp1)
d1 = Dense(128, activation='relu', name='d1')(fl1)
dp2 = Dropout(0.5, name='dp2')(d1)
d2 = Dense(num_classes, activation=None, name='d2')(dp2)
out =  Activation('softmax', name='output')(d2)
model_fun = Model(inputs=x, outputs=out)

model_fun.summary()

In [None]:
model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3),
                 activation='relu',
                 input_shape=[16, 50, 1]))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))

model.summary()

In [None]:
model_fun.compile(loss=tensorflow.keras.losses.categorical_crossentropy,
                  optimizer=tensorflow.keras.optimizers.Adam(),
                  metrics=['accuracy'])

# min_lr has to sit below the optimizer's starting rate (Adam defaults to
# 1e-3); with min_lr=0.001 the callback could never lower the rate.
reduce_lr = tensorflow.keras.callbacks.ReduceLROnPlateau(monitor='loss', factor=0.2,
                                                         patience=5, min_lr=1e-5)

# validation_data is passed as the (inputs, targets) tuple that Keras documents.
training_history = model_fun.fit(np.expand_dims(norm_X_train, axis=3), y_train_cat,
                                 batch_size=batch_size,
                                 epochs=epochs,
                                 verbose=1,
                                 callbacks=[reduce_lr, tensorboard_callback],
                                 validation_data=(np.expand_dims(norm_X_dev, axis=3), y_dev_cat))

# history['loss'] holds the per-epoch training loss (validation is 'val_loss').
print("Average training loss: ", np.average(training_history.history['loss']))


In [None]:
# Score the network that was actually trained (`model_fun`). The Sequential
# `model` is built in this notebook but is not compiled or fitted in the
# visible cells, so evaluating it would fail.
score = model_fun.evaluate(np.expand_dims(norm_X_dev, axis=3), y_dev_cat, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

## Train on train and dev

In [None]:
logdir = "logs/scalars/model2_" + datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tensorflow.keras.callbacks.TensorBoard(log_dir=logdir)

In [None]:
X_bonafide = np.concatenate((X_train_bonafide, X_dev_bonafide), axis=0)
X_spoof = np.concatenate((X_train_spoof, X_dev_spoof), axis=0)
                
X = np.concatenate((X_bonafide, X_spoof), axis=0)

In [None]:
y_bonafide = np.ones([X_bonafide.shape[0],])
y_spoof = np.zeros([X_spoof.shape[0],])

y = np.concatenate((y_bonafide, y_spoof), axis=0)

In [None]:
n_features = 16
lpc_lenght = 50
# The columns of X are order-major (16 statistics for order 1, then 16 for
# order 2, ...). Viewing each row as (order, statistic) and swapping the two
# axes gives (sample, statistic, order) without a Python-level loop.
reshaped_X = np.array(
    X.reshape(X.shape[0], lpc_lenght, n_features).transpose(0, 2, 1),
    dtype=np.float64, order='C')

print(reshaped_X.shape)

In [None]:
# Scale each utterance's vector of every statistic with
# sklearn.preprocessing.normalize (default L2 norm). The scaling is per row,
# so nothing is fitted and the eval split can be processed the same way.
# norm_X_eval is needed by the model2 training and scoring cells below.
norm_X = np.zeros(reshaped_X.shape)
norm_X_eval = np.zeros(reshaped_X_eval.shape)

for feature_index in np.arange(n_features):
    norm_X[:, feature_index, :] = sklearn.preprocessing.normalize(reshaped_X[:, feature_index, :])
    norm_X_eval[:, feature_index, :] = sklearn.preprocessing.normalize(reshaped_X_eval[:, feature_index, :])


In [None]:
y_cat = tensorflow.keras.utils.to_categorical(y, num_classes)
y_eval_cat = tensorflow.keras.utils.to_categorical(y_eval, num_classes)

In [None]:
model2 = Sequential()
model2.add(Conv2D(32, kernel_size=(3, 3),
                 activation='relu',
                 input_shape=[16, 50, 1]))
model2.add(Conv2D(64, (3, 3), activation='relu'))
model2.add(MaxPooling2D(pool_size=(2, 2)))
model2.add(Dropout(0.25))
model2.add(Flatten())
model2.add(Dense(128, activation='relu'))
model2.add(Dropout(0.5))
model2.add(Dense(num_classes, activation='softmax'))

model2.summary()

In [None]:

model2.compile(loss=tensorflow.keras.losses.categorical_crossentropy,
               optimizer=tensorflow.keras.optimizers.Adam(),
               metrics=['accuracy'])

# min_lr has to sit below the optimizer's starting rate (Adam defaults to
# 1e-3); with min_lr=0.001 the callback could never lower the rate.
reduce_lr = tensorflow.keras.callbacks.ReduceLROnPlateau(monitor='loss', factor=0.2,
                                                         patience=5, min_lr=1e-5)

# validation_data is passed as the (inputs, targets) tuple that Keras documents.
model2.fit(np.expand_dims(norm_X, axis=3), y_cat,
           batch_size=batch_size,
           validation_data=(np.expand_dims(norm_X_eval, axis=3), y_eval_cat),
           epochs=epochs,
           verbose=1,
           callbacks=[reduce_lr, tensorboard_callback])

In [None]:
score = model2.evaluate(np.expand_dims(norm_X_eval, axis=3), y_eval_cat, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

## ASV metrics

In [None]:
from sklearn.metrics import roc_curve, roc_auc_score


In [None]:



def compute_det_curve(target_scores, nontarget_scores):
    """
    Compute the error rates at every candidate threshold.

    All scores are pooled and sorted in ascending order. Entry k of the
    returned arrays (k >= 1) corresponds to the k lowest scores having been
    rejected; entry 0 corresponds to nothing being rejected.

    :param target_scores: numpy array of scores of the target trials
    :param nontarget_scores: numpy array of scores of the non-target trials
    :returns: ``(frr, far, thresholds)``. frr is the fraction of target
        scores among the rejected ones, far the fraction of non-target scores
        not yet rejected, thresholds the sorted scores preceded by a value
        0.001 below the lowest score
    """
    n_target = target_scores.size
    n_nontarget = nontarget_scores.size
    n_scores = n_target + n_nontarget

    pooled_scores = np.concatenate((target_scores, nontarget_scores))
    is_target = np.concatenate((np.ones(n_target), np.zeros(n_nontarget)))

    # Stable sort, so tied scores keep their pooled order (targets first).
    order = np.argsort(pooled_scores, kind='mergesort')
    sorted_scores = pooled_scores[order]
    is_target = is_target[order]

    # Among the k lowest scores: how many are targets, and how many
    # non-targets are still above them.
    rejected_targets = np.cumsum(is_target)
    rejected_total = np.arange(1, n_scores + 1)
    accepted_nontargets = n_nontarget - (rejected_total - rejected_targets)

    frr = np.concatenate((np.atleast_1d(0), rejected_targets / n_target))
    far = np.concatenate((np.atleast_1d(1), accepted_nontargets / n_nontarget))
    thresholds = np.concatenate((np.atleast_1d(sorted_scores[0] - 0.001), sorted_scores))

    return frr, far, thresholds

def compute_eer(target_scores, nontarget_scores):
    """
    Return the equal error rate (EER) and the threshold at which it occurs.

    The operating point is the one where the false rejection and false
    acceptance rates from compute_det_curve are closest; the EER is the mean
    of the two rates at that point.
    """
    frr, far, thresholds = compute_det_curve(target_scores, nontarget_scores)
    crossing = np.argmin(np.abs(frr - far))
    equal_error_rate = np.mean((frr[crossing], far[crossing]))
    return equal_error_rate, thresholds[crossing]

## ASV scores on model1

In [None]:
# Use the trained network (`model_fun`); the Sequential `model` is not
# compiled or fitted in the visible cells. Column 0 of the softmax output is
# the score of class 0, i.e. spoof (labels: 0 = spoof, 1 = bonafide).
y_dev_hat = model_fun.predict(np.expand_dims(norm_X_dev, axis=3))[:,0]


# ROC with spoof as the positive class. roc_auc_score treats label 1 as
# positive, so its complement gives the AUC for the spoof class.
[fpr, tpr, th] = roc_curve(y_dev, y_dev_hat, pos_label=0)
roc_auc = 1 - roc_auc_score(y_dev, y_dev_hat)

# Balanced accuracy at every threshold, and the threshold(s) that maximise it.
acc = (tpr + (1-fpr)) / 2
best_th = th[np.where(acc == np.max(acc))]
print(best_th)
print(acc.max())

plt.plot(fpr, tpr, label='AUC={}'.format(roc_auc))
plt.plot([0, 1], [0, 1], ':k')
plt.plot([1, 0], [0, 1], ':k')
plt.axis([-0.01, 1, 0, 1.01])
plt.grid(True)
plt.legend()
plt.show()

In [None]:
target_score = y_dev_hat[np.where(y_dev==0)]
nontarget_score = y_dev_hat[np.where(y_dev==1)]

fnr = 1-tpr

plt.figure()
plt.plot(th, fnr)
plt.plot(th, fpr)
#plt.plot(th, np.abs(fnr-fpr))
plt.show()

compute_eer(target_score, nontarget_score)

## ASV scores on model2

In [None]:
y_eval_hat = model2.predict(np.expand_dims(norm_X_eval, axis=3))[:,0]


[fpr, tpr, th] = roc_curve(y_eval, y_eval_hat, pos_label=0)
roc_auc = 1 - roc_auc_score(y_eval, y_eval_hat)

acc = (tpr + (1-fpr)) / 2
best_th = th[np.where(acc == np.max(acc))]
print(best_th)
print(acc.max())

plt.plot(fpr, tpr, label='AUC={}'.format(roc_auc))
plt.plot([0, 1], [0, 1], ':k')
plt.plot([1, 0], [0, 1], ':k')
plt.axis([-0.01, 1, 0, 1.01])
plt.grid(True)
plt.legend()
plt.show()

In [None]:
target_score = y_eval_hat[np.where(y_eval==0)]
nontarget_score = y_eval_hat[np.where(y_eval==1)]

fnr = 1-tpr

plt.figure()
plt.plot(th, fnr)
plt.plot(th, fpr)
#plt.plot(th, np.abs(fnr-fpr))
plt.show()

compute_eer(target_score, nontarget_score)