In [1]:
import pandas as pd
import numpy as np
import sys
import os
import scipy.stats as stats
import time
from tqdm.notebook import tqdm
import gc
import tensorflow as tf
import tensorflow.keras

from tensorflow.keras.models import *
from tensorflow.keras.layers import *
from tensorflow.keras.optimizers import *
from tensorflow.keras.layers import Dense, Conv2D, MaxPool2D , Flatten
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

### UNET definition

In [2]:

# Expose only CUDA device index 1 to this process; this is set before the
# GPUs are listed just below.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

gpus = tf.config.experimental.list_physical_devices('GPU')
try:
    # Turn on memory growth for every visible GPU (no-op when none is found).
    for device in gpus:
        tf.config.experimental.set_memory_growth(device, True)
except RuntimeError as err:
    # Report the problem but keep the notebook running.
    print(err)

In [3]:
# Display the list of physical GPUs returned by TensorFlow in the previous cell.
gpus

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [4]:
def unet(pretrained_weights=None, input_size=(256, 256, 1)):
    """Build a U-Net style autoencoder plus an encoder that shares its layers.

    The network has four down-sampling stages (two 3x3 convolutions followed
    by 2x2 max pooling), a 1024-filter bottleneck, and four up-sampling stages
    whose inputs are concatenated with the matching encoder activations.

    Parameters
    ----------
    pretrained_weights : str or None
        Path handed to ``model.load_weights``. Nothing is loaded when the
        value is falsy.
    input_size : tuple
        Shape passed to ``Input`` (height, width, channels). Height and width
        are halved four times and doubled back, so they need to be multiples
        of 16 for the skip concatenations to line up.

    Returns
    -------
    model : Model
        Full network, inputs -> single-channel sigmoid output, compiled with
        Adam (learning rate 1e-4) and mean squared error loss.
    encoder : Model
        inputs -> bottleneck activations (after the second dropout). It reuses
        the layers of ``model``, so weights loaded into ``model`` apply to it
        as well.
    """
    # Settings shared by every convolution except the final 1x1 sigmoid layer.
    conv_args = dict(activation='relu', padding='same', kernel_initializer='he_normal')

    inputs = Input(input_size)

    # --- contracting path ---
    conv1 = Conv2D(64, 3, **conv_args)(inputs)
    conv1 = Conv2D(64, 3, **conv_args)(conv1)
    pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)
    conv2 = Conv2D(128, 3, **conv_args)(pool1)
    conv2 = Conv2D(128, 3, **conv_args)(conv2)
    pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)
    conv3 = Conv2D(256, 3, **conv_args)(pool2)
    conv3 = Conv2D(256, 3, **conv_args)(conv3)
    pool3 = MaxPooling2D(pool_size=(2, 2))(conv3)
    conv4 = Conv2D(512, 3, **conv_args)(pool3)
    conv4 = Conv2D(512, 3, **conv_args)(conv4)
    drop4 = Dropout(0.5)(conv4)
    pool4 = MaxPooling2D(pool_size=(2, 2))(drop4)

    # --- bottleneck (this is what the encoder model outputs) ---
    conv5 = Conv2D(1024, 3, **conv_args)(pool4)
    conv5 = Conv2D(1024, 3, **conv_args)(conv5)
    drop5 = Dropout(0.5)(conv5)

    # --- expanding path: upsample, 2x2 conv, concatenate with the skip ---
    up6 = Conv2D(512, 2, **conv_args)(
        UpSampling2D(size=(2, 2))(drop5))
    merge6 = concatenate([drop4, up6], axis=3)
    conv6 = Conv2D(512, 3, **conv_args)(merge6)
    conv6 = Conv2D(512, 3, **conv_args)(conv6)

    up7 = Conv2D(256, 2, **conv_args)(
        UpSampling2D(size=(2, 2))(conv6))
    merge7 = concatenate([conv3, up7], axis=3)
    conv7 = Conv2D(256, 3, **conv_args)(merge7)
    conv7 = Conv2D(256, 3, **conv_args)(conv7)

    up8 = Conv2D(128, 2, **conv_args)(
        UpSampling2D(size=(2, 2))(conv7))
    merge8 = concatenate([conv2, up8], axis=3)
    conv8 = Conv2D(128, 3, **conv_args)(merge8)
    conv8 = Conv2D(128, 3, **conv_args)(conv8)

    up9 = Conv2D(64, 2, **conv_args)(
        UpSampling2D(size=(2, 2))(conv8))
    merge9 = concatenate([conv1, up9], axis=3)
    conv9 = Conv2D(64, 3, **conv_args)(merge9)
    conv9 = Conv2D(64, 3, **conv_args)(conv9)
    conv9 = Conv2D(2, 3, **conv_args)(conv9)
    conv10 = Conv2D(1, 1, activation='sigmoid')(conv9)

    model = Model(inputs=inputs, outputs=conv10)

    # `learning_rate` is the supported keyword; `lr` is a deprecated alias
    # that newer Keras releases warn about or reject.
    model.compile(optimizer=Adam(learning_rate=1e-4), loss='mean_squared_error', metrics=['accuracy'])
    encoder = Model(inputs=inputs, outputs=drop5)

    if pretrained_weights:
        model.load_weights(pretrained_weights)

    return model, encoder

### Load CSV

In [5]:
def _read_protocol(path):
    """Read one space-separated, header-less protocol file.

    The five columns are named and the one labelled "null" is dropped, so the
    returned frame has speaker_id, audio_filename, system_id and label.
    """
    table = pd.read_csv(path, sep=" ", header=None)
    table.columns = ["speaker_id", "audio_filename", "null", "system_id", "label"]
    return table.drop(columns="null")


# Protocol files for the three dataset partitions.
train_txt_path = '../dataset/LA/ASVspoof2019_LA_cm_protocols/ASVspoof2019.LA.cm.train.trn.txt'
dev_txt_path = '../dataset/LA/ASVspoof2019_LA_cm_protocols/ASVspoof2019.LA.cm.dev.trl.txt'
eval_txt_path = '../dataset/LA/ASVspoof2019_LA_cm_protocols/ASVspoof2019.LA.cm.eval.trl.txt'

df_train = _read_protocol(train_txt_path)
df_dev = _read_protocol(dev_txt_path)
df_eval = _read_protocol(eval_txt_path)

### Params

In [6]:
# Parameter pairs to test:
#   nfft = 64,  hop_size = 32
#   nfft = 128, hop_size = 64   (done)
#   nfft = 256, hop_size = 128
#   nfft = 512, hop_size = 256
nfft, hop_size = 64, 32

# Algorithm identifiers 'A01' ... 'A06'.
alg_list = ['A{:02d}'.format(number) for number in range(1, 7)]

epochs = 100
batch_size = 1  # = 16 for nfft > 512

# Side length of the square network input, derived from the FFT size.
input_size = nfft // 2

# Feature computation

In [10]:
# Compute one feature column per algorithm for the training partition:
# the MSE between the encoder output of each bicoherence magnitude and the
# encoder output of its reconstruction by that algorithm's U-Net.
train_feat_root_path = '../features/bicoherences/train_nfft_{}_hop_size_{}'.format(nfft, hop_size)

for alg in tqdm(alg_list, total=len(alg_list)):

    # Load the model trained for this algorithm from its checkpoint.
    model_folder = '../features/unet/models/train_nfft_{}_hop_size_{}_alg_{}.ckpt'.format(
        nfft, hop_size, alg)

    model, encoder = unet(model_folder, (input_size, input_size, 1))

    # The template has a single placeholder, so only `alg` may be passed;
    # passing (nfft, hop_size, alg) would name every column after nfft.
    feat_name = 'unet_mse_alg_{}'.format(alg)

    # Stack the magnitude of every stored bicoherence and append a channel axis.
    mag_volume = np.array([
        np.abs(np.load(os.path.join(train_feat_root_path, filename + '.npy')))
        for filename in df_train['audio_filename']
    ])
    mag_volume = mag_volume[..., np.newaxis]

    output = model.predict(mag_volume, batch_size=batch_size)
    compressed = encoder.predict(mag_volume, batch_size=batch_size)
    output_compressed = encoder.predict(output, batch_size=batch_size)

    # One value per file: mean over the spatial and channel axes.
    mse = np.mean(np.square(compressed - output_compressed), axis=(1, 2, 3))

    # `.at` is a scalar accessor; assign the whole column directly instead.
    df_train[feat_name] = mse

df_train.to_pickle('../features/unet/train_nfft_{}_hop_size_{}.pkl'.format(nfft, hop_size))

HBox(children=(FloatProgress(value=0.0, max=6.0), HTML(value='')))

In [None]:
# Compute one feature column per algorithm for the development partition:
# the MSE between the encoder output of each bicoherence magnitude and the
# encoder output of its reconstruction by that algorithm's U-Net.
dev_feat_root_path = '../features/bicoherences/dev_nfft_{}_hop_size_{}'.format(nfft, hop_size)

for alg in alg_list:

    # Load the model trained for this algorithm from its checkpoint.
    model_folder = '../features/unet/models/train_nfft_{}_hop_size_{}_alg_{}.ckpt'.format(
        nfft, hop_size, alg)

    model, encoder = unet(model_folder, (input_size, input_size, 1))

    feat_name = 'unet_mse_alg_{}'.format(alg)

    # Stack the magnitude of every stored bicoherence and append a channel axis.
    mag_volume = np.array([
        np.abs(np.load(os.path.join(dev_feat_root_path, filename + '.npy')))
        for filename in tqdm(df_dev['audio_filename'], total=len(df_dev))
    ])
    mag_volume = mag_volume[..., np.newaxis]

    output = model.predict(mag_volume, batch_size=batch_size)
    compressed = encoder.predict(mag_volume, batch_size=batch_size)
    output_compressed = encoder.predict(output, batch_size=batch_size)

    # One value per file: mean over the spatial and channel axes.
    mse = np.mean(np.square(compressed - output_compressed), axis=(1, 2, 3))

    # `.at` is a scalar accessor; assign the whole column directly instead.
    df_dev[feat_name] = mse

# NOTE(review): saving was disabled in the original cell and is left that way;
# uncomment to persist the dev features like the train and eval cells do.
#df_dev.to_pickle('../features/unet/dev_nfft_{}_hop_size_{}.pkl'.format(nfft, hop_size))

In [None]:
df_dev

In [None]:
# Compute one feature column per algorithm for the evaluation partition.
# Files are scored one at a time here instead of being stacked into a single
# array as in the train/dev cells.
eval_feat_root_path = '../features/bicoherences/eval_nfft_{}_hop_size_{}'.format(
    nfft, hop_size)

for alg in alg_list:

    # Load the model trained for this algorithm from its checkpoint.
    model_folder = '../features/unet/models/train_nfft_{}_hop_size_{}_alg_{}.ckpt'.format(
        nfft, hop_size, alg)

    model, encoder = unet(model_folder, (input_size, input_size, 1))

    # The template has a single placeholder, so only `alg` may be passed;
    # passing (nfft, hop_size, alg) would name every column after nfft and
    # each algorithm would overwrite the previous one.
    feat_name = 'unet_mse_alg_{}'.format(alg)
    # Create the column up front; it is filled row by row below.
    df_eval[feat_name] = np.nan

    for index, row in tqdm(df_eval.iterrows(), total=len(df_eval)):
        feat_path = os.path.join(eval_feat_root_path, row['audio_filename'] + '.npy')
        bicoh = np.load(feat_path)
        mag = np.abs(bicoh)

        # Add a leading batch axis and a trailing channel axis.
        mag_volume = mag[np.newaxis, ..., np.newaxis]

        # verbose=0 keeps the per-file calls from printing a progress line each.
        output = model.predict(mag_volume, batch_size=batch_size, verbose=0)
        compressed = encoder.predict(mag_volume, batch_size=batch_size, verbose=0)
        output_compressed = encoder.predict(output, batch_size=batch_size, verbose=0)

        mse = np.mean(np.square(compressed - output_compressed))

        df_eval.at[index, feat_name] = mse

df_eval.to_pickle('../features/unet/eval_nfft_{}_hop_size_{}.pkl'.format(nfft, hop_size))