In [1]:
# The front-end features for the GMMs (Gaussian Mixture Models) are LFCCs and CQCCs.
# My library
#from lfcc import *

# Library for dataloader
import os.path
import glob

# Library for LFCC-GMM
import numpy as np
import pandas as pd
import joblib
#from sklearn.mixture import GaussianMixture

# Library for reading flac audio file
import soundfile as sf
#from scipy.io.wavfile import read

# Library for pytorch
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
import torchvision
from torchvision import models, transforms

import pycave

Matplotlib created a temporary config/cache directory at /tmp/matplotlib-9e2crdfr because the default path (/home/.cache/matplotlib) is not a writable directory; it is highly recommended to set the MPLCONFIGDIR environment variable to a writable directory, in particular to speed up the import of Matplotlib and to better support multiprocessing.


In [2]:
# Show every float inside printed NumPy arrays with four digits after the decimal point.
float_formatter = "{:.4f}".format
np.set_printoptions(formatter=dict(float_kind=float_formatter))

In [40]:
class Preprocess(object):
    """
    Callable front-end that turns a raw waveform into a feature matrix.

    Attributes
    ----------
    extractor : object or None
        Extractor built by the most recent successful call (an ``LFCC``,
        ``MFCC`` or ``CQCC`` instance); ``None`` before the first call.
    features : object or None
        Value returned by ``extractor.extract_feature`` on the most recent
        successful call; ``None`` before the first call.
    """

    def __init__(self):
        """Create a preprocessor with no extractor and no cached features."""
        self.extractor = None
        self.features = None

    def __call__(self, y, sr, feature, dynamic=True):
        """
        Extract features with the LFCC, MFCC or CQCC extractor.

        Parameters
        ----------
        y :
            audio samples, handed to the extractor unchanged
        sr :
            sampling rate, handed to the extractor unchanged
        feature : str
            'LFCC', 'MFCC' or 'CQCC'
        dynamic : bool
            forwarded to ``extract_feature`` as its ``delta`` argument
            (previously ignored: ``delta`` was always ``True``)

        Returns
        ----------
        The extractor's ``extract_feature`` result, or ``None`` (after printing
        a message) when ``feature`` is not one of the supported names.
        """
        # NOTE: LFCC / MFCC / CQCC are not defined in this notebook (the
        # `from lfcc import *` line at the top is commented out); they must
        # already exist in the kernel namespace. The if/elif chain keeps the
        # lookup lazy so only the requested extractor class has to be defined.
        if feature == 'LFCC':
            self.extractor = LFCC(y, sr)
        elif feature == 'MFCC':
            self.extractor = MFCC(y, sr)
        elif feature == 'CQCC':
            self.extractor = CQCC(y, sr)
        else:
            print('Wrong feature extraction method specified')
            return None

        # Honour the caller's choice instead of always requesting deltas.
        self.features = self.extractor.extract_feature(delta=dynamic)

        return self.features

In [41]:
def make_datapath_list(phase='train', root_path="/DB/Audio/English/ASVspoof2019/LA/"):
    """
    Collect the paths of all FLAC files of one ASVspoof2019 LA split.

    Parameters
    ----------
    phase: 'train' or 'dev' or 'eval'
        specify whether data is for train or development or evaluation
    root_path: str
        directory that contains the ``ASVspoof2019_LA_<phase>`` folders;
        defaults to the location used throughout this notebook

    Returns
    ----------
    path_list : list
        sorted list of the paths matching
        ``<root_path>/ASVspoof2019_LA_<phase>/flac/*.flac``
        (empty when nothing matches)
    """
    # Join real path components so root_path works with or without a trailing slash.
    target_path = os.path.join(root_path, 'ASVspoof2019_LA_' + phase, 'flac', '*.flac')
    print(target_path)

    # glob returns matches in arbitrary order; sort for a reproducible list.
    return sorted(glob.glob(target_path))

# Smoke test: collect the file lists of the training and development splits
# (the calls run in this order, so the glob patterns are printed train first).
train_list, dev_list = [make_datapath_list(phase=split) for split in ('train', 'dev')]

#print(len(train_list), len(dev_list))
    

/DB/Audio/English/ASVspoof2019/LA/ASVspoof2019_LA_train/flac/*.flac
/DB/Audio/English/ASVspoof2019/LA/ASVspoof2019_LA_dev/flac/*.flac


In [42]:
# Make dataloader
class ASVspoofDataSet(data.Dataset):
    """
    Dataset class for ASVspoof2019, derived from torch.utils.data.Dataset.

    Attributes:
    --------------
    file_list: list
        list containing a path to each audio file

    label_list: list or None
        list of ``(filename, label)`` pairs; read from the protocol file for
        phase 'train' / 'dev', otherwise whatever the caller passed in

    preprocess: object
        callable invoked as ``preprocess(y=..., sr=..., feature='LFCC')``

    phase: str
        'train' or 'dev' or 'eval'

    root_path: str
        directory that contains the ``ASVspoof2019_LA_cm_protocols`` folder

    label_path: str or None
        protocol directory; only set for phase 'train' / 'dev'
    """

    # Protocol file that is read for each supported phase.
    _PROTOCOL_FILES = {
        'train': 'ASVspoof2019.LA.cm.train.trn.txt',
        'dev': 'ASVspoof2019.LA.cm.dev.trl.txt',
    }

    def __init__(self, file_list, label_list=None, preprocess=None, phase='train',
                 root_path='/DB/Audio/English/ASVspoof2019/LA/'):
        """
        Parameters
        ----------
        file_list: list
            list of audio files to read

        label_list: list
            list of ``(filename, label)`` pairs; replaced by the content of
            the protocol file when phase is 'train' or 'dev'

        preprocess: callable
            feature extractor applied to every loaded waveform

        phase: str
            'train' or 'dev' read the matching protocol file; any other value
            prints a message and keeps the label_list that was passed in

        root_path: str
            dataset root; defaults to the location used in this notebook
        """

        self.phase = phase
        self.preprocess = preprocess
        self.root_path = root_path
        self.file_list = file_list
        self.label_path = None
        self.label_list = label_list

        if self.phase in self._PROTOCOL_FILES:
            self.label_path = os.path.join(self.root_path, 'ASVspoof2019_LA_cm_protocols/')
            self.label_list = self._read_protocol(
                os.path.join(self.label_path, self._PROTOCOL_FILES[self.phase]))
        else:
            print("You must pass either phase='train' or phase='dev'")

        # filename -> label lookup built once, so __getitem__ does not scan
        # the whole protocol for every sample. Built in list order, so a
        # repeated filename resolves to its last entry (as the scan did).
        # It is not refreshed if label_list is mutated afterwards.
        self._label_by_name = dict(self.label_list or [])

    def _read_protocol(self, protocol_path):
        """Parse one protocol file into a list of (filename, label) pairs."""
        entries = []
        with open(protocol_path, mode='r') as protocols:
            for line in protocols:
                fields = line.split()
                if not fields:
                    # a blank line carries no entry; indexing it would fail
                    continue
                if self.phase == 'train':
                    label = fields[-1]
                else:
                    # dev keeps three columns: first, fourth and last field
                    label = (fields[0], fields[3], fields[-1])
                entries.append((fields[1], label))
        return entries

    def __len__(self): # required override
        return len(self.file_list)

    def __getitem__(self, index): # required override
        """
        Load one audio file, extract its features and look up its label.

        Returns ``(features, label)``; ``label`` is ``None`` (and a debug line
        is printed) when the file name has no entry in the label list.
        """

        # load audio
        speech_path = self.file_list[index]
        speech, sr = sf.read(speech_path)

        # preprocessing and extract features
        features = self.preprocess(y=speech, sr=sr, feature='LFCC')

        # Remove the extension as a suffix. str.rstrip('.flac') strips any
        # trailing '.', 'f', 'l', 'a', 'c' characters and would also eat the
        # end of a name such as 'LA_T_000a'.
        speech_name = os.path.splitext(os.path.basename(speech_path))[0]

        label = self._label_by_name.get(speech_name)
        if label is None:
            print('[debug print] filename:', speech_name)

        return features, label
    
# Smoke test: build the training dataset and inspect its first items.
process = Preprocess()
asvspoof_train = ASVspoofDataSet(file_list=train_list, preprocess=process, phase='train')

# number of files (with their labels) to look at
iterations = 10

for itr in range(iterations):
    # plain indexing goes through ASVspoofDataSet.__getitem__
    feature, label = asvspoof_train[itr]
    print("60 vectors", feature.T.shape)
    print("audiofile label: ", label)
    print()

60 vectors (155, 60)
audiofile label:  spoof

60 vectors (177, 60)
audiofile label:  bonafide

60 vectors (119, 60)
audiofile label:  spoof

60 vectors (140, 60)
audiofile label:  spoof

60 vectors (234, 60)
audiofile label:  spoof

60 vectors (206, 60)
audiofile label:  spoof

60 vectors (178, 60)
audiofile label:  spoof

60 vectors (143, 60)
audiofile label:  spoof

60 vectors (122, 60)
audiofile label:  spoof

60 vectors (111, 60)
audiofile label:  spoof



In [None]:
# Mini-batch size of the training loader.
batch_size = 32

# Instantiate the DataLoader for the training split (reshuffled on every pass).
# NOTE(review): the smoke-test output above shows a different frame count per
# utterance, so batching with the default collate function presumably fails
# once the loader is iterated - confirm and supply a collate_fn if needed.
train_dataloader = data.DataLoader(dataset=asvspoof_train, batch_size=batch_size, shuffle=True)

# No validation loader has been built yet.
val_dataloader = None #data.DataLoader()

dataloader_dict = dict(train=train_dataloader, val=val_dataloader)

#batch_iterator = iter(dataloader_dict["train"])
#inputs, labels = next(batch_iterator) # get first element


In [None]:
# Read the genuine LFCC table and remember how many rows it has.
Xg = pd.read_csv('./datasets/lfcc_genuine.csv')
n_genuine = len(Xg)

In [None]:
# pandas is already imported in the first cell; the import is repeated here
# so that this cell also runs on its own.
import pandas as pd

# Read the spoofed LFCC table and remember how many rows it has.
Xs = pd.read_csv('./datasets/lfcc_spoofed.csv')
n_spoofed = len(Xs)

In [None]:
# Report the row counts of the genuine and spoofed tables loaded above.
print('n_genuine:{}, n_spoofed:{}'.format(n_genuine, n_spoofed))

In [None]:
# Disabled experiment, kept as a bare string literal so that none of it runs:
# L2-normalise the genuine feature table and convert it to a float tensor.
"""
from sklearn.preprocessing import normalize
# Get tensor from numpy array
# DataFrame -> Numpy array -> normalize -> torch.tensor
Xg_normalized = normalize(Xg, norm='l2')
print(Xg_normalized.shape)

Xg_tensor = torch.from_numpy(Xg_normalized).float()

print(Xg_tensor.max(), Xg_tensor.min())
"""

In [None]:
# Caution: the number of frames changes whenever the window length changes.
# shape = (873016, 60)
bonafide_df = pd.read_csv('./datasets/lfcc_bonafide_winlen20ms.csv')
bonafide_df.shape

In [None]:
from scipy.io import loadmat

In [1]:
import hdf5storage

# Load the genuine-feature .mat file; its 'genuine_matrix' entry is turned
# into the training tensor further down.
mat_g = hdf5storage.loadmat('./datasets/genuineFeatureLFCC_v2.mat')

In [3]:
# Load the spoof-feature .mat file; its 'spoof_matrix' entry is turned into
# the training tensor further down. Relies on `hdf5storage` being imported
# by the preceding cell.
mat_s = hdf5storage.loadmat('./datasets/spoofFeatureLFCC_v2.mat')

In [None]:
# Convert the bonafide DataFrame to a NumPy array (rebinds Xg) and show its shape.
Xg = bonafide_df.to_numpy()
Xg.shape

In [None]:
# scipy.io.loadmat returns a dict that maps variable names to arrays (plus the
# "__header__", "__version__" and "__globals__" entries). Wrapping that dict in
# np.array gives a 0-d object array, not the feature matrix, so the stored
# variable is picked out explicitly.
spoof_mat = loadmat('./datasets/spoofFeatureLFCC.mat')
spoof_vars = [name for name in spoof_mat if not name.startswith('__')]
if len(spoof_vars) != 1:
    # NOTE(review): the variable name inside this file is not visible in the
    # notebook; fail loudly rather than guess when it is ambiguous.
    raise KeyError('expected exactly one variable in spoofFeatureLFCC.mat, found: {}'.format(spoof_vars))
Xs = np.asarray(spoof_mat[spoof_vars[0]])
Xs.shape

In [None]:
# Caution: the number of frames changes whenever the window length changes.
# shape = (7809362, 60)
spoof_df = pd.read_csv('./datasets/lfcc_spoof_winlen20ms.csv')
spoof_df.shape

In [None]:
# Keep only the first 5,000,000 rows of the spoof table, as a NumPy array.
# NOTE(review): the original line ended with a stray literal `4853674`, which
# made the cell a syntax error. It is kept only as a trailing comment -
# presumably an alternative row count; confirm.
Xs = spoof_df[:5000000].to_numpy()  # 4853674
Xs.shape

# GMM training for bonafide class

In [None]:
from sklearn.preprocessing import StandardScaler

# Standardise the bonafide training set column-wise (mu=0, std=1).
# NOTE(review): the spoof section fits a separate scaler under the same name
# `sscaler` - confirm that per-class normalisation is intended.
sscaler = StandardScaler()
Xg_scaled = sscaler.fit_transform(Xg)

# Sanity check: per-column mean and standard deviation after scaling.
for column_stat in (Xg_scaled.mean(axis=0), Xg_scaled.std(axis=0)):
    print(column_stat)

In [None]:
# Load bonafide features from a saved .npy file; this rebinds Xg_scaled and
# replaces the array computed by the StandardScaler cell.
Xg_scaled = np.load('./datasets/scaled/train/all_bonafide.npy')

In [None]:
# Convert the scaled bonafide array to a float32 tensor and print the mean
# and variance taken along axis 0.
Xg_tensor_scaled = torch.from_numpy(Xg_scaled).float()
print(Xg_tensor_scaled.mean(axis=0))
print(Xg_tensor_scaled.var(axis=0))

In [10]:
# Training tensor for the bonafide GMM: the 'genuine_matrix' entry of the
# .mat file, converted to float32.
Xg_tensor = torch.from_numpy(mat_g['genuine_matrix']).float()

In [11]:
# Show the shape of the bonafide training tensor.
Xg_tensor.shape

torch.Size([873016, 60])

In [25]:
from pycave.bayes import GMM
# Bonafide model: 512 components over 60 features, covariance='diag'.
gmm_bonafide = GMM(num_components=512, num_features=60, covariance='diag')
# NOTE(review): the recorded history below lists 20 values, so max_iter=10
# here presumably does not cap the number of fit iterations - confirm against
# the pycave documentation.
gmm_bonafide.reset_parameters(max_iter=10)
# Fit with gpu=False; the returned history is inspected in a later cell.
history_g = gmm_bonafide.fit(Xg_tensor, gpu=False)

In [26]:
# Summarise every entry of the bonafide model's state_dict by the sum of its values.
print("Model's state_dict:")
for param_tensor, param_value in gmm_bonafide.state_dict().items():
    print(param_tensor, "\t", param_value.sum())

Model's state_dict:
component_weights 	 tensor(1.0000)
gaussian.means 	 tensor(-1842.4930)
gaussian.covars 	 tensor(23294.2891)


In [27]:
# Display the negative log-likelihood values recorded while fitting the bonafide model.
history_g.neg_log_likelihood

[168.67047119140625,
 36.3994140625,
 34.30910110473633,
 33.611724853515625,
 33.21047592163086,
 32.96519470214844,
 32.80242919921875,
 32.68584442138672,
 32.59552764892578,
 32.52019119262695,
 32.45286178588867,
 32.39021301269531,
 32.331642150878906,
 32.27769088745117,
 32.228145599365234,
 32.18339157104492,
 32.14390182495117,
 32.10991668701172,
 32.08110427856445,
 32.05655288696289]

In [28]:
# Save the bonafide model's state_dict (parameters only) to disk.
torch.save(gmm_bonafide.state_dict(), './models/lfcc_gmm_bonafide_winlen20ms_mat.pt')

In [10]:
# Make an instance with trained parameters
from pycave.bayes import GMM
# Same constructor arguments as the training cell, then restore the saved state_dict.
gmm_bonafide = GMM(num_components=512, num_features=60, covariance='diag')
gmm_bonafide.load_state_dict(torch.load('./models/lfcc_gmm_bonafide_winlen20ms_mat.pt'))
# Last expression of the cell: its return value is what the notebook displays.
gmm_bonafide.eval()

GMM(
  (gaussian): Gaussian(dim=60)
)

In [11]:
# Confirmation step: the reloaded bonafide model should print the same
# per-entry sums as the model that was trained and saved.
print("Model's state_dict:")
for param_tensor, param_value in gmm_bonafide.state_dict().items():
    print(param_tensor, "\t", param_value.sum())

Model's state_dict:
component_weights 	 tensor(1.0000)
gaussian.means 	 tensor(-1842.4930)
gaussian.covars 	 tensor(23294.2891)


# GMM training for spoof class

In [None]:
from sklearn.preprocessing import StandardScaler

# Standardise the spoof training set column-wise (mu=0, std=1).
# NOTE(review): this rebinds `sscaler`, so the scaler fitted on the bonafide
# data is no longer reachable under that name.
sscaler = StandardScaler()
Xs_scaled = sscaler.fit_transform(Xs)

# Sanity check: per-column mean and standard deviation after scaling.
for column_stat in (Xs_scaled.mean(axis=0), Xs_scaled.std(axis=0)):
    print(column_stat)

In [None]:
# Load spoof features from a saved .npy file (rebinds Xs_scaled).
# NOTE(review): this path has an extra 'lfcc/' directory compared with the
# bonafide file ('./datasets/scaled/train/all_bonafide.npy') - confirm both locations.
Xs_scaled = np.load('./datasets/scaled/train/lfcc/all_spoof.npy')

In [None]:
# Convert the scaled spoof array to a float32 tensor and print the mean and
# variance taken along axis 0.
Xs_tensor = torch.from_numpy(Xs_scaled).float()
print(Xs_tensor.mean(axis=0))
print(Xs_tensor.var(axis=0))

In [None]:
# Alternative input: build the tensor from the unscaled Xs array instead.
# This rebinds Xs_tensor, so whichever of these cells ran last wins.
Xs_tensor = torch.from_numpy(Xs).float()
Xs_tensor.shape

In [None]:
# Drop the names of the spoof DataFrame and array; raises NameError if either
# one is not defined in the current kernel.
del spoof_df, Xs

In [31]:
# Training tensor for the spoof GMM: the 'spoof_matrix' entry of the .mat
# file, converted to float32.
Xs_tensor = torch.from_numpy(mat_s['spoof_matrix']).float()
Xs_tensor.shape

torch.Size([7809362, 60])

In [33]:
# Make a model for spoof
from pycave.bayes import GMM

# Same configuration as the bonafide model: 512 components, 60 features, covariance='diag'.
gmm_spoof = GMM(num_components=512, num_features=60, covariance='diag')
# NOTE(review): the recorded history below lists 20 values, so max_iter=10
# here presumably does not cap the number of fit iterations - confirm against
# the pycave documentation.
gmm_spoof.reset_parameters(max_iter=10)
# Fit with gpu=False; the returned history is inspected in a later cell.
history_s = gmm_spoof.fit(Xs_tensor, gpu=False)

In [34]:
# Summarise every entry of the spoof model's state_dict by the sum of its values.
print("Model's state_dict:")
for param_tensor, param_value in gmm_spoof.state_dict().items():
    print(param_tensor, "\t", param_value.sum())

Model's state_dict:
component_weights 	 tensor(1.)
gaussian.means 	 tensor(-1898.5071)
gaussian.covars 	 tensor(24112.1504)


In [35]:
# Display the negative log-likelihood values recorded while fitting the spoof model.
history_s.neg_log_likelihood
#history_s.batch_loss

[169.619140625,
 35.970855712890625,
 33.644657135009766,
 32.65544128417969,
 32.084747314453125,
 31.698017120361328,
 31.38581085205078,
 31.109220504760742,
 30.85027313232422,
 30.638986587524414,
 30.48299217224121,
 30.370349884033203,
 30.285409927368164,
 30.216463088989258,
 30.156810760498047,
 30.1041259765625,
 30.054887771606445,
 30.007131576538086,
 29.965469360351562,
 29.93009376525879]

In [36]:
# Save the spoof model's state_dict (parameters only) to disk.
# NOTE(review): this file name uses "win20ms" whereas the bonafide checkpoint
# uses "winlen20ms".
torch.save(gmm_spoof.state_dict(), './models/lfcc_gmm_spoof_win20ms_mat.pt')

In [8]:
# Make an instance with trained parameters
from pycave.bayes import GMM

# Same constructor arguments as the training cell, then restore the saved state_dict.
gmm_spoof = GMM(num_components=512, num_features=60, covariance='diag')
gmm_spoof.load_state_dict(torch.load('./models/lfcc_gmm_spoof_win20ms_mat.pt'))
# Last expression of the cell: its return value is what the notebook displays.
gmm_spoof.eval()

GMM(
  (gaussian): Gaussian(dim=60)
)

In [9]:
# Confirmation step: the reloaded spoof model should print the same per-entry
# sums as the model that was trained and saved.
print("Model's state_dict:")
for param_tensor, param_value in gmm_spoof.state_dict().items():
    print(param_tensor, "\t", param_value.sum())

Model's state_dict:
component_weights 	 tensor(1.)
gaussian.means 	 tensor(-1898.5071)
gaussian.covars 	 tensor(24112.1504)


In [43]:
# Development split, used for validation.
asvspoof_dev = ASVspoofDataSet(file_list=dev_list, preprocess=process, phase='dev')

# number of files whose label is printed
iterations = 10

for itr in range(iterations):
    # plain indexing goes through ASVspoofDataSet.__getitem__
    feature, label = asvspoof_dev[itr]
    print("audiofile label:", label)

audiofile label: ('LA_0075', 'A01', 'spoof')
audiofile label: ('LA_0076', 'A05', 'spoof')
audiofile label: ('LA_0072', 'A06', 'spoof')
audiofile label: ('LA_0077', 'A04', 'spoof')
audiofile label: ('LA_0070', 'A04', 'spoof')
audiofile label: ('LA_0078', 'A04', 'spoof')
audiofile label: ('LA_0071', 'A06', 'spoof')
audiofile label: ('LA_0069', 'A05', 'spoof')
audiofile label: ('LA_0078', 'A04', 'spoof')
audiofile label: ('LA_0075', 'A04', 'spoof')


In [44]:
#!mv ../ASVspoof_2019_baseline_CM_v1/featureLFCC_dev.mat ./datasets/

In [47]:
import hdf5storage

# Load the development-set LFCC features from the .mat file.
mat_dev = hdf5storage.loadmat('./datasets/featureLFCC_dev.mat')

In [14]:
#dev_path = '/DB/Audio/English/ASVspoof2019/LA/ASVspoof2019_LA_dev/flac/*'

# (label, score) pairs, one per development file
cm_LA_LFCC = []

# glob returns matches in arbitrary order; sort so the score list comes out
# in the same order on every run.
for mat_path in sorted(glob.glob('../ASVspoof_2019_baseline_CM_v1/dev_dataset/*')):

    features = hdf5storage.loadmat(mat_path)['x_fea']

    # The label is the text after the last underscore of the file name, with
    # the extension removed as a suffix. str.rstrip('.mat') strips any
    # trailing '.', 'm', 'a', 't' characters and only worked by accident.
    label = os.path.splitext(os.path.basename(mat_path))[0].rsplit('_', 1)[-1]

    #print(features.shape, label)

    # the stored matrix is transposed before scoring
    feature_tensor = torch.from_numpy(features.T).float()

    # compute log-likelihood ratio
    score = -(gmm_bonafide.evaluate(feature_tensor) - gmm_spoof.evaluate(feature_tensor))

    cm_LA_LFCC.append((label, score))

print('Done!')
print(len(cm_LA_LFCC)) # This should be 24844

Done!
24844


In [None]:
#dev_path = '/DB/Audio/English/ASVspoof2019/LA/ASVspoof2019_LA_dev/flac/*'

# Score the development features stored as one .npy file per utterance;
# collects (label, score) pairs.
cm_LA_LFCC = []

for itr, fname in enumerate(glob.glob('./datasets/scaled/dev/lfcc/*')):

    # the label is the part of the file name before its first underscore
    base_name = fname.split('/')[-1]
    label = base_name.split('_')[0]

    #feature, label = asvspoof_dev.__getitem__(itr)
    feature = np.load(fname)
    feature_tensor = torch.from_numpy(feature).float()

    # compute log-likelihood ratio
    bonafide_eval = gmm_bonafide.evaluate(feature_tensor)
    spoof_eval = gmm_spoof.evaluate(feature_tensor)
    score = -(bonafide_eval - spoof_eval)

    cm_LA_LFCC.append((label, score))

print('Done!')
print(len(cm_LA_LFCC)) # This should be 24844
#np.savetxt('scores_cm_LA_LFCC.txt', score, fmt='%.5f')

In [None]:
# Test on development set
from sklearn import preprocessing

# (speaker id, source, key, score) entries, one per labelled utterance
cm_LA_LFCC = []

for itr in range(len(asvspoof_dev)):

    # plain indexing goes through ASVspoofDataSet.__getitem__
    feature, label = asvspoof_dev[itr]

    # utterances without a protocol entry are not scored
    if label is not None:
        feature_scaled = preprocessing.scale(feature.T)
        feature_tensor = torch.from_numpy(feature_scaled).float()

        # compute log-likelihood ratio
        bonafide_eval = gmm_bonafide.evaluate(feature_tensor)
        spoof_eval = gmm_spoof.evaluate(feature_tensor)
        score = -(bonafide_eval - spoof_eval)

        # the dev label is a 3-tuple; unpack it next to the score
        cm_LA_LFCC.append((*label, score))

print('Done!')
print(len(cm_LA_LFCC))
#np.savetxt('scores_cm_LA_LFCC.txt', score, fmt='%.5f')

In [None]:
# This is for evaluation of score with 3-labels
# Expects cm_LA_LFCC to hold 4-tuples (speaker id, source, key, score).
with open('./scores/scores_cm_LA_LFCC_torch_itr10_sentence_scaled_v2.txt', mode='w') as f:
    score_lines = [
        '{} {} {} {}'.format(spkid, source, key, score)
        for spkid, source, key, score in cm_LA_LFCC
    ]
    # one entry per line, no newline after the last one
    f.write('\n'.join(score_lines))

In [15]:
# This is for evaluation of score with 1-label
# Expects cm_LA_LFCC to hold 2-tuples (key, score); the first two columns are
# written as '-' placeholders.
with open('./scores/scores_cm_LA_LFCC_torch_winlen20ms_mat.txt', mode='w') as f:
    score_lines = ['- - {} {}'.format(key, score) for key, score in cm_LA_LFCC]
    # one entry per line, no newline after the last one
    f.write('\n'.join(score_lines))

In [None]:
# Echo the saved score file. Every line read from the file still ends with
# its own newline, so print must not append a second one (the original
# output was double-spaced).
with open('./scores/scores_cm_LA_LFCC_torch_winlen20ms_mat.txt') as f:
    for line in f:
        print(line, end='')

In [None]:
# Persist the current content of cm_LA_LFCC with joblib.
# NOTE(review): the file name matches the 3-label score file, so this
# presumably expects the 4-tuple list from the dataset-based scoring cell - confirm.
joblib.dump(cm_LA_LFCC, './scores/cm_LA_LFCC_torch_itr10_sentence_scaled_v2.score')