In [1]:
import pandas as pd
import numpy as np
import sys
import os
import scipy.stats as stats
import time
import tqdm
import seaborn as sns
from sklearn import svm
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
from sklearn.preprocessing import normalize
from sklearn.ensemble import RandomForestClassifier

In [2]:
# Add 2 to this process's niceness (i.e. lower its scheduling priority) and show
# the resulting value. The increment is relative, so re-running this cell in the
# same kernel raises the niceness again.
os.nice(2)

2

### Load CSV

In [9]:
def load_protocol(txt_path):
    """Read one ASVspoof2019 LA cm protocol file into a DataFrame.

    The file is parsed as space-separated text without a header row. Its five
    columns are named speaker_id, audio_filename, null, system_id and label,
    and the "null" placeholder column is dropped before returning.

    Parameters
    ----------
    txt_path : str
        Path to the protocol text file.

    Returns
    -------
    pandas.DataFrame
        One row per line of the file, with columns speaker_id,
        audio_filename, system_id and label.

    Raises
    ------
    ValueError
        If the parsed file does not have exactly five columns (raised by the
        column-name assignment).
    """
    protocol = pd.read_csv(txt_path, sep=" ", header=None)
    protocol.columns = ["speaker_id", "audio_filename", "null", "system_id", "label"]
    return protocol.drop(columns="null")


# Protocol files for the three partitions.
train_txt_path = '../dataset/LA/ASVspoof2019_LA_cm_protocols/ASVspoof2019.LA.cm.train.trn.txt'
dev_txt_path = '../dataset/LA/ASVspoof2019_LA_cm_protocols/ASVspoof2019.LA.cm.dev.trl.txt'
eval_txt_path = '../dataset/LA/ASVspoof2019_LA_cm_protocols/ASVspoof2019.LA.cm.eval.trl.txt'

df_train = load_protocol(train_txt_path)
df_dev = load_protocol(dev_txt_path)
df_eval = load_protocol(eval_txt_path)

### Params

In [10]:
# Settings identifying which precomputed bicoherence features to use. In the
# cells below they are formatted into the feature directory names and into the
# filenames of the cached statistics pickles.
nfft = 64  # presumably the FFT length used when extracting the bicoherences — TODO confirm
hop_size = 32  # presumably the hop between analysis frames — TODO confirm

### Load train features

In [11]:
# Directory holding the precomputed bicoherence arrays (one '<audio_filename>.npy'
# file per row of df_train) for the current nfft / hop_size setting.
train_feat_root_path = '../features/bicoherences/train_nfft_{}_hop_size_{}'.format(nfft, hop_size)

# Reserve the eight statistic columns (magnitude first, then phase) as NaN so
# the loop below can fill them in one row at a time.
for column in ("mean_mag", "var_mag", "skew_mag", "kurt_mag",
               "mean_phase", "var_phase", "skew_phase", "kurt_phase"):
    df_train[column] = np.nan

for index, row in tqdm.tqdm(df_train.iterrows(), total=df_train.shape[0]):
    feat_path = os.path.join(train_feat_root_path, row['audio_filename'] + '.npy')
    bicoh = np.load(feat_path)
    mag = np.abs(bicoh)
    phase = np.angle(bicoh)

    # Mean, variance, skewness and kurtosis over every element of the array
    # (axis=None makes scipy use the flattened array, matching np.mean / np.var).
    row_stats = {
        'mean_mag': np.mean(mag),
        'var_mag': np.var(mag),
        'skew_mag': stats.skew(mag, axis=None),
        'kurt_mag': stats.kurtosis(mag, axis=None),
        'mean_phase': np.mean(phase),
        'var_phase': np.var(phase),
        'skew_phase': stats.skew(phase, axis=None),
        'kurt_phase': stats.kurtosis(phase, axis=None),
    }
    for column, value in row_stats.items():
        df_train.at[index, column] = value

# Cache the augmented frame so the statistics need not be recomputed.
df_train.to_pickle('../features/bicoherences/dataframes/train_bicoh_stats_nfft_{}_hop_size_{}.pkl'.format(nfft, hop_size))

100%|██████████| 25380/25380 [00:40<00:00, 622.79it/s]


# Reload the cached train statistics. The filename has to match the one passed
# to to_pickle, including the '.pkl' extension.
df_train = pd.read_pickle('../features/bicoherences/dataframes/train_bicoh_stats_nfft_{}_hop_size_{}.pkl'.format(
    nfft, hop_size))

In [12]:
# Preview only the first rows rather than rendering the whole frame.
df_train.head(10)

Unnamed: 0,speaker_id,audio_filename,system_id,label,mean_mag,var_mag,skew_mag,kurt_mag,mean_phase,var_phase,skew_phase,kurt_phase
0,LA_0079,LA_T_1138215,-,bonafide,0.223827,0.016810,0.202526,-0.571989,0.017894,3.213300,-0.013840,-1.310626
1,LA_0079,LA_T_1271820,-,bonafide,0.297825,0.029658,-0.191639,-1.012221,0.057381,3.299339,-0.041388,-1.320829
2,LA_0079,LA_T_1272637,-,bonafide,0.228574,0.025450,0.456904,-0.381960,0.047478,3.190470,-0.024370,-1.072441
3,LA_0079,LA_T_1276960,-,bonafide,0.230999,0.023956,0.441628,-0.505886,0.067232,2.554038,0.003582,-0.621040
4,LA_0079,LA_T_1341447,-,bonafide,0.149285,0.012732,0.589728,-0.071773,0.077657,3.511379,-0.022574,-1.230996
5,LA_0079,LA_T_1363611,-,bonafide,0.225957,0.015865,0.525418,-0.263805,0.045003,3.275922,-0.035826,-1.296963
6,LA_0079,LA_T_1596451,-,bonafide,0.300462,0.033812,0.033513,-1.234803,0.084743,4.029728,-0.058313,-1.347732
7,LA_0079,LA_T_1608170,-,bonafide,0.190606,0.013788,0.459796,-0.671269,0.019916,2.405807,-0.016441,-0.863616
8,LA_0079,LA_T_1684951,-,bonafide,0.321812,0.027735,0.352444,-0.684238,0.080826,3.354703,-0.011379,-1.327487
9,LA_0079,LA_T_1699801,-,bonafide,0.258390,0.033779,0.682516,-0.443214,0.057644,2.966702,0.004086,-1.027691


### Load dev features

In [13]:
# Directory holding the precomputed bicoherence arrays (one '<audio_filename>.npy'
# file per row of df_dev) for the current nfft / hop_size setting.
dev_feat_root_path = '../features/bicoherences/dev_nfft_{}_hop_size_{}'.format(nfft, hop_size)

# Reserve the eight statistic columns (magnitude first, then phase) as NaN so
# the loop below can fill them in one row at a time.
for column in ("mean_mag", "var_mag", "skew_mag", "kurt_mag",
               "mean_phase", "var_phase", "skew_phase", "kurt_phase"):
    df_dev[column] = np.nan

for index, row in tqdm.tqdm(df_dev.iterrows(), total=df_dev.shape[0]):
    feat_path = os.path.join(dev_feat_root_path, row['audio_filename'] + '.npy')
    bicoh = np.load(feat_path)
    mag = np.abs(bicoh)
    phase = np.angle(bicoh)

    # Mean, variance, skewness and kurtosis over every element of the array
    # (axis=None makes scipy use the flattened array, matching np.mean / np.var).
    row_stats = {
        'mean_mag': np.mean(mag),
        'var_mag': np.var(mag),
        'skew_mag': stats.skew(mag, axis=None),
        'kurt_mag': stats.kurtosis(mag, axis=None),
        'mean_phase': np.mean(phase),
        'var_phase': np.var(phase),
        'skew_phase': stats.skew(phase, axis=None),
        'kurt_phase': stats.kurtosis(phase, axis=None),
    }
    for column, value in row_stats.items():
        df_dev.at[index, column] = value

# Cache the augmented frame so the statistics need not be recomputed.
df_dev.to_pickle('../features/bicoherences/dataframes/dev_bicoh_stats_nfft_{}_hop_size_{}.pkl'.format(nfft, hop_size))

100%|██████████| 24844/24844 [00:40<00:00, 613.98it/s]


# Reload the cached dev statistics. The filename has to match the one passed
# to to_pickle, including the '.pkl' extension.
df_dev = pd.read_pickle('../features/bicoherences/dataframes/dev_bicoh_stats_nfft_{}_hop_size_{}.pkl'.format(
    nfft, hop_size))

### Load eval features

In [14]:
# Directory holding the precomputed bicoherence arrays (one '<audio_filename>.npy'
# file per row of df_eval) for the current nfft / hop_size setting.
eval_feat_root_path = '../features/bicoherences/eval_nfft_{}_hop_size_{}'.format(nfft, hop_size)

# Reserve the eight statistic columns (magnitude first, then phase) as NaN so
# the loop below can fill them in one row at a time.
for column in ("mean_mag", "var_mag", "skew_mag", "kurt_mag",
               "mean_phase", "var_phase", "skew_phase", "kurt_phase"):
    df_eval[column] = np.nan

for index, row in tqdm.tqdm(df_eval.iterrows(), total=df_eval.shape[0]):
    feat_path = os.path.join(eval_feat_root_path, row['audio_filename'] + '.npy')
    bicoh = np.load(feat_path)
    mag = np.abs(bicoh)
    phase = np.angle(bicoh)

    # Mean, variance, skewness and kurtosis over every element of the array
    # (axis=None makes scipy use the flattened array, matching np.mean / np.var).
    row_stats = {
        'mean_mag': np.mean(mag),
        'var_mag': np.var(mag),
        'skew_mag': stats.skew(mag, axis=None),
        'kurt_mag': stats.kurtosis(mag, axis=None),
        'mean_phase': np.mean(phase),
        'var_phase': np.var(phase),
        'skew_phase': stats.skew(phase, axis=None),
        'kurt_phase': stats.kurtosis(phase, axis=None),
    }
    for column, value in row_stats.items():
        df_eval.at[index, column] = value

# Cache the augmented frame so the statistics need not be recomputed.
df_eval.to_pickle('../features/bicoherences/dataframes/eval_bicoh_stats_nfft_{}_hop_size_{}.pkl'.format(nfft, hop_size))

100%|██████████| 71237/71237 [03:20<00:00, 354.53it/s]


# Reload the cached eval statistics. The filename has to match the one passed
# to to_pickle, including the '.pkl' extension.
df_eval = pd.read_pickle('../features/bicoherences/dataframes/eval_bicoh_stats_nfft_{}_hop_size_{}.pkl'.format(
    nfft, hop_size))