In [1]:
import numpy as np
import torch
from torch import Tensor
from torch.utils.data import DataLoader
from model5 import Model
# from model3 import Model
# from model4 import Model
# from model5 import Model
# from model6 import Model
# from model7 import Model
from core_scripts.startup_config import set_random_seed
import random
from torch.utils.data import Dataset
import soundfile as sf
from evaluation import calculate_tDCF_EER

In [2]:
def pad(x, max_len=64600):
    """Return the first ``max_len`` samples of ``x``, looping ``x`` when it is shorter.

    Args:
        x: array whose first axis is the sample axis.
        max_len: number of samples in the result.

    Returns:
        ``x[:max_len]`` when ``x`` already has at least ``max_len`` samples;
        otherwise ``x`` repeated end to end and cut to ``max_len`` samples.
    """
    n_samples = x.shape[0]
    if n_samples < max_len:
        # Too short: tile along a new leading axis, then take row 0 cut to size.
        repeats = int(max_len / n_samples) + 1
        return np.tile(x, (1, repeats))[0, :max_len]
    return x[:max_len]


def pad_random(x: np.ndarray, max_len: int = 64600):
    """Return ``max_len`` samples of ``x``: a random crop if long enough, a looped copy if not.

    Args:
        x: array whose first axis is the sample axis.
        max_len: number of samples in the result.

    Returns:
        When ``x`` has at least ``max_len`` samples, a contiguous window of
        ``max_len`` samples starting at a uniformly drawn offset in
        ``[0, len(x) - max_len]``. Otherwise ``x`` repeated end to end and cut
        to ``max_len`` samples.
    """
    x_len = x.shape[0]
    # if duration is already long enough
    if x_len >= max_len:
        # randint's upper bound is exclusive, so "+ 1" is required both to make
        # the last valid offset reachable and to avoid randint(0), which raises
        # ValueError when the input is exactly max_len samples long.
        stt = np.random.randint(x_len - max_len + 1)
        return x[stt:stt + max_len]

    # if too short
    num_repeats = int(max_len / x_len) + 1
    padded_x = np.tile(x, (num_repeats))[:max_len]
    return padded_x

In [3]:
def genSpoof_list(dir_meta, is_train=False, is_eval=False):
    """Parse a protocol file into utterance keys and, unless eval-only, labels.

    Every line must consist of exactly five single-space-separated fields; the
    second field is the utterance key and the fifth is the label.

    Args:
        dir_meta: path of the protocol text file.
        is_train: when true, labels are returned (takes precedence over ``is_eval``).
        is_eval: when true and ``is_train`` is false, only the key list is returned.

    Returns:
        ``file_list`` alone in eval mode, otherwise ``(d_meta, file_list)`` where
        ``d_meta`` maps each key to 1 for "bonafide" and 0 for any other label.
    """
    with open(dir_meta, "r") as f:
        l_meta = f.readlines()

    # Train mode and the default mode return the same thing; only eval differs.
    keys_only = is_eval and not is_train

    d_meta = {}
    file_list = []
    for line in l_meta:
        _, key, _, _, label = line.strip().split(" ")
        file_list.append(key)
        if not keys_only:
            d_meta[key] = 1 if label == "bonafide" else 0

    if keys_only:
        return file_list
    return d_meta, file_list


In [4]:
# class Dataset_ASVspoof2019_devNeval(Dataset):
#     def __init__(self, list_IDs, base_dir):
#         """self.list_IDs	: list of strings (each string: utt key),
#         """
#         self.list_IDs = list_IDs
#         self.base_dir = base_dir
#         self.cut = 64600  # take ~4 sec audio (64600 samples)

#     def __len__(self):
#         return len(self.list_IDs)

#     def __getitem__(self, index):
#         key = self.list_IDs[index]
#         X, _ = sf.read(str(self.base_dir + f"flac/{key}.flac"))
#         X_pad = pad(X, self.cut)
#         x_inp = Tensor(X_pad)
#         return x_inp, key
    
from data_utils_SSL import getMsValues
class Dataset_ASVspoof2019_devNeval(Dataset):
    """Dataset returning, per utterance, the waveform, its key and a modulation-spectrogram slice."""

    def __init__(self, list_IDs, base_dir):
        """Remember which utterances to serve and where their audio lives.

        list_IDs: list of strings (each string: utt key).
        base_dir: string prefix; "flac/<key>.flac" is appended to it verbatim,
            so it has to end with a path separator.
        """
        self.list_IDs = list_IDs
        self.base_dir = base_dir
        self.cut = 64600  # take ~4 sec audio (64600 samples)

    def __len__(self):
        """Number of utterances in the list."""
        return len(self.list_IDs)

    def __getitem__(self, index):
        """Load one utterance and return ``(waveform, key, modulation_spectrogram)``.

        The audio is read with soundfile, fixed to ``self.cut`` samples with
        ``pad``, and handed to ``getMsValues`` together with 16000 (presumably
        the sample rate in Hz - confirm against data_utils_SSL). The slice
        ``[:, :, 0]`` of its 'power_modulation_spectrogram' entry is returned.
        """
        utt_key = self.list_IDs[index]
        flac_path = str(self.base_dir + f"flac/{utt_key}.flac")
        samples, _ = sf.read(flac_path)
        samples = pad(samples, self.cut)
        # Build the waveform tensor before the spectrogram call, as the original did.
        waveform = Tensor(samples)
        mod_spec = getMsValues(samples, 16000)['power_modulation_spectrogram'][:, :, 0]
        return waveform, utt_key, Tensor(mod_spec)

In [5]:
class Arguments():
    """Static configuration for this evaluation run (used in place of CLI arguments).

    An instance is passed to ``set_random_seed`` and to ``Model`` and is read
    directly by the cells that build paths and load the checkpoint.
    """
    # Root of the ASVspoof2019 LA data; later cells append sub-directories to it
    # by plain string concatenation, so the trailing "/" is required.
    database_path = "/DATA/nfsshare/rishith/datasets/asvSpoof2019/DS_10283_3336/LA/"
    # Not read by any cell visible in this notebook.
    protocols_path = "database/"
    # Seed handed to set_random_seed.
    seed = 1234
    # Track name, interpolated into directory and protocol file names.
    track = "LA"
    # Not read by any cell visible in this notebook.
    is_eval = True
    # Presumably consumed by set_random_seed to configure cuDNN - confirm in core_scripts.startup_config.
    cudnn_deterministic_toggle = True
    cudnn_benchmark_toggle = False
    # Previously evaluated checkpoints, kept for reference:
    # model_path = "/DATA/Rishith/SSL_Anti-spoofing/models_mlaad/model_LA_WCE_100_14_1e-06/epoch_99.pth"
    # model_path = "/DATA/Rishith/SSL_Anti-spoofing/models_combined/model_LA_WCE_100_32_1e-06/epoch_70.pth"
    # model_path = "/DATA/Rishith/SSL_Anti-spoofing/models3_msAdd/model_LA_WCE_100_24_1e-06/epoch_32.pth"
    # model_path = "/DATA/Rishith/SSL_Anti-spoofing/models5_msFusion_hdim256/model_LA_WCE_100_14_1e-06/epoch_51.pth"
    # model_path = "/DATA/Rishith/SSL_Anti-spoofing/models5_msFusion_hdim256_mlaad/model_LA_WCE_100_14_1e-06/epoch_99.pth"
    # model_path = "/DATA/Rishith/SSL_Anti-spoofing/models5_msFusion_hdim256_combined/model_LA_WCE_100_24_1e-06/epoch_25.pth"
    # model_path = "/DATA/Rishith/SSL_Anti-spoofing/models5_msFusion_hdim256_combined/model_LA_WCE_100_24_1e-06/epoch_99.pth"
    # model_path = "/DATA/Rishith/Abhishek/SSL_Anti-spoofing/pretrained_models/LA_model.pth"

    # Checkpoint evaluated by this run: epoch 32 of the "seed=10(fusion)" run, per the path.
    # NOTE(review): this comment used to read "running combined model - 70th epoch",
    # which does not match the path below.
    model_path = "/DATA/Rishith/Abhishek/SSL_Anti-spoofing/models_seed=10(fusion)/model_LA_WCE_100_14_1e-06/epoch_32.pth"
    
    
# Single shared configuration object used by the cells below.
args = Arguments()

In [6]:
# Seed the run through the project helper. The whole `args` object is passed too,
# presumably so the helper can read the cudnn_* toggles - confirm in core_scripts.startup_config.
set_random_seed(args.seed, args)

In [7]:
# Track name and the file-name prefixes derived from it.
track = args.track
prefix = 'ASVspoof_{}'.format(track)
prefix_2019 = 'ASVspoof2019.{}'.format(track)
prefix_2021 = 'ASVspoof2021.{}'.format(track)

# Run on the GPU when one is available, otherwise on the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print('Device: {}'.format(device))

# Build the network, count its parameters, then move it to the target device.
model = Model(args, device)
nb_params = sum(param.numel() for param in model.parameters())
model = model.to(device)
print('nb_params:', nb_params)

# Restore the trained weights when a checkpoint path is configured.
if args.model_path:
    state_dict = torch.load(args.model_path, map_location=device)
    model.load_state_dict(state_dict)
    print('Model loaded : {}'.format(args.model_path))

Device: cuda
nb_params: 318287562
Model loaded : /DATA/Rishith/Abhishek/SSL_Anti-spoofing/models_seed=10(fusion)/model_LA_WCE_100_14_1e-06/epoch_32.pth


In [8]:
# Echo the configured dataset root as a sanity check before building paths from it.
args.database_path

'/DATA/nfsshare/rishith/datasets/asvSpoof2019/DS_10283_3336/LA/'

In [9]:
# eval_database_path = args.database_path + "ASVspoof2019_{}_eval/".format(track)
# # eval_score_path = "eval_CM_scores_file_SSL_mlaadModel_epoch70_LA2019.txt"
# eval_score_path = "scores_output/eval_CM_scores_file_SSL_msFusion_hdim256_epoch51_LA2019.txt"
# # print("Start evaluation...")

# eval_trial_path = args.database_path+"ASVspoof2019_{}_cm_protocols/{}.cm.eval.trl.txt".format(
#             track, prefix_2019)

# file_eval = genSpoof_list(dir_meta=eval_trial_path,
#                               is_train=False,
#                               is_eval=True)
# eval_set = Dataset_ASVspoof2019_devNeval_ms(list_IDs=file_eval,
#                                             base_dir=eval_database_path)
# eval_loader = DataLoader(eval_set,
#                             batch_size=24,
#                             shuffle=False,
#                             drop_last=False,
#                             pin_memory=True)

In [10]:
# Input locations: the evaluation audio directory and its trial protocol file.
eval_database_path = args.database_path + "ASVspoof2019_{}_eval/".format(track)
eval_trial_path = args.database_path + "ASVspoof2019_{}_cm_protocols/{}.cm.eval.trl.txt".format(
    track, prefix_2019)

# Output location for the per-utterance CM scores of the epoch-32 fusion checkpoint.
eval_score_path = "/DATA/Rishith/Abhishek/SSL_Anti-spoofing/testing_results/my_trained/my_trained_test_epoch32_a_new_fusion.txt"

# Eval mode returns only the utterance keys, in protocol order.
file_eval = genSpoof_list(eval_trial_path, is_train=False, is_eval=True)
eval_set = Dataset_ASVspoof2019_devNeval(file_eval, eval_database_path)

# No shuffling and no dropped tail batch: the scoring step checks that the
# loader order matches the protocol file line by line.
loader_options = dict(batch_size=24, shuffle=False, drop_last=False, pin_memory=True)
eval_loader = DataLoader(eval_set, **loader_options)

In [11]:
def produce_evaluation_file_ms(
    data_loader: DataLoader,
    model,
    device: torch.device,
    save_path: str,
    trial_path: str) -> None:
    """Score every utterance from ``data_loader`` and write the scores to ``save_path``.

    Args:
        data_loader: yields ``(waveform_batch, utterance_ids, ms_batch)`` triples.
        model: called as ``model(waveform_batch, ms_batch)``; column 1 of its
            output is used as the score.
        device: device the two input batches are moved to.
        save_path: text file to write, one ``"<utt_id> <src> <key> <score>"`` line
            per trial.
        trial_path: protocol file with five space-separated fields per line, in
            the same order as the loader yields utterances.

    Raises:
        AssertionError: if the number of scored utterances differs from the
            number of protocol lines, or an utterance id does not match its line.
    """
    model.eval()
    with open(trial_path, "r") as f_trl:
        trial_lines = f_trl.readlines()

    fname_list, score_list = [], []
    with torch.no_grad():
        for batch_x, utt_id, ms in data_loader:
            batch_out = model(batch_x.to(device), ms.to(device))
            batch_score = batch_out[:, 1].detach().cpu().numpy().ravel()
            # add outputs
            fname_list.extend(utt_id)
            score_list.extend(batch_score.tolist())

    assert len(trial_lines) == len(fname_list) == len(score_list)
    with open(save_path, "w") as fh:
        for fn, sco, trl in zip(fname_list, score_list, trial_lines):
            _, utt_id, _, src, key = trl.strip().split(' ')
            # The loader order must match the protocol order exactly.
            assert fn == utt_id
            fh.write("{} {} {} {}\n".format(utt_id, src, key, sco))
    print("Scores saved to {}".format(save_path))


In [12]:
# Echo where the CM scores are about to be written.
eval_score_path

'/DATA/Rishith/Abhishek/SSL_Anti-spoofing/testing_results/my_trained/my_trained_test_epoch32_a_new_fusion.txt'

In [13]:

# Score the whole evaluation set with the loaded model and write one line per
# trial to eval_score_path (the protocol file supplies the src/key columns).
produce_evaluation_file_ms(eval_loader, model, device,
                        eval_score_path, eval_trial_path)

Scores saved to /DATA/Rishith/Abhishek/SSL_Anti-spoofing/testing_results/my_trained/my_trained_test_epoch32_a_new_fusion.txt


In [14]:
from evaluation import calculate_tDCF_EER
import os

# ASV score file, relative to the dataset root in args.database_path.
asv_scores_file = "ASVspoof2019_LA_asv_scores/ASVspoof2019.LA.asv.eval.gi.trl.scores.txt"

# Write the metric report next to the CM score file but under a distinct name.
# The original output_file was a relative path with exactly the same file name
# as the score file being read; when the notebook runs from the repository
# root the two resolve to the same file and the report replaces the scores.
eval_report_path = os.path.splitext(eval_score_path)[0] + "_tDCF_EER.txt"

eval_eer, eval_tdcf = calculate_tDCF_EER(
            cm_scores_file=eval_score_path,
            asv_score_file=args.database_path + asv_scores_file,
            output_file=eval_report_path)


CM SYSTEM
	EER		= 0.231406218 % (Equal error rate for countermeasure)

TANDEM
	min-tDCF		= 0.007411373

BREAKDOWN CM SYSTEM
	EER A07		= 0.040744310 % (Equal error rate for A07
	EER A08		= 0.162977240 % (Equal error rate for A08
	EER A09		= 0.000000000 % (Equal error rate for A09
	EER A10		= 0.203721550 % (Equal error rate for A10
	EER A11		= 0.146004133 % (Equal error rate for A11
	EER A12		= 0.098461727 % (Equal error rate for A12
	EER A13		= 0.000000000 % (Equal error rate for A13
	EER A14		= 0.023771203 % (Equal error rate for A14
	EER A15		= 0.040744310 % (Equal error rate for A15
	EER A16		= 0.057717417 % (Equal error rate for A16
	EER A17		= 0.285210170 % (Equal error rate for A17
	EER A18		= 0.937119129 % (Equal error rate for A18
	EER A19		= 0.325954480 % (Equal error rate for A19


In [15]:
import numpy as np
from evaluation import compute_eer

In [16]:
# Load CM scores
eval_score_path = "scores_output/eval_CM_scores_file_SSL_msFusion_hdim256_epoch51_LA2019.txt"
cm_data = np.genfromtxt(eval_score_path, dtype=str)

# Four-column score file: utt id, source, key, score.
# (Files written without the source column keep key/score in columns 1 and 2.)
cm_keys, cm_scores = cm_data[:, 2], cm_data[:, 3].astype(float)

# Split the scores by ground-truth key.
bona_cm, spoof_cm = (cm_scores[cm_keys == label] for label in ('bonafide', 'spoof'))


OSError: scores_output/eval_CM_scores_file_SSL_msFusion_hdim256_epoch51_LA2019.txt not found.

In [None]:
# Inspect the raw string table read from the score file.
cm_data

In [None]:
# compute_eer returns two values from the bonafide and spoof score arrays;
# presumably the EER as a fraction and its decision threshold - confirm in evaluation.py.
eer_cm, th = compute_eer(bona_cm, spoof_cm)
# First value scaled to percent, then the second value, one per line.
print(eer_cm * 100, th, sep="\n")

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Score range, reused as axis limits here and by the next plotting cell.
m, n = cm_scores.min(), cm_scores.max()

# Overlay both score distributions (200 bins each) on a single axis.
fig, ax = plt.subplots()
for class_scores in (bona_cm, spoof_cm):
    ax.hist(class_scores, 200)
ax.set_xlim([m, n])
plt.show()

In [None]:
# Shape (i.e. count) of the bonafide scores strictly below `th`
# (presumably the EER threshold from compute_eer - confirm).
bona_cm[bona_cm<th].shape

In [None]:
# Shape (i.e. count) of the spoof scores strictly above `th`
# (presumably the EER threshold from compute_eer - confirm).
spoof_cm[spoof_cm>th].shape

In [None]:
# Bonafide (top) and spoof (bottom) score histograms on a shared x-range.
# `m` and `n` are the score min/max computed in the previous plotting cell.
fig, axes = plt.subplots(2, 1)
panels = zip(axes, (bona_cm, spoof_cm), ('blue', 'orange'))
for ax, class_scores, colour in panels:
    ax.hist(class_scores, 200, color=colour)
    ax.set_xlim([m - 1, n + 1])
    ax.set_xlabel('Scores')
    ax.set_ylabel('Count')
fig.tight_layout()
plt.show()

In [None]:
# Load the CM scores of the epoch-51 msFusion model on the mlaad set.
eval_score_path = "scores_output/eval_CM_scores_file_SSL_msFusion_hdim256_epoch51_mlaad_new.txt"
cm_data = np.genfromtxt(eval_score_path, dtype=str)

# This file is read with the key in column 1 and the score in column 2.
cm_keys, cm_scores = cm_data[:, 1], cm_data[:, 2].astype(float)

# Split the scores by ground-truth key.
bona_cm, spoof_cm = (cm_scores[cm_keys == label] for label in ('bonafide', 'spoof'))

In [None]:
# Score range of the currently loaded file, padded by 1 on each side below.
m, n = cm_scores.min(), cm_scores.max()

# Bonafide (top) and spoof (bottom) score histograms on a shared x-range.
fig, axes = plt.subplots(2, 1)
panels = zip(axes, (bona_cm, spoof_cm), ('blue', 'orange'))
for ax, class_scores, colour in panels:
    ax.hist(class_scores, 200, color=colour)
    ax.set_xlim([m - 1, n + 1])
    ax.set_xlabel("Scores")
    ax.set_ylabel("Count")
plt.show()

In [None]:
# Load CM scores
eval_score_path = "scores_output/eval_CM_scores_file_SSL_msFusion_hdim256_combinedModel_epoch25_LA2019.txt"
cm_data = np.genfromtxt(eval_score_path, dtype=str)

# Four-column score file: key in column 2, score in column 3.
cm_keys, cm_scores = cm_data[:, 2], cm_data[:, 3].astype(float)
bona_cm, spoof_cm = (cm_scores[cm_keys == label] for label in ('bonafide', 'spoof'))

# Score range, padded by 1 on each side for the axis limits.
m, n = cm_scores.min(), cm_scores.max()

# Bonafide (top) and spoof (bottom) score histograms on a shared x-range.
fig, axes = plt.subplots(2, 1)
panels = zip(axes, (bona_cm, spoof_cm), ('blue', 'orange'))
for ax, class_scores, colour in panels:
    ax.hist(class_scores, 200, color=colour)
    ax.set_xlim([m - 1, n + 1])
    ax.set_xlabel("Scores")
    ax.set_ylabel("Count")
fig.tight_layout()
plt.show()

In [None]:
# Load the CM scores of the combined epoch-25 model on the mlaad set.
eval_score_path = "scores_output/eval_CM_scores_file_SSL_msFusion_hdim256_combinedModel_epoch25_mlaad.txt"
cm_data = np.genfromtxt(eval_score_path, dtype=str)

# This file is read with the key in column 1 and the score in column 2.
cm_keys, cm_scores = cm_data[:, 1], cm_data[:, 2].astype(float)
bona_cm, spoof_cm = (cm_scores[cm_keys == label] for label in ('bonafide', 'spoof'))

# Score range, padded by 1 on each side for the axis limits.
m, n = cm_scores.min(), cm_scores.max()

# Bonafide (top) and spoof (bottom) score histograms on a shared x-range.
fig, axes = plt.subplots(2, 1)
panels = zip(axes, (bona_cm, spoof_cm), ('blue', 'orange'))
for ax, class_scores, colour in panels:
    ax.hist(class_scores, 200, color=colour)
    ax.set_xlim([m - 1, n + 1])
    ax.set_xlabel("Scores")
    ax.set_ylabel("Count")
plt.show()