In [None]:
! apt-get install libsox-fmt-all
# install the sox command line tool
! apt-get install sox
# install pysox
! pip install sox

! pip install torchcontrib
! pip install neptune-client

In [2]:
import argparse
import json
import os
import sys
import warnings
import requests
from importlib import import_module
from pathlib import Path
from shutil import copy
from typing import Dict, List, Union
import pandas as pd
import soundfile as sf
import numpy as np
from sklearn.model_selection import train_test_split
import neptune.new as neptune
from tqdm import tqdm

import torch
import torch.nn as nn
from torch import Tensor
from torch.utils.data import DataLoader, Dataset
from torch.cuda.amp import GradScaler, autocast
# from torch.utils.tensorboard import SummaryWriter
from torchcontrib.optim import SWA
import torchaudio

from data_utilsV2 import (Dataset_ASVspoof2019_train, genSpoof_list)
from evaluation import calculate_tDCF_EER
from utils import create_optimizer, seed_worker, set_seed, str_to_bool

warnings.filterwarnings("ignore", category=FutureWarning)

seed = 123

torch.cuda.empty_cache()

In [3]:
# The API token is a secret and must not live in the notebook: export it as
# NEPTUNE_API_TOKEN before starting the kernel.  The token that used to be
# hard-coded here has been exposed and should be revoked in the Neptune UI.
tracker = neptune.init(
    project="nipdep/sp-cup",
    api_token=os.environ.get("NEPTUNE_API_TOKEN"),
)

https://app.neptune.ai/nipdep/sp-cup/e/SPCUP-79
Remember to stop your run once you’ve finished logging your metadata (https://docs.neptune.ai/api-reference/run#.stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.


In [4]:
# Local directory that holds the downloaded sample clips; created on first run.
_SAMPLE_DIR = "_assets"
os.makedirs(_SAMPLE_DIR, exist_ok=True)

# Remote locations of the room impulse response, speech and background-noise clips.
SAMPLE_RIR_URL = "https://pytorch-tutorial-assets.s3.amazonaws.com/VOiCES_devkit/distant-16k/room-response/rm1/impulse/Lab41-SRI-VOiCES-rm1-impulse-mc01-stu-clo.wav"  # noqa: E501
SAMPLE_WAV_SPEECH_URL = "https://pytorch-tutorial-assets.s3.amazonaws.com/VOiCES_devkit/source-16k/train/sp0307/Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042.wav"  # noqa: E501
SAMPLE_NOISE_URL = "https://pytorch-tutorial-assets.s3.amazonaws.com/VOiCES_devkit/distant-16k/distractors/rm1/babb/Lab41-SRI-VOiCES-rm1-babb-mc01-stu-clo.wav"  # noqa: E501

# Where each of those clips is stored locally.
SAMPLE_RIR_PATH = os.path.join(_SAMPLE_DIR, "rir.wav")
SAMPLE_WAV_SPEECH_PATH = os.path.join(_SAMPLE_DIR, "speech.wav")
SAMPLE_NOISE_PATH = os.path.join(_SAMPLE_DIR, "bg.wav")

In [5]:
def _fetch_data():
    """Download the sample RIR, speech and noise clips to their local paths.

    Each URL is fetched completely and checked for an HTTP error before the
    destination file is opened, so a failed request never leaves an empty or
    error-page file behind.

    Raises:
        requests.HTTPError: if a server answers with an error status.
        requests.RequestException: on connection problems or timeouts.
    """
    downloads = [
        (SAMPLE_RIR_URL, SAMPLE_RIR_PATH),
        (SAMPLE_WAV_SPEECH_URL, SAMPLE_WAV_SPEECH_PATH),
        (SAMPLE_NOISE_URL, SAMPLE_NOISE_PATH),
    ]
    for url, path in downloads:
        # Without a timeout a stalled connection would block the cell forever.
        response = requests.get(url, timeout=60)
        response.raise_for_status()
        with open(path, "wb") as file_:
            file_.write(response.content)


_fetch_data()

In [6]:
def _get_sample(path, resample=None):
    """Load the audio file at ``path`` through a SoX effect chain.

    The chain always starts with ``remix 1``.  When ``resample`` is truthy a
    low-pass at half of ``resample`` followed by a rate conversion to
    ``resample`` is appended.

    Returns whatever ``torchaudio.sox_effects.apply_effects_file`` returns.
    """
    chain = [["remix", "1"]]
    if resample:
        chain += [
            ["lowpass", f"{resample // 2}"],
            ["rate", f"{resample}"],
        ]
    return torchaudio.sox_effects.apply_effects_file(path, effects=chain)

def get_noise_sample(*, resample=None):
    """Load the background-noise clip stored at ``SAMPLE_NOISE_PATH``.

    Thin wrapper around ``_get_sample``: ``resample`` (keyword-only) is
    forwarded unchanged and the helper's return value is passed straight
    through.
    """
    return _get_sample(SAMPLE_NOISE_PATH, resample=resample)

noise, _ = get_noise_sample()

***
__File Operations__

In [None]:
! wget "https://www.dropbox.com/s/36yqmymkva2bwdi/spcup_2022_training_part1.zip?dl=1" -c -O 'spcup_2022_training_part1.zip'
! wget "https://www.dropbox.com/s/wsmlthhri29fb79/spcup_2022_unseen.zip?dl=1" -c -O 'spcup_2022_unseen.zip'

!unzip "./spcup_2022_training_part1.zip" -d "./spcup_2022_training/"
!unzip "./spcup_2022_unseen.zip" -d "./spcup_2022_unseen/"

!rm "./spcup_2022_training_part1.zip"
!rm "./spcup_2022_unseen.zip"

# Merge the label tables of the two training archives into one shuffled file.
df1 = pd.read_csv('./spcup_2022_training/spcup_2022_training_part1/labels.csv')
df2 = pd.read_csv('./spcup_2022_unseen/spcup_2022_unseen/labels.csv')
# Seed the shuffle so that re-running the notebook produces the same label file.
df3 = pd.concat([df1, df2]).sample(frac=1, random_state=seed)

df3.to_csv('./final_labels.csv', index=False)
!rm './spcup_2022_unseen/spcup_2022_unseen/labels.csv'

!cp -a "./spcup_2022_unseen/spcup_2022_unseen/". "./spcup_2022_training/spcup_2022_training_part1/"

! wget "https://www.dropbox.com/s/zylz07o2z0x308g/spcup_2022_eval_part2.zip?dl=1" -c -O "./spcup_2022_eval_part2.zip"
! unzip "./spcup_2022_eval_part2.zip" -d "./spcup_2022_eval_part2/"
!rm "./spcup_2022_eval_part2.zip"



***

In [8]:
def get_model(model_config: Dict, device: torch.device):
    """Instantiate the network named in ``model_config`` and move it to ``device``.

    The module called ``model_config["architecture"]`` is imported and its
    ``Model`` class is constructed with ``model_config`` as the only
    argument.  The total number of parameter elements is printed.

    Args:
        model_config: model settings; must contain the key ``"architecture"``.
        device: device the model is moved to.

    Returns:
        The constructed model after ``.to(device)``.
    """
    module = import_module("{}".format(model_config["architecture"]))
    model_cls = getattr(module, "Model")
    model = model_cls(model_config).to(device)
    # numel() counts elements without reshaping, so it also works for
    # parameters that are not contiguous in memory (view(-1) raises on those).
    nb_params = sum(param.numel() for param in model.parameters())
    print("no. model params:{}".format(nb_params))

    return model

In [9]:
def get_loader(
        database_path: str,
        label_path: str,
        config: dict) -> List[torch.utils.data.DataLoader]:
    """Build the training and validation DataLoaders from a single label file.

    The CSV at ``label_path`` is read and its ``track`` / ``algorithm``
    columns are split into a training and a validation part, stratified on
    ``algorithm``.  Both parts are wrapped in ``Dataset_ASVspoof2019_train``
    with ``database_path`` as ``base_dir``.

    Args:
        database_path: directory handed to the dataset as ``base_dir``.
        label_path: CSV file with the columns ``track`` and ``algorithm``.
        config: must contain ``batch_size``; ``split_ratio`` (fraction of the
            data held out for validation) is optional and defaults to 0.2.

    Returns:
        ``(trn_loader, eval_loader)``.
    """
    label_df = pd.read_csv(label_path)
    X, y = label_df['track'].values, label_df['algorithm'].values

    # Stratified train/validation split.  It is seeded so that the validation
    # set stays the same across runs; otherwise a re-run can evaluate on
    # samples that an earlier run trained on.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y,
        stratify=y,
        test_size=config.get("split_ratio", 0.2),
        random_state=seed)

    gen = torch.Generator()
    gen.manual_seed(seed)

    train_set = Dataset_ASVspoof2019_train(list_IDs=X_train,
                                           labels=y_train,
                                           base_dir=database_path, is_train=True, is_dest=True)
    trn_loader = DataLoader(train_set,
                            batch_size=config["batch_size"],
                            shuffle=True,
                            drop_last=True,
                            pin_memory=True,
                            worker_init_fn=seed_worker,
                            generator=gen)

    # Held-out part of the labelled data; keeps every sample (no drop_last)
    # and a fixed order so metrics cover the whole validation set.
    eval_set = Dataset_ASVspoof2019_train(list_IDs=X_test,
                                          labels=y_test,
                                          base_dir=database_path, is_train=False, is_dest=True)
    eval_loader = DataLoader(eval_set,
                             batch_size=config["batch_size"],
                             shuffle=False,
                             drop_last=False,
                             pin_memory=True,
                             worker_init_fn=seed_worker,
                             generator=gen)

    return trn_loader, eval_loader

In [10]:
def produce_evaluation_file(
    data_loader: DataLoader,
    model,
    device: torch.device,
    save_path: str,
    trial_path: str) -> None:
    """Score every utterance of ``data_loader`` and write a score file.

    The model is switched to eval mode and run without gradients; column 1 of
    its second output is taken as the score.  Each line of ``trial_path``
    must consist of five space-separated fields whose second field is the
    utterance id, in the same order as the loader yields utterances.  One
    line ``"<utt_id> <src> <key> <score>"`` is written to ``save_path`` per
    utterance, where ``src`` and ``key`` are the fourth and fifth trial fields.

    Raises:
        AssertionError: if the number of trial lines and scored utterances
            differ, or if an utterance id does not match its trial line.
    """
    model.eval()
    with open(trial_path, "r") as trial_file:
        protocol_rows = trial_file.readlines()

    utt_names, utt_scores = [], []
    for inputs, names in data_loader:
        inputs = inputs.to(device)
        with torch.no_grad():
            _, logits = model(inputs)
            scores = logits[:, 1].data.cpu().numpy().ravel()
        utt_names.extend(names)
        utt_scores.extend(scores.tolist())

    assert len(protocol_rows) == len(utt_names) == len(utt_scores)

    with open(save_path, "w") as out_file:
        for name, score, row in zip(utt_names, utt_scores, protocol_rows):
            _, utt_id, _, src, key = row.strip().split(' ')
            assert name == utt_id
            out_file.write("{} {} {} {}\n".format(utt_id, src, key, score))
    print("Scores saved to {}".format(save_path))

In [11]:
def train_epoch(
    trn_loader: DataLoader,
    model,
    optim: Union[torch.optim.SGD, torch.optim.Adam],
    device: torch.device,
    scheduler: torch.optim.lr_scheduler,
    config: argparse.Namespace):
    """Train the model for one pass over ``trn_loader``.

    Args:
        trn_loader: yields ``(batch_x, batch_y)`` pairs; labels are flattened
            and cast to int64 before the loss is computed.
        model: called as ``model(batch_x)``; its second output is used as the
            class logits.
        optim: optimizer stepped once per batch.
        device: device the batches are moved to.
        scheduler: stepped once per batch when
            ``config["optim_config"]["scheduler"]`` is ``"cosine"`` or
            ``"keras_decay"``; must be ``None`` otherwise.
        config: mapping that provides ``config["optim_config"]["scheduler"]``.

    Returns:
        ``(mean_loss, accuracy)``: the sample-weighted mean cross-entropy
        loss and the accuracy in percent over the epoch.

    Raises:
        ValueError: if a scheduler object is passed although the configured
            scheduler name is not one that is stepped per batch, or if
            ``trn_loader`` yields no batches.
    """
    # The scheduler decision does not change between batches, so it is
    # validated once up front instead of after every optimizer step.
    step_scheduler = config["optim_config"]["scheduler"] in ["cosine", "keras_decay"]
    if not step_scheduler and scheduler is not None:
        raise ValueError("scheduler error, got:{}".format(scheduler))

    model.train()
    criterion = nn.CrossEntropyLoss()

    running_loss = 0.0
    num_total = 0
    correct_train, target_count = 0, 0

    with tqdm(trn_loader, unit="batch") as tepoch:
        for batch_x, batch_y in tepoch:
            batch_size = batch_x.size(0)
            batch_x = batch_x.to(device)
            batch_y = batch_y.view(-1).type(torch.int64).to(device)

            # NOTE(review): the forward pass runs under autocast but the
            # backward pass is not loss-scaled (the GradScaler calls had been
            # disabled).  Confirm this is intended; autocast is normally
            # paired with a GradScaler.
            with autocast():
                _, batch_out = model(batch_x)
                batch_loss = criterion(batch_out, batch_y)

            optim.zero_grad()
            batch_loss.backward()
            optim.step()
            if step_scheduler:
                scheduler.step()

            running_loss += batch_loss.item() * batch_size
            num_total += batch_size

            # accuracy
            _, predicted = torch.max(batch_out.detach(), 1)
            target_count += batch_y.size(0)
            correct_train += (batch_y == predicted).sum().item()

    if num_total == 0 or target_count == 0:
        # e.g. drop_last=True with fewer samples than one batch
        raise ValueError("trn_loader produced no batches; nothing to train on")

    train_acc = (100 * correct_train) / target_count
    return running_loss / num_total, train_acc

In [12]:
def eval_epoch(
    trn_loader: DataLoader,
    model,
    device: torch.device,
    config: argparse.Namespace):
    """Evaluate the model for one pass over ``trn_loader`` without updating it.

    Args:
        trn_loader: yields ``(batch_x, batch_y)`` pairs of the evaluation
            data (the parameter keeps its historical name); labels are
            flattened and cast to int64, as in ``train_epoch``.
        model: called as ``model(batch_x)``; its second output is used as the
            class logits.
        device: device the batches are moved to.
        config: accepted for signature parity with ``train_epoch``; not read.

    Returns:
        ``(mean_loss, accuracy)``: the sample-weighted mean cross-entropy
        loss and the accuracy in percent.

    Raises:
        ValueError: if ``trn_loader`` yields no batches.
    """
    model.eval()
    criterion = nn.CrossEntropyLoss()

    running_loss = 0.0
    num_total = 0
    correct, target_count = 0, 0

    # No gradients are needed for evaluation; disabling them avoids building
    # the autograd graph for every batch.
    with torch.no_grad(), tqdm(trn_loader, unit="batch") as tepoch:
        for batch_x, batch_y in tepoch:
            batch_size = batch_x.size(0)
            batch_x = batch_x.to(device)
            batch_y = batch_y.view(-1).type(torch.int64).to(device)

            with autocast():
                _, batch_out = model(batch_x)
                batch_loss = criterion(batch_out, batch_y)

            running_loss += batch_loss.item() * batch_size
            num_total += batch_size

            # accuracy
            _, predicted = torch.max(batch_out, 1)
            target_count += batch_y.size(0)
            correct += (batch_y == predicted).sum().item()

    if num_total == 0 or target_count == 0:
        raise ValueError("trn_loader produced no batches; nothing to evaluate")

    val_acc = (100 * correct) / target_count
    return running_loss / num_total, val_acc

In [13]:
test_df = pd.read_csv('./spcup_2022_eval_part2/spcup_2022_eval_part2/labels_eval_part2.csv')
test_list = test_df['track'].values
test_list

array(['36d06279048026b1059236c4b5024f78.wav',
       '49550c0090367cd3bf8353b7c4a8f62a.wav',
       'b336ac2a2b8cf99ff922bbd3d29a68a3.wav', ...,
       'a9df966d00b7605602c11ae840f58453.wav',
       '139db88a8579c14fd22013e19897b015.wav',
       '803baa31a5ee19ebd30bd9d71031f95d.wav'], dtype=object)

In [14]:
# Experiment configuration: data locations, training schedule, model and
# optimizer settings.  The dictionary is logged to the tracker and handed to
# main() through `args.config`.
config = {
    "database_path": "./spcup_2022_training/spcup_2022_training_part1/",
    "label_path": "./final_labels.csv",
    "asv_score_path": "ASVspoof2019_LA_asv_scores/ASVspoof2019.LA.asv.eval.gi.trl.scores.txt",
    "model_path": "./models/weights/AASIST.pth",
    # Must be the directory that contains the files listed in `file_dev`.
    # `test_list` is read from the eval_part2 label file and eval_part2 is the
    # only evaluation archive this notebook downloads, so the previous
    # eval_part1 path pointed at files that are not there.
    "dev_database_path": './spcup_2022_eval_part2/spcup_2022_eval_part2/',
    "file_dev": test_list,
    "batch_size": 16,
    "num_epochs": 16,
    "loss": "CCE",
    "track": "LA",
    "eval_all_best": "True",
    "eval_output": "eval_scores_using_best_dev_model.txt",
    "cudnn_deterministic_toggle": "True",
    "cudnn_benchmark_toggle": "False",
    "model_config": {
        "architecture": "aasist",
        "nb_samp": 64600,
        "first_conv": 128,
        "filts": [70, [1, 32], [32, 32], [32, 64], [64, 64]],
        "gat_dims": [64, 32],
        "pool_ratios": [0.5, 0.7, 0.5, 0.5],
        "temperatures": [2.0, 2.0, 100.0, 100.0]
    },
    "optim_config": {
        "optimizer": "adam",
        "amsgrad": "False",
        "base_lr": 0.0001,
        "lr_min": 0.00005,
        "betas": [0.9, 0.999],
        "weight_decay": 0.0001,
        "scheduler": "cosine"
    }
}

In [15]:
tracker["parameters"] = config

In [16]:
args = argparse.Namespace()
args.config = config
args.seed = seed
args.output_dir = './tmp/out'
args.comment = False

In [17]:
def main(args):
    """Train the model and keep the checkpoint with the best validation accuracy.

    Args:
        args: namespace whose ``config`` attribute is the experiment
            dictionary (paths, ``num_epochs``, ``model_config``,
            ``optim_config``).

    Side effects:
        Writes ``./best_audio_model.pth`` whenever the validation accuracy
        improves, logs per-epoch loss and accuracy to the module-level
        ``tracker``, and adds ``epochs`` / ``steps_per_epoch`` to
        ``config["optim_config"]``.
    """
    # Use the configuration that was passed in rather than a mix of
    # `args.config` and the notebook-level `config` global.
    config = args.config

    # define database related paths
    database_path = Path(config["database_path"])
    label_path = Path(config['label_path'])

    # NOTE(review): `set_seed` is imported at the top of the notebook but
    # never called, so model initialisation is not seeded.  Presumably it
    # should be invoked here - confirm its signature in utils first.

    # set device
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print("Device: {}".format(device))

    # define model architecture
    model = get_model(config["model_config"], device)

    # define dataloaders
    trn_loader, eval_loader = get_loader(database_path, label_path, config)

    # get optimizer and scheduler
    optim_config = config["optim_config"]
    optim_config["epochs"] = config["num_epochs"]
    optim_config["steps_per_epoch"] = len(trn_loader)
    optimizer, scheduler = create_optimizer(model.parameters(), optim_config)

    best_acc = 0.0
    # Training
    for epoch in range(config["num_epochs"]):
        print("Start training epoch{:03d}".format(epoch))
        training_loss, training_acc = train_epoch(trn_loader, model, optimizer, device,
                                                  scheduler, config)
        eval_loss, eval_acc = eval_epoch(eval_loader, model, device, config)

        if eval_acc > best_acc:
            # Remember the new best score; without this every epoch with a
            # non-zero accuracy overwrote the "best" checkpoint.
            best_acc = eval_acc
            torch.save(model.state_dict(), "./best_audio_model.pth")
        tracker['train/loss'].log(training_loss)
        tracker['train/acc'].log(training_acc)
        tracker['eval/loss'].log(eval_loss)
        tracker['eval/acc'].log(eval_acc)
        print(f'[{epoch}] Training Loss : {training_loss} / Training Accuracy : {training_acc} | Eval Loss : {eval_loss} / Eval Accuracy : {eval_acc}')

In [18]:
main(args)

Device: cuda
no. model params:298510
Start training epoch000


  1%|          | 2/300 [00:13<33:01,  6.65s/batch]


KeyboardInterrupt: ignored

In [None]:
# set device
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Device: {}".format(device))

In [None]:
best_path = './best_audio_model.pth'
pred_model = get_model(args.config["model_config"], device)
# map_location lets a checkpoint written on a GPU machine load on a CPU-only one.
best_state = torch.load(best_path, map_location=device)
# strict=False tolerates key mismatches; report them instead of silently
# predicting with partly uninitialised weights.
load_report = pred_model.load_state_dict(best_state, strict=False)
if load_report.missing_keys or load_report.unexpected_keys:
    print("checkpoint mismatch - missing: {} / unexpected: {}".format(
        load_report.missing_keys, load_report.unexpected_keys))
pred_model = pred_model.to(device)
pred_model.eval()

In [None]:
def pad(x, max_len=64600):
    """Return ``x`` cut or repeat-padded to exactly ``max_len`` samples.

    A signal that is long enough is truncated from the start.  A shorter one
    is repeated back to back until it covers ``max_len`` samples and then cut.
    """
    n_samples = x.shape[0]
    if n_samples < max_len:
        # one repetition more than strictly fits, then trim the excess
        reps = int(max_len / n_samples) + 1
        tiled = np.tile(x, (1, reps))
        return tiled[0, :max_len]
    return x[:max_len]


def pad_random(x: np.ndarray, max_len: int = 64600):
    """Return ``max_len`` samples of ``x``: a random crop, or repeat-padding.

    Args:
        x: input signal; its first axis is the sample axis.
        max_len: number of samples to return.

    Returns:
        A window of ``max_len`` consecutive samples starting at a uniformly
        drawn offset when ``x`` is long enough, otherwise ``x`` repeated back
        to back and cut to ``max_len``.

    Raises:
        ValueError: if ``x`` is empty.
    """
    x_len = x.shape[0]
    if x_len == 0:
        raise ValueError("cannot pad or crop an empty signal")

    # if duration is already long enough
    if x_len >= max_len:
        # Valid start offsets are 0 .. x_len - max_len inclusive.  The "+ 1"
        # makes the last one reachable and keeps randint from being called
        # with 0 (which raises) when x_len == max_len.
        stt = np.random.randint(x_len - max_len + 1)
        return x[stt:stt + max_len]

    # if too short
    num_repeats = int(max_len / x_len) + 1
    padded_x = np.tile(x, (num_repeats))[:max_len]
    return padded_x

class Dataset_ASVspoof2019_devNeval(Dataset):
    """Unlabelled audio dataset: yields ``(waveform, key)`` pairs.

    Each key is a file name relative to ``base_dir``.  The file is read with
    ``soundfile`` and brought to ``self.cut`` samples with ``pad``.
    """

    def __init__(self, list_IDs, base_dir):
        """Store the utterance keys and the directory they live in.

        Args:
            list_IDs: sequence of strings, one utterance key per item.
            base_dir: directory that contains the audio files.
        """
        self.base_dir = base_dir
        self.list_IDs = list_IDs
        self.cut = 64600  # take ~4 sec audio (64600 samples)

    def __len__(self):
        """Number of utterances."""
        return len(self.list_IDs)

    def __getitem__(self, index):
        """Return the padded waveform tensor and the key of item ``index``."""
        utt_key = self.list_IDs[index]
        waveform, _ = sf.read(f"{self.base_dir}/{utt_key}")
        fixed_len = pad(waveform, self.cut)
        return Tensor(fixed_len), utt_key

In [None]:
dev_set = Dataset_ASVspoof2019_devNeval(list_IDs=config['file_dev'],
                                        base_dir=config['dev_database_path'])
dev_loader = DataLoader(dev_set,
                        batch_size=config["batch_size"],
                        shuffle=False,
                        drop_last=False,
                        pin_memory=True)

In [None]:
# Predict a class index for every file of the evaluation set.
pred_arr = []   # predicted class index per file
idxs = []       # file keys, in the same order as pred_arr
for data, idx in dev_loader:
    data = data.to(device)
    with torch.no_grad():
        _, bt_preds = pred_model(data)
    _, predicted = torch.max(bt_preds, 1)
    # The leftover debug prints and the `break` that sat in front of these
    # two lines meant nothing was ever collected and result.csv came out empty.
    pred_arr.extend(predicted.cpu().tolist())
    idxs.extend(idx)

In [None]:
# _df = pd.read_csv('./spcup_2022_eval_part1/spcup_2022_eval_part1/labels_eval_part1.csv')
pred_df = pd.DataFrame({'track': idxs, 'label': pred_arr})
pred_df.to_csv('./result.csv')

pred_df.head()

In [None]:
tracker.stop()