In [None]:
from types import SimpleNamespace
from functools import lru_cache
import os
import time
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
import pandas as pd
import numpy as np
import scipy.io.wavfile
import scipy.fftpack
import scipy.linalg
import torch
import torch.utils.data as data
import torch.nn as nn
import torch.optim as optim
import math

In [None]:
# 95% Confidence Interval for AUC. Hanley and McNeil (1982). https://gist.github.com/doraneko94/e24643136cfb8baf03ef8a314ab9615c
def roc_auc_score_ci(y_true, y_score, positive=1):
    """Return the ROC AUC together with its 95% confidence interval.

    The standard error follows Hanley and McNeil (1982).

    Args:
        y_true: Array-like of ground-truth labels (list, tuple or ndarray).
        y_score: Array-like of predicted scores, handed to ``roc_auc_score``.
        positive: Label value counted as the positive class.

    Returns:
        tuple: ``(AUC, (lower, upper))`` with both bounds clipped to ``[0, 1]``.

    Raises:
        ValueError: If ``y_true`` does not contain both positive and
            non-positive samples (the standard error is undefined then).
    """
    # A plain list compared with a scalar yields a single bool, so the
    # element-wise counts below need an ndarray.
    y_true = np.asarray(y_true)
    N1 = int(np.count_nonzero(y_true == positive))   # positive samples
    N2 = int(y_true.size - N1)                       # all other samples
    if N1 == 0 or N2 == 0:
        raise ValueError(
            'roc_auc_score_ci needs positive and non-positive samples, '
            'got {} and {}'.format(N1, N2))

    AUC = roc_auc_score(y_true, y_score)
    Q1 = AUC / (2 - AUC)
    Q2 = 2*AUC**2 / (1 + AUC)
    SE_AUC = math.sqrt((AUC*(1 - AUC) + (N1 - 1)*(Q1 - AUC**2) + (N2 - 1)*(Q2 - AUC**2)) / (N1*N2))

    # 1.96 standard errors on each side, kept inside the valid AUC range
    lower = max(0.0, AUC - 1.96*SE_AUC)
    upper = min(1.0, AUC + 1.96*SE_AUC)
    return AUC, (lower, upper)

In [None]:
def _read_kaldi_table(path):
    """Parse a file with one ``<key> <value>`` entry per line into a dict.

    Blank lines are skipped and any run of whitespace separates the key from
    the value; later duplicates of a key overwrite earlier ones.

    Raises:
        ValueError: If a non-blank line has a key but no value.
    """
    table = dict()
    with open(path, 'rt') as table_file:
        for line_no, line in enumerate(table_file, 1):
            line = line.strip()
            if not line:
                continue  # tolerate empty / trailing blank lines
            fields = line.split(None, 1)
            if len(fields) != 2:
                raise ValueError(
                    f'{path}:{line_no}: expected "<key> <value>", got {line!r}')
            key, value = fields
            table[key] = value
    return table


# Create a dataset with (key, wave_file, target_id) entries
def make_dataset(kaldi_path, class_to_id):
    """Build the ``[key, wav, target_id]`` entries described by a data folder.

    Args:
        kaldi_path: Folder containing ``wav.scp`` (required) and ``text``
            (optional labels file).
        class_to_id: Mapping from label word to integer class id.

    Returns:
        list: One ``[key, wav, target_id]`` list per ``wav.scp`` entry, in file
        order. ``target_id`` is -1 for keys without a label (e.g. a test set
        shipped without ``text``).

    Raises:
        ValueError: If a line of either file has a key but no value.
        KeyError: If a label word is missing from ``class_to_id``.
    """
    wav_path = os.path.join(kaldi_path, 'wav.scp')   # audio files
    text_path = os.path.join(kaldi_path, 'text')     # labels

    key_to_wav = _read_kaldi_table(wav_path)
    key_to_word = _read_kaldi_table(text_path) if os.path.isfile(text_path) else dict()

    wavs = []
    for key, wav_command in key_to_wav.items():
        word = key_to_word.get(key)  # None when the key has no label
        word_id = class_to_id[word] if word is not None else -1  # default for test
        wavs.append([key, wav_command, word_id])

    return wavs

In [None]:
def wav_read(path):
    """Read a WAV file and return its samples scaled to -1..1 plus the sample rate.

    The scale factor is chosen from the sample dtype returned by
    ``scipy.io.wavfile.read``: signed integers are divided by the magnitude of
    their most negative value (32768 for 16-bit audio), unsigned 8-bit audio is
    re-centred around zero first, and floating-point audio is returned as is.

    Args:
        path: Path of the WAV file.

    Returns:
        tuple: ``(samples, sample_rate)``.
    """
    sr, y = scipy.io.wavfile.read(path)
    if np.issubdtype(y.dtype, np.floating):
        # Float WAVs are not integer-coded; dividing them by 32768 would
        # shrink the signal to almost nothing.
        return y, sr
    if y.dtype == np.uint8:
        y = (y.astype(np.float64) - 128) / 128  # unsigned 8-bit: mid-point is 128
    else:
        y = y / float(-int(np.iinfo(y.dtype).min))  # int16 -> /32768, int32 -> /2**31
    return y, sr

In [None]:
# Fetch the VGGish model from torch.hub (downloads the repository on first use).
vggish = torch.hub.load('harritaylor/torchvggish', 'vggish')
# Move it to the GPU only when one exists, so the notebook also runs on
# CPU-only machines instead of failing on an unconditional .cuda().
if torch.cuda.is_available():
    vggish.cuda()
vggish.eval()

In [None]:
# The model uses the output of VGGish (time averaged)
param_cache = dict()
def param_loader(path, max_seconds):
    """Return the time-averaged VGGish output for a wav file, cached per path.

    Args:
        path: Path of the wav file; also the cache key.
        max_seconds: Not used by this loader; kept so that callers passing it
            keep working.

    Returns:
        The tensor produced by ``vggish`` for the file, averaged over its first
        dimension when it has more than one dimension.
    """
    try:
        return param_cache[path]
    except KeyError:
        pass  # not cached yet: compute it below (other errors must surface)

    y, _ = wav_read(path)
    y = y.astype(np.float32)
    if len(y) < 16000:
        # Zero-pad short clips up to 16000 samples along the first axis only.
        # np.pad returns a new array, unlike ndarray.resize which works in
        # place and flattens multi-channel input.
        pad_width = [(0, 16000 - len(y))] + [(0, 0)] * (y.ndim - 1)
        y = np.pad(y, pad_width, mode='constant')
    # NOTE(review): the sample rate is passed as 16000 regardless of what the
    # file header says; presumably every input is 16 kHz, confirm.
    with torch.no_grad():  # pure feature extraction, no autograd graph needed
        y = vggish(y, 16000)
    if y.ndim > 1:
        y = y.mean(dim=0)
    param_cache[path] = y

    return y

In [None]:
# Target values and id mapping
def get_classes():
    """Return the class labels, the class weights and the label -> id mapping.

    Returns:
        tuple: ``(classes, weight, class_to_id)`` where ``classes`` is the list
        of label names, ``weight`` is None and ``class_to_id`` maps every label
        to its position in ``classes``.
    """
    labels = ['neg', 'pos']
    label_ids = dict(zip(labels, range(len(labels))))
    return labels, None, label_ids

In [None]:
# PyTorch Dataset
class Loader(data.Dataset):
    """Dataset yielding ``(key, params, target)`` for every entry of a data folder.

    Args:
        root: Folder with the ``wav.scp`` / ``text`` files read by ``make_dataset``.
        max_seconds: Value forwarded to the feature loader on every access.
        wav_root: Folder that the wav paths listed in ``wav.scp`` are relative
            to. Defaults to the location that used to be hard-coded.
    """

    def __init__(self, root, max_seconds=20, wav_root='../input/covid/wavs16k/'):

        classes, weight, class_to_id = get_classes()
        self.root = root
        self.wav_root = wav_root
        self.wavs = make_dataset(root, class_to_id)
        self.classes = classes
        self.weight = weight
        self.class_to_id = class_to_id
        self.loader = param_loader
        self.max_seconds = max_seconds

    def __getitem__(self, index):
        """
        Args:
            index (int): Index
        Returns:
            tuple: (key, params, target) where target is class_index of the target class.
        """
        key, path, target = self.wavs[index]
        # os.path.join gives the same string as plain concatenation for the
        # default root, and copes with a root lacking the trailing slash.
        path = os.path.join(self.wav_root, path)
        params = self.loader(path, self.max_seconds)
        return key, params, target

    def __len__(self):
        """Number of entries listed in the data folder."""
        return len(self.wavs)

## Baseline model using a pre-trained VGGish model

In [None]:
class VGGISH(nn.Module):
    """Small multilayer perceptron producing one logit per input vector.

    Args:
        input_size: Size of each input vector.
        hidden_size: Width of the hidden layer.
    """

    def __init__(self, input_size=128, hidden_size=64):
        super().__init__()

        # input -> hidden -> single output, with light dropout in between
        layers = [
            nn.Linear(input_size, hidden_size),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.1),
            nn.Linear(hidden_size, 1),
        ]
        self.classifier = nn.Sequential(*layers)

    def forward(self, x):
        """Run the perceptron on ``x`` of shape (B, E) and return shape (B)."""
        logits = self.classifier(x)     # (B, 1)
        return logits.squeeze(dim=-1)   # trailing dimension removed -> (B)

In [None]:
def train(loader, model, criterion, optimizer, epoch, cuda, log_interval, max_norm=1, verbose=True):
    """Train ``model`` for one pass over ``loader``.

    Args:
        loader: Iterable of ``(key, data, target)`` batches; ``loader.dataset``
            is only consulted when logging.
        model: Module to optimise (switched to train mode).
        criterion: Loss called as ``criterion(output, target.float())``.
        optimizer: Optimizer stepping the model parameters.
        epoch: Epoch number, used in the log line only.
        cuda: Move every batch to the GPU when true.
        log_interval: Print a status line every ``log_interval`` batches.
        max_norm: Gradient-norm clipping threshold.
        verbose: Disable the status lines when false.

    Returns:
        float: Loss averaged over all samples seen in this pass.
    """
    model.train()
    running_loss = 0
    seen = 0
    for step, batch in enumerate(loader):
        _, inputs, labels = batch
        if cuda:
            inputs = inputs.cuda()
            labels = labels.cuda()

        optimizer.zero_grad()
        batch_loss = criterion(model(inputs), labels.float())
        batch_loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), max_norm)
        optimizer.step()

        # The criterion value is weighted by the batch size so that the
        # running figure is a per-sample average.
        batch_size = len(labels)
        running_loss += batch_loss.item() * batch_size
        seen += batch_size

        if not verbose or step % log_interval != 0:
            continue
        total = len(loader.dataset)
        print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
            epoch, seen, total, 100*seen/total, running_loss/seen))
    return running_loss / seen

In [None]:
def test(loader, model, criterion, cuda, verbose=True, data_set='Test', save=None):
    model.eval()
    test_loss = 0
    tpred = []
    ttarget = []

    if save is not None:
        csv = open(save, 'wt')
        print('index,prob', file=csv)

    with torch.no_grad():
        for keys, data, target in loader:
            if cuda:
                data, target = data.cuda(), target.cuda()
            output = model(data)
            pred = output.sigmoid()
            tpred.append(pred.cpu().numpy())

            if target[0] != -1:
                loss = criterion(output, target.float()).data.item()
                test_loss += loss * len(target) # sum up batch loss 
                ttarget.append(target.cpu().numpy())

            if save is not None:
                for i, key in enumerate(keys):
                    print(f'{key},{pred[i]}', file=csv)
    
    if len(ttarget) > 0:
        test_loss /= len(loader.dataset)
        auc, auc_ci = roc_auc_score_ci(np.concatenate(ttarget), np.concatenate(tpred))
        if verbose:
            print('\n{} set: Average loss: {:.4f}, AUC: {:.1f}% ({:.1f}% - {:.1f}%)\n'.format(
                data_set, test_loss, 100 * auc, auc_ci[0]*100, auc_ci[1]*100))

        return test_loss, auc

In [None]:
# Run configuration read by the cells below
args = SimpleNamespace(
    # data folders
    train_path='../input/covid/train',
    valid_path='../input/covid/valid',
    test_path='../input/covid/test',
    # batch size for training/validation, then for testing
    batch_size=20,
    test_batch_size=20,
    # model architecture ('VGGISH' is the model defined in this notebook)
    arch='VGGISH',
    # maximum number of epochs to train
    epochs=50,
    # optimisation: learning rate, SGD momentum (SGD only), method ('sgd' | 'adam')
    lr=0.0002,
    momentum=0.9,
    optimizer='adam',
    # random seed
    seed=1234,
    # number of batches between two training status lines
    log_interval=5,
    # epochs without validation AUC improvement tolerated before stopping
    patience=10,
    # checkpoints directory
    checkpoint='.',
    # train before testing
    train=True,
    # use the GPU (when one is available)
    cuda=True,
    # number of subprocesses used for data loading
    num_workers=0,
)

In [None]:
# Use the GPU only when it is both requested and available
args.cuda = args.cuda and torch.cuda.is_available()
torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)
    print('Using CUDA with {0} GPUs'.format(torch.cuda.device_count()))


# build model
if args.arch == 'PASE':
    # NOTE(review): no PASE class is defined in the visible notebook; this
    # branch only works if one is provided elsewhere.
    model = PASE(256)
elif args.arch == 'VGGISH':
    model = VGGISH(hidden_size=64)
else:
    # Fail here with a clear message instead of a NameError on `model` below
    raise ValueError('Unknown architecture: {!r}'.format(args.arch))
if args.cuda:
    model.cuda()

# Define criterion
criterion = nn.BCEWithLogitsLoss(reduction='mean') # This loss combines a Sigmoid layer and the BCELoss in one single class.

## Train model (only the new classifier parameters are trained; the pre-trained VGGish model is not updated)

In [None]:
# loading data
if args.train:
    train_dataset = Loader(args.train_path)
    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=args.batch_size, shuffle=True, num_workers=args.num_workers)

    valid_dataset = Loader(args.valid_path)
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=args.batch_size, shuffle=False, num_workers=args.num_workers)

    # define optimizer
    if args.optimizer.lower() == 'adam':
        optimizer = optim.Adam(model.parameters(), lr=args.lr)
    else:
        optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum)

    # Create the checkpoint folder once, up front; unlike os.mkdir, makedirs
    # also handles nested paths and an already existing folder.
    os.makedirs(args.checkpoint, exist_ok=True)

    best_valid_auc = 0
    iteration = 0  # consecutive epochs without validation AUC improvement
    epoch = 1
    best_epoch = epoch

    # training with early stopping
    t0 = time.time()
    while (epoch < args.epochs + 1) and (iteration < args.patience):
        train(train_loader, model, criterion, optimizer, epoch, args.cuda, args.log_interval)
        valid_loss, valid_auc = test(valid_loader, model, criterion, args.cuda, data_set='Validation')
        # The weights of every epoch are kept; ckpt.pt records which epoch was best
        torch.save(model.state_dict(), './{}/model{:03d}.pt'.format(args.checkpoint, epoch))
        if valid_auc <= best_valid_auc:
            iteration += 1
            print('AUC was not improved, iteration {0}'.format(str(iteration)))
        else:
            print('Saving state')
            iteration = 0
            best_valid_auc = valid_auc
            best_epoch = epoch
            # Plain Python floats: a NumPy scalar in the checkpoint cannot be
            # read back by torch.load when it only accepts weights-safe types.
            state = {
                'valid_auc': float(valid_auc),
                'valid_loss': float(valid_loss),
                'epoch': epoch,
            }
            torch.save(state, './{}/ckpt.pt'.format(args.checkpoint))
        epoch += 1
        print(f'Elapsed seconds: ({time.time() - t0:.0f}s)')
    print(f'Best AUC: {best_valid_auc*100:.1f}% on epoch {best_epoch}')

## Test Model

In [None]:
test_dataset = Loader(args.test_path)
test_loader = torch.utils.data.DataLoader(
    test_dataset, batch_size=args.test_batch_size, shuffle=False, num_workers=args.num_workers)

# get best epoch and model
# map_location='cpu' lets a checkpoint written on a GPU machine be read on a
# CPU-only one; load_state_dict then copies the values onto the model's device.
state = torch.load('./{}/ckpt.pt'.format(args.checkpoint), map_location='cpu')
epoch = state['epoch']
print("Testing model (epoch {})".format(epoch))
model.load_state_dict(
    torch.load('./{}/model{:03d}.pt'.format(args.checkpoint, epoch), map_location='cpu'))
if args.cuda:
    model.cuda()

# Write one probability per test key to the submission file
results = 'submission.csv'
print("Saving results in {}".format(results))
test(test_loader, model, criterion, args.cuda, save=results)