In [1]:
from types import SimpleNamespace
from functools import lru_cache
import os
import time
import math

import pandas as pd
import numpy as np
import scipy.io.wavfile
import scipy.fftpack
import scipy.linalg

from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score

import torch
import torch.utils.data as data
import torch.nn as nn
import torch.optim as optim

# Report whether PyTorch can see a CUDA device.
torch.cuda.is_available()


True

In [2]:
# import pkg_resources
# pkg_resources.get_distribution('pase').activate()
import pase
from pase.models.frontend import wf_builder

# PyTorch QRNN (torchqrnn) must be installed for the PASE encoder
# import torchqrnn
from torchqrnn import QRNN

import sys
# Add the project's embed/ directory to the import path; presumably this is where
# the `models`, `loader` and `trainer` modules imported below live -- TODO confirm.
sys.path.append("../yaafelib/embed/")

from models import pnet,ModPASE
from loader import Loader
from trainer import train,test

Current Model keys:  78
Current Pt keys:  78
Loading matching keys:  ['denseskips.0.weight', 'denseskips.1.weight', 'denseskips.2.weight', 'denseskips.3.weight', 'denseskips.4.weight', 'denseskips.5.weight', 'denseskips.6.weight', 'blocks.0.conv.low_hz_', 'blocks.0.conv.band_hz_', 'blocks.0.norm.weight', 'blocks.0.norm.bias', 'blocks.0.norm.running_mean', 'blocks.0.norm.running_var', 'blocks.0.norm.num_batches_tracked', 'blocks.0.act.weight', 'blocks.1.conv.weight', 'blocks.1.conv.bias', 'blocks.1.norm.weight', 'blocks.1.norm.bias', 'blocks.1.norm.running_mean', 'blocks.1.norm.running_var', 'blocks.1.norm.num_batches_tracked', 'blocks.1.act.weight', 'blocks.2.conv.weight', 'blocks.2.conv.bias', 'blocks.2.norm.weight', 'blocks.2.norm.bias', 'blocks.2.norm.running_mean', 'blocks.2.norm.running_var', 'blocks.2.norm.num_batches_tracked', 'blocks.2.act.weight', 'blocks.3.conv.weight', 'blocks.3.conv.bias', 'blocks.3.norm.weight', 'blocks.3.norm.bias', 'blocks.3.norm.running_mean', 'blocks.3

In [None]:

# Build the training and validation DataLoaders.
# NOTE(review): `args` is only created in a later cell, and its train_path /
# valid_path entries are commented out there, so this cell relies on kernel
# state from another execution order -- confirm before Restart & Run All.
def _build_loader(source, shuffle):
    """Wrap `source` in a Loader dataset and return (dataset, DataLoader)."""
    dataset = Loader(source)
    loader = torch.utils.data.DataLoader(
        dataset,
        batch_size=args.batch_size,
        shuffle=shuffle,
        num_workers=args.num_workers,
        pin_memory=args.cuda,
        sampler=None,
    )
    return dataset, loader


# The training loader shuffles; the validation loader keeps shuffle=None.
train_dataset, train_loader = _build_loader(args.train_path, shuffle=True)
valid_dataset, valid_loader = _build_loader(args.valid_path, shuffle=None)



## Baseline model using a pre-trained [PASE+ model](https://github.com/santi-pdp/pase), LSTM, Average Pooling, and Max Pooling

In [26]:
# Run configuration: every tunable value lives on `args` and is read by later cells.
# The dataset-source entries are currently disabled and kept only for reference:
#   train_path = traindf   ('../input/covid/train')   train data folder
#   valid_path = valdf     ('../input/covid/valid')   valid data folder
#   test_path  = tessdf    ('../input/covid/test')    test data folder
args = SimpleNamespace(**{
    # --- batching ---
    'batch_size': 32,           # training and valid batch size
    'test_batch_size': 20,      # batch size for testing
    # --- model ---
    'arch': 'PASE',             # PASE, VGG11, VGG13, VGG16, VGG19
    # --- optimisation ---
    'epochs': 50,               # maximum number of epochs to train
    'lr': 0.0001,               # learning rate
    'momentum': 0.9,            # SGD momentum, for SGD only
    'optimizer': 'adam',        # optimization method: sgd | adam
    'seed': 1234,               # random seed
    # --- logging / early stopping / checkpoints ---
    'log_interval': 5,          # batches to wait between training-status logs
    'patience': 10,             # epochs without improvement before training stops
    'checkpoint': '.',          # checkpoints directory
    'train': True,              # train before testing
    # --- hardware ---
    'cuda': True,               # use gpu
    'num_workers': 2,           # subprocesses used for data loading
    'grad_clip': 1.0,           # gradient clipping
})

In [None]:
from loader import traindf,testdf
# Split the held-out data (`testdf`) 50/50 into a validation part and a test part.
# The shuffle is seeded with args.seed so the same rows land in each part on every
# run; an unseeded split makes validation/test scores non-comparable between runs.
valdf, tessdf = train_test_split(
    testdf, test_size=0.5, shuffle=True, random_state=args.seed)

# Data source for each split, keyed by split name.
datalists = dict(
    train = traindf,
    test = tessdf,
    val = valdf
)



In [None]:
# Build one DataLoader per split in `datalists`.
# Only the training split is shuffled; validation/test keep their order so that
# saved predictions line up with the input rows, matching the standalone
# valid/test loader cells, which do not shuffle either. The test split uses
# args.test_batch_size, as the standalone test loader does.
dataloaders = {}
for split, source in datalists.items():
    dataset = Loader(source)
    dataloaders[split] = torch.utils.data.DataLoader(
        dataset,
        batch_size=args.test_batch_size if split == 'test' else args.batch_size,
        shuffle=(split == 'train'),
        num_workers=args.num_workers, pin_memory=args.cuda, sampler=None)


In [25]:
# Inspect the split-name -> DataLoader mapping built above.
dataloaders

{'train': <torch.utils.data.dataloader.DataLoader at 0x7fb994240b20>,
 'test': <torch.utils.data.dataloader.DataLoader at 0x7fb9d077e790>,
 'val': <torch.utils.data.dataloader.DataLoader at 0x7fb99423c580>}

In [9]:
# Use the GPU only when it was requested AND a CUDA device is actually available.
args.cuda = args.cuda and torch.cuda.is_available()
torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)
    print('Using CUDA with {0} GPUs'.format(torch.cuda.device_count()))


# build model
# freeze PASE+ parameters
for param in pnet.parameters():
    param.requires_grad = False
if args.arch == 'PASE':
    model = ModPASE(pnet,input_size=256,num_layers=3)
else:
    # Fail here with a clear message instead of a NameError on `model` further down.
    raise ValueError("Unsupported args.arch {!r}: only 'PASE' is built in this notebook".format(args.arch))
if args.cuda:
    model.cuda()
    n_gpus = torch.cuda.device_count()
    if n_gpus > 1:
        # Use at most four GPUs (the original setting), but never list a device
        # index that does not exist on machines with two or three GPUs.
        model = nn.DataParallel(model, device_ids=list(range(min(n_gpus, 4))))

# Define criterion
# BCEWithLogitsLoss combines a Sigmoid layer and the BCELoss in one single class.
# NOTE(review): the recorded validation losses are negative (e.g. -2.1367), which
# binary cross-entropy cannot produce for targets inside [0, 1] -- check the label
# values coming out of the loader.
criterion = nn.BCEWithLogitsLoss(reduction='mean')

Using CUDA with 5 GPUs


## Train model (Only new parameters)

In [10]:

# Pick the optimizer named by args.optimizer; any value other than 'adam'
# (case-insensitive) falls back to SGD with momentum.
wants_adam = args.optimizer.lower() == 'adam'
optimizer = (
    optim.Adam(model.parameters(), lr=args.lr)
    if wants_adam
    else optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum)
)

# Early-stopping bookkeeping used by the training loop.
epoch = 1              # current epoch number (1-based)
best_epoch = epoch     # epoch with the best validation AUC so far
best_valid_auc = 0     # best validation AUC so far
iteration = 0          # consecutive epochs without AUC improvement


In [11]:
# Baseline validation pass before any training. Pass the resolved args.cuda flag
# instead of forcing CUDA, as the calls inside the training loop do.
valid_loss, valid_auc = test(valid_loader, model, criterion, cuda=args.cuda, data_set='Validation')


  result = _VF.lstm(input, hx, self._flat_weights, self.bias, self.num_layers,



Validation set: Average loss: 0.6289, AUC: 44.3% (40.9% - 47.7%)



In [12]:
# Check whether the checkpoint directory configured in args already exists.
os.path.isdir(args.checkpoint)

True

In [13]:

# Training with early stopping: run until args.epochs epochs have completed, or
# until the validation AUC has failed to improve for args.patience epochs in a row.
t0 = time.time()
while (epoch <= args.epochs) and (iteration < args.patience):
    train(train_loader, model, criterion, optimizer, epoch, args.cuda, args.log_interval)
    valid_loss, valid_auc = test(valid_loader, model, criterion, args.cuda, data_set='Validation')

    ## saving
    # makedirs(exist_ok=True) creates missing parent directories too and does not
    # fail when the directory already exists, so one call per epoch is enough.
    os.makedirs(args.checkpoint, exist_ok=True)
    # Weights are written for every epoch; the test cell reloads the best epoch's
    # file using the epoch number stored in ckpt.pt.
    torch.save(model.state_dict(), './{}/model{:03d}.pt'.format(args.checkpoint, epoch))
    if valid_auc <= best_valid_auc:
        iteration += 1
        print('AUC was not improved, iteration {0}'.format(str(iteration)))
    else:
        print('Saving state')
        iteration = 0
        best_valid_auc = valid_auc
        best_epoch = epoch
        state = {
            'valid_auc': valid_auc,
            'valid_loss': valid_loss,
            'epoch': epoch,
        }
        torch.save(state, './{}/ckpt.pt'.format(args.checkpoint))
    epoch += 1
    print(f'Elapsed seconds: ({time.time() - t0:.0f}s)')
print(f'Best AUC: {best_valid_auc*100:.1f}% on epoch {best_epoch}')

  result = _VF.lstm(input, hx, self._flat_weights, self.bias, self.num_layers,



Validation set: Average loss: -2.1367, AUC: 48.7% (45.2% - 52.1%)

Saving state
Elapsed seconds: (66s)


  result = _VF.lstm(input, hx, self._flat_weights, self.bias, self.num_layers,



Validation set: Average loss: -6.6372, AUC: 48.5% (45.0% - 52.0%)

AUC was not improved, iteration 1
Elapsed seconds: (135s)


  result = _VF.lstm(input, hx, self._flat_weights, self.bias, self.num_layers,




KeyboardInterrupt: 

In [17]:
# Re-print the best validation AUC and the epoch at which it was reached.
print(f'Best AUC: {best_valid_auc*100:.1f}% on epoch {best_epoch}')

Best AUC: 71.0% on epoch 28


## Test Model

In [18]:
# Build the test loader. `args.test_path` is commented out in the args cell, so
# fall back to the `tessdf` split produced together with `valdf`; an explicit
# args.test_path still takes precedence when it is set.
test_source = getattr(args, 'test_path', tessdf)
test_dataset = Loader(test_source)
test_loader = torch.utils.data.DataLoader(
    test_dataset, batch_size=args.test_batch_size, shuffle=False,
    num_workers=args.num_workers, pin_memory=args.cuda, sampler=None)

# get best epoch and model
state = torch.load('./{}/ckpt.pt'.format(args.checkpoint))
epoch = state['epoch']
print("Testing model (epoch {})".format(epoch))
# When running without CUDA, remap tensors saved from a GPU run onto the CPU.
weights_location = None if args.cuda else 'cpu'
model.load_state_dict(torch.load(
    './{}/model{:03d}.pt'.format(args.checkpoint, epoch),
    map_location=weights_location))
if args.cuda:
    model.cuda()

results = 'submission.csv'
print("Saving results in {}".format(results))
test(test_loader, model, criterion, args.cuda, save=results)

Testing model (epoch 28)
Saving results in submission.csv
