In [1]:
from types import SimpleNamespace
from functools import lru_cache
import os
import time
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import scipy.io.wavfile
import scipy.fftpack
import scipy.linalg
import torch
import torch.utils.data as data
import torch.nn as nn
import torch.optim as optim
import math

In [8]:

import sys
sys.path.append("../yaafelib/embed/")

from models import ModPASE #, vggish
from loader import Loader, useful_df,vggish,make_dataset
from trainer import Trainer

## Baseline model 

using a pre-trained VGGish model

In [5]:
vggish = nn.DataParallel(vggish,device_ids=[0,1,2,3])

In [6]:
traindf,testdf = train_test_split(useful_df,shuffle=True,
                                random_state=41,
                                stratify=useful_df.status)
valdf,tessdf = train_test_split(testdf,test_size=0.5,shuffle=True)

datalists = dict(
    train = traindf,
    test = tessdf,
    val = valdf
)

In [7]:
args = SimpleNamespace(
    # general options
    train_path = '../input/covid/train',         # train data folder
    valid_path = '../input/covid/valid',         # valid data folder
    test_path = '../input/covid/test',           # test data folder
    batch_size = 20,                             # training and valid batch size
    test_batch_size = 20,                        # batch size for testing
    arch = 'VGGISH',                             # PASE, VGG11, VGG13, VGG16, VGG19
    epochs = 50,                                 # maximum number of epochs to train
    lr = 0.0002,                                 # learning rate
    momentum = 0.9,                              # SGD momentum, for SGD only
    optimizer = 'adam',                          # optimization method: sgd | adam
    seed = 1234,                                 # random seed
    log_interval = 5,                            # how many batches to wait before logging training status
    patience = 10,                               # how many epochs of no loss improvement should we wait before stop training
    checkpoint = '.',                            # checkpoints directory
    train = True,                                # train before testing
    cuda = True,                                 # use gpu
    num_workers = 0,                             # how many subprocesses to use for data loading
)

In [9]:
dataloaders  = {}
for i,v in datalists.items():
    # loading data
    dataset = torch.utils.data.TensorDataset(make_dataset(v))
    dataloaders[i] =  torch.utils.data.DataLoader(
        dataset, batch_size=args.batch_size, shuffle=True,
        num_workers=args.num_workers, pin_memory=args.cuda, sampler=None)

100%|██████████| 8858/8858 [51:05<00:00,  2.89it/s] 


AttributeError: 'tuple' object has no attribute 'size'

In [10]:
x,y = make_dataset(useful_df)

 39%|███▊      | 4569/11811 [25:56<41:58,  2.88it/s]  

In [None]:
ddf = pd.DataFrame(x)
ddf['y'] = y
ddf['uuid'] = useful_df.uuid
ddf.index = useful_df.index

In [None]:
COUGHVID = "/home/shubham/datasets/coughvid/public_dataset"

ddf.to_csv(COUGHVID+"/use_features_vggish.csv")

In [27]:
xv =x#.unsqueeze(1)

In [31]:
y.shape

torch.Size([128])

In [32]:
xs = xv.numpy().astype(np.float32)[:12]

y = vggish(xs,16000)

ValueError: negative dimensions are not allowed

In [None]:
args.cuda = args.cuda and torch.cuda.is_available()
torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)
    print('Using CUDA with {0} GPUs'.format(torch.cuda.device_count()))


# build model
if args.arch == 'PASE':
    model = PASE(256)
if args.arch == 'VGGISH':
    model = VGGISH(hidden_size=64)
if args.cuda:
    model.cuda()

# Define criterion
criterion = nn.BCEWithLogitsLoss(reduction='mean') # This loss combines a Sigmoid layer and the BCELoss in one single class.

## Train model (Only new parameters)

In [None]:
ls ../input

## Test Model

In [None]:
test_dataset = Loader(args.test_path)
test_loader = torch.utils.data.DataLoader(
    test_dataset, batch_size=args.test_batch_size, shuffle=False, num_workers=args.num_workers)

# get best epoch and model
state = torch.load('./{}/ckpt.pt'.format(args.checkpoint))
epoch = state['epoch']
print("Testing model (epoch {})".format(epoch))
model.load_state_dict(torch.load('./{}/model{:03d}.pt'.format(args.checkpoint, epoch)))
if args.cuda:
    model.cuda()

results = 'submission.csv'
print("Saving results in {}".format(results))
test(test_loader, model, criterion, args.cuda, save=results)