In [1]:
%reset -f
%reload_ext autoreload
%autoreload 2
%matplotlib inline
%config Completer.use_jedi = False

In [2]:
import sys
sys.path.insert(0,"..")
from tqdm import tqdm
# from tqdm.notebook import tqdm as tqdm
import numpy as np
import pandas as pd
import torch
torch.backends.cudnn.benchmark = True
torch.manual_seed(42)
import torch.nn as nn
import torch.optim as optim
import torchaudio
from torch.utils.data import Dataset, ConcatDataset
from torch.utils.data.sampler import SubsetRandomSampler
from torchvision import datasets, transforms
torchaudio.set_audio_backend('soundfile')
import os
import random
from pathlib import Path
from IPython.display import Audio
from sklearn.preprocessing import LabelEncoder

import matplotlib.pyplot as plt
import psutil
import requests
from sklearn.model_selection import train_test_split
from utils import *
from datasets import *
from models import *
from transforms import *
from sklearn.metrics import confusion_matrix, balanced_accuracy_score
import  seaborn as sns

Available workers: 16


In [3]:
num_epochs = 35
batch_size = 64
batch_size_val = 64
validation_split = .3
shuffle_dataset = True
num_workers = 4
random_seed= 42
setting = 'raw'
modeltype = 'conv1draw'

In [4]:
outs = 6
if modeltype == 'conv1draw':
    model = Conv1dNetRAW(outputs=outs, dropout_p=.2)
elif modeltype == 'inception':
    model = nn.Sequential(
                    InceptionBlock(
                        in_channels=1, 
                        n_filters=32, 
                        kernel_sizes=[5, 11, 23],
                        bottleneck_channels=32,
                        use_residual=True,
                        activation=nn.ReLU()
                    ),
                    InceptionBlock(
                        in_channels=32*4, 
                        n_filters=32, 
                        kernel_sizes=[5, 11, 23],
                        bottleneck_channels=32,
                        use_residual=True,
                        activation=nn.ReLU()
                    ),
                    nn.AdaptiveAvgPool1d(output_size=1),
                    Flatten(out_features=32*4*1),
                    nn.Linear(in_features=4*32*1, out_features=outs))

In [5]:
optimizer = optim.Adam(model.parameters(), lr=0.001, eps=1e-3, amsgrad=True)

model, optimizer = load_checkpoint(f'{BASE_DATACREATEDDIR}/mosquito_model_best.pth.tar', model, optimizer)

In [6]:
outs=2
for param in model.parameters():
    param.requires_grad = False
    
num_ftrs = model.fc1.in_features
model.fc1 = nn.Linear(num_ftrs, outs)

model = model.to('cuda', dtype=torch.float)

optimizer = optim.Adam(model.parameters(), lr=0.001, eps=1e-3, amsgrad=True)
scheduler = torch.optim.lr_scheduler.CyclicLR(optimizer, base_lr=0.0001, max_lr=0.01, cycle_momentum=False, mode="triangular")
# early_stopping = EarlyStopping(patience=10, verbose=1)

if outs == 1:
    criterion = torch.nn.BCEWithLogitsLoss()
elif outs > 1:
    criterion = nn.CrossEntropyLoss()

## Data Loading

In [7]:
train_df = pd.read_parquet('../data_created/suzmel_train.parquet')
test_df = pd.read_parquet('../data_created/suzmel_test.parquet')

transforms_list_train = [Bandpass(), RandomRoll() ,RandomFlip()]#, RandomNoise(), RandomRoll()]
transforms_list_test = [Bandpass()]


In [8]:
X, y = train_df.iloc[:, 0], train_df.iloc[:, 1]
X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.13, shuffle=True, random_state=42, stratify=y)
X_test, y_test = test_df.iloc[:,0], test_df.iloc[:,1]

train_dataset = DataFrameset(pd.concat([X_train, y_train], axis=1), transform=transforms.Compose(transforms_list_train))
valid_dataset = DataFrameset(pd.concat([X_valid, y_valid], axis=1), transform=transforms.Compose(transforms_list_test))
test_dataset = DataFrameset(pd.concat([X_test, y_test], axis=1), transform=transforms.Compose(transforms_list_test))

train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4, pin_memory=True, worker_init_fn=worker_init_fn)
valid_dataloader = DataLoader(valid_dataset, batch_size=batch_size_val, shuffle=True, num_workers=4, pin_memory=True, worker_init_fn=worker_init_fn)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size_val, shuffle=False, num_workers=4, pin_memory=True, worker_init_fn=worker_init_fn)

## Training

In [9]:
results = {"loss":[], "val_loss":[], "train_accuracy":[], "valid_accuracy":[]}
best_valacc = 0

# Model training
for epoch in range(num_epochs):
    # Going through the training set
    correct_train = 0
    model.train()
    for x_batch,y_batch,_,_ in tqdm(train_dataloader, desc='Training..\t'):        
        y_batch = torch.as_tensor(y_batch).type(torch.LongTensor)
        x_batch,y_batch = x_batch.cuda(), y_batch.cuda()
        for param in model.parameters():
            param.grad = None
        pred = model(x_batch)
        if outs == 1:
            y_batch = y_batch.type(torch.FloatTensor).cuda()
            correct_train += ((pred>0.5).int().squeeze() == y_batch.squeeze()).float().sum().item()
            loss = criterion(pred, y_batch.unsqueeze(1))
        elif outs == 2:
            y_batch = y_batch.type(torch.LongTensor).cuda()
            correct_train += (pred.argmax(axis=1) == y_batch).float().sum().item()
            loss = criterion(pred, y_batch)
        loss.backward()
        optimizer.step()
    train_accuracy = correct_train / len(train_dataset) * 100.
    
    # Going through the validation set
    correct_valid = 0
    model.eval()
    for x_batch,y_batch,_,_ in tqdm(valid_dataloader, desc='Validating..\t'):
        y_batch = torch.as_tensor(y_batch)
        x_batch,y_batch = x_batch.cuda(), y_batch.cuda()
        pred = model(x_batch)
        if outs == 1:
            y_batch = y_batch.type(torch.FloatTensor).cuda()
            correct_valid += ((pred>0.5).int().squeeze().cuda() == y_batch.squeeze()).float().sum().item()
            val_loss = criterion(pred, y_batch.unsqueeze(1))
        elif outs == 2:
            y_batch = y_batch.type(torch.LongTensor).cuda()
            correct_valid += (pred.argmax(axis=1) == y_batch).float().sum().item()
            val_loss = criterion(pred, y_batch)
    valid_accuracy = correct_valid / len(valid_dataset) * 100.

    scheduler.step()
#     early_stopping(val_loss, model)
#     if early_stopping.early_stop:
#         print("Early stopping")
#         break

        # Printing results
    print(f"Epoch {epoch}: train_acc: {train_accuracy:.1f}% loss: {loss:.7f},  val_loss: {val_loss:.7f} val_acc: {valid_accuracy:.1f}%")
        
    is_best = valid_accuracy > best_valacc
    if is_best:
        print(f"Validation accuracy improved from {best_valacc:.2f} to {valid_accuracy:.2f}. Saving model..")
    best_valacc = max(valid_accuracy, best_valacc)
    save_checkpoint({
        'epoch': epoch,
        'state_dict': model.state_dict(),
        'best_valacc': best_valacc,
        'loss': results['loss'].append(loss),
        'val_loss': results['val_loss'].append(val_loss),
        'train_accuracy': results['train_accuracy'].append(train_accuracy),
        'valid_accuracy': results['valid_accuracy'].append(valid_accuracy),
        'optimizer' : optimizer.state_dict(),
    }, is_best)


Training..	: 100%|██████████| 377/377 [00:06<00:00, 57.12it/s]
Validating..	: 100%|██████████| 57/57 [00:01<00:00, 56.78it/s]


Epoch 0: train_acc: 79.7% loss: 0.4224440,  val_loss: 0.4487791 val_acc: 82.7%
Validation accuracy improved from 0.00 to 82.70. Saving model..


Training..	: 100%|██████████| 377/377 [00:05<00:00, 63.80it/s]
Validating..	: 100%|██████████| 57/57 [00:00<00:00, 59.09it/s]


Epoch 1: train_acc: 82.6% loss: 0.6429533,  val_loss: 0.7039512 val_acc: 83.7%
Validation accuracy improved from 82.70 to 83.73. Saving model..


Training..	: 100%|██████████| 377/377 [00:05<00:00, 63.56it/s]
Validating..	: 100%|██████████| 57/57 [00:00<00:00, 58.43it/s]


Epoch 2: train_acc: 83.9% loss: 0.6060840,  val_loss: 0.2805285 val_acc: 83.8%
Validation accuracy improved from 83.73 to 83.76. Saving model..


Training..	: 100%|██████████| 377/377 [00:06<00:00, 59.87it/s]
Validating..	: 100%|██████████| 57/57 [00:00<00:00, 58.12it/s]


Epoch 3: train_acc: 84.2% loss: 0.3241203,  val_loss: 0.2525963 val_acc: 84.5%
Validation accuracy improved from 83.76 to 84.48. Saving model..


Training..	: 100%|██████████| 377/377 [00:05<00:00, 63.08it/s]
Validating..	: 100%|██████████| 57/57 [00:01<00:00, 56.15it/s]


Epoch 4: train_acc: 84.9% loss: 0.3210216,  val_loss: 0.3852542 val_acc: 85.1%
Validation accuracy improved from 84.48 to 85.12. Saving model..


Training..	: 100%|██████████| 377/377 [00:05<00:00, 63.31it/s]
Validating..	: 100%|██████████| 57/57 [00:00<00:00, 58.74it/s]


Epoch 5: train_acc: 85.3% loss: 0.2872152,  val_loss: 0.2993922 val_acc: 85.9%
Validation accuracy improved from 85.12 to 85.87. Saving model..


Training..	: 100%|██████████| 377/377 [00:05<00:00, 63.04it/s]
Validating..	: 100%|██████████| 57/57 [00:00<00:00, 57.71it/s]


Epoch 6: train_acc: 85.7% loss: 0.2865900,  val_loss: 0.3170904 val_acc: 85.7%


Training..	: 100%|██████████| 377/377 [00:06<00:00, 62.54it/s]
Validating..	: 100%|██████████| 57/57 [00:00<00:00, 57.45it/s]


Epoch 7: train_acc: 85.7% loss: 0.2681815,  val_loss: 0.6268072 val_acc: 85.8%


Training..	: 100%|██████████| 377/377 [00:05<00:00, 63.57it/s]
Validating..	: 100%|██████████| 57/57 [00:00<00:00, 57.72it/s]


Epoch 8: train_acc: 85.9% loss: 0.2399020,  val_loss: 0.7340786 val_acc: 86.1%
Validation accuracy improved from 85.87 to 86.12. Saving model..


Training..	: 100%|██████████| 377/377 [00:05<00:00, 63.24it/s]
Validating..	: 100%|██████████| 57/57 [00:00<00:00, 57.01it/s]


Epoch 9: train_acc: 85.8% loss: 0.3320285,  val_loss: 0.3461214 val_acc: 86.5%
Validation accuracy improved from 86.12 to 86.48. Saving model..


Training..	: 100%|██████████| 377/377 [00:06<00:00, 62.62it/s]
Validating..	: 100%|██████████| 57/57 [00:01<00:00, 56.33it/s]


Epoch 10: train_acc: 86.1% loss: 0.3986571,  val_loss: 0.3219432 val_acc: 86.2%


Training..	: 100%|██████████| 377/377 [00:05<00:00, 63.19it/s]
Validating..	: 100%|██████████| 57/57 [00:00<00:00, 57.07it/s]


Epoch 11: train_acc: 86.0% loss: 0.2340779,  val_loss: 0.3574805 val_acc: 86.8%
Validation accuracy improved from 86.48 to 86.76. Saving model..


Training..	: 100%|██████████| 377/377 [00:05<00:00, 63.23it/s]
Validating..	: 100%|██████████| 57/57 [00:00<00:00, 58.10it/s]


Epoch 12: train_acc: 86.2% loss: 0.2571208,  val_loss: 0.1991973 val_acc: 86.5%


Training..	: 100%|██████████| 377/377 [00:05<00:00, 63.21it/s]
Validating..	: 100%|██████████| 57/57 [00:00<00:00, 57.69it/s]


Epoch 13: train_acc: 86.6% loss: 0.2882986,  val_loss: 0.3459485 val_acc: 87.1%
Validation accuracy improved from 86.76 to 87.06. Saving model..


Training..	: 100%|██████████| 377/377 [00:05<00:00, 63.07it/s]
Validating..	: 100%|██████████| 57/57 [00:00<00:00, 57.87it/s]


Epoch 14: train_acc: 86.5% loss: 0.2020562,  val_loss: 0.3848433 val_acc: 87.0%


Training..	: 100%|██████████| 377/377 [00:06<00:00, 62.51it/s]
Validating..	: 100%|██████████| 57/57 [00:00<00:00, 58.02it/s]


Epoch 15: train_acc: 86.5% loss: 0.3554147,  val_loss: 0.1436121 val_acc: 86.7%


Training..	: 100%|██████████| 377/377 [00:05<00:00, 62.84it/s]
Validating..	: 100%|██████████| 57/57 [00:01<00:00, 56.45it/s]


Epoch 16: train_acc: 86.6% loss: 0.3005840,  val_loss: 0.2499464 val_acc: 86.9%


Training..	: 100%|██████████| 377/377 [00:05<00:00, 63.18it/s]
Validating..	: 100%|██████████| 57/57 [00:00<00:00, 58.22it/s]


Epoch 17: train_acc: 86.2% loss: 0.2146599,  val_loss: 0.3243809 val_acc: 87.2%
Validation accuracy improved from 87.06 to 87.20. Saving model..


Training..	: 100%|██████████| 377/377 [00:06<00:00, 62.72it/s]
Validating..	: 100%|██████████| 57/57 [00:00<00:00, 57.43it/s]


Epoch 18: train_acc: 86.7% loss: 0.4130803,  val_loss: 0.1365829 val_acc: 87.3%
Validation accuracy improved from 87.20 to 87.26. Saving model..


Training..	: 100%|██████████| 377/377 [00:06<00:00, 62.80it/s]
Validating..	: 100%|██████████| 57/57 [00:00<00:00, 57.73it/s]


Epoch 19: train_acc: 86.7% loss: 0.3448161,  val_loss: 0.3441311 val_acc: 87.0%


Training..	: 100%|██████████| 377/377 [00:05<00:00, 62.89it/s]
Validating..	: 100%|██████████| 57/57 [00:00<00:00, 57.67it/s]


Epoch 20: train_acc: 86.6% loss: 0.3231362,  val_loss: 0.3570566 val_acc: 87.3%


Training..	: 100%|██████████| 377/377 [00:06<00:00, 62.81it/s]
Validating..	: 100%|██████████| 57/57 [00:00<00:00, 57.53it/s]


Epoch 21: train_acc: 86.8% loss: 0.3517003,  val_loss: 0.2854388 val_acc: 86.9%


Training..	: 100%|██████████| 377/377 [00:05<00:00, 62.87it/s]
Validating..	: 100%|██████████| 57/57 [00:00<00:00, 57.89it/s]


Epoch 22: train_acc: 87.0% loss: 0.2263359,  val_loss: 0.3540784 val_acc: 87.2%


Training..	: 100%|██████████| 377/377 [00:06<00:00, 62.53it/s]
Validating..	: 100%|██████████| 57/57 [00:00<00:00, 57.80it/s]
Training..	:   0%|          | 0/377 [00:00<?, ?it/s]

Epoch 23: train_acc: 86.8% loss: 0.3735074,  val_loss: 0.3533736 val_acc: 87.3%


Training..	: 100%|██████████| 377/377 [00:06<00:00, 62.36it/s]
Validating..	: 100%|██████████| 57/57 [00:00<00:00, 57.81it/s]


Epoch 24: train_acc: 86.5% loss: 0.3092632,  val_loss: 0.3695936 val_acc: 87.2%


Training..	: 100%|██████████| 377/377 [00:06<00:00, 62.58it/s]
Validating..	: 100%|██████████| 57/57 [00:00<00:00, 57.42it/s]


Epoch 25: train_acc: 86.6% loss: 0.2304866,  val_loss: 0.5171823 val_acc: 87.6%
Validation accuracy improved from 87.26 to 87.62. Saving model..


Training..	: 100%|██████████| 377/377 [00:06<00:00, 62.24it/s]
Validating..	: 100%|██████████| 57/57 [00:00<00:00, 57.75it/s]


Epoch 26: train_acc: 86.7% loss: 0.1764176,  val_loss: 0.4814872 val_acc: 87.3%


Training..	: 100%|██████████| 377/377 [00:05<00:00, 62.84it/s]
Validating..	: 100%|██████████| 57/57 [00:01<00:00, 55.79it/s]


Epoch 27: train_acc: 86.6% loss: 0.3652677,  val_loss: 0.3129235 val_acc: 87.0%


Training..	: 100%|██████████| 377/377 [00:06<00:00, 62.64it/s]
Validating..	: 100%|██████████| 57/57 [00:00<00:00, 57.11it/s]


Epoch 28: train_acc: 86.8% loss: 0.4395269,  val_loss: 0.1913311 val_acc: 87.5%


Training..	: 100%|██████████| 377/377 [00:06<00:00, 62.32it/s]
Validating..	: 100%|██████████| 57/57 [00:00<00:00, 57.78it/s]


Epoch 29: train_acc: 86.7% loss: 0.3292819,  val_loss: 0.4274959 val_acc: 87.7%
Validation accuracy improved from 87.62 to 87.70. Saving model..


Training..	: 100%|██████████| 377/377 [00:06<00:00, 62.51it/s]
Validating..	: 100%|██████████| 57/57 [00:01<00:00, 56.49it/s]


Epoch 30: train_acc: 86.9% loss: 0.2410693,  val_loss: 0.2084381 val_acc: 87.4%


Training..	: 100%|██████████| 377/377 [00:06<00:00, 62.62it/s]
Validating..	: 100%|██████████| 57/57 [00:00<00:00, 57.43it/s]


Epoch 31: train_acc: 87.0% loss: 0.4580456,  val_loss: 0.1549955 val_acc: 87.3%


Training..	: 100%|██████████| 377/377 [00:06<00:00, 62.35it/s]
Validating..	: 100%|██████████| 57/57 [00:01<00:00, 56.60it/s]


Epoch 32: train_acc: 86.9% loss: 0.1918588,  val_loss: 0.2634807 val_acc: 87.5%


Training..	: 100%|██████████| 377/377 [00:06<00:00, 62.09it/s]
Validating..	: 100%|██████████| 57/57 [00:00<00:00, 57.19it/s]


Epoch 33: train_acc: 87.1% loss: 0.1891162,  val_loss: 0.4563924 val_acc: 87.5%


Training..	: 100%|██████████| 377/377 [00:06<00:00, 62.22it/s]
Validating..	: 100%|██████████| 57/57 [00:00<00:00, 57.40it/s]


Epoch 34: train_acc: 86.7% loss: 0.2947086,  val_loss: 0.2443962 val_acc: 87.4%


## Testing on test set

In [10]:
model, optimizer = load_checkpoint(f'{BASE_DATACREATEDDIR}/model_best.pth.tar', model, optimizer)

In [11]:
model = model.to('cuda', dtype=torch.float)

_ = test_model(model,test_dataloader, test_dataset)

Testing..	: 100%|██████████| 368/368 [00:05<00:00, 65.98it/s]


Accuracy: 79.61
Balanced accuracy: 77.60
Confusion matrix: 
[[0.62664441 0.37335559]
 [0.07460115 0.92539885]]


## Testing on extra mebios data

In [12]:
extra_df = pd.read_parquet('../data_created/suzmel_extra.parquet')
X_extra, y_extra = extra_df.iloc[:,0], extra_df.iloc[:,1]

extra_dataset = DataFrameset(pd.concat([X_extra, y_extra], axis=1), transform=transforms.Compose(transforms_list_test))
extra_dataloader = DataLoader(extra_dataset, batch_size=batch_size_val, shuffle=False, num_workers=4, pin_memory=True, worker_init_fn=worker_init_fn)

In [13]:
_ = test_model(model,extra_dataloader, extra_dataset)

Testing..	: 100%|██████████| 317/317 [00:04<00:00, 65.91it/s]

Accuracy: 93.37
Balanced accuracy: 83.41
Confusion matrix: 
[[0.72435897 0.27564103]
 [0.05613672 0.94386328]]



