In [1]:
%reset -f
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import torchaudio
from torch.utils.data import Dataset
from torch.utils.data.sampler import SubsetRandomSampler
from torchvision import datasets, transforms
from tsai.all import *

torchaudio.USE_SOUNDFILE_LEGACY_INTERFACE = False
torchaudio.set_audio_backend('soundfile')
import os
import random
from pathlib import Path

import matplotlib.pyplot as plt
import psutil
import requests

from utils import *
from wingbeat_datasets import *
from wingbeat_models import *

print(f'Total RAM      : {bytes2GB(psutil.virtual_memory().total):5.2f} GB')
print(f'Available RAM  : {bytes2GB(psutil.virtual_memory().available):5.2f} GB\n')

  '"sox" backend is being deprecated. '


Total RAM      : 31.21 GB
Available RAM  : 22.91 GB



In [3]:
num_epochs = 35
batch_size = 32
batch_size_val = batch_size * 2
validation_split = .2
shuffle_dataset = True
num_workers = psutil.cpu_count()
random_seed= 42

In [4]:
dmel1 = WingbeatsDataset("Melanogaster_RL/Y", custom_label=0,transform=FilterWingbeat('bandpass'))
dmel2 = WingbeatsDataset("Melanogaster_RL/Z", custom_label=0,transform=FilterWingbeat('bandpass'))

dsuz1 = WingbeatsDataset("Suzukii_RL/L", custom_label=1,transform=FilterWingbeat('bandpass'))
dsuz2 = WingbeatsDataset("Suzukii_RL/R", custom_label=1,transform=FilterWingbeat('bandpass'))

Found 29002 in dataset: Melanogaster_RL/Y, and 1 label(s): ['Y']
Label(s) changed to 0
Found 24763 in dataset: Melanogaster_RL/Z, and 1 label(s): ['Z']
Label(s) changed to 0
Found 21940 in dataset: Suzukii_RL/L, and 1 label(s): ['L']
Label(s) changed to 1
Found 14348 in dataset: Suzukii_RL/R, and 1 label(s): ['R']
Label(s) changed to 1


In [5]:
transformed_dataset = ConcatDataset([dmel1, dsuz1])

train_size = int(0.8 * len(transformed_dataset))
valid_size = len(transformed_dataset) - train_size
train_dataset, valid_dataset = torch.utils.data.random_split(transformed_dataset, [train_size, valid_size])
test_dataset = ConcatDataset([dmel2, dsuz2])

train_dataloader = DataLoader(train_dataset, batch_size=batch_size, num_workers=num_workers)
valid_dataloader = DataLoader(valid_dataset, batch_size=batch_size_val, num_workers=num_workers)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size_val, num_workers=num_workers)

In [6]:
model = Conv1dNet()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, betas=(0.9, 0.999))
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True)
early_stopping = EarlyStopping(patience=7, verbose=False)
print(model)

Conv1dNet(
  (conv1): Conv1d(1, 16, kernel_size=(3,), stride=(1,))
  (bn1): BatchNorm1d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool1): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv1d(16, 32, kernel_size=(3,), stride=(1,))
  (bn2): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool2): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv3): Conv1d(32, 64, kernel_size=(3,), stride=(1,))
  (bn3): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool3): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv4): Conv1d(64, 128, kernel_size=(3,), stride=(1,))
  (bn4): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool4): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv5): Conv1d(128, 256, kernel_size=(3,), stride=(1,))
 

In [7]:
# from sklearn import preprocessing
# import itertools

# le = preprocessing.LabelEncoder()
# all_labels = [transformed_dataset.datasets[i].labels for i in range(len(transformed_dataset.datasets))]
# all_labels = list(itertools.chain.from_iterable(all_labels))
# le.fit(all_labels)

In [8]:
# Choosing whether to train on a gpu
train_on_gpu = torch.cuda.is_available()
print(f'Train on gpu: {train_on_gpu}')# Number of gpus
model = model.to('cuda', dtype=torch.float)

Train on gpu: True


In [9]:
# Model training
for epoch in range(num_epochs):
    # Going through the training set
    correct_train = 0
    model.train()
    for x_batch,y_batch,path_batch in train_dataloader:        

        y_batch = torch.as_tensor(y_batch).type(torch.LongTensor)
        x_batch,y_batch = x_batch.cuda(), y_batch.cuda()
        
        optimizer.zero_grad()
        pred = model(x_batch)
        loss = criterion(pred, y_batch)
        loss.backward()
        optimizer.step()
        correct_train += (pred.argmax(axis=1) == y_batch).float().sum().item()
    train_accuracy = correct_train / (len(train_dataloader)*batch_size) * 100.
    # Going through the validation set
    correct_valid = 0
    model.eval()
    for x_batch,y_batch,path_batch in valid_dataloader:
        
        y_batch = torch.as_tensor(y_batch).type(torch.LongTensor)
        x_batch,y_batch = x_batch.cuda(), y_batch.cuda()

        pred = model(x_batch)
        val_loss = criterion(pred, y_batch)
        correct_valid += (pred.argmax(axis=1) == y_batch).float().sum().item()
    valid_accuracy = correct_valid / (len(valid_dataloader)*batch_size_val) * 100.
    scheduler.step(val_loss)

    # Printing results
    print(f"Epoch {epoch}: train_acc: {train_accuracy:.2f}% loss: {loss:.3f},  val_loss: {val_loss:.3f} val_acc: {valid_accuracy:.2f}%")

    early_stopping(val_loss, model)

    if early_stopping.early_stop:
        print("Early stopping")
        break


Epoch 0: train_acc: 83.76% loss: 0.399,  val_loss: 21.126 val_acc: 57.17%
Epoch 1: train_acc: 90.45% loss: 0.239,  val_loss: 41.426 val_acc: 57.17%
EarlyStopping counter: 1 out of 7
Epoch 2: train_acc: 92.33% loss: 0.119,  val_loss: 22.338 val_acc: 57.17%
EarlyStopping counter: 2 out of 7
Epoch 3: train_acc: 93.48% loss: 0.070,  val_loss: 43.447 val_acc: 57.17%
EarlyStopping counter: 3 out of 7
Epoch     5: reducing learning rate of group 0 to 1.0000e-03.
Epoch 4: train_acc: 94.17% loss: 0.068,  val_loss: 25.790 val_acc: 57.40%
EarlyStopping counter: 4 out of 7
Epoch 5: train_acc: 96.00% loss: 0.023,  val_loss: 0.001 val_acc: 71.99%
Epoch 6: train_acc: 96.31% loss: 0.015,  val_loss: 0.767 val_acc: 67.41%
EarlyStopping counter: 1 out of 7
Epoch 7: train_acc: 96.56% loss: 0.015,  val_loss: 0.000 val_acc: 80.48%
Epoch 8: train_acc: 96.74% loss: 0.010,  val_loss: 0.001 val_acc: 91.30%
EarlyStopping counter: 1 out of 7
Epoch 9: train_acc: 96.90% loss: 0.012,  val_loss: 0.671 val_acc: 67.27%

## Testing

In [10]:
correct_test = 0
model.eval()
for x_batch,y_batch,path_batch in test_dataloader:

    y_batch = torch.as_tensor(y_batch).type(torch.LongTensor)
    x_batch,y_batch = x_batch.cuda(), y_batch.cuda()

    pred = model(x_batch)
    val_loss = criterion(pred, y_batch)
    correct_test += (pred.argmax(axis=1) == y_batch).float().sum().item()
test_accuracy = correct_test / (len(test_dataloader)*batch_size_val) * 100.
print(test_accuracy)

89.49652777777779
