In [11]:
# Set path variables
import os
import sys
cwd = os.getcwd()
project_dir = os.path.abspath(os.path.join(cwd, os.pardir))
sys.path.append(project_dir)

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader

from torchsummary import summary

from src import audio_util
from src.audio_dataset import AudioDS
from src.trainer import Trainer
from src.model_ab import *

In [2]:
# Set device to GPU if possible
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
torch.cuda.is_available()

cpu


False

In [3]:
# Load label annotation csv
train_annotations = 'mtat_train_label.csv'
val_annotations = 'mtat_val_label.csv'
test_annotations = 'mtat_test_label.csv'

# Data path
from pathlib import Path
cwd = Path.cwd()
DATA_DIR = cwd.parent / 'data'

In [4]:
# Transformations on dataset
SAMPLE_RATE = 16000
DURATION_IN_SEC = 29.1
MEL_SPEC_DB_TRANSFORMATION = audio_util.get_audio_transforms(SAMPLE_RATE,
                                                            n_fft=512,
                                                            hop_length=256,
                                                            n_mels=96,
                                                            top_db=80)

train_data = AudioDS(annotations_file=train_annotations,
                     data_dir=DATA_DIR,
                     target_sample_rate=SAMPLE_RATE,
                     target_length=DURATION_IN_SEC,
                     transformation=MEL_SPEC_DB_TRANSFORMATION)

val_data = AudioDS(annotations_file=val_annotations,
                   data_dir=DATA_DIR,
                   target_sample_rate=SAMPLE_RATE,
                   target_length=DURATION_IN_SEC,
                   transformation=MEL_SPEC_DB_TRANSFORMATION)

test_data = AudioDS(annotations_file=val_annotations,
                    data_dir=DATA_DIR,
                    target_sample_rate=SAMPLE_RATE,
                    target_length=DURATION_IN_SEC,
                    transformation=MEL_SPEC_DB_TRANSFORMATION)

In [5]:
# Hyperparameters
BATCH_SIZE = 16
LEARNING_RATE = 0.001
EPOCHS = 50

In [6]:
train_dataloader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)
val_dataloader = DataLoader(val_data, batch_size=BATCH_SIZE, shuffle=False)
test_dataloader = DataLoader(test_data, batch_size=BATCH_SIZE, shuffle=False)

In [7]:
# Display batch information
train_features, train_labels, files = next(iter(train_dataloader))
print(f"Feature batch shape: {train_features.size()}")
print(f"Labels batch shape: {train_labels.size()}")

Feature batch shape: torch.Size([16, 1, 96, 1819])
Labels batch shape: torch.Size([16, 50])


### FCN4 Model

In [12]:
# Instantiate model
fcn4 = FullyConvNet4()

# Instantiate trainer
trainer = Trainer(fcn4, train_dataloader, val_dataloader, LEARNING_RATE, device)

In [13]:
input_size = (train_features.size()[1:])
print(summary(fcn4, input_size))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1        [-1, 128, 96, 1819]           1,280
         MaxPool2d-2         [-1, 128, 48, 454]               0
            Conv2d-3         [-1, 384, 48, 454]         442,752
         MaxPool2d-4          [-1, 384, 12, 90]               0
            Conv2d-5          [-1, 768, 12, 90]       2,654,976
         MaxPool2d-6           [-1, 768, 4, 11]               0
            Conv2d-7          [-1, 2048, 4, 11]      14,157,824
         MaxPool2d-8           [-1, 2048, 1, 1]               0
            Conv2d-9             [-1, 50, 1, 1]         102,450
Total params: 17,359,282
Trainable params: 17,359,282
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.67
Forward/backward pass size (MB): 266.11
Params size (MB): 66.22
Estimated Total Size (MB): 333.00
--------------------------------

In [15]:
# Run training
trainer.train(epochs=EPOCHS, save_directory='../models')

Training started


KeyboardInterrupt: 

In [14]:
trainer.evaluate(test_dataloader)

(0.1423581662494324,
 0.08595194085027727,
 0.8910117290756215,
 0.4172853250163826)

In [15]:
# Save model
path = '../models/spec_fcn4.pth'
trainer.save_model(path)

In [11]:
# Load a model
# path = '../models/test_alex.pth'
# trainer.load_model(path)
# print(trainer.history)

{'train_loss': [], 'train_accuracy': [], 'train_roc_auc': [], 'train_pr_auc': [], 'val_loss': [], 'val_accuracy': [], 'val_roc_auc': [], 'val_pr_auc': []}


### FCN5 Model

In [16]:
# Instantiate model
fcn5 = FullyConvNet5()

# Instantiate trainer
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(fcn5.parameters(), lr=LEARNING_RATE)

trainer_fcn5 = Trainer(fcn5, train_dataloader, val_dataloader, criterion, optimizer, device)

In [17]:
from torchsummary import summary
input_size = (train_features.size()[1:])
print(summary(fcn5, input_size))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1        [-1, 128, 96, 1819]           1,280
         MaxPool2d-2         [-1, 128, 48, 454]               0
            Conv2d-3         [-1, 256, 48, 454]         295,168
         MaxPool2d-4         [-1, 256, 24, 113]               0
            Conv2d-5         [-1, 512, 24, 113]       1,180,160
         MaxPool2d-6          [-1, 512, 12, 28]               0
            Conv2d-7         [-1, 1024, 12, 28]       4,719,616
         MaxPool2d-8           [-1, 1024, 4, 5]               0
            Conv2d-9           [-1, 2048, 4, 5]      18,876,416
        MaxPool2d-10           [-1, 2048, 1, 1]               0
           Conv2d-11             [-1, 50, 1, 1]         102,450
Total params: 25,175,090
Trainable params: 25,175,090
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.67
Fo

In [None]:
# Run training
trainer_fcn5.train(epochs=EPOCHS)

Training:  70%|███████   | 7/10 [54:26<23:18, 466.04s/it, epoch=7, training loss=0.12, validation loss=0.146]   

In [None]:
trainer_fcn5.evaluate(test_dataloader)

In [None]:
# Save model
path = '../models/spec_fcn5.pth'
trainer.save_model(path)

In [11]:
# Load a model
# path = '../models/test_alex.pth'
# trainer.load_model(path)
# print(trainer.history)

{'train_loss': [], 'train_accuracy': [], 'train_roc_auc': [], 'train_pr_auc': [], 'val_loss': [], 'val_accuracy': [], 'val_roc_auc': [], 'val_pr_auc': []}
