In [12]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader

from src.audio_dataset import AudioDS
from src.audio_util import *
from src.trainer import Trainer
from src.model_alex import FullyConvNet4, FullyConvNet5

In [13]:
# Set device to GPU if possible, otherwise fall back to the CPU.
# NOTE: `torch` is bound by the explicit `import torch` in the imports cell.
# `import torch.nn as nn` only binds `nn`, so this cell must not depend on the
# wildcard import from src.audio_util happening to expose the name `torch`.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Bare last expression: shown as the cell output so the reader sees whether CUDA is in use.
torch.cuda.is_available()

False

In [14]:
from pathlib import Path

# Label annotation CSV file names, one per split
train_annotations = 'mtat_train_label.csv'
val_annotations = 'mtat_val_label.csv'
test_annotations = 'mtat_test_label.csv'

# Data directory: the `data` folder that sits beside the current working directory
cwd = Path.cwd()
DATA_DIR = cwd.parent.joinpath('data')

In [15]:
# Transformations on dataset: audio constants shared by every split
SAMPLE_RATE = 16_000      # passed to the datasets as target_sample_rate
DURATION_IN_SEC = 29.1    # passed to the datasets as target_length
# Transform object handed to every AudioDS split below.
# Presumably a mel-spectrogram (dB) pipeline, going by the name - see AudioUtil to confirm.
MEL_SPEC_DB_TRANSFORMATION = AudioUtil.get_audio_transforms(
    SAMPLE_RATE,
    n_fft=512,
    hop_length=256,
    n_mels=96,
    top_db=80,
)

# Training split
train_data = AudioDS(
    annotations_file=train_annotations,
    data_dir=DATA_DIR,
    target_sample_rate=SAMPLE_RATE,
    target_length=DURATION_IN_SEC,
    transformation=MEL_SPEC_DB_TRANSFORMATION,
)

# Validation split (same audio settings and transform as training)
val_data = AudioDS(
    annotations_file=val_annotations,
    data_dir=DATA_DIR,
    target_sample_rate=SAMPLE_RATE,
    target_length=DURATION_IN_SEC,
    transformation=MEL_SPEC_DB_TRANSFORMATION,
)

# Held-out test split. It must be built from the *test* annotations; reading the
# validation annotations here would make test metrics a repeat of validation.
test_data = AudioDS(annotations_file=test_annotations,
                    data_dir=DATA_DIR,
                    target_sample_rate=SAMPLE_RATE,
                    target_length=DURATION_IN_SEC,
                    transformation=MEL_SPEC_DB_TRANSFORMATION)

In [16]:
# Hyperparameters shared by the models trained in this notebook
BATCH_SIZE, EPOCHS = 32, 10
LEARNING_RATE = 1e-3

In [17]:
# Only the training loader shuffles; validation and test keep a fixed order.
train_dataloader = DataLoader(
    train_data,
    batch_size=BATCH_SIZE,
    shuffle=True,
)
val_dataloader = DataLoader(
    val_data,
    batch_size=BATCH_SIZE,
    shuffle=False,
)
test_dataloader = DataLoader(
    test_data,
    batch_size=BATCH_SIZE,
    shuffle=False,
)

In [18]:
# Pull one batch from the training loader and report the tensor shapes.
# `train_features` is reused further down to derive the model-summary input size.
train_features, train_labels = next(iter(train_dataloader))
print(
    f"Feature batch shape: {train_features.size()}",
    f"Labels batch shape: {train_labels.size()}",
    sep="\n",
)

Feature batch shape: torch.Size([32, 1, 96, 1819])
Labels batch shape: torch.Size([32, 50])


### FCN4 Model

In [20]:
# Build the FCN4 network
fcn4 = FullyConvNet4()

# Optimizer over the FCN4 parameters, and the loss applied to the model outputs
optimizer = optim.Adam(fcn4.parameters(), lr=LEARNING_RATE)
criterion = nn.BCEWithLogitsLoss()

# Trainer that bundles model, loaders, loss, optimizer and device
trainer = Trainer(
    fcn4,
    train_dataloader,
    val_dataloader,
    criterion,
    optimizer,
    device,
)

In [21]:
from torchsummary import summary

# Per-sample input shape: the sampled batch's shape with the batch dimension dropped.
input_size = train_features.shape[1:]
# NOTE(review): some torchsummary variants print the table themselves and return
# None - confirm whether the outer print() is actually needed.
print(summary(fcn4, input_size))

torch.Size([1, 96, 1819])
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 16, 96, 1819]             160
         MaxPool2d-2          [-1, 16, 48, 454]               0
            Conv2d-3          [-1, 32, 48, 454]           4,640
         MaxPool2d-4           [-1, 32, 12, 90]               0
            Conv2d-5           [-1, 64, 12, 90]          18,496
         MaxPool2d-6            [-1, 64, 4, 11]               0
            Conv2d-7           [-1, 128, 4, 11]          73,856
         MaxPool2d-8            [-1, 128, 1, 1]               0
            Conv2d-9             [-1, 50, 1, 1]           6,450
Total params: 103,602
Trainable params: 103,602
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.67
Forward/backward pass size (MB): 30.15
Params size (MB): 0.40
Estimated Total Size (MB): 31.22
---------------

In [22]:
# Run training of the FCN4 trainer for EPOCHS epochs.
# Long-running cell; the saved output below shows an interrupted run.
trainer.train(epochs=EPOCHS)

Training:   0%|          | 0/10 [01:56<?, ?it/s]


KeyboardInterrupt: 

In [None]:
# Evaluate the FCN4 trainer on `test_dataloader`
trainer.evaluate(test_dataloader)

In [None]:
# Save the FCN4 trainer's model.
# The path is relative to the notebook's working directory.
path = '../models/spec_fcn4.pth'
trainer.save_model(path)

In [11]:
# Load a previously saved model into the FCN4 trainer (uncomment to use)
# path = '../models/test_alex.pth'
# trainer.load_model(path)
# print(trainer.history)

{'train_loss': [], 'train_accuracy': [], 'train_roc_auc': [], 'train_pr_auc': [], 'val_loss': [], 'val_accuracy': [], 'val_roc_auc': [], 'val_pr_auc': []}


### FCN5 Model

In [20]:
# Build the FCN5 network
fcn5 = FullyConvNet5()

# Fresh optimizer and loss for FCN5. This rebinds the notebook-level `optimizer`
# and `criterion` names that the FCN4 section also assigned.
optimizer = optim.Adam(fcn5.parameters(), lr=LEARNING_RATE)
criterion = nn.BCEWithLogitsLoss()

# Separate trainer instance for FCN5
trainer_fcn5 = Trainer(
    fcn5,
    train_dataloader,
    val_dataloader,
    criterion,
    optimizer,
    device,
)

In [21]:
from torchsummary import summary

# Per-sample input shape: the sampled batch's shape with the batch dimension dropped.
input_size = train_features.shape[1:]
# NOTE(review): some torchsummary variants print the table themselves and return
# None - confirm whether the outer print() is actually needed.
print(summary(fcn5, input_size))

torch.Size([1, 96, 1819])
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 16, 96, 1819]             160
         MaxPool2d-2          [-1, 16, 48, 454]               0
            Conv2d-3          [-1, 32, 48, 454]           4,640
         MaxPool2d-4           [-1, 32, 12, 90]               0
            Conv2d-5           [-1, 64, 12, 90]          18,496
         MaxPool2d-6            [-1, 64, 4, 11]               0
            Conv2d-7           [-1, 128, 4, 11]          73,856
         MaxPool2d-8            [-1, 128, 1, 1]               0
            Conv2d-9             [-1, 50, 1, 1]           6,450
Total params: 103,602
Trainable params: 103,602
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.67
Forward/backward pass size (MB): 30.15
Params size (MB): 0.40
Estimated Total Size (MB): 31.22
---------------

In [22]:
# Run training of the FCN5 trainer for EPOCHS epochs.
# Long-running cell; the saved output below shows an interrupted run.
trainer_fcn5.train(epochs=EPOCHS)

Training:   0%|          | 0/10 [01:56<?, ?it/s]


KeyboardInterrupt: 

In [None]:
# Evaluate the FCN5 trainer on `test_dataloader`
trainer_fcn5.evaluate(test_dataloader)

In [None]:
# Save the FCN5 model.
# Must go through `trainer_fcn5`: calling `trainer.save_model` here would write
# the FCN4 trainer's model into the FCN5 checkpoint file.
path = '../models/spec_fcn5.pth'
trainer_fcn5.save_model(path)

In [11]:
# Load a previously saved model into the FCN5 trainer (uncomment to use)
# path = '../models/test_alex.pth'
# trainer_fcn5.load_model(path)
# print(trainer_fcn5.history)

{'train_loss': [], 'train_accuracy': [], 'train_roc_auc': [], 'train_pr_auc': [], 'val_loss': [], 'val_accuracy': [], 'val_roc_auc': [], 'val_pr_auc': []}
