In [5]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader

from src.audio_dataset import AudioDS
from src.audio_util import *
from src.trainer import Trainer
from src.model_alex import FullyConvNet

In [6]:
# Select the compute device: GPU when CUDA is available, otherwise CPU.
# Availability is queried once so the value displayed below is guaranteed to be
# the same one that decided `device`.
cuda_available = torch.cuda.is_available()
device = torch.device("cuda" if cuda_available else "cpu")
cuda_available

False

In [7]:
# Read the label tables for the train / val / test splits.
# Column 0 of each CSV is consumed as the index on read and then dropped,
# so every frame ends up with a fresh 0..n-1 index.
DATA_DIR = '../data/'
train_annotations, val_annotations, test_annotations = (
    pd.read_csv(DATA_DIR + csv_name, index_col=0).reset_index(drop=True)
    for csv_name in ('mtat_train_label.csv', 'mtat_val_label.csv', 'mtat_test_label.csv')
)

In [8]:
# Audio preprocessing settings shared by every dataset split.
SAMPLE_RATE = 16000      # target sample rate passed to the datasets (presumably Hz)
DURATION_IN_SEC = 29.1   # target clip length passed to the datasets

# Transform pipeline built by the project helper. The n_mels=96 setting matches
# the 96-bin axis of the feature batches printed later in this notebook.
MEL_SPEC_DB_TRANSFORMATION = AudioUtil.get_audio_transforms(
    SAMPLE_RATE,
    n_fft=512,
    hop_length=256,
    n_mels=96,
    top_db=80,
)

# Constructor arguments common to all three splits. Keeping them in one place
# guarantees train / val / test are preprocessed identically.
DATASET_KWARGS = dict(
    data_dir=DATA_DIR,
    target_sample_rate=SAMPLE_RATE,
    target_length=DURATION_IN_SEC,
    transformation=MEL_SPEC_DB_TRANSFORMATION,
)

train_data = AudioDS(annotations_file=train_annotations, **DATASET_KWARGS)

val_data = AudioDS(annotations_file=val_annotations, **DATASET_KWARGS)

# Fix: the test split must use `test_annotations`. It was previously built from
# `val_annotations`, which made the "test" set a duplicate of the validation set.
test_data = AudioDS(annotations_file=test_annotations, **DATASET_KWARGS)

In [9]:
# Training hyperparameters (tune here; later cells read these constants)
BATCH_SIZE = 32        # samples per mini-batch for every DataLoader
LEARNING_RATE = 1e-3   # step size handed to the optimizer
EPOCHS = 10            # number of passes requested from the trainer

In [10]:
# Only the training loader shuffles; the evaluation loaders keep dataset order.
train_dataloader = DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)
val_dataloader, test_dataloader = (
    DataLoader(dataset=eval_split, batch_size=BATCH_SIZE, shuffle=False)
    for eval_split in (val_data, test_data)
)

In [11]:
# Sanity check: pull one batch from the training loader and report tensor shapes.
train_features, train_labels = next(iter(train_dataloader))
print(
    f"Feature batch shape: {train_features.size()}",
    f"Labels batch shape: {train_labels.size()}",
    sep="\n",
)

Feature batch shape: torch.Size([32, 1, 96, 1819])
Labels batch shape: torch.Size([32, 50])


In [13]:
# Loss: binary cross-entropy on logits. Each sample carries a 50-element label
# vector (see the label batch shape printed above).
# NOTE(review): assumes FullyConvNet returns raw logits, not sigmoid outputs - confirm.
criterion = nn.BCEWithLogitsLoss()

# Network and its optimizer
model = FullyConvNet()
optimizer = optim.Adam(params=model.parameters(), lr=LEARNING_RATE)

# Bundle everything the training loop needs into the project Trainer
trainer = Trainer(
    model,
    train_dataloader,
    val_dataloader,
    criterion,
    optimizer,
    device,
)

In [14]:
# Run training for EPOCHS epochs. The trainer prints, per epoch, the training
# loss followed by validation accuracy, ROC AUC and AUC PR (see output below).
# NOTE(review): validation accuracy stays around 0.01% while ROC AUC / AUC PR
# improve - presumably accuracy is an exact-match over all labels; confirm in
# Trainer before relying on that number.
trainer.train(epochs=EPOCHS)

Training:  10%|█         | 1/10 [00:10<01:36, 10.71s/it, epoch=1, loss=0.114]

Epoch [1/10], Loss: 0.4562
Epoch [1], Validation Accuracy: 0.01%
ROC AUC: 0.55, AUC PR: 0.08


Training:  20%|██        | 2/10 [00:25<01:46, 13.34s/it, epoch=2, loss=0.0559]

Epoch [2/10], Loss: 0.2238
Epoch [2], Validation Accuracy: 0.04%
ROC AUC: 0.57, AUC PR: 0.14


Training:  30%|███       | 3/10 [00:41<01:40, 14.33s/it, epoch=3, loss=0.0485]

Epoch [3/10], Loss: 0.1939
Epoch [3], Validation Accuracy: 0.01%
ROC AUC: 0.57, AUC PR: 0.12


Training:  40%|████      | 4/10 [00:58<01:33, 15.61s/it, epoch=4, loss=0.0491]

Epoch [4/10], Loss: 0.1964
Epoch [4], Validation Accuracy: 0.01%
ROC AUC: 0.57, AUC PR: 0.12


Training:  50%|█████     | 5/10 [01:13<01:16, 15.36s/it, epoch=5, loss=0.0453]

Epoch [5/10], Loss: 0.1811
Epoch [5], Validation Accuracy: 0.01%
ROC AUC: 0.57, AUC PR: 0.17


Training:  60%|██████    | 6/10 [01:28<01:00, 15.07s/it, epoch=6, loss=0.042] 

Epoch [6/10], Loss: 0.1681
Epoch [6], Validation Accuracy: 0.01%
ROC AUC: 0.58, AUC PR: 0.16


Training:  70%|███████   | 7/10 [01:43<00:44, 14.92s/it, epoch=7, loss=0.0461]

Epoch [7/10], Loss: 0.1844
Epoch [7], Validation Accuracy: 0.01%
ROC AUC: 0.59, AUC PR: 0.18


Training:  80%|████████  | 8/10 [01:57<00:29, 14.75s/it, epoch=8, loss=0.0416]

Epoch [8/10], Loss: 0.1665
Epoch [8], Validation Accuracy: 0.01%
ROC AUC: 0.59, AUC PR: 0.21


Training:  90%|█████████ | 9/10 [02:11<00:14, 14.67s/it, epoch=9, loss=0.0414]

Epoch [9/10], Loss: 0.1655
Epoch [9], Validation Accuracy: 0.01%
ROC AUC: 0.59, AUC PR: 0.20


Training: 100%|██████████| 10/10 [02:26<00:00, 14.71s/it, epoch=10, loss=0.0415]

Epoch [10/10], Loss: 0.1659


Training: 100%|██████████| 10/10 [02:32<00:00, 15.22s/it, epoch=10, loss=0.0415]

Epoch [10], Validation Accuracy: 0.01%
ROC AUC: 0.61, AUC PR: 0.21



