# Imports and dataset loading

In [1]:
import torch
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
from torchvision import models, transforms
import torch.nn as nn
from torch.nn import functional as F
import torch.optim as optim
import gc
import os

# Drop unreachable Python objects, then ask PyTorch to release its cached CUDA memory.
gc.collect()
torch.cuda.empty_cache()

# Run on the first CUDA device when one is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print('Device: ',device)

# Spectrogram inputs and their class labels.
x = np.load('datasets/scaled_spec_resampled_array.npy')
y = np.load('datasets/labels_array.npy') - 1  # stored labels start at 1; shift them to start at 0

# Insert a singleton channel axis after the sample axis so the CNN receives (N, 1, H, W).
x = x.reshape((x.shape[0], 1, x.shape[1], x.shape[2]))

print(x.shape, y.shape)

# Hold out 20% of the samples for evaluation; the fixed seed keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

class MyDataset(Dataset):
    """In-memory dataset pairing float32 inputs with int64 class labels."""

    def __init__(self, x, y):
        """Convert ``x`` to a float32 tensor and ``y`` to an int64 tensor."""
        # Converting once here keeps __getitem__ a plain tensor index.
        features = torch.tensor(x, dtype=torch.float32)
        targets = torch.tensor(y, dtype=torch.long)
        self.x, self.y = features, targets

    def __len__(self):
        """Return the number of samples, i.e. the length of ``self.x``."""
        return len(self.x)

    def __getitem__(self, idx):
        """Return the ``(input, label)`` pair stored at position ``idx``."""
        sample = self.x[idx]
        label = self.y[idx]
        return sample, label

# Wrap each split in a Dataset so it can be batched by a DataLoader.
train_dataset = MyDataset(x_train, y_train)
test_dataset = MyDataset(x_test, y_test)

# Training batches are reshuffled every epoch.
train_loader = DataLoader(
    train_dataset,
    batch_size=64,
    shuffle=True,
)
# Evaluation batches keep a fixed order.
test_loader = DataLoader(
    test_dataset,
    batch_size=64,
    shuffle=False,
)

Device:  cuda:0
(1754, 1, 2048, 80) (1754,)


## Custom training and testing functions

In [2]:
def test(model, test_loader, criterion):
    """Evaluate ``model`` on every batch of ``test_loader``.

    The model is switched to eval mode and is left in that mode on return.
    Batches are moved to the module-level ``device`` before the forward pass.

    Args:
        model: network to evaluate.
        test_loader: iterable yielding ``(inputs, labels)`` batches.
        criterion: loss callable applied to ``(outputs, labels)``.

    Returns:
        ``(val_loss, val_acc)``: the mean of the per-batch losses and the
        fraction of samples whose highest-scoring class equals the label.
    """
    model.eval()
    loss_sum = 0.0
    n_correct = 0
    n_seen = 0
    with torch.no_grad():  # no gradients are needed for evaluation
        for inputs, labels in test_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            logits = model(inputs)
            loss_sum += criterion(logits, labels).item()
            # Predicted class = index of the largest output along dim 1.
            predicted = logits.max(dim=1).indices
            n_seen += labels.size(0)
            n_correct += (predicted == labels).sum().item()
    return loss_sum / len(test_loader), n_correct / n_seen

def train(model, train_loader, test_loader, criterion, optimizer, epochs, checkpointname):
    """Train ``model`` and checkpoint the weights with the best validation accuracy.

    The model is evaluated once before training and once after every epoch
    with ``test``; whenever the validation accuracy exceeds the best value
    seen so far, ``model.state_dict()`` is written to ``checkpointname``.
    Batches are moved to the module-level ``device``.

    Args:
        model: network to optimise.
        train_loader: iterable yielding ``(inputs, labels)`` training batches.
        test_loader: loader handed to ``test`` for validation.
        criterion: loss callable applied to ``(outputs, labels)``.
        optimizer: optimizer that updates the model's parameters.
        epochs: number of passes over ``train_loader``.
        checkpointname: file path the best ``state_dict`` is saved to.

    Returns:
        The same ``model`` object after the last epoch (not the best
        checkpoint). It is left in eval mode by the final ``test`` call.
    """
    # torch.save does not create missing parent directories, so make sure the
    # checkpoint folder exists before the first save.
    checkpoint_dir = os.path.dirname(checkpointname)
    if checkpoint_dir:
        os.makedirs(checkpoint_dir, exist_ok=True)

    epoch_bar = tqdm(range(epochs), position=0)

    # Baseline evaluation before any optimisation step.
    best_val_acc = 0
    val_loss, val_acc = test(model, test_loader, criterion)

    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), checkpointname)

    for epoch in epoch_bar:
        # test() switches the model to eval mode, so training mode has to be
        # re-enabled at the start of every epoch.
        model.train()
        # Reset per epoch so the reported loss is this epoch's mean rather
        # than a running total over all epochs so far.
        running_loss = 0.0

        batch_bar = tqdm(train_loader, total=len(train_loader), position=1, leave=False)

        for inputs, labels in batch_bar:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            batch_bar.set_description('Train loss: %.3f' % (loss.item()))

        train_loss = running_loss / len(train_loader)
        epoch_bar.set_description('Train loss: %.3f' % train_loss)
        val_loss, val_acc = test(model, test_loader, criterion)

        if val_acc > best_val_acc:
            best_val_acc = val_acc
            torch.save(model.state_dict(), checkpointname)

        print('Epoch: %d, Train Loss: %.3f, Val Loss: %.3f, Val Acc: %.3f' % (epoch, train_loss, val_loss, val_acc))
    return model


## Train ResNet18 and ResNet34

In [3]:
# Start from the pretrained ResNet18 and replace its two ends for this task.
resnet18 = models.resnet18(pretrained=True)
# New input convolution with 1 input channel, for single-layer spectrogram images.
resnet18.conv1 = nn.Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
# New classification head with 6 output classes.
resnet18.fc = nn.Linear(in_features=512, out_features=6, bias=True)

# Resume from a saved checkpoint when one exists. map_location remaps the
# stored tensors onto the current device, so a checkpoint written on a GPU can
# still be restored when this notebook runs on the CPU.
if os.path.exists('checkpoints/resnet18.pth'):
    resnet18.load_state_dict(torch.load('checkpoints/resnet18.pth', map_location=device))
    print('Model loaded')
else:
    print('No model found, loading pretrained model')

resnet18 = resnet18.to(device)

# Freeze every layer except the first conv1 layer and the last fc layer, so
# only those two are adapted to our data.
for name, param in resnet18.named_parameters():
    param.requires_grad = name in ('conv1.weight', 'fc.weight', 'fc.bias')

# Hand the optimizer only the parameters that are actually trained.
optimizer = optim.Adam([p for p in resnet18.parameters() if p.requires_grad], lr=0.001)
criterion = nn.CrossEntropyLoss()
resnet18 = train(resnet18, train_loader, test_loader, criterion, optimizer, 100, 'checkpoints/resnet18.pth')



No model found, loading pretrained model


  0%|          | 0/100 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [4]:
# Ask PyTorch to release its cached CUDA memory before the next model is set up.
torch.cuda.empty_cache()

In [5]:
# Start from the pretrained ResNet34 and replace its two ends for this task.
resnet34 = models.resnet34(pretrained=True)
# New input convolution with 1 input channel, for single-layer spectrogram images.
resnet34.conv1 = nn.Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
# New classification head with 6 output classes.
resnet34.fc = nn.Linear(in_features=512, out_features=6, bias=True)

# Resume from a saved checkpoint when one exists. map_location remaps the
# stored tensors onto the current device, so a checkpoint written on a GPU can
# still be restored when this notebook runs on the CPU.
if os.path.exists('checkpoints/resnet34.pth'):
    resnet34.load_state_dict(torch.load('checkpoints/resnet34.pth', map_location=device))
    print('Model loaded')
else:
    print('No model found, loading pretrained model')

resnet34 = resnet34.to(device)

# Freeze every layer except the first conv1 and last fc layers of ResNet34, so
# only those two are adapted to our data.
for name, param in resnet34.named_parameters():
    param.requires_grad = name in ('conv1.weight', 'fc.weight', 'fc.bias')

# Hand the optimizer only the parameters that are actually trained.
optimizer = optim.Adam(
    [p for p in resnet34.parameters() if p.requires_grad],
    lr=0.001,
    weight_decay=0.001,
    betas=(0.9, 0.999),
)
criterion = nn.CrossEntropyLoss()

resnet34 = train(resnet34, train_loader, test_loader, criterion, optimizer, 100, 'checkpoints/resnet34.pth')



No model found, loading pretrained model


  0%|          | 0/100 [00:12<?, ?it/s]


KeyboardInterrupt: 

## Training repeated with only the center portions of the spectrograms

In [6]:
# Reload the full spectrogram array.
x = np.load('datasets/scaled_spec_resampled_array.npy')
# Keep indices 724..1323 of axis 1: the 600 frequency bins around the centre of the spectrogram.
x = x[:, 724:1324, :]
y = np.load('datasets/labels_array.npy') - 1  # stored labels start at 1; shift them to start at 0

# Insert a singleton channel axis after the sample axis so the CNN receives (N, 1, H, W).
x = x.reshape((x.shape[0], 1, x.shape[1], x.shape[2]))

print(x.shape, y.shape)

# Same 80/20 split and seed as used for the full-size spectrograms.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

class MyDataset(Dataset):
    """Tensor-backed dataset of ``(input, label)`` pairs held fully in memory."""

    def __init__(self, x, y):
        """Store ``x`` as a float32 tensor and ``y`` as an int64 tensor."""
        inputs_tensor = torch.tensor(x, dtype=torch.float32)
        labels_tensor = torch.tensor(y, dtype=torch.long)
        self.x = inputs_tensor
        self.y = labels_tensor

    def __len__(self):
        """Return how many samples are stored (the length of ``self.x``)."""
        n_items = len(self.x)
        return n_items

    def __getitem__(self, idx):
        """Look up the input and the label at ``idx`` and return them as a tuple."""
        pair = (self.x[idx], self.y[idx])
        return pair

# Rebuild the datasets from the cropped train/test arrays.
train_dataset = MyDataset(x_train, y_train)
test_dataset = MyDataset(x_test, y_test)

# Training batches are reshuffled every epoch.
train_loader = DataLoader(
    train_dataset,
    batch_size=64,
    shuffle=True,
)
# Evaluation batches keep a fixed order.
test_loader = DataLoader(
    test_dataset,
    batch_size=64,
    shuffle=False,
)

(1754, 1, 600, 80) (1754,)


In [None]:
# Fresh pretrained ResNet18 for the centre-cropped spectrograms.
resnet18 = models.resnet18(pretrained=True)
# New input convolution with 1 input channel, for single-layer spectrogram images.
resnet18.conv1 = nn.Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
# New classification head with 6 output classes.
resnet18.fc = nn.Linear(in_features=512, out_features=6, bias=True)

resnet18 = resnet18.to(device)

# Resume from a saved checkpoint when one exists. map_location remaps the
# stored tensors onto the current device, so a checkpoint written on a GPU can
# still be restored when this notebook runs on the CPU.
if os.path.exists('checkpoints/resnet18_smalldata.pth'):
    resnet18.load_state_dict(torch.load('checkpoints/resnet18_smalldata.pth', map_location=device))
    print('Model loaded')
else:
    print('No model found, loading pretrained model')

# Freeze every layer except the first conv1 layer and the last fc layer, so
# only those two are adapted to our data.
for name, param in resnet18.named_parameters():
    param.requires_grad = name in ('conv1.weight', 'fc.weight', 'fc.bias')

# Hand the optimizer only the parameters that are actually trained.
optimizer = optim.Adam([p for p in resnet18.parameters() if p.requires_grad], lr=0.001)
criterion = nn.CrossEntropyLoss()

resnet18 = train(resnet18, train_loader, test_loader, criterion, optimizer, 100, 'checkpoints/resnet18_smalldata.pth')

In [None]:
# Ask PyTorch to release its cached CUDA memory before the next model is set up.
torch.cuda.empty_cache()

In [None]:
# Fresh pretrained ResNet34 for the centre-cropped spectrograms.
resnet34 = models.resnet34(pretrained=True)
# New input convolution with 1 input channel, for single-layer spectrogram images.
resnet34.conv1 = nn.Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
# New classification head with 6 output classes.
resnet34.fc = nn.Linear(in_features=512, out_features=6, bias=True)

# Resume from a saved checkpoint when one exists. The file lives in
# checkpoints/ like every other model in this notebook and follows the
# "<model>_smalldata.pth" naming used for the cropped ResNet18.
# map_location remaps the stored tensors onto the current device, so a
# checkpoint written on a GPU can still be restored on the CPU.
if os.path.exists('checkpoints/resnet34_smalldata.pth'):
    resnet34.load_state_dict(torch.load('checkpoints/resnet34_smalldata.pth', map_location=device))
    print('Model loaded')
else:
    print('No model found, loading pretrained model')

resnet34 = resnet34.to(device)

# Freeze every layer except the first conv1 and last fc layers of ResNet34.
for name, param in resnet34.named_parameters():
    param.requires_grad = name in ('conv1.weight', 'fc.weight', 'fc.bias')

# Hand the optimizer only the parameters that are actually trained.
optimizer = optim.Adam([p for p in resnet34.parameters() if p.requires_grad], lr=0.001)
criterion = nn.CrossEntropyLoss()

resnet34 = train(resnet34, train_loader, test_loader, criterion, optimizer, 100, 'checkpoints/resnet34_smalldata.pth')