In [0]:
import os
import pickle
import shutil

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
from google.colab import drive, files

In [0]:
# Training hyper-parameters shared by the cells below.
num_classes = 5      # width of the classifier head built in the model cell
lr = 0.0001          # learning rate handed to the SGD optimizer
batch_size = 50
epochs = 1           # number of iterations of the outer training loop


In [0]:
from google.colab import drive

# Prefer the first CUDA GPU and fall back to the CPU when none is available.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
# get_device_name() raises on a CPU-only runtime, so only query it when CUDA
# is actually present; otherwise the CPU fallback above would never be reached.
if torch.cuda.is_available():
    print(torch.cuda.get_device_name(0))
print(device)

# Mount Google Drive; `path` is the project folder inside the mounted drive.
drive.mount('/drive', force_remount=True)
path = '/drive/My Drive/BaumeisterAI/Colabs/envelopeDetection/'


In [0]:
# ResNet-18 created without pretrained weights; its final fully-connected
# layer is replaced so the network emits `num_classes` scores per input.
model = torchvision.models.resnet18(pretrained=False, progress=True)
num_classes = 5
# Read the head's input width from the model instead of hard-coding 512.
model.fc = nn.Linear(model.fc.in_features, num_classes)
# The training/evaluation cells move every batch to `device`, so the
# parameters must live there too or the forward pass fails on a GPU runtime.
model = model.to(device)

model.train()

In [0]:
# Load the TensorBoard notebook extension and open a dashboard that reads
# event files from logs/tensorboard.
%load_ext tensorboard
%tensorboard --logdir logs/tensorboard

# `tb` is the writer the detection training cell uses (tb.save_value) to log
# loss values.
# NOTE(review): no log directory is passed to TensorBoardColab here -- confirm
# it writes to the logs/tensorboard directory the dashboard above is watching.
from tensorboardcolab import TensorBoardColab
tb = TensorBoardColab()

In [0]:
# SGD with momentum over every model parameter; `lr` comes from the
# hyper-parameter cell.
# Alternative: optim.Adam(model.parameters(), lr=lr)
optimizer = optim.SGD(
    params=model.parameters(),
    lr=lr,
    momentum=0.9,
)

In [0]:
# Training loop for a model that is called as model(images, targets) and
# returns a dict with the four losses read below.
# NOTE(review): that calling convention matches torchvision's detection models,
# not the ResNet-18 classifier built earlier in this notebook, and
# `training_images` / `training_targets` are not defined in the visible cells --
# confirm which model and data this cell is meant to run against.

# Number of completed epochs, kept across re-runs of this cell so the
# TensorBoard step keeps increasing; starts at 0 on a fresh kernel (the
# original read `total_pass` without ever initialising it).
total_pass = globals().get('total_pass', 0)

model.train()
for epoch in range(epochs):

    # All running sums hold plain Python floats (via .item()); summing the loss
    # tensors themselves would keep every iteration's autograd graph alive.
    running_loss = 0.0
    running_loss_classifier = 0.0
    running_loss_box_reg = 0.0
    running_loss_objectness = 0.0
    running_loss_rpn_box_reg = 0.0

    # Only samples from index 1750 onwards are used for training here.
    for i in range(1750, len(training_images)):

        optimizer.zero_grad()

        # One 1x256x256 float image and its label/box targets per step,
        # placed on the same device that was selected in the setup cell.
        image = [torch.from_numpy(training_images[i]).view(1, 256, 256).float().to(device)]
        targets = [{'labels': training_targets[i]['labels'].to(device),
                    'boxes': training_targets[i]['boxes'].to(device)}]
        outputs = model(image, targets)

        loss_classifier = outputs['loss_classifier']
        loss_box_reg = outputs['loss_box_reg']
        loss_objectness = outputs['loss_objectness']
        # The RPN box-regression term is down-weighted by a factor of 10.
        loss_rpn_box_reg = outputs['loss_rpn_box_reg'] * 1e-1

        loss = loss_classifier + loss_box_reg + loss_objectness + loss_rpn_box_reg

        loss.backward()
        optimizer.step()

        # Detach every value once; the floats feed both the sums and the logger.
        loss_value = loss.item()
        loss_classifier_value = loss_classifier.item()
        loss_box_reg_value = loss_box_reg.item()
        loss_objectness_value = loss_objectness.item()
        loss_rpn_box_reg_value = loss_rpn_box_reg.item()

        running_loss_classifier += loss_classifier_value
        running_loss_box_reg += loss_box_reg_value
        running_loss_objectness += loss_objectness_value
        running_loss_rpn_box_reg += loss_rpn_box_reg_value

        running_loss += loss_value
        if i % 50 == 49:
            print("Epoch:{}, Step:{}, Loss:{}".format(epoch+1, i+1, running_loss/50))
            running_loss = 0.0

        # Global step: sample index offset by the number of completed epochs.
        universal_step = i + len(training_images)*total_pass
        tb.save_value('Train Loss', 'total_loss', universal_step, loss_value)
        tb.save_value('Train Loss', 'loss_classifier', universal_step, loss_classifier_value)
        tb.save_value('Train Loss', 'loss_box_reg', universal_step, loss_box_reg_value)
        tb.save_value('Train Loss', 'loss_objectness', universal_step, loss_objectness_value)
        tb.save_value('Train Loss', 'loss_rpn_box_reg', universal_step, loss_rpn_box_reg_value)

    # Advance once per epoch (the original did it once per cell run), so
    # consecutive epochs never write to the same TensorBoard step.
    total_pass += 1

print('Finished training.')

In [0]:
from torch.utils.data import Dataset, DataLoader

class MusicnetComposers(Dataset):
    """Fixed-length audio excerpts from a MusicNet archive, labelled by composer.

    For every composer in ``composers`` the recordings listed in the metadata
    CSV are cut into consecutive, non-overlapping windows of
    ``fs * split_duration`` samples, and ``n_samples`` windows are drawn at
    random (with replacement) from them. Window ``k`` of the composer at
    position ``c`` in ``composers`` is stored in row ``c * n_samples + k`` of
    ``self.dataset`` with label ``c`` in ``self.labels``.

    Args:
        csv_path: metadata CSV; the columns ``id``, ``composer`` and
            ``seconds`` are read.
        dataset_path: ``.npz`` archive indexed by ``str(id)``; every entry
            unpacks into ``(audio, <ignored>)``.
        composers: composer names; a name's position is its class label.
        split_duration: window length, in the unit of the ``seconds`` column.
        n_samples: number of windows kept per composer.
        fs: sampling rate (samples per second).

    Raises:
        ValueError: if no complete window can be cut for one of the composers.
    """

    def __init__(self, csv_path, dataset_path, composers, split_duration, n_samples, fs):
        musicnet_metadata = pd.read_csv(csv_path)
        window = fs * split_duration  # samples per excerpt

        self.dataset = np.zeros((len(composers) * n_samples, window))
        self.labels = np.zeros(len(composers) * n_samples, dtype=int)

        # SECURITY NOTE: allow_pickle=True lets a malicious archive execute
        # arbitrary code while loading -- only open archives you trust.
        # np.load is used as a context manager so the archive is closed once
        # all excerpts have been copied into self.dataset.
        with np.load(dataset_path, encoding='latin1', allow_pickle=True) as musicnet_dataset:
            for label, composer in enumerate(composers):
                rows = musicnet_metadata.loc[musicnet_metadata.composer == composer]
                composer_data = []
                for row in rows.itertuples():
                    sound, _ = musicnet_dataset[str(row.id)]
                    composer_data.extend(
                        self._split(sound, row.seconds, split_duration, window))
                    # Stop reading recordings once enough windows are available.
                    if len(composer_data) >= n_samples:
                        break

                if not composer_data:
                    raise ValueError(
                        "no complete {}-sample window available for composer {!r}".format(
                            window, composer))

                # Draw with replacement, so a composer with fewer than
                # n_samples windows still fills its whole slice.
                picks = np.random.randint(low=0, high=len(composer_data), size=n_samples)
                first = label * n_samples
                self.dataset[first:first + n_samples] = np.array(
                    [composer_data[k] for k in picks])
                self.labels[first:first + n_samples] = label
                # The original broke out of the composer loop here on a
                # condition that was always true after resampling, so only the
                # first composer was ever loaded. Every composer is processed now.

        self.composers = composers
        self.split_duration = split_duration
        self.n_samples = n_samples
        self.fs = fs

    @staticmethod
    def _split(sound, duration, split_duration, window):
        """Cut ``sound`` into complete, consecutive windows of ``window`` samples."""
        # Never request more windows than the audio really holds, even if the
        # metadata duration overstates the recording length; a short trailing
        # window would make the per-composer array ragged.
        n_splits = min(int(duration // split_duration), len(sound) // window)
        return [sound[k * window:(k + 1) * window] for k in range(n_splits)]

    def __getitem__(self, index):
        """Return ``(waveform, label)`` for the excerpt stored at ``index``."""
        # NOTE(review): the original built a MelSpectrogram of the excerpt on
        # every access and then discarded it, returning the raw waveform anyway
        # (and MelSpectrogram was never imported). The dead computation is
        # removed; if spectrogram inputs are wanted, compute and return them
        # here deliberately.
        return self.dataset[index], self.labels[index]

    def __len__(self):
        """Total number of stored excerpts (``len(composers) * n_samples``)."""
        return len(self.labels)

    
# Build the composer-classification training set from the local MusicNet files.
csv_path = 'musicnet_metadata.csv'
dataset_path = 'musicnet.npz'
composers = ['Schubert', 'Beethoven', 'Brahms', 'Mozart', 'Bach']
fs = 44100           # sampling rate handed to the dataset
split_duration = 20  # length of one excerpt (same unit as the metadata `seconds`)
n_samples = 100      # excerpts kept per composer

train_set = MusicnetComposers(
    csv_path=csv_path,
    dataset_path=dataset_path,
    composers=composers,
    split_duration=split_duration,
    n_samples=n_samples,
    fs=fs,
)
print("Train set size: " + str(len(train_set)))



In [0]:
# Adam with weight decay replaces the optimizer defined earlier; StepLR scales
# the learning rate by `gamma` once every `step_size` scheduler steps.
optimizer = optim.Adam(
    params=model.parameters(),
    lr=1e-2,
    weight_decay=1e-4,
)
scheduler = optim.lr_scheduler.StepLR(
    optimizer=optimizer,
    step_size=20,
    gamma=0.1,
)

In [0]:
def train(model, epoch):
    """Run one training epoch over the notebook-global ``train_loader``.

    Relies on the globals ``train_loader``, ``optimizer``, ``device`` and
    ``log_interval`` defined in other cells.

    Args:
        model: network to optimise. Its output may be ``(batch, classes)`` or
            ``(batch, 1, classes)`` and may hold raw logits or log-probabilities.
        epoch: epoch number, used only in the progress message.
    """
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        optimizer.zero_grad()
        # Inputs need no gradient of their own; only the parameters are trained.
        data = data.to(device)
        target = target.to(device)
        output = model(data)
        # Models that emit (batch, 1, classes) are reduced to (batch, classes);
        # this is what the original permute(1, 0, 2)[0] selected. 2-D outputs
        # (e.g. a plain classifier head) are used as they are.
        if output.dim() == 3:
            output = output[:, 0]
        # cross_entropy = log_softmax + nll_loss. log_softmax leaves already
        # normalised log-probabilities unchanged, so this matches the previous
        # nll_loss for such models and is also correct for raw logits.
        loss = F.cross_entropy(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % log_interval == 0:  # print training stats
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))

In [0]:
def test(model, epoch):
    model.eval()
    correct = 0
    for data, target in test_loader:
        data = data.to(device)
        target = target.to(device)
        output = model(data)
        output = output.permute(1, 0, 2)
        pred = output.max(2)[1] # get the index of the max log-probability
        correct += pred.eq(target).cpu().sum().item()
    print('\nTest set: Accuracy: {}/{} ({:.0f}%)\n'.format(
        correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

In [0]:
# Train and evaluate for 40 epochs.
log_interval = 20  # train() prints a progress line every `log_interval` batches
for epoch in range(1, 41):
    train(model, epoch)
    test(model, epoch)
    # Step the scheduler only after the epoch's optimizer updates. Stepping it
    # first (as before) shifts the schedule one epoch early and makes PyTorch
    # warn about the call order.
    previous_lr = optimizer.param_groups[0]['lr']
    scheduler.step()
    current_lr = optimizer.param_groups[0]['lr']
    # Report the learning rate when it actually changes; the old message was
    # hard-coded to epoch 31 and did not follow the scheduler's step size.
    if current_lr != previous_lr:
        print("Epoch {} complete. Setting learn rate to {:g}.".format(epoch, current_lr))