In [1]:
import os
import json
import librosa
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torch.utils.data.sampler import SubsetRandomSampler
from data_loader import TANG
from utils import *
from model import AutoEncoderRNN, DecoderRNN
from train_model import train_model

# Run on the first GPU when one is available, otherwise fall back to the CPU
# so the notebook still executes on machines without CUDA.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
sequence_length = 100
batch_size = 500
VAL_FRACTION = 0.15  # share of the dataset held out for the 'val' loader
SPLIT_SEED = 0       # fixed seed: the same samples land in 'val' on every run

dataset = TANG(seq_len=sequence_length, dataset_location='data/', normalize=False)
dataset_size = len(dataset)

# Shuffle the sample indices with a private, seeded RNG. The first `split`
# shuffled indices become the validation subset, the rest the training subset.
# A private RandomState keeps the split reproducible without touching the
# global NumPy RNG state.
indices = list(range(dataset_size))
split = int(np.floor(VAL_FRACTION * dataset_size))
np.random.RandomState(SPLIT_SEED).shuffle(indices)
train_indices, test_indices = indices[split:], indices[:split]

# Both loaders wrap the same dataset object; the samplers restrict each loader
# to its own index subset (drawn in random order each epoch).
train_sampler = SubsetRandomSampler(train_indices)
test_sampler = SubsetRandomSampler(test_indices)
data_loaders = {
    'train': DataLoader(dataset, batch_size=batch_size, sampler=train_sampler),
    'val': DataLoader(dataset, batch_size=batch_size, sampler=test_sampler),
}
dataset_sizes = {
    'train': len(train_indices),
    'val': len(test_indices),
}
print("Number of training/test patches:", (len(train_indices),len(test_indices)), dataset_size)

# Hyperparameters, model/optimizer construction and the train_model(...) call
# are defined in the next cell. A verbatim copy of that cell used to sit here,
# which made a top-to-bottom run build and train everything twice and then
# discard the first result; this cell is now limited to imports and data setup.

Loading the dataset...
Loading file tang.pickle
Number of training/test patches: (75739, 13365) 89104


In [2]:
# --- Hyperparameters ---------------------------------------------------------
num_epochs = 10
# NOTE(review): 10 is an unusually large step size for plain SGD — confirm it
# is intentional.
learning_rate = 10
input_size, output_size, hidden_size = 69, 16, 64

# --- Networks ----------------------------------------------------------------
# The autoencoder is built first and the predictor second, then both are moved
# to `device`. An earlier experiment used a plain
# nn.LSTM(input_size, output_size, num_layers=3, batch_first=True) as `model`.
model = AutoEncoderRNN(
    input_size,
    hidden_size,
    num_layers=2,
    seq_len=sequence_length,
    batch_size=batch_size,
    batch_first=True,
).to(device)
predictor = DecoderRNN(
    hidden_size,
    output_size,
    num_layers=2,
    bidirectional=False,
    batch_first=True,
).to(device)

# --- Loss and optimizers -----------------------------------------------------
# Mean-squared-error loss, and one SGD optimizer per network:
# optimizer1 updates `model`, optimizer2 updates `predictor`.
criterion = nn.MSELoss()
optimizer1, optimizer2 = (
    torch.optim.SGD(net.parameters(), lr=learning_rate)
    for net in (model, predictor)
)

# --- Training ----------------------------------------------------------------
# An earlier run of this cell passed train_predictor=False instead.
# NOTE(review): the recorded output of this cell ends in
# "NameError: name 'musics' is not defined". That name is not used in this
# cell, so the error is raised inside train_model and must be fixed there.
model, losses = train_model(
    model,
    predictor,
    device,
    data_loaders,
    dataset_sizes,
    criterion,
    optimizer1,
    optimizer2,
    num_epochs=num_epochs,
    batch_size=batch_size,
    train_predictor=True,
)
print(model)

  0%|          | 0/152 [00:00<?, ?it/s]


NameError: name 'musics' is not defined

In [None]:
import copy
import time


def _standardize(batch):
    """Z-score `batch` per feature, pooling over its first two dimensions.

    Features whose standard deviation is exactly zero are divided by 1
    instead, so a constant feature comes out as zeros rather than NaN/inf.
    """
    mean = batch.mean(dim=(0, 1))
    std = batch.std(dim=(0, 1))
    std[std == 0] = 1
    return (batch - mean) / std


# Manual train/validate loop for `model` alone: each batch of `moves` is fed
# through the model and its first output is compared with the matching `music`
# batch using `criterion`. The weights with the lowest validation loss are
# restored at the end.
# NOTE(review): assumes the first output of model(moves) has the same shape as
# the music batch — confirm against the model class in use.
losses = {'train': [], 'val': []}
since = time.time()
best_model_wts = copy.deepcopy(model.state_dict())
# Start at +inf so any finite validation loss becomes the first checkpoint.
best_loss = float('inf')
for epoch in range(num_epochs):
    for phase in ['train', 'val']:
        training = phase != 'val'
        if training:
            model.train()
        else:
            model.eval()

        batch_loss_sum = 0.0   # sum of per-batch mean losses (progress bar)
        sample_loss_sum = 0.0  # per-batch mean loss weighted by batch size
        with tqdm(total=len(data_loaders[phase])) as pb:
            for step, (moves, musics, _uid, _series_uid) in enumerate(data_loaders[phase]):
                # Both tensors are standardized with the statistics of the
                # current batch only.
                moves = _standardize(moves.to(device))
                music = _standardize(musics.to(device))

                # optimizer1 is the optimizer built over model.parameters().
                optimizer1.zero_grad()
                with torch.set_grad_enabled(training):
                    outputs, _ = model(moves)
                    loss = criterion(outputs, music)
                    if training:
                        loss.backward()
                        optimizer1.step()

                loss_value = loss.item()
                batch_loss_sum += loss_value
                # The last batch may be shorter than batch_size, so weight by
                # the actual number of samples in this batch.
                sample_loss_sum += loss_value * moves.shape[0]
                pb.set_description('Epoch {} / {}  {} Loss: {:.4f}'.format(epoch + 1, num_epochs, phase, batch_loss_sum / (step + 1)))
                pb.update(1)

        # Mean loss per sample over the whole phase.
        epoch_loss = sample_loss_sum / dataset_sizes[phase]
        losses[phase].append(epoch_loss)
        if phase == 'val' and epoch_loss < best_loss:
            best_loss = epoch_loss
            best_model_wts = copy.deepcopy(model.state_dict())

time_elapsed = time.time() - since
print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
print('Best val loss: {:.4f}'.format(best_loss))
model.load_state_dict(best_model_wts)

In [None]:
# Plot the loss history of both phases on one set of axes. `losses[phase]`
# receives one value per epoch in the training loop, so the x-axis counts
# epochs.
fig, ax = plt.subplots(figsize=(20, 10))

for phase in ['train', 'val']:
    ax.plot(losses[phase], label='{} loss'.format(phase))

ax.legend()
ax.set_title('train/val losses')
ax.set_xlabel('epoch')
ax.set_ylabel('loss')
ax.grid(True)