## LSTM на оригинальном датасете

Попробуем сделать модель LSTM, похожую на ту, что описана в соседнем Notebook, но для нашей текущей задачи

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import numpy as np

Сделаем также пользовательский импорт

In [2]:
from decode_patterns import data_conversion

Загружаем датасет с помощью DataLoader

In [89]:
# по аналогии с предыдущим примером:
# первая строка -- ожидаемый выходной сигнал, вторая -- входные данные :)
# dataset_train = torch.tensor([
#     [[1,3,1,4,2,3,3,2], [1,0,0,1,8,4,7,2], [2,4,2,3,2,5,6,3]],
#     [[2,2,1,1,4,3,4,2], [2,0,4,2,2,4,2,8], [3,5,3,4,6,1,1,2]],
# ], dtype=torch.float)

# import dataset
drum, bass = data_conversion.make_lstm_dataset(height=64, limit=1000, patterns_file="decode_patterns/patterns.pairs.tsv")



# define shuffling of dataset
def shuffle(A, B, p=0.8):
    # take 80% to training, other to testing
    L = len(drum)
    idx = np.arange(L) < p*L
    np.random.shuffle(idx)
    yield A[idx]
    yield B[idx]
    yield A[np.logical_not(idx)]
    yield B[np.logical_not(idx)]
    
    
# we can select here a validation set
drum, bass, drum_validation, bass_validation = shuffle(drum, bass)
    
# and we can shuffle train and test set like this:
# drum_train, bass_train, drum_test, bass_test = shuffle(drum, bass)

In [90]:
bass_train.shape, drum_train.shape, bass_test.shape, drum_test.shape

(torch.Size([384, 64, 36]),
 torch.Size([384, 64, 14]),
 torch.Size([96, 64, 36]),
 torch.Size([96, 64, 14]))

Модель определим в самом простом варианте, который только можно себе представить -- как в примере с конечным автоматом

In [110]:
# попробуем определить модель LSTM как конечный автомат
class DrumNBassLSTM(nn.Module):
    def __init__(self):
        super(DrumNBassLSTM, self).__init__()
        # one input neuron, one output neuron, one layer in LSTM block
        self.input_size = 14
        self.hidden_size = 36
        self.layer_count = 8
        self.lstm = nn.LSTM(self.input_size, self.hidden_size, self.layer_count)
    
    def forward(self, input):
        # пусть в input у нас приходит вектор размерности (64, 32, 14)
        # то есть 64 отсчёта, тридцать два примера (минибатч), 14 значение в каждом (барабанная партия)
        output, _ = self.lstm(input)
        # пробуем превратить это в классификацию (для NLLLoss)
#         output = F.log_softmax(output, dim=1)
        return output

In [111]:
# часть обучения
dnb_lstm = DrumNBassLSTM()

criterion = nn.MSELoss()
# criterion = nn.NLLLoss() # -- этот товарищ требует, чтобы LSTM выдавал классы,
# criterion = nn.CrossEntropyLoss() # и этот тоже
# (числа от 0 до C-1), но как всё-таки его заставить это делать?...
optimizer = optim.SGD(dnb_lstm.parameters(), lr=0.001, momentum=0.9)

Найденные баги и их решения:

https://stackoverflow.com/questions/56741087/how-to-fix-runtimeerror-expected-object-of-scalar-type-float-but-got-scalar-typ

https://stackoverflow.com/questions/49206550/pytorch-error-multi-target-not-supported-in-crossentropyloss/49209628

https://stackoverflow.com/questions/56243672/expected-target-size-50-88-got-torch-size50-288-88

In [127]:
epoch_count = 300
batch_size = 32
shuffle_every_epoch = True
    
if shuffle_every_epoch:
    print(f"shuffle_every_epoch is on")
else:
    print(f"shuffle_every_epoch is off")
    # shuffle train and test set:
    drum_train, bass_train, drum_test, bass_test = shuffle(drum, bass)
    drum_train = torch.tensor(drum_train, dtype=torch.float)
    bass_train = torch.tensor(bass_train, dtype=torch.float)
    drum_test = torch.tensor(drum_test, dtype=torch.float)
    drum_test = torch.tensor(drum_test, dtype=torch.float)
        
for epoch in range(epoch_count):  # loop over the dataset multiple times
    print(f"Epoch #{epoch}")
    if shuffle_every_epoch:
        # shuffle train and test set:
        drum_train, bass_train, drum_test, bass_test = shuffle(drum, bass)
        drum_train = torch.tensor(drum_train, dtype=torch.float)
        bass_train = torch.tensor(bass_train, dtype=torch.float)
        drum_test = torch.tensor(drum_test, dtype=torch.float)
        bass_test = torch.tensor(bass_test, dtype=torch.float)
        
    examples_count = drum_train.size()[0]
    examples_id = 0
    
    running_loss = 0.0
    runnint_count = 0
    batch_id = 0
    while examples_id < examples_count:
        batch_drum_train = drum_train[examples_id:examples_id + batch_size,:,:].transpose(0,1)
        batch_bass_train = bass_train[examples_id:examples_id + batch_size,:,:].transpose(0,1)
        # transpose нужен для обмена размерности батча и размерности шагов
        # print(f"i:{i}, batch_drum_train:{batch_drum_train.size()}, batch_bass_train:{batch_bass_train.size()}")

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        bass_outputs = dnb_lstm(batch_drum_train)
#         bass_outputs = bass_outputs.reshape(bass_outputs.size()[0], -1)
#         batch_bass_train = batch_bass_train.reshape(batch_bass_train.size()[0], -1)
#         print(f"bass_outputs:{bass_outputs.size()} batch_bass_train: {batch_bass_train.size()}")
#         print(f"bass_outputs:{bass_outputs} batch_bass_train: {batch_bass_train}")
        
        # loss = criterion(bass_outputs, batch_bass_train.long())
        loss = criterion(bass_outputs, batch_bass_train)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        runnint_count += 1
        period = 5
        if batch_id % period == 0 or examples_id + batch_size >= examples_count:
            print('[%d, %5d] train loss: %.7f' %
                  (epoch + 1, batch_id + 1, running_loss / runnint_count))
            running_loss = 0.0
            runnint_count = 1
            
        # update batch info
        examples_id += batch_size
        batch_id += 1
        
    # here we can insert measure error on test set

#should check accuracy on validation set
print('Finished Training')

shuffle_every_epoch is on
Epoch #0
[1,     1] train loss: 0.0136524
[1,     6] train loss: 0.0115293
[1,    11] train loss: 0.0128308
[1,    16] train loss: 0.0124043
[1,    20] train loss: 0.0107828
Epoch #1
[2,     1] train loss: 0.0133313
[2,     6] train loss: 0.0115083
[2,    11] train loss: 0.0128082
[2,    16] train loss: 0.0119504
[2,    20] train loss: 0.0111531
Epoch #2
[3,     1] train loss: 0.0129065
[3,     6] train loss: 0.0113117
[3,    11] train loss: 0.0130923
[3,    16] train loss: 0.0115865
[3,    20] train loss: 0.0108517
Epoch #3
[4,     1] train loss: 0.0132091
[4,     6] train loss: 0.0112073
[4,    11] train loss: 0.0135270
[4,    16] train loss: 0.0116406
[4,    20] train loss: 0.0105689
Epoch #4
[5,     1] train loss: 0.0134345
[5,     6] train loss: 0.0112862
[5,    11] train loss: 0.0126832
[5,    16] train loss: 0.0117854
[5,    20] train loss: 0.0109123
Epoch #5
[6,     1] train loss: 0.0132339
[6,     6] train loss: 0.0112988
[6,    11] train loss: 0.0133

KeyboardInterrupt: 

In [129]:

batch_drum_train = drum_train[:1,:,:].transpose(0,1)
batch_bass_train = bass_train[:1,:,:].transpose(0,1)
with torch.no_grad():

    # forward + backward + optimize
    bass_outputs = dnb_lstm(batch_drum_train)

In [141]:
sum((bass_outputs.squeeze() + 1) / 2)


tensor([33.7636, 30.3495, 30.0864, 35.6125, 31.8778, 31.1510, 35.1207, 33.9974,
        30.9092, 29.5502, 29.6476, 33.2156, 30.5292, 31.5056, 29.9812, 33.4993,
        32.6440, 30.6445, 32.0401, 32.3189, 32.1354, 34.6041, 31.7607, 34.7456,
        34.5019, 27.9688, 31.6641, 35.0432, 31.7548, 31.8498, 31.6478, 32.1741,
        32.9007, 28.7890, 29.7973, 32.2894])