## Генерация монофонической музыки с кондишнингом

Импортируем torch и numpy:

In [85]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import numpy as np
import random

Сделаем также пользовательский импорт

In [86]:
from decode_patterns import data_conversion

Загружаем датасет

In [91]:
# import dataset
drum, bass = data_conversion.make_lstm_dataset_conditioning(height=64,
                                                            limit=1000,
                                                            patterns_file="decode_patterns/patterns.pairs.tsv",
                                                            mono=True)
# print(drum[0])
# drum, bass = np.array(drum), np.array(bass)
# print(drum[0])

# define shuffling of dataset
def shuffle(A, B, p=0.8):
    # take 80% to training, other to testing
    L = len(A)
    pivot = int(p*L)
    random.shuffle(A)
    random.shuffle(B)
    yield A[:pivot]
    yield B[:pivot]
    yield A[pivot:]
    yield B[pivot:]
    
    
# we can select here a validation set
drum, bass, drum_validation, bass_validation = shuffle(drum, bass)
    
# and we can shuffle train and test set like this:
# drum_train, bass_train, drum_test, bass_test = shuffle(drum, bass)

In [93]:
drum_validation[16]

NumpyImage(image=array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 1.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
       [

Модель определим в самом простом варианте, который только можно себе представить -- как в примере с конечным автоматом

In [94]:
# Encoder = LSTM
# Decoder = FCNN
class DrumNBass_LSTM_to_FCNN(nn.Module):
    def __init__(self):
        super(DrumNBass_LSTM_to_FCNN, self).__init__()
        # one input neuron, one output neuron, one layer in LSTM block
        self.input_size = 14
        self.lstm_hidden_size = 26
        self.lstm_layer_count = 1
        self.lstm = nn.LSTM(self.input_size, self.lstm_hidden_size, self.lstm_layer_count)
        self.lstm_embed_layer = nn.Linear(self.lstm_hidden_size, 1)
        self.sigm = nn.Sigmoid()
        
    def encoder(self, input):
        # пусть в input у нас приходит вектор размерности (32, 128, 14)
        # где имеется 32 примера (минибатч) по 128 отсчётов, 14 значений в каждом (барабанная партия)
        # Тогда его надо транспонировать в размерность (128, 32, 14)
        input = input.transpose(0,1)
        output, _ = self.lstm(input)
        output = self.sigm(self.lstm_embed_layer(output))*37
        return output
    
    def decoder(self, input):
        return input
    
    def forward(self, input):
        result = self.encoder(torch.tensor(list(map(lambda p: p.image, input)), dtype=torch.float))
        # добавляем conditioning
        # result.append([input.tempo, input.instrument])
        result = self.decoder(result)
        return result

In [95]:
# часть обучения
dnb_lstm = DrumNBass_LSTM_to_FCNN()

criterion = nn.MSELoss()

# оценим также и разнообразие мелодии по её.. дисперсии?)
# def melody_variety(melody):
#     return 1/(1 + (melody.sum(axis=2) > 1).int())
    
# criterion = nn.NLLLoss() # -- этот товарищ требует, чтобы LSTM выдавал классы,
# criterion = nn.CrossEntropyLoss() # и этот тоже
# (числа от 0 до C-1), но как всё-таки его заставить это делать?...
# optimizer = optim.SGD(dnb_lstm.parameters(), lr=0.001, momentum=0.9)
optimizer = optim.Adam(dnb_lstm.parameters(), lr=0.001)

Найденные баги и их решения:

https://stackoverflow.com/questions/56741087/how-to-fix-runtimeerror-expected-object-of-scalar-type-float-but-got-scalar-typ

https://stackoverflow.com/questions/49206550/pytorch-error-multi-target-not-supported-in-crossentropyloss/49209628

https://stackoverflow.com/questions/56243672/expected-target-size-50-88-got-torch-size50-288-88

In [108]:
epoch_count = 500
batch_size = 256
shuffle_every_epoch = True
    
if shuffle_every_epoch:
    print(f"shuffle_every_epoch is on")
else:
    print(f"shuffle_every_epoch is off")
    # shuffle train and test set:
    drum_train, bass_train, drum_test, bass_test = shuffle(drum, bass)
        
for epoch in range(epoch_count):  # loop over the dataset multiple times
    print(f"Epoch #{epoch}")
    if shuffle_every_epoch:
        # shuffle train and test set:
        drum_train, bass_train, drum_test, bass_test = shuffle(drum, bass)
        
    examples_count = len(drum_train)
    examples_id = 0
    
    running_loss = 0.0
    runnint_count = 0
    batch_id = 0
    while examples_id < examples_count:
        batch_drum_train = drum_train[examples_id:examples_id + batch_size]
        batch_bass_train = bass_train[examples_id:examples_id + batch_size]
        
        batch_bass_train_raw = torch.tensor(list(map(lambda p: p.image, batch_bass_train)), dtype=torch.float)
        batch_bass_train_raw = batch_bass_train_raw.transpose(0, 1)
        # transpose нужен БЫЛ для обмена размерности батча и размерности шагов

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        bass_outputs = dnb_lstm(batch_drum_train).squeeze()
#         bass_outputs = bass_outputs.reshape(bass_outputs.size()[0], -1)
#         batch_bass_train = batch_bass_train.reshape(batch_bass_train.size()[0], -1)
#         print(f"bass_outputs:{bass_outputs.size()} batch_bass_train: {batch_bass_train.size()}")
#         print(f"bass_outputs:{bass_outputs} batch_bass_train: {batch_bass_train}")
        
        # loss = criterion(bass_outputs, batch_bass_train.long())
        loss = criterion(bass_outputs, batch_bass_train_raw)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        runnint_count += 1
        period = 5
        if batch_id % period == 0 or examples_id + batch_size >= examples_count:
            print('[%d, %5d] train loss: %.7f' %
                  (epoch + 1, batch_id + 1, running_loss / runnint_count))
            running_loss = 0.0
            runnint_count = 1
            
        # update batch info
        examples_id += batch_size
        batch_id += 1
        
    # here we can insert measure error on test set

#should check accuracy on validation set
print('Finished Training')

shuffle_every_epoch is on
Epoch #0
[1,     1] train loss: 28.7314854
[1,     3] train loss: 20.7457123
Epoch #1
[2,     1] train loss: 29.0978489
[2,     3] train loss: 20.2607212
Epoch #2
[3,     1] train loss: 30.8472576
[3,     3] train loss: 19.1461252
Epoch #3
[4,     1] train loss: 30.3425446
[4,     3] train loss: 20.7995338
Epoch #4
[5,     1] train loss: 29.9841366
[5,     3] train loss: 19.8327236
Epoch #5
[6,     1] train loss: 27.6150818
[6,     3] train loss: 19.2308470
Epoch #6
[7,     1] train loss: 31.6969414
[7,     3] train loss: 19.3956477
Epoch #7
[8,     1] train loss: 30.5470753
[8,     3] train loss: 19.8454577
Epoch #8
[9,     1] train loss: 31.8124866
[9,     3] train loss: 18.3017495
Epoch #9
[10,     1] train loss: 29.9887238
[10,     3] train loss: 19.2276859
Epoch #10
[11,     1] train loss: 31.9611263
[11,     3] train loss: 20.0223440
Epoch #11
[12,     1] train loss: 30.6956787
[12,     3] train loss: 19.3831418
Epoch #12
[13,     1] train loss: 31.49341

[103,     1] train loss: 28.1820908
[103,     3] train loss: 20.0367050
Epoch #103
[104,     1] train loss: 29.7157860
[104,     3] train loss: 18.0000725
Epoch #104
[105,     1] train loss: 27.6471233
[105,     3] train loss: 18.6831741
Epoch #105
[106,     1] train loss: 27.5312004
[106,     3] train loss: 20.0077883
Epoch #106
[107,     1] train loss: 30.0931702
[107,     3] train loss: 18.5425822
Epoch #107
[108,     1] train loss: 27.8343620
[108,     3] train loss: 19.1943773
Epoch #108
[109,     1] train loss: 27.1991177
[109,     3] train loss: 18.3654277
Epoch #109
[110,     1] train loss: 26.5292759
[110,     3] train loss: 19.8883368
Epoch #110
[111,     1] train loss: 26.6493549
[111,     3] train loss: 20.4923522
Epoch #111
[112,     1] train loss: 25.3714256
[112,     3] train loss: 20.3374659
Epoch #112
[113,     1] train loss: 26.7329941
[113,     3] train loss: 19.8648879
Epoch #113
[114,     1] train loss: 27.3965340
[114,     3] train loss: 20.2863102
Epoch #114
[115

[201,     3] train loss: 19.1579348
Epoch #201
[202,     1] train loss: 27.2719536
[202,     3] train loss: 18.9451764
Epoch #202
[203,     1] train loss: 28.1660423
[203,     3] train loss: 17.9565531
Epoch #203
[204,     1] train loss: 28.4628468
[204,     3] train loss: 19.1123473
Epoch #204
[205,     1] train loss: 26.0691242
[205,     3] train loss: 20.3905163
Epoch #205
[206,     1] train loss: 27.4014263
[206,     3] train loss: 18.6769555
Epoch #206
[207,     1] train loss: 27.9602318
[207,     3] train loss: 19.1028055
Epoch #207
[208,     1] train loss: 28.7978764
[208,     3] train loss: 19.9825357
Epoch #208
[209,     1] train loss: 29.8750973
[209,     3] train loss: 17.8352534
Epoch #209
[210,     1] train loss: 26.7148476
[210,     3] train loss: 20.1192767
Epoch #210
[211,     1] train loss: 27.8816757
[211,     3] train loss: 19.2965908
Epoch #211
[212,     1] train loss: 27.9196453
[212,     3] train loss: 19.7565893
Epoch #212
[213,     1] train loss: 24.8081093
[213

[300,     1] train loss: 30.3923187
[300,     3] train loss: 17.6899738
Epoch #300
[301,     1] train loss: 27.6293888
[301,     3] train loss: 17.8493633
Epoch #301
[302,     1] train loss: 27.4966278
[302,     3] train loss: 19.7745558
Epoch #302
[303,     1] train loss: 27.9991074
[303,     3] train loss: 19.2103895
Epoch #303
[304,     1] train loss: 30.2026062
[304,     3] train loss: 17.5811253
Epoch #304
[305,     1] train loss: 28.0624447
[305,     3] train loss: 18.5894216
Epoch #305
[306,     1] train loss: 28.4760761
[306,     3] train loss: 18.7672176
Epoch #306
[307,     1] train loss: 30.1471806
[307,     3] train loss: 18.3585606
Epoch #307
[308,     1] train loss: 27.3481598
[308,     3] train loss: 18.5899919
Epoch #308
[309,     1] train loss: 30.1893063
[309,     3] train loss: 20.0835444
Epoch #309
[310,     1] train loss: 32.1561852
[310,     3] train loss: 18.3760217
Epoch #310
[311,     1] train loss: 28.7718067
[311,     3] train loss: 17.8580774
Epoch #311
[312

[398,     3] train loss: 19.5867825
Epoch #398
[399,     1] train loss: 29.6713867
[399,     3] train loss: 18.1012936
Epoch #399
[400,     1] train loss: 29.7954197
[400,     3] train loss: 17.8211708
Epoch #400
[401,     1] train loss: 30.1878490
[401,     3] train loss: 17.4114666
Epoch #401
[402,     1] train loss: 28.0783558
[402,     3] train loss: 18.2314981
Epoch #402
[403,     1] train loss: 26.1399498
[403,     3] train loss: 16.7252471
Epoch #403
[404,     1] train loss: 29.1355400
[404,     3] train loss: 17.9241695
Epoch #404
[405,     1] train loss: 26.9448223
[405,     3] train loss: 20.4522317
Epoch #405
[406,     1] train loss: 25.8406010
[406,     3] train loss: 19.5218582
Epoch #406
[407,     1] train loss: 28.5256939
[407,     3] train loss: 19.9732679
Epoch #407
[408,     1] train loss: 30.1805439
[408,     3] train loss: 17.5216904
Epoch #408
[409,     1] train loss: 28.6391373
[409,     3] train loss: 19.7009761
Epoch #409
[410,     1] train loss: 25.5798473
[410

[497,     1] train loss: 29.0546112
[497,     3] train loss: 18.8042183
Epoch #497
[498,     1] train loss: 27.4269485
[498,     3] train loss: 19.2074114
Epoch #498
[499,     1] train loss: 27.3134022
[499,     3] train loss: 18.9359773
Epoch #499
[500,     1] train loss: 27.0005932
[500,     3] train loss: 19.5210088
Finished Training


In [109]:
with torch.no_grad():
    bass_outputs = dnb_lstm(drum_train)

In [110]:
result = bass_outputs.squeeze().int()
result

tensor([[4, 4, 3,  ..., 4, 4, 2],
        [1, 1, 0,  ..., 1, 1, 0],
        [2, 3, 4,  ..., 2, 2, 4],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [4, 4, 4,  ..., 4, 4, 4],
        [0, 0, 0,  ..., 0, 0, 0]], dtype=torch.int32)

Попробуем сохранить результаты работы сети. На anaconda нет mido, поэтому сохраняем результаты работы просто в массивчик npy... Однако, как альтернатива, его можно поставить чере pip в conda:
https://github.com/mido/mido/issues/198

In [112]:
import mido
from decode_patterns.data_conversion import build_track, DrumMelodyPair, NumpyImage, Converter


converter = Converter((16,50))

batch_drum = drum_train + drum_test + drum_validation
batch_bass = bass_train + bass_test + bass_validation
with torch.no_grad():
    bass_outputs = dnb_lstm(batch_drum)
    bass_outputs = bass_outputs.squeeze().int()
    
    for i in range(bass_outputs.size()[1]):
        bass_seq = bass_outputs[:,i]
#         bass_seq = batch_bass[:,i]
#         print(f"bass_seq:{bass_seq.size()}")
        bass_output = []
        for bass_note in bass_seq:
            bass_row = np.eye(1, 36, bass_note - 1)[0]
            bass_output.append(bass_row)
        bass_output = torch.tensor(bass_output).int().squeeze()
#         print(f"bass_output:{bass_output.size()}")
        
#         print(f"batch_drum:{batch_drum[:,i,:].size()}, bass_output:{bass_output.size()}")
            
        img_dnb = np.concatenate((batch_drum[i].image,bass_output), axis=1)
#         print(f"img_dnb:{list(bass_output)}")
        numpy_pair = NumpyImage(np.array(img_dnb)
                                , batch_drum[i].tempo
                                , batch_drum[i].instrument
                                , 1
                                , batch_drum[i].min_note)
        pair = converter.convert_numpy_image_to_pair(numpy_pair)
#         print(f"pair.melody:{pair.melody}")
        mid = build_track(pair, tempo=pair.tempo)
        mid.save(f"midi/npy/sample{i+1}.mid")
#         np.save(f"midi/npy/drum{i+1}.npy", batch_drum[:,i,:].int())
#         np.save(f"midi/npy/bass{i+1}.npy", bass_outputs[:,i,:])