In [1]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as dsets
from decode_patterns.create_images import create_images, crop_data, train_test
import numpy as np

In [2]:
# Training hyperparameters (TODO: tune these values).
# batch_size is handed to train_test; num_epochs bounds the outer training loop.
batch_size, num_epochs = 100, 400

In [3]:
# Prepare data: read the pattern pairs from the TSV file.
# NOTE(review): `limit` presumably caps how many rows create_images reads — confirm.
limit = 10000

drum, bass, tempo = create_images(limit=limit, file_name="../patterns_pairs.tsv")


In [4]:
# model class
class FeedforwardNeuralNetModel(nn.Module):
    """Fully connected network: three ReLU hidden layers and a sigmoid readout.

    Args:
        input_dim: Number of features in each flattened input row.
        hidden_dim: Width shared by all three hidden layers.
        output_dim: Number of values produced for each input row.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()

        # Hidden layer 1: input_dim --> hidden_dim (the weights live in fc1),
        # followed by its non-linearity.
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu1 = nn.ReLU()

        # Hidden layer 2: hidden_dim --> hidden_dim.
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.relu2 = nn.ReLU()

        # Hidden layer 3: hidden_dim --> hidden_dim.
        self.fc3 = nn.Linear(hidden_dim, hidden_dim)
        self.relu3 = nn.ReLU()

        # Readout: hidden_dim --> output_dim, squashed by a sigmoid.
        self.fc4 = nn.Linear(hidden_dim, output_dim)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Run ``x`` through the three hidden layers and the sigmoid readout.

        Args:
            x: Tensor whose last dimension equals ``input_dim``.

        Returns:
            Tensor whose last dimension equals ``output_dim``, after the sigmoid.
        """
        hidden = self.relu1(self.fc1(x))
        hidden = self.relu2(self.fc2(hidden))
        hidden = self.relu3(self.fc3(hidden))
        return self.sigmoid(self.fc4(hidden))


In [5]:
# Instantiate the model and place it on the GPU when one is available.

# Layer sizes: 1808 inputs, 2048 hidden units, 4608 outputs.
# NOTE(review): presumably 128 steps x 14 drum channels plus 16 extra features in,
# and 128 steps x 36 channels out — confirm against create_images / train_test.
hidden_dim = 2048
input_dim = 128 * 14 + 16
output_dim = 128 * 36
model = FeedforwardNeuralNetModel(input_dim, hidden_dim, output_dim)

# Prefer the first CUDA device; otherwise stay on the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
model.to(device)

FeedforwardNeuralNetModel(
  (fc1): Linear(in_features=1808, out_features=2048, bias=True)
  (relu1): ReLU()
  (fc2): Linear(in_features=2048, out_features=2048, bias=True)
  (relu2): ReLU()
  (fc3): Linear(in_features=2048, out_features=2048, bias=True)
  (relu3): ReLU()
  (fc4): Linear(in_features=2048, out_features=4608, bias=True)
  (sigmoid): Sigmoid()
)

In [85]:
# instantiate loss class
# Mean squared error; the training loop applies it to the model outputs and the targets.
criterion = nn.MSELoss()

In [None]:
# Step size handed to the SGD optimizer.
learning_rate = 0.01

In [88]:
# instantiate optimizer class
# TODO: change learning_rate as training progresses

optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

In [96]:
# Train the model with SGD and report the test error every 100 optimizer steps.

# Debug handles: sizes of the most recent training outputs / targets
# (inspected by a later cell).
k = 0
l = 0
# Global count of optimizer steps across all epochs. Deliberately not called
# `iter`, which would shadow the builtin for the rest of the kernel session.
iteration = 0
for epoch in range(num_epochs):
    # NOTE(review): the split is rebuilt on every epoch. If train_test shuffles,
    # samples evaluated as "test" in one epoch are trained on in another —
    # confirm, and hoist this call above the loop if so.
    train_set, test_set = train_test(drum, bass, batch_size=batch_size, img_size=(128, 50))
    print(f"Epoch # {epoch + 1}")
    for images, labels in zip(*train_set):
        # Flatten each batch and move it to the model's device. The inputs do
        # not need gradients; only the model parameters are optimized.
        images = images.view(-1, input_dim).to(device).float()
        targets = labels.to(device).float().view(-1, output_dim)

        # Clear gradients w.r.t. parameters
        optimizer.zero_grad()

        # Forward pass: sigmoid activations, one row per sample
        outputs = model(images)

        # Loss: MSE between the sigmoid outputs and the targets
        k = outputs.size()
        l = targets.size()
        loss = criterion(outputs, targets)

        # Backpropagate and update the parameters
        loss.backward()
        optimizer.step()

        iteration += 1

        if iteration % 100 == 0:
            # Evaluation only: no_grad avoids building autograd graphs, and
            # .item() keeps `error` a plain float instead of a graph-attached
            # tensor that grows with every test batch.
            error = 0.0
            with torch.no_grad():
                for test_images, test_labels in zip(*test_set):
                    test_outputs = model(test_images.view(-1, input_dim).to(device).float())
                    test_targets = test_labels.view(-1, output_dim).to(device)
                    error += ((test_outputs - test_targets) ** 2).mean().item()
            # Sum of per-batch MSE divided by the first dimension of test_set[1]
            # (presumably the number of test batches — confirm).
            print('Iteration: {}. error: {}'.format(iteration, error / test_set[1].size()[0]))

Epoch # 1
Epoch # 2
Iteration: 100. error: 0.2497334544016643
Epoch # 3
Iteration: 200. error: 0.2497313906902254
Epoch # 4
Iteration: 300. error: 0.24972930074887628
Epoch # 5
Iteration: 400. error: 0.24972689345388463
Epoch # 6
Epoch # 7
Iteration: 500. error: 0.24972528361262425
Epoch # 8
Iteration: 600. error: 0.24972225002577997
Epoch # 9
Iteration: 700. error: 0.24972091410749528
Epoch # 10
Iteration: 800. error: 0.2497185214691871
Epoch # 11
Epoch # 12
Iteration: 900. error: 0.24971718170887444
Epoch # 13
Iteration: 1000. error: 0.2497149902681617
Epoch # 14
Iteration: 1100. error: 0.24971282474803058
Epoch # 15
Iteration: 1200. error: 0.24971040060808422
Epoch # 16
Epoch # 17
Iteration: 1300. error: 0.24970803844037173
Epoch # 18
Iteration: 1400. error: 0.24970621793174538
Epoch # 19
Iteration: 1500. error: 0.24970452570981927
Epoch # 20
Iteration: 1600. error: 0.24970187080407572
Epoch # 21
Epoch # 22
Iteration: 1700. error: 0.24970034197237082
Epoch # 23
Iteration: 1800. erro

Iteration: 14100. error: 0.24944366277066848
Epoch # 178
Iteration: 14200. error: 0.2494421609511673
Epoch # 179
Iteration: 14300. error: 0.2494391835310034
Epoch # 180
Iteration: 14400. error: 0.24943809136442321
Epoch # 181
Epoch # 182
Iteration: 14500. error: 0.24943639373400495
Epoch # 183
Iteration: 14600. error: 0.24943434579787735
Epoch # 184
Iteration: 14700. error: 0.24943163808602198
Epoch # 185
Iteration: 14800. error: 0.24942999940359808
Epoch # 186
Epoch # 187
Iteration: 14900. error: 0.24942776259355692
Epoch # 188
Iteration: 15000. error: 0.24942585184188476
Epoch # 189
Iteration: 15100. error: 0.24942302295991484
Epoch # 190
Iteration: 15200. error: 0.24942007526860732
Epoch # 191
Epoch # 192
Iteration: 15300. error: 0.24941908243202568
Epoch # 193
Iteration: 15400. error: 0.24941686143118177
Epoch # 194
Iteration: 15500. error: 0.2494155457812178
Epoch # 195
Iteration: 15600. error: 0.2494134436100574
Epoch # 196
Epoch # 197
Iteration: 15700. error: 0.24941124745440932

Iteration: 27800. error: 0.24915838218656747
Epoch # 349
Iteration: 27900. error: 0.24915549844733267
Epoch # 350
Iteration: 28000. error: 0.24915256160619811
Epoch # 351
Epoch # 352
Iteration: 28100. error: 0.24915066568101757
Epoch # 353
Iteration: 28200. error: 0.24914855260500846
Epoch # 354
Iteration: 28300. error: 0.24914822513431853
Epoch # 355
Iteration: 28400. error: 0.24914419362023507
Epoch # 356
Epoch # 357
Iteration: 28500. error: 0.249142239379073
Epoch # 358
Iteration: 28600. error: 0.24914123848401223
Epoch # 359
Iteration: 28700. error: 0.24913781989743852
Epoch # 360
Iteration: 28800. error: 0.2491362025566516
Epoch # 361
Epoch # 362
Iteration: 28900. error: 0.2491326219807545
Epoch # 363
Iteration: 29000. error: 0.2491319922002563
Epoch # 364
Iteration: 29100. error: 0.24912941101294667
Epoch # 365
Iteration: 29200. error: 0.2491271792422358
Epoch # 366
Epoch # 367
Iteration: 29300. error: 0.2491268077506611
Epoch # 368
Iteration: 29400. error: 0.24912398383881468
Ep

KeyboardInterrupt: 

In [30]:
# Debug check: sizes of the last training outputs and reshaped labels (set in the training loop).
print(k, l)


torch.Size([100, 4608]) torch.Size([100, 4608])


In [55]:
# Run the model on the test inputs and compute the MSE of the first sample only.
# NOTE(review): the input width is hard-coded as 128 * 14, which equals input_dim - 16;
# it does not fit a model built with the full input_dim — confirm which model was live here.
outputs = model(test_set[0].view(-1, 128 * 14).to(device))
lables = test_set[1].view(-1, 128 * 36).to(device)
mse_loss = ((lables[0]-outputs[0])**2).mean()

In [17]:
# Build a train/test split of the drum and bass data (no tempo, default img_size).
train_set, test_set = train_test(drum, bass, batch_size = batch_size)

In [45]:
# Model outputs for the test inputs, moved back to the CPU.
# NOTE(review): hard-coded 128 * 14 input width — confirm it matches the live model.
res1 = model(test_set[0].view(-1, 128 * 14).to(device)).cpu()

In [61]:
# Binarize the sigmoid outputs at 0.5 for display.
(res1 > 0.5).int() 

tensor([[0, 0, 1,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]], dtype=torch.int32)

In [13]:
# Concatenate the train drum inputs (reshaped to [-1, 128, 14]) with the test drum inputs.
# NOTE(review): the recorded output is a NameError — train_set did not exist when this
# cell was executed; it depends on a cell that appears earlier but ran later.
drum_set = torch.cat((train_set[0].reshape([-1, 128, 14]), test_set[0]), 0)

NameError: name 'train_set' is not defined

In [103]:
# Persist the current model's parameters (state_dict only, not the module object).
torch.save(model.state_dict(), "../model_1_state")

In [6]:
# Rebuild the network without the 16 extra input features and restore saved weights.
model = FeedforwardNeuralNetModel(input_dim - 16, hidden_dim, output_dim)
model.to(device)
# map_location remaps the stored tensors onto the selected device, so a checkpoint
# saved on a GPU still loads on a CPU-only machine (and vice versa).
# NOTE(review): torch.load unpickles the file — only load checkpoints you trust.
model.load_state_dict(torch.load("../model_0_state", map_location=device))
# Switch to inference mode; as the last expression this also displays the module.
model.eval()

FeedforwardNeuralNetModel(
  (fc1): Linear(in_features=1792, out_features=2048, bias=True)
  (relu1): ReLU()
  (fc2): Linear(in_features=2048, out_features=2048, bias=True)
  (relu2): ReLU()
  (fc3): Linear(in_features=2048, out_features=2048, bias=True)
  (relu3): ReLU()
  (fc4): Linear(in_features=2048, out_features=4608, bias=True)
  (sigmoid): Sigmoid()
)

In [7]:
# Split with tempo, then stitch the train and test drum inputs (and tempos) back
# together so every sample gets a generated counterpart.
(train_set, train_tempo), (test_set, test_tempo) = train_test(drum, bass, tempo, batch_size = batch_size)
drum_set = torch.cat((train_set[0].reshape([-1, 1, 128 * 14 + 16]), test_set[0]), 0)
temp_set = np.concatenate((train_tempo,test_tempo))
result = []
# Inference only: no_grad skips autograd bookkeeping on every forward pass.
with torch.no_grad():
    for d in drum_set:
        # Drop the trailing 16 features so the row has the 128 * 14 width used below.
        # (For a model built with the full 128 * 14 + 16 input, skip this slice and
        # feed d.view(-1, 128 * 14 + 16) instead.)
        d = d[:, :-16]
        # Threshold the sigmoid outputs at 0.5 and shape them as a 128 x 36 image.
        output = (model(d.view(-1, 128 * 14).float().to(device)).cpu() > 0.5).float().reshape([128, 36])
        d = d.reshape([128, 14]).float()
        # Drum input and generated output side by side: 128 x 50.
        result.append(np.array(torch.cat((d, output), 1)))
result = np.array(result)

In [8]:
# Raw (un-thresholded) model output for sample 15, with the trailing 16 features dropped.
output = model(drum_set[15][:, :-16].view(-1, 128 * 14).float().to(device)).cpu()

In [9]:
# Sum of all output activations for that sample.
output.sum()

tensor(33.9058, grad_fn=<SumBackward0>)

In [None]:
import mido
from decode_patterns.data_conversion import build_track, DrumMelodyPair, Converter

# NOTE(review): (128, 50) presumably is the image size — it matches the 128 x 50
# arrays stored in `result`; confirm against Converter.
converter = Converter((128,50))

bass_outputs = result

# Convert every generated image to a MIDI track, using the tempo stored at the
# same index, and save it under ../midi/model_0_gen/ with 1-based file names.
with torch.no_grad():
    for i, img_dnb in enumerate(bass_outputs):
        t = temp_set[i]
        pair = converter.convert_numpy_image_to_pair(np.array(img_dnb))
        mid = build_track(pair, tempo=t)
        mid.save(f"../midi/model_0_gen/sample{i+1}.mid")