In [1]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as dsets
from decode_patterns.create_images import create_images, crop_data, train_test
import numpy as np

In [2]:
# Training hyperparameters.
# TODO: tune these values.
batch_size = 100   # samples per training batch
num_epochs = 200   # passes over the training data

In [3]:
# Load at most `limit` patterns from the pairs file, then split the combined
# data into its drum part and its bass part.
limit = 10_000

drumnbass, _ = create_images(
    patterns_file="../patterns_pairs.tsv",
    limit=limit,
)
drum, bass = crop_data(drumnbass)


In [4]:
# model class
class FeedforwardNeuralNetModel(nn.Module):
    """Fully connected network with three ReLU hidden layers and a sigmoid readout.

    Args:
        input_dim: number of features in each flattened input row.
        hidden_dim: width shared by the three hidden layers.
        output_dim: number of values produced for each input row.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        # Layers are registered in forward order under the names fc1..fc4,
        # which are also the keys used when the weights are saved and restored.

        # Hidden layer 1: input_dim --> hidden_dim
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu1 = nn.ReLU()

        # Hidden layer 2: hidden_dim --> hidden_dim
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.relu2 = nn.ReLU()

        # Hidden layer 3: hidden_dim --> hidden_dim
        self.fc3 = nn.Linear(hidden_dim, hidden_dim)
        self.relu3 = nn.ReLU()

        # Readout: hidden_dim --> output_dim, squashed into (0, 1) by a sigmoid
        self.fc4 = nn.Linear(hidden_dim, output_dim)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Map a (batch, input_dim) tensor to (batch, output_dim) values in (0, 1)."""
        hidden = self.relu1(self.fc1(x))
        hidden = self.relu2(self.fc2(hidden))
        hidden = self.relu3(self.fc3(hidden))
        return self.sigmoid(self.fc4(hidden))


In [6]:
# Build the network: flattened 128 x 14 inputs, flattened 128 x 36 outputs.
input_dim = 128 * 14
output_dim = 128 * 36
hidden_dim = 2048
model = FeedforwardNeuralNetModel(input_dim, hidden_dim, output_dim)

# Run on the first CUDA device when one is available, otherwise on the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
model.to(device)

FeedforwardNeuralNetModel(
  (fc1): Linear(in_features=1792, out_features=2048, bias=True)
  (relu1): ReLU()
  (fc2): Linear(in_features=2048, out_features=2048, bias=True)
  (relu2): ReLU()
  (fc3): Linear(in_features=2048, out_features=2048, bias=True)
  (relu3): ReLU()
  (fc4): Linear(in_features=2048, out_features=4608, bias=True)
  (sigmoid): Sigmoid()
)

In [39]:
# Loss: mean squared error averaged over every output element.
criterion = nn.MSELoss(reduction="mean")

In [42]:
# Adam optimiser over all model parameters.
# TODO: adjust learning_rate as training progresses.
learning_rate = 0.01
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

In [None]:
# Training loop: one optimiser step per training batch; every 100 steps the
# mean squared error over the whole test split is printed.
k = 0  # size of the most recent batch of model outputs (printed in a later cell)
l = 0  # size of the most recent batch of flattened labels (printed in a later cell)
iteration = 0  # optimiser steps taken so far, counted across epochs
for epoch in range(num_epochs):
    # NOTE(review): the data is split again at the start of every epoch. If
    # train_test shuffles before splitting, test samples can end up in later
    # training splits -- confirm against its implementation.
    train_set, test_set = train_test(drum, bass, batch_size=batch_size, img_size=(128, 50))
    print(f"Epoch # {epoch + 1}")
    for images, labels in zip(*train_set):
        # Flatten each batch to (batch, features) and move it to the model's device.
        # Inputs do not need gradients; only the parameters are optimised.
        images = images.view(-1, input_dim).to(device)
        labels = labels.to(device).view(-1, output_dim)

        # Clear gradients left over from the previous step.
        optimizer.zero_grad()

        # Forward pass: sigmoid outputs in (0, 1).
        outputs = model(images)

        # Loss: MSE between the sigmoid outputs and the flattened targets.
        k = outputs.size()
        l = labels.size()
        loss = criterion(outputs, labels)

        # Backward pass and parameter update.
        loss.backward()
        optimizer.step()

        iteration += 1

        if iteration % 100 == 0:
            # Evaluate without building an autograd graph, and in a single
            # forward pass over the test split instead of one pass per sample.
            # The mean over all elements equals the average of the per-sample
            # means because every sample has the same number of outputs.
            with torch.no_grad():
                test_inputs = test_set[0].view(-1, input_dim).to(device)
                test_targets = test_set[1].view(-1, output_dim).to(device)
                error = ((model(test_inputs) - test_targets) ** 2).mean().item()
            print('Iteration: {}. error: {}'.format(iteration, error))

Epoch # 1
Epoch # 2
Iteration: 100. error: 0.02036287821829319
Epoch # 3
Iteration: 200. error: 0.020292140543460846
Epoch # 4
Iteration: 300. error: 0.02011115476489067
Epoch # 5
Iteration: 400. error: 0.020035646855831146
Epoch # 6
Epoch # 7
Iteration: 500. error: 0.020233988761901855
Epoch # 8
Iteration: 600. error: 0.020471399649977684
Epoch # 9
Iteration: 700. error: 0.01977088488638401
Epoch # 10
Iteration: 800. error: 0.020421497523784637
Epoch # 11
Epoch # 12
Iteration: 900. error: 0.020159335806965828
Epoch # 13
Iteration: 1000. error: 0.020233547315001488
Epoch # 14
Iteration: 1100. error: 0.020211849361658096
Epoch # 15
Iteration: 1200. error: 0.02036159113049507
Epoch # 16
Epoch # 17
Iteration: 1300. error: 0.020201439037919044
Epoch # 18
Iteration: 1400. error: 0.020122867077589035
Epoch # 19
Iteration: 1500. error: 0.020064732059836388
Epoch # 20
Iteration: 1600. error: 0.01993146911263466
Epoch # 21
Epoch # 22
Iteration: 1700. error: 0.02007601596415043
Epoch # 23
Iterat

In [30]:
# Show the output and label batch sizes recorded during the training loop.
print(k, l)


torch.Size([100, 4608]) torch.Size([100, 4608])


In [55]:
# Run the whole test split through the model, then measure the mean squared
# error between prediction and target for the first test sample only.
outputs = model(test_set[0].view(-1, 128 * 14).to(device))
lables = test_set[1].view(-1, 128 * 36).to(device)
mse_loss = torch.mean((lables[0] - outputs[0]).pow(2))

In [17]:
# Split the drum/bass data into training batches and a test split.
train_set, test_set = train_test(drum, bass, batch_size=batch_size)

In [45]:
# Model outputs for the whole test split, moved back to the CPU.
res1 = model(
    test_set[0].view(-1, 128 * 14).to(device)
).cpu()

In [61]:
# Threshold the sigmoid outputs at 0.5 to get a 0/1 integer tensor.
res1.gt(0.5).int()

tensor([[0, 0, 1,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]], dtype=torch.int32)

In [13]:
# Un-batch the training drums to (-1, 128, 14) and append the test drums.
drum_set = torch.cat([train_set[0].reshape(-1, 128, 14), test_set[0]], dim=0)

NameError: name 'train_set' is not defined

In [44]:
# torch.save(model.state_dict(), "../model_state")

In [9]:
# Rebuild the network and restore previously saved weights for inference.
model = FeedforwardNeuralNetModel(input_dim, hidden_dim, output_dim)
model.to(device)
# map_location remaps the stored tensors onto the current device, so a
# checkpoint written on a GPU machine also loads on a CPU-only one.
# NOTE(review): torch.load unpickles the file; only load checkpoints you trust.
model.load_state_dict(torch.load("../model_0_state", map_location=device))
model.eval()

FeedforwardNeuralNetModel(
  (fc1): Linear(in_features=1792, out_features=2048, bias=True)
  (relu1): ReLU()
  (fc2): Linear(in_features=2048, out_features=2048, bias=True)
  (relu2): ReLU()
  (fc3): Linear(in_features=2048, out_features=2048, bias=True)
  (relu3): ReLU()
  (fc4): Linear(in_features=2048, out_features=4608, bias=True)
  (sigmoid): Sigmoid()
)

In [10]:
# Generate a thresholded bass part for every drum pattern (train and test)
# and glue it to the right of the drums, giving one 128 x 50 image per pattern.
train_set, test_set = train_test(drum, bass, batch_size=batch_size)
drum_set = torch.cat((train_set[0].reshape([-1, 128, 14]), test_set[0]), 0)

result_chunks = []
# Inference only: skip autograd bookkeeping and push the patterns through the
# model in chunks of batch_size instead of one forward pass per pattern.
with torch.no_grad():
    for drum_chunk in torch.split(drum_set, batch_size):
        probabilities = model(drum_chunk.view(-1, 128 * 14).to(device)).cpu()
        bass_chunk = (probabilities > 0.5).float().reshape(-1, 128, 36)
        # Concatenate along the last axis: 14 drum columns followed by 36 bass columns.
        result_chunks.append(torch.cat((drum_chunk, bass_chunk), 2).numpy())

# With no patterns at all, fall back to the empty array a plain np.array([]) gives.
result = np.concatenate(result_chunks) if result_chunks else np.array(result_chunks)

In [32]:
# Run a single drum pattern through the model. The pattern is flattened to the
# 128 * 14 features the first layer expects; a row width of 32 * 14 would
# produce rows the model cannot accept.
output = model(drum_set[1].view(-1, 128 * 14).to(device)).cpu()

In [34]:
# Sum of all model outputs for the pattern above.
torch.sum(output)

tensor(0., grad_fn=<SumBackward0>)

In [11]:
import mido
from decode_patterns.data_conversion import build_track, DrumMelodyPair, Converter

# Converter configured with the (128, 50) size of the combined drum + bass images.
converter = Converter((128, 50))

with torch.no_grad():

    bass_outputs = result

    # Turn every generated image into a drum/melody pair and save it as its own
    # MIDI file; file names are numbered from 1.
    for i, img_dnb in enumerate(bass_outputs):
        pair = converter.convert_numpy_image_to_pair(np.array(img_dnb))
        mid = build_track(pair, tempo=240)
        mid.save(f"../midi/sample{i+1}.mid")