In [1]:
import os

# Work from the parent directory: the relative paths used below
# (e.g. './data/...') are resolved from there.
# The sentinel makes this cell idempotent: re-running it in the same kernel
# must not climb one more directory level. A kernel restart resets both the
# working directory and the sentinel, so a fresh run still moves up once.
if not globals().get('_moved_to_parent_dir', False):
    os.chdir('..')
    _moved_to_parent_dir = True

In [2]:
# Display the current working directory as the cell output.
# Uses os.getcwd() instead of the bare `pwd` automagic, which only works in
# single-line cells and is shadowed by any variable named `pwd`.
os.getcwd()

'/mnt/md1/user_victor/automatic_melody_harmonization'

In [3]:
from tonal import pianoroll2number, joint_prob2pianoroll96
from tonal import tonal_centroid, chord482note, chord962note, note2number
import numpy as np
import torch
from torch.utils.data import DataLoader, Dataset
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
import pickle
from model.MusicVAE_GRU import MusicVAE
from decode import *
import matplotlib.pyplot as plt

In [6]:
# Load the pre-computed arrays stored under ./data.
melody_framewise = np.load('./data/melody_data.npy')
chord_groundtruth_idx = np.load('./data/chord_groundtruth.npy')

melody = np.load('./data/melody_baseline.npy')
chord = np.load('./data/number_96.npy')
chord_onehot = np.load('./data/onehot_96.npy')
length = np.load('./data/length.npy')

# Load the pickled tempo and downbeat data.
# `with` closes each handle even if unpickling raises.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only use it on files from a trusted source.
with open('./data/tempos', 'rb') as f:
    tempos = pickle.load(f)
with open('./data/downbeats', 'rb') as f:
    downbeats = pickle.load(f)

In [7]:
# Take the first `val_size` entries of every loaded array as the held-out split.
val_size = 500
print('splitting testing set...')

# These two stay as NumPy arrays.
val_melody_framewise, val_chord_groundtruth_idx = (
    melody_framewise[:val_size],
    chord_groundtruth_idx[:val_size],
)

# These three become torch tensors; melody and one-hot chords are cast to float.
val_melody = torch.from_numpy(melody[:val_size]).float()
val_chord = torch.from_numpy(chord_onehot[:val_size]).float()
val_length = torch.from_numpy(length[:val_size])



splitting testing set...


In [6]:
# Load model
# Build the MusicVAE on `device` and restore its saved weights.
device = 'cpu'
print('building model...')
model = MusicVAE(teacher_forcing = False, eps_i=0, device = device).to(device)
# Map the checkpoint onto `device` rather than a hard-coded 'cpu', so the
# checkpoint location and the model location cannot drift apart if `device`
# is changed above.
# NOTE(review): torch.load unpickles the file; only load trusted checkpoints.
model.load_state_dict(torch.load('output_models/model_musicvae_gru.pth', map_location=device))
# Switch to evaluation mode. As the last expression of the cell, this also
# displays the module structure.
model.eval()

building model...


MusicVAE(
  (encoder): GRU(96, 512, num_layers=2, batch_first=True, bidirectional=True)
  (hidden2mean): Linear(in_features=2048, out_features=32, bias=True)
  (hidden2logv): Linear(in_features=2048, out_features=32, bias=True)
  (latent2conductor_input): Linear(in_features=32, out_features=96, bias=True)
  (latent2conductor_hidden): Linear(in_features=32, out_features=512, bias=True)
  (dropout): Dropout(p=0.2, inplace=False)
  (conductor): GRU(96, 512, num_layers=2, batch_first=True)
  (decoder): GRU(608, 512, num_layers=2, batch_first=True)
  (outputs2chord): Linear(in_features=512, out_features=96, bias=True)
)

In [52]:
# Peek at the first entry of `val_length` (shown as the cell output).
val_length[0]

tensor(17)

In [8]:
## Multi Batch inference
torch.manual_seed(0)
model.batch_size = 500
z = torch.randn(500,32).to(device)
_, samples = model.decode(z, None)

In [15]:
# Show the latent vector at index 10, the same one that is re-decoded on its
# own in the single-batch cell below.
z[10]

tensor([-0.1292, -0.0546,  0.4083,  1.1264,  1.9351,  1.0077,  1.0046, -0.4335,
        -1.2426,  1.2846,  0.2438,  0.5304, -0.0145, -2.2357,  1.4660, -1.2191,
         0.6442,  3.9300, -0.1244,  0.2953,  0.3827, -0.5497, -0.9940,  1.3459,
         1.9457, -1.2904, -2.3495, -2.0689,  0.9094, -0.6946,  1.9595, -1.1038])

In [14]:
# Maximum over the last axis for the first 20 entries of samples[10].
# NOTE: despite its name, `logits` holds the (values, indices) result of the
# max; only the indices are kept and displayed.
logits = samples[10][:20].max(dim=-1)
gen_chord_index = logits.indices
gen_chord_index

tensor([ 1,  0, 80, 80, 80, 80, 80, 80,  1,  1, 80, 80, 80, 80, 80, 80,  0,  1,
        80, 80])

In [20]:
## Single Batch inference
model.batch_size = 1
z_ = z[10].unsqueeze(0)
_, sample = model.decode(z_, None)

In [21]:
# Maximum over the last axis for the first 20 entries of the single decoded
# sample.
# NOTE: despite its name, `logits` holds the (values, indices) result of the
# max; only the indices are kept and displayed.
logits = sample[0][:20].max(dim=-1)
gen_chord_index = logits.indices
gen_chord_index

tensor([ 1,  0, 80, 80, 80, 80, 80, 80,  1,  1, 80, 80, 80, 80, 80, 80,  0,  1,
        80, 80])