In [2]:
import os

# Step up one directory: the later cells use paths relative to that location
# ('./data/...', 'output_models/...').
# Not idempotent -- re-running this cell moves up one more level each time.
os.chdir(os.pardir)

In [3]:
pwd

'/mnt/md1/user_victor/automatic_melody_harmonization'

In [5]:
from tonal import pianoroll2number, joint_prob2pianoroll96
from tonal import tonal_centroid, chord482note, chord962note, note2number
import numpy as np
import torch
from torch.utils.data import DataLoader, Dataset
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
import pickle
from model.surprise_CVAE_all_chords import CVAE
from decode import *
import matplotlib.pyplot as plt
import math

In [7]:
# Load the pre-computed dataset arrays (paths are relative to the working directory).
melody_framewise = np.load('./data/melody_data.npy')
chord_groundtruth_idx = np.load('./data/chord_groundtruth.npy')

melody = np.load('./data/melody_baseline.npy')
chord = np.load('./data/chord_indices.npy')
chord_onehot = np.load('./data/chord_onehot.npy')
length = np.load('./data/length.npy')

# Context managers close the file handles even if unpickling raises.
# NOTE(security): pickle.load can execute arbitrary code -- only load trusted files.
with open('./data/tempos', 'rb') as f:
    tempos = pickle.load(f)
with open('./data/downbeats', 'rb') as f:
    downbeats = pickle.load(f)

In [6]:
# The first `val_size` entries of every array form the held-out set.
val_size = 500
print('splitting testing set...')

# Frame-level melody and ground-truth chord indices stay as NumPy slices.
val_melody_framewise, val_chord_groundtruth_idx = (
    melody_framewise[:val_size],
    chord_groundtruth_idx[:val_size],
)

# Chord one-hots and melody become float tensors; lengths keep their stored dtype.
val_chord, val_melody = (
    torch.from_numpy(array[:val_size]).float() for array in (chord_onehot, melody)
)
val_length = torch.from_numpy(length[:val_size])


splitting testing set...


In [8]:
# Load model
device = 'cpu'
print('building model...')
model_surp = CVAE(device = device).to(device)
# Earlier checkpoint, kept for reference:
#   output_models/model_weight_surprisingness_cvae.pth
# map_location remaps the checkpoint's tensors onto `device`, so weights saved
# from a GPU run still load on a CPU-only machine.
model_surp.load_state_dict(
    torch.load('output_models/model_surprise_cvae_update.pth', map_location=device)
)
print(model_surp)
# Switch to inference mode.
model_surp.eval()
val_length, val_melody = val_length.to(device), val_melody.to(device)

building model...
CVAE(
  (encoder): LSTM(928, 256, num_layers=2, batch_first=True, bidirectional=True)
  (encoder_output2mean): Linear(in_features=512, out_features=16, bias=True)
  (encoder_output2logv): Linear(in_features=512, out_features=16, bias=True)
  (surprise_prenet): Linear(in_features=1, out_features=256, bias=True)
  (latent2decoder_input): Linear(in_features=848, out_features=128, bias=True)
  (decoder): LSTM(128, 256, num_layers=2, batch_first=True, dropout=0.2, bidirectional=True)
  (outputs2chord): Linear(in_features=512, out_features=96, bias=True)
)


In [9]:
# Stored surprisingness values; their maximum is the `norm` later handed to
# profile_type as the height of the generated profiles.
surp = np.load('./data/surprisingness.npy')
norm = surp.max()

In [90]:
class profile_type:
    """Placeholder that only carries a ``norm`` attribute, initialised to None.

    NOTE(review): a class with the same name is defined again in a later cell;
    when the notebook runs top to bottom that later definition replaces this one.
    """

    def __init__(self):
        self.norm = None

In [10]:
## Profile function
# Largest stored surprisingness value; used as the peak height of the profiles.
norm = surp.max()

class profile_type():
    """Build one of six 1-D "surprise" contours scaled to ``norm``.

    Parameters
    ----------
    type_num : int
        Which contour to build:
        1 rising logistic curve, 2 falling logistic curve, 3 all zeros,
        4 constant ``norm``, 5 Gaussian bump peaking at ``norm``,
        6 inverted Gaussian bump (``norm`` minus contour 5).
        Any other value leaves ``profile`` as ``None``.
    length : int or 0-d integer tensor
        Number of steps in the contour. Coerced to a Python int so callers may
        pass either ``n`` or ``tensor(n)``.
    norm : float
        Upper bound of the contour values.

    Attributes
    ----------
    profile : torch.Tensor or None
        The generated contour. Types 1-4 have ``length`` entries; types 5-6
        have ``length + 1`` entries (see ``_scaled_gaussian``).
    """

    def __init__(self,type_num,length,norm):
        self.norm = norm
        # Python ints, NumPy ints and 0-d tensors all end up as a plain int, so
        # the list/arange arithmetic below never depends on the caller's type.
        self.length = int(length)

        builders = {
            1: self.type1,
            2: self.type2,
            3: self.type3,
            4: self.type4,
            5: self.type5,
            6: self.type6,
        }
        build = builders.get(type_num)
        self.profile = build() if build is not None else None

    def _logistic(self):
        """Logistic curve over steps 0..length-1, midpoint at length / 2, upper asymptote ``norm``."""
        x = torch.arange(0,self.length,1)
        return self.norm / (1 + torch.exp(-( x - self.length / 2 )))

    def _scaled_gaussian(self):
        """Gaussian centred at length / 2 (sigma = length / 8), rescaled so its maximum equals ``norm``."""
        mu, sigma = self.length / 2, self.length / 8 # mean and standard deviation
        # NOTE(review): sampled at length + 1 points (0..length inclusive), one
        # more than the other contours produce. Kept unchanged to preserve the
        # existing outputs -- confirm whether the extra step is intended.
        x = torch.arange(0,self.length + 1,1)
        y = 1 / (sigma * math.sqrt(2 * math.pi)) * torch.exp(- (x - mu) ** 2 / (2 * sigma ** 2))
        # Tensor.max() reduces in one call; builtin max() walks the tensor
        # element by element.
        return y * (self.norm / y.max())

    def type1(self):
        """Rising logistic contour."""
        return self._logistic()

    def type2(self):
        """Falling logistic contour (``norm`` minus the rising one)."""
        return - self._logistic() + self.norm

    def type3(self):
        """Flat contour of zeros."""
        return torch.zeros(self.length)

    def type4(self):
        """Flat contour at ``norm``."""
        return torch.full((self.length,), float(self.norm))

    def type5(self):
        """Gaussian bump peaking at ``norm`` in the middle."""
        return self._scaled_gaussian()

    def type6(self):
        """Inverted Gaussian: ``norm`` at the edges, dipping to 0 in the middle."""
        return -self._scaled_gaussian() + self.norm

def write_pianoroll(chord_pred,song_index,type_num,result_dir='results/surprising_cvae'):
    """Decode a chord prediction and write it out together with the current sample.

    Parameters
    ----------
    chord_pred : torch.Tensor
        Model output; it is moved to CPU, detached and converted to NumPy
        before being handed to ``argmax2pianoroll``.
    song_index : int
        Only used to build the output filename.
    type_num : int
        Surprise-profile type, only used to build the output filename.
    result_dir : str, optional
        Directory passed to ``write_one_pianoroll``. Defaults to the directory
        that used to be hard-coded here.

    Notes
    -----
    Reads these notebook-level globals, which must already be set by a sampling
    cell: ``sample_chord_groundtruth_idx``, ``sample_length``,
    ``sample_melody_framewise``, ``sample_tempo``, ``sample_downbeat``.
    ``argmax2pianoroll``, ``sequence2frame`` and ``write_one_pianoroll`` are
    expected to come from ``from decode import *`` -- confirm.
    """
    # Chord-step -> frame conversion factors (48 frames per chord step).
    BEAT_RESOLUTION = 24
    BEAT_PER_CHORD = 2

    ########## Random sampling ###########
    # Proceed chord decode
    print('proceed chord decode...')
    joint_prob = chord_pred.cpu().detach().numpy()

    # Append argmax index to get pianoroll array
    accompany_pianoroll = argmax2pianoroll(joint_prob)

    # augment chord into frame base
    accompany_pianoroll_framewise, sample_chord_groundtruth_framewise = sequence2frame(accompany_pianoroll, sample_chord_groundtruth_idx)

    # length into frame base
    sample_length_framewise = sample_length * BEAT_RESOLUTION * BEAT_PER_CHORD

    # write pianoroll
    filename = str(song_index) + '-surprisingness-type-' + str(type_num)
    write_one_pianoroll(result_dir, filename, sample_melody_framewise, accompany_pianoroll_framewise, sample_chord_groundtruth_framewise, sample_length_framewise, sample_tempo, sample_downbeat)


In [14]:
# Sampling
# torch.manual_seed(0)  # uncomment for reproducible latent samples
song_index = 8
sample_melody = val_melody[song_index].unsqueeze(0)
sample_length = val_length[song_index]
sample_melody_framewise = np.expand_dims(val_melody_framewise[song_index],axis=0)
sample_chord_groundtruth_idx = np.expand_dims(val_chord_groundtruth_idx[song_index],axis=0)
sample_tempo = [tempos[song_index]]
sample_downbeat = [downbeats[song_index]]

# Loop-invariant settings, defined once instead of on every iteration.
MAX_SEQ_LEN = 272      # number of chord steps every sequence is padded to
latent_size = 16
BEAT_RESOLUTION = 24
BEAT_PER_CHORD = 2
result_dir = 'results/surprising_cvae_update'

# length into frame base
sample_length_framewise = sample_length * BEAT_RESOLUTION * BEAT_PER_CHORD

## Surprising profile
for type_num in range(1,7):
    s = profile_type(type_num,sample_length.item(),norm).profile
    # Right-pad the 1-D profile with zeros up to MAX_SEQ_LEN, then reshape to
    # (1, MAX_SEQ_LEN, 1) for the prenet.
    surprise = F.pad(s, (0, MAX_SEQ_LEN - s.shape[0]), value=0).unsqueeze(0).unsqueeze(2)
    print(surprise.shape)
    print(sample_length)

    # Pure inference: skip autograd bookkeeping.
    with torch.no_grad():
        surprise = model_surp.surprise_prenet(surprise)
        # Fresh random latent for every profile type.
        z = torch.randn(1,MAX_SEQ_LEN,latent_size)
        output, chord_pred = model_surp.decode(z,sample_melody, surprise)

    # Most likely chord index per step, restricted to the song's real length.
    gen_chord_index = torch.max(chord_pred[0][:sample_length],-1).indices
    print(gen_chord_index)

    ########## Random sampling ###########
    # Proceed chord decode
    print('proceed chord decode...')
    joint_prob = chord_pred.cpu().detach().numpy()

    # Append argmax index to get pianoroll array
    accompany_pianoroll = argmax2pianoroll_all(joint_prob)

    # augment chord into frame base
    accompany_pianoroll_framewise, sample_chord_groundtruth_framewise = sequence2frame(accompany_pianoroll, sample_chord_groundtruth_idx)

    # write pianoroll
    filename = str(song_index) + '-surprisingness-function-type-' + str(type_num)
    write_one_pianoroll(result_dir, filename, sample_melody_framewise, accompany_pianoroll_framewise, sample_chord_groundtruth_framewise, sample_length_framewise, sample_tempo, sample_downbeat)


torch.Size([1, 272, 1])
tensor(45)
tensor([ 1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
         1,  1,  1,  1,  1,  1,  1,  1,  1,  1, 64, 64, 56, 56,  1,  1, 41, 41,
         1,  1,  1,  1,  1,  1,  1,  0,  0])
proceed chord decode...
accompany_pianoroll shape (1, 272, 128)
augment chord into frame base...
accompany_pianoroll frame shape: (1, 13056, 128)
groundtruth_pianoroll frame shape: (1, 13056, 128)
write pianoroll...
Finished!
torch.Size([1, 272, 1])
tensor(45)
tensor([64,  1,  1,  1,  1,  1,  1, 64, 64, 64, 80, 80, 63, 63, 24, 24, 41, 64,
         1, 56, 56, 56,  1,  1,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0])
proceed chord decode...
accompany_pianoroll shape (1, 272, 128)
augment chord into frame base...
accompany_pianoroll frame shape: (1, 13056, 128)
groundtruth_pianoroll frame shape: (1, 13056, 128)
write pianoroll...
Finished!
torch.Size([1, 272, 1])
tensor(45)
tensor([0, 0, 0, 0, 0, 0, 0, 0, 0

1.7320508075688772

In [62]:
# Load surp all model
# NOTE(review): the module printout recorded for this cell names the prenet
# `surprisingness_prenet`, while the printout for `model_surp` shows
# `surprise_prenet`. The two cells appear to have been run against different
# versions of the CVAE class -- confirm which one this checkpoint needs.
device = 'cpu'
print('building model...')
model_surp_average = CVAE(device=device).to(device)
model_surp_average.load_state_dict(
    torch.load(
        'output_models/model_surprisingness_all_cvae.pth',
        map_location='cpu',
    )
)
# Alternative checkpoint tried earlier: output_models/model_ctdcvae_conv_prenet.pth
print(model_surp_average)
# Inference mode; as the last expression this also echoes the module repr.
model_surp_average.eval()


building model...
CVAE(
  (encoder): LSTM(96, 256, num_layers=2, batch_first=True, bidirectional=True)
  (encoder_output2mean): Linear(in_features=512, out_features=16, bias=True)
  (encoder_output2logv): Linear(in_features=512, out_features=16, bias=True)
  (surprisingness_prenet): Linear(in_features=1, out_features=256, bias=True)
  (latent2decoder_input): Linear(in_features=848, out_features=128, bias=True)
  (decoder): LSTM(128, 256, num_layers=2, batch_first=True, dropout=0.2, bidirectional=True)
  (outputs2chord): Linear(in_features=512, out_features=96, bias=True)
)


CVAE(
  (encoder): LSTM(96, 256, num_layers=2, batch_first=True, bidirectional=True)
  (encoder_output2mean): Linear(in_features=512, out_features=16, bias=True)
  (encoder_output2logv): Linear(in_features=512, out_features=16, bias=True)
  (surprisingness_prenet): Linear(in_features=1, out_features=256, bias=True)
  (latent2decoder_input): Linear(in_features=848, out_features=128, bias=True)
  (decoder): LSTM(128, 256, num_layers=2, batch_first=True, dropout=0.2, bidirectional=True)
  (outputs2chord): Linear(in_features=512, out_features=96, bias=True)
)

In [63]:
surp_average = np.load('./data/surprisingness_average.npy')
norm_average = np.max(surp_average)

In [71]:
# Sampling
# torch.manual_seed(0)  # uncomment for reproducible latent samples
# NOTE(review): this cell calls `surprisingness_prenet` and a one-argument
# `decode(z)`, whereas the `model_surp` cell uses `surprise_prenet` and
# `decode(z, melody, surprise)`. Both models are built from the same imported
# CVAE name, so this cell presumably needs a different version of that class --
# confirm before a fresh top-to-bottom run.
song_index = 8
sample_melody = val_melody[song_index].unsqueeze(0)
sample_length = val_length[song_index]
sample_melody_framewise = np.expand_dims(val_melody_framewise[song_index],axis=0)
sample_chord_groundtruth_idx = np.expand_dims(val_chord_groundtruth_idx[song_index],axis=0)
sample_tempo = [tempos[song_index]]
sample_downbeat = [downbeats[song_index]]

# Loop-invariant settings, defined once instead of on every iteration.
MAX_SEQ_LEN = 272      # number of chord steps every sequence is padded to
latent_size = 16
BEAT_RESOLUTION = 24
BEAT_PER_CHORD = 2
result_dir = 'results/surprising_average_cvae'

# length into frame base
sample_length_framewise = sample_length * BEAT_RESOLUTION * BEAT_PER_CHORD

## Surprising profile
for type_num in range(1,7):
    # Pass a plain int (not the 0-d tensor), matching the other sampling cell.
    s = profile_type(type_num,sample_length.item(),norm_average).profile
    # Right-pad the 1-D profile with zeros up to MAX_SEQ_LEN, then reshape to
    # (1, MAX_SEQ_LEN, 1) for the prenet.
    surprisingness = F.pad(s, (0, MAX_SEQ_LEN - s.shape[0]), value=0).unsqueeze(0).unsqueeze(2)
    print(surprisingness.shape)
    print(sample_length)

    # Pure inference: skip autograd bookkeeping.
    with torch.no_grad():
        surprisingness = model_surp_average.surprisingness_prenet(surprisingness)
        # Fresh random latent for every profile type, concatenated with the
        # melody and the embedded surprise contour along the feature axis.
        latent = torch.randn(1,MAX_SEQ_LEN,latent_size)
        z = torch.cat([latent,sample_melody,surprisingness], dim=-1)
        output, chord_pred = model_surp_average.decode(z)

    # Most likely chord index per step, restricted to the song's real length.
    gen_chord_index = torch.max(chord_pred[0][:sample_length],-1).indices
    print(gen_chord_index)

    ########## Random sampling ###########
    # Proceed chord decode
    print('proceed chord decode...')
    joint_prob = chord_pred.cpu().detach().numpy()

    # Append argmax index to get pianoroll array
    accompany_pianoroll = argmax2pianoroll(joint_prob)

    # augment chord into frame base
    accompany_pianoroll_framewise, sample_chord_groundtruth_framewise = sequence2frame(accompany_pianoroll, sample_chord_groundtruth_idx)

    # write pianoroll
    filename = str(song_index) + '-surprisingness-average-type-' + str(type_num)
    write_one_pianoroll(result_dir, filename, sample_melody_framewise, accompany_pianoroll_framewise, sample_chord_groundtruth_framewise, sample_length_framewise, sample_tempo, sample_downbeat)



torch.Size([1, 272, 1])
tensor(45)
tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0])
proceed chord decode...
accompany_pianoroll shape (1, 272, 128)
augment chord into frame base...
accompany_pianoroll frame shape: (1, 13056, 128)
groundtruth_pianoroll frame shape: (1, 13056, 128)
write pianoroll...
Finished!
torch.Size([1, 272, 1])
tensor(45)
tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0])
proceed chord decode...
accompany_pianoroll shape (1, 272, 128)
augment chord into frame base...
accompany_pianoroll frame shape: (1, 13056, 128)
groundtruth_pianoroll frame shape: (1, 13056, 128)
write pianoroll...
Finished!
torch.Size([1, 272, 1])
tensor(45)
tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 