In [1]:

import numpy as np
import tensorflow as tf
import midi_statistics
import utils
import os
from gensim.models import Word2Vec
import pickle
import matplotlib.pyplot as plt
import torch
from tqdm import tqdm
from models import Diffusion #, UNet_1D
from dilated import DiffWave, DiffWave_wText
from modules import UNet
from torch.utils.data import DataLoader
from dataset import MIDIDataset 
from torchtext.data.metrics import bleu_score
import random

# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(device)

cuda


In [2]:

# On-disk locations of the two pre-trained Word2Vec encoders
# (syllable-level and word-level) that are loaded in the next cell.
syll_model_path, word_model_path = (
    './enc_models/syllEncoding_20190419.bin',
    './enc_models/wordLevelEncoder_20190419.bin',
)


In [3]:

# Load the syllable-level and word-level Word2Vec encoders (in that order).
# Their `.wv` lookups are what `lyrics_encode` uses to embed lyrics.
syllModel, wordModel = [
    Word2Vec.load(path) for path in (syll_model_path, word_model_path)
]

def _syllable_word_pairs(line):
    """Expand a compact lyric line into the [syllable, word] pairs used below.

    Words are separated by spaces and the syllables of one word by '|', e.g.
    "Peo|ple get" -> [['Peo', 'People'], ['ple', 'People'], ['get', 'get']].
    The word is rebuilt by concatenating its syllables.
    """
    pairs = []
    for token in line.split():
        syllables = token.split('|')
        word = ''.join(syllables)
        pairs.extend([syllable, word] for syllable in syllables)
    return pairs


# Other hand-syllabified prompts tried earlier (not used below), same notation:
#   Then the rain|storm came ov|er me and i felt my spi|rit break
#   E|very|where I look I found you look|king back        (word: "looking")
#   Must have been love but its o|ver now lay a whis|per on my pil|low
#   You turn my nights in|to days Lead me mys|te|ri|ous ways

# test data (0~2): three 20-syllable prompts, one list of [syllable, word]
# pairs per song.
lyrics_list = [
    _syllable_word_pairs(line)
    for line in (
        "Peo|ple get rea|dy a train a you need no bag|gage you just get on board you need",
        "hear the die|sels need no tic|ket you just thank the Lord so peo|ple get rea|dy coast",
        "gon|na be a migh|ty king So en|e|mies ZazuWell nev|er seen a king of beasts With",
    )
]


In [4]:
# Seed every random number generator the notebook touches (PyTorch, the
# stdlib `random` module, NumPy and TensorFlow) with one shared value so
# that reruns are reproducible.
SEED = 0
torch.manual_seed(SEED)
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

In [5]:
# Load the pre-processed train / validation / test matrices (one song per row).
# Later cells slice each row as columns [0, 60) = melody values (reshaped to
# 20 x 3) and columns [60, ...) = lyric embeddings.
train = np.load('./data/processed_dataset_matrices/train_data_matrix.npy')
validate = np.load('./data/processed_dataset_matrices/valid_data_matrix.npy')
test = np.load('./data/processed_dataset_matrices/test_data_matrix.npy')

print("Training set: ", np.shape(train)[0], " songs, Validation set: ", np.shape(validate)[0], " songs, "
      "Test set: ", np.shape(test)[0], " songs.")

# Shape hyper-parameters passed to MIDIDataset, plus the loader batch size.
NUM_MIDI_FEATURES = 3
NUM_SYLLABLE_FEATURES = 20
SONGLENGTH = 20
BATCH_SIZE = 256

# Wrap each split in a dataset and a shuffling DataLoader.
dataset_train = MIDIDataset(train, NUM_MIDI_FEATURES, SONGLENGTH, NUM_SYLLABLE_FEATURES)
dataset_valid = MIDIDataset(validate, NUM_MIDI_FEATURES, SONGLENGTH, NUM_SYLLABLE_FEATURES)
dataset_test = MIDIDataset(test, NUM_MIDI_FEATURES, SONGLENGTH, NUM_SYLLABLE_FEATURES)
dataloader_train = DataLoader(dataset_train, batch_size=BATCH_SIZE, shuffle=True)
dataloader_valid = DataLoader(dataset_valid, batch_size=BATCH_SIZE, shuffle=True)
# Fixed: the test loader used to wrap `dataset_valid`, so "test" batches were
# really validation data (both splits have the same row count, which hid it).
dataloader_test = DataLoader(dataset_test, batch_size=BATCH_SIZE, shuffle=True)

Training set:  11149  songs, Validation set:  1051  songs, Test set:  1051  songs.
Num samples: 11149
Num samples: 1051
Num samples: 1051


In [6]:
def lyrics_encode(lyr_list, song_length=20):
    """Embed syllable/word lyric pairs into one flat vector per song.

    Each position of a song is encoded as the syllable embedding
    (``syllModel.wv``) followed by the word embedding (``wordModel.wv``).
    Songs longer than ``song_length`` are truncated; shorter songs are padded
    by repeating the embedding of their own last position.

    Args:
        lyr_list: iterable of songs; each song is a sequence of
            ``[syllable, word]`` pairs.
        song_length: number of positions encoded per song (default 20, the
            value that used to be hard-coded).

    Returns:
        np.ndarray with one row per song; each row is the concatenation of
        the ``song_length`` per-position embeddings.

    Raises:
        ValueError: if ``song_length`` is smaller than 1, or if a song has no
            pairs. Previously an empty song either failed with an unbound
            local variable or silently reused the last embedding of the
            preceding song as padding.
    """
    if song_length < 1:
        raise ValueError("song_length must be at least 1")

    enc_lyr = []
    for lyrics in lyr_list:
        if len(lyrics) == 0:
            raise ValueError("cannot encode a song without any lyrics")

        cond = [
            np.concatenate((syllModel.wv[pair[0]], wordModel.wv[pair[1]]))
            for pair in lyrics[:song_length]
        ]

        # Pad short songs with this song's own last embedding.
        missing = song_length - len(cond)
        if missing > 0:
            cond.extend([cond[-1]] * missing)

        enc_lyr.append(np.concatenate(cond))
    return np.array(enc_lyr)

In [7]:
def lyrics_to_sentence(lyr):
    """Rebuild the plain-text sentence from a list of [syllable, word] pairs.

    Example input:
        [['Then','Then'],['the','the'],['rain','rainstorm'],['storm','rainstorm'],
         ['came','came'],['ov','over'],['er','over'],['me','me'],['and','and'],
         ['i','i'],['felt','felt'],['my','my'],['spi','spirit'],['rit','spirit'],
         ['break','break']]
    Output:
        "Then the rainstorm came over me and i felt my spirit break"

    Consecutive pairs belonging to the same word are collapsed into a single
    occurrence of that word. A pair starts a new word when its word differs
    from the previous one, or when the syllables collected so far already
    spell the previous word (so a repeated one-syllable word such as "I I"
    is kept twice).

    The earlier version joined the syllables instead of the words and left a
    trailing space, which did not match the documented output above.

    Args:
        lyr: sequence of ``[syllable, word]`` pairs.

    Returns:
        The words joined by single spaces; "" for an empty input.
    """
    words = []
    current_word = None
    collected = ""  # syllables gathered so far for `current_word`
    for syllable, word in lyr:
        same_word_continues = (
            current_word is not None
            and word == current_word
            and collected != current_word
        )
        if same_word_continues:
            collected += syllable
            continue
        words.append(word)
        current_word = word
        collected = syllable
    return " ".join(words)

In [8]:
def make_music_baseline(lyr_data, length_song, output_dir="poster/model1"):
    """Generate one MIDI file per lyric row with the saved TensorFlow GAN model.

    The SavedModel at ``./saved_gan_models/saved_model_best_overall_mmd`` is
    loaded into a fresh graph/session. For every row of ``lyr_data`` the row is
    split into 20 equal chunks and fed as ``model/input_metadata``, uniform
    noise of shape (1, 20, 3) is fed as ``model/input_data``, and the
    ``output_midi`` tensor is evaluated. The result is passed through
    ``utils.discretize`` and ``midi_statistics.tune_song``, truncated to
    ``length_song`` entries and written to ``<output_dir>/model1<i>.mid``.

    Args:
        lyr_data: iterable of flat lyric-embedding rows; each row's length
            must be divisible by 20 (``np.split`` requirement).
        length_song: number of leading entries of the tuned sample to write.
        output_dir: directory for the generated files; created if missing
            (previously a missing directory made the first write fail).
    """
    model_path = './saved_gan_models/saved_model_best_overall_mmd'
    os.makedirs(output_dir, exist_ok=True)

    with tf.compat.v1.Session(graph=tf.Graph()) as sess:
        tf.compat.v1.saved_model.loader.load(sess, [], model_path)
        # Look tensors up on the session's own graph, where the model was loaded.
        graph = sess.graph
        keep_prob = graph.get_tensor_by_name("model/keep_prob:0")
        input_metadata = graph.get_tensor_by_name("model/input_metadata:0")
        input_songdata = graph.get_tensor_by_name("model/input_data:0")
        output_midi = graph.get_tensor_by_name("output_midi:0")

        for i, fc in enumerate(lyr_data):
            feed_dict = {
                # Keep probability 1.0 — presumably disables dropout; confirm against the model.
                keep_prob.name: 1.0,
                # Batch of one song: 20 per-position embedding chunks.
                input_metadata.name: [np.split(np.asarray(fc), 20)],
                # Fresh uniform noise for each song.
                input_songdata.name: np.random.uniform(size=(1, 20, 3)),
            }
            generated_features = sess.run(output_midi, feed_dict)
            # Take the first row of every returned element.
            sample = [x[0, :] for x in generated_features]
            sample = midi_statistics.tune_song(utils.discretize(sample))
            midi_pattern = utils.create_midi_pattern_from_discretized_data(sample[0:length_song])
            destination = os.path.join(output_dir, "model1" + str(i) + ".mid")
            midi_pattern.write(destination)

        print('done')

In [9]:
def make_music_om(lyr_data, output_dir="poster/model2"):
    """Generate one MIDI file per lyric row with the saved diffusion model.

    Loads the ``DiffWave_wText`` weights from ``saved_models/saved_model``,
    samples one melody per row of ``lyr_data`` via
    ``Diffusion.sample_1d_wText``, then denormalises, discretises and tunes
    each melody and writes it to ``<output_dir>/model2<i>.mid``.

    Relies on the notebook-level ``device`` and ``dataset_test`` objects.

    Changes from the earlier version:
      * no optimizer or loss is built or restored, since sampling never
        uses them;
      * the checkpoint is loaded with ``map_location=device`` so a checkpoint
        saved on GPU also loads on a CPU-only machine;
      * the model is put in eval mode and sampling runs under
        ``torch.no_grad()``;
      * the output directory is created if it does not exist.

    Args:
        lyr_data: array-like of lyric-embedding rows, one row per melody.
        output_dir: directory for the generated files.
    """
    our_model_path = 'saved_models/saved_model'
    diffusion = Diffusion()
    model = DiffWave_wText(device=device).to(device)

    # NOTE: torch.load unpickles the file; only load checkpoints you trust.
    state_dict = torch.load(our_model_path, map_location=device)
    model.load_state_dict(state_dict['model'])
    model.eval()
    print('Model loaded!', our_model_path)

    os.makedirs(output_dir, exist_ok=True)

    sample_lyrics = torch.as_tensor(lyr_data, dtype=torch.float32).to(device)
    with torch.no_grad():
        sampled_melodies = diffusion.sample_1d_wText(
            model, sample_lyrics, n=sample_lyrics.shape[0]
        ).cpu().detach()

    for i, sampled_melody in enumerate(sampled_melodies):
        sampled_melody = sampled_melody.transpose(1, 0).numpy()  # -> [melody len, 3]
        denormed_melody = dataset_test.denormalize2(sampled_melody)
        denormed_melody = dataset_test.discretize(denormed_melody)
        tuned_melody = midi_statistics.tune_song(denormed_melody)
        midi_melody_tuned = dataset_test.create_midi_pattern_from_discretized_data(tuned_melody)
        destination = os.path.join(output_dir, "model2" + str(i) + ".mid")
        midi_melody_tuned.write(destination)

In [25]:
# Generate baseline (GAN) melodies for every test song. Columns from index 60
# onward of each test row are passed as the lyric conditioning.
make_music_baseline(lyr_data=test[:, 60:], length_song=SONGLENGTH)

INFO:tensorflow:Restoring parameters from ./saved_gan_models/saved_model_best_overall_mmd\variables\variables
done


In [26]:
# Generate diffusion-model melodies for every test song, conditioned on the
# same lyric columns (index 60 onward) as the baseline run.
make_music_om(lyr_data=test[:, 60:])

Model loaded! saved_models/saved_model


In [27]:
# Split every test row at column 60: the first 60 values are the melody
# (reshaped to 20 x 3 in the next cell), the remainder the lyric embeddings.
midis, syls = np.split(test, [60], axis=1)

In [28]:
# Export the ground-truth test melodies as MIDI files, for comparison with the
# melodies written by the two generators above.
# Create the output directory first so the first write cannot fail on a
# fresh checkout.
os.makedirs("poster/gt", exist_ok=True)
for i, midi in enumerate(midis):
    # One row holds SONGLENGTH notes of NUM_MIDI_FEATURES values each (20 x 3).
    midi = midi.reshape(SONGLENGTH, NUM_MIDI_FEATURES)
    melody = dataset_test.create_midi_pattern_from_discretized_data(midi)
    destination = "poster/gt/gt"+str(i)+".mid"
    melody.write(destination)