In [1]:
import tensorflow as tf
import numpy as np
import os
import random
import copy

import keras
from keras.layers import Input, Dense, Conv2D, Dropout, Flatten, Reshape
from keras.optimizers import RMSprop, Adam
from keras.models import Model
from keras.models import Sequential
from keras.callbacks import LambdaCallback

from pypianoroll import Multitrack, Track
from matplotlib import pyplot as plt
import pypianoroll as ppr

import my_config

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# set up config
config = copy.deepcopy(my_config.config_5b)
dataset = np.load(config['dataset_path'])
print(config)

{'track_num': 5, 'program_nums': [0, 0, 24, 32, 48], 'is_drums': [True, False, False, False, False], 'track_names': ['Drums', 'Piano', 'Guitar', 'Bass', 'Ensemble'], 'tempo': 120, 'velocity': 100, 'dataset_path': '/Users/mac/Desktop/Brain/MuseGAN/training_data/lastfm_alternative_5b_phrase.npy', 'dataset_name': 'lastfm_alternative_5b_phrase', 'pause_between_samples': 96, 'num_bar': 4, 'num_beat': 4, 'num_pitch': 84, 'num_track': 8, 'num_timestep': 96, 'beat_resolution': 24, 'lowest_pitch': 24}


In [3]:
# load dataset
dataset = dataset[0:100]
dataset.shape

(100, 6, 4, 96, 84, 5)

In [4]:
maxlen = 96

In [5]:
# feature
reshaped_dataset = dataset.reshape((len(dataset),-1,84,5))

feature = []
label = []

for now_song in reshaped_dataset:
    for i in range(0, len(now_song), 24):
        if (i + maxlen + 1) < len(now_song):
            feature.append(now_song[i:i+maxlen])
            label.append(now_song[i+maxlen])
# label
# batch

In [8]:
# split data to train and validation
dataset_size = len(dataset)
val_ratio = 0.1

train_size = int(dataset_size * (1 - val_ratio))
# eval_size = dataset_size - train_size

x_train = np.array(feature[0:train_size])
y_train = np.array(label[0:train_size])

x_val = np.array(feature[train_size: dataset_size])
y_val = np.array(label[train_size: dataset_size])

In [9]:
print('x_train shape:', x_train.shape)
print('y_train shape:', y_train.shape)
print('x_val shape:', x_val.shape)
print('y_val shape:', y_val.shape)

x_train shape: (90, 96, 84, 5)
y_train shape: (90, 84, 5)
x_val shape: (10, 96, 84, 5)
y_val shape: (10, 84, 5)


In [10]:
# build model
xx = Input(shape=(maxlen, 84, 5))
xxx = Flatten()(xx)
xxx = Dense(84*5, activation='relu')(xxx)
xxx = Reshape((84, 5))(xxx)
model = Model(xx, xxx)
model.summary()

model.compile(loss='mean_squared_error',
              optimizer=RMSprop(),
              metrics=['accuracy'])

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 96, 84, 5)         0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 40320)             0         
_________________________________________________________________
dense_1 (Dense)              (None, 420)               16934820  
_________________________________________________________________
reshape_1 (Reshape)          (None, 84, 5)             0         
Total params: 16,934,820
Trainable params: 16,934,820
Non-trainable params: 0
_________________________________________________________________


In [11]:
# snapshot

In [12]:
generate_length = 1000

In [18]:
def write_midi(filepath, pianorolls, config):
    is_drums = config['is_drums']
    track_names = config['track_names']
    tempo = config['tempo']
    beat_resolution = config['beat_resolution']
    program_nums = config['program_nums']
    
    if not np.issubdtype(pianorolls.dtype, np.bool_):
        raise TypeError("Support only binary-valued piano-rolls")
    if isinstance(program_nums, int):
        program_nums = [program_nums]
    if isinstance(is_drums, int):
        is_drums = [is_drums]

    if program_nums is None:
        program_nums = [0] * len(pianorolls)
    if is_drums is None:
        is_drums = [False] * len(pianorolls)

    multitrack = Multitrack(beat_resolution=beat_resolution, tempo=tempo)
    for idx in range(pianorolls.shape[2]):
        if track_names is None:
            track = Track(pianorolls[..., idx], program_nums[idx],
                          is_drums[idx])
        else:
            track = Track(pianorolls[..., idx], program_nums[idx],
                          is_drums[idx], track_names[idx])
        multitrack.append_track(track)
    multitrack.write(filepath)

In [19]:
def save_midi(filepath, phrases, config):
    if not np.issubdtype(phrases.dtype, np.bool_):
        raise TypeError("Support only binary-valued piano-rolls")

    reshaped = phrases.reshape(-1, phrases.shape[1] * phrases.shape[2],
                               phrases.shape[3], phrases.shape[4])
    
    # print("reshaped shape:", reshaped.shape)
    # result final shape: (5, 1, 96, 84, 5)

    pad_width = ((0, 0), (0, config['pause_between_samples']),
                 (config['lowest_pitch'],
                  128 - config['lowest_pitch'] - config['num_pitch']),
                 (0, 0))
    
    # pad width 表示前补和后补的长度
    # print('pad_width:',pad_width)
    padded = np.pad(reshaped, pad_width, 'constant')
    
    print("padded shape:", padded.shape)
    pianorolls = padded.reshape(-1, padded.shape[2], padded.shape[3])
    print("pianorolls shape:", pianorolls.shape)
    write_midi(filepath, pianorolls, config)

In [20]:
def on_epoch_end(epoch, logs):
    start_index = random.randint(0, len(x_train)-1)
    
    result = []
    x_pred = np.array([x_train[start_index]])
    
    print('x_pred shape:', x_pred.shape)
    
    result = copy.deepcopy(x_pred)
    print("result shape:", result.shape)

    print(config)
    
    for i in range(generate_length):
        y_pred = model.predict(x_pred, verbose = 0)
        # print("y_pred shape:", y_pred.shape)
        result = np.append(result, [y_pred], axis = 1)
        
        # print("before x_pred shape:", x_pred[:,1:maxlen,:,:].shape)
        
        x_pred = np.append(x_pred[:,1:maxlen,:,:], [y_pred], axis = 1) 
        
        # print("after x_pred shape:", x_pred.shape)

        
        # print("result shape:", result.shape)
        # print("x_pred shape:", x_pred.shape)
        
    print('result shape:',result.shape)
    result = np.array(result, dtype=np.bool_)
    # print('result:',result)
    
    need_length = (generate_length + maxlen) // (96*4) * (96*4)
    result = result[0]
    result = result[0:need_length]
    
    # now is stard piano roll
    print('result shape:',result.shape)
    
    result = result.reshape((-1,4,96,84,5))
    
    print('result final shape:',result.shape)

    save_midi('/Users/mac/Desktop/test_ppr/test_train_%d.mid' % (epoch+1), result, config)
    # print('2333')

In [21]:
print_callback = LambdaCallback(on_epoch_end=on_epoch_end)

In [23]:
# train
model.fit(  x = x_train, 
            y = y_train,   
            validation_data = (x_val, y_val),
            batch_size = 2,
            verbose = 1,
            epochs = 5,
            callbacks = [print_callback])

Train on 90 samples, validate on 10 samples
Epoch 1/5
x_pred shape: (1, 96, 84, 5)
result shape: (1, 96, 84, 5)
{'track_num': 5, 'program_nums': [0, 0, 24, 32, 48], 'is_drums': [True, False, False, False, False], 'track_names': ['Drums', 'Piano', 'Guitar', 'Bass', 'Ensemble'], 'tempo': 120, 'velocity': 100, 'dataset_path': '/Users/mac/Desktop/Brain/MuseGAN/training_data/lastfm_alternative_5b_phrase.npy', 'dataset_name': 'lastfm_alternative_5b_phrase', 'pause_between_samples': 96, 'num_bar': 4, 'num_beat': 4, 'num_pitch': 84, 'num_track': 8, 'num_timestep': 96, 'beat_resolution': 24, 'lowest_pitch': 24}
result shape: (1, 1096, 84, 5)
result shape: (768, 84, 5)
result final shape: (2, 4, 96, 84, 5)
padded shape: (2, 480, 128, 5)
pianorolls shape: (960, 128, 5)
Epoch 2/5
x_pred shape: (1, 96, 84, 5)
result shape: (1, 96, 84, 5)
{'track_num': 5, 'program_nums': [0, 0, 24, 32, 48], 'is_drums': [True, False, False, False, False], 'track_names': ['Drums', 'Piano', 'Guitar', 'Bass', 'Ensemble'

<keras.callbacks.History at 0x19ad5fb70>

In [None]:
# tensorboard