In [1]:
import numpy as np


# Convert an array of integer indices into a one-hot style encoding.
def build_one_hot(data, max_value):
    """Encode integer indices as vectors of length ``max_value`` holding +1/-1.

    Args:
        data: integer index (or array of indices) used to pick rows of a
            ``max_value`` x ``max_value`` lookup table.
        max_value: number of classes, i.e. the length of each encoded vector.

    Returns:
        An int32 array with one extra trailing axis of size ``max_value``;
        the selected class position holds 1 and every other position -1.
    """
    # Lookup table: -1 everywhere, +1 on the diagonal.
    table = np.full((max_value, max_value), -1, dtype=np.int32)
    np.fill_diagonal(table, 1)

    return table[data]


# Demo: encode the integers 0..8 arranged as a 3x3 grid using 9 classes.
build_one_hot(np.arange(9).reshape(3, 3), 9)

array([[[ 1, -1, -1, -1, -1, -1, -1, -1, -1],
        [-1,  1, -1, -1, -1, -1, -1, -1, -1],
        [-1, -1,  1, -1, -1, -1, -1, -1, -1]],

       [[-1, -1, -1,  1, -1, -1, -1, -1, -1],
        [-1, -1, -1, -1,  1, -1, -1, -1, -1],
        [-1, -1, -1, -1, -1,  1, -1, -1, -1]],

       [[-1, -1, -1, -1, -1, -1,  1, -1, -1],
        [-1, -1, -1, -1, -1, -1, -1,  1, -1],
        [-1, -1, -1, -1, -1, -1, -1, -1,  1]]], dtype=int32)

In [2]:
def get_data():
    """Load the chorale training split and encode it as a +1/-1 tensor.

    Steps: load the ``train`` entry of the archive, drop every time step that
    contains a NaN, keep the first 32 time steps of each song, split them into
    2 groups of 16 steps, one-hot encode the values with 84 classes and move
    the class axis in front of the 4-voice axis.

    Returns:
        int32 array of shape [songs, 2, 16, 84, 4].
    """
    # Load the data. The 'train' entry is an object array (one variable-length
    # array per song), which numpy stores pickled, so allow_pickle is required
    # on current numpy versions.
    # NOTE: unpickling executes arbitrary code; only load archives you trust.
    # The context manager makes sure the archive file handle is closed.
    with np.load('../datas/chorales/Jsb16thSeparated.npz',
                 encoding='bytes',
                 allow_pickle=True) as archive:
        data = archive['train']

    # 229 songs in total, each of variable length, all with 4 voices.
    # Preview at most the first 10 songs.
    for i in range(min(10, len(data))):
        print('data[%d]=' % i, data[i].shape, data[i].dtype)

    print('data=', data.shape, data.dtype)

    # Drop every time step (row of 4 voices) that contains a NaN.
    new_data = []
    for song in data:
        song = np.asarray(song)
        valid_rows = ~np.isnan(song).any(axis=1)
        new_data.append(song[valid_rows].astype(np.int32))

    print('new_data=', len(new_data), new_data[0].shape, new_data[1].shape)

    # Keep the first 32 time steps of every song.
    # [songs, 32, 4]
    data_cut = np.array([song[:32] for song in new_data])

    print('data_cut=', data_cut.shape, data_cut.dtype)

    # Split every excerpt into 2 groups of 16 time steps.
    # [songs, 32, 4] -> [songs, 2, 16, 4]
    # -1 lets numpy infer the song count instead of hard-coding 229.
    data_cut = data_cut.reshape([-1, 2, 16, 4])

    # One-hot encode.
    # [songs, 2, 16, 4] -> [songs, 2, 16, 4, 84]
    data_cut = build_one_hot(data_cut, max_value=84)

    # Swap the last two axes.
    # [songs, 2, 16, 4, 84] -> [songs, 2, 16, 84, 4]
    data_cut = data_cut.transpose([0, 1, 2, 4, 3])

    return data_cut


# Build the training tensor once; later cells read this module-level `data`.
data = get_data()

# Display the shape and dtype of the encoded dataset.
data.shape, data.dtype

data[0]= (192, 4) float16
data[1]= (228, 4) float16
data[2]= (208, 4) float16
data[3]= (432, 4) float16
data[4]= (260, 4) float16
data[5]= (212, 4) float16
data[6]= (292, 4) float16
data[7]= (180, 4) float16
data[8]= (132, 4) float16
data[9]= (192, 4) float16
data= (229,) object
new_data= 229 (192, 4) (228, 4)
data_cut= (229, 32, 4) int32


((229, 2, 16, 84, 4), dtype('int32'))

In [3]:
def merge_note(note, duration=None):
    """Merge runs of identical adjacent notes into longer notes.

    Walking left to right, a note is folded into the previously kept note
    when both have the same value and their combined duration does not
    exceed 1.0.

    Args:
        note: 1-D sequence of note values.
        duration: optional 1-D sequence of durations, one per note. Defaults
            to 0.25 (float32) for every note. The caller's array is never
            modified.

    Returns:
        Tuple ``(note, duration)`` of arrays with the merged notes and their
        accumulated durations.
    """
    note = np.asarray(note)
    if duration is None:
        duration = np.full(note.shape, fill_value=0.25, dtype=np.float32)
    else:
        # Work on a private copy so the caller's durations stay untouched.
        duration = np.array(duration)

    if len(note) == 0:
        return note, duration

    # keep[k] is False for every note that was absorbed by its predecessor.
    keep = np.ones(len(note), dtype=bool)

    # Index of the note currently accumulating duration.
    last = 0
    for j in range(1, len(note)):
        # Same pitch and the merged duration would not exceed 1.0: absorb it.
        if note[last] == note[j] and duration[last] + duration[j] <= 1.0:
            duration[last] += duration[j]
            keep[j] = False
        else:
            last = j

    return note[keep], duration[keep]


# Distinct neighbouring notes are never merged, so every duration stays 0.25.
print(merge_note(np.arange(5)))
# Identical notes are merged until one more would push the duration past 1.0.
print(merge_note(np.ones(5)))

print(merge_note(np.arange(7)))
print(merge_note(np.ones(7)))

(array([0, 1, 2, 3, 4]), array([0.25, 0.25, 0.25, 0.25, 0.25], dtype=float32))
(array([1., 1.]), array([1.  , 0.25], dtype=float32))
(array([0, 1, 2, 3, 4, 5, 6]), array([0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25], dtype=float32))
(array([1., 1.]), array([1.  , 0.75], dtype=float32))


In [4]:
import music21


def save_to_mid(data, filename):
    """Write a 4-voice note matrix to a MIDI file.

    Args:
        data: array whose columns 0..3 each hold one voice as a sequence of
            note values (the caller passes shape [32, 4]).
        filename: path of the MIDI file to write.

    Each column is compacted with ``merge_note`` and written as its own
    ``music21`` part; the score carries a metronome mark of 66.
    """
    # data -> [32, 4]
    score = music21.stream.Score()
    score.append(music21.tempo.MetronomeMark(number=66))

    for voice in range(4):
        part = music21.stream.Part()

        # Collapse repeated notes, then convert to plain Python numbers.
        pitches, lengths = merge_note(data[:, voice])
        for pitch, length in zip(pitches.tolist(), lengths.tolist()):
            tone = music21.note.Note(pitch)
            tone.duration = music21.duration.Duration(length)
            part.append(tone)

        score.append(part)

    score.write('midi', fp=filename)


# Decode the first training sample back to note indices (argmax over the
# 84-class axis), flatten the 2 x 16 steps to 32 and write it as a MIDI file.
save_to_mid(data[0].argmax(axis=2).reshape(32, 4), 'sample.mid')

In [5]:
def show(file):
    """Read a MIDI file and display it through music21's 'midi' viewer.

    Args:
        file: path of the MIDI file to open.
    """
    midi_file = music21.midi.MidiFile()
    midi_file.open(file)
    try:
        midi_file.read()
    finally:
        # Always release the file handle, even when parsing fails.
        midi_file.close()
    music21.midi.translate.midiFileToStream(midi_file).show('midi')


# Play back the MIDI file written from the first training sample.
show('sample.mid')

In [6]:
import keras

# Shared initializer: every kernel below is drawn from N(0, 0.02).
weight_init = keras.initializers.RandomNormal(mean=0., stddev=0.02)

# Critic network: maps a [2, 16, 84, 4] sample to a single unbounded score
# (the last Dense layer has no activation).
# The shape comments below assume Keras' default channels_last data format,
# i.e. (2, 16, 84) are the convolved axes and 4 is the channel axis
# -- TODO confirm against the backend configuration.
cls = keras.models.Sequential([
    # Collapse the first axis (size 2): [2, 16, 84, 4] -> [1, 16, 84, 128]
    keras.layers.Conv3D(filters=128,
                        kernel_size=(2, 1, 1),
                        padding='valid',
                        strides=(1, 1, 1),
                        kernel_initializer=weight_init,
                        input_shape=(2, 16, 84, 4)),
    keras.layers.LeakyReLU(),
    # 1x1x1 convolution, shape unchanged.
    keras.layers.Conv3D(filters=128,
                        kernel_size=(1, 1, 1),
                        padding='valid',
                        strides=(1, 1, 1),
                        kernel_initializer=weight_init),
    keras.layers.LeakyReLU(),
    # Reduce the 84-wide axis in steps of 12: 84 -> 7
    keras.layers.Conv3D(filters=128,
                        kernel_size=(1, 1, 12),
                        padding='same',
                        strides=(1, 1, 12),
                        kernel_initializer=weight_init),
    keras.layers.LeakyReLU(),
    # Reduce the remaining 7 positions to 1.
    keras.layers.Conv3D(filters=128,
                        kernel_size=(1, 1, 7),
                        padding='same',
                        strides=(1, 1, 7),
                        kernel_initializer=weight_init),
    keras.layers.LeakyReLU(),
    # Halve the 16-step axis: 16 -> 8
    keras.layers.Conv3D(filters=128,
                        kernel_size=(1, 2, 1),
                        padding='same',
                        strides=(1, 2, 1),
                        kernel_initializer=weight_init),
    keras.layers.LeakyReLU(),
    # 8 -> 4
    keras.layers.Conv3D(filters=128,
                        kernel_size=(1, 2, 1),
                        padding='same',
                        strides=(1, 2, 1),
                        kernel_initializer=weight_init),
    keras.layers.LeakyReLU(),
    # 4 -> 2, widening to 256 filters.
    keras.layers.Conv3D(filters=256,
                        kernel_size=(1, 4, 1),
                        padding='same',
                        strides=(1, 2, 1),
                        kernel_initializer=weight_init),
    keras.layers.LeakyReLU(),
    # 2 -> 1, widening to 512 filters.
    keras.layers.Conv3D(filters=512,
                        kernel_size=(1, 3, 1),
                        padding='same',
                        strides=(1, 2, 1),
                        kernel_initializer=weight_init),
    keras.layers.LeakyReLU(),
    # Fully connected head producing one linear score per sample.
    keras.layers.Flatten(),
    keras.layers.Dense(1024, kernel_initializer=weight_init),
    keras.layers.LeakyReLU(),
    keras.layers.Dense(1, activation=None, kernel_initializer=weight_init),
])

# Display the model object.
cls

Using TensorFlow backend.







<keras.engine.sequential.Sequential at 0x7f969b21acc0>

In [7]:
def get_gen():
    """Build the generator model.

    Inputs (all noise vectors):
        chord:  [32]     shared by all voices, expanded over the 2 groups.
        style:  [32]     shared by all voices and both groups.
        melody: [4, 32]  one vector per voice, expanded over the 2 groups.
        groove: [4, 32]  one vector per voice, shared by both groups.

    For each of the 2 groups and each of the 4 voices the four 32-wide
    vectors are concatenated to 128 values and fed to a separate bar
    generator producing [1, 16, 84, 1]. The 4 voices are concatenated on the
    last axis and the 2 groups on axis 1.

    Returns:
        A Keras model mapping [chord, style, melody, groove] to a tensor of
        shape [2, 16, 84, 4] per sample, with tanh-bounded values.
    """

    def TemporalNetwork():
        """Expand one 32-wide vector into 2 vectors of width 32: [32] -> [2, 32]."""
        return keras.models.Sequential([
            keras.layers.Reshape([1, 1, 32], input_shape=(32, )),
            keras.layers.Conv2DTranspose(filters=1024,
                                         kernel_size=(2, 1),
                                         padding='valid',
                                         strides=(1, 1),
                                         kernel_initializer=weight_init),
            keras.layers.BatchNormalization(momentum=0.9),
            keras.layers.Activation('relu'),
            keras.layers.Conv2DTranspose(filters=32,
                                         kernel_size=(1, 1),
                                         padding='valid',
                                         strides=(1, 1),
                                         kernel_initializer=weight_init),
            keras.layers.BatchNormalization(momentum=0.9),
            keras.layers.Activation('relu'),
            keras.layers.Reshape([2, 32]),
        ])

    def BarGenerator():
        """Map a 128-wide vector to one voice of one group: [128] -> [1, 16, 84, 1]."""
        return keras.models.Sequential([
            keras.layers.Dense(1024, input_shape=(128, )),
            keras.layers.BatchNormalization(momentum=0.9),
            keras.layers.Activation('relu'),
            keras.layers.Reshape([2, 1, 512]),
            # Upsample the step axis: 2 -> 4 -> 8 -> 16
            keras.layers.Conv2DTranspose(filters=512,
                                         kernel_size=(2, 1),
                                         padding='same',
                                         strides=(2, 1),
                                         kernel_initializer=weight_init),
            keras.layers.BatchNormalization(momentum=0.9),
            keras.layers.Activation('relu'),
            keras.layers.Conv2DTranspose(filters=256,
                                         kernel_size=(2, 1),
                                         padding='same',
                                         strides=(2, 1),
                                         kernel_initializer=weight_init),
            keras.layers.BatchNormalization(momentum=0.9),
            keras.layers.Activation('relu'),
            keras.layers.Conv2DTranspose(filters=256,
                                         kernel_size=(2, 1),
                                         padding='same',
                                         strides=(2, 1),
                                         kernel_initializer=weight_init),
            keras.layers.BatchNormalization(momentum=0.9),
            keras.layers.Activation('relu'),
            # Upsample the second axis: 1 -> 7 -> 84
            keras.layers.Conv2DTranspose(filters=256,
                                         kernel_size=(1, 7),
                                         padding='same',
                                         strides=(1, 7),
                                         kernel_initializer=weight_init),
            keras.layers.BatchNormalization(momentum=0.9),
            keras.layers.Activation('relu'),
            keras.layers.Conv2DTranspose(filters=1,
                                         kernel_size=(1, 12),
                                         padding='same',
                                         strides=(1, 12),
                                         kernel_initializer=weight_init),
            keras.layers.Activation('tanh'),
            keras.layers.Reshape([1, 16, 84, 1]),
        ])

    def take(index):
        """Return a Lambda layer selecting ``x[:, index, :]``.

        The index is handed to the layer through ``arguments`` rather than
        captured from the enclosing loop. A lambda that closes over the loop
        variables reads them late: when the finished model is applied to new
        input tensors the layer functions run again and would all see the
        final loop values instead of their own.
        """
        return keras.layers.Lambda(lambda x, index: x[:, index, :],
                                   arguments={'index': index})

    input_chord = keras.layers.Input(shape=(32, ))
    input_style = keras.layers.Input(shape=(32, ))
    input_melody = keras.layers.Input(shape=(4, 32))
    input_groove = keras.layers.Input(shape=(4, 32))

    # [32] -> [2, 32]: one chord vector per group.
    output_chord = TemporalNetwork()(input_chord)

    output = []
    for i in range(2):
        output_c = []

        for j in range(4):

            # Melody of voice j, expanded over the groups, then group i.
            output_melody = keras.models.Sequential([
                take(j),
                TemporalNetwork(),
                take(i),
            ])(input_melody)

            # 4 x [32] -> [128]
            concat = keras.layers.Concatenate(axis=1)([
                take(i)(output_chord),
                input_style, output_melody,
                take(j)(input_groove)
            ])
            output_c.append(BarGenerator()(concat))

        # 4 x [1, 16, 84, 1] -> [1, 16, 84, 4]
        output.append(keras.layers.Concatenate(axis=-1)(output_c))

    # 2 x [1, 16, 84, 4] -> [2, 16, 84, 4]
    output = keras.layers.Concatenate(axis=1)(output)

    gen = keras.models.Model(
        [input_chord, input_style, input_melody, input_groove], output)

    return gen


# Build the generator once; later cells read this module-level `gen`.
gen = get_gen()

# Display the model object.
gen






<keras.engine.training.Model at 0x7f95d6f2f8d0>

In [8]:
from functools import partial


def get_gan():
    """Build and compile the two training models around `gen` and `cls`.

    Returns:
        Tuple ``(gan, cls_model)``:
        * ``cls_model`` takes [real sample, chord, style, melody, groove] and
          outputs the critic score of the real sample, of the generated
          sample and of a random interpolation between both. It is compiled
          with two Wasserstein losses and a gradient-penalty loss
          (weights 1, 1, 10) while the generator layers are frozen.
        * ``gan`` takes [chord, style, melody, groove] and outputs the critic
          score of the generated sample. It is compiled with the Wasserstein
          loss while the critic layers are frozen.
    """

    class RandomMerge(keras.layers.merge._Merge):
        """Per-sample random interpolation between two input tensors."""

        def __init__(self):
            super().__init__()

        def _merge_function(self, inputs):
            # Draw one mixing weight per sample. The batch size is read from
            # the input tensor so any batch size works, not only 64.
            batch_size = keras.backend.shape(inputs[0])[0]
            alpha = keras.backend.random_uniform((batch_size, 1, 1, 1, 1))
            return (alpha * inputs[0]) + ((1 - alpha) * inputs[1])

    def set_trainable(model, trainable):
        """Set the trainable flag on a model and on each of its layers."""
        model.trainable = trainable
        for layer in model.layers:
            layer.trainable = trainable

    # Freeze the generator while the critic training model is compiled.
    set_trainable(gen, False)

    input_cls = keras.layers.Input(shape=[2, 16, 84, 4])
    input_chord = keras.layers.Input(shape=(32, ))
    input_style = keras.layers.Input(shape=(32, ))
    input_melody = keras.layers.Input(shape=(4, 32))
    input_groove = keras.layers.Input(shape=(4, 32))

    output_gen = gen([input_chord, input_style, input_melody, input_groove])

    output_cls_fake = cls(output_gen)
    output_cls_real = cls(input_cls)

    # Random point between the real and the generated sample.
    input_merge = RandomMerge()([input_cls, output_gen])

    output_cls_merge = cls(input_merge)

    def get_grads_loss(y_true, y_pred, input_merge):
        """Gradient penalty: mean of (1 - ||d y_pred / d input_merge||_2)^2.

        ``y_true`` is ignored; it only exists to satisfy the Keras loss
        signature.
        """
        grads = keras.backend.gradients(y_pred, input_merge)[0]
        grads = keras.backend.square(grads)
        # Sum over every axis except the batch axis.
        grads = keras.backend.sum(grads, axis=np.arange(1, len(grads.shape)))
        grads = keras.backend.sqrt(grads)
        grads = keras.backend.square(1 - grads)
        return keras.backend.mean(grads)

    grads_loss = partial(get_grads_loss, input_merge=input_merge)
    # functools.partial objects have no __name__, which some Keras versions
    # read from loss callables.
    grads_loss.__name__ = 'grads_loss'

    def wasserstein(y_true, y_pred):
        """Wasserstein loss: labels of +1 / -1 select the sign of the score."""
        return -keras.backend.mean(y_true * y_pred)

    cls_model = keras.models.Model(
        inputs=[
            input_cls, input_chord, input_style, input_melody, input_groove
        ],
        outputs=[output_cls_real, output_cls_fake, output_cls_merge])

    cls_model.compile(loss=[wasserstein, wasserstein, grads_loss],
                      optimizer=keras.optimizers.Adam(lr=0.001,
                                                      beta_1=0.5,
                                                      beta_2=0.9),
                      loss_weights=[1, 1, 10])

    # Swap the frozen side before compiling the generator training model.
    set_trainable(cls, False)
    set_trainable(gen, True)

    gan = keras.models.Model(
        [input_chord, input_style, input_melody, input_groove],
        output_cls_fake)

    gan.compile(optimizer=keras.optimizers.Adam(lr=0.001,
                                                beta_1=0.5,
                                                beta_2=0.9),
                loss=wasserstein)

    # Leave the critic flagged trainable again after compilation.
    set_trainable(cls, True)

    return gan, cls_model


# Build both training models; the training cell reads these module-level names.
gan, cls_model = get_gan()

# Display the model objects.
gan, cls_model




(<keras.engine.training.Model at 0x7f95d62fd2b0>,
 <keras.engine.training.Model at 0x7f95d63dac18>)

In [9]:
def test():
    """Generate one sample from random noise, save it as 'pred.mid' and show it."""
    # Noise inputs in the order the generator expects:
    # chord, style, melody, groove.
    latent_shapes = [(1, 32), (1, 32), (1, 4, 32), (1, 4, 32)]
    latents = [np.random.normal(0, 1, shape) for shape in latent_shapes]

    # predict:  [1, 2, 16, 84, 4]
    # argmax over the 84-class axis: -> [1, 2, 16, 4]
    # reshape:  -> [32, 4]
    notes = gen.predict(latents).argmax(axis=3).reshape(32, 4)

    save_to_mid(notes, 'pred.mid')

    show('pred.mid')


# Generate and play a sample before any training has been run.
test()

In [10]:
def train(epochs=1000, critic_steps=5, log_every=50):
    """Alternate critic and generator updates.

    Args:
        epochs: number of outer iterations; each performs ``critic_steps``
            critic updates followed by one generator update.
        critic_steps: critic updates per generator update.
        log_every: print the latest losses every this many iterations.

    Reads the module-level ``data``, ``cls_model`` and ``gan``.
    """
    # Number of samples per update step.
    batch_size = 64

    def sample_latents():
        """Draw one batch of noise inputs: chord, style, melody, groove."""
        return [
            np.random.normal(0, 1, (batch_size, 32)),
            np.random.normal(0, 1, (batch_size, 32)),
            np.random.normal(0, 1, (batch_size, 4, 32)),
            np.random.normal(0, 1, (batch_size, 4, 32)),
        ]

    def train_cls():
        """Run one critic update and return its losses."""
        # Targets for the three outputs: real score, generated score and the
        # gradient-penalty output (whose loss ignores its target).
        pos = np.ones((batch_size, 1), dtype=np.int32)
        neg = -np.ones((batch_size, 1), dtype=np.int32)
        dummy = np.zeros((batch_size, 1), dtype=np.int32)

        latents = sample_latents()

        # Random batch of real samples, drawn with replacement.
        data_sub = data[np.random.randint(0, data.shape[0], batch_size)]

        return cls_model.train_on_batch([data_sub] + latents,
                                        [pos, neg, dummy])

    def train_gen():
        """Run one generator update and return its loss."""
        pos = np.ones((batch_size, 1), dtype=np.int32)

        return gan.train_on_batch(sample_latents(), pos)

    # Stays None when critic_steps is 0, so the log line below still works.
    loss_cls = None
    for epoch in range(epochs):
        for _ in range(critic_steps):
            loss_cls = train_cls()

        loss_gen = train_gen()

        if epoch % log_every == 0:
            print(epoch, loss_cls, loss_gen)


# Run the full training loop; losses are printed every 50 iterations.
train()

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


  'Discrepancy between trainable weights and collected trainable'
  'Discrepancy between trainable weights and collected trainable'


0 [8.912887, -0.85698175, -0.034562703, 0.98044306] 0.0040728305


  'Discrepancy between trainable weights and collected trainable'


50 [-27.792698, -267.06592, 231.58945, 0.76837736] -282.16797
100 [-27.88805, -257.02512, 218.39185, 1.0745221] -225.14125
150 [-18.23226, -43.738167, 19.566328, 0.59395784] -33.739716
200 [-16.835743, -75.27756, 53.224827, 0.52169865] -45.045284
250 [-15.013928, -43.07686, 21.97362, 0.60893106] -14.732184
300 [-14.297857, -106.17346, 89.668396, 0.22072089] -97.43548
350 [-14.1342125, -32.94745, 15.056252, 0.37569845] -12.34396
400 [-12.606506, -67.773705, 48.30908, 0.68581194] -37.58142
450 [-11.947504, -14.487357, 1.7038689, 0.08359844] -15.795556
500 [-11.203049, -20.237303, 8.629093, 0.040516045] -6.5508084
550 [-12.3838825, -23.784214, 9.745614, 0.16547178] -12.909263
600 [-11.5849285, -35.62585, 22.524584, 0.15163384] -24.991646
650 [-11.719839, -28.288511, 14.372057, 0.21966158] -16.3292
700 [-10.081033, -24.602768, 12.748704, 0.17730309] -10.602691
750 [-10.870434, -20.221205, 8.281688, 0.10690833] -7.7463045
800 [-9.710244, -26.209225, 14.81569, 0.16832903] -12.453541
850 [-9.

In [13]:
# Generate and play a sample with the current generator weights.
test()