# Music Generation GAN
Approach used for reference: https://towardsdatascience.com/generating-pokemon-inspired-music-from-neural-networks-bc240014132

In [0]:
import os
import sys
import numpy as np
import glob
from __future__ import print_function, division
import random
import pickle
from keras.layers import Input, Dense, Reshape, Dropout, CuDNNLSTM, Bidirectional
from keras.layers import BatchNormalization, Activation, ZeroPadding2D
from keras.layers.advanced_activations import LeakyReLU
from keras.models import Sequential, Model
from keras.optimizers import Adam
from keras.utils import np_utils
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from IPython.display import HTML
from google.colab import files, drive
from music21 import *

In [0]:
"""
Helper functions for processing MIDI files
"""
def get_notes():
    """
    Get all the notes and chords from the midi files
    """
    if os.path.exists('midi.pickle'):
        notes = pickle.load(open("midi.pickle","rb"))
    else:
        notes = []

        for file in glob.glob("midi_data/*.mid"):
            midi = converter.parse(file)

            print("Parsing %s" % file)

            notes_to_parse = None

            try: # file has instrument parts
                s2 = instrument.partitionByInstrument(midi)
                notes_to_parse = s2.parts[0].recurse() 
            except: # file has notes in a flat structure
                notes_to_parse = midi.flat.notes
                
            for element in notes_to_parse:
                if isinstance(element, note.Note):
                    notes.append(str(element.pitch))
                elif isinstance(element, chord.Chord):
                    notes.append('.'.join(str(n) for n in element.normalOrder))
        with open('midi.pickle', 'wb') as filepath:
                pickle.dump(notes, filepath)
    return notes

def prepare_sequences(notes, n_vocab):
    """ Prepare the sequences used by the Neural Network """
    sequence_length = 100

    # Get all pitch names
    pitchnames = sorted(set(item for item in notes))

    # Create a dictionary to map pitches to integers
    note_to_int = dict((note, number) for number, note in enumerate(pitchnames))

    network_input = []
    network_output = []

    # create input sequences and the corresponding outputs
    for i in range(0, len(notes) - sequence_length, 1):
        sequence_in = notes[i:i + sequence_length]
        sequence_out = notes[i + sequence_length]
        network_input.append([note_to_int[char] for char in sequence_in])
        network_output.append(note_to_int[sequence_out])

    n_patterns = len(network_input)

    # Reshape the input into a format compatible with LSTM layers
    network_input = np.reshape(network_input, (n_patterns, sequence_length, 1))
    
    # Normalize input between -1 and 1
    network_input = (network_input - float(n_vocab)/2) / (float(n_vocab)/2)
    network_output = np_utils.to_categorical(network_output)

    return (network_input, network_output)

def generate_notes(model, network_input, n_vocab):
    """ Generate notes from the neural network based on a sequence of notes """
    # pick a random sequence from the input as a starting point for the prediction
    start = numpy.random.randint(0, len(network_input)-1)
    
    # Get pitch names and store in a dictionary
    pitchnames = sorted(set(item for item in notes))
    int_to_note = dict((number, note) for number, note in enumerate(pitchnames))

    pattern = network_input[start]
    prediction_output = []

    # generate 500 notes
    for note_index in range(500):
        prediction_input = numpy.reshape(pattern, (1, len(pattern), 1))
        prediction_input = prediction_input / float(n_vocab)

        prediction = model.predict(prediction_input, verbose=0)

        index = numpy.argmax(prediction)
        result = int_to_note[index]
        prediction_output.append(result)
        
        pattern = numpy.append(pattern,index)
        #pattern.append(index)
        pattern = pattern[1:len(pattern)]

    return prediction_output
  
def create_midi(prediction_output, filename):
    """ convert the output from the prediction to notes and create a midi file
        from the notes """
    offset = 0
    output_notes = []

    # create note and chord objects based on the values generated by the model
    for item in prediction_output:
        pattern = item[0]
        # pattern is a chord
        if ('.' in pattern) or pattern.isdigit():
            notes_in_chord = pattern.split('.')
            notes = []
            for current_note in notes_in_chord:
                new_note = note.Note(int(current_note))
                new_note.storedInstrument = instrument.Piano()
                notes.append(new_note)
            new_chord = chord.Chord(notes)
            new_chord.offset = offset
            output_notes.append(new_chord)
        # pattern is a note
        else:
            new_note = note.Note(pattern)
            new_note.offset = offset
            new_note.storedInstrument = instrument.Piano()
            output_notes.append(new_note)

        # increase offset each iteration so that notes do not stack
        offset += 0.5

    midi_stream = stream.Stream(output_notes)
    midi_stream.write('midi', fp='{}.mid'.format(filename))

In [0]:
"""
Functions to initialize networks
"""
def build_discriminator(seq_shape):
    model = Sequential()
    model.add(CuDNNLSTM(512, input_shape=seq_shape, return_sequences=True))
    model.add(Bidirectional(CuDNNLSTM(512)))
    model.add(Dense(512))
    model.add(LeakyReLU(alpha=0.2))
    model.add(Dense(256))
    model.add(LeakyReLU(alpha=0.2))
    model.add(Dense(1, activation='sigmoid'))
    model.summary()

    seq = Input(shape=seq_shape)
    validity = model(seq)

    return Model(seq, validity)

def build_generator(seq_shape, latent_dim):

    model = Sequential()
    model.add(Dense(256, input_dim=latent_dim))
    model.add(LeakyReLU(alpha=0.2))
    model.add(BatchNormalization(momentum=0.8))
    model.add(Dense(512))
    model.add(LeakyReLU(alpha=0.2))
    model.add(BatchNormalization(momentum=0.8))
    model.add(Dense(1024))
    model.add(LeakyReLU(alpha=0.2))
    model.add(BatchNormalization(momentum=0.8))
    model.add(Dense(np.prod(seq_shape), activation='tanh'))
    model.add(Reshape(seq_shape))
    model.summary()
    
    noise = Input(shape=(latent_dim,))
    seq = model(noise)

    return Model(noise, seq)

In [0]:
"""
Functions for performing and plotting training
"""
def train(discriminator, generator, combined, notes, epochs, batch_size=128, sample_interval=50):

    # Load and convert the data
    n_vocab = len(set(notes))
    X_train, y_train = prepare_sequences(notes, n_vocab)

    # Adversarial ground truths
    real = np.ones((batch_size, 1))
    fake = np.zeros((batch_size, 1))
    
    # Training the model
    for epoch in range(epochs):

        # Training the discriminator
        # Select a random batch of note sequences
        idx = np.random.randint(0, X_train.shape[0], batch_size)
        real_seqs = X_train[idx]

        noise = np.random.normal(0, 1, (batch_size, latent_dim))

        # Generate a batch of new note sequences
        gen_seqs = generator.predict(noise)

        # Train the discriminator
        d_loss_real = discriminator.train_on_batch(real_seqs, real)
        d_loss_fake = discriminator.train_on_batch(gen_seqs, fake)
        d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)


        #  Training the Generator
        noise = np.random.normal(0, 1, (batch_size, latent_dim))

        # Train the generator (to have the discriminator label samples as real)
        g_loss = combined.train_on_batch(noise, real)

        # Print the progress and save into loss lists
        if epoch % sample_interval == 0:
            print ("%d [D loss: %f, acc.: %.2f%%] [G loss: %f]" % (epoch, d_loss[0], 100*d_loss[1], g_loss))
            disc_loss.append(d_loss[0])
            gen_loss.append(g_loss)

def plot_loss(disc_loss, gen_loss):
    plt.plot(disc_loss, c='red')
    plt.plot(gen_loss, c='blue')
    plt.title("GAN Loss per Epoch")
    plt.legend(['Discriminator', 'Generator'])
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.savefig('GAN_Loss_per_Epoch_final.png', transparent=False)
    plt.close()

In [8]:
# Set sequence length to 100 notes
seq_length = 100
seq_shape = (seq_length, 1)
latent_dim = 1000
disc_loss = []
gen_loss =[]

optimizer = Adam(0.0002, 0.5)

# Build and compile the discriminator
discriminator = build_discriminator(seq_shape)
discriminator.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])

# Build the generator
generator = build_generator(seq_shape, latent_dim)

# The generator takes noise as input and generates note sequences
z = Input(shape=(latent_dim,))
generated_seq = generator(z)

# For the combined model we will only train the generator
discriminator.trainable = False

# The discriminator takes generated images as input and determines validity
validity = discriminator(generated_seq)

# The combined model  (stacked generator and discriminator)
# Trains the generator to fool the discriminator
combined = Model(z, validity)
combined.compile(loss='binary_crossentropy', optimizer=optimizer)

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
cu_dnnlstm_3 (CuDNNLSTM)     (None, 100, 512)          1054720   
_________________________________________________________________
bidirectional_2 (Bidirection (None, 1024)              4202496   
_________________________________________________________________
dense_8 (Dense)              (None, 512)               524800    
_________________________________________________________________
leaky_re_lu_6 (LeakyReLU)    (None, 512)               0         
_________________________________________________________________
dense_9 (Dense)              (None, 256)               131328    
_________________________________________________________________
leaky_re_lu_7 (LeakyReLU)    (None, 256)               0         
_________________________________________________________________
dense_10 (Dense)             (None, 1)                

In [0]:
notes = get_notes()

In [0]:
train(discriminator, generator, combined, notes, epochs=5000, batch_size=32, sample_interval=1)
plot_loss(disc_loss, gen_loss)









  'Discrepancy between trainable weights and collected trainable'






  'Discrepancy between trainable weights and collected trainable'


0 [D loss: 0.696695, acc.: 45.31%] [G loss: 0.689703]
1 [D loss: 0.668604, acc.: 76.56%] [G loss: 0.687677]


  'Discrepancy between trainable weights and collected trainable'


2 [D loss: 0.630965, acc.: 73.44%] [G loss: 0.691364]
3 [D loss: 0.546545, acc.: 82.81%] [G loss: 0.711633]
4 [D loss: 0.421584, acc.: 78.12%] [G loss: 0.846053]
5 [D loss: 0.297144, acc.: 90.62%] [G loss: 1.211569]
6 [D loss: 0.142243, acc.: 98.44%] [G loss: 3.936227]
7 [D loss: 0.002166, acc.: 100.00%] [G loss: 9.128134]
8 [D loss: 0.007691, acc.: 100.00%] [G loss: 11.909663]
9 [D loss: 0.000951, acc.: 100.00%] [G loss: 12.731110]
10 [D loss: 0.235037, acc.: 96.88%] [G loss: 2.548332]
11 [D loss: 0.140999, acc.: 98.44%] [G loss: 2.792893]
12 [D loss: 0.147357, acc.: 95.31%] [G loss: 2.344263]
13 [D loss: 0.141752, acc.: 96.88%] [G loss: 3.200874]
14 [D loss: 0.089833, acc.: 96.88%] [G loss: 5.909007]
15 [D loss: 0.141520, acc.: 98.44%] [G loss: 6.441468]
16 [D loss: 0.100843, acc.: 96.88%] [G loss: 5.054346]
17 [D loss: 0.083480, acc.: 96.88%] [G loss: 6.381463]
18 [D loss: 0.095199, acc.: 96.88%] [G loss: 5.358906]
19 [D loss: 0.209385, acc.: 90.62%] [G loss: 4.312189]
20 [D loss: 0

In [0]:
"""
This will work sometimes and not work other times.
There's an issue with the mapping of outputs to notes
"""
def generate(generator, latent_dim, input_notes):
    # Get pitch names and store in a dictionary
    notes = input_notes
    pitchnames = sorted(set(item for item in notes))
    int_to_note = dict((number, note) for number, note in enumerate(pitchnames))
    
    # Use random noise to generate sequences
    noise = np.random.normal(0, 1, (1, latent_dim))
    predictions = generator.predict(noise)
    
    n = len(pitchnames) / 2
    pred_notes = [x*n+n for x in predictions[0]]
    # pred_notes = [x*242+242 for x in predictions[0]]
    pred_notes = [int_to_note[int(x)] for x in pred_notes]
    
    create_midi(pred_notes, 'gan_final')

generate(gan,notes)

NameError: ignored