In [21]:
max_length = 100
latent_dim = 32
lstm_dim = 512
steps = 1000
batch_size = 128

# midi_dir =  '/content/gdrive/My Drive/Colab/midi_files'
# out_dir = '/content/gdrive/My Drive/Colab/vae_new_output_2'
midi_dir =  './midi_songs'
out_dir = './gan_new_output_1'

In [22]:
import os
import glob
from music21 import converter, instrument, note, chord, stream

def parse_midi_files(dir):
    notes = []
    songs = []
    file_list = []
    
    files = glob.glob(os.path.join(dir, '*.mid'))

    for file in files:
        song = []
        
        file_list.append(os.path.basename(file))
        midi = converter.parse(file)

        print("Parsing %s" % file)

        notes_to_parse = None

        try: # file has instrument parts
            s2 = instrument.partitionByInstrument(midi)
            notes_to_parse = s2.parts[0].recurse() 
        except: # file has notes in a flat structure
            notes_to_parse = midi.flat.notes

        for element in notes_to_parse:
            if isinstance(element, note.Note):
                song.append(str(element.pitch))
            elif isinstance(element, chord.Chord):
                song.append('.'.join(str(n) for n in element.normalOrder))
        songs.append(song)
        notes += song

    return notes, songs, file_list

In [23]:
notes, songs, file_list = parse_midi_files(midi_dir)

Parsing ./midi_songs/bwv782.mid
Parsing ./midi_songs/bwv783.mid
Parsing ./midi_songs/bwv781.mid
Parsing ./midi_songs/bwv780.mid
Parsing ./midi_songs/bwv784.mid
Parsing ./midi_songs/bwv785.mid
Parsing ./midi_songs/bwv778.mid
Parsing ./midi_songs/bwv786.mid
Parsing ./midi_songs/bwv779.mid
Parsing ./midi_songs/bwv774.mid
Parsing ./midi_songs/bwv775.mid
Parsing ./midi_songs/bwv777.mid
Parsing ./midi_songs/bwv776.mid
Parsing ./midi_songs/bwv772.mid
Parsing ./midi_songs/bwv773.mid


In [24]:
pitchnames = sorted(set(item for item in notes))
n_vocab = len(pitchnames)

note_to_int = dict((note, number) for number, note in enumerate(pitchnames))
int_to_note = dict([[number, note] for note, number in note_to_int.items()])

In [25]:
import numpy as np

def prepare_sequences(notes, sequence_length=100):
    # get all pitch names
    pitchnames = sorted(set(item for item in notes))
    n_vocab = len(pitchnames)
    
    # convert notes to one-hot encoded
    one_hot_notes = []
    for note in notes:
        one_hot_note = np.zeros(n_vocab)
        one_hot_note[note_to_int[note]] = 1
        one_hot_notes.append(one_hot_note)

    network_input = []
    network_output = []

    # create input sequences and the corresponding outputs
    for i in range(0, len(one_hot_notes) - sequence_length, 1):
        sequence_in = one_hot_notes[i:i + sequence_length]
        sequence_out = one_hot_notes[i + sequence_length]
        network_input.append(sequence_in)
        network_output.append(sequence_out)

    n_patterns = len(network_input)

    # reshape the input into a format compatible with LSTM layers
    network_input = np.reshape(network_input, (n_patterns, sequence_length, n_vocab))

    network_output = np.array(network_output)

    return (network_input, network_output)

In [26]:
network_input, network_output = prepare_sequences(notes, sequence_length=max_length)

In [27]:
from keras.layers import Input
from keras.layers import RepeatVector, Dense, TimeDistributed
from keras.layers import LSTM, CuDNNLSTM 
from keras.optimizers import Adam
from keras.models import Model
from tqdm import tqdm

In [28]:
def Generator(latent_dim=32, max_length=100, lstm_dim=512, n_vocab=None):
    model_input = Input(shape=(latent_dim,))
    x = RepeatVector(max_length)(model_input)
    x = LSTM(lstm_dim, return_sequences=True)(x)
    model_output = TimeDistributed(Dense(n_vocab, activation='softmax'))(x)
    model = Model(model_input, model_output)
    
    return model

In [33]:
def Discriminator(max_length=100, n_vocab=None, lstm_dim=512, opt=Adam(lr=1e-4)):
    model_input = Input(shape=(max_length, n_vocab))
    x = LSTM(lstm_dim)(model_input)
    model_output = Dense(2, activation='softmax')(x)
    model = Model(model_input, model_output)
    model.compile(loss='binary_crossentropy', optimizer=opt)
    
    return model

In [39]:
def combined_network(generator, discriminator, latent_dim=32, opt=Adam(lr=1e-3)):
    gan_input = Input(shape=[latent_dim])
    x = generator(gan_input)
    gan_output = discriminator(x)
    model = Model(gan_input, gan_output)
    model.compile(loss='binary_crossentropy', optimizer=opt)
    
    return model

In [40]:
def make_trainable(net, val):
    net.trainable = val
    for l in net.layers:
        l.trainable = val

In [41]:
network_input.shape

(8327, 100, 124)

In [44]:
generator = Generator(latent_dim=latent_dim, max_length=max_length, lstm_dim=lstm_dim, n_vocab=n_vocab)
discriminator = Discriminator(max_length=max_length, n_vocab=n_vocab, lstm_dim=lstm_dim, opt=Adam(lr=1e-4))
make_trainable(discriminator, False)
GAN = combined_network(generator, discriminator, latent_dim=32, opt=Adam(lr=1e-3))

for e in tqdm(range(steps)):  
    input_batch = network_input[np.random.randint(0, network_input.shape[0], size=batch_size),:,:]    
    noise_gen = np.random.uniform(0,1,size=[batch_size, latent_dim])
    generated_melodies = generator.predict(noise_gen)

    make_trainable(discriminator,True)

    X = np.concatenate((input_batch, generated_melodies))
    y = np.zeros([2 * batch_size, 2])
    y[:batch_size,1] = 1
    y[batch_size:,0] = 1

    discriminator.train_on_batch(X,y)

    make_trainable(discriminator,False)

    noise_gen = np.random.uniform(0,1,size=[batch_size, latent_dim])
    y2 = np.zeros([batch_size, 2])
    y2[:,1] = 1

    GAN.train_on_batch(noise_gen, y2 )

  4%|▍         | 39/1000 [08:09<3:01:52, 11.36s/it]

KeyboardInterrupt: 