## Prepare Data

In [1]:
from google.colab import drive
drive.mount('/content/gdrive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/gdrive


In [2]:
!ls -l /content/gdrive/My\ Drive/Colab

total 2739
-rw------- 1 root root   72252 May 25 02:44 3.6-classifying-newswires.ipynb
-rw------- 1 root root  414705 May 25 02:34 5.2-using-convnets-with-small-datasets.ipynb
drwx------ 2 root root    4096 Aug  5 07:59 DL4US
-rw------- 1 root root 1546306 Jun  3 14:27 file_manupilation_test.ipynb
-rw------- 1 root root    3766 Aug  5 12:43 google_drive_test.ipynb
drwx------ 2 root root    4096 Sep 18 11:48 midi_songs
-rw------- 1 root root    3694 Aug  5 12:44 mount_google_drive.ipynb
-rw------- 1 root root  733350 Mar 18  2018 multi-class_classification_of_handwritten_digits.ipynb のコピー
drwx------ 2 root root    4096 Sep 28 14:19 music_gan
drwx------ 2 root root    4096 Sep 29 10:21 music_gan2
drwx------ 2 root root    4096 Sep 29 12:05 music_gan3
drwx------ 2 root root    4096 Sep 29 12:47 music_gan4
drwx------ 2 root root    4096 Sep 30 13:42 music_gan_freeze


In [0]:
!mkdir /content/gdrive/My\ Drive/Colab/music_gan

mkdir: cannot create directory ‘/content/gdrive/My Drive/Colab/music_gan’: File exists


In [3]:
!pip install music21

Collecting music21
[?25l  Downloading https://files.pythonhosted.org/packages/4a/db/317c21f4b5b970c3bfb5ff321e333059faf775621ae6433abcd4c68c69db/music21-5.3.0.tar.gz (18.0MB)
[K    100% |████████████████████████████████| 18.0MB 1.8MB/s 
[?25hBuilding wheels for collected packages: music21
  Running setup.py bdist_wheel for music21 ... [?25l- \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | done
[?25h  Stored in directory: /root/.cache/pip/wheels/53/8b/a6/be1921c60a68f0bea31c6b6a0a7b125badd61294d6a694407f
Successfully built music21
Installing collected packages: music21
Successfully installed music21-5.3.0


In [0]:
import glob
import pickle
import numpy
from music21 import converter, instrument, note, chord

In [0]:
def parse_midi_files():
    """ Get all the notes and chords from the midi files in the ./midi_songs directory """
    notes = []
    songs = []

    for file in glob.glob("/content/gdrive/My Drive/Colab/midi_songs/*.mid"):
        song = []
        midi = converter.parse(file)

        print("Parsing %s" % file)

        notes_to_parse = None

        try: # file has instrument parts
            s2 = instrument.partitionByInstrument(midi)
            notes_to_parse = s2.parts[0].recurse() 
        except: # file has notes in a flat structure
            notes_to_parse = midi.flat.notes

        for element in notes_to_parse:
            if isinstance(element, note.Note):
                song.append(str(element.pitch))
            elif isinstance(element, chord.Chord):
                song.append('.'.join(str(n) for n in element.normalOrder))
        songs.append(song)
        notes += song

    return notes, songs


In [6]:
notes, songs = parse_midi_files()

Parsing /content/gdrive/My Drive/Colab/midi_songs/bwv786.mid
Parsing /content/gdrive/My Drive/Colab/midi_songs/bwv785.mid
Parsing /content/gdrive/My Drive/Colab/midi_songs/bwv784.mid
Parsing /content/gdrive/My Drive/Colab/midi_songs/bwv783.mid
Parsing /content/gdrive/My Drive/Colab/midi_songs/bwv782.mid
Parsing /content/gdrive/My Drive/Colab/midi_songs/bwv781.mid
Parsing /content/gdrive/My Drive/Colab/midi_songs/bwv780.mid
Parsing /content/gdrive/My Drive/Colab/midi_songs/bwv779.mid
Parsing /content/gdrive/My Drive/Colab/midi_songs/bwv778.mid
Parsing /content/gdrive/My Drive/Colab/midi_songs/bwv777.mid
Parsing /content/gdrive/My Drive/Colab/midi_songs/bwv776.mid
Parsing /content/gdrive/My Drive/Colab/midi_songs/bwv775.mid
Parsing /content/gdrive/My Drive/Colab/midi_songs/bwv774.mid
Parsing /content/gdrive/My Drive/Colab/midi_songs/bwv773.mid
Parsing /content/gdrive/My Drive/Colab/midi_songs/bwv772.mid


In [0]:
max_length = 100

pitchnames = sorted(set(item for item in notes))
n_vocab = len(pitchnames)

note_to_int = dict((note, number) for number, note in enumerate(pitchnames))
int_to_note = dict([[number, note] for note, number in note_to_int.items()])

In [0]:
def prepare_sequences(notes, sequence_length=100):
    # get all pitch names
    pitchnames = sorted(set(item for item in notes))
    n_vocab = len(pitchnames)
    
    # convert notes to one-hot encoded
    one_hot_notes = []
    for note in notes:
        one_hot_note = np.zeros(n_vocab)
        one_hot_note[note_to_int[note]] = 1
        one_hot_notes.append(one_hot_note)

    network_input = []
    network_output = []

    # create input sequences and the corresponding outputs
    for i in range(0, len(one_hot_notes) - sequence_length, 1):
        sequence_in = one_hot_notes[i:i + sequence_length]
        sequence_out = one_hot_notes[i + sequence_length]
        network_input.append(sequence_in)
        network_output.append(sequence_out)

    n_patterns = len(network_input)

    # reshape the input into a format compatible with LSTM layers
    network_input = numpy.reshape(network_input, (n_patterns, sequence_length, n_vocab))

    network_output = np.array(network_output)

    return (network_input, network_output)

In [0]:
import numpy as np

network_input, network_output = prepare_sequences(notes, sequence_length=max_length)

## Generator Network

In [18]:
import keras
from keras import layers
import numpy as np

latent_dim = 32

generator_input = keras.Input(shape=(latent_dim,))

x = layers.RepeatVector(max_length)(generator_input)
x = layers.CuDNNLSTM(512, return_sequences=True)(x)
x = layers.CuDNNLSTM(512, return_sequences=True)(x)
    
x = layers.TimeDistributed(layers.Dense(n_vocab, activation='softmax'))(x)

generator = keras.models.Model(generator_input, x)
generator.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_4 (InputLayer)         (None, 32)                0         
_________________________________________________________________
repeat_vector_2 (RepeatVecto (None, 100, 32)           0         
_________________________________________________________________
cu_dnnlstm_5 (CuDNNLSTM)     (None, 100, 512)          1118208   
_________________________________________________________________
cu_dnnlstm_6 (CuDNNLSTM)     (None, 100, 512)          2101248   
_________________________________________________________________
time_distributed_2 (TimeDist (None, 100, 124)          63612     
Total params: 3,283,068
Trainable params: 3,283,068
Non-trainable params: 0
_________________________________________________________________


## Discriminator Network

In [19]:
discriminator_input = layers.Input(shape=(max_length, n_vocab))
x = layers.CuDNNLSTM(512, return_sequences=True)(discriminator_input)
x = layers.CuDNNLSTM(512)(x)
x = layers.Dense(1, activation='sigmoid')(x)

discriminator = keras.models.Model(discriminator_input, x)
discriminator.summary()

discriminator_optimizer = keras.optimizers.RMSprop(lr=0.0008, clipvalue=1.0, decay=1e-8)
discriminator.compile(optimizer=discriminator_optimizer, loss='binary_crossentropy')

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_5 (InputLayer)         (None, 100, 124)          0         
_________________________________________________________________
cu_dnnlstm_7 (CuDNNLSTM)     (None, 100, 512)          1306624   
_________________________________________________________________
cu_dnnlstm_8 (CuDNNLSTM)     (None, 512)               2101248   
_________________________________________________________________
dense_4 (Dense)              (None, 1)                 513       
Total params: 3,408,385
Trainable params: 3,408,385
Non-trainable params: 0
_________________________________________________________________


## Adversarial Network

In [0]:
discriminator.trainable = False

gan_input = keras.Input(shape=(latent_dim,))
gan_output = discriminator(generator(gan_input))
gan = keras.models.Model(gan_input, gan_output)

gan_optimizer = keras.optimizers.RMSprop(lr=0.0004, clipvalue=1.0, decay=1e-8)
gan.compile(optimizer=gan_optimizer, loss='binary_crossentropy')

In [13]:
gan.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         (None, 32)                0         
_________________________________________________________________
model_1 (Model)              (None, 100, 124)          3283068   
_________________________________________________________________
model_2 (Model)              (None, 1)                 3408385   
Total params: 6,691,453
Trainable params: 3,283,068
Non-trainable params: 3,408,385
_________________________________________________________________


In [0]:
import os
from music21 import instrument, note, stream, chord

def create_midi(prediction_output, file_path):
    """ convert the output from the prediction to notes and create a midi file
        from the notes """
    offset = 0
    output_notes = []

    # create note and chord objects based on the values generated by the model
    for pattern in prediction_output:
        # pattern is a chord
        if ('.' in pattern) or pattern.isdigit():
            notes_in_chord = pattern.split('.')
            notes = []
            for current_note in notes_in_chord:
                new_note = note.Note(int(current_note))
                new_note.storedInstrument = instrument.Piano()
                notes.append(new_note)
            new_chord = chord.Chord(notes)
            new_chord.offset = offset
            output_notes.append(new_chord)
        # pattern is a note
        else:
            new_note = note.Note(pattern)
            new_note.offset = offset
            new_note.storedInstrument = instrument.Piano()
            output_notes.append(new_note)

        # increase offset each iteration so that notes do not stack
        offset += 0.5

    midi_stream = stream.Stream(output_notes)
    midi_stream.write('midi', fp=file_path)

In [21]:
import csv

iterations = 20001
batch_size = 20
save_dir = '/content/gdrive/My Drive/Colab/music_gan_freeze'

f = open(os.path.join(save_dir, 'gan_log.csv'),'a')
writer = csv.writer(f)

d_loss = float('inf')
a_loss = 9999.9

# Start training loop
start = 0
for step in range(iterations):
    # Sample random points in the latent space
    random_latent_vectors = np.random.normal(size=(batch_size, latent_dim))

    # Decode them to fake images
    generated_songs = generator.predict(random_latent_vectors)

    # Combine them with real images
    stop = start + batch_size
    real_songs = network_input[start: stop]
    combined_songs = np.concatenate([generated_songs, real_songs])

    # Assemble labels discriminating real from fake images
    labels = np.concatenate([np.ones((batch_size, 1)),
                             np.zeros((batch_size, 1))])
    # Add random noise to the labels - important trick!
    labels += 0.05 * np.random.random(labels.shape)

    # Train the discriminator
    if d_loss > a_loss * 0.7:
        d_loss = discriminator.train_on_batch(combined_songs, labels)

    # sample random points in the latent space
    random_latent_vectors = np.random.normal(size=(batch_size, latent_dim))

    # Assemble labels that say "all real songs"
    misleading_targets = np.zeros((batch_size, 1))

    # Train the generator (via the gan model,
    # where the discriminator weights are frozen)
    if a_loss > d_loss * 0.7:
        a_loss = gan.train_on_batch(random_latent_vectors, misleading_targets)
    
    writer.writerow([step, d_loss, a_loss])
    
    start += batch_size
    if start > len(network_input) - batch_size:
      start = 0

    # Occasionally save / plot
    if step % 100 == 0:
        # Save model weights
        gan.save_weights(os.path.join('/content/gdrive/My Drive/Colab/music_gan_freeze', 'gan.h5'))

        # Print metrics
        print('discriminator loss at step %s: %s' % (step, d_loss))
        print('adversarial loss at step %s: %s' % (step, a_loss))

        prediction_indices = np.argmax(generated_songs, axis=2)

        prediction_song = [int_to_note[index] for index in prediction_indices[0]]

        # Save Generated Song Midi
        create_midi(prediction_song, os.path.join(save_dir, 'generated_song_' + str(step) + '.mid'))

f.close()


  'Discrepancy between trainable weights and collected trainable'


discriminator loss at step 0: 0.69246185
adversarial loss at step 0: 0.872691
discriminator loss at step 100: 0.593648
adversarial loss at step 100: 0.96513003
discriminator loss at step 200: 0.6597153
adversarial loss at step 200: 0.14114648
discriminator loss at step 300: 0.110154666
adversarial loss at step 300: 0.24791975
discriminator loss at step 400: 0.110154666
adversarial loss at step 400: 0.24711016
discriminator loss at step 500: 0.110154666
adversarial loss at step 500: 0.24735793
discriminator loss at step 600: 0.110154666
adversarial loss at step 600: 0.2443426
discriminator loss at step 700: 0.110154666
adversarial loss at step 700: 0.24311094
discriminator loss at step 800: 0.110154666
adversarial loss at step 800: 0.24372578
discriminator loss at step 900: 0.110154666
adversarial loss at step 900: 0.24372347
discriminator loss at step 1000: 0.110154666
adversarial loss at step 1000: 0.24064454
discriminator loss at step 1100: 0.110154666
adversarial loss at step 1100: 