<a href="https://colab.research.google.com/github/sourcecode369/unconventional-neural-networks/blob/master/music-generation/Generate_Music_with_Transofrmers.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# clone the repository for midi songs and notes file
!git clone https://github.com/Skuldur/Classical-Piano-Composer.git

Cloning into 'Classical-Piano-Composer'...
remote: Enumerating objects: 334, done.[K
remote: Total 334 (delta 0), reused 0 (delta 0), pack-reused 334
Receiving objects: 100% (334/334), 721.79 MiB | 11.95 MiB/s, done.
Resolving deltas: 100% (41/41), done.


In [0]:
# move the files outside of the git folder to the main folder
!mv Classical-Piano-Composer/midi_songs midi_songs
!mv Classical-Piano-Composer/data data

In [3]:
# check the existence of the files
!ls -GFlash --color

total 28K
4.0K drwxr-xr-x 1 root 4.0K Jan 12 14:45 [0m[01;34m.[0m/
4.0K drwxr-xr-x 1 root 4.0K Jan 12 14:41 [01;34m..[0m/
4.0K drwxr-xr-x 4 root 4.0K Jan 12 14:45 [01;34mClassical-Piano-Composer[0m/
4.0K drwxr-xr-x 1 root 4.0K Jan  8 16:41 [01;34m.config[0m/
4.0K drwxr-xr-x 2 root 4.0K Jan 12 14:44 [01;34mdata[0m/
4.0K drwxr-xr-x 2 root 4.0K Jan 12 14:44 [01;34mmidi_songs[0m/
4.0K drwxr-xr-x 1 root 4.0K Dec 18 16:52 [01;34msample_data[0m/


In [4]:
# remove some files in order to run it on colab and prevent it from using all of the ram
# the more the data the better the generated sound will be
import os
files = os.listdir('midi_songs/')
file_path = [os.path.join('midi_songs/',i) for i in files]
print(f"Last 10 file paths: {file_path[-10:]}")
print(f"Number of files: {len(file_path)}")

Last 10 file paths: ['midi_songs/balamb.mid', 'midi_songs/AT.mid', 'midi_songs/cosmo.mid', 'midi_songs/ff8-lfp.mid', 'midi_songs/tpirtsd-piano.mid', 'midi_songs/costadsol.mid', 'midi_songs/Final_Fantasy_Matouyas_Cave_Piano.mid', 'midi_songs/Kingdom_Hearts_Dearly_Beloved.mid', 'midi_songs/FF3_Battle_(Piano).mid', 'midi_songs/FFIX_Piano.mid']
Number of files: 92


In [0]:
# try different values of retaining the amount data
# remove the files
for f in file_path[10:]:
  os.remove(f)

In [6]:
# importing necessary modules
from __future__ import absolute_import, print_function, unicode_literals, division
import os
import gc
gc.enable()
import warnings
warnings.simplefilter("ignore")
import glob
import pickle
import numpy as np
from music21 import converter, instrument, note, chord 
from keras.models import Sequential, Model, load_model
from keras.layers import Dense, Dropout, BatchNormalization as BatchNorm, LSTM, Activation 
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, EarlyStopping, ReduceLROnPlateau, TensorBoard
from keras.utils import plot_model, np_utils

Using TensorFlow backend.


In [0]:
def train_network():
  notes = get_notes()
  n_vocab = len(set(notes))
  network_input, network_output = prepare_sequences(notes, n_vocab)
  model = create_network(network_input, n_vocab)
  train(model, network_input, network_output)

In [0]:
def get_notes():
  notes = []
  for file in glob.glob("midi_songs/*.mid"):
    midi = converter.parse(file)
    print(f"Parsing {file}.")
    notes_to_parse = None
    try:
      s2 = instrument.partitionByInstrument(midi)
      notes_to_parse = s2.parts[0].recurse()
    except:
      notes_to_parse = midi.flat.notes 
    for element in notes_to_parse:
      if isinstance(element, note.Note):
        notes.append(str(element.pitch))
      elif isinstance(element, chord.Chord):
        notes.append('.'.join(str(n) for n in element.normalOrder))
  with open('data/notes','wb') as filepath:
    pickle.dump(notes, filepath)
  return notes

In [0]:
def prepare_sequences(notes, n_vocab):
  sequence_length = 1000
  pitchnames = sorted(set(item for item in notes))
  note_to_int = dict((note, number) for number, note in enumerate(pitchnames)) 
  network_input = []
  network_output = []

  for i in range(0, len(notes)-sequence_length, 1):
    sequence_in = notes[i:i+sequence_length]
    sequence_out = notes[i+sequence_length]
    network_input.append([note_to_int[char] for char in sequence_in])
    network_output.append(note_to_int[sequence_out])
  n_patterns = len(network_input)
  network_input = np.reshape(network_input, (n_patterns, sequence_length, 1))
  network_input = network_input / float(n_vocab)
  network_output = np_utils.to_categorical(network_output)
  return (network_input, network_output)

In [0]:
def create_network(network_input, n_vocab):
  model = Sequential()
  model.add(LSTM(
      128,
      input_shape=(network_input.shape[1], network_input.shape[2]),
      recurrent_dropout=0.3,
      return_sequences=True
  ))
  model.add(LSTM(64, recurrent_dropout=0.3))
  model.add(BatchNorm())
  model.add(Dropout(0.3))
  model.add(Dense(32))
  model.add(Activation('relu'))
  model.add(BatchNorm())
  model.add(Dropout(0.3))
  model.add(Dense(n_vocab))
  model.add(Activation('softmax'))
  model.compile(loss='categorical_crossentropy',optimizer='rmsprop')
  return model

In [0]:
def train(model, network_input, network_output):
  filepath = "weights-improvement-{epoch:02d}-{loss:.4f}-bigger.hdf5"
  checkpoint = ModelCheckpoint(
      filepath,
      monitor='loss',
      verbose=0,
      save_best_only=True,
      mode="min"
  )
  reduce_lr = ReduceLROnPlateau(monitor='loss', verbose=1, patience=4, mode='min', min_lr=0.000001, factor=0.3)
  early_stopping = EarlyStopping(monitor="loss",verbose=1, patience=6, mode='min')
  callbacks_list = [checkpoint, reduce_lr, early_stopping]

  model.fit(network_input, network_output, epochs=200, batch_size=512, callbacks=callbacks_list)

In [12]:
if __name__ == "__main__":
  train_network()

Parsing midi_songs/ViviinAlexandria.mid.
Parsing midi_songs/Still_Alive-1.mid.
Parsing midi_songs/roseofmay-piano.mid.
Parsing midi_songs/sobf.mid.
Parsing midi_songs/Gold_Silver_Rival_Battle.mid.
Parsing midi_songs/sera_.mid.
Parsing midi_songs/ultimafro.mid.
Parsing midi_songs/FF3_Third_Phase_Final_(Piano).mid.
Parsing midi_songs/ff4pclov.mid.
Parsing midi_songs/rufus.mid.




Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where



Epoch 1/200





Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch

In [0]:
# try:
#   from google.colab import files
#   files.upload(
#   )
# except Exception as ex:
#   print(ex)

# importing necessary modules
from __future__ import absolute_import, print_function, unicode_literals, division
import os
import gc
gc.enable()
import warnings
warnings.simplefilter("ignore")
import glob
import pickle
import numpy
from music21 import converter, instrument, note, chord, stream  
from keras.models import Sequential, Model, load_model
from keras.layers import Dense, Dropout, BatchNormalization as BatchNorm, LSTM, Activation 
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, EarlyStopping, ReduceLROnPlateau, TensorBoard
from keras.utils import plot_model, np_utils

In [0]:
def generate():
    """ Generate a piano midi file """
    #load the notes used to train the model
    with open('data/notes', 'rb') as filepath:
        notes = pickle.load(filepath)

    # Get all pitch names
    pitchnames = sorted(set(item for item in notes))
    # Get all pitch names
    n_vocab = len(set(notes))

    network_input, normalized_input = prepare_sequences(notes, pitchnames, n_vocab)
    model = create_network(normalized_input, n_vocab)
    prediction_output = generate_notes(model, network_input, pitchnames, n_vocab)
    create_midi(prediction_output)

In [0]:
def prepare_sequences(notes, pitchnames, n_vocab):
    """ Prepare the sequences used by the Neural Network """
    # map between notes and integers and back
    note_to_int = dict((note, number) for number, note in enumerate(pitchnames))

    sequence_length = 1000
    network_input = []
    output = []
    for i in range(0, len(notes) - sequence_length, 1):
        sequence_in = notes[i:i + sequence_length]
        sequence_out = notes[i + sequence_length]
        network_input.append([note_to_int[char] for char in sequence_in])
        output.append(note_to_int[sequence_out])

    n_patterns = len(network_input)

    # reshape the input into a format compatible with LSTM layers
    normalized_input = numpy.reshape(network_input, (n_patterns, sequence_length, 1))
    # normalize input
    normalized_input = normalized_input / float(n_vocab)

    return (network_input, normalized_input)

In [0]:
def create_network(network_input, n_vocab):
    """ create the structure of the neural network """
    model = Sequential()
    model.add(LSTM(
        128,
        input_shape=(network_input.shape[1], network_input.shape[2]),
        recurrent_dropout=0.3,
        return_sequences=True
    ))
    model.add(LSTM(64, recurrent_dropout=0.3))
    model.add(BatchNorm())
    model.add(Dropout(0.3))
    model.add(Dense(32))
    model.add(Activation('relu'))
    model.add(BatchNorm())
    model.add(Dropout(0.3))
    model.add(Dense(n_vocab))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy',optimizer='rmsprop')

    # Load the weights to each node
    model.load_weights('weights.hdf5')

    return model

In [0]:
def generate_notes(model, network_input, pitchnames, n_vocab):
    """ Generate notes from the neural network based on a sequence of notes """
    # pick a random sequence from the input as a starting point for the prediction
    start = numpy.random.randint(0, len(network_input)-1)

    int_to_note = dict((number, note) for number, note in enumerate(pitchnames))

    pattern = network_input[start]
    prediction_output = []

    # generate 500 notes
    for note_index in range(500):
        prediction_input = numpy.reshape(pattern, (1, len(pattern), 1))
        prediction_input = prediction_input / float(n_vocab)

        prediction = model.predict(prediction_input, verbose=0)

        index = numpy.argmax(prediction)
        result = int_to_note[index]
        prediction_output.append(result)

        pattern.append(index)
        pattern = pattern[1:len(pattern)]

    return prediction_output

In [0]:
def create_midi(prediction_output):
    """ convert the output from the prediction to notes and create a midi file
        from the notes """
    offset = 0
    output_notes = []

    # create note and chord objects based on the values generated by the model
    for pattern in prediction_output:
        # pattern is a chord
        if ('.' in pattern) or pattern.isdigit():
            notes_in_chord = pattern.split('.')
            notes = []
            for current_note in notes_in_chord:
                new_note = note.Note(int(current_note))
                new_note.storedInstrument = instrument.Piano()
                notes.append(new_note)
            new_chord = chord.Chord(notes)
            new_chord.offset = offset
            output_notes.append(new_chord)
        # pattern is a note
        else:
            new_note = note.Note(pattern)
            new_note.offset = offset
            new_note.storedInstrument = instrument.Piano()
            output_notes.append(new_note)

        # increase offset each iteration so that notes do not stack
        offset += 0.5

    midi_stream = stream.Stream(output_notes)

    midi_stream.write('midi', fp='test_output.mid')

In [0]:
if __name__ == '__main__':
    generate()