In [None]:
!gdown --id 1EI0V4JRkAMhjwayaDIRC57iq8liHEiCS
!unzip chopin.zip
#!rm -r mid_data
!mkdir mid_data
!mkdir data
!mkdir mid_out
!cp -r /content/*.mid /content/mid_data
!rm /content/*.mid

In [None]:
import glob
import os
import pickle

import matplotlib.pyplot as plt
import numpy
from keras.callbacks import ModelCheckpoint
from keras.layers import LSTM, Dropout, Dense, Activation, BatchNormalization
from keras.models import Sequential
from keras.utils import np_utils
from music21 import converter, instrument, note, chord, stream


In [None]:
def mid_to_list(file_name):
    """Extract the notes and chords of every MIDI file matching ``file_name``.

    MIDI files store the instructions needed to play a piece (much like a
    music sheet) rather than recorded audio.

    Args:
        file_name: glob pattern (or a single path) selecting the .mid files.

    Returns:
        A flat list of string tokens, in file order then element order:
        a note becomes ``str(element.pitch)`` and a chord becomes its
        ``normalOrder`` values joined with ``'.'``. Other elements are skipped.

    Side effects:
        Pickles the resulting list to ``data/musical_elements``.
    """
    musical_elements = []
    for file in glob.glob(file_name):
        midi = converter.parse(file)
        parts = instrument.partitionByInstrument(midi)
        if parts:  # file has instrument parts: only the first part is read
            notes_to_parse = parts.parts[0].recurse()
        else:  # file has notes in a flat structure
            notes_to_parse = midi.flat.notes
        for element in notes_to_parse:
            if isinstance(element, note.Note):
                musical_elements.append(str(element.pitch))
            elif isinstance(element, chord.Chord):
                musical_elements.append('.'.join(str(n) for n in element.normalOrder))

    # Create the cache directory on demand: without it, open() would raise
    # FileNotFoundError only after all the (slow) parsing has been done.
    os.makedirs('data', exist_ok=True)
    with open('data/musical_elements', 'wb') as cache_file:
        pickle.dump(musical_elements, cache_file)
    return musical_elements
 
def list_to_midi(prediction_output, file_name):
    """Write a sequence of note/chord tokens to a MIDI file.

    Args:
        prediction_output: iterable of string tokens. A token that contains
            a '.' or consists only of digits is treated as a chord whose
            '.'-separated pieces are converted with ``int`` and passed to
            ``note.Note``; any other token is passed to ``note.Note`` as-is.
        file_name: path handed to ``Stream.write('midi', fp=...)``.

    Returns:
        None. The only result is the file written to ``file_name``.
    """
    timeline = []
    current_offset = 0

    for token in prediction_output:
        looks_like_chord = ('.' in token) or token.isdigit()
        if looks_like_chord:
            # Build one piano note per chord member, then wrap them in a Chord.
            members = []
            for member_value in token.split('.'):
                member = note.Note(int(member_value))
                member.storedInstrument = instrument.Piano()
                members.append(member)
            element = chord.Chord(members)
            element.offset = current_offset
        else:
            # A single note token.
            element = note.Note(token)
            element.offset = current_offset
            element.storedInstrument = instrument.Piano()
        timeline.append(element)

        # Advance by a fixed step so consecutive elements do not stack.
        current_offset += 0.5

    stream.Stream(timeline).write('midi', fp=file_name)

In [None]:
# #test encoder and decoder
# res = mid_to_list("mid_data/chp_op18_format0.mid")
# list_to_midi(res, 'mid_out/test.mid')

In [None]:
def pre_process_data(musical_elts, n_vocab, seq_len=100, stride=1):
    """Build LSTM training inputs and targets from a token sequence.

    Every window of ``seq_len`` consecutive tokens becomes one input and the
    token that follows it becomes the target.

    NOTE: tokens are converted to integers with the module-level
    ``music_el_to_int`` mapping, which must be defined before this is called.

    Args:
        musical_elts: list of note/chord tokens.
        n_vocab: vocabulary size; used to scale the inputs and as the width
            of the one-hot targets.
        seq_len: number of tokens per input window.
        stride: step between the starts of consecutive windows.

    Returns:
        ``(normalized_X, categorical_Y, dataX)`` where ``normalized_X`` has
        shape ``(n_patterns, seq_len, 1)`` with values divided by ``n_vocab``,
        ``categorical_Y`` is the one-hot encoding of the targets with
        ``n_vocab`` columns, and ``dataX`` is the list of integer windows
        before normalisation.
    """
    dataX = []
    dataY = []

    # create input sequences and the corresponding outputs
    for i in range(0, len(musical_elts) - seq_len, stride):
        seq_in = musical_elts[i:i + seq_len]
        seq_out = musical_elts[i + seq_len]
        dataX.append([music_el_to_int[el] for el in seq_in])
        dataY.append(music_el_to_int[seq_out])

    n_patterns = len(dataX)
    print("Total examples synthesised from this dataset: ", n_patterns)

    # reshape the input into a format compatible with LSTM layers
    normalized_X = numpy.reshape(dataX, (n_patterns, seq_len, 1))
    # normalize input
    normalized_X = normalized_X / float(n_vocab)

    # Fix the number of classes explicitly. Left to itself, to_categorical
    # sizes the output from the largest target index, so the result would be
    # narrower than n_vocab whenever the highest-indexed token never occurs
    # as a target, and would no longer match a Dense(n_vocab) output layer.
    categorical_Y = np_utils.to_categorical(dataY, num_classes=n_vocab)

    return normalized_X, categorical_Y, dataX

In [None]:
# Parse every MIDI file of the dataset into one flat list of note/chord tokens.
musical_elements = mid_to_list("mid_data/*.mid")

# The vocabulary is the sorted collection of distinct tokens; sorting makes
# the token <-> integer mapping depend only on the set of tokens.
sorted_set_of_elements = sorted(set(musical_elements))
n_vocab = len(sorted_set_of_elements)
print("Total Vocab", n_vocab)
print("vocabulary is the number of distinct values (notes, cordes) used in the midi file to make the song")

# Lookup tables in both directions: token -> index and index -> token.
music_el_to_int = {musical_el: i for i, musical_el in enumerate(sorted_set_of_elements)}
int_to_music_el = dict(enumerate(sorted_set_of_elements))

In [None]:
# Turn the token list into training data: scaled input windows, one-hot
# targets, and dataX (the integer windows before scaling), which the
# generation cell later uses as its seed.
normalized_X, categorical_Y, dataX  = pre_process_data(musical_elements, n_vocab)
print("shape of X", normalized_X.shape)
print("shape of Y", categorical_Y.shape)

In [None]:
# Build the LSTM model: two stacked 512-unit LSTM layers, each followed by
# 20% dropout, then a dense softmax layer with one output per vocabulary entry.
# The input shape (window length, features per step) is taken from the data.
model = Sequential([
    LSTM(512, return_sequences=True, input_shape=(normalized_X.shape[1], normalized_X.shape[2])),
    Dropout(0.2),
    LSTM(512, return_sequences=False),
    Dropout(0.2),
    Dense(n_vocab),
    Activation('softmax'),
])

# Alternative, deeper architecture kept for reference (not used):
# model = Sequential()
# model.add(
#     LSTM(512,input_shape=(dataX.shape[1], dataX.shape[2]), recurrent_dropout=0.3,return_sequences=True)
#     )
# model.add(LSTM(512, return_sequences=True, recurrent_dropout=0.3,))
# model.add(LSTM(512))
# model.add(BatchNormalization())
# model.add(Dropout(0.3))
# model.add(Dense(256))
# model.add(Activation('relu'))
# model.add(BatchNormalization())
# model.add(Dropout(0.3))
# model.add(Dense(n_vocab))
# model.add(Activation('softmax'))

In [None]:
# Compile the model: categorical cross-entropy matches the one-hot targets
# and the softmax output layer; Adam is used as the optimizer.
model.compile(loss='categorical_crossentropy', optimizer='adam')

In [None]:
# Print the layer-by-layer overview of the model built above.
model.summary()

In [None]:
# Checkpoint configuration: the training loss is monitored and, because of
# save_best_only with mode='min', a file is written only when it improves.
# The file name embeds the epoch number and the loss value.
cpfilepath = "v1-{epoch:02d}-{loss:.4f}-bigger.hdf5"
checkpoint = ModelCheckpoint(
    cpfilepath,
    monitor='loss',
    mode='min',
    save_best_only=True,
    verbose=0,
)
callbacks_list = [checkpoint]



In [None]:
# Optionally warm-start from previously trained weights.
# Nothing in this notebook downloads or creates this file, so an
# unconditional load would abort a fresh "Restart & Run All". Load the
# weights only when the file is present and say so otherwise.
filename = "pre_trained_weights.hdf5"
if os.path.isfile(filename):
    model.load_weights(filename)
else:
    print("No pre-trained weights found at", filename, "- training starts from scratch")


In [None]:
# Train the model; the checkpoint callback saves weights along the way.
history = model.fit(
    normalized_X,
    categorical_Y,
    epochs=50,
    batch_size=128,
    callbacks=callbacks_list,
)

# Plot the training loss recorded for each epoch.
plt.plot(history.history['loss'])
plt.show()

In [None]:
# Music generation: seed the model with a random training window, then
# repeatedly predict the next token and slide the window forward by one.

# randint's upper bound is exclusive, so passing len(dataX) makes every
# window eligible (len(dataX) - 1 could never pick the last one and raised
# an error when only a single window existed).
start = numpy.random.randint(0, len(dataX))

# Work on a copy: appending to dataX[start] itself would permanently grow
# that training window and corrupt it for later runs of this cell.
pattern = list(dataX[start])
predicted_out = []

# start generation
for note_index in range(500):
    # Same shaping and scaling as the training inputs.
    x = numpy.reshape(pattern, (1, len(pattern), 1))
    x = x / float(n_vocab)
    prediction = model.predict(x, verbose=0)
    # Greedy decoding: always take the most probable token.
    index = int(numpy.argmax(prediction))
    result = int_to_music_el[index]
    predicted_out.append(result)
    # Slide the window: add the prediction, drop the oldest token.
    pattern.append(index)
    pattern = pattern[1:]

In [None]:
# Convert the generated token sequence into a MIDI file under mid_out/.
list_to_midi(predicted_out, 'mid_out/test.mid')

In [None]:
#utils
#!zip -r weights_to_zip v1-*
#files.download("weights_to_zip.zip")


In [None]:
  # Music analysis lab :)
  # Parse a single piece and keep both the raw music21 elements and their
  # string tokens, for manual inspection.
  # NOTE(review): this rebinds the global `musical_elements` built from the
  # whole dataset; re-running earlier cells afterwards would see only this
  # one file's tokens - confirm that is intended.
  musical_elements = []
  raw_elements = []
  for file in glob.glob("mid_data/chp_op18_format0.mid"):
      midi = converter.parse(file)
      parts = instrument.partitionByInstrument(midi)
      # With instrument parts, read the first part; otherwise read the flat notes.
      notes_to_parse = parts.parts[0].recurse() if parts else midi.flat.notes
      for element in notes_to_parse:
          raw_elements.append(element)
          if isinstance(element, note.Note):
              token = str(element.pitch)
          elif isinstance(element, chord.Chord):
              token = '.'.join(map(str, element.normalOrder))
          else:
              # Neither a note nor a chord: kept in raw_elements only.
              continue
          musical_elements.append(token)



In [None]:
# NOTE(review): `key` is not used in the visible part of the notebook, and this
# import sits outside the main import cell - confirm whether it is still needed.
from music21 import key

# NOTE(review): a bare name that is not the last line of a cell displays nothing.
raw_elements
# NOTE(review): str_elts is never filled in the visible code.
str_elts = []
# Print every raw music21 element collected by the analysis cell, one per line.
for elm in raw_elements:
  print(elm)
  # if isinstance(elm, note.Note):
  #   print(str(elm))
