### This file will show how to convert midi files (audio) into suitable inputs (numerical) for the neural network. 

In [1]:
import numpy as np
import glob
import pickle
import numpy
from keras.utils import np_utils
from music21 import converter, instrument, note, chord, stream

Using TensorFlow backend.


**Upload the midi files to the notebook by pressing the charcoal right arrow and pressing the UPLOAD button. **

In [2]:
%%time

raw_notes = []


for i, file in enumerate(glob.glob('*.mid')):
    midi = converter.parse(file)
    notes_to_parse = None
    parts = instrument.partitionByInstrument(midi)
    if parts: # file has instrument parts
        notes_to_parse = parts.parts[0].recurse()
    else: # file has notes in a flat structure
        notes_to_parse = midi.flat.notes
    for element in notes_to_parse:
        if isinstance(element, note.Note):
            raw_notes.append(str(element.pitch))
        elif isinstance(element, chord.Chord):
            raw_notes.append('.'.join(str(n) for n in element.normalOrder))
    print(f'{i+1}. {file} has been converted.')

1. noc55_1.mid has been converted.
2. polpstbb.mid has been converted.
3. maz41_2.mid has been converted.
4. pol71_2.mid has been converted.
5. pol53.mid has been converted.
6. pre28_1.mid has been converted.
7. son2_3.mid has been converted.
8. bal_23.mid has been converted.
9. pol26_2.mid has been converted.
10. val64_2.mid has been converted.
11. val64_3.mid has been converted.
12. val34_1.mid has been converted.
13. eco72_1.mid has been converted.
14. etu10_10.mid has been converted.
15. maz06_2.mid has been converted.
16. val18.mid has been converted.
17. maz07_3.mid has been converted.
18. maz07_2.mid has been converted.
19. maz50_2.mid has been converted.
20. bal_38.mid has been converted.
21. etu25_05.mid has been converted.
22. pre28_8.mid has been converted.
23. maz06_4.mid has been converted.
24. ron73.mid has been converted.
25. maz41_3.mid has been converted.
26. etu25_08.mid has been converted.
27. tar43.mid has been converted.
28. fan49.mid has been converted.
29. bol19.

In [3]:
n_notes = len(set(raw_notes))
len(raw_notes)

262983

In [4]:
raw_notes

['C5',
 'F5',
 'E-5',
 'C#5',
 'C5',
 'B4',
 'C5',
 'C#5',
 'C5',
 'F5',
 'E-5',
 'C#5',
 'C5',
 'B4',
 'C5',
 'E-5',
 'C#5',
 'C5',
 'F5',
 'E-5',
 'C#5',
 'C5',
 'B-4',
 'C5',
 'B-4',
 'C5',
 'B-4',
 'C5',
 'B-4',
 'A4',
 'B-4',
 'C5',
 'C#5',
 'G#4',
 'B-3',
 'E4',
 'G#4',
 'G4',
 'F4',
 'G#3',
 'F4',
 'C5',
 'C5',
 'F5',
 'E-5',
 'C#5',
 'C5',
 'B4',
 'C5',
 'C#5',
 'C5',
 'F5',
 'E-5',
 'C#5',
 'C5',
 'B4',
 'C5',
 'E-5',
 'C#5',
 'C5',
 'F5',
 'E-5',
 'C#5',
 'C5',
 'A4',
 'B-4',
 'C5',
 'B-4',
 'C5',
 'B-4',
 'C5',
 'B-4',
 'C5',
 'B-4',
 'C5',
 'B-4',
 'C5',
 'B-4',
 'C5',
 'B-4',
 'A4',
 'B-4',
 'C5',
 'E-5',
 'C#5',
 'G#4',
 'B-3',
 'B-3',
 'E4',
 'G#4',
 'G4',
 'F4',
 'G#3',
 'F4',
 'G#4',
 'G#4',
 'B-4',
 'D4',
 'E-4',
 'F4',
 'E-4',
 'G#4',
 'B-4',
 'C5',
 'C5',
 'C5',
 'D5',
 'F#4',
 'G4',
 'G#4',
 'G4',
 'C5',
 'D5',
 'E5',
 'F5',
 'E-5',
 'C#5',
 'C#5',
 'C5',
 'C#5',
 'C5',
 'B-4',
 'B-4',
 'G#4',
 'G4',
 'B-4',
 'G#4',
 'G4',
 'G4',
 'G4',
 'G4',
 'B-4',
 'G#4',
 'G4'

In [5]:
%%time

def prepare_sequences(notes, n_notes):
    """ Prepare the sequences used by the Neural Network """
    sequence_length = 100

    # get all pitch names
    pitchnames = sorted(set(item for item in notes))

     # create a dictionary to map pitches to integers
    note_to_int = dict((note, number) for number, note in enumerate(pitchnames))

    network_input = []
    network_output = []

    # create input sequences and the corresponding outputs
    for i in range(0, len(notes) - sequence_length, 1):
        sequence_in = notes[i:i + sequence_length]
        sequence_out = notes[i + sequence_length]
        network_input.append([note_to_int[char] for char in sequence_in])
        network_output.append(note_to_int[sequence_out])

    n_patterns = len(network_input)

    # reshape the input into a format compatible with LSTM layers
    network_input = np.reshape(network_input, (n_patterns, sequence_length, 1))
    # normalize input
    network_input = network_input / float(n_notes)

    network_output = np_utils.to_categorical(network_output)

    return (network_input, network_output)
  

encoded_notes = prepare_sequences(raw_notes, n_notes)

CPU times: user 6.05 s, sys: 1.47 s, total: 7.52 s
Wall time: 7.52 s


In [6]:
encoded_notes[0].shape

(262883, 100, 1)

In [7]:
encoded_notes[0]

array([[[0.92080745],
        [0.97515528],
        [0.94254658],
        ...,
        [0.93012422],
        [0.94099379],
        [0.97360248]],

       [[0.97515528],
        [0.94254658],
        [0.90838509],
        ...,
        [0.94099379],
        [0.97360248],
        [0.94099379]],

       [[0.94254658],
        [0.90838509],
        [0.92080745],
        ...,
        [0.97360248],
        [0.94099379],
        [0.98447205]],

       ...,

       [[0.89596273],
        [0.87732919],
        [0.9068323 ],
        ...,
        [0.96583851],
        [0.98757764],
        [0.88043478]],

       [[0.87732919],
        [0.9068323 ],
        [0.95031056],
        ...,
        [0.98757764],
        [0.88043478],
        [0.90062112]],

       [[0.9068323 ],
        [0.95031056],
        [0.92857143],
        ...,
        [0.88043478],
        [0.90062112],
        [0.91149068]]])

In [8]:
encoded_notes[1]

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)