In [22]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Show the entries of the current working directory.
import os
print(os.listdir(os.curdir))

['Untitled.ipynb', "Piano Sonata n14 op27 1mov ''Moonlight''.mid", 'README.md', '.gitignore', '.ipynb_checkpoints', '.git', 'Moonlight_Sonata.ipynb']


In [23]:
from music21 import converter, corpus, instrument, midi, note, chord, pitch


In [30]:
# Read the raw MIDI file into a low-level MidiFile object.
midi_path = 'Piano Sonata n14 op27 1mov \'\'Moonlight\'\'.mid'
mf = midi.MidiFile()
mf.open(midi_path)
try:
    mf.read()
finally:
    # Release the file handle even when parsing raises.
    mf.close()

In [32]:
# Convert the low-level MidiFile (`mf`) into a music21 stream object.
stream = midi.translate.midiFileToStream(mf)

In [48]:
# Display the converted object as the cell output (a music21 Score here).
stream

<music21.stream.Score 0xa1743ccc0>

In [41]:
# Keep the first element of the score's parts for the inspection cells below.
part1 = stream.parts.stream()[0]

In [45]:
# Check what kind of object was extracted.
type(part1)

music21.stream.Part

In [46]:
# Show the name attached to the extracted part.
part1.partName

'Piano'

In [52]:
# Grab the part's `notes` accessor; it is iterated when the DataFrame is built.
noteIt = part1.notes

In [67]:
# Build one [beat, note object, quarterLength] row per note and create the
# frame in a single call (DataFrame.append is gone in pandas 2.x and was
# quadratic when used in a loop). Rows are indexed 0..n-1.
# The iteration variable is deliberately not called `note`: a module-level
# loop variable with that name would overwrite the music21 `note` module
# imported earlier and break later `note.Note` checks.
records = [[nt.beat, nt, nt.quarterLength] for nt in noteIt]
df = pd.DataFrame(records)

In [90]:
# Number the rows 0..n-1 using the actual row count instead of a hard-coded
# 803, which only matches this particular file.
df.index = range(len(df))

In [91]:
# Inspect rows 300-447 by position (the end bound of the slice is exclusive).
df.iloc[300:448]

Unnamed: 0,0,1,2
300,1,<music21.note.Note E->,1/3
301,4/3,<music21.note.Note F#>,1/3
302,5/3,<music21.note.Note G#>,1/3
303,2,<music21.note.Note E->,1/3
304,7/3,<music21.note.Note F#>,1/3
305,8/3,<music21.note.Note G#>,1/3
306,3,<music21.note.Note E->,1/3
307,10/3,<music21.note.Note F#>,1/3
308,11/3,<music21.note.Note G#>,1/3
309,4,<music21.note.Note E->,1/3


In [77]:
# Count how often each value occurs in column 2, which holds each note's quarterLength.
df[2].value_counts()

1/3    778
1.0     19
2/3      3
2.0      2
4.0      1
Name: 2, dtype: int64

In [47]:

# Convert the extracted part back to a MidiFile and write it to 'test.mid'.
mf = midi.translate.streamToMidiFile(part1)
mf.open('test.mid', 'wb')
try:
    mf.write()
finally:
    # Close the output file even when writing raises.
    mf.close()

In [24]:
def open_midi(midi_path, remove_drums):
    """Read a MIDI file and convert it to a music21 stream.

    Parameters
    ----------
    midi_path : str
        Path of the MIDI file to read.
    remove_drums : bool
        When true, every event whose ``channel`` equals 10 is dropped from
        each track before the conversion.

    Returns
    -------
    The object produced by ``midi.translate.midiFileToStream``.
    """
    # There is a one-line way to read MIDI files, but removing the drums
    # requires manipulating the low-level MIDI events first.
    mf = midi.MidiFile()
    mf.open(midi_path)
    try:
        mf.read()
    finally:
        # Release the file handle even when parsing raises.
        mf.close()

    if remove_drums:
        for track in mf.tracks:
            track.events = [ev for ev in track.events if ev.channel != 10]

    return midi.translate.midiFileToStream(mf)
    
# Load the sonata with remove_drums=False (no events are filtered out) and
# display the resulting object.
base_midi = open_midi('Piano Sonata n14 op27 1mov \'\'Moonlight\'\'.mid', False)
base_midi

<music21.stream.Score 0xa17cf79b0>

In [28]:
def list_instruments(midi):
    """Print the ``partName`` of every part in a score.

    Parameters
    ----------
    midi : object exposing ``parts.stream()``
        The score to inspect. Note that inside this function the parameter
        name hides the ``midi`` module.

    Returns
    -------
    None
        The names are written to standard output, one per line, after a
        header line.
    """
    print("List of instruments found on MIDI file:")
    for part in midi.parts.stream():
        print(part.partName)

# Print the part names found in the score held in `base_midi`.
list_instruments(base_midi)

List of instruments found on MIDI file:
Piano
Piano
Piano


In [25]:
def extract_notes(midi_part):
    """Collect pitch values from the notes and chords of a part.

    Walks ``midi_part.flat.notes``. A single note contributes one entry; a
    chord contributes one entry per pitch it contains. Elements that are
    neither are skipped.

    Returns
    -------
    (list, list)
        Two parallel lists: the pitch ``ps`` values (clamped so they are
        never below 0.0) and, for each value, the note or chord element it
        came from.
    """
    pitch_values = []
    sources = []
    for element in midi_part.flat.notes:
        if isinstance(element, note.Note):
            element_pitches = [element.pitch]
        elif isinstance(element, chord.Chord):
            element_pitches = element.pitches
        else:
            continue

        for current in element_pitches:
            pitch_values.append(max(0.0, current.ps))
            # A chord is recorded once per pitch so both lists stay aligned.
            sources.append(element)

    return pitch_values, sources

In [26]:
def vectorize_harmony(model, harmonic_reduction):
    """Average the model vectors of the words in a harmonic reduction.

    Each word of ``harmonic_reduction`` is looked up with ``model[word]``;
    words for which the lookup raises ``KeyError`` are left out. The
    document vector is taken to be the element-wise mean of the vectors
    that were found.
    """
    known_vectors = []
    for chord_word in harmonic_reduction:
        try:
            known_vectors.append(model[chord_word])
        except KeyError:
            # Not in the vocabulary: skip it.
            continue

    return np.mean(known_vectors, axis=0)

In [27]:
def create_midi_dataframe(midipath):
    """Build a DataFrame describing every entry of a folder of MIDI files.

    Every entry of the ``midipath`` directory is passed to
    ``process_single_file`` using a pool of 8 worker processes. Entries for
    which it returns ``None`` are skipped.

    Parameters
    ----------
    midipath : str
        Folder whose entries are processed.

    Returns
    -------
    pandas.DataFrame
        One row per successfully processed file, with the columns
        ``midi_name``, ``key_signature`` and ``harmonic_reduction``.
    """
    # Local import: the notebook has no top-level import for Pool.
    from multiprocessing import Pool

    # Sorted so the row order does not depend on the file system.
    midi_params = [os.path.join(midipath, midi_name)
                   for midi_name in sorted(os.listdir(midipath))]

    # The context manager shuts the worker processes down once map() returns.
    with Pool(8) as pool:
        results = pool.map(process_single_file, midi_params)

    key_signature_column = []
    harmonic_reduction_column = []
    midi_name_column = []
    for result in results:
        if result is None:
            continue

        key_signature_column.append(result[0])
        harmonic_reduction_column.append(result[1])
        midi_name_column.append(result[2])

    d = {'midi_name': midi_name_column,
         'key_signature' : key_signature_column,
         'harmonic_reduction': harmonic_reduction_column}
    return pd.DataFrame(data=d)


def process_single_file(midi_path):
    """Analyse one MIDI file, returning ``None`` instead of raising.

    Parameters
    ----------
    midi_path : str
        Path of the MIDI file to process.

    Returns
    -------
    tuple or None
        ``(key analysis, harmonic reduction, file name)`` on success.
        If any step raises, the error is printed and ``None`` is returned.
    """
    # Start with the raw path so the error handler always has a label to
    # print, even when get_file_name itself is the call that fails.
    midi_name = midi_path
    try:
        midi_name = get_file_name(midi_path)
        # Named `score` rather than `midi` to avoid hiding the midi module.
        score = open_midi(midi_path, True)
        return (
            score.analyze('key'),
            harmonic_reduction(score),
            midi_name)
    except Exception as e:
        print("Error on {0}".format(midi_name))
        print(e)
        return None

In [None]:
# import modules & set up logging
import gensim, logging
# logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)
 
# Build a Word2Vec model from the "harmonic_reduction" column of sonic_df.
# NOTE(review): sonic_df is not defined in any visible cell — presumably it is
# the frame returned by create_midi_dataframe(...); confirm before running on
# a fresh kernel.
model = gensim.models.Word2Vec(sonic_df["harmonic_reduction"], min_count=2, window=4)