# Importar mierdas para leer midi y manipularlos

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Listing current data on our folder.
import os
print(os.listdir("."))

In [None]:
# Defining some constants and creating a new folder for MIDIs.
midi_path = "data"
sonic_folder = "Jazzomat"

!rm -r $midi_path
!mkdir $midi_path

# Some helper methods.    
def concat_path(path, child):
    return path + "/" + child

def download_midi(midi_url, path):
    !wget $midi_url --directory-prefix $path > download_midi.log

# Downloading an example file.
sonic_path = concat_path(midi_path, sonic_folder)
download_midi(
    "https://files.khinsider.com/midifiles/genesis/sonic-the-hedgehog/green-hill-zone.mid",
    sonic_path)
    
print(os.listdir(sonic_path))

In [None]:
sonic_path

In [None]:
from music21 import converter, corpus, instrument, midi, note, chord, pitch

def open_midi(midi_path, remove_drums):
    # There is an one-line method to read MIDIs
    # but to remove the drums we need to manipulate some
    # low level MIDI events.
    mf = midi.MidiFile()
    mf.open(midi_path)
    mf.read()
    mf.close()
    if (remove_drums):
        for i in range(len(mf.tracks)):
            mf.tracks[i].events = [ev for ev in mf.tracks[i].events if ev.channel != 10]          

    return midi.translate.midiFileToStream(mf)
    
base_midi = open_midi(concat_path(sonic_path, "ArtPepper_Anthropology_FINAL.mid"), True)
base_midi

In [None]:
def list_instruments(midi):
    partStream = midi.parts.stream()
    print("List of instruments found on MIDI file:")
    for p in partStream:
        aux = p
        print (p.partName)

list_instruments(base_midi)

In [None]:
import matplotlib.pyplot as plt
import matplotlib.lines as mlines

def extract_notes(midi_part):
    parent_element = []
    ret = []
    for nt in midi_part.flat.notes:        
        if isinstance(nt, note.Note):
            ret.append(max(0.0, nt.pitch.ps))
            parent_element.append(nt)
        elif isinstance(nt, chord.Chord):
            for pitch in nt.pitches:
                ret.append(max(0.0, pitch.ps))
                parent_element.append(nt)
    
    return ret, parent_element

def print_parts_countour(midi):
    fig = plt.figure(figsize=(12, 5))
    ax = fig.add_subplot(1, 1, 1)
    minPitch = pitch.Pitch('C10').ps
    maxPitch = 0
    xMax = 0
    
    # Drawing notes.
    for i in range(len(midi.parts)):
        top = midi.parts[i].flat.notes                  
        y, parent_element = extract_notes(top)
        if (len(y) < 1): continue
            
        x = [n.offset for n in parent_element]
        ax.scatter(x, y, alpha=0.6, s=7)
        
        aux = min(y)
        if (aux < minPitch): minPitch = aux
            
        aux = max(y)
        if (aux > maxPitch): maxPitch = aux
            
        aux = max(x)
        if (aux > xMax): xMax = aux
    
    for i in range(1, 10):
        linePitch = pitch.Pitch('C{0}'.format(i)).ps
        if (linePitch > minPitch and linePitch < maxPitch):
            ax.add_line(mlines.Line2D([0, xMax], [linePitch, linePitch], color='red', alpha=0.1))            

    plt.ylabel("Note index (each octave has 12 notes)")
    plt.xlabel("Number of quarter notes (beats)")
    plt.title('Voices motion approximation, each color is a different instrument, red lines show each octave')
    plt.show()

# Focusing only on 6 first measures to make it easier to understand.
print_parts_countour(base_midi.measures(0, 6))

In [None]:
base_midi.plot('histogram', 'pitchClass', 'count')

In [None]:
base_midi.plot('scatter', 'offset', 'pitchClass')

In [None]:
timeSignature = base_midi.getTimeSignatures()[0]
music_analysis = base_midi.analyze('key')
print("Music time signature: {0}/{1}".format(timeSignature.beatCount, timeSignature.denominator))
print("Expected music key: {0}".format(music_analysis))
print("Music key confidence: {0}".format(music_analysis.correlationCoefficient))
print("Other music key alternatives:")
for analysis in music_analysis.alternateInterpretations:
    if (analysis.correlationCoefficient > 0.5):
        print(analysis)

In [None]:
type(base_midi)

In [None]:
from music21 import stream

temp_midi_chords = open_midi(
    concat_path(sonic_path, "ArtPepper_Anthropology_FINAL.mid"),
    True).chordify()
temp_midi = stream.Score()
temp_midi.insert(0, temp_midi_chords)

# Printing merged tracks.
print_parts_countour(temp_midi)

# Dumping first measure notes
temp_midi_chords.measures(0, 1).show("text")

In [None]:
from music21 import roman

def note_count(measure, count_dict):
    bass_note = None
    for chord in measure.recurse().getElementsByClass('Chord'):
        # All notes have the same length of its chord parent.
        note_length = chord.quarterLength
        for note in chord.pitches:          
            # If note is "C5", note.name is "C". We use "C5"
            # style to be able to detect more precise inversions.
            note_name = str(note) 
            if (bass_note is None or bass_note.ps > note.ps):
                bass_note = note
                
            if note_name in count_dict:
                count_dict[note_name] += note_length
            else:
                count_dict[note_name] = note_length
        
    return bass_note
                
def simplify_roman_name(roman_numeral):
    # Chords can get nasty names as "bII#86#6#5",
    # in this method we try to simplify names, even if it ends in
    # a different chord to reduce the chord vocabulary and display
    # chord function clearer.
    ret = roman_numeral.romanNumeral
    inversion_name = None
    inversion = roman_numeral.inversion()
    
    # Checking valid inversions.
    if ((roman_numeral.isTriad() and inversion < 3) or
            (inversion < 4 and
                 (roman_numeral.seventh is not None or roman_numeral.isSeventh()))):
        inversion_name = roman_numeral.inversionName()
        
    if (inversion_name is not None):
        ret = ret + str(inversion_name)
        
    elif (roman_numeral.isDominantSeventh()): ret = ret + "M7"
    elif (roman_numeral.isDiminishedSeventh()): ret = ret + "o7"
    return ret
                
def harmonic_reduction(midi_file):
    ret = []
    temp_midi = stream.Score()
    temp_midi_chords = midi_file.chordify()
    temp_midi.insert(0, temp_midi_chords)    
    music_key = temp_midi.analyze('key')
    max_notes_per_chord = 4   
    for m in temp_midi_chords.measures(0, None): # None = get all measures.
        if (type(m) != stream.Measure):
            continue
        
        # Here we count all notes length in each measure,
        # get the most frequent ones and try to create a chord with them.
        count_dict = dict()
        bass_note = note_count(m, count_dict)
        if (len(count_dict) < 1):
            ret.append("-") # Empty measure
            continue
        
        sorted_items = sorted(count_dict.items(), key=lambda x:x[1])
        sorted_notes = [item[0] for item in sorted_items[-max_notes_per_chord:]]
        measure_chord = chord.Chord(sorted_notes)
        
        # Convert the chord to the functional roman representation
        # to make its information independent of the music key.
        roman_numeral = roman.romanNumeralFromChord(measure_chord, music_key)
        ret.append(simplify_roman_name(roman_numeral))
        
    return ret

harmonic_reduction(base_midi)[0:25]

In [None]:
from music21 import corpus

bachChorale = corpus.parse('bach/bwv846')
harmonic_reduction(bachChorale)[0:11]

In [None]:
import requests
from bs4 import BeautifulSoup

def get_file_name(link):
    filename = link.split('/')[::-1][0]
    return filename

def download_file(link, filename):
    mid_file_request = requests.get(link, stream=True)
    if (mid_file_request.status_code != 200):
        raise Exception("Failed to download {0}".format(url))
        
    with open(filename, 'wb+') as saveMidFile:
        saveMidFile.write(mid_file_request.content)

def download_midi_files(url, output_path):
    site_request = requests.get(url)
    if (site_request.status_code != 200):
        raise Exception("Failed to access {0}".format(url))
    
    soup = BeautifulSoup(site_request.content, 'html.parser')
    link_urls = soup.find_all('a')

    for link in link_urls:
        href = link['href']
        if (href.endswith(".mid")):
            file_name = get_file_name(href)
            download_path = concat_path(output_path, file_name)
            midi_request = download_file(href, download_path)

def start_midis_download(folder, url):
    !mkdir $folder # It is fine if this command fails when the directory already exists.
    download_midi_files(url, folder)

target_games = dict()
target_games["sonic1"] = "https://www.khinsider.com/midi/genesis/sonic-the-hedgehog"
target_games["sonic2"] = "https://www.khinsider.com/midi/genesis/sonic-the-hedgehog-2"
target_games["sonic3"] = "https://www.khinsider.com/midi/genesis/sonic-the-hedgehog-3"
target_games["sonicAndKnuckles"] = "https://www.khinsider.com/midi/genesis/sonic-and-knuckles"

for key, value in target_games.items():
    print(target_games.item())
    file_path = concat_path(sonic_path, key)
    start_midis_download(file_path, value)

In [None]:
# from multiprocessing.dummy import Pool as ThreadPool # Use this when IO is the problem
from multiprocessing import Pool # Use this when CPU-intensive functions are the problem.

# Go get a coffee, this cell takes hours to run...
def process_single_file(midi_param):
    try:
        game_name = midi_param[0]
        midi_path = midi_param[1]
        midi_name = get_file_name(midi_path)
        midi = open_midi(midi_path, True)
        return (
            midi.analyze('key'),
            game_name,
            harmonic_reduction(midi),
            midi_name)
    except Exception as e:
        print("Error on {0}".format(midi_name))
        print(e)
        return None

def create_midi_dataframe(target_games):
    key_signature_column = []
    game_name_column = []
    harmonic_reduction_column = []
    midi_name_column = []
    pool = Pool(8)
    midi_params = []
    for key, value in target_games.items():
        folder_path = concat_path(sonic_path, key)
        for midi_name in os.listdir(folder_path):
            midi_params.append((key, concat_path(folder_path, midi_name)))

    results = pool.map(process_single_file, midi_params)
    for result in results:
        if (result is None):
            continue
            
        key_signature_column.append(result[0])
        game_name_column.append(result[1])
        harmonic_reduction_column.append(result[2])
        midi_name_column.append(result[3])
    
    d = {'midi_name': midi_name_column,
         'game_name': game_name_column,
         'key_signature' : key_signature_column,
         'harmonic_reduction': harmonic_reduction_column}
    return pd.DataFrame(data=d)

sonic_df = create_midi_dataframe(target_games)

In [None]:
print(sonic_path)

In [None]:
sonic_path = 'data/Jazzomat'
sonic_path = 'Pop_Music_Midi'
#midi1 = 'ArtPepper_Stardust-1_FINAL.mid'
#midi2 = 'ArtPepper_BluesForBlanche_FINAL.mid'
#midi3 = 'ArtPepper_Desafinado_FINAL.mid'
#midi4 = 'ArtPepper_InAMellowTone_FINAL.mid'
midi1 = 'All The Small Things - Chorus.midi'
midi2 = 'All The Small Things - Verse.midi'
midi3 = 'Around The World - Chorus.midi'
midi4 = 'Around The World - Verse.midi'
midi5 = 'Barbie Girl - Chorus.midi'
midi6 = 'Barbie Girl - Verse.midi'

mid1 = open_midi(concat_path(sonic_path, midi1), True)
mid2 = open_midi(concat_path(sonic_path, midi2), True)
mid3 = open_midi(concat_path(sonic_path, midi3), True)
mid4 = open_midi(concat_path(sonic_path, midi4), True)
mid5 = open_midi(concat_path(sonic_path, midi5), True)
mid6 = open_midi(concat_path(sonic_path, midi6), True)

k1 = "{0}".format(mid1.analyze('key'))
k2 = "{0}".format(mid2.analyze('key'))
k3 = "{0}".format(mid3.analyze('key'))
k4 = "{0}".format(mid4.analyze('key'))
k5 = "{0}".format(mid5.analyze('key'))
k6 = "{0}".format(mid6.analyze('key'))

h1 = harmonic_reduction(mid1)
h2 = harmonic_reduction(mid2)
h3 = harmonic_reduction(mid3)
h4 = harmonic_reduction(mid4)
h5 = harmonic_reduction(mid5)
h6 = harmonic_reduction(mid6)

d = {'midi_name': [midi1, midi2, midi3, midi4, midi5, midi6], 'game_name': ['k','k','k','k','k','k'], 
     'key_signature': [k1, k2, k3, k4, k5, k6], 'harmonic_reduction': [h1,h2,h3,h4,h5,h6]}

#d = {'midi_name': [midi1, midi2, midi3, midi4], 'game_name': ['k','k','k','k'], 
#     'key_signature': [k1, k2, k3, k4], 'harmonic_reduction': [h1,h2,h3,h4]}

sonic_df = pd.DataFrame(data=d)
sonic_df

In [None]:
# -------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------
# ----------------------------------------- FUNCION DIVIDE CANCIONES ------------------------------------------
# -------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------

# -------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------
# -------------------------- FUNCION QUE HACE DATAFRAME PARA EVALUAR SIMILITUDES ------------------------------
# -------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------



In [None]:
# import modules & set up logging
import gensim, logging
# logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)
 
model = gensim.models.Word2Vec(sonic_df["harmonic_reduction"], min_count=2, window=4)

In [None]:
model

In [None]:
def get_related_chords(token, topn=3):
    print("Similar chords with " + token)
    for word, similarity in model.wv.most_similar(positive=[token], topn=topn):
        print (word, round(similarity, 3))

def get_chord_similarity(chordA, chordB):
    print("Similarity between {0} and {1}: {2}".format(
        chordA, chordB, model.wv.similarity(chordA, chordB)))
    
print("List of chords found:")
print(model.wv.vocab.keys())
print("Number of chords considered by model: {0}".format(len(model.wv.vocab)))

get_related_chords('i')
get_related_chords('v64')
get_related_chords('v53')

# The first one should be smaller since "i" and "ii" chord doesn't share notes,
# different from "IV" and "vi" which share 2 notes.
get_chord_similarity("i", "i64") 
get_chord_similarity("IV", "vi")

# This one should be bigger because they are "enharmonic".
get_chord_similarity("i", "vii")

get_chord_similarity("i", "I")

In [None]:
import pprint
def vectorize_harmony(model, harmonic_reduction):
    # Gets the model vector values for each chord from the reduction.
    word_vecs = []
    for word in harmonic_reduction:
        if word != '-':
            #print('ok')
            try:
                vec = model[word]
                word_vecs.append(vec)
            except KeyError:
                # Ignore, if the word doesn't exist in the vocabulary
                pass
        #else:
        #    pass
        #    print('pass')
    
    # Assuming that document vector is the mean of all the word vectors.
    return np.mean(word_vecs, axis=0)

def cosine_similarity(vecA, vecB):
    # Find the similarity between two vectors based on the dot product.
    csim = np.dot(vecA, vecB) / (np.linalg.norm(vecA) * np.linalg.norm(vecB))
    if np.isnan(np.sum(csim)):
        return 0
    
    return csim

def calculate_similarity_aux(df, model, source_name, target_names=[], threshold=0):
    source_harmo = df[df["midi_name"] == source_name]["harmonic_reduction"].values[0]
    source_vec = vectorize_harmony(model, source_harmo)    
    results = []
    for name in target_names:
        target_harmo = df[df["midi_name"] == name]["harmonic_reduction"].values[0]
        if (len(target_harmo) == 0):
            continue
            
        target_vec = vectorize_harmony(model, target_harmo)       
        sim_score = cosine_similarity(source_vec, target_vec)
        if sim_score > threshold:
            results.append({
                'score' : sim_score,
                'name' : name
            })
                
    # Sort results by score in desc order
    results.sort(key=lambda k : k['score'] , reverse=True)
    return results

def calculate_similarity(df, model, source_name, target_prefix, threshold=0):
    source_midi_names = df[df["midi_name"] == source_name]["midi_name"].values
    if (len(source_midi_names) == 0):
        print(type(source_midi_names))
        print("Invalid source name")
        return
    
    source_midi_name = source_midi_names[0]
    
    target_midi_names = df[df["midi_name"].str.startswith(target_prefix)]["midi_name"].values  
    print(target_midi_names)
    if (len(target_midi_names) == 0):
        print("Invalid target prefix")
        return
    
    return calculate_similarity_aux(df, model, source_midi_name, target_midi_names, threshold)

pp = pprint.PrettyPrinter(width=41, compact=True)
pp.pprint(calculate_similarity(sonic_df, model, "ArtPepper_InAMellowTone_FINAL", "green")) # sonic1 x sonic1 music
pp.pprint(calculate_similarity(sonic_df, model, "green-hill-zone.mid", "emerald")) # sonic1 x sonic2 music
pp.pprint(calculate_similarity(sonic_df, model, "green-hill-zone.mid", "hydro")) # sonic1 x sonic3 music
pp.pprint(calculate_similarity(sonic_df, model, "green-hill-zone.mid", "sando")) # sonic1 x s&k music

In [None]:
print('i   midi_name                           j    midi_name                     similarity  ')
for i in range(0,6):
    for j in range(0,6):
        source_harmo_1 = sonic_df['harmonic_reduction'][i]
        target_harmo_1 = sonic_df['harmonic_reduction'][j]
        name_i = sonic_df['midi_name'][i]
        name_j = sonic_df['midi_name'][j]

        source_vec = vectorize_harmony(model, source_harmo_1)

        target_vec = vectorize_harmony(model, target_harmo_1)       
        sim_score = cosine_similarity(source_vec, target_vec)
#print(target_vec)
        print(i,' ',name_i,' ',j,' ',name_j,' ', sim_score)

In [None]:
type(source_harmo_1)

In [None]:
print('i  j   similarity')
for i in range(0,6):
    for j in range(0,6):
        source_harmo_1 = sonic_df['harmonic_reduction'][i]
        target_harmo_1 = sonic_df['harmonic_reduction'][j]

        source_vec = vectorize_harmony(model, source_harmo_1)

        target_vec = vectorize_harmony(model, target_harmo_1)       
        sim_score = cosine_similarity(source_vec, target_vec)
#print(target_vec)
        print(i,' ',j,' ', sim_score)

In [None]:
sonic_df

In [None]:
barbie_chorus = open_midi('Pop_Music_Midi/Barbie Girl - Chorus.midi', True)
barbie_verse = open_midi('Pop_Music_Midi/Barbie Girl - Verse.midi', True)
harmonic_bc = harmonic_reduction(barbie_chorus)
harmonic_bv = harmonic_reduction(barbie_verse)
base_midi = open_midi(concat_path(sonic_path, "ArtPepper_Anthropology_FINAL.mid"), True)

In [None]:
barbie_chorus

In [None]:
source_harmo_1 = harmonic_bc
target_harmo_1 = harmonic_bv

source_vec = vectorize_harmony(model, source_harmo_1)

target_vec = vectorize_harmony(model, target_harmo_1)       
sim_score = cosine_similarity(source_vec, target_vec)
print(target_vec)
print(sim_score)

In [None]:
target_harmo_1

In [None]:
sonic_df["midi_name"]

In [None]:
sonic_df["midi_name"] == "ArtPepper_Anthropology_FINAL.mid"

In [None]:
sonic_df[sonic_df["midi_name"] == "ArtPepper_Anthropology_FINAL.mid"]

In [None]:
sonic_df[sonic_df["midi_name"] == "ArtPepper_Anthropology_FINAL.mid"]["midi_name"]

In [None]:
sonic_df[sonic_df["midi_name"].str.startswith("FINAL")]["midi_name"]

In [None]:
import pretty_midi

In [None]:
# Load MIDI file into PrettyMIDI object
midi_data = pretty_midi.PrettyMIDI('data/Jazzomat/ArtPepper_Anthropology_FINAL.mid')
midi_data

In [None]:
midi_data.estimate_tempo()
midi_data.get_end_time()
midi_data.get_tempo_changes()
midi_data.get_beats()
piano = midi_data.get_piano_roll()

In [None]:
midi_data.estimate_tempo()

In [None]:
from mido import Message, MidiFile, MidiTrack
def song_analysis(mid):
    for i, track in enumerate(mid.tracks):
        print('Track {}: {}'.format(i, track.name))
        for msg in track:
            print(msg)
    return
mid_test = MidiFile('./data/Jazzomat/ArtPepper_Anthropology_FINAL.mid')
song_analysis(mid_test)

In [None]:
(midi_data.get_pitch_class_histogram)

In [None]:
import numpy as np
from pypianoroll import Multitrack, Track
from matplotlib import pyplot as plt
from mido import Message, MidiFile, MidiTrack

# Create a pianoroll matrix, where the first and second axes represent time
# and pitch, respectively, and assign a C major chord to the pianoroll
pianoroll = np.zeros((96, 128))
C_maj = [60, 64, 67, 72, 76, 79, 84]
pianoroll[0:95, C_maj] = 100

# Create a `pypianoroll.Track` instance
track = Track(pianoroll=pianoroll, program=0, is_drum=False,
              name='my awesome piano')

# Plot the pianoroll
fig, ax = track.plot()
plt.show()

In [None]:
# Parse a MIDI file to a `pypianoroll.Multitrack` instance
another_multitrack = Multitrack('Pop_Music_Midi/Can You Feel the Love Tonight - Chorus.midi')
mid = MidiFile('Pop_Music_Midi/Can You Feel the Love Tonight - Chorus.midi', clip=True)

print('track 1')
for track in another_multitrack.tracks:
    print(track)
print('track 2')
for track in mid.tracks:
    print(track)

In [None]:
# Plot the pianoroll
fig, ax = another_multitrack.plot()
plt.show()

In [None]:
another_multitrack

In [None]:
song = 'Pop_Music_Midi/Can You Feel the Love Tonight - Chorus.midi'
#song = 'Pop_Music_Midi/I Gotta Feeling - Chorus.midi'
#song1 = pypianoroll.parse(song, beat_resolution=48)
mid_multitrack = Multitrack(song, beat_resolution=24) #read to plot
mid_midifile = MidiFile(song, clip=True) #read to see tracks

# Convert original song to pianoroll
roll = mid_multitrack.get_merged_pianoroll(mode='sum') # merge all tracks #sum or max
roll_to_track = Track(pianoroll=roll) # put the roll into a track
track_to_multitrack = Multitrack(tracks=[roll_to_track]) # put the track into multitrack (necessary to convert to midi later)
track_to_multitrack.write('CACATUA.midi') # convert multtrack to midi

# Plot tracks in original song
print('original song contains:')
print('get active pitch range', mid_multitrack.get_active_pitch_range())
print('get empty tracks', mid_multitrack.get_empty_tracks())
print('get max length', mid_multitrack.get_max_length())
print('beat resolution', mid_multitrack.beat_resolution)
print('roll length: ',len(roll))
for track in mid_midifile.tracks:
    print(track)
# Plot pianoroll of original song
fig, ax = mid_multitrack.plot(mode='separate')
plt.show()

# Read the midi file that has just been converted (from midi to pianoroll and back to midi, to check reconstruction)
test_conversion_multitrack = Multitrack('CACATUA.midi') #read to plot
test_conversion_midifile = MidiFile('CACATUA.midi') #read to see tracks
test_conversion_roll = test_conversion_multitrack.get_merged_pianoroll(mode='sum') # merge all tracks #sum or max
print('converted song contains:')

print('roll length: ',len(test_conversion_roll))
print('get active pitch range', test_conversion_multitrack.get_active_pitch_range())
print('get empty tracks', test_conversion_multitrack.get_empty_tracks())
print('get max length', test_conversion_multitrack.get_max_length())
print('beat resolution', test_conversion_multitrack.beat_resolution)
for track in test_conversion_midifile.tracks:
    print(track)
# Plot the pianoroll
fig, ax = test_conversion_multitrack.plot()
plt.show()

In [None]:
# merge tracks
song = 'Pop_Music_Midi/Can You Feel the Love Tonight - Chorus.midi'
mid_multitrack = Multitrack(song) #read to plot
mid_midifile = MidiFile(song, clip=True) #read to see tracks

merged_roll = mid_multitrack.get_merged_pianoroll(mode='sum') # merged roll (OPTION 1)

# Plot tracks in original song
print('original song contains:')
print('get active pitch range', mid_multitrack.get_active_pitch_range())
print('get empty tracks', mid_multitrack.get_empty_tracks())
print('get max length', mid_multitrack.get_max_length())
print('beat resolution', mid_multitrack.beat_resolution)
print('roll length: ',len(roll))
for track in mid_midifile.tracks:
    print(track)
# Plot pianoroll of original song
fig, ax = mid_multitrack.plot(mode='separate')
plt.show()

# Merge tracks and obtain roll (OPTION 2)
mid_multitrack.merge_tracks(track_indices=[0,1,2], remove_merged=False, name='merged') #merge all tracks and create track merged
roll_one_track = mid_multitrack.tracks[3].pianoroll #obtain roll from track 'merged'
mid_unitrack = Multitrack(tracks=[Track(pianoroll=roll_one_track)]) #obtain track from roll
mid_unitrack.write('CACATUA_UNI.midi') # convert track to midi

# Plot tracks in original song
print('original song contains:')
print('get active pitch range', mid_unitrack.get_active_pitch_range())
print('get empty tracks', mid_unitrack.get_empty_tracks())
print('get max length', mid_unitrack.get_max_length())
print('beat resolution', mid_unitrack.beat_resolution)
print('roll length: ',len(roll_one_track))
for track in mid_unitrack.tracks:
    print(track)
# Plot pianoroll of original song
fig, ax = mid_unitrack.plot(mode='separate')
plt.show()

# Read the midi file that has just been converted (from midi to pianoroll and back to midi, to check reconstruction)
test_conversion_multitrack = Multitrack('CACATUA_UNI.midi', beat_resolution=24) #read to plot
test_conversion_midifile = MidiFile('CACATUA_UNI.midi') #read to see tracks
test_conversion_roll = test_conversion_multitrack.tracks[0].pianoroll
print('converted song contains:')

print('roll length: ',len(test_conversion_roll))
print('get active pitch range', test_conversion_multitrack.get_active_pitch_range())
print('get empty tracks', test_conversion_multitrack.get_empty_tracks())
print('get max length', test_conversion_multitrack.get_max_length())
print('beat resolution', test_conversion_multitrack.beat_resolution)
for track in test_conversion_midifile.tracks:
    print(track)
# Plot the pianoroll
fig, ax = test_conversion_multitrack.plot()
plt.show()

In [None]:
np.array_equal(roll_one_track,test_conversion_roll)

In [None]:
print(roll_one_track.shape)
print(test_conversion_roll.shape)

In [None]:
os.listdir(sonic_path)

In [None]:
pip install pypianoroll

In [1]:
# -------------------------------- DEFINITIONS --------------------------------------------
import os
from pypianoroll import Multitrack, Track
from music21 import converter, corpus, instrument, midi, note, chord, pitch, stream, roman
import pandas as pd
import numpy as np

def open_midi(midi_path, remove_drums):
    # There is an one-line method to read MIDIs
    # but to remove the drums we need to manipulate some
    # low level MIDI events.
    mf = midi.MidiFile()
    mf.open(midi_path)
    mf.read()
    mf.close()
    if (remove_drums):
        for i in range(len(mf.tracks)):
            mf.tracks[i].events = [ev for ev in mf.tracks[i].events if ev.channel != 10]          

    return midi.translate.midiFileToStream(mf)

def note_count(measure, count_dict):
    bass_note = None
    for chord in measure.recurse().getElementsByClass('Chord'):
        # All notes have the same length of its chord parent.
        note_length = chord.quarterLength
        for note in chord.pitches:          
            # If note is "C5", note.name is "C". We use "C5"
            # style to be able to detect more precise inversions.
            note_name = str(note) 
            if (bass_note is None or bass_note.ps > note.ps):
                bass_note = note
                
            if note_name in count_dict:
                count_dict[note_name] += note_length
            else:
                count_dict[note_name] = note_length
        
    return bass_note

def simplify_roman_name(roman_numeral):
    # Chords can get nasty names as "bII#86#6#5",
    # in this method we try to simplify names, even if it ends in
    # a different chord to reduce the chord vocabulary and display
    # chord function clearer.
    ret = roman_numeral.romanNumeral
    inversion_name = None
    inversion = roman_numeral.inversion()
    
    # Checking valid inversions.
    if ((roman_numeral.isTriad() and inversion < 3) or
            (inversion < 4 and
                 (roman_numeral.seventh is not None or roman_numeral.isSeventh()))):
        inversion_name = roman_numeral.inversionName()
        
    if (inversion_name is not None):
        ret = ret + str(inversion_name)
        
    elif (roman_numeral.isDominantSeventh()): ret = ret + "M7"
    elif (roman_numeral.isDiminishedSeventh()): ret = ret + "o7"
    return ret

def harmonic_reduction(midi_file):
    ret = []
    temp_midi = stream.Score()
    temp_midi_chords = midi_file.chordify()
    temp_midi.insert(0, temp_midi_chords)    
    music_key = temp_midi.analyze('key')
    max_notes_per_chord = 4   
    for m in temp_midi_chords.measures(0, None): # None = get all measures.
        if (type(m) != stream.Measure):
            continue
        
        # Here we count all notes length in each measure,
        # get the most frequent ones and try to create a chord with them.
        count_dict = dict()
        bass_note = note_count(m, count_dict)
        if (len(count_dict) < 1):
            ret.append("-") # Empty measure
            continue
        
        sorted_items = sorted(count_dict.items(), key=lambda x:x[1])
        sorted_notes = [item[0] for item in sorted_items[-max_notes_per_chord:]]
        measure_chord = chord.Chord(sorted_notes)
        
        # Convert the chord to the functional roman representation
        # to make its information independent of the music key.
        roman_numeral = roman.romanNumeralFromChord(measure_chord, music_key)
        ret.append(simplify_roman_name(roman_numeral))
        
    return ret

In [2]:
# DEFINE FIRST THE FUNCTIONS OPEN_MIDI, HARMONIC_REDUCTION

def create_interpolation_dataset(folder, save_path, segment_len=250):
    
    '''This function takes all songs in a folder, generates 5s MIDI files with a stride of 1s along each song,
    generates a harmonic reduction and pianoroll of each track and puts everything in a pandas dataframe,
    which is returned'''
    # INPUT
    # folder: name of the folder that contains all the MIDI songs
    # save_path: name of the folder to store the new MIDI segments
    # segment_len: segment length of the new MIDI segments, 250 by default (5s)
    
    d = {'midi_name': [], 'type': [], 'harmonic_reduction': [], 'pianoroll': []} #create empty dictionary
    
    directory = os.listdir(folder)
    directory = directory[:2000]
    for k in range(0,len(directory)): # go through all songs
        str_path = folder+'/'
        song_name = directory[k]

        # get song
        mid_multitrack = Multitrack(filename=folder+'/'+song_name)

        # get merged pianoroll
        merged_pianoroll_total = mid_multitrack.get_merged_pianoroll(mode='sum') # merge all tracks #sum or max

        limit = len(merged_pianoroll_total)
        print('limit: ',limit)
        divisions = int(limit/segment_len)
        for i in range(0,divisions):
            if (segment_len*i) <= (limit-3*segment_len):
        
        # One song (I) contains different number of subsongs (II) according to its length.
        # Each subsong (II) contains 3 segments.
        # Each segment (III) contains 250 events, so each subsong contains 750 events.
        
        # Subsongs are not generated as MIDIs, Segments are.
        # name: Name of original song (I) + Number of subsong (II) + Number of segment (III)

            #segments
                roll_1 = merged_pianoroll_total[segment_len*i:segment_len*(i+1)] #partial segments of pianoroll
                roll_2 = merged_pianoroll_total[segment_len*(i+1):segment_len*(i+2)]
                roll_3 = merged_pianoroll_total[segment_len*(i+2):segment_len*(i+3)]
        
            # create a multitrack which only track is the pianoroll set previously (current_roll)
            #create_multitrack = Multitrack(tracks=[Track(pianoroll=current_roll)]) #mode max or sum
            # create 3 MIDI files (each one corresponds to a segment)
                create_multitrack_1 = Multitrack(tracks=[Track(pianoroll=roll_1)]) #mode max or sum
                create_multitrack_2 = Multitrack(tracks=[Track(pianoroll=roll_2)]) #mode max or sum
                create_multitrack_3 = Multitrack(tracks=[Track(pianoroll=roll_3)]) #mode max or sum
        
                #create_multitrack_1.write(save_path+'/'+song_name+'_'+str(i)+'_1.midi') # convert multtrack to midi
                create_multitrack_1.write(save_path+'/'+song_name+'_'+str(i)+'_1.midi') # convert multtrack to midi
                create_multitrack_2.write(save_path+'/'+song_name+'_'+str(i)+'_2.midi') # convert multtrack to midi
                create_multitrack_3.write(save_path+'/'+song_name+'_'+str(i)+'_3.midi') # convert multtrack to midi

                # STORE SEGMENT 1 IN DICTIONARY
                midi_segment_name = save_path+'/'+song_name+'_'+str(i)+'_1.midi'
                final_name = song_name+'_'+str(i)+'_1.midi'
                midi_file = open_midi(midi_segment_name, True) # ----------------------- OPEN_MIDI()
                harmony_midi = harmonic_reduction(midi_file) # -------------------------------------- HARMONIC_REDUCTION()
                d['midi_name'].append(final_name)
                d['type'].append('init')
                d['harmonic_reduction'].append(harmony_midi)
                d['pianoroll'].append(roll_1)
                
                # STORE SEGMENT 2 IN DICTIONARY
                midi_segment_name = save_path+'/'+song_name+'_'+str(i)+'_2.midi'
                final_name = song_name+'_'+str(i)+'_2.midi'
                midi_file = open_midi(midi_segment_name, True) # ----------------------- OPEN_MIDI()
                harmony_midi = harmonic_reduction(midi_file) # -------------------------------------- HARMONIC_REDUCTION()
                d['midi_name'].append(final_name)
                d['type'].append('interpol')
                d['harmonic_reduction'].append(harmony_midi)
                d['pianoroll'].append(roll_2)
                
                # STORE SEGMENT 3 IN DICTIONARY
                midi_segment_name = save_path+'/'+song_name+'_'+str(i)+'_3.midi'
                final_name = song_name+'_'+str(i)+'_3.midi'
                midi_file = open_midi(midi_segment_name, True) # ----------------------- OPEN_MIDI()
                harmony_midi = harmonic_reduction(midi_file) # -------------------------------------- HARMONIC_REDUCTION()
                d['midi_name'].append(final_name)
                d['type'].append('end')
                d['harmonic_reduction'].append(harmony_midi)
                d['pianoroll'].append(roll_3)
                
                #print('subsong', i, 'done')
                #print('from ', segment_len*(i), 'to ', segment_len*(i+3))
        print(song_name)
        print('song',k,'done')
    return d # dictionary

In [None]:
test_dictionary = create_interpolation_dataset('lahk_midi_tracks', 'lahk_midi_interpolation')
test_df = pd.DataFrame(data=test_dictionary)

limit:  9336
0bc945bdec9b3e295591dec504ebae13_12413.midi
song 0 done
limit:  13368
6bacae2ca62b42674aff60825a1fc0b7_17283.midi
song 1 done
limit:  5208
5386ffb3b559bebd66657123275688d7_453.midi
song 2 done
limit:  11808
4393b768a7347752216f5f99d42aa0b8_7681.midi
song 3 done
limit:  14016
886d10220f2d116c4ab64fa08c7447d1_7611.midi
song 4 done
limit:  15960
5ed33ceaddeecdcf54b85dcb881f0da8_4924.midi
song 5 done
limit:  5400
b6397995c0999bfc7d592acdcc6785bb_19548.midi
song 6 done
limit:  9792
89f8a0384a6c23b0f3e762d12f57f3fb_19027.midi
song 7 done
limit:  6624
8cf2ce253ecc786efe65804f1cc3aa8a_5917.midi
song 8 done
limit:  4896
b70360047b6d793a2eb620d3ea935817_18828.midi
song 9 done
limit:  768
ce8b5eb6a2fc7bcbb6b5e5fb1b544673_16145.midi
song 10 done
limit:  5712
e555e8fdc8e67bc5eff16c09c6a82319_2747.midi
song 11 done
limit:  11352
90ec306bcf3443ea3489c7438831eae1_15051.midi
song 12 done
limit:  6816
81f9a956023052513abb877d6b574575_14474.midi
song 13 done
limit:  7512
83a6d271893b6c93f21d

9018f6b22174acadf88cbbb61683951e_19706.midi
song 118 done
limit:  8736
f1040ba1097811ebb15abb362289dc21_21034.midi
song 119 done
limit:  5376
5a2ad21ac4d2bf2cb572b1c150161e61_13790.midi
song 120 done
limit:  7104
07cfa4d9b9e3576fd0b4f6551c4a0295_19915.midi
song 121 done
limit:  12768
e8fafcc6f63041de36fa119deb97e8c8_19968.midi
song 122 done
limit:  10416
060527d8c0d631f65a739c7967da44e9_4180.midi
song 123 done
limit:  1512
cfe7cbcdd6f8f4cbc2000a0c59a142fa_2403.midi
song 124 done
limit:  5280
42aff386cca9c1032aee5423c295b22b_11534.midi
song 125 done
limit:  6336
0fec637bd561bb9ab214c594809ef204_1367.midi
song 126 done
limit:  9360
a36a3c9a029372e5bf9532404d445f17_12508.midi
song 127 done
limit:  12168
68a61ef04c6cd76cd83e68e036ee3771_5252.midi
song 128 done
limit:  5688
ae80a5101f36e36b3390dde99fb8bc3a_12296.midi
song 129 done
limit:  8376
f5f4478f594e8d7cdeca1cc7b435e4bc_11117.midi
song 130 done
limit:  8352
81d301b1d3f080093096fe8cc7114c5d_8179.midi
song 131 done
limit:  10248
041574a

2ce36f324f48c620b013ad379080d3bd_10246.midi
song 234 done
limit:  8400
a91fc3143974ff3e6542c92fdd4ff308_11187.midi
song 235 done
limit:  3864
2b4a4b7a6caa00fde0d40139b8b777c1_16592.midi
song 236 done
limit:  7584
193c81d7ae32045ce26ec443e7fb1069_1464.midi
song 237 done
limit:  5136
4c67fa9020c7df634f8ee15f1f2ce3f3_14010.midi
song 238 done
limit:  3552
09ae5d4bf9f6710139e0b0c035f4a9fb_6908.midi
song 239 done
limit:  9456
e630663cfaf335b656f38833a7ded89e_17145.midi
song 240 done
limit:  12936
b4eb26d10618438d5e9a43e37e6b8216_19297.midi
song 241 done
limit:  8544
eebbc6f69c2aae87cc3ac8773744cb32_6943.midi
song 242 done
limit:  1536
dbaf467e518dfae25fa68457b70bc158_12572.midi
song 243 done
limit:  12072
33d591b08a3c25ca5471805e635a59fa_14278.midi
song 244 done
limit:  8472
a95032d0cfc53eb5d328ed16e9b6f964_10089.midi
song 245 done
limit:  6936
2d3ab8a81028f5a451952adf0faa89a8_11143.midi
song 246 done
limit:  15264
7b9abfc328ce468c6686b728810e65f4_7145.midi
song 247 done
limit:  3816
5aef401

e437760fbc0d95928f8f9ae4bebe4424_11497.midi
song 350 done
limit:  3552
00497eac10a66f9ab0f4b2b1cab75195_19441.midi
song 351 done
limit:  9384
584fba6cb85e22c2ba0e0be2260b70b6_2861.midi
song 352 done
limit:  7800
e0cf59e149c984cb66c8c7f4b43d8cf6_21283.midi
song 353 done
limit:  14208
1440bd0091dc2025ce88b9c4cca338d0_4401.midi
song 354 done
limit:  13368
8cfd9dff944c694f4becaca376e91a85_10801.midi
song 355 done
limit:  6192
4750831eb02f7346d082ceddf98af728_6162.midi
song 356 done
limit:  12936
530929b09d0f18779c75223f9ed10e30_16949.midi
song 357 done
limit:  9528
24f7b92bc3de3a7ef6fd032e8f627c85_18682.midi
song 358 done
limit:  10224
5c4a110b25e4be9bf15420c82e28186b_9681.midi
song 359 done
limit:  768
c6a6b490df97b3e4f8a9338c7e9f6b5d_915.midi
song 360 done
limit:  9696
08948fbcdcac4db2e2a52d5e7a226125_16788.midi
song 361 done
limit:  12672
731fd5732e00692e1c3ebc861abc231b_170.midi
song 362 done
limit:  11496
5367f4a6a774be47a47b9b8c40a5605b_7532.midi
song 363 done
limit:  7896
0aeadaa911

In [9]:
test_df = jejeje

In [None]:
# SONG TO REMOVE IN CASE THERE IS AN ERROR
directory = os.listdir('lahk_midi_tracks')
print(directory[902])

In [10]:
import pprint
def vectorize_harmony(model, harmonic_reduction):
    # Gets the model vector values for each chord from the reduction.
    word_vecs = []
    for word in harmonic_reduction:
        if word != '-':
            #print('ok')
            try:
                vec = model[word]
                word_vecs.append(vec)
            except KeyError:
                # Ignore, if the word doesn't exist in the vocabulary
                pass
        #else:
        #    pass
        #    print('pass')
    
    # Assuming that document vector is the mean of all the word vectors.
    return np.mean(word_vecs, axis=0)

def cosine_similarity(vecA, vecB):
    # Find the similarity between two vectors based on the dot product.
    csim = np.dot(vecA, vecB) / (np.linalg.norm(vecA) * np.linalg.norm(vecB))
    if np.isnan(np.sum(csim)):
        return 0
    
    return csim

def calculate_similarity_aux(df, model, source_name, target_names=[], threshold=0):
    source_harmo = df[df["midi_name"] == source_name]["harmonic_reduction"].values[0]
    source_vec = vectorize_harmony(model, source_harmo)    
    results = []
    for name in target_names:
        target_harmo = df[df["midi_name"] == name]["harmonic_reduction"].values[0]
        if (len(target_harmo) == 0):
            continue
            
        target_vec = vectorize_harmony(model, target_harmo)       
        sim_score = cosine_similarity(source_vec, target_vec)
        if sim_score > threshold:
            results.append({
                'score' : sim_score,
                'name' : name
            })
                
    # Sort results by score in desc order
    results.sort(key=lambda k : k['score'] , reverse=True)
    return results

def calculate_similarity(df, model, source_name, target_prefix, threshold=0):
    source_midi_names = df[df["midi_name"] == source_name]["midi_name"].values
    if (len(source_midi_names) == 0):
        print(type(source_midi_names))
        print("Invalid source name")
        return
    
    source_midi_name = source_midi_names[0]
    
    target_midi_names = df[df["midi_name"].str.startswith(target_prefix)]["midi_name"].values  
    print(target_midi_names)
    if (len(target_midi_names) == 0):
        print("Invalid target prefix")
        return
    
    return calculate_similarity_aux(df, model, source_midi_name, target_midi_names, threshold)

In [11]:
# import modules & set up logging
import gensim, logging
# logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)
 
model = gensim.models.Word2Vec(test_df["harmonic_reduction"], min_count=2, window=4)

In [12]:
print('i  j   similarity')
dataframe = test_df
for i in range(0,len(dataframe)):
    for j in range(0,len(dataframe)):
        source_harmo_1 = dataframe['harmonic_reduction'][i]
        target_harmo_1 = dataframe['harmonic_reduction'][j]

        source_vec = vectorize_harmony(model, source_harmo_1)

        target_vec = vectorize_harmony(model, target_harmo_1)       
        sim_score = cosine_similarity(source_vec, target_vec)
#print(target_vec)
        print(i,' ',j,' ', sim_score)

i  j   similarity


  if __name__ == '__main__':


NameError: name 'np' is not defined

In [16]:
import pandas as np
test_df['midi_name']

0      0a3fdc454bd8432bb6cd4f47811f98cb_19861.midi_0_...
1      0a3fdc454bd8432bb6cd4f47811f98cb_19861.midi_0_...
2      0a3fdc454bd8432bb6cd4f47811f98cb_19861.midi_0_...
3      0a3fdc454bd8432bb6cd4f47811f98cb_19861.midi_1_...
4      0a3fdc454bd8432bb6cd4f47811f98cb_19861.midi_1_...
                             ...                        
106    0a3fdc454bd8432bb6cd4f47811f98cb_19861.midi_35...
107    0a3fdc454bd8432bb6cd4f47811f98cb_19861.midi_35...
108    0a3fdc454bd8432bb6cd4f47811f98cb_19861.midi_36...
109    0a3fdc454bd8432bb6cd4f47811f98cb_19861.midi_36...
110    0a3fdc454bd8432bb6cd4f47811f98cb_19861.midi_36...
Name: midi_name, Length: 111, dtype: object

In [17]:
def evaluate_sim_and_final_create_dataset(pandas_dataframe, save_songs=True, return_similarity_array=False):
    dataframe = pandas_dataframe
    similarities = [0]*len(dataframe)
    count = 0
    similarities = np.empty([1,1000])
    for index,row in dataframe.iterrows():
        #print(index,row)
        if dataframe['type'][index]=='init':
            print(index)
            harmony_1 = dataframe['harmonic_reduction'][index]
            print(dataframe['midi_name'][index])
            harmony_2 = dataframe['harmonic_reduction'][index+2]
            print(dataframe['midi_name'][index+2])

            vec_1 = vectorize_harmony(model, harmony_1)
            vec_2 = vectorize_harmony(model, harmony_2)

            sim_score = cosine_similarity(vec_1, vec_2)
            #print(sim_score)
            #similarities[count]=sim_score
            count+=1
            if save_songs:
                if sim_score <= 0.6:
                    create_multitrack_1 = Multitrack(tracks=[Track(pianoroll=dataframe['pianoroll'][index])])
                    create_multitrack_2 = Multitrack(tracks=[Track(pianoroll=dataframe['pianoroll'][index+1])])
                    create_multitrack_3 = Multitrack(tracks=[Track(pianoroll=dataframe['pianoroll'][index+2])])

                    create_multitrack_1.write('lahk_midi_tracks_interpolation_after_sim/'+dataframe['midi_name'][index])
                    create_multitrack_2.write('lahk_midi_tracks_interpolation_after_sim/'+dataframe['midi_name'][index+1])
                    create_multitrack_3.write('lahk_midi_tracks_interpolation_after_sim/'+dataframe['midi_name'][index+2])
    #if return_similarity_array:
    #return similarities

In [18]:
evaluate_sim_and_final_create_dataset(test_df)

AttributeError: module 'pandas' has no attribute 'empty'

In [None]:
# SAVE CSV
test_df.to_csv('dataset_interpolation', index=False)

In [2]:
import pandas as pd
jejeje = pd.read_csv('dataset_interpolation')

In [4]:
jejeje.head()

Unnamed: 0,midi_name,type,harmonic_reduction,pianoroll
0,0a3fdc454bd8432bb6cd4f47811f98cb_19861.midi_0_...,init,"['vi', 'v', 'iv']",[[0 0 0 ... 0 0 0]\n [0 0 0 ... 0 0 0]\n [0 0 ...
1,0a3fdc454bd8432bb6cd4f47811f98cb_19861.midi_0_...,interpol,"['vi', 'i53', 'i']",[[0 0 0 ... 0 0 0]\n [0 0 0 ... 0 0 0]\n [0 0 ...
2,0a3fdc454bd8432bb6cd4f47811f98cb_19861.midi_0_...,end,"['v', 'iv', 'i']",[[0 0 0 ... 0 0 0]\n [0 0 0 ... 0 0 0]\n [0 0 ...
3,0a3fdc454bd8432bb6cd4f47811f98cb_19861.midi_1_...,init,"['vi', 'i53', 'i']",[[0 0 0 ... 0 0 0]\n [0 0 0 ... 0 0 0]\n [0 0 ...
4,0a3fdc454bd8432bb6cd4f47811f98cb_19861.midi_1_...,interpol,"['v', 'iv', 'i']",[[0 0 0 ... 0 0 0]\n [0 0 0 ... 0 0 0]\n [0 0 ...


In [25]:
type(test_df['pianoroll'][0])

str

In [26]:
np.fromstring(test_df['pianoroll'], dtype=int, sep=' ')
#test_df['pianoroll'] = test_df['pianoroll'].astype(np.array)

AttributeError: module 'pandas' has no attribute 'fromstring'

In [27]:
result = test_df['piaonoroll'].apply(lambda x: 
                           np.fromstring(
                               x.replace('\n','')
                                .replace('[','')
                                .replace(']','')
                                .replace('  ',' '), sep=' '))
print 'result'

ValueError: Cannot pass scalar '[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]' to 'pandas.array'.

In [None]:
def create_dict(folder):
    # folder: name of the folder that contains all the songs to be included into the dictionary
    
    d = {'midi_name': [], 'type': [], 'harmonic_reduction': [], 'pianoroll': []} #create empty dictionary
    
    directory = os.listdir(folder)
    for k in range(0,len(directory)): # go through all the songs
        midi_name = directory[k]
        midi_file = open_midi(concat_path(folder, midi_name), True)

        # Get key
        #analyze_midi = "{0}".format(midi_file.analyze('key'))

        harmony_midi = harmonic_reduction(midi_file)

        d['midi_name'].append(midi_name)
        d['harmonic_reduction'].append(harmony_midi)
        
    return d