In [None]:
import os

import matplotlib.pyplot as plt
import seaborn as snb
import seaborn as sns  # alias used by the plotting cell
from music21 import converter, corpus, instrument, midi, note, chord, pitch, stream

In [None]:
def open_midi(midi_path, remove_drums):
    """Read a MIDI file and translate it into a music21 stream.

    Args:
        midi_path: Path of the MIDI file to read.
        remove_drums: When truthy, every event whose ``channel`` is 10
            (the drum channel this notebook wants to drop) is removed from
            each track before translation.

    Returns:
        The object produced by ``midi.translate.midiFileToStream`` for the
        (possibly filtered) file.
    """
    # music21 offers a one-line reader, but dropping the drums requires
    # working on the low-level MIDI events of each track.
    mf = midi.MidiFile()
    mf.open(midi_path)
    try:
        mf.read()
    finally:
        # Always release the file handle, even if parsing fails part-way.
        mf.close()

    if remove_drums:
        for track in mf.tracks:
            track.events = [ev for ev in track.events if ev.channel != 10]

    return midi.translate.midiFileToStream(mf)

def concat_path(path, child):
    """Return ``path`` and ``child`` glued together with a single "/"."""
    return "/".join((path, child))

In [None]:
from music21 import roman

def note_count(measure, count_dict):
    """Add up how long each pitch sounds in ``measure``.

    Args:
        measure: Object whose ``recurse().getElementsByClass('Chord')``
            yields chords exposing ``quarterLength`` and ``pitches``.
        count_dict: Mapping updated in place; keys are ``str(pitch)``
            (octave included, e.g. "C5", so inversions stay detectable),
            values are the accumulated quarter lengths.

    Returns:
        The pitch with the smallest ``ps`` seen in the measure (the first
        one wins on ties), or ``None`` when the measure has no chords.
    """
    lowest = None
    for sounding in measure.recurse().getElementsByClass('Chord'):
        # Every pitch of a chord lasts as long as the chord itself.
        duration = sounding.quarterLength
        for p in sounding.pitches:
            if lowest is None or p.ps < lowest.ps:
                lowest = p

            key = str(p)
            if key not in count_dict:
                count_dict[key] = duration
            else:
                count_dict[key] += duration

    return lowest
                
def simplify_roman_name(roman_numeral):
    """Build a short label for a roman numeral chord.

    Full figures can be unwieldy (e.g. "bII#86#6#5"). To keep the chord
    vocabulary small and the harmonic function readable, the label is the
    bare ``romanNumeral`` followed by at most one suffix, even if that
    means the label no longer describes exactly the same chord.

    Returns:
        ``romanNumeral`` plus the inversion name when the inversion is
        plausible for the chord type and a name is available; otherwise
        plus "M7" for dominant sevenths or "o7" for diminished sevenths;
        otherwise the bare ``romanNumeral``.
    """
    base = roman_numeral.romanNumeral
    inversion = roman_numeral.inversion()

    # A triad has inversions 0-2, a seventh chord 0-3; anything else is
    # treated as noise and the inversion figure is ignored.
    if roman_numeral.isTriad() and inversion < 3:
        plausible = True
    elif inversion < 4 and (roman_numeral.seventh is not None
                            or roman_numeral.isSeventh()):
        plausible = True
    else:
        plausible = False

    figure = roman_numeral.inversionName() if plausible else None
    if figure is not None:
        return base + str(figure)

    if roman_numeral.isDominantSeventh():
        return base + "M7"
    if roman_numeral.isDiminishedSeventh():
        return base + "o7"
    return base
                
def harmonic_reduction(midi_file, max_notes_per_chord=4):
    """Reduce a piece to one simplified roman-numeral label per measure.

    For every measure the sounding time of each pitch is accumulated, the
    longest-sounding pitches are combined into a chord, and that chord is
    expressed relative to the analysed key so the result does not depend
    on the key the piece is written in.

    Args:
        midi_file: Stream-like object providing ``chordify()``.
        max_notes_per_chord: Positive number of longest-sounding pitches
            kept per measure when building its chord (default 4).

    Returns:
        A list with one entry per measure: the label returned by
        ``simplify_roman_name``, or "-" for a measure without chords.
    """
    chordified = midi_file.chordify()

    # The key is analysed on a score that wraps the chordified stream.
    wrapper = stream.Score()
    wrapper.insert(0, chordified)
    music_key = wrapper.analyze('key')

    reduction = []
    for m in chordified.measures(0, None):  # None = up to the last measure.
        # isinstance rather than an exact type comparison, so subclasses
        # of Measure are not silently skipped.
        if not isinstance(m, stream.Measure):
            continue

        # Total sounding time per pitch in this measure.
        count_dict = {}
        note_count(m, count_dict)
        if not count_dict:
            reduction.append("-")  # Empty measure
            continue

        # Ascending by duration, so the tail holds the dominant pitches.
        ranked = sorted(count_dict.items(), key=lambda item: item[1])
        strongest = [name for name, _ in ranked[-max_notes_per_chord:]]
        measure_chord = chord.Chord(strongest)

        # Functional (roman numeral) representation, relative to the key.
        roman_numeral = roman.romanNumeralFromChord(measure_chord, music_key)
        reduction.append(simplify_roman_name(roman_numeral))

    return reduction

#harmonic_reduction(base_midi)[0:10]

In [None]:
import re
import os
import pandas as pd

In [None]:
with open("./output/classical_150_it_100_songs/store/train.txt","r",encoding="utf-8") as f:
    # One file name per line. Blank lines (such as the empty string left by
    # a trailing newline) are skipped so no bare directory path is produced.
    data = [f'./data/classical/{name}' for name in f.read().split("\n") if name.strip()]

In [None]:
# Only the first 80 listed files are analysed.
target_games = data[0:80]
df_array = []
for i in target_games:
    elemento = open_midi(i, True)
    # Run the key analysis once per file and reuse the result for both
    # the correlation coefficient and the key name.
    detected_key = elemento.analyze('key')
    df_array.append({
        'midi_name': os.path.basename(i),
        'coef': float(detected_key.correlationCoefficient),
        'key_signature': str(detected_key),
        'harmonic_reduction': harmonic_reduction(elemento),
    })
sonic_df = pd.DataFrame(df_array)
sonic_df.head()

In [None]:
import gensim, logging
# Each harmonic reduction is one "sentence" whose words are chord labels.
# A fixed seed and a single worker thread keep training repeatable between
# runs (gensim additionally needs PYTHONHASHSEED pinned for bit-identical
# vectors).
model = gensim.models.Word2Vec(
    sonic_df["harmonic_reduction"].tolist(),
    min_count=2,
    window=4,
    seed=42,
    workers=1,
)

In [None]:
# Synthetic identifiers "a_0", "a_1", ... sized from the frame itself, so the
# assignment also works when fewer than 80 files were loaded.
sonic_df["name"] = [f"a_{i}" for i in range(len(sonic_df))]
sonic_df

In [None]:
import pprint
import numpy as np

def vectorize_harmony(model, harmonic_reduction):
    """Average the embedding vectors of the chords in a harmonic reduction.

    Args:
        model: Either an object exposing its vectors through a ``wv``
            attribute (e.g. a trained gensim ``Word2Vec``) or anything that
            supports ``model[word]`` and raises ``KeyError`` for unknown
            words.
        harmonic_reduction: Iterable of chord labels.

    Returns:
        The element-wise mean of the vectors of all known labels (the
        "document vector"), or NaN when none of the labels is known.
    """
    # gensim >= 4 removed Word2Vec.__getitem__; vectors are reached through
    # ``.wv``. Plain mappings / KeyedVectors have no ``.wv`` and are indexed
    # directly.
    vectors = getattr(model, "wv", model)

    word_vecs = []
    for word in harmonic_reduction:
        try:
            word_vecs.append(vectors[word])
        except KeyError:
            # Labels outside the vocabulary do not contribute.
            continue

    if not word_vecs:
        # Same value np.mean([]) yields, without its RuntimeWarning.
        return np.float64(np.nan)

    # The document vector is assumed to be the mean of its word vectors.
    return np.mean(word_vecs, axis=0)

def cosine_similarity(vecA, vecB):
    """Cosine of the angle between two vectors.

    Returns:
        The dot product divided by the product of the norms, or ``0`` when
        that computation produces NaN anywhere in the result.
    """
    dot_product = np.dot(vecA, vecB)
    norm_product = np.linalg.norm(vecA) * np.linalg.norm(vecB)
    csim = dot_product / norm_product
    return 0 if np.isnan(np.sum(csim)) else csim

def calculate_similarity_aux(df, model, source_name, target_names=[], threshold=0):
    """Score every target piece against the source piece.

    Args:
        df: Frame with a "name" column and a "harmonic_reduction" column.
        model: Embedding model handed to ``vectorize_harmony``.
        source_name: Value of "name" identifying the reference piece.
        target_names: Values of "name" to compare against the reference.
        threshold: Only scores strictly greater than this are kept.

    Returns:
        A list of ``{'score': ..., 'name': ...}`` dicts, highest score
        first. Targets with an empty harmonic reduction are skipped.
    """
    def reduction_of(name):
        # First row whose "name" matches.
        return df[df["name"] == name]["harmonic_reduction"].values[0]

    source_vec = vectorize_harmony(model, reduction_of(source_name))

    scored = []
    for candidate in target_names:
        candidate_harmo = reduction_of(candidate)
        if not len(candidate_harmo):
            continue

        candidate_vec = vectorize_harmony(model, candidate_harmo)
        score = cosine_similarity(source_vec, candidate_vec)
        if score > threshold:
            scored.append({'score': score, 'name': candidate})

    # Best match first.
    return sorted(scored, key=lambda entry: entry['score'], reverse=True)

def calculate_similarity(df, model, source_name, target_prefix, threshold=0):
    """Compare one piece with every piece whose name starts with a prefix.

    Args:
        df: Frame with a "name" column (plus what
            ``calculate_similarity_aux`` needs).
        model: Embedding model forwarded to ``calculate_similarity_aux``.
        source_name: Exact "name" of the reference piece.
        target_prefix: Prefix selecting the pieces to compare against.
        threshold: Forwarded to ``calculate_similarity_aux``.

    Returns:
        The result of ``calculate_similarity_aux``; ``None`` (after
        printing a message) when the source name or the prefix matches
        nothing.
    """
    names = df["name"]

    matching_sources = names[names == source_name].values
    if not len(matching_sources):
        print("Invalid source name")
        return None

    matching_targets = names[names.str.startswith(target_prefix)].values
    if not len(matching_targets):
        print("Invalid target prefix")
        return None

    return calculate_similarity_aux(df, model, matching_sources[0], matching_targets, threshold)

# For every piece: mean similarity score against all pieces whose name
# starts with "a" (the comparison set includes the piece itself).
index = []
data = []
for elemento in sonic_df["name"]:
    index.append(elemento)
    matches = calculate_similarity(sonic_df, model, elemento, "a")
    scores = np.array([match["score"] for match in matches])
    data.append(scores.mean())

In [None]:
from metricsGAN import get_polyphony_score, get_midi_pattern, get_tones

def info_tones(midi_pattern):
    """Summarise the tones that ``get_tones`` extracts from a MIDI pattern.

    Returns:
        A dict with the number of tones, the smallest and largest tone,
        their difference (span) and the number of distinct tones.
    """
    tones = get_tones(midi_pattern)
    lowest, highest = min(tones), max(tones)
    return {
        'num_tones': len(tones),
        'tone_min': lowest,
        'tone_max': highest,
        'tone_span': highest - lowest,
        'tones_unique': len(set(tones)),
    }

# Polyphony score of every analysed file.
poly_score = [
    get_polyphony_score(get_midi_pattern(midi_path))
    for midi_path in target_games
]
# Tone statistics of every analysed file. Each file is loaded a second time
# here, so the statistics are computed on a freshly loaded pattern.
tones_music = [
    info_tones(get_midi_pattern(midi_path))
    for midi_path in target_games
]

In [None]:
# One row per piece (indexed by file name): mean similarity, polyphony score
# and the tone statistics.
df = (
    pd.DataFrame(
        list(zip(sonic_df["midi_name"], data, poly_score)),
        columns=["music", "similitud", "polifonia"],
    )
    .set_index("music")
    .merge(
        pd.DataFrame(tones_music, index=sonic_df["midi_name"]),
        how='outer',
        left_index=True,
        right_index=True,
    )
)
# Combined score: similarity per unit of polyphony, scaled by tone span per
# distinct tone.
df["resultado"] = (df["similitud"] / df["polifonia"]) * (df["tone_span"] / df["tones_unique"])
# Attach the detected key of each piece, then order by the combined score.
df = (
    df.merge(
        sonic_df.set_index("midi_name")["key_signature"],
        how='outer',
        left_index=True,
        right_index=True,
    )
    .sort_values(by=['resultado'])
)
df

In [None]:
# Look the key-correlation coefficient up by file name. df has been re-sorted
# since it was built, so relabelling sonic_df rows positionally with df.index
# would attach each coefficient to the wrong piece.
coef_by_song = sonic_df.set_index("midi_name")["coef"]
df["resultado2"] = df["similitud"]*coef_by_song + df["polifonia"]*(df["tone_span"]/df["tone_span"].max())
df = df.sort_values(by=['resultado2'])
df

In [None]:
# Polyphony next to the key-correlation coefficient ("consistencia de
# escala") of each piece. The coefficient is matched by file name, because
# df is no longer in sonic_df's row order.
plotdf = (
    df[["polifonia"]]
    .merge(
        sonic_df.set_index("midi_name")["coef"],
        how="inner",
        left_index=True,
        right_index=True,
    )
    .rename(columns={"coef": "consistencia de escala"})
    # Sort first and renumber afterwards: the wide-form line plot uses the
    # index as x, so the pieces are drawn in order of increasing polyphony.
    .sort_values(by=["polifonia"])
    .reset_index(drop=True)
)

# Apply the theme before the axes are created so the grid style is used.
sns.set_theme(style="whitegrid")
fig, ax = plt.subplots(figsize=(17, 10))
sns.lineplot(data=plotdf, palette="tab10", markers=True, linewidth=2.5, ax=ax)
ax.set_xlabel("piezas ordenadas por polifonia");