In [3]:
import os
import shutil # move files and delete folders with files
import tarfile
import urllib.request # download files folder
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import numpy as np
import pandas as pd
import IPython.display as ipd
import librosa, librosa.display
import re
import IPython # listen to sounds on Python
import pretty_midi

from scipy.io import wavfile
from scipy.spatial import distance_matrix
from matplotlib import colors
from itertools import product
from hmmlearn import hmm
from sklearn.metrics import f1_score


%load_ext autoreload
%autoreload 2

# Pitch-class labels; chromagram_2_dataframe uses them as the chroma column names.
# NOTE(review): a later cell rebinds COL_NAMES_NOTES to a 17-name list for the training
# CSV, so calling chromagram_2_dataframe after that cell would pick up the longer list.
COL_NAMES_NOTES = ["C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "A#", "B"]


ModuleNotFoundError: No module named 'pretty_midi'

In [272]:
import re

def __unify_chords(chords_df):
    """Normalise the chord labels in ``chords_df['chord']`` to a compact spelling.

    Every label is rewritten by an ordered list of literal and regular-expression
    substitutions, for example ``A:min`` -> ``Am``, ``G:7(#9)`` -> ``G7`` and
    ``C:maj(9)`` -> ``Cadd9``.  The order matters: each rule operates on the
    output of the rules before it.

    Parameters
    ----------
    chords_df : pandas.DataFrame
        Frame with a ``'chord'`` column holding string labels.

    Returns
    -------
    list of str
        One rewritten label per row, in row order.
    """
    # (is_regex, pattern, replacement), applied top to bottom.
    # Regex patterns and replacements are raw strings so that sequences such as
    # \g<1> and \( reach the `re` module without relying on invalid string escapes.
    rules = (
        (False, '/5', ''),
        (False, '/3', ''),
        (False, '/b3', ''),
        (False, '/b5', ''),                     # remove /b5
        (False, ':min/', ':m'),                 # minor with a bass note: A:min/b7 -> A:mb7
        (True, r'7\(.*\)', '7'),                # remove (..) after 7-chords
        (False, ':maj/9', ':add9'),
        (True, r':maj\(9\)', ':add9'),
        (True, r':sus4.*', ':sus4'),
        (False, '/6', '6'),
        (False, '/7', '7'),
        (True, r':7.*', '7'),
        (False, ':min(9)', ':madd9'),
        (True, r':min\(.*', ':min'),
        (True, r':min7.*', ':min7'),
        (True, r':9.*', '9'),
        (True, r':\(1\)', '1'),
        (False, '/b6', 'addb6'),
        (False, ':hdim7/b7', 'hdim7'),
        (True, r'(.*:.*)/b7', r'\g<1>'),
        (False, '/b7', 'addb7'),
        (True, r':maj\(2\).*', 'add2'),
        (True, r':(add.)/.*', r'\g<1>'),
        (False, ':maj6/2', 'maj6'),
        (True, r'/(.*)', r'add\g<1>'),
        (False, ':maj(#11)', 'add#11'),
        (False, ':maj(11)', 'add11'),
        (True, r':maj\(\*.*\)', ''),            # C:maj(*2) -> C
        (True, r'(:maj.)\(.*\)', r'\g<1>'),     # C:maj6(2) -> C:maj6
        (True, r':\(.*\)', ''),                 # C:(1,2,3) -> C
        (False, ':', ''),
        (True, r'(.)min$', r'\g<1>m'),          # trailing "min" -> "m": Amin -> Am
    )

    chords_processed = list(chords_df['chord'])
    for is_regex, pattern, replacement in rules:
        if is_regex:
            chords_processed = [re.sub(pattern, replacement, elem) for elem in chords_processed]
        else:
            chords_processed = [elem.replace(pattern, replacement) for elem in chords_processed]
    return chords_processed

def read_simplify_chord_file(music_file_path, process_silence=False):
    """Load a space-separated chord annotation file into a DataFrame.

    The file is read without a header row and its columns are named
    ``start``, ``end`` and ``chord``.  When ``process_silence`` is ``True``,
    every ``'N'`` label is overwritten with the most frequent label in the
    file (the first value returned by ``Series.mode``).

    Label unification through ``__unify_chords`` is not applied here.
    """
    annotation = pd.read_csv(music_file_path, sep=" ", header=None)
    annotation.columns = ['start', 'end', 'chord']

    if process_silence != True:
        return annotation

    # replace silence by probable tonal end
    most_common_label = annotation['chord'].mode()[0]
    is_silence = annotation['chord'] == 'N'
    annotation.loc[is_silence, 'chord'] = most_common_label
    return annotation

In [4]:
#https://github.com/caiomiyashiro/music_and_science
def get_frame_stats(chromagram, signal, Fs):
    """Return ``[frames_per_sec, frame_duration_sec]`` for a chromagram.

    The frame rate is the number of chromagram columns divided by the signal
    length in seconds (``len(signal) / Fs``); the frame duration is its
    reciprocal.
    """
    n_frames = chromagram.shape[1]
    signal_length_sec = len(signal) / Fs
    frame_rate = n_frames / signal_length_sec
    return [frame_rate, 1 / frame_rate]

#https://github.com/caiomiyashiro/music_and_science
def chromagram_2_dataframe(chromagram, frame_duration_sec, test_version=False):
    """Convert a chroma matrix into a per-frame DataFrame with timing columns.

    Parameters
    ----------
    chromagram : array-like
        Chroma matrix.  It is transposed, so each row of the result is one
        frame; the columns are named after ``COL_NAMES_NOTES``.
    frame_duration_sec : float
        Duration of one frame, used to fill the ``start`` and ``end`` columns.
    test_version : bool, default False
        When ``False``, one synthetic row is placed before the first frame and
        one after the last frame (``get_annotated_chord_sequence`` labels the
        first and last rows ``<START>`` / ``<END>``).

    Returns
    -------
    pandas.DataFrame
        Chroma columns followed by ``start`` and ``end``.
    """
    chromagram = pd.DataFrame(np.transpose(chromagram), columns=COL_NAMES_NOTES)

    chromagram['start'] = np.arange(chromagram.shape[0]) * frame_duration_sec
    chromagram['end'] = chromagram['start'] + frame_duration_sec

    if(test_version == False):
        n_columns = chromagram.shape[1]

        # Leading row: small noise around 0 in every column, then start/end forced to 0.
        start_chromagram = pd.DataFrame(np.random.normal(loc=0, scale=0.01, size=n_columns),
                                        index=chromagram.columns).transpose()
        start_chromagram.iloc[:, -2:] = 0

        # Trailing row: noise around -1, with start/end set just past the last frame's end.
        end_chromagram = pd.DataFrame(np.random.normal(loc=-1, scale=0.01, size=n_columns),
                                      index=chromagram.columns).transpose()
        end_chromagram.iloc[:, -2:] = chromagram.iloc[-1]['end'] + .01

        # DataFrame.append no longer exists in pandas 2.x; concat stacks the three
        # pieces in the same order and renumbers the index.
        chromagram = pd.concat([start_chromagram, chromagram, end_chromagram],
                               ignore_index=True)

    return chromagram

#https://github.com/caiomiyashiro/music_and_science
def __get_chord_ix(elem, chords_annotation):
    """Return the index label of the last annotation row whose ``start`` is <= ``elem``.

    Raises ``IndexError`` when no row has started by ``elem``.
    """
    offsets = chords_annotation['start'] - elem
    already_started = offsets.loc[offsets <= 0]
    return already_started.index[-1]

#https://github.com/caiomiyashiro/music_and_science
def get_annotated_chord_sequence(pcp, chords_annotation, test_version=False):
    """Label every frame of ``pcp`` with the annotated chord active at its start time.

    For each value in ``pcp['start']`` the matching annotation row is found with
    ``__get_chord_ix`` and its ``chord`` value is taken.  Unless ``test_version``
    is set, the first and last labels are replaced by ``'<START>'`` and
    ``'<END>'``.

    Returns an array with one label per frame.
    """
    # Index labels are used positionally below.
    # NOTE(review): this assumes chords_annotation has a default 0..n-1 index — confirm.
    row_for_frame = [__get_chord_ix(frame_start, chords_annotation)
                     for frame_start in pcp['start']]
    chords = chords_annotation['chord'].iloc[row_for_frame].values

    if test_version == False:
        chords[0] = '<START>'
        chords[-1] = '<END>'
    return chords

In [168]:
#https://github.com/caiomiyashiro/music_and_science
def build_chroma(song_path, process_silence=True, test_version=False):
    """Load one song and return its signal, sample rate and labelled chroma frames.

    Parameters
    ----------
    song_path : str
        Path of the audio file.  The chord annotation is read from the file
        with the same path and a ``.lab`` extension.
    process_silence : bool, default True
        Forwarded to ``read_simplify_chord_file``.
    test_version : bool, default False
        Forwarded to ``chromagram_2_dataframe`` and
        ``get_annotated_chord_sequence``.

    Returns
    -------
    tuple
        ``(signal, sample_rate, frame)`` where ``frame`` is the chroma DataFrame
        with an added ``chord`` column.
    """
    # input data -> signal, sample frequency, chromagram and annotated dataset
    x2, Fs2 = librosa.load(song_path)
    C2 = librosa.feature.chroma_stft(y=x2, sr=Fs2, tuning=0, norm=2, hop_length=1024, n_fft=4096)
    _, frame_duration_sec = get_frame_stats(C2, x2, Fs2)

    # splitext removes only the final extension, so dots elsewhere in the path
    # ("./songs/x.mp3", "Mr. Moonlight.mp3") no longer truncate the annotation path.
    annotation_file_path = f"{os.path.splitext(song_path)[0]}.lab"
    chords_annotation2 = read_simplify_chord_file(annotation_file_path, process_silence=process_silence)

    pcp2 = chromagram_2_dataframe(C2, frame_duration_sec, test_version=test_version)
    pcp2['chord'] = get_annotated_chord_sequence(pcp2, chords_annotation2, test_version=test_version)
    return x2, Fs2, pcp2

def get_full_chromagram(songs):
    """Build the labelled chroma frames of every song and stack them into one DataFrame.

    Parameters
    ----------
    songs : iterable of str
        Audio file paths, each passed to ``build_chroma`` with
        ``test_version=False``.

    Returns
    -------
    pandas.DataFrame
        All per-song frames concatenated in input order with a fresh index;
        an empty DataFrame when ``songs`` is empty.
    """
    # Collect first and concatenate once: DataFrame.append is gone in pandas 2.x,
    # and growing a frame inside the loop re-copies all earlier rows on every song.
    per_song = [build_chroma(song, test_version=False)[2] for song in songs]
    if not per_song:
        # pd.concat rejects an empty list; keep the original empty-frame result.
        return pd.DataFrame()
    return pd.concat(per_song, ignore_index=True)
 
# Gather every .mp3 in the dataset folder, then build the stacked chroma frame.
songs = [
    f'lab_and_musics/{file_name}'
    for file_name in os.listdir('lab_and_musics')
    if file_name.endswith('.mp3')
]

pcp = get_full_chromagram(songs)
#     DEST_FOLDER = 'lab_and_musics'

#     lab_file_path = f'{DEST_FOLDER}/Let_It_Be.lab'
#     chords_annotation = read_simplify_chord_file(lab_file_path,process_silence=True)

#     let_it_be_intro_path = 'sounds/Let it Be Intro.wav'
#     x, Fs = librosa.load(let_it_be_intro_path)
#     let_it_be_chromagram = calc_chromagram(x, Fs, plot=False)
#     frames_per_sec, frame_duration_sec = get_frame_stats(chromagram=let_it_be_chromagram,
#                                                          signal=x, 
#                                                          Fs=Fs)

#     pcp = chromagram_2_dataframe(let_it_be_chromagram, frame_duration_sec, test_version=True)

#     pcp['chord'] = get_annotated_chord_sequence(pcp, chords_annotation, test_version=True)
#     pcp.head()



In [263]:
# Show the stacked chroma frame (the saved output is a truncated rendering).
# NOTE(review): the saved output already has a `cleaned_chord` column, which is only
# added by a later cell — this cell was probably re-run out of order.
pcp
#pcp.size

Unnamed: 0,C,C#,D,D#,E,F,F#,G,G#,A,A#,B,start,end,chord,cleaned_chord
0,0.020408,0.013400,-0.006757,0.009951,-0.010764,-0.010094,0.001094,0.000621,-0.010207,0.004937,0.007333,-0.024496,0.000000,0.000000,<START>,<START>
1,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.046439,Db,Db
2,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.046439,0.092877,Db,Db
3,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.092877,0.139316,Db,Db
4,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.139316,0.185755,Db,Db
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
193061,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,149.260825,149.307251,C,C
193062,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,149.307251,149.353678,C,C
193063,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,149.353678,149.400104,C,C
193064,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,149.400104,149.446531,C,C


In [273]:

# Add the normalised label column to pcp, then list its distinct values in sorted order.
pcp['cleaned_chord'] = __unify_chords(pcp)
a = np.sort(pcp['cleaned_chord'].unique())
a

array(['<END>', '<START>', 'A', 'A1', 'A6', 'A7', 'A9', 'Aadd9', 'Aaddb6',
       'Aaddb7', 'Aaug', 'Ab', 'Ab7', 'Abadd2', 'Abaddb7', 'Abaug',
       'Abmaj6', 'Adim', 'Adim7', 'Am', 'Amaj6', 'Amb7', 'Amin7', 'Asus4',
       'B', 'B6', 'B7', 'B9', 'Badd9', 'Bb', 'Bb1', 'Bb7', 'Bbadd9',
       'Bbaddb7', 'Bbm', 'Bbmaj', 'Bbmaj6', 'Bbmaj7', 'Bbmin7', 'Bdim7',
       'Bhdim7', 'Bm', 'Bmin7', 'C', 'C#', 'C#m', 'C#m4', 'C#maj7',
       'C#mb7', 'C#min7', 'C#sus4', 'C1', 'C6', 'C7', 'Cadd#11', 'Cadd2',
       'Cadd9', 'Caug', 'Cm', 'Cmaj6', 'Cmaj7', 'D', 'D#', 'D#dim7',
       'D#hdim7', 'D#m', 'D1', 'D6', 'D7', 'D9', 'Dadd11', 'Dadd2',
       'Dadd9', 'Daddb7', 'Db', 'Dbmaj7', 'Ddim', 'Ddim7', 'Dm', 'Dm2',
       'Dm4', 'Dmaj6', 'Dmb7', 'Dmin7', 'Dsus4', 'E', 'E1', 'E7', 'E9',
       'Eaddb7', 'Eaug', 'Eb', 'Eb7', 'Ebadd2', 'Ebdim', 'Ebmaj6',
       'Ebmin7', 'Em', 'Emaj6', 'Emb7', 'Emin6', 'Emin7', 'Esus4', 'F',
       'F#', 'F#dim', 'F#hdim7', 'F#m', 'F#min7', 'F#sus4', 'F1', 'F7',
      

In [274]:
# Number of distinct cleaned chord labels held in `a`.
np.size(a)

141

In [275]:
import re
# Sanity check: list every cleaned label that still ends in "min".
# `search` is required here: `match` anchors at the start of the string, so
# '.min$' would only catch four-character labels such as "Amin" and miss
# "C#min" or "Bbmin".
p = re.compile('.min$')
l2 = [s for s in a if p.search(s)]
l2

[]

In [276]:
# Save the labelled frames. header=False writes no column-name row (the original
# header=0 had the same effect only because 0 is falsy); the index is still
# written as the first column.
pcp.to_csv('training_data_new.csv', header=False)

In [41]:
import pandas as pd

# Column names for the headerless training CSV: saved index, 12 chroma bins,
# frame timing and the two label columns.
# NOTE(review): this rebinds COL_NAMES_NOTES, which chromagram_2_dataframe reads as
# the 12 pitch-class names; re-run the constants cell before calling it again.
COL_NAMES_NOTES = ["idx", "C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "A#", "B", "start", "end", "chord", "cleaned_chord"]

training_data = pd.read_csv('training_data_full.csv', header=None, names=COL_NAMES_NOTES)

# Frames whose C bin is exactly 0 are relabelled as silence.  A single vectorised
# assignment replaces the row-by-row iterrows loop and gives the same result.
training_data.loc[training_data['C'] == 0, 'cleaned_chord'] = '<Silent>'

# Mean chroma per cleaned chord.  numeric_only=True leaves out the text `chord`
# column, which pandas >= 2.0 refuses to average instead of silently dropping it.
training_data_grouped = (
    training_data
    .groupby('cleaned_chord')
    .mean(numeric_only=True)
    .drop(columns=['idx', 'start', 'end'])
)
counts = training_data.groupby('cleaned_chord').size()
training_data_grouped['count'] = counts

# header=False: no column-name row is written (same effect as the former header=0).
training_data_grouped.to_csv('training_data_grouped.csv', header=False)

In [1]:
# Show the per-chord averages. The cell that builds training_data_grouped must have
# run in the current kernel first; the saved output is a NameError from a fresh kernel.
training_data_grouped

NameError: name 'training_data_grouped' is not defined

In [2]:
import pandas as pd
# Load the simplified chord predictions and discard the first CSV column by position
# (presumably the saved row index — confirm against the file).
chromagram = pd.read_csv('chords_simplified.csv').iloc[:, 1:]
chromagram


Unnamed: 0,predicted,start,end
0,F#,0.000000,0.325026
1,G#m,0.325026,0.557187
2,E,0.557187,0.789348
3,F#,0.789348,1.392968
4,E,1.392968,2.043019
...,...,...,...
181,B,188.979278,189.722194
182,F#,189.722194,192.275968
183,G#m,192.275968,194.179691
184,F#,194.179691,196.083413


In [3]:
# Drop every segment whose end - start is below 0.2 (in the units of start/end,
# presumably seconds — confirm).
chromagram = chromagram.drop(
    index=chromagram.loc[(chromagram['end'] - chromagram['start']) < 0.2].index
)


In [5]:
# Inspect `chromagram` again (this cell sits after the duration filter).
chromagram

Unnamed: 0,predicted,start,end
0,F#,0.000000,0.325026
1,G#m,0.325026,0.557187
2,E,0.557187,0.789348
3,F#,0.789348,1.392968
4,E,1.392968,2.043019
...,...,...,...
181,B,188.979278,189.722194
182,F#,189.722194,192.275968
183,G#m,192.275968,194.179691
184,F#,194.179691,196.083413


In [19]:
import numpy as np
# For each segment, take the start time of the segment after it; the final entry
# is the last segment's own end time.
start_time_following = np.append(chromagram['start'].iloc[1:].values,
                                 chromagram['end'].tail(1))
start_time_following

array([  0.32502579,   0.55718706,   0.78934834,   1.39296765,
         2.04301922,   2.46090952,   3.66814815,   3.99317393,
         5.47900609,   6.12905767,   7.24343179,   8.2649414 ,
        12.72243788,  13.55821847,  15.27621191,  15.50837318,
        16.29772152,  16.66917956,  16.99420534,  19.3158181 ,
        19.96586967,  21.03381153,  23.21612752,  23.77331458,
        24.28406939,  24.65552743,  25.8163338 ,  26.09492733,
        27.673624  ,  27.95221753,  28.37010783,  30.27383029,
        34.0812752 ,  34.82419128,  37.14580403,  37.74942335,
        38.07444913,  38.63163619,  39.04952649,  40.16390061,
        40.81395218,  41.4175715 ,  41.9283263 ,  42.16048758,
        44.34280356,  45.17858415,  45.41074543,  47.9180872 ,
        48.70743554,  49.03246132,  49.58964838,  50.05397093,
        52.18985466,  56.36875762,  57.06524144,  57.39026723,
        58.13318331,  60.73338959,  64.40153774,  64.63369902,
        65.19088608,  65.98023441,  66.53742147,  66.76

In [None]:
# Overwrite the `end` column with the following segment's start time.
# NOTE(review): `filtered_pcp` is not defined in any cell visible here (the filtered
# frame above is named `chromagram`) and this cell has no execution count — confirm
# the intended target before running.
filtered_pcp['end'] = start_time_following
    