In [14]:
import pandas as pd
import collections
import numpy as np
from os import listdir
from os.path import isfile, join
import itertools
import pickle
import audiolazy
pd.set_option('display.max_rows', 100)

In [2]:
# Load the Mozart slice CSV into the notebook-level `data` frame.
# NOTE(review): other cells pass encoding='latin 1' when reading these CSVs;
# this call relies on pandas' default encoding - confirm that is intended.
data = pd.read_csv('YCAC-data-1/MozartSlices.csv')

In [24]:
def sharpify(note):
    """Rewrite a flat pitch name as its enharmonic sharp, keeping the octave.

    The last character of ``note`` is treated as the octave digit and
    everything before it as the pitch name. Flats are spelled with a trailing
    '-' (e.g. 'A-1'). Only D-, E-, G-, A- and B- are translated; any other
    pitch name (naturals, sharps, and also 'C-' / 'F-') is returned unchanged.

    Args:
        note: pitch string such as 'B-4' or 'F#4'.

    Returns:
        The pitch string with a flat name replaced by the sharp name.

    Raises:
        IndexError: if ``note`` is empty.
    """
    flat_to_sharp = {'D-': 'C#', 'E-': 'D#', 'G-': 'F#', 'A-': 'G#', 'B-': 'A#'}
    pitch, octave = note[:-1], note[-1]
    # A plain dict lookup replaces the former bare ``except:``, which hid
    # every exception type instead of only the "not a flat" case.
    return flat_to_sharp.get(pitch, pitch) + octave

# test: the flat names ('A-1', 'B-8') should print as sharps; naturals and
# existing sharps should print unchanged.
print(list(map(sharpify,['A1','A-1','B-8','C3', 'D4', 'F#4'])))

['A1', 'G#1', 'A#8', 'C3', 'D4', 'F#4']


In [4]:
def parse_chord(chord_string):
    """Split a chord description into its pitch tokens.

    The string is split on single spaces, the first token is discarded and
    the final character of the last token is removed.
    Presumably the input looks like '<music21.chord.Chord E5 C4>' (a leading
    type tag and a closing '>') - confirm against the raw CSV.

    Args:
        chord_string: space-separated chord description.

    Returns:
        List of the tokens after the first one, the last of them shortened
        by one character. Empty when there is only one token.
    """
    _, *pitches = chord_string.split(' ')
    if pitches:
        # Strip the single trailing character from the final token.
        pitches[-1] = pitches[-1][:-1]
    return pitches

In [5]:
def get_parsed_chords(chord_string_list):
    """Run parse_chord over every chord string.

    Args:
        chord_string_list: iterable of chord description strings (the caller
            in this notebook passes a DataFrame column).

    Returns:
        List with one parse_chord result per input string, in input order.
    """
    return list(map(parse_chord, chord_string_list))

In [6]:
def save_reduced_data(file_name):
    """Overwrite a slice CSV with a reduced version of itself.

    Drops the normal-form / scale-degree / local-key columns, replaces the
    'Chord' column with the parsed pitch lists from get_parsed_chords, and
    writes the result back to the same path.

    The file is written with the same encoding it was read with. Previously
    it was read as latin-1 but written with pandas' default (UTF-8), so any
    non-ASCII text was garbled when the file was later re-read as latin-1.

    Note that ``to_csv`` also writes the row index as an extra leading
    column (pandas default); that behaviour is kept unchanged.

    Args:
        file_name: path of the CSV to reduce in place on disk.
    """
    encoding = 'latin 1'
    data = pd.read_csv(file_name, encoding=encoding)
    reduced = data.drop(columns=['NormalForm', 'PCsInNormalForm', 'GlobalScaleDegrees',
                                 'LocalTonic', 'LocalMode', 'LocalSDForm_BassSD'])
    reduced['Chord'] = get_parsed_chords(reduced['Chord'])
    reduced.to_csv(file_name, encoding=encoding)

In [36]:
def get_file_paths(mypaths=None):
    """List the regular files that sit directly inside the given directories.

    Args:
        mypaths: a directory path or an iterable of directory paths to scan.
            Defaults to ['YCAC-data-3'], the directory that used to be
            hard-coded here.

    Returns:
        List of '<directory>/<file name>' strings. Sub-directories are
        skipped. Order within a directory is whatever os.listdir returns.
    """
    if mypaths is None:
        mypaths = ['YCAC-data-3']
    elif isinstance(mypaths, str):
        # A bare string would otherwise be iterated character by character.
        mypaths = [mypaths]
    files = []
    for mypath in mypaths:
        # Paths are joined with '/' (not os.path.join) because other cells
        # index into these strings by position.
        files += [mypath + '/' + f for f in listdir(mypath) if isfile(join(mypath, f))]
    return files
# Paths of the files directly inside YCAC-data-3 (a later cell narrows this
# list to entries containing '.csv').
file_paths_3 = get_file_paths()

In [8]:
# Replace all the files with their 'reduced' versions
# for file_path in get_file_paths():
#     print(file_path, 'done')
#     save_reduced_data(file_path)

# Make all chords sharps or naturals (remove all flats)
# for file in file_paths:
#     data = pd.read_csv(file, encoding= 'latin 1')
#     chords = data['Chord']
#     sharpified_chords = [list(map(sharpify, eval(chord))) for chord in chords]
#     data['Chord'] = sharpified_chords
#     data.to_csv(file)
#     print(file, 'completed')

In [8]:
# Load the 'P' slice file from YCAC-data-3 into the notebook-level `data`
# (this rebinds `data` from the earlier Mozart load).
filename = 'YCAC-data-3/PSlices.csv'
data = pd.read_csv(filename, encoding= 'latin 1')

In [68]:
# canon_in_d = data[data['file'].str.contains('Canon in D')]

In [69]:
# canon_in_d.head(100)

In [9]:
def get_compressed_files():
    """Write a one-row-per-piece '<name>_compressed.csv' next to each slice CSV.

    For every path in the notebook-level ``file_paths`` list:
      * the rows with ``offset == 0`` are taken as one representative row per
        piece (NOTE(review): a piece whose first slice has a non-zero offset
        gets no row - confirm this is acceptable for the data);
      * that row's 'Chord' cell is replaced by the string produced by
        arrange_chord_lists over all of the piece's chords;
      * the stale 'Unnamed: 0' index column is dropped and the frame is saved.

    The drop result used to be discarded, so the column was never actually
    removed. It is now assigned back, and a missing column is tolerated.
    """
    for filename in file_paths:
        # Strip the 4-character '.csv' suffix before adding the new one.
        compressed_filename = filename[0:-4] + '_compressed.csv'
        data = pd.read_csv(filename, encoding= 'latin 1')
        piece_starts = data[data['offset'] == 0].copy()

        for index, row in piece_starts.iterrows():
            piece_chords = data.loc[data['file'] == row['file'], 'Chord']
            piece_starts.at[index, 'Chord'] = arrange_chord_lists(piece_chords)

        piece_starts = piece_starts.drop(columns=['Unnamed: 0'], errors='ignore')
        piece_starts.to_csv(compressed_filename)
        print(filename, 'done')

In [10]:
def arrange_chord_lists(chords_series):
    """Combine per-slice chord strings into one stringified list of lists.

    Args:
        chords_series: iterable of strings, each the Python source of a list
            (e.g. "['E5', 'C4']").

    Returns:
        ``str()`` of the list holding every evaluated chord list, in order.
    """
    # NOTE(review): eval executes whatever the cell text contains; this is
    # only safe for CSVs this notebook produced itself.
    return str([eval(chord_text) for chord_text in chords_series])

In [164]:
# Load the per-piece table for the 'P' file; the '_compressed.csv' suffix is
# the one get_compressed_files writes.
data = pd.read_csv('YCAC-data-3/PSlices_compressed.csv')

In [58]:
# data[(data['file'].str.contains('K331')) & 
#     (data['file'].str.contains('A major'))].head(100)
# data['file'][(data['file'].str.contains('K331'))]

In [47]:
def get_best_match(chords_series, highest_note_series, notes):
    """Score how well a note sequence matches the best window of a piece.

    A window of ``len(notes)`` consecutive slices matches when check_match
    accepts it, i.e. the i-th query note occurs in the i-th slice's chord.
    Each matching window is scored with score_match against the window's
    highest pitches, and the best score is returned.

    Args:
        chords_series: iterable of strings, each the Python source of a list
            of pitch names with octave (e.g. "['E5', 'C4']").
        highest_note_series: iterable of MIDI numbers, one per slice, giving
            the highest pitch of that slice.
        notes: list of pitch names without octave (e.g. ['F#', 'G']).

    Returns:
        The highest score_match value over all matching windows, or 0 when
        ``notes`` is empty, longer than the piece, or matches no window.
    """
    number_of_notes = len(notes)
    if number_of_notes == 0:
        # An empty query used to "match" every window and then divide by
        # zero inside score_match; treat it as no match instead.
        return 0

    # NOTE(review): eval runs the cell text as code; acceptable only for
    # data this notebook wrote itself.
    chords_list = [eval(chords) for chords in chords_series]
    highest_note_list = list(highest_note_series)
    piece_length = len(chords_list)
    if number_of_notes > piece_length:
        return 0

    # Slide a window of len(notes) slices across the piece.
    matches = []
    for start in range(piece_length - number_of_notes + 1):
        stop = start + number_of_notes
        if not check_match(chords_list[start:stop], notes):
            continue
        # [:-1] drops the last character of the pitch string so it can be
        # compared with the octave-less query notes.
        # NOTE(review): assumes a single-character octave suffix - confirm.
        window_highest_notes = [audiolazy.midi2str(num, sharp= True)[:-1]
                                for num in highest_note_list[start:stop]]
        matches.append(score_match(notes, window_highest_notes))

    return max(matches) if matches else 0

def check_match(chords_list_with_number, notes):
    """Tell whether each query note occurs in the slice at the same position.

    Args:
        chords_list_with_number: list of chords, each a list of pitch names
            that still carry their trailing octave character.
        notes: list of pitch names without octave.

    Returns:
        numpy boolean: True when, for every (note, chord) pair, the note is
        among the chord's octave-less pitch names. Pairing stops at the
        shorter of the two inputs; an empty pairing yields True.
    """
    pitch_classes = remove_number(chords_list_with_number)
    hits = []
    for wanted, slice_pitches in zip(notes, pitch_classes):
        hits.append(wanted in slice_pitches)
    return np.all(hits)

def remove_number(chords_list):
    """Drop the last character from every pitch name in every chord.

    Args:
        chords_list: list of chords, each a list of pitch strings.

    Returns:
        New list of lists with each string shortened by its final character.
    """
    return [[pitch[0:-1] for pitch in chord] for chord in chords_list]

def score_match(notes, highest_notes):
    """Return the fraction of positions where the note equals the top note.

    Args:
        notes: list of query pitch names.
        highest_notes: list of pitch names to compare against, position by
            position.

    Returns:
        Float in [0, 1]: number of equal pairs divided by the number of
        pairs. Pairing stops at the shorter input. Returns 0.0 when there
        are no pairs (this used to raise ZeroDivisionError).
    """
    pairs = list(zip(notes, highest_notes))
    if not pairs:
        return 0.0
    equal_count = sum(1 for note, highest_note in pairs if note == highest_note)
    return float(equal_count) / len(pairs)

In [173]:
# Pair each slice path with the path of its per-piece pickle: the last four
# characters (assumed to be '.csv') are replaced by 'ByPiece'.
# NOTE(review): `file_paths` is not assigned in any cell visible here -
# confirm where it is defined before running on a fresh kernel.
files_by_piece = [(file_name[0:-4] + 'ByPiece', file_name) for file_name in file_paths]
print(files_by_piece)

[('YCAC-data-1/HandelSlicesByPiece', 'YCAC-data-1/HandelSlices.csv'), ('YCAC-data-1/BachSlicesByPiece', 'YCAC-data-1/BachSlices.csv'), ('YCAC-data-1/HaydnSlicesByPiece', 'YCAC-data-1/HaydnSlices.csv'), ('YCAC-data-1/BrahmsSlicesByPiece', 'YCAC-data-1/BrahmsSlices.csv'), ('YCAC-data-1/ChopinSlicesByPiece', 'YCAC-data-1/ChopinSlices.csv'), ('YCAC-data-1/MozartSlicesByPiece', 'YCAC-data-1/MozartSlices.csv'), ('YCAC-data-1/BeethovenSlicesByPiece', 'YCAC-data-1/BeethovenSlices.csv'), ('YCAC-data-1/DebussySlicesByPiece', 'YCAC-data-1/DebussySlices.csv'), ('YCAC-data-1/ByrdSlicesByPiece', 'YCAC-data-1/ByrdSlices.csv'), ('YCAC-data-1/MendelssohnSlicesByPiece', 'YCAC-data-1/MendelssohnSlices.csv'), ('YCAC-data-1/LisztSlicesByPiece', 'YCAC-data-1/LisztSlices.csv'), ('YCAC-data-2/WagnerSlicesByPiece', 'YCAC-data-2/WagnerSlices.csv'), ('YCAC-data-2/TchaikovskySlicesByPiece', 'YCAC-data-2/TchaikovskySlices.csv'), ('YCAC-data-2/Saint-SaensSlicesByPiece', 'YCAC-data-2/Saint-SaensSlices.csv'), ('YCAC-

In [12]:
def get_pieces(data_csv):
    """Split a slice table into one sub-frame per piece.

    Args:
        data_csv: DataFrame with a 'file' column naming the piece each row
            belongs to.

    Returns:
        Dict mapping each distinct 'file' value to the rows carrying that
        value, with their original index and row order. Rows whose 'file' is
        missing (NaN) are not included.
    """
    # One groupby pass replaces a full-frame boolean scan per piece.
    return {piece: rows for piece, rows in data_csv.groupby('file', sort=False)}

def save_pieces():
    """Pickle the per-piece dict of every slice CSV.

    For each (pickle_path, csv_path) pair in the notebook-level
    ``files_by_piece`` list, reads the CSV, splits it with get_pieces and
    dumps the resulting dict to ``pickle_path``. Prints a progress line after
    each file.
    """
    files_todo = len(files_by_piece)
    for files_completed, (filename_by_piece, file_name) in enumerate(files_by_piece, start=1):
        data_by_pieces = get_pieces(pd.read_csv(file_name, encoding= 'latin 1'))
        # The context manager closes the handle even if pickling fails.
        with open(filename_by_piece, 'wb') as f:
            pickle.dump(data_by_pieces, f)
        print(files_completed, 'of', files_todo, 'done')
        
# save_pieces()

In [35]:
# f = open('YCAC-data-1/BeethovenSlicesByPiece', 'rb')
# obj = pickle.load(f)

['YCAC-data-1/HandelSlices.csv', 'YCAC-data-1/BachSlices.csv', 'YCAC-data-1/HaydnSlices.csv', 'YCAC-data-1/BrahmsSlices.csv', 'YCAC-data-1/ChopinSlices.csv', 'YCAC-data-1/MozartSlices.csv', 'YCAC-data-1/BeethovenSlices.csv', 'YCAC-data-1/DebussySlices.csv', 'YCAC-data-1/ByrdSlices.csv', 'YCAC-data-1/MendelssohnSlices.csv', 'YCAC-data-1/LisztSlices.csv', 'YCAC-data-2/WagnerSlices.csv', 'YCAC-data-2/TchaikovskySlices.csv', 'YCAC-data-2/Saint-SaensSlices.csv', 'YCAC-data-2/VivaldiSlices.csv', 'YCAC-data-2/SchumannSlices.csv', 'YCAC-data-2/TelemannSlices.csv', 'YCAC-data-2/ScarlattiSlices.csv', 'YCAC-data-2/SchubertSlices.csv', 'YCAC-data-3/LSlices.csv', 'YCAC-data-3/CSlices.csv', 'YCAC-data-3/OSlices.csv', 'YCAC-data-3/JSlices.csv', 'YCAC-data-3/BSlices.csv', 'YCAC-data-3/WSlices.csv', 'YCAC-data-3/ZSlices.csv', 'YCAC-data-3/DSlices.csv', 'YCAC-data-3/ISlices.csv', 'YCAC-data-3/FSlices.csv', 'YCAC-data-3/MSlices.csv', 'YCAC-data-3/HSlices.csv', 'YCAC-data-3/VSlices.csv', 'YCAC-data-3/GSli

In [37]:
# Composers looked up in YCAC-data-1 by get_file_path_to_composer.
names_1 = ['Bach', 'Beethoven', 'Brahms', 'Byrd', 'Chopin', 'Debussy', 
          'Handel', 'Haydn', 'Liszt', 'Mendelssohn', 'Mozart']
# Composers looked up in YCAC-data-2.
names_2 = ['Saint-Saens', 'Scarlatti', 'Schubert', 'Schumann', 'Tchaikovsky', 
          'Telemann', 'Vivaldi', 'Wagner']

# Keep only the paths that contain '.csv' (substring test, not a suffix test).
file_paths_3 = [file for file in file_paths_3 if '.csv' in file]
# Collect the distinct 'Composer' values of every YCAC-data-3 file. A name
# that appears in several files is appended once per file.
# Note: this loop rebinds the notebook-level `file` and `data` names.
names_3 = []
for file in file_paths_3:
    data = pd.read_csv(file, encoding= 'latin 1')
    composers = list(set(data['Composer']))
    names_3 = names_3 + composers

In [81]:
def get_file_path_to_composer(name):
    """Return the path of the per-piece pickle that holds a composer's works.

    Composers in ``names_1`` / ``names_2`` have a pickle of their own named
    after them (e.g. 'YCAC-data-1/MozartSlicesByPiece'). Composers in
    ``names_3`` share a pickle with every composer whose name starts with the
    same letter (e.g. 'YCAC-data-3/PSlicesByPiece').

    The first-letter form used to be applied to all three datasets, which
    produced paths such as 'YCAC-data-1/MSlicesByPiece' that do not match the
    names the pickles are saved under.

    Args:
        name: composer name as listed in names_1, names_2 or names_3.

    Returns:
        The pickle path as a string, or False when the name is unknown.
    """
    if name in names_1:
        return 'YCAC-data-1/' + name + 'SlicesByPiece'
    if name in names_2:
        return 'YCAC-data-2/' + name + 'SlicesByPiece'
    if name in names_3:
        return 'YCAC-data-3/' + name[0] + 'SlicesByPiece'
    return False
    
def search_composer(notes, name= 'none'):
    """Rank a composer's pieces by how well they contain a note sequence.

    Args:
        notes: list of pitch names without octave (e.g. ['F#', 'G', 'A']).
        name: composer name; must be known to get_file_path_to_composer.

    Returns:
        List of (piece name, score) tuples sorted by score, best first, where
        score is the get_best_match result for that piece. Empty list when
        the composer is unknown.
    """
    file_name = get_file_path_to_composer(name)
    if not file_name:
        # Unknown composer: get_file_path_to_composer returned False. The
        # old guard tested `name`, which is truthy even for the default
        # 'none', so open() was then called with False.
        return []

    # NOTE(review): pickle.load can execute arbitrary code; only load
    # pickles written by this notebook.
    with open(file_name, 'rb') as file:
        data = pickle.load(file)

    # TODO: for the shared YCAC-data-3 pickles (file_name[10] == '3') keep
    # only the pieces by `name`. Not implemented yet, so every piece in the
    # shared pickle is scored.

    composition_scores = [
        (composition,
         get_best_match(pieces['Chord'], pieces['HighestPitch'], notes))
        for composition, pieces in data.items()
    ]
    return sorted(composition_scores, key=lambda x: x[1], reverse= True)
    
# Rank the pieces returned for 'Pachelbel' against an 8-note query; the
# commented-out call is the same query extended to 10 notes.
# found = search_composer(['F#', 'G', 'A', 'B', 'A', 'G', 'A', 'F#', 'G', 'A'], 'Pachelbel')
found = search_composer(['F#', 'G', 'A', 'B', 'A', 'G', 'A', 'F#'], 'Pachelbel')

KeyError: 'Composer'

In [79]:
# Display the ranked (piece name, score) pairs from the search above.
found

[('Violin Concerto in D 1 Allegro Maestoso Op6No1 D major.mid', 1.0),
 ("Musick's Handmaid Pieces for Harpsichord 8 Lesson A New Ground E minor.mid",
  0.75),
 ('Jerusalem Op208 D major.mid', 0.75),
 ('The Tempest-Arise, ye subterranean winds D major.mid', 0.625),
 ('Come ye sons of art away D major.mid', 0.5),
 ('Sonata for Trumpet and String Orchestra 3 Allegro D major.mid', 0.5),
 ('Gigue in D D major.mid', 0.5),
 ('Oboe Sonata in G Movement 4 G major.mid', 0.375),
 ('Hornpipe in E-ship-board version E minor.mid', 0.375),
 ('Violin Concerto in D 3 Rondo Op6No1 D major.mid', 0.375),
 ('Cinq Impromptus 3 Tres Modere G major.mid', 0.25),
 ('The Yorkshire Feast Song 1 Symphony D major.mid', 0.25),
 ('Sound the Trumpet, Come Ye Sons of Art G major.mid', 0.25),
 ('Sonata for Trumpet and String Orchestra 1 Pomposo D major.mid', 0.25),
 ('Praeludium in D for organ D minor.mid', 0),
 ("Wir glauben all' an einen Gott D minor.mid", 0),
 ('Minuet in G Op14No1 G major.mid', 0),
 ('Prelude for Gu

In [54]:
# Show every slice of one movement of the Mozart file.
# The filter previously ended in a dangling `&` with no right-hand operand,
# which is a syntax error; the single condition is now used on its own.
data = pd.read_csv('YCAC-data-1/MozartSlices.csv', encoding= 'latin 1')
data[data['file'] == 'Serenade No13 "Eine Kleine Nachtmusik" K525 ii C major.mid']

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,offset,Chord,HighestPitch,LowestPitch,file,Composer,Confidence
1019825,1019825,1019825,2.000,"['E5', 'C4']",76,60,"Serenade No13 ""Eine Kleine Nachtmusik"" K525 ii...",Mozart,0.000000
1019826,1019826,1019826,3.000,"['E5', 'C4']",76,60,"Serenade No13 ""Eine Kleine Nachtmusik"" K525 ii...",Mozart,0.000000
1019827,1019827,1019827,4.000,"['E5', 'C4', 'C3', 'C2']",76,36,"Serenade No13 ""Eine Kleine Nachtmusik"" K525 ii...",Mozart,0.000000
1019828,1019828,1019828,4.500,"['E5', 'G3', 'C3', 'C2']",76,36,"Serenade No13 ""Eine Kleine Nachtmusik"" K525 ii...",Mozart,0.841780
1019829,1019829,1019829,4.750,"['E5', 'G3']",76,55,"Serenade No13 ""Eine Kleine Nachtmusik"" K525 ii...",Mozart,0.841780
...,...,...,...,...,...,...,...,...,...
1021255,1021255,1021255,388.375,"['D5', 'B4', 'F4']",74,65,"Serenade No13 ""Eine Kleine Nachtmusik"" K525 ii...",Mozart,0.912734
1021256,1021256,1021256,389.000,"['D5', 'B4', 'F4', 'G2', 'G1']",74,31,"Serenade No13 ""Eine Kleine Nachtmusik"" K525 ii...",Mozart,0.912734
1021257,1021257,1021257,389.375,"['D5', 'B4', 'F4']",74,65,"Serenade No13 ""Eine Kleine Nachtmusik"" K525 ii...",Mozart,0.912734
1021258,1021258,1021258,390.000,"['C5', 'E4', 'C2']",72,36,"Serenade No13 ""Eine Kleine Nachtmusik"" K525 ii...",Mozart,0.912734


In [83]:
""" Mozart tests """
# has_notes(data['Chord'], ['F', 'E', 'D#', 'E', 'B', 'A', 'G#', 'A', 'B', 'A', 'G#', 'A', 'C'])
# has_notes(data['Chord'],['E', 'D#', 'E', 'B', 'A', 'G#', 'A', 'B', 'A', 'G#', 'A', 'C'])

""" Beethoven tests"""
# NOTE(review): `has_notes` is not defined in any cell visible here, so this
# call only works if an earlier kernel session defined it - confirm or
# restore the definition before a fresh run.
has_notes(data['Chord'],['E', 'D#', 'E', 'D#', 'E', 'B', 'D', 'C', 'A'])

True

In [22]:
# Load the Bach slice CSV into the notebook-level `data` frame.
# NOTE(review): read with pandas' default encoding, unlike the cells that
# pass encoding='latin 1' - confirm that is intended.
data = pd.read_csv('YCAC-data-1/BachSlices.csv')

In [83]:
# Load the per-piece dict for the 'P' file and display it.
# NOTE(review): pickle.load can execute arbitrary code; only load pickles
# written by this notebook.
# The context manager closes the handle, which was previously left open.
with open('YCAC-data-3/PSlicesByPiece', 'rb') as file:
    data = pickle.load(file)
data

{'Praeludium in D for organ D minor.mid':        Unnamed: 0  Unnamed: 0.1   offset                            Chord  \
 53293       53293         53293    0.500                     ['D2', 'D3']   
 53294       53294         53294    1.000                     ['F2', 'F3']   
 53295       53295         53295    1.500                   ['C#2', 'C#3']   
 53296       53296         53296    2.000                     ['D2', 'D3']   
 53297       53297         53297    2.500                   ['A#2', 'A#3']   
 ...           ...           ...      ...                              ...   
 54808       54808         54808  377.375         ['D2', 'A3', 'D4', 'D3']   
 54809       54809         54809  377.500   ['D2', 'A3', 'D4', 'E3', 'D3']   
 54810       54810         54810  377.875         ['D2', 'A3', 'D4', 'D3']   
 54811       54811         54811  378.000  ['D2', 'A3', 'D4', 'F#3', 'D3']   
 54812       54812         54812  379.250                          ['F#3']   
 
        HighestPitch 