In [1]:
import ast
import collections
import itertools
import pickle
from os import listdir
from os.path import isfile, join

import numpy as np
import pandas as pd
pd.set_option('display.max_rows', 100)

In [2]:
# Load the Mozart slice table from the first corpus directory (pandas' default encoding is used here).
data = pd.read_csv('YCAC-data-1/MozartSlices.csv')

In [3]:
def sharpify(note):
    """Rewrite a flat-spelled note name as its sharp-spelled equivalent.

    The last character of ``note`` is treated as the octave and everything
    before it as the pitch name, where a trailing ``-`` marks a flat
    (``'B-8'`` -> ``'A#8'``). Pitch names that are not one of the five flats
    are left as they are.

    Args:
        note: Note string such as ``'A-1'``, ``'C3'`` or ``'F#4'``.

    Returns:
        The note with a flat pitch name swapped for the matching sharp, or the
        input unchanged when there is nothing to convert (including ``''``).
    """
    if not note:
        # Nothing to split into pitch name + octave.
        return note
    flat_to_sharp = {'D-': 'C#', 'E-': 'D#', 'G-': 'F#', 'A-': 'G#', 'B-': 'A#'}
    pitch, octave = note[:-1], note[-1]
    # A dict lookup with a default replaces the bare `except:` that used to
    # hide every kind of error, not only the "not a flat" case.
    return flat_to_sharp.get(pitch, pitch) + octave

# test
# print(list(map(sharpify,['A1','A-1','B-8','C3', 'D4', 'F#4'])))

In [4]:
def parse_chord(chord_string):
    """Split a space-separated chord string into its trailing tokens.

    The text is split on single spaces, the final character of the last token
    is cut off, and the first token is discarded.
    (Presumably the input looks like '<music21.chord.Chord D4 D3>' - confirm.)

    Args:
        chord_string: The raw chord text.

    Returns:
        List of the remaining tokens; empty when the text has a single token.
    """
    *leading, last = chord_string.split(' ')
    tokens = leading + [last[:-1]]
    return tokens[1:]

In [5]:
def get_parsed_chords(chord_string_list):
    """Run ``parse_chord`` over every chord string and return the results as a list."""
    parsed = []
    for chord_text in chord_string_list:
        parsed.append(parse_chord(chord_text))
    return parsed

In [6]:
def save_reduced_data(file_name):
    """Strip analysis columns from a slice CSV and rewrite the file in place.

    Reads ``file_name`` as Latin-1, removes the six columns listed in
    ``dropped_columns``, replaces the ``Chord`` column with the token lists
    produced by ``get_parsed_chords`` and overwrites the same file.

    Running it a second time on the same file raises, because the columns to
    drop are already gone.

    Args:
        file_name: Path of the CSV to reduce. The file is overwritten.

    Raises:
        KeyError: If ``Chord`` or any of the columns to drop is missing.
    """
    dropped_columns = ['NormalForm', 'PCsInNormalForm', 'GlobalScaleDegrees',
                       'LocalTonic', 'LocalMode', 'LocalSDForm_BassSD']
    reduced = pd.read_csv(file_name, encoding= 'latin 1').drop(columns=dropped_columns)
    reduced['Chord'] = get_parsed_chords(reduced['Chord'])
    # index=False: writing the row index back into the file adds one more
    # 'Unnamed: 0' column every time the file is read and saved again.
    reduced.to_csv(file_name, index=False)

In [7]:
def get_file_paths():
    """List the regular files directly inside the three YCAC data directories.

    Returns:
        A list of '<directory>/<name>' strings, directory by directory, in
        whatever order ``listdir`` reports the entries. Sub-directories are
        skipped.
    """
    collected = []
    for directory in ('YCAC-data-1', 'YCAC-data-2', 'YCAC-data-3'):
        for entry in listdir(directory):
            if isfile(join(directory, entry)):
                collected.append(directory + '/' + entry)
    return collected
# Gather the paths once, when this cell runs (the three YCAC-data-* directories must exist).
# NOTE(review): later cells iterate over `file_paths`, which no visible cell assigns -
# confirm whether it was meant to be derived from this list.
file_paths_3 = get_file_paths()

In [8]:
# Replace all the files with their 'reduced' versions
# for file_path in get_file_paths():
#     print(file_path, 'done')
#     save_reduced_data(file_path)

# Make all chords sharps or naturals (remove all flats)
# for file in file_paths:
#     data = pd.read_csv(file, encoding= 'latin 1')
#     chords = data['Chord']
#     sharpified_chords = [list(map(sharpify, eval(chord))) for chord in chords]
#     data['Chord'] = sharpified_chords
#     data.to_csv(file)
#     print(file, 'completed')

In [8]:
# Load one slice table, decoding it as Latin-1; this rebinds the notebook-wide `data`.
filename = 'YCAC-data-3/PSlices.csv'
data = pd.read_csv(filename, encoding= 'latin 1')

In [9]:
# Keep only the rows whose 'file' value contains the text 'Canon in D'.
canon_in_d = data[data['file'].str.contains('Canon in D')]

In [10]:
# Display at most the first 100 rows of the selection.
canon_in_d.head(100)

Unnamed: 0.1,Unnamed: 0,offset,Chord,HighestPitch,LowestPitch,file,Composer,Confidence
147899,147899,0.0,['D3'],50,50,Canon in D D major.mid,Pachelbel,0.0
147900,147900,1.0,"['D4', 'D3']",62,50,Canon in D D major.mid,Pachelbel,0.0
147901,147901,1.25,['D3'],50,50,Canon in D D major.mid,Pachelbel,0.682277
147902,147902,2.0,"['F#4', 'D3']",66,50,Canon in D D major.mid,Pachelbel,0.682277
147903,147903,2.25,['D3'],50,50,Canon in D D major.mid,Pachelbel,0.682277
147904,147904,3.0,"['A4', 'D3']",69,50,Canon in D D major.mid,Pachelbel,0.682277
147905,147905,3.25,['D3'],50,50,Canon in D D major.mid,Pachelbel,0.682277
147906,147906,4.0,['A2'],45,45,Canon in D D major.mid,Pachelbel,0.682277
147907,147907,5.0,"['A3', 'A2']",57,45,Canon in D D major.mid,Pachelbel,0.682277
147908,147908,5.25,['A2'],45,45,Canon in D D major.mid,Pachelbel,0.682277


In [9]:
def get_compressed_files():
    """Write a one-row-per-piece '<name>_compressed.csv' next to each slice CSV.

    For every path in the module-level ``file_paths`` list, the rows with
    ``offset == 0`` are taken as the piece rows, and each one's ``Chord`` cell
    is replaced by the string that ``arrange_chord_lists`` builds from all
    chord entries sharing the same ``file`` value. The output path is the
    input path minus its last four characters plus ``'_compressed.csv'``.

    NOTE(review): ``file_paths`` is not assigned in any visible cell; it must
    exist before this function is called.
    """
    for filename in file_paths:
        compressed_filename = filename[0:-4] + '_compressed.csv'
        data = pd.read_csv(filename, encoding= 'latin 1')
        piece_starts = data[data['offset'] == 0].copy()

        for index, row in piece_starts.iterrows():
            piece_chords = data.loc[data['file'] == row['file'], 'Chord']
            piece_starts.at[index, 'Chord'] = arrange_chord_lists(piece_chords)
        # drop() returns a new frame, so the result has to be kept for the
        # column to actually disappear. errors='ignore' lets files that never
        # had the leftover index column pass through unchanged.
        piece_starts = piece_starts.drop(columns= ['Unnamed: 0'], errors= 'ignore')
        piece_starts.to_csv(compressed_filename)
        print(filename, 'done')

In [10]:
def arrange_chord_lists(chords_series):
    """Combine stringified chord lists into one stringified list of lists.

    Args:
        chords_series: Iterable of strings, each the literal form of a list of
            note names, e.g. ``"['D4', 'D3']"``.

    Returns:
        ``str()`` of the list holding every parsed chord list, in input order.

    Raises:
        ValueError, SyntaxError: If an entry is not a plain Python literal.
    """
    # literal_eval accepts literals only, so text read back from a CSV cannot
    # run code the way eval() would allow.
    return str([ast.literal_eval(chord_text) for chord_text in chords_series])

In [164]:
# Load a '_compressed.csv' table (presumably one written by get_compressed_files - confirm).
data = pd.read_csv('YCAC-data-3/PSlices_compressed.csv')

In [55]:
# data[(data['file'].str.contains('K331')) & 
#     (data['file'].str.contains('A major'))].head(100)
# Show the 'file' values that contain 'K331'.
# NOTE(review): this cell's execution count (55) is out of order with the preceding
# load (164), so the saved output may come from a different `data` - confirm.
data['file'][(data['file'].str.contains('K331'))]

389334     Piano Sonata K331 iii A minor.mid
389335     Piano Sonata K331 iii A minor.mid
389336     Piano Sonata K331 iii A minor.mid
389337     Piano Sonata K331 iii A minor.mid
389338     Piano Sonata K331 iii A minor.mid
                         ...                
1550006     Piano Sonata K331 ii A major.mid
1550007     Piano Sonata K331 ii A major.mid
1550008     Piano Sonata K331 ii A major.mid
1550009     Piano Sonata K331 ii A major.mid
1550010     Piano Sonata K331 ii A major.mid
Name: file, Length: 7528, dtype: object

In [11]:
def has_notes(chords_series, notes):
    """Report whether ``notes`` occurs across a run of consecutive slices.

    Every window of ``len(notes)`` consecutive chords is tested with
    ``check_match``; the search stops at the first window that matches.

    Args:
        chords_series: Iterable of strings, each the literal form of a list of
            note names that end in an octave character, e.g. ``"['F#4', 'D3']"``.
        notes: Sequence of octave-less note names to look for, one per slice.

    Returns:
        True if some window matches, False otherwise (including when ``notes``
        is longer than the number of chords).

    Raises:
        ValueError, SyntaxError: If an entry is not a plain Python literal.
    """
    # literal_eval accepts literals only, so text read back from a CSV cannot
    # run code the way eval() would allow.
    chords_list = [ast.literal_eval(chords) for chords in chords_series]
    window_size = len(notes)
    last_start = len(chords_list) - window_size
    # range() is empty when the pattern is longer than the piece.
    for start in range(last_start + 1):
        if check_match(chords_list[start:start + window_size], notes):
            return True
    return False

def check_match(chords_list_with_number, notes):
    """Check that each note occurs in the chord at the same position.

    The chords are first passed through ``remove_number``; notes and chords
    are then paired with ``zip``, so surplus items on the longer side are
    ignored.

    Returns:
        The result of ``np.all`` over the per-position membership tests.
    """
    stripped_chords = remove_number(chords_list_with_number)
    hits = []
    for wanted, chord in zip(notes, stripped_chords):
        hits.append(wanted in chord)
    return np.all(hits)

def remove_number(chords_list):
    """Return a copy of ``chords_list`` with the last character cut from every note.

    Args:
        chords_list: List of chords, each a list of note strings.

    Returns:
        A new list of lists holding the shortened note strings.
    """
    return [[pitch[:-1] for pitch in chord] for chord in chords_list]
    

In [173]:
# Pair every path with its pickle target: the path minus its last four characters, plus 'ByPiece'.
# NOTE(review): `file_paths` is not assigned in any visible cell - confirm where it comes from.
files_by_piece = [(file_name[0:-4] + 'ByPiece', file_name) for file_name in file_paths]
print(files_by_piece)

[('YCAC-data-1/HandelSlicesByPiece', 'YCAC-data-1/HandelSlices.csv'), ('YCAC-data-1/BachSlicesByPiece', 'YCAC-data-1/BachSlices.csv'), ('YCAC-data-1/HaydnSlicesByPiece', 'YCAC-data-1/HaydnSlices.csv'), ('YCAC-data-1/BrahmsSlicesByPiece', 'YCAC-data-1/BrahmsSlices.csv'), ('YCAC-data-1/ChopinSlicesByPiece', 'YCAC-data-1/ChopinSlices.csv'), ('YCAC-data-1/MozartSlicesByPiece', 'YCAC-data-1/MozartSlices.csv'), ('YCAC-data-1/BeethovenSlicesByPiece', 'YCAC-data-1/BeethovenSlices.csv'), ('YCAC-data-1/DebussySlicesByPiece', 'YCAC-data-1/DebussySlices.csv'), ('YCAC-data-1/ByrdSlicesByPiece', 'YCAC-data-1/ByrdSlices.csv'), ('YCAC-data-1/MendelssohnSlicesByPiece', 'YCAC-data-1/MendelssohnSlices.csv'), ('YCAC-data-1/LisztSlicesByPiece', 'YCAC-data-1/LisztSlices.csv'), ('YCAC-data-2/WagnerSlicesByPiece', 'YCAC-data-2/WagnerSlices.csv'), ('YCAC-data-2/TchaikovskySlicesByPiece', 'YCAC-data-2/TchaikovskySlices.csv'), ('YCAC-data-2/Saint-SaensSlicesByPiece', 'YCAC-data-2/Saint-SaensSlices.csv'), ('YCAC-

In [12]:
def get_pieces(data_csv):
    """Split a slice table into one DataFrame per piece.

    Args:
        data_csv: DataFrame with a ``file`` column naming the piece each row
            belongs to.

    Returns:
        dict mapping each distinct ``file`` value to the rows carrying it
        (original index labels and row order kept), keyed in order of first
        appearance. Rows whose ``file`` is missing are left out.
    """
    # One grouped pass instead of a full-table comparison per piece name.
    return {piece: rows for piece, rows in data_csv.groupby('file', sort=False)}

def save_pieces():
    """Pickle the per-piece split of every slice CSV.

    Walks the module-level ``files_by_piece`` pairs ``(pickle_path, csv_path)``,
    reads each CSV as Latin-1, splits it with ``get_pieces`` and pickles the
    resulting dict to ``pickle_path``, printing progress after each file.
    """
    files_todo = len(files_by_piece)
    for files_completed, (filename_by_piece, file_name) in enumerate(files_by_piece, start=1):
        data_by_pieces = get_pieces(pd.read_csv(file_name, encoding= 'latin 1'))
        # The with-block closes the file even if pickling fails part-way.
        with open(filename_by_piece, 'wb') as f:
            pickle.dump(data_by_pieces, f)
        print(files_completed, 'of', files_todo, 'done')
        
# save_pieces()

In [15]:
# Load one pickled per-piece mapping (presumably written by save_pieces - confirm).
# NOTE: pickle.load can execute arbitrary code; only open pickles created locally.
# The with-block closes the file handle, which was previously left open.
with open('YCAC-data-1/BeethovenSlicesByPiece', 'rb') as f:
    obj = pickle.load(f)

In [52]:
# names_1 = ['Bach', 'Beethoven', 'Brahms', 'Byrd', 'Chopin', 'Debussy', 
#           'Handel', 'Haydn', 'Liszt', 'Mendelssohn', 'Mozart']
# names_2 = ['Saint-Saens', 'Scarlatti', 'Schubert', 'Schumann', 'Tchaikovsky', 
#           'Telemann', 'Vivaldi', 'Wagner']

# file_paths_3 = [file for file in file_paths_3 if '.csv' in file]
# names_3 = []
# for file in file_paths_3:
#     data = pd.read_csv(file, encoding= 'latin 1')
#     composers = list(set(data['Composer']))
#     names_3 = names_3 + composers

['Lefebvre',
 'Loeillet',
 'Liobet',
 'Loud',
 'Liguori',
 'Leonarda',
 'Lemmens',
 'Lacombe',
 'Lecuona',
 'Lawes',
 'Labitzky',
 'Lachner',
 'Lajarte',
 'Lefebure',
 'Lehar',
 'Lobo',
 'Leschetizky',
 'Lalo',
 'Levy',
 'Leo',
 'Losy',
 'Linley',
 'Lully',
 'Luigini',
 'Lemire',
 'Legnani',
 'Lindberg',
 'Lassen',
 'Lauffensteiner',
 'Lotti',
 'Lyadov',
 'Leybach',
 'Leclair',
 'Carissimi',
 'Carreno',
 'Chueca y Robles',
 'Charpentier',
 'Castello',
 'Catel',
 'Casanovas',
 'Carcassi',
 'Crusell',
 'Casella',
 'Carey',
 'Cimarosa',
 'Couperin',
 'Cardillo',
 'Carulli',
 'Cabanilles',
 'Coste',
 'Chabrier',
 'Czerny',
 'Corelli',
 'Clementi',
 'Chaminade',
 'Cramer',
 'Carr',
 'Cooke',
 'Cons',
 'Cervantes',
 'Chapi',
 'Cherubini',
 'Croft',
 'Colkin',
 'Cantallos',
 'Cano',
 'Campion',
 'Campra',
 'Casciolini',
 'Chambonnieres',
 'Cilea',
 'Clerambault',
 'Cernohorsky',
 'Clarke',
 'Coleridge-Taylor',
 'Cui',
 'Caldara',
 'Corbetta',
 'Ocarolan',
 'Oliveira',
 'Offenbach',
 'Onslow',

In [13]:
def get_file_path_to_composer(name):
    """Build the path of the per-piece pickle for a composer.

    The composer is looked up in the module-level lists ``names_1``,
    ``names_2`` and ``names_3``, in that order, stopping at the first hit; the
    number of the list that contains it selects the ``YCAC-data-<n>`` directory.

    Args:
        name: Composer name.

    Returns:
        'YCAC-data-<n>/<name>SlicesByPiece', or False when the name is in none
        of the three lists.
    """
    def build_path(corpus_number):
        return 'YCAC-data-' + str(corpus_number) + '/' + name + 'SlicesByPiece'

    if name in names_1:
        return build_path(1)
    if name in names_2:
        return build_path(2)
    if name in names_3:
        return build_path(3)
    return False
    
def search_composer(notes, name= 'none'):
    """List a composer's pieces that contain a given run of notes.

    Args:
        notes: Octave-less note names, one per consecutive slice (handed to
            ``has_notes``).
        name: Composer name as understood by ``get_file_path_to_composer``.

    Returns:
        The names of the matching compositions; an empty list when the
        composer is unknown.
    """
    file_name = get_file_path_to_composer(name)
    # The lookup returns False for an unknown composer. Testing `name` instead
    # would let that False reach open(), which treats it as file descriptor 0.
    if not file_name:
        return []
    # NOTE: pickle.load can execute arbitrary code; only load pickles created locally.
    with open(file_name, 'rb') as file:
        data = pickle.load(file)
    return [composition for composition in data
            if has_notes(data[composition]['Chord'], notes)]
    
# print(search_composer(['F', 'E', 'D#', 'E', 'B', 'A', 'G#', 'A', 'B', 'A', 'G#', 'A', 'C'],'Mozart'))
# print(search_composer(['E', 'D#', 'E', 'D#', 'E', 'B', 'D', 'C', 'A'],
#                       'Liszt'))

In [58]:
data = pd.read_csv('YCAC-data-3/FSlices.csv', encoding= 'latin 1')
# NaN never compares equal to anything (itself included), so `== np.nan`
# always selects zero rows; isna() is the test for missing values.
data[data['Composer'].isna()]


Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,offset,Chord,HighestPitch,LowestPitch,file,Composer,Confidence


In [86]:
# Reload the Mozart slice table, this time decoding it as Latin-1.
data = pd.read_csv('YCAC-data-1/MozartSlices.csv', encoding= 'latin 1')

In [83]:
""" Mozart tests """
# has_notes(data['Chord'], ['F', 'E', 'D#', 'E', 'B', 'A', 'G#', 'A', 'B', 'A', 'G#', 'A', 'C'])
# has_notes(data['Chord'],['E', 'D#', 'E', 'B', 'A', 'G#', 'A', 'B', 'A', 'G#', 'A', 'C'])

""" Beethoven tests"""
# Search the whole 'Chord' column of the currently loaded table for the nine-note run.
# NOTE(review): the last table loaded in a visible cell is MozartSlices.csv, and this cell's
# execution count (83) precedes that load (86) - confirm which composer's data `data` held here.
has_notes(data['Chord'],['E', 'D#', 'E', 'D#', 'E', 'B', 'D', 'C', 'A'])

True