In [102]:
import numpy as np
import mir_eval
import collections
import json
from os.path import join as joinpath
import csv
from statistics import mean
import pandas as pd

In [103]:
# Root folder of the analysis data. Note files are looked up under
# <path>/<song>/<transcriber>/ and the result CSV is written to <path>/<song>/.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
path = "/Users/polinap/Yandex.Disk.localized/RESEARCH_RU/VOCAL_NOTES_PROJECT/RUSSIAN/ANALYSIS/"
# Song identifier: used as a sub-folder name and as the prefix of each notes file name.
song = "Da_po_zoriushke_1"
# Transcriber identifiers; one notes CSV is loaded per entry, in this order.
transcriber_list = ["PP", "OV"]
# Separator placed between the parts of a notes file name.
divider = "__"
# File name of the CSV written by the last cell of the notebook.
outfile = "howmanynotes.csv"

In [104]:
# One notes file per transcriber, located at
# <path>/<song>/<transcriber>/<song>__<transcriber>__notes.csv
# (the "__" separator comes from `divider`).
notespath_list = [
    joinpath(path, song, initials, divider.join((song, initials, 'notes.csv')))
    for initials in transcriber_list
]
notespath_list

['/Users/polinap/Yandex.Disk.localized/RESEARCH_RU/VOCAL_NOTES_PROJECT/RUSSIAN/ANALYSIS/Da_po_zoriushke_1/PP/Da_po_zoriushke_1__PP__notes.csv',
 '/Users/polinap/Yandex.Disk.localized/RESEARCH_RU/VOCAL_NOTES_PROJECT/RUSSIAN/ANALYSIS/Da_po_zoriushke_1/OV/Da_po_zoriushke_1__OV__notes.csv']

In [105]:
def read_from_csv(notespath):
    """Load a notes CSV into a three-column DataFrame.

    The file is read without treating any row as a header. Only the first
    three columns are kept, and a leading row whose first cell is "TIME" is
    dropped as a header row. The columns are then named TIME, VALUE and
    DURATION.

    Parameters
    ----------
    notespath : path of the CSV file to read.

    Returns
    -------
    pandas.DataFrame
        Columns TIME, VALUE, DURATION with a fresh 0-based index when a
        header row was removed. VALUE is converted to float, and further to
        int when every value is a whole number. TIME and DURATION are
        returned as pandas parsed them (not converted here).
    """
    table = pd.read_csv(notespath, header=None)  # no row is used as header

    # keep only the three leading columns
    if table.shape[1] > 3:
        table = table.iloc[:, :3]

    # a first cell reading "TIME" means the file carried its own header row
    if table.iloc[0, 0] == "TIME":
        table = table.iloc[1:, :]
        table.reset_index(drop=True, inplace=True)

    table.columns = ['TIME', 'VALUE', 'DURATION']

    # VALUE is numeric: integer when all entries are whole, float otherwise
    values = table['VALUE'].astype(float)
    whole_numbers_only = all(values % 1 == 0)
    table['VALUE'] = values.astype(int) if whole_numbers_only else values

    return table

In [106]:
# Per-transcriber results, index-aligned with notespath_list.
notes_list = []
onsets_list = []
pitches_list = []
segments_list = []

# Replacement values for degenerate intervals: zero-length notes and a first
# note ending at or before time 0 are nudged to these tiny positive values.
# (Presumably so the intervals pass mir_eval's validation — TODO confirm.)
MIN_DURATION = 0.00001
MIN_FIRST_OFFSET = 0.000001

for notespath in notespath_list:
    notes = read_from_csv(notespath)
    notes_list.append(notes)

    # copy=True guarantees writable arrays that do not alias the DataFrame,
    # since some of them are modified in place below.
    onsets = notes['TIME'].astype(float).to_numpy(copy=True)
    pitches = notes['VALUE'].astype(float).to_numpy(copy=True)
    durations = notes['DURATION'].astype(float).to_numpy(copy=True)

    # Give every zero-duration note a tiny positive length.
    # (The original loop referenced the undefined name `duration` here and
    # raised NameError as soon as a zero duration was present.)
    durations[durations == 0] = MIN_DURATION

    # Offsets are computed from the onsets as read, before the first onset
    # is clamped below.
    offsets = onsets + durations
    if onsets.size:
        if onsets[0] < 0:
            onsets[0] = 0
        if offsets[0] <= 0:
            offsets[0] = MIN_FIRST_OFFSET

    # One (onset, offset) row per note; shape (n, 2), also for n == 0.
    segments = np.column_stack((onsets, offsets))

    onsets_list.append(onsets)
    pitches_list.append(pitches)
    segments_list.append(segments)

In [107]:
# Transcriber 0 plays the role of mir_eval's "reference", transcriber 1 the "estimate".
ref_intervals, est_intervals = segments_list[0], segments_list[1]
ref_pitches, est_pitches = pitches_list[0], pitches_list[1]

# Let mir_eval check the inputs before matching.
mir_eval.transcription.validate(ref_intervals, ref_pitches, est_intervals, est_pitches)

# Pair notes by onset only; each result item is (index in transcription 0,
# index in transcription 1).
matched_onsets = mir_eval.transcription.match_note_onsets(
    ref_intervals,
    est_intervals,
    onset_tolerance=1,
    strict=False,
)
#matched_notes = mir_eval.transcription.match_notes(segments_list[0], pitches_list[0], segments_list[1], pitches_list[1])
print(matched_onsets)

[(0, 0), (1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6), (7, 7), (8, 8), (9, 9), (10, 10), (11, 11), (12, 12), (13, 13), (14, 14), (15, 15), (16, 16), (17, 17), (18, 18), (19, 19), (20, 20), (21, 21), (22, 22), (23, 23), (24, 24), (25, 25), (26, 27), (27, 28), (28, 29), (29, 30), (30, 31), (31, 32), (32, 33), (33, 34), (34, 35), (35, 36), (36, 37), (37, 38), (38, 39), (39, 40), (40, 41), (41, 42), (42, 43), (43, 44), (44, 45), (45, 46), (46, 47), (47, 48), (48, 49), (49, 50), (50, 51), (51, 52), (52, 53), (53, 54), (54, 55), (55, 56), (56, 57), (57, 58), (58, 59), (59, 60), (60, 61), (61, 64), (62, 65), (63, 66), (64, 67), (65, 68), (66, 69), (67, 71), (68, 72), (69, 73)]


In [108]:
# Walk through the matched note-index pairs and record where the index offset
# between the two transcriptions changes.
#   alignment - every matched pair plus the pairs added for skipped indices
#   clusters  - only the pairs around a change of offset: the previous matched
#               pair and one pair per skipped index
# The first two notes are taken as aligned one-to-one, so pairs whose first
# index is below 2 are not processed.
alignment = [(0,0), (1,1)]
clusters = []
prev_onset1 = 1
prev_onset2 = 1
for onset1, onset2 in matched_onsets:
    if onset1 < 2:
        continue

    lag = onset2 - onset1
    prev_lag = prev_onset2 - prev_onset1

    if lag > prev_lag:
        # Indices of transcription 2 were skipped since the previous match:
        # attach each of them to the previous note of transcription 1.
        clusters.append((prev_onset1, prev_onset2))
        for skipped in range(prev_onset2 + 1, onset2):
            pair = (prev_onset1, skipped)
            alignment.append(pair)
            clusters.append(pair)
    elif lag < prev_lag:
        # Mirror case: indices of transcription 1 were skipped; attach each
        # of them to the previous note of transcription 2.
        clusters.append((prev_onset1, prev_onset2))
        for skipped in range(prev_onset1 + 1, onset1):
            pair = (skipped, prev_onset2)
            alignment.append(pair)
            clusters.append(pair)

    alignment.append((onset1, onset2))
    prev_onset1, prev_onset2 = onset1, onset2

In [109]:
# Turn every (index in transcription 1, index in transcription 2) pair of
# `clusters` into the time span covering both notes: earliest onset to
# latest offset.
cluster_segments = []
segments1 = segments_list[0]
segments2 = segments_list[1]
for ind1, ind2 in clusters:
    start = min(segments1[ind1][0], segments2[ind2][0])
    stop = max(segments1[ind1][1], segments2[ind2][1])
    cluster_segments.append([start, stop])

# Merge neighbouring spans that touch or overlap. Walking backward means a
# span that has just absorbed its successor is compared with its predecessor
# on the next step, and deleting index ind+1 never shifts unvisited entries.
for ind in range(len(cluster_segments) - 2, -1, -1):
    current = cluster_segments[ind]
    following = cluster_segments[ind + 1]
    if current[1] >= following[0]:
        # The merged span is the union of both. Copying only the successor's
        # end would shrink the span whenever the current one ends later.
        current[0] = min(current[0], following[0])
        current[1] = max(current[1], following[1])
        del cluster_segments[ind + 1]
cluster_segments

[[8.95600907, 9.687074829999998],
 [25.153015873, 25.87138322],
 [37.437460317, 38.717460317]]

In [110]:
# Name the columns at construction: with an empty cluster list a bare
# pd.DataFrame([]) has zero columns, and assigning two column names to it
# afterwards raises a length-mismatch error.
df = pd.DataFrame(cluster_segments, columns=["ONSET", "OFFSET"])

In [111]:
# Label the two span columns, derive each cluster's length, and keep only
# the onset and the duration for export.
df.columns = ["ONSET", "OFFSET"]
df["DURATION"] = df["OFFSET"].sub(df["ONSET"])
df = df.loc[:, ["ONSET", "DURATION"]]

In [112]:
# Save the table as <path>/<song>/<outfile>, without the row index.
outpath = joinpath(path, song, outfile)
df.to_csv(outpath, index=False)