In [168]:
import numpy as np
import mir_eval
import collections
import json
import os, os.path, copy
import csv
from statistics import mean
from os.path import join as joinpath
import pandas as pd
import itertools

In [169]:
# path to ANALYSIS folder (its sub-folders are the ones walked by the analysis below)
PATH_TO_ANALYSIS = "/Users/polinap/Yandex.Disk.localized/RESEARCH_RU/VOCAL_NOTES_PROJECT/RUSSIAN/ANALYSIS_quan/"
from pprint import pprint
import os

# show the content of the ANALYSIS folder in alphabetical order
analysis_entries = os.listdir(PATH_TO_ANALYSIS)
analysis_entries.sort()
pprint(analysis_entries)

['.DS_Store',
 'Da_po_zoriushke_1',
 'Da_po_zoriushke_2',
 'Da_po_zoriushke_3',
 'Da_po_zoriushke_4',
 'Kak_letala_jara',
 'Milyj_moj_zhalkij',
 'Na_gore_na_gorke',
 'Neumyvataja',
 'Oj_kumushki',
 'Proshaj_zhizn',
 'Protocol template.xlsx',
 'QUAN',
 'Selo_veselo_1',
 'Selo_veselo_2',
 'Selo_veselo_3',
 'Selo_veselo_4',
 'Selo_veselo_5',
 'Selo_veselo_6',
 'Temno_li_na_nebe',
 'Tsvetiki_lazorevye',
 'Tsvety_li_moi_tsvetiki',
 'Uzh_ja_dumala_1',
 'Uzh_ja_dumala_2',
 'Zhil_byl_Lazar',
 'recordings.csv',
 'transcribers.csv']


In [170]:
# path to quantitative analysis results
PATH_TO_QUAN = joinpath(PATH_TO_ANALYSIS, "QUAN")
# exist_ok=True makes this cell safe to re-run: the folder is created only when
# it is missing, and an existing folder is left untouched. If the path exists
# but is not a directory, FileExistsError is still raised.
os.makedirs(PATH_TO_QUAN, exist_ok=True)

In [171]:
# --- run configuration ---

# Name of the single song folder to analyse; leave "" to run through all songs.
SONG = ""

# Separator placed between the parts of a file name (song, transcriber, file type).
DIVIDER = "_" * 2

# File types looked for in every transcriber folder.
FILETYPE_LIST = "notes segments pitches".split()

In [172]:
# utils copied from the SV preparation script

def construct_pathlist(transcriber_list, filetype_list, fileext_list):
    """Build the file path for every transcriber / file type / extension combination.

    Each file name is ``SONG + DIVIDER + transcriber + DIVIDER + filetype + fileext``
    and is placed under ``PATH_TO_ANALYSIS/SONG/transcriber/``.  The song is read
    from the module-level ``SONG`` variable, not from an argument.  The paths are
    only assembled; nothing checks that the files exist.

    Returns:
        list of str, ordered by transcriber, then file type, then extension.
    """
    combinations = itertools.product(transcriber_list, filetype_list, fileext_list)
    return [
        joinpath(PATH_TO_ANALYSIS, SONG, who, DIVIDER.join((SONG, who, kind + ext)))
        for who, kind, ext in combinations
    ]

def construct_pathlist_existing(transcriber_list, filetype_list, fileext_list):
    """Like ``construct_pathlist``, but keep only the files that exist on disk.

    Paths ending in ``notes.csv`` are mandatory; every other file is optional
    and silently skipped when missing.

    Returns:
        list of str: the existing paths, in the order produced by
        ``construct_pathlist``.

    Raises:
        FileNotFoundError: a mandatory ``notes.csv`` file is missing.  The
            path is also printed before raising.
    """
    path_list = []
    # reuse the path builder so both functions always agree on the naming scheme
    for path in construct_pathlist(transcriber_list, filetype_list, fileext_list):
        if os.path.isfile(path):
            path_list.append(path)
        elif path.endswith('notes.csv'):
            # check existence: notes.csv are mandatory, other files optional
            print("ERROR: file not found: %s" %path)
            raise FileNotFoundError("mandatory notes file not found: %s" % path)

    return path_list


# automatically detect transcribers
def detect_transcribers(SONG):
    """Return the transcribers of a song that have a ``notes.csv`` file.

    Every direct sub-folder of ``PATH_TO_ANALYSIS/SONG`` is treated as a
    transcriber.  For each one, the expected files (every type in
    ``FILETYPE_LIST`` with the extensions ``.csv`` and ``.svl``) are checked:
    a missing ``notes.csv`` discards the transcriber, any other missing file
    only produces a warning.

    The file paths are built from the ``SONG`` argument itself, so the result
    does not depend on the value of the module-level ``SONG`` variable.

    Args:
        SONG: name of the song folder inside ``PATH_TO_ANALYSIS``.

    Returns:
        list of str: names of the transcriber folders that were kept, in the
        order reported by ``os.walk``.

    Raises:
        FileNotFoundError: the song folder does not exist.
    """
    songpath = joinpath(PATH_TO_ANALYSIS,SONG)
    if not os.path.isdir(songpath):
        print("File not found: ", songpath)
        raise FileNotFoundError("song folder not found: %s" % songpath)

    # direct sub-folders of the song folder
    candidates = next(os.walk(songpath))[1]

    transcriber_list = []
    for transcriber in candidates:
        # same naming scheme and ordering as construct_pathlist, but using the
        # SONG argument instead of the global variable
        expected_files = [
            joinpath(songpath, transcriber,
                     SONG + DIVIDER + transcriber + DIVIDER + filetype + fileext)
            for filetype in FILETYPE_LIST
            for fileext in ('.csv', '.svl')
        ]
        for filepath in expected_files:
            if os.path.isfile(filepath):
                continue
            # check existence: notes.csv are mandatory, other files optional
            if filepath.endswith('notes.csv'):
                print("WARNING: file not found: %s, transcriber %s discarded" %(filepath, transcriber))
                break
            print("WARNING: file not found: %s" %filepath)
        else:
            # loop finished without break: the mandatory notes.csv is present
            transcriber_list.append(transcriber)

    print("transcribers: ", transcriber_list)
    # if a transcriber is not found, a file is missing or a filename is misspelled

    return transcriber_list


# custom csv read
def read_from_csv(notespath):
    """Read a notes csv file into a DataFrame with numeric columns.

    The file may or may not start with a header line whose first field is
    ``TIME``; such a line is dropped.  Only the first three columns are kept
    and they are named ``TIME``, ``VALUE`` and ``DURATION`` (``TIME`` and
    ``VALUE`` when the file has two columns).  A row with more fields than
    expected loses its last field instead of aborting the read.

    Columns that were read as text (which happens when a header line is
    present) are converted to float, so the result has the same dtypes with
    or without a header.  ``VALUE`` is converted to int when every value is a
    whole number.

    Args:
        notespath: path of the csv file.

    Returns:
        pandas.DataFrame with a fresh 0-based index.

    Raises:
        ValueError: the file has fewer than two columns, or a cell cannot be
            converted to a number.
    """
    # header=None: a header line, if any, arrives as an ordinary first row
    notes = pd.read_csv(notespath, header=None, on_bad_lines=lambda fields: fields[:-1], engine='python')

    if len(notes.columns) < 2:
        raise ValueError("expected at least 2 columns (TIME, VALUE) in %s, found %d"
                         % (notespath, len(notes.columns)))

    # remove columns beyond the first three; copy so later assignments do not
    # write into a slice of the frame returned by read_csv
    notes = notes.iloc[:, :3].copy()

    # remove headers
    if len(notes) > 0 and notes.iloc[0, 0] == "TIME":
        notes = notes.iloc[1:, :].reset_index(drop=True)

    notes.columns = ['TIME', 'VALUE', 'DURATION'][:len(notes.columns)]

    # a header row forces pandas to read the whole column as text: convert
    # every non-numeric column (dtype kind other than int/uint/float) to float
    for column in notes.columns:
        if notes[column].dtype.kind not in 'iuf':
            notes[column] = notes[column].astype(float)

    notes['VALUE'] = notes['VALUE'].astype(float)
    if all(notes['VALUE'] % 1 == 0):        # all integers
        notes['VALUE'] = notes['VALUE'].astype(int)

    return notes


In [173]:
def compare_two_transcriptions(notes_filename1, notes_filename2):
    """Compare two note transcriptions with ``mir_eval.transcription``.

    Both files are read with ``load_notes``.  The notes of the first file are
    passed to mir_eval before those of the second file; the pair is validated
    first and then evaluated.

    Returns:
        the dictionary produced by ``mir_eval.transcription.evaluate``.
    """
    first_notes = load_notes(notes_filename1)      # (segments, pitches)
    second_notes = load_notes(notes_filename2)     # (segments, pitches)
    mir_eval.transcription.validate(*first_notes, *second_notes)
    return mir_eval.transcription.evaluate(*first_notes, *second_notes)

In [174]:
def load_notes(filename, min_duration=0.00001):
    """Load a notes csv file as note segments and pitches.

    The first three comma-separated columns of the file are read as onset,
    pitch and duration.  Notes whose duration is zero or negative get
    ``min_duration`` instead, and their onset and original duration are
    printed.

    Args:
        filename: path of the csv file.
        min_duration: duration given to notes whose duration is <= 0.

    Returns:
        (segments, pitches): ``segments`` is an array with one
        ``[onset, onset + duration]`` row per note, ``pitches`` is the array
        of the second column.
    """
    notes = np.genfromtxt(filename, delimiter=',', skip_header=0, usecols=(0, 1, 2))
    # genfromtxt returns a 1-D array for a file with a single row: force 2-D
    notes = notes.reshape(-1, 3)
    onsets = notes[:, 0]
    pitches = notes[:, 1]
    durations = notes[:, 2]
    for idx in range(len(durations)):
        if durations[idx] <= 0:
            # report the offending note before replacing its duration
            print(onsets[idx], durations[idx])
            durations[idx] = min_duration
    offsets = onsets + durations
    #if onsets[0] < 0:
    #    onsets[0] = 0
    #if offsets[0] <= 0:
    #    offsets[0] = 0.00001
    # reshape keeps the (n, 2) layout even when there are no notes
    segments = np.array(list(zip(onsets, offsets))).reshape(-1, 2)
    return segments, pitches

In [175]:
def compare_all_transcriptions(transcriber_list):
    """Compare every pair of transcribers and save the results.

    For each 2-combination of ``transcriber_list`` the two ``notes.csv`` files
    are compared with ``compare_two_transcriptions``.  The results are stored
    under the running index of the pair and handed to ``save_results``.
    """
    def notes_csv_of(transcriber):
        # construct_pathlist returns exactly one path for one transcriber/type/extension
        return construct_pathlist([transcriber], ['notes'], ['.csv'])[0]

    pairs = itertools.combinations(transcriber_list, 2)
    results_dictofdicts = {
        pair_index: compare_two_transcriptions(notes_csv_of(first), notes_csv_of(second))
        for pair_index, (first, second) in enumerate(pairs)
    }
    save_results(results_dictofdicts, transcriber_list)
        
def save_results(results_dictofdicts, transcriber_list):
    """Write the pairwise comparison results of one song to a csv file.

    The file is created in ``PATH_TO_QUAN``; its name is made of the
    module-level ``SONG``, the transcriber names joined by ``_`` and the
    suffix ``quan_analysis``.  The table has one column per transcriber pair
    plus a final ``mean`` column, and one row per key of the result
    dictionaries.  All values are rounded to 2 decimals.

    Args:
        results_dictofdicts: dict mapping the running index of each
            2-combination of ``transcriber_list`` to its result dictionary.
        transcriber_list: transcriber names, in the order used to build the
            combinations.

    Nothing is written when ``results_dictofdicts`` is empty.
    """
    if not results_dictofdicts:
        # without results there are no rows; avoid leaving a header-only file
        print("WARNING: no results to save")
        return

    pair_labels = [first + ' vs ' + second
                   for first, second in itertools.combinations(transcriber_list, 2)]

    csv_outfilename = SONG + DIVIDER + "_".join(transcriber_list) + DIVIDER + "quan_analysis" + ".csv"
    outpath = joinpath(PATH_TO_QUAN, csv_outfilename)
    # newline='' is what the csv module asks for; otherwise extra blank lines
    # can appear on platforms that translate line endings
    with open(outpath, 'w', newline='') as csvfile:
        print("saving ", outpath)
        csvwriter = csv.writer(csvfile)
        csvwriter.writerow([''] + pair_labels + ["mean"])
        for key in results_dictofdicts[0].keys():
            values = [results_dictofdicts[count][key] for count in range(len(pair_labels))]
            row = [key] + [round(value, 2) for value in values]
            row.append(round(np.mean(values), 2))
            csvwriter.writerow(row)
        
    

In [176]:
def quan_analysis_1song(SONG):
    """Run the quantitative analysis of a single song.

    The transcribers are detected with ``detect_transcribers``; when fewer
    than two are found a warning is printed and nothing else happens,
    otherwise all pairs are compared with ``compare_all_transcriptions``.
    """
    print("---------------\nRunning quantitative analysis for song: ", SONG)
    transcribers = detect_transcribers(SONG)

    # a comparison needs at least one pair of transcribers
    if len(transcribers) < 2:
        print("WARNING: not enough transcribers")
        return

    compare_all_transcriptions(transcribers)

In [177]:
# Run the analysis for the configured song, or for every song folder when SONG is "".
if len(SONG) > 0:
    quan_analysis_1song(SONG)
else:
    # every sub-folder of the ANALYSIS folder is a song, except the folder
    # that receives the results
    quan_abspath = os.path.abspath(PATH_TO_QUAN)
    song_list = sorted(
        folder for folder in next(os.walk(PATH_TO_ANALYSIS))[1]
        if os.path.abspath(joinpath(PATH_TO_ANALYSIS, folder)) != quan_abspath
    )
    try:
        # the loop variable must be the module-level SONG: construct_pathlist
        # and save_results read it to build file names
        for SONG in song_list:
            quan_analysis_1song(SONG)
    finally:
        # restore the "all songs" setting so that re-running this cell does
        # not analyse only the last song of the previous run
        SONG = ""
    
    

---------------
Running quantitative analysis for song:  Da_po_zoriushke_1
transcribers:  ['PP', 'OV', 'YN']
saving  /Users/polinap/Yandex.Disk.localized/RESEARCH_RU/VOCAL_NOTES_PROJECT/RUSSIAN/ANALYSIS_quan/QUAN/Da_po_zoriushke_1__PP_OV_YN__quan_analysis.csv
---------------
Running quantitative analysis for song:  Da_po_zoriushke_2
transcribers:  ['PP', 'OV', 'YN']
saving  /Users/polinap/Yandex.Disk.localized/RESEARCH_RU/VOCAL_NOTES_PROJECT/RUSSIAN/ANALYSIS_quan/QUAN/Da_po_zoriushke_2__PP_OV_YN__quan_analysis.csv
---------------
Running quantitative analysis for song:  Da_po_zoriushke_3
transcribers:  ['PP', 'OV', 'YN']
saving  /Users/polinap/Yandex.Disk.localized/RESEARCH_RU/VOCAL_NOTES_PROJECT/RUSSIAN/ANALYSIS_quan/QUAN/Da_po_zoriushke_3__PP_OV_YN__quan_analysis.csv
---------------
Running quantitative analysis for song:  Da_po_zoriushke_4
transcribers:  ['PP', 'OV', 'YN']
saving  /Users/polinap/Yandex.Disk.localized/RESEARCH_RU/VOCAL_NOTES_PROJECT/RUSSIAN/ANALYSIS_quan/QUAN/Da_po_z