In [55]:
import os
import mir_eval

versions = ['HU33', 'SC06', 'FI66', 'FI80']
chroma_type = ['stft', 'cq', 'cens']

template_type = ['binary', 'ks', 'harmonic']

seg_type = ['under', 'mean', 'over']

key2num = {
    "A:maj": 0, "A#:maj": 1, "Bb:maj": 1, "B:maj": 2,
    "C:maj": 3, "C#:maj": 4, "Db:maj": 4, "D:maj": 5, "D#:maj": 6, "Eb:maj": 6, 
    "E:maj": 7, "F:maj": 8, "F#:maj": 9, "G:maj": 10, "G#:maj": 11, "Ab:maj": 11,
    "A:min": 12, "A#:min": 13, "Bb:min": 13, "B:min": 14,
    "C:min": 15, "C#:min": 16, "Db:min": 16, "D:min": 17, "D#:min": 18, "Eb:min": 18, 
    # There is D#:min and Eb:min, This could be a problem when finding key
    "E:min": 19, "F:min": 20, "F#:min": 21, "G:min": 22, "G#:min": 23, "Ab:min": 23,
    }

num2key = {
    0: "A:maj", 1: "A#:maj", 2: "B:maj", 3: "C:maj", 4: "C#:maj", 5: "D:maj", 6: "D#:maj",
    7: "E:maj", 8:"F:maj", 9: "F#maj", 10: "G:maj", 11: "G#:maj", 
    12: "A:min", 13: "A#:min", 14: "B:min", 15: "C:min", 16: "C#:min", 17: "D:min", 18: "D#min",
    19: "E:min", 20: "F:min", 21: "F#:min", 22: "G:min", 23: "G#:min"
}

In [65]:
seg_score = {}
for v in versions:
    seg_score[v] = {}
    for c in chroma_type:
        seg_score[v][c] = {}
        for t in template_type:
            seg_score[v][c][t] = {}
            for n in range(1, 25):
                seg_score[v][c][t][n] = {}
                key_path = "result/SWD/local_predict/" + v + "/" + str(n) + "/" + c + "/" + t + ".txt"

                predict = []
                with open(key_path, mode='r') as f:

                    pre_frame_i = 10
                    frame_i = 0

                    pre_key = int(f.readline())

                    while True:
                        # Parse the string

                        line = f.readline()
                        if not line:
                            break

                        if frame_i < 10:
                            frame_i += 1
                            continue

                        key = int(line)

                        if key != pre_key:

                            predict.append([pre_frame_i, frame_i, pre_key])
                            pre_key = key
                            pre_frame_i = frame_i

                        frame_i += 1
                    predict.append([pre_frame_i, frame_i, pre_key])

                csv_path = "result/SWD/local_predict_csv/" + v + "/" + str(n) + "/" + c + "/" + t + ".csv"
                os.makedirs(os.path.dirname(csv_path), exist_ok=True)
                with open(csv_path, mode='w') as f:
                    f.write('start;end;key\n')
                    for p in predict:
                        key = num2key[p[2]]
                        f.write(str(p[0]/10) + ';' + str(p[1]/10) + ';"' + str(key) + '"\n')

                (ref_intervals0, ref_labels0) = mir_eval.io.load_labeled_intervals(csv_path, 
                                                                                 delimiter=';',
                                                                                 comment='start;end;key')
                (ref_intervals1, ref_labels1) = mir_eval.io.load_labeled_intervals('data/SWD/02_Annotations/ann_audio_localkey-ann1/Schubert_D911-' + str(n).zfill(2) + '_' + v +'.csv', 
                                                                                 delimiter=';',
                                                                                 comment='start;end;key')    
                
                seg_score[v][c][t][n]['under'] = mir_eval.chord.underseg(ref_intervals0, ref_intervals1) 
                seg_score[v][c][t][n]['over'] = mir_eval.chord.overseg(ref_intervals0, ref_intervals1)
                seg_score[v][c][t][n]['mean'] = mir_eval.chord.seg(ref_intervals0, ref_intervals1)

In [66]:
seg_score

{'HU33': {'stft': {'binary': {1: {'under': 0.9344983903817095,
     'over': 0.6825499034127493,
     'mean': 0.6825499034127493},
    2: {'under': 0.8248953974895397,
     'over': 0.49610389610389627,
     'mean': 0.49610389610389627},
    3: {'under': 0.8200304329782819,
     'over': 0.7693235097119892,
     'mean': 0.7693235097119892},
    4: {'under': 0.740057306590258,
     'over': 0.8542792792792794,
     'mean': 0.740057306590258},
    5: {'under': 0.7898491491417107,
     'over': 0.8534016093635698,
     'mean': 0.7898491491417107},
    6: {'under': 0.4042221702890084,
     'over': 0.8928433268858802,
     'mean': 0.4042221702890084},
    7: {'under': 0.6360400444938821,
     'over': 0.756629092860977,
     'mean': 0.6360400444938821},
    8: {'under': 0.9944602988081249,
     'over': 0.3622240392477514,
     'mean': 0.3622240392477514},
    9: {'under': 0.43129230284904774,
     'over': 0.9616730038022814,
     'mean': 0.43129230284904774},
    10: {'under': 0.45486001887385963

In [86]:
versions_score = {}
versions_count = 24*3*3*3
chroma_type_score = {}
chroma_type_count = 24*4*3*3
templates_score = {}
templates_count = 24*4*3*3
seg_type_score = {}
seg_type_count = 24*4*3*3

for v in versions:
    for n in range(1,25):
        for c in chroma_type:
            for t in template_type:
                for s in seg_type:
                    
                    score = float(seg_score[v][c][t][n][s])
                    
                    if v not in versions_score:
                        versions_score[v] = score
                    else:
                        versions_score[v] += score

                    if c not in chroma_type_score:
                        chroma_type_score[c] = score
                    else:
                        chroma_type_score[c] += score

                    if t not in templates_score:
                        templates_score[t] = score
                    else:
                        templates_score[t] += score

                    if s not in seg_type_score:
                        seg_type_score[s] = score
                    else:
                        seg_type_score[s] += score

In [87]:
versions_score = {k: v/versions_count for k, v in versions_score.items()}
chroma_type_score = {k: v/chroma_type_count for k, v in chroma_type_score.items()}
templates_score = {k: v/templates_count for k, v in templates_score.items()}
seg_type_score = {k: v/seg_type_count for k, v in seg_type_score.items()}

In [88]:
versions_score

{'HU33': 0.6783071051079357,
 'SC06': 0.6983518811834014,
 'FI66': 0.6841932830876919,
 'FI80': 0.6795133704041938}

對於四種版本的音訊來說，segmetation score差不多。

In [89]:
chroma_type_score

{'stft': 0.676373592224769,
 'cq': 0.6911064734759813,
 'cens': 0.687794164136672}

對於三種chroma type來說，segmetation score差不多。

In [90]:
templates_score

{'binary': 0.6809751223650466,
 'ks': 0.6785609796445243,
 'harmonic': 0.695738127827851}

對於三種template type來說，segmetation score也差不多。

In [91]:
seg_type_score

{'under': 0.7072184540622708,
 'mean': 0.5807049405924519,
 'over': 0.7673508351826986}

對於所有local key prediction，mean segmantaion的分數最低，而over segmantation的情況較under segmatation較少。

In [93]:
seg_midi_score = {}

for t in template_type:
    seg_midi_score[t] = {}
    for n in range(1, 25):
        seg_midi_score[t][n] = {}
        key_path = "result/SWD/local_predict/midi/" + str(n) + "/" + t + ".txt"

        predict = []
        with open(key_path, mode='r') as f:

            pre_frame_i = 10
            frame_i = 0

            pre_key = int(f.readline())

            while True:
                # Parse the string

                line = f.readline()
                if not line:
                    break

                if frame_i < 10:
                    frame_i += 1
                    continue

                key = int(line)

                if key != pre_key:

                    predict.append([pre_frame_i, frame_i, pre_key])
                    pre_key = key
                    pre_frame_i = frame_i

                frame_i += 1
            predict.append([pre_frame_i, frame_i, pre_key])

        csv_path = "result/SWD/local_predict_csv/midi/" + str(n) + "/" + t + ".csv"
        os.makedirs(os.path.dirname(csv_path), exist_ok=True)
        with open(csv_path, mode='w') as f:
            f.write('start;end;key\n')
            for p in predict:
                key = num2key[p[2]]
                f.write(str(p[0]/10) + ';' + str(p[1]/10) + ';"' + str(key) + '"\n')

        (ref_intervals0, ref_labels0) = mir_eval.io.load_labeled_intervals(csv_path, 
                                                                         delimiter=';',
                                                                         comment='start;end;key')
        (ref_intervals1, ref_labels1) = mir_eval.io.load_labeled_intervals('data/SWD/02_Annotations/ann_score_localkey-ann1/Schubert_D911-' + str(n).zfill(2)+ '.csv', 
                                                                         delimiter=';',
                                                                         comment='start;end;key')    

        seg_midi_score[t][n]['under'] = mir_eval.chord.underseg(ref_intervals0, ref_intervals1) 
        seg_midi_score[t][n]['over'] = mir_eval.chord.overseg(ref_intervals0, ref_intervals1)
        seg_midi_score[t][n]['mean'] = mir_eval.chord.seg(ref_intervals0, ref_intervals1)

In [94]:
seg_midi_score

{'binary': {1: {'under': 0.8116847568230425,
   'over': 0.7751904280417931,
   'mean': 0.7751904280417931},
  2: {'under': 0.8507287947081894,
   'over': 0.6626077348066297,
   'mean': 0.6626077348066297},
  3: {'under': 0.9040706619124329,
   'over': 0.8118448829738412,
   'mean': 0.8118448829738412},
  4: {'under': 0.8155946384829217,
   'over': 0.792274618585298,
   'mean': 0.792274618585298},
  5: {'under': 0.6849839632190635,
   'over': 0.871394348894349,
   'mean': 0.6849839632190635},
  6: {'under': 0.9466991116518607,
   'over': 0.7939295695919508,
   'mean': 0.7939295695919508},
  7: {'under': 0.7574561818402952,
   'over': 0.7845405037440436,
   'mean': 0.7574561818402952},
  8: {'under': 0.9333323671357556,
   'over': 0.6591269199676637,
   'mean': 0.6591269199676637},
  9: {'under': 1.0, 'over': 0.6792890995260664, 'mean': 0.6792890995260664},
  10: {'under': 1.0, 'over': 0.2900252206809584, 'mean': 0.2900252206809584},
  11: {'under': 0.8400744050994714,
   'over': 0.72427

In [101]:
templates_score = {}
templates_count = 24*3
seg_type_score = {}
seg_type_count = 24*3

for t in template_type:
    for n in range(1, 25):
        for s in seg_type:

            score = float(seg_midi_score[t][n][s])

            if t not in templates_score:
                templates_score[t] = score
            else:
                templates_score[t] += score

            if s not in seg_type_score:
                seg_type_score[s] = score
            else:
                seg_type_score[s] += score

In [102]:
templates_score = {k: v/templates_count for k, v in templates_score.items()}
seg_type_score = {k: v/seg_type_count for k, v in seg_type_score.items()}

In [103]:
templates_score

{'binary': 0.7549058922273666,
 'ks': 0.7353725246182548,
 'harmonic': 0.7706214610448763}

對於midi檔的local key prediction來說，harmonic的表現是最好的。

In [104]:
seg_type_score

{'under': 0.8567036305554865,
 'mean': 0.6736157499497641,
 'over': 0.7305804973852469}

對於midi檔的local key prediction來說，和其他四種version相較起來，是under segmantation的情況較少出現。

值得注意的是這個分數比其他四種版本優異。結合Q4的結果，也許midi檔用來偵測local key prediction是比較好的選擇。