# Evaluation of MTGDB classification tasks with SVMs and Transfer Learning CNNs

Run `Display_and_parse_jamendo_annotations.ipynb` first to generate the required `.json` ground-truth files.

In [1]:
%pylab inline
%load_ext autoreload

Populating the interactive namespace from numpy and matplotlib


In [2]:
import csv
import collections
import json
import os
import matplotlib.pyplot as plt

## Evaluation strategy

*  Use only recordings whose ground truth has a matching class in the classifier. For example, if a recording is only annotated with the class electronic, and this class does not appear in the classifier model, we discard it.

*  Only use recordings that have one ground-truth genre tag, and discard the rest of the recordings when computing evaluation metrics

*  Remove all recordings that have duplicates from consideration

## Evaluation

In [11]:
# Genre maps for our datasets, one dict per dataset, in the order:
# Rosamerica, Tzanetakis, Dortmund, Electronic.
# Every map below is an identity mapping (each label maps to itself).

maps = [
    # Rosamerica
    # 'dan': we listened to this dataset. It's all eurotrance (subgenre of electronic).
    {label: [label] for label in ('cla', 'dan', 'hip', 'jaz', 'pop', 'rhy', 'roc')},

    # Tzanetakis
    # 'dis': disco - electronic & acoustic soul.
    # 'reg': reggae, in our gt a subgenre of ska.
    {label: [label] for label in ('blu', 'cla', 'cou', 'dis', 'hip',
                                  'jaz', 'met', 'pop', 'reg', 'roc')},

    # Dortmund
    # 'alternative': alternative rock is a subgenre of rock.
    {label: [label] for label in ('alternative', 'blues', 'electronic',
                                  'folkcountry', 'funksoulrnb', 'jazz',
                                  'pop', 'raphiphop', 'rock')},

    # Electronic
    # NOTE(review): values here are plain strings, whereas the three maps above
    # use one-element lists. Kept as-is; confirm that genreeval accepts both forms.
    {label: label for label in ('ambient', 'techno', 'dnb', 'trance', 'house')},
]

In [14]:
import genreeval
from IPython.display import HTML

# For every genre task, evaluate the SVM and the CNN estimates against the
# task's ground truth (using the genre map at the same position in `maps`)
# and collect one HTML section per classifier.
genre_tasks = ['rosamerica', 'tzanetakis', 'dortmund', 'electronic']
sections = []

for i, task in enumerate(genre_tasks):
    ground_truth = "groundtruth/groundtruth_{}.json".format(task)
    svm_run = ('{} with SVM'.format(task),
               'results_jamendo_svm/genre_{}.json'.format(task))
    cnn_run = ('{} with CNNs'.format(task),
               'predictions/jamendo/vggish_tl_audioset/genre_{}/jamendo_test_split0_genre_{}_predicted_classes.json'.format(task, task))

    for classifier, estimates_file in (svm_run, cnn_run):
        evaluator = genreeval.Evaluator(ground_truth, maps[i], estimates_file, classifier, "Evaluation")
        evaluator.load()
        evaluator.evaluate('S2', 'ALL', 'D1')

        # Heading, both accuracy figures, then the HTML table exposed by the result.
        result = evaluator.result
        sections.extend([
            "<h1>%s</h1>" % classifier,
            "<h2>%s</h2>\n" % (result.data['name'], ),
            "<p><b>Raw accuracy: </b>%s\n" % result.raw_accuracy,
            "<br><b>Normalised accuracy: </b>%s</p>\n" % result.normalised_accuracy,
            result.html + "\n\n",
        ])

output = ''.join(sections)
HTML(output)

Loading estimates...
done
Loading ground truth...
done
rosamerica with SVM - S2 - ALL - D1
 [                  0%                  ]

Loading estimates...
done
Loading ground truth...
done
rosamerica with CNNs - S2 - ALL - D1
 [                  0%                  ]

Loading estimates...
done
Loading ground truth...
done
tzanetakis with SVM - S2 - ALL - D1
 [                  0%                  ]

Loading estimates...
done
Loading ground truth...
done
tzanetakis with CNNs - S2 - ALL - D1
 [                  0%                  ]

Loading estimates...
done
Loading ground truth...
done
dortmund with SVM - S2 - ALL - D1
 [                  0%                  ]

Loading estimates...
done
Loading ground truth...
done
dortmund with CNNs - S2 - ALL - D1
 [                  0%                  ]

Loading estimates...
done
Loading ground truth...
done
electronic with SVM - S2 - ALL - D1
 [                  0%                  ]

Loading estimates...
done
Loading ground truth...
done
electron

0,1,2,3,4,5,6,7
,cla,dan,hip,jaz,pop,rhy,roc
cla,78.95,0.88,0.0,4.39,1.75,9.65,4.39
dan,0.0,56.36,10.91,0.91,11.82,16.36,3.64
hip,2.38,11.9,54.76,2.38,19.05,9.52,0.0
jaz,22.22,0.0,0.0,22.22,5.56,44.44,5.56
pop,8.99,1.12,4.49,4.49,23.6,51.69,5.62
rhy,0.0,13.79,17.24,13.79,3.45,44.83,6.9
roc,1.21,8.48,5.45,3.64,23.03,9.09,49.09
unmatched,17.15,12.5,9.88,6.1,10.17,37.21,6.98
spe,100.0,0.0,0.0,0.0,0.0,0.0,0.0

0,1,2,3,4,5,6,7
,cla,dan,hip,jaz,pop,rhy,roc
cla,33.33,3.33,0.0,61.67,0.0,1.67,0.0
dan,0.0,95.61,3.51,0.0,0.88,0.0,0.0
hip,0.0,6.0,84.0,4.0,6.0,0.0,0.0
jaz,0.0,4.76,9.52,76.19,4.76,4.76,0.0
pop,0.0,3.26,5.43,58.7,18.48,11.96,2.17
rhy,0.0,0.0,22.58,35.48,22.58,16.13,3.23
roc,0.0,5.26,4.68,9.94,22.81,7.6,49.71
unmatched,8.43,51.4,7.58,23.03,2.81,4.78,1.97
spe,0.0,0.0,100.0,0.0,0.0,0.0,0.0

0,1,2,3,4,5
,blu,cla,hip,jaz,roc
blu,0.0,10.0,0.0,90.0,0.0
cla,0.9,45.05,0.0,54.05,0.0
hip,0.0,0.0,0.0,100.0,0.0
jaz,0.0,0.0,0.0,94.44,5.56
roc,0.0,0.0,0.0,100.0,0.0
unmatched,0.22,2.23,0.0,97.1,0.45
pop,1.1,0.0,0.0,98.9,0.0
met,0.0,0.0,0.0,100.0,0.0
cou,0.0,0.0,0.0,100.0,0.0

0,1,2,3,4,5,6,7,8,9,10
,blu,cla,cou,dis,hip,jaz,met,pop,reg,roc
blu,60.0,0.0,25.0,0.0,0.0,0.0,0.0,0.0,0.0,15.0
cla,5.13,88.89,1.71,0.0,0.0,0.85,0.0,1.71,0.0,1.71
cou,40.0,0.0,40.0,0.0,0.0,10.0,10.0,0.0,0.0,0.0
dis,0.0,0.0,0.0,66.67,0.0,0.0,0.0,16.67,16.67,0.0
hip,0.0,6.12,0.0,2.04,73.47,0.0,0.0,14.29,0.0,4.08
jaz,14.29,0.0,4.76,4.76,0.0,42.86,0.0,9.52,0.0,23.81
met,0.0,0.0,0.0,0.0,0.0,0.0,89.29,0.0,0.0,10.71
pop,27.37,2.11,35.79,4.21,1.05,1.05,0.0,11.58,1.05,15.79
reg,12.5,0.0,0.0,12.5,12.5,0.0,0.0,0.0,62.5,0.0

0,1,2,3,4,5,6,7
,alternative,blues,electronic,folkcountry,jazz,raphiphop,rock
alternative,11.86,1.69,83.05,1.69,1.69,0.0,0.0
blues,35.71,14.29,42.86,7.14,0.0,0.0,0.0
electronic,1.56,1.88,94.38,0.0,2.19,0.0,0.0
folkcountry,36.84,5.26,36.84,21.05,0.0,0.0,0.0
jazz,0.0,7.14,78.57,0.0,14.29,0.0,0.0
raphiphop,0.0,0.0,89.19,0.0,0.0,2.7,8.11
rock,36.59,2.44,51.22,0.0,1.63,0.0,8.13
pop,27.27,5.19,51.95,10.39,5.19,0.0,0.0
unmatched,8.97,4.48,67.71,8.97,9.42,0.45,0.0

0,1,2,3,4,5,6,7,8
,alternative,blues,electronic,folkcountry,jazz,pop,raphiphop,rock
alternative,0.0,0.0,40.98,3.28,21.31,0.0,1.64,32.79
blues,0.0,14.29,0.0,42.86,14.29,0.0,0.0,28.57
electronic,1.2,0.0,88.02,0.0,7.78,0.3,2.69,0.0
folkcountry,0.0,0.0,0.0,73.68,21.05,0.0,0.0,5.26
jazz,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0
pop,5.0,0.0,3.75,58.75,21.25,2.5,6.25,2.5
raphiphop,0.0,0.0,18.6,0.0,4.65,0.0,76.74,0.0
rock,0.79,3.15,0.79,7.09,13.39,0.0,0.0,74.8
unmatched,0.43,0.43,49.36,6.01,41.63,0.0,1.29,0.86

0,1,2,3,4,5
,ambient,dnb,house,techno,trance
ambient,84.85,0.87,14.29,0.0,0.0
dnb,26.67,0.0,50.0,0.0,23.33
house,29.17,0.0,70.83,0.0,0.0
techno,17.14,0.0,68.57,2.86,11.43
trance,25.0,1.47,50.0,1.47,22.06
unmatched,63.93,0.95,25.95,0.38,8.78

0,1,2,3,4,5
,ambient,dnb,house,techno,trance
ambient,0.41,4.98,3.32,36.93,54.36
dnb,0.0,0.0,0.0,11.43,88.57
house,16.0,4.0,0.0,4.0,76.0
techno,2.7,2.7,0.0,0.0,94.59
trance,2.9,1.45,0.0,7.25,88.41
unmatched,3.64,6.19,7.83,17.12,65.21


In [4]:
# Evaluate each binary mood task for both classifiers (SVM and CNN),
# print the number of tracks in each confusion matrix and render an HTML report.
mood_tasks = ['mood_acoustic', 'mood_electronic', 'mood_aggressive', 'mood_relaxed',
              'mood_happy', 'mood_sad', 'mood_party']
sections = []

for task in mood_tasks:
    ground_truth = "groundtruth/groundtruth_{}.json".format(task)
    svm_run = ('{} with SVM'.format(task),
               'results_jamendo_svm/{}.json'.format(task))
    cnn_run = ('{} with CNNs'.format(task),
               'predictions/jamendo/vggish_tl_audioset/{}/jamendo_test_split0_{}_predicted_classes.json'.format(task, task))

    # The mood name is whatever follows the 'mood_' prefix of the task name.
    mood = task.partition('_')[2]
    gt_map = {'not_{}'.format(mood): '0', mood: '1'}

    for classifier, estimates_file in (svm_run, cnn_run):
        evaluator = genreeval.Evaluator(ground_truth, gt_map, estimates_file, classifier, "Evaluation")
        evaluator.load()
        evaluator.evaluate('S2', 'ALL', 'D1')

        result = evaluator.result
        sections.extend([
            "<h1>%s</h1>" % classifier,
            "<h2>%s</h2>\n" % (result.data['name'], ),
            "<p><b>Raw accuracy: </b>%s\n" % result.raw_accuracy,
            "<br><b>Normalised accuracy: </b>%s</p>\n" % result.normalised_accuracy,
            result.html + "\n\n",
        ])
        # Sum over every cell of the confusion matrix.
        print('Total tracks considered: {}'.format(result.confusion_matrix.sum().sum()))

output = ''.join(sections)
HTML(output)


Loading estimates...
done
Loading ground truth...
done
mood_acoustic with SVM - S2 - ALL - D1
 [                  0%                  ]

Total tracks considered: 912
Loading estimates...
done
Loading ground truth...
done
mood_acoustic with CNNs - S2 - ALL - D1
 [                  0%                  ]

Total tracks considered: 1000
Loading estimates...
done
Loading ground truth...
done
mood_electronic with SVM - S2 - ALL - D1
 [                  0%                  ]

Total tracks considered: 912
Loading estimates...
done
Loading ground truth...
done
mood_electronic with CNNs - S2 - ALL - D1
 [                  0%                  ]

Total tracks considered: 1000
Loading estimates...
done
Loading ground truth...
done
mood_aggressive with SVM - S2 - ALL - D1
 [                  0%                  ]

Total tracks considered: 912
Loading estimates...
done
Loading ground truth...
done
mood_aggressive with CNNs - S2 - ALL - D1
 [                  0%                  ]

Total tracks conside

0,1,2
,0.0,1.0
0.0,77.26,22.74
1.0,26.88,73.12

0,1,2
,0.0,1.0
0.0,82.36,17.64
1.0,17.98,82.02

0,1,2
,0.0,1.0
0.0,68.79,31.21
1.0,28.88,71.12

0,1,2
,0.0,1.0
0.0,84.38,15.62
1.0,17.55,82.45

0,1,2
,0.0,1.0
0.0,80.98,19.02
1.0,46.7,53.3

0,1,2
,0.0,1.0
0.0,92.2,7.8
1.0,44.5,55.5

0,1,2
,1.0,0.0
1.0,93.6,6.4
0.0,73.97,26.03

0,1,2
,0.0,1.0
0.0,52.37,47.63
1.0,5.71,94.29

0,1,2
,0.0,1.0
0.0,68.06,31.94
1.0,48.55,51.45

0,1,2
,0.0,1.0
0.0,80.27,19.73
1.0,39.2,60.8

0,1,2
,1.0,0.0
1.0,87.7,12.3
0.0,56.08,43.92

0,1,2
,0.0,1.0
0.0,56.45,43.55
1.0,12.12,87.88

0,1,2
,1.0,0.0
1.0,74.1,25.9
0.0,20.88,79.12

0,1,2
,1.0,0.0
1.0,61.76,38.24
0.0,10.44,89.56


In [7]:
# Evaluate the remaining binary tasks for both classifiers (SVM and CNN)
# and render one HTML report.

# Per-task map passed to genreeval.Evaluator.
# NOTE(review): presumably ground-truth label -> estimate class ('0'/'1');
# confirm against genreeval.
misc_gt_maps = {
    'timbre': {'dark': '0', 'bright': '1'},
    'tonal_atonal': {'atonal': '0', 'tonal': '1'},
    'danceability': {'not_danceable': '0', 'danceable': '1'},
    'voice_instrumental': {'instrumental': '0', 'voice': '1'},
    'gender': {'male': '0', 'female': '1'},
}

output = ''
for task in ['timbre', 'tonal_atonal', 'danceability', 'voice_instrumental', 'gender']:
    ground_truth = "groundtruth/groundtruth_misc_{}.json".format(task)
    estimates = [('{} with SVM'.format(task), 'results_jamendo_svm/{}.json'.format(task)),
                 ('{} with CNNs'.format(task), 'predictions/jamendo/vggish_tl_audioset/{}/jamendo_test_split0_{}_predicted_classes.json'.format(task, task))]

    # Direct lookup: a task without an entry raises KeyError here instead of
    # silently reusing a gt_map left over from a previous iteration or cell.
    gt_map = misc_gt_maps[task]

    for classifier, estimates_file in estimates:
        evaluator = genreeval.Evaluator(ground_truth, gt_map, estimates_file, classifier, "Evaluation")
        evaluator.load()
        evaluator.evaluate('S2', 'ALL', 'D1')

        # Heading, both accuracy figures, then the HTML table exposed by the result.
        output += "<h1>%s</h1>" % classifier
        output += "<h2>%s</h2>\n" % (evaluator.result.data['name'], )
        output += "<p><b>Raw accuracy: </b>%s\n" % evaluator.result.raw_accuracy
        output += "<br><b>Normalised accuracy: </b>%s</p>\n" % evaluator.result.normalised_accuracy
        output += evaluator.result.html + "\n\n"
HTML(output)

Loading estimates...
done
Loading ground truth...
done
timbre with SVM - S2 - ALL - D1
 [                  0%                  ]

Loading estimates...
done
Loading ground truth...
done
timbre with CNNs - S2 - ALL - D1
 [                  0%                  ]

Loading estimates...
done
Loading ground truth...
done
tonal_atonal with SVM - S2 - ALL - D1
 [                  0%                  ]

Loading estimates...
done
Loading ground truth...
done
tonal_atonal with CNNs - S2 - ALL - D1
 [                  0%                  ]

Loading estimates...
done
Loading ground truth...
done
danceability with SVM - S2 - ALL - D1
 [                  0%                  ]

Loading estimates...
done
Loading ground truth...
done
danceability with CNNs - S2 - ALL - D1
 [                  0%                  ]

Loading estimates...
done
Loading ground truth...
done
voice_instrumental with SVM - S2 - ALL - D1
 [                  0%                  ]

Loading estimates...
done
Loading ground truth...
d

0,1,2
,0.0,1.0
0.0,61.65,38.35
1.0,54.26,45.74

0,1,2
,1.0,0.0
1.0,61.24,38.76
0.0,57.89,42.11

0,1,2
,0.0,1.0
0.0,77.78,22.22
1.0,57.6,42.4

0,1,2
,0.0,1.0
0.0,91.11,8.89
1.0,59.45,40.55

0,1,2
,0.0,1.0
0.0,68.75,31.25
1.0,14.0,86.0

0,1,2
,0.0,1.0
0.0,77.68,22.32
1.0,34.0,66.0

0,1,2
,0.0,1.0
0.0,83.23,16.77
1.0,40.0,60.0

0,1,2
,0.0,1.0
0.0,85.63,14.37
1.0,10.53,89.47

0,1,2
,0.0,1.0
0.0,61.54,38.46
1.0,28.57,71.43
2.0,30.0,70.0

0,1,2
,0.0,1.0
0.0,92.31,7.69
1.0,71.43,28.57
2.0,32.35,67.65
