datasets.py
from six import StringIO
from io import BytesIO, TextIOWrapper
import csv
import datetime
import json
import math
import os
import zipfile
from collections import defaultdict
import six
from six.moves import range
from flask import Blueprint, render_template, request, jsonify, redirect, url_for, send_file, current_app
from flask_login import login_required, current_user
from flask_wtf import FlaskForm
from flask_wtf.csrf import generate_csrf
from werkzeug.exceptions import NotFound, Unauthorized, BadRequest, Forbidden
import db.dataset
import db.dataset_eval
import db.exceptions
import db.data
import db.user
from db.dataset import slugify
from utils import dataset_validator
from webserver import flash, forms
from webserver.decorators import service_session_login_required
from webserver.external import musicbrainz
from webserver.views.api.exceptions import APIUnauthorized
# The values below are defined in the 'classification_project_template.yaml' file.
C = '-5, -3, -1, 1, 3, 5, 7, 9, 11'
gamma = '3, 1, -1, -3, -5, -7, -9, -11'
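# These are comma-separated integer lists; the evaluate view below splits them
# on "," and casts each entry with int(), which tolerates the surrounding
# spaces, e.g. [int(i) for i in C.split(",")] -> [-5, -3, -1, 1, 3, 5, 7, 9, 11].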
datasets_bp = Blueprint("datasets", __name__)
class JSONDateTimeEncoder(json.JSONEncoder):
"""A JSONEncoder which turns datetime objects into ISO 8601-formatted strings"""
def default(self, obj):
if isinstance(obj, datetime.datetime):
return obj.isoformat()
# Let the base class default method raise the TypeError
return json.JSONEncoder.default(self, obj)
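# Illustrative example (not part of the original module) of how the encoder above
# serialises datetimes when page_props are dumped; the dict contents are made up:
#
#   >>> json.dumps({"created": datetime.datetime(2020, 1, 1)}, cls=JSONDateTimeEncoder)
#   '{"created": "2020-01-01T00:00:00"}'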
def _pagenum_to_offset(pagenum, limit):
    # Convert a page number and limit into start/end offsets for slicing a list,
    # e.g. page 3 with limit 5 -> list[10:15]
if pagenum < 1:
return 0, limit
start = (pagenum-1) * limit
end = start + limit
return start, end
def _make_pager(data, page, url, urlargs):
DEFAULT_LIMIT = 10
total = len(data)
    total_pages = int(math.ceil(total / float(DEFAULT_LIMIT)))  # float() keeps the division correct on Python 2 as well
if page > total_pages:
page = total_pages
start, end = _pagenum_to_offset(page, DEFAULT_LIMIT)
dataview = data[start:end]
pages = []
for p in range(1, total_pages+1):
pages.append( (p, "%s?page=%s" % (url_for(url, **urlargs), p)) )
prevpage = None
if page > 1:
prevpage = "%s?page=%s" % (url_for(url, **urlargs), page-1)
nextpage = None
if page < total_pages:
nextpage = "%s?page=%s" % (url_for(url, **urlargs), page+1)
return dataview, page, total_pages, prevpage, pages, nextpage
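# Example of the pager behaviour (hypothetical numbers): with 23 datasets,
# page=2 and the DEFAULT_LIMIT of 10, _make_pager returns data[10:20] as
# dataview, page=2, total_pages=3, a prevpage URL pointing at page 1, three
# (page, URL) tuples in pages, and a nextpage URL pointing at page 3.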
@datasets_bp.route("/list", defaults={"status": "all"})
@datasets_bp.route("/list/<status>")
def list_datasets(status):
if status != "all" and status not in db.dataset_eval.VALID_STATUSES:
status = "all"
page = request.args.get("page", 1)
try:
page = int(page)
except ValueError:
page = 1
alldatasets = db.dataset.get_public_datasets(status)
datasets, page, total_pages, prevpage, pages, nextpage = _make_pager(alldatasets,
page, ".list_datasets", {"status": status})
return render_template("datasets/list.html",
datasets=datasets,
status=status,
page=page,
pages=pages,
total_pages=total_pages,
prevpage=prevpage,
nextpage=nextpage)
@datasets_bp.route("/<uuid:dataset_id>")
def view(dataset_id):
ds = get_dataset(dataset_id)
author = db.user.get(ds["author"])
page_props = {
"dataset_mode": "view",
"data": {
"datasetId": str(dataset_id),
"dataset": ds,
"author": author
}
}
return render_template(
"datasets/view.html",
dataset=ds,
author=author,
page_props=json.dumps(page_props, cls=JSONDateTimeEncoder)
)
@datasets_bp.route("/<uuid:dataset_id>/download_annotation")
def download_annotation_csv(dataset_id):
""" Converts dataset dict to csv for user to download
"""
ds = get_dataset(dataset_id)
fp = _convert_dataset_to_csv_stringio(ds)
bio = BytesIO()
bio.write(fp.getvalue().encode())
bio.seek(0)
file_name = "dataset_annotations_%s.csv" % db.dataset.slugify(ds["name"])
return send_file(bio,
mimetype='text/csv',
as_attachment=True,
attachment_filename=file_name)
@datasets_bp.route("/<uuid:dataset_id>/<uuid:job_id>/download_model")
@login_required
def download_dataset_history(dataset_id, job_id):
""" Converts dataset dict to csv for user to download
"""
ds = get_dataset(dataset_id)
jobs = db.dataset_eval.get_jobs_for_dataset(ds["id"])
this_job = [j for j in jobs if j["id"] == job_id]
if not this_job:
raise NotFound("No such evaluation job")
this_job = this_job[0]
if this_job.get("status") != db.dataset_eval.STATUS_DONE:
raise NotFound("Job hasn't finished")
history_path = this_job.get("result", {}).get("history_path")
if not history_path or not os.path.exists(history_path):
raise NotFound("Cannot find history file")
file_name = os.path.basename(history_path)
return send_file(history_path,
mimetype='application/octet-stream',
as_attachment=True,
attachment_filename=file_name)
def _convert_dataset_to_csv_stringio(dataset):
"""Convert a dataset to a CSV representation that can be imported
by the dataset importer.
A dataset file contains a line for each item in the format
        mbid,classname
Arguments:
dataset: a dataset loaded with get_dataset
Returns:
A rewound StringIO containing a CSV representation of the dataset"""
# We need to encode all text fields, because they may have non-ascii characters
# - dataset description, class names, class descriptions
# TODO: On upgrade to python 3, check that stringio accepts the correct data
# (may have to change to bytesio if we encode this data)
fp = StringIO()
writer = csv.writer(fp)
# write dataset description only if it is set
if dataset["description"]:
description = dataset["description"]
writer.writerow(["description", description])
for ds_class in dataset["classes"]:
# write class description only if it is set
if ds_class["description"]:
ds_class_description = ds_class["description"]
ds_class_desc_head = "description:" + ds_class["name"]
writer.writerow([ds_class_desc_head, ds_class_description])
for ds_class in dataset["classes"]:
class_name = ds_class["name"]
for rec in ds_class["recordings"]:
writer.writerow([rec, class_name])
fp.seek(0)
return fp
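# Sketch of the CSV produced above for a hypothetical two-class dataset; the
# class names and MBIDs are made-up placeholders:
#
#   description,My genre dataset
#   description:rock,Guitar-driven recordings
#   00000000-0000-0000-0000-000000000001,rock
#   00000000-0000-0000-0000-000000000002,jazz
#
# Rows appear in this order because the writer emits the dataset description
# first, then the per-class descriptions that are set, then one
# <mbid>,<class name> row per recording.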
@datasets_bp.route("/<uuid:dataset_id>/download_dataset")
def download_dataset(dataset_id):
"""Download the full contents of a dataset, including lowlevel files.
The dataset is given as a zip file.
If the number of items in a dataset is more than config.DATASET_DOWNLOAD_RECORDINGS_LIMIT
then redirect back to the dataset page and don't download it.
"""
if current_app.config["DATASET_DOWNLOAD_RECORDINGS_LIMIT"] <= 0:
flash.error("Downloading complete dataset is disabled")
return redirect(url_for(".view", dataset_id=dataset_id))
ds = get_dataset(dataset_id)
if ds["num_recordings"] > current_app.config["DATASET_DOWNLOAD_RECORDINGS_LIMIT"]:
flash.error("Downloading complete dataset is disabled for datasets with "
"more than %d recordings." % current_app.config["DATASET_DOWNLOAD_RECORDINGS_LIMIT"])
return redirect(url_for(".view", dataset_id=dataset_id))
dataset_name = slugify(ds["name"])
zip_file = generate_zip_from_dataset(ds)
return send_file(zip_file,
as_attachment=True,
attachment_filename="acousticbrainz-dataset-{}-{}.zip".format(dataset_id, dataset_name))
def generate_zip_from_dataset(ds):
"""Build a zip archive of all low-level documents that make up this dataset.
A folder is made for each class.
Arguments:
ds: the dataset to generate data for
Returns:
        A rewound BytesIO containing a zip file with the dataset contents
"""
dataset_name = slugify(ds["name"])
    sio = BytesIO()  # zipfile writes bytes, so use a binary buffer
zipfp = zipfile.ZipFile(sio, "w", allowZip64=True)
for data in ds["classes"]:
class_name = slugify(data["name"])
CHUNK_SIZE = 100
recordings = [(mbid, 0) for mbid in data.get("recordings", [])]
chunks = [recordings[i: i + CHUNK_SIZE] for i in range(0, len(recordings), CHUNK_SIZE)]
for chunk in chunks:
recordings_json = db.data.load_many_low_level(chunk)
for mbid, offset in chunk:
                # Avoid shadowing the outer loop variable that holds the class data
                recording_data = recordings_json.get(mbid, {}).get(str(offset))
                if recording_data:
                    zipfp.writestr(os.path.join(dataset_name, class_name, "{}.json".format(mbid)), json.dumps(recording_data))
zipfp.close()
sio.seek(0)
return sio
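# Sketch of the resulting archive layout for a hypothetical dataset named
# "My Genres" with classes "rock" and "jazz" (slugs and MBIDs are placeholders);
# each entry holds the low-level JSON document for that recording:
#
#   my-genres/rock/00000000-0000-0000-0000-000000000001.json
#   my-genres/jazz/00000000-0000-0000-0000-000000000002.json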
@datasets_bp.route("/accuracy")
def accuracy():
return render_template("datasets/accuracy.html")
@datasets_bp.route("/<uuid:dataset_id>/evaluation")
def eval_info(dataset_id):
ds = get_dataset(dataset_id)
page_props = {
"dataset_mode": "eval-info",
"data": {
"datasetId": str(dataset_id),
"dataset": ds,
"author": db.user.get(ds["author"])
}
}
return render_template(
"datasets/eval-info.html",
dataset=ds,
author=db.user.get(ds["author"]),
page_props=json.dumps(page_props, cls=JSONDateTimeEncoder)
)
@datasets_bp.route("/service/<uuid:dataset_id>/<uuid:job_id>", methods=["DELETE"])
def eval_job(dataset_id, job_id):
"""Delete a dataset evaluation job.
    In the case that the user isn't logged in, or the requested dataset doesn't belong to
    the currently logged in user, we return 404 (instead of 401) in order to prevent
    leaking the existence of the dataset."""
# Getting dataset to check if it exists and current user is allowed to view it.
ds = get_dataset(dataset_id)
job = db.dataset_eval.get_job(job_id)
if not job or job["dataset_id"] != ds["id"]:
return jsonify({
"success": False,
"error": "Can't find evaluation job with a specified ID for this dataset.",
}), 404
if request.method == "DELETE":
if not current_user.is_authenticated or ds["author"] != current_user.id:
return jsonify({
"success": False,
"error": "You are not allowed to delete this evaluation job.",
}), 401 # Unauthorized
try:
db.dataset_eval.delete_job(job_id)
except db.exceptions.DatabaseException as e:
return jsonify({
"success": False,
"error": str(e),
}), 400 # Bad Request
return jsonify({"success": True})
@datasets_bp.route("/service/<uuid:dataset_id>/evaluation/json")
@service_session_login_required
def eval_jobs(dataset_id):
# Getting dataset to check if it exists and current user is allowed to view it.
ds = get_dataset(dataset_id)
jobs = db.dataset_eval.get_jobs_for_dataset(ds["id"])
# TODO(roman): Remove unused data ("confusion_matrix", "dataset_id").
last_edited_time = ds["last_edited"]
for job in jobs:
if "result" in job and job["result"]:
job['outdated'] = last_edited_time > job["created"]
job["result"]["table"] = prepare_table_from_cm(job["result"]["confusion_matrix"])
return jsonify({
"jobs": jobs,
"dataset": {
"author": db.user.get(ds["author"]),
}
})
@datasets_bp.route("/<uuid:dataset_id>/evaluate", methods=('GET', 'POST'))
@login_required
def evaluate(dataset_id):
"""Endpoint for submitting dataset for evaluation."""
ds = get_dataset(dataset_id)
if not ds["public"]:
flash.warn("Can't add private datasets into evaluation queue.")
return redirect(url_for(".eval_info", dataset_id=dataset_id))
if db.dataset_eval.job_exists(dataset_id):
flash.warn("An evaluation job for this dataset has been already created.")
return redirect(url_for(".eval_info", dataset_id=dataset_id))
# Validate dataset structure before choosing evaluation preferences
try:
db.dataset_eval.validate_dataset_structure(ds)
except db.dataset_eval.IncompleteDatasetException as e:
flash.error("Cannot add this dataset because of a validation error: %s" % e)
return redirect(url_for("datasets.view", dataset_id=dataset_id))
form = forms.DatasetEvaluationForm()
if form.validate_on_submit():
try:
if form.filter_type.data == forms.DATASET_EVAL_NO_FILTER:
form.filter_type.data = None
c_values = None
gamma_values = None
preprocessing_values = None
if form.svm_filtering.data:
c_values = [int(i) for i in form.c_value.data.split(",")]
gamma_values = [int(i) for i in form.gamma_value.data.split(",")]
preprocessing_values = form.preprocessing_values.data
db.dataset_eval.evaluate_dataset(
dataset_id=ds["id"],
normalize=form.normalize.data,
eval_location=form.evaluation_location.data,
c_values=c_values,
gamma_values=gamma_values,
preprocessing_values=preprocessing_values,
filter_type=form.filter_type.data,
)
flash.info("Dataset %s has been added into evaluation queue." % ds["id"])
except db.dataset_eval.IncompleteDatasetException as e:
flash.error("Cannot add this dataset because of a validation error: %s" % e)
except db.dataset_eval.JobExistsException:
flash.warn("An evaluation job for this dataset has been already created.")
return redirect(url_for(".eval_info", dataset_id=dataset_id))
return render_template("datasets/evaluate.html", dataset=ds, form=form)
@datasets_bp.route("/service/<uuid:dataset_id>/json")
def view_json(dataset_id):
"""Get the JSON of a dataset.
    In the case that the user isn't logged in, or the requested dataset doesn't belong to
    the currently logged in user, we return 404 (instead of 401) in order to prevent
    leaking the existence of the dataset."""
dataset = get_dataset(dataset_id)
dataset_clean = {
"name": dataset["name"],
"description": dataset["description"],
"classes": [],
"public": dataset["public"],
}
for cls in dataset["classes"]:
dataset_clean["classes"].append({
"name": cls["name"],
"description": cls["description"],
"recordings": cls["recordings"],
})
return jsonify(dataset_clean)
@datasets_bp.route("/create", methods=("GET", ))
@login_required
def create():
csrf = generate_csrf()
page_props = {
"dataset_mode": "create",
"data": {
"csrfToken": csrf
}
}
return render_template("datasets/edit.html", page_props=json.dumps(page_props))
@datasets_bp.route("/service/create", methods=("POST", ))
@service_session_login_required
def create_service():
if request.method == "POST":
dataset_dict = request.get_json()
if not dataset_dict:
return jsonify(
success=False,
error="Data must be submitted in JSON format.",
), 400
try:
dataset_id = db.dataset.create_from_dict(dataset_dict, current_user.id)
except dataset_validator.ValidationException as e:
return jsonify(
success=False,
error=e.error,
), 400
return jsonify(
success=True,
dataset_id=dataset_id,
)
@datasets_bp.route("/import", methods=("GET", "POST"))
@login_required
def import_csv():
form = forms.DatasetCSVImportForm()
if form.validate_on_submit():
        # Decode the rows as UTF-8.
        # The utf-8-sig codec removes a UTF-8 BOM if one exists at the start of the file;
        # without it, col1 of the first row could start with 0xfeff.
        # Files without a BOM are decoded as regular UTF-8.
file = TextIOWrapper(request.files[form.file.name], encoding='utf-8-sig')
description, classes = _parse_dataset_csv(file)
dataset_dict = {
"name": form.name.data,
"description": description if description else form.description.data,
"classes": classes,
"public": form.public.data,
}
try:
dataset_id = db.dataset.create_from_dict(dataset_dict, current_user.id)
except dataset_validator.ValidationException as e:
raise BadRequest(e.error)
flash.info("Dataset has been imported successfully.")
return redirect(url_for(".view", dataset_id=dataset_id))
else:
return render_template("datasets/import.html", form=form)
def _parse_dataset_csv(file):
"""Parse a csv file containing a representation of a dataset.
The csv file should have rows with 2 columns in one of the following forms:
<recording_id>,<classname>
description,<dataset_description>
description:<classname>,<class_description>
Note:
The caller should open the stream in the 'utf-8-sig' encoding to remove
the UTF-8 BOM before passing it to this method. This method no longer
handles BOM.
Arguments:
        file: an open file object containing the csv representation of the dataset
Returns: a tuple of (dataset description, [classes]), where classes is a list of dictionaries
{"name": class name, "description": class description, "recordings": []}
             a class is only returned if there are recordings for it; a class
             description with no recordings is ignored.
    """
classes_dict = defaultdict(lambda: {"description": "", "recordings": []})
dataset_description = None
for class_row in csv.reader(file):
if len(class_row) != 2:
raise BadRequest("Bad dataset! Each row must contain one <MBID, class name> pair.")
col1 = class_row[0]
col2 = class_row[1]
if col1 == "description":
# row is the dataset description
dataset_description = col2
elif col1[:12] == "description:":
# row is a class description
class_name = col1[12:]
classes_dict[class_name]["description"] = col2
else:
# row is a recording
classes_dict[col2]["recordings"].append(col1)
classes = []
for name, class_data in six.iteritems(classes_dict):
if class_data["recordings"]:
classes.append({
"recordings": class_data["recordings"] if "recordings" in class_data else [],
"name": name,
"description": class_data["description"] if "description" in class_data else None,
})
return dataset_description, classes
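# Worked example (hypothetical CSV contents and MBID) of what the parser above
# returns:
#
#   description,My genre dataset
#   description:rock,Guitar-driven recordings
#   00000000-0000-0000-0000-000000000001,rock
#
# _parse_dataset_csv would return
#   ("My genre dataset",
#    [{"name": "rock",
#      "description": "Guitar-driven recordings",
#      "recordings": ["00000000-0000-0000-0000-000000000001"]}])
#
# A "description:jazz" row without any matching recording rows would be dropped,
# because classes without recordings are not returned.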
@datasets_bp.route("/<uuid:dataset_id>/edit", methods=("GET", ))
@login_required
def edit(dataset_id):
ds = get_dataset(dataset_id)
if ds["author"] != current_user.id:
raise Unauthorized("You can't edit this dataset.")
csrf = generate_csrf()
page_props = {
"dataset_mode": "edit",
"data": {
"datasetId": str(dataset_id),
"csrfToken": csrf
}
}
return render_template("datasets/edit.html",
mode="edit",
dataset_name=ds["name"],
page_props=json.dumps(page_props, cls=JSONDateTimeEncoder))
@datasets_bp.route("/service/<uuid:dataset_id>/edit", methods=("POST", ))
@service_session_login_required
def edit_service(dataset_id):
ds = get_dataset(dataset_id)
if ds["author"] != current_user.id:
raise APIUnauthorized("You can't edit this dataset.")
if request.method == "POST":
dataset_dict = request.get_json()
if not dataset_dict:
return jsonify(
success=False,
error="Data must be submitted in JSON format.",
), 400
try:
db.dataset.update(str(dataset_id), dataset_dict, current_user.id)
except dataset_validator.ValidationException as e:
return jsonify(
success=False,
error=e.error,
), 400
return jsonify(
success=True,
dataset_id=dataset_id,
)
@datasets_bp.route("/<uuid:dataset_id>/delete", methods=("GET", "POST"))
@login_required
def delete(dataset_id):
ds = get_dataset(dataset_id)
if ds["author"] != current_user.id:
raise Forbidden("You can't delete this dataset.")
form = FlaskForm()
if form.validate_on_submit():
db.dataset.delete(ds["id"])
flash.success("Dataset has been deleted.")
return redirect(url_for("user.profile", musicbrainz_id=current_user.musicbrainz_id))
else: # GET
return render_template("datasets/delete.html", dataset=ds, form=form)
def _get_recording_info_for_mbid(mbid):
try:
recording = musicbrainz.get_recording_by_id(mbid)
return jsonify(recording={
"title": recording["title"],
"artist": recording["artist-credit-phrase"],
})
except musicbrainz.DataUnavailable as e:
return jsonify(error=str(e)), 404
@datasets_bp.route("/metadata/recording/<uuid:mbid>")
@login_required
def recording_info(mbid):
"""Endpoint for getting information about recordings (title and artist)."""
return _get_recording_info_for_mbid(mbid)
@datasets_bp.route("/metadata/dataset/<uuid:dataset_id>/<uuid:mbid>")
def recording_info_in_dataset(dataset_id, mbid):
"""Endpoint for getting information about recordings (title and artist), for the
case when user is not logged in.
Args:
mbid (uuid): the recording mbid for which info is to be returned
dataset_id (uuid): the dataset id to which the passed recording mbid belongs
Returns:
json: If the mbid is present in the dataset, info about the recording
404 otherwise
"""
if not db.dataset.check_recording_in_dataset(dataset_id, mbid):
return jsonify(error="Recording not found in the dataset"), 404
return _get_recording_info_for_mbid(mbid)
def get_dataset(dataset_id):
"""Wrapper for `dataset.get` function in `db` package.
Checks the following conditions and raises NotFound exception if they
aren't met:
* Specified dataset exists.
* Current user is allowed to access this dataset.
"""
try:
ds = db.dataset.get(dataset_id)
except db.exceptions.NoDataFoundException as e:
raise NotFound("Can't find this dataset.")
if ds["public"] or (current_user.is_authenticated and
ds["author"] == current_user.id):
return ds
else:
raise NotFound("Can't find this dataset.")
def prepare_table_from_cm(confusion_matrix):
"""Prepares data for table to visualize confusion matrix from Gaia.
    This works with the modified version of the confusion matrix that we store in our
    database (we store the number of recordings in each predicted class instead of
    the actual UUIDs of the recordings). See gaia_wrapper.py in the dataset_eval package
for implementation details.
"""
all_classes = set()
dataset_size = 0 # Number of recordings in the dataset
for actual_cls in confusion_matrix:
all_classes.add(actual_cls)
for predicted_cls in confusion_matrix[actual_cls]:
# Need to add to class list from there as well because some classes
# might be missing from the outer dictionary.
all_classes.add(predicted_cls)
dataset_size += confusion_matrix[actual_cls][predicted_cls]
# Sorting to be able to match columns in the table.
all_classes = sorted(all_classes)
table_data = {
"classes": all_classes,
"rows": [],
}
for actual in all_classes:
        # Count how many tracks were associated with this class during classification
        predicted_class_size = sum(confusion_matrix[actual].values())
row = {
"total": predicted_class_size,
"proportion": predicted_class_size * 100.0 / dataset_size,
"predicted": [],
}
for predicted in all_classes:
current_cls = {
"count": 0,
"percentage": 0,
}
if actual in confusion_matrix:
if predicted in confusion_matrix[actual]:
current_cls["count"] = confusion_matrix[actual][predicted]
current_cls["percentage"] = current_cls["count"] * 100.0 / predicted_class_size
row["predicted"].append(current_cls)
table_data["rows"].append(row)
return table_data
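# Worked example for prepare_table_from_cm (hypothetical counts). Given the
# stored confusion matrix
#   {"rock": {"rock": 8, "jazz": 2}, "jazz": {"jazz": 9, "rock": 1}}
# dataset_size is 20, the classes sort to ["jazz", "rock"], and the rows are:
#   jazz: total=10, proportion=50.0, predicted counts [9, 1] -> [90.0%, 10.0%]
#   rock: total=10, proportion=50.0, predicted counts [2, 8] -> [20.0%, 80.0%]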