In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "3"

from tools import (
    load_embeddings_and_labels, 
    train_music_model, 
    evaluate,
    id_to_labels,
    logits_to_probs,
    logits_to_text    
)
import torch
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold

import yaml
from pprint import pprint
from tqdm.notebook import tqdm
import json


In [2]:
os.chdir("/homes/lm004/toys-critical-analysis")
#test_original_ids = [] # placeholder, in cv_by_graph.py this is replaced by the actual test set

with open('music/config_binary_training.yaml', 'r') as f:
    music_config_training = yaml.safe_load(f)

music_df = pd.read_csv('music/music_binary_groundtruth.csv', index_col="stimulus_id")
# remove test set from the data

kf = KFold(n_splits=10, random_state=42, shuffle=True)

results = []
for fold_idx, (train_index, test_index) in enumerate(kf.split(music_df)):

    X, y_mid, y_emo, y_cls = load_embeddings_and_labels(
        music_df.iloc[test_index], 
        music_config_training['mid_dict'],
        music_config_training['emo_dict'],
        music_config_training['cls_dict']
    )
    #X.shape[0], y_mid["Electric/Acoustic"].shape[0], y_emo["Happy"].shape[0], y_cls["target_of_toy_ad"].shape[0]

    music_train_index, music_test_index = train_test_split(range(X.shape[0]), test_size=0.10, random_state=42)


    X_train = X[music_train_index]
    X_test = X[music_test_index]
    y_mid_train = {k: v[music_train_index] for k, v in y_mid.items()}
    y_mid_test = {k: v[music_test_index] for k, v in y_mid.items()}
    y_emo_train = {k: v[music_train_index] for k, v in y_emo.items()}
    y_emo_test = {k: v[music_test_index] for k, v in y_emo.items()}
    y_cls_train = {k: v[music_train_index] for k, v in y_cls.items()}
    y_cls_test = {k: v[music_test_index] for k, v in y_cls.items()}

    music_model = train_music_model(
        music_config_training, X_train, y_mid_train, y_emo_train, y_cls_train
    )

    results.append(evaluate(music_model, music_config_training, X_test, y_mid_test, y_emo_test, y_cls_test))

    print(f"Fold {fold_idx+1}")
    pprint(results[-1])
    print("")



GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
/homes/lm004/.conda/envs/autocritical/lib/python3.13/site-packages/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py:76: Starting from v1.9.0, `tensorboardX` has been removed as a dependency of the `pytorch_lightning` package, due to potential conflicts with other packages in the ML ecosystem. For this reason, `logger=True` will use `CSVLogger` as the default logger, unless the `tensorboard` or `tensorboardX` packages are found. Please `pip install lightning[extra]` or one of them to enable TensorBoard support by default
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [3]

  | Name       | Type        | Params | Mode 
---------------------------------------------------
0 | batch_norm | BatchNorm1d | 2.0 K  | train
1 | hidden     | Linear      | 524 K  | train
2 | bn_mid     | BatchNorm1d | 1.0 K  | train
3 | bn_emo     | BatchNorm1d | 1.0 K  | train
4 | bn_c

Fold 1
{'cls': {'target_of_toy_ad': 1.0,
         'voice_age': 0.7912087912087912,
         'voice_gender': 0.6031746031746031},
 'emo': {'Angry': 0.6714285714285715,
         'Beauty': 0.8507936507936508,
         'Calm': 0.7023809523809523,
         'Happy': 0.7023809523809523},
 'mid': {'Dense/Sparse': 0.8507936507936508,
         'Distorted/Clear': 0.4047619047619047,
         'Electric/Acoustic': 0.7142857142857143,
         'Harmonious/Disharmonious': 0.42857142857142855,
         'Heavy/Light': 0.8507936507936508,
         'High pitch/Low pitch': 0.7023809523809523,
         'Loud/Soft': 0.8507936507936508,
         'Punchy/Smooth': 0.7142857142857143,
         'Strong beat/Weak beat': 0.8634920634920634}}



/homes/lm004/.conda/envs/autocritical/lib/python3.13/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:425: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=95` in the `DataLoader` to improve performance.
/homes/lm004/.conda/envs/autocritical/lib/python3.13/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=95` in the `DataLoader` to improve performance.
/homes/lm004/.conda/envs/autocritical/lib/python3.13/site-packages/pytorch_lightning/loops/fit_loop.py:310: The number of training batches (1) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.
GPU available: True (cuda), used: True
TPU av

Fold 2
{'cls': {'target_of_toy_ad': 0.5, 'voice_age': 1.0, 'voice_gender': 1.0},
 'emo': {'Angry': 0.7023809523809523,
         'Beauty': 0.8634920634920634,
         'Calm': 0.5142857142857143,
         'Happy': 0.8507936507936508},
 'mid': {'Dense/Sparse': 0.8571428571428571,
         'Distorted/Clear': 0.7023809523809523,
         'Electric/Acoustic': 0.6233766233766234,
         'Harmonious/Disharmonious': 0.34285714285714286,
         'Heavy/Light': 0.7023809523809523,
         'High pitch/Low pitch': 0.6714285714285715,
         'Loud/Soft': 1.0,
         'Punchy/Smooth': 0.8507936507936508,
         'Strong beat/Weak beat': 0.7142857142857143}}




  | Name       | Type        | Params | Mode 
---------------------------------------------------
0 | batch_norm | BatchNorm1d | 2.0 K  | train
1 | hidden     | Linear      | 524 K  | train
2 | bn_mid     | BatchNorm1d | 1.0 K  | train
3 | bn_emo     | BatchNorm1d | 1.0 K  | train
4 | bn_cls     | BatchNorm1d | 1.0 K  | train
5 | hidden_mid | Linear      | 131 K  | train
6 | hidden_emo | Linear      | 131 K  | train
7 | hidden_cls | Linear      | 131 K  | train
8 | out        | ModuleDict  | 9.5 K  | train
---------------------------------------------------
933 K     Trainable params
0         Non-trainable params
933 K     Total params
3.734     Total estimated model params size (MB)
28        Modules in train mode
0         Modules in eval mode
/homes/lm004/.conda/envs/autocritical/lib/python3.13/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:425: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `nu

Fold 3
{'cls': {'target_of_toy_ad': 0.8707482993197279,
         'voice_age': 0.4155844155844156,
         'voice_gender': 0.8507936507936508},
 'emo': {'Angry': 0.7142857142857143,
         'Beauty': 0.8507936507936508,
         'Calm': 0.5523809523809524,
         'Happy': 1.0},
 'mid': {'Dense/Sparse': 0.7142857142857143,
         'Distorted/Clear': 0.8507936507936508,
         'Electric/Acoustic': 0.7023809523809523,
         'Harmonious/Disharmonious': 0.7142857142857143,
         'Heavy/Light': 1.0,
         'High pitch/Low pitch': 1.0,
         'Loud/Soft': 1.0,
         'Punchy/Smooth': 0.8507936507936508,
         'Strong beat/Weak beat': 1.0}}



/homes/lm004/.conda/envs/autocritical/lib/python3.13/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:425: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=95` in the `DataLoader` to improve performance.
/homes/lm004/.conda/envs/autocritical/lib/python3.13/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=95` in the `DataLoader` to improve performance.
/homes/lm004/.conda/envs/autocritical/lib/python3.13/site-packages/pytorch_lightning/loops/fit_loop.py:310: The number of training batches (1) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.
GPU available: True (cuda), used: True
TPU av

Fold 4
{'cls': {'target_of_toy_ad': 0.7222222222222222,
         'voice_age': 0.7912087912087912,
         'voice_gender': 0.4408163265306122},
 'emo': {'Angry': 0.6,
         'Beauty': 0.5523809523809524,
         'Calm': 0.4523809523809524,
         'Happy': 0.25396825396825395},
 'mid': {'Dense/Sparse': 0.7142857142857143,
         'Distorted/Clear': 0.7142857142857143,
         'Electric/Acoustic': 0.8571428571428571,
         'Harmonious/Disharmonious': 0.5904761904761904,
         'Heavy/Light': 0.8507936507936508,
         'High pitch/Low pitch': 0.5523809523809524,
         'Loud/Soft': 0.7261904761904762,
         'Punchy/Smooth': 0.7023809523809523,
         'Strong beat/Weak beat': 1.0}}



/homes/lm004/.conda/envs/autocritical/lib/python3.13/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:425: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=95` in the `DataLoader` to improve performance.
/homes/lm004/.conda/envs/autocritical/lib/python3.13/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=95` in the `DataLoader` to improve performance.
/homes/lm004/.conda/envs/autocritical/lib/python3.13/site-packages/pytorch_lightning/loops/fit_loop.py:310: The number of training batches (1) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.
GPU available: True (cuda), used: True
TPU av

Fold 5
{'cls': {'target_of_toy_ad': 0.7142857142857143,
         'voice_age': 0.5952380952380952,
         'voice_gender': 1.0},
 'emo': {'Angry': 0.8634920634920634,
         'Beauty': 0.25396825396825395,
         'Calm': 0.7272727272727273,
         'Happy': 0.7261904761904762},
 'mid': {'Dense/Sparse': 0.8571428571428571,
         'Distorted/Clear': 0.7261904761904762,
         'Electric/Acoustic': 0.8744588744588745,
         'Harmonious/Disharmonious': 0.7142857142857143,
         'Heavy/Light': 0.8634920634920634,
         'High pitch/Low pitch': 0.7142857142857143,
         'Loud/Soft': 0.7261904761904762,
         'Punchy/Smooth': 0.5714285714285714,
         'Strong beat/Weak beat': 0.8634920634920634}}



/homes/lm004/.conda/envs/autocritical/lib/python3.13/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:425: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=95` in the `DataLoader` to improve performance.
/homes/lm004/.conda/envs/autocritical/lib/python3.13/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=95` in the `DataLoader` to improve performance.
/homes/lm004/.conda/envs/autocritical/lib/python3.13/site-packages/pytorch_lightning/loops/fit_loop.py:310: The number of training batches (1) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.
GPU available: True (cuda), used: True
TPU av

Fold 6
{'cls': {'target_of_toy_ad': 0.6944444444444443,
         'voice_age': 0.5952380952380952,
         'voice_gender': 0.6493506493506492},
 'emo': {'Angry': 0.8398268398268397,
         'Beauty': 0.6714285714285715,
         'Calm': 0.8398268398268397,
         'Happy': 0.8398268398268397},
 'mid': {'Dense/Sparse': 0.7023809523809523,
         'Distorted/Clear': 0.7142857142857143,
         'Electric/Acoustic': 0.7571428571428572,
         'Harmonious/Disharmonious': 0.9230769230769231,
         'Heavy/Light': 0.6714285714285715,
         'High pitch/Low pitch': 0.5523809523809524,
         'Loud/Soft': 0.7023809523809523,
         'Punchy/Smooth': 0.38095238095238093,
         'Strong beat/Weak beat': 0.38095238095238093}}



/homes/lm004/.conda/envs/autocritical/lib/python3.13/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:425: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=95` in the `DataLoader` to improve performance.
/homes/lm004/.conda/envs/autocritical/lib/python3.13/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=95` in the `DataLoader` to improve performance.
/homes/lm004/.conda/envs/autocritical/lib/python3.13/site-packages/pytorch_lightning/loops/fit_loop.py:310: The number of training batches (1) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.
GPU available: True (cuda), used: True
TPU av

Fold 7
{'cls': {'target_of_toy_ad': 1.0,
         'voice_age': 0.5333333333333333,
         'voice_gender': 0.7666666666666666},
 'emo': {'Angry': 0.5142857142857142,
         'Beauty': 0.625,
         'Calm': 0.6666666666666666,
         'Happy': 0.8380952380952381},
 'mid': {'Dense/Sparse': 0.6666666666666666,
         'Distorted/Clear': 0.6666666666666666,
         'Electric/Acoustic': 0.5333333333333333,
         'Harmonious/Disharmonious': 0.45714285714285713,
         'Heavy/Light': 0.6666666666666666,
         'High pitch/Low pitch': 0.8380952380952381,
         'Loud/Soft': 0.6666666666666666,
         'Punchy/Smooth': 0.8148148148148148,
         'Strong beat/Weak beat': 0.7575757575757575}}



/homes/lm004/.conda/envs/autocritical/lib/python3.13/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:425: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=95` in the `DataLoader` to improve performance.
/homes/lm004/.conda/envs/autocritical/lib/python3.13/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=95` in the `DataLoader` to improve performance.
/homes/lm004/.conda/envs/autocritical/lib/python3.13/site-packages/pytorch_lightning/loops/fit_loop.py:310: The number of training batches (1) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.
GPU available: True (cuda), used: True
TPU av

Fold 8
{'cls': {'target_of_toy_ad': 0.0, 'voice_age': 1.0, 'voice_gender': 1.0},
 'emo': {'Angry': 0.8285714285714286,
         'Beauty': 0.8380952380952381,
         'Calm': 0.851851851851852,
         'Happy': 0.09523809523809523},
 'mid': {'Dense/Sparse': 0.5142857142857142,
         'Distorted/Clear': 0.6666666666666666,
         'Electric/Acoustic': 0.8285714285714286,
         'Harmonious/Disharmonious': 0.8285714285714286,
         'Heavy/Light': 0.8148148148148148,
         'High pitch/Low pitch': 0.8285714285714286,
         'Loud/Soft': 0.8285714285714286,
         'Punchy/Smooth': 1.0,
         'Strong beat/Weak beat': 0.8285714285714286}}



/homes/lm004/.conda/envs/autocritical/lib/python3.13/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:425: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=95` in the `DataLoader` to improve performance.
/homes/lm004/.conda/envs/autocritical/lib/python3.13/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=95` in the `DataLoader` to improve performance.
/homes/lm004/.conda/envs/autocritical/lib/python3.13/site-packages/pytorch_lightning/loops/fit_loop.py:310: The number of training batches (1) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.
GPU available: True (cuda), used: True
TPU av

Fold 9
{'cls': {'target_of_toy_ad': 0.5,
         'voice_age': 0.5333333333333333,
         'voice_gender': 0.6547619047619048},
 'emo': {'Angry': 0.6666666666666666,
         'Beauty': 0.8380952380952381,
         'Calm': 0.5428571428571428,
         'Happy': 0.4166666666666667},
 'mid': {'Dense/Sparse': 1.0,
         'Distorted/Clear': 0.851851851851852,
         'Electric/Acoustic': 0.7083333333333334,
         'Harmonious/Disharmonious': 0.48571428571428577,
         'Heavy/Light': 1.0,
         'High pitch/Low pitch': 0.8285714285714286,
         'Loud/Soft': 1.0,
         'Punchy/Smooth': 1.0,
         'Strong beat/Weak beat': 0.25}}



/homes/lm004/.conda/envs/autocritical/lib/python3.13/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:425: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=95` in the `DataLoader` to improve performance.
/homes/lm004/.conda/envs/autocritical/lib/python3.13/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=95` in the `DataLoader` to improve performance.
/homes/lm004/.conda/envs/autocritical/lib/python3.13/site-packages/pytorch_lightning/loops/fit_loop.py:310: The number of training batches (1) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Fold 10
{'cls': {'target_of_toy_ad': 0.6666666666666666,
         'voice_age': 0.3333333333333333,
         'voice_gender': 0.39999999999999997},
 'emo': {'Angry': 0.6666666666666666,
         'Beauty': 0.6666666666666666,
         'Calm': 0.5142857142857142,
         'Happy': 0.6666666666666666},
 'mid': {'Dense/Sparse': 0.8285714285714286,
         'Distorted/Clear': 1.0,
         'Electric/Acoustic': 0.8380952380952381,
         'Harmonious/Disharmonious': 1.0,
         'Heavy/Light': 0.8285714285714286,
         'High pitch/Low pitch': 1.0,
         'Loud/Soft': 0.8285714285714286,
         'Punchy/Smooth': 0.5142857142857142,
         'Strong beat/Weak beat': 0.3333333333333333}}



In [3]:
for task in ["cls", "mid", "emo"]:
    task_averages = []
    for subtask in music_config_training[f"{task}_dict"].keys():
        main_results = [rslt[task][subtask] for rslt in results]
        print(f"{task} {subtask} = {np.mean(main_results):.2f} +/- {np.std(main_results):.2f}")
        task_averages.append(np.mean(main_results))
    print(f"{task} average = {np.mean(task_averages):.2f} +/- {np.std(task_averages):.2f}")

cls target_of_toy_ad = 0.67 +/- 0.28
cls voice_age = 0.66 +/- 0.22
cls voice_gender = 0.74 +/- 0.21
cls average = 0.69 +/- 0.03
mid Strong beat/Weak beat = 0.70 +/- 0.26
mid Electric/Acoustic = 0.74 +/- 0.10
mid Distorted/Clear = 0.73 +/- 0.15
mid Loud/Soft = 0.83 +/- 0.12
mid Heavy/Light = 0.82 +/- 0.11
mid High pitch/Low pitch = 0.77 +/- 0.15
mid Punchy/Smooth = 0.74 +/- 0.19
mid Harmonious/Disharmonious = 0.65 +/- 0.21
mid Dense/Sparse = 0.77 +/- 0.13
mid average = 0.75 +/- 0.05
emo Happy = 0.64 +/- 0.28
emo Beauty = 0.70 +/- 0.18
emo Calm = 0.64 +/- 0.13
emo Angry = 0.71 +/- 0.10
emo average = 0.67 +/- 0.03


In [4]:
# retrain on all data (to compute the music descriptions)
X, y_mid, y_emo, y_cls = load_embeddings_and_labels(
    music_df,
    music_config_training['mid_dict'],
    music_config_training['emo_dict'],
    music_config_training['cls_dict']
)
music_model, f1s_val = train_music_model(music_config_training, X, y_mid, y_emo, y_cls, return_metrics=True)

# print validation metrics
for task in ["cls", "mid", "emo"]:
    for subtask in music_config_training[f"{task}_dict"].keys():
        print(f"{task} {subtask} = {f1s_val[task][subtask]:.2f}")

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [3]

  | Name       | Type        | Params | Mode 
---------------------------------------------------
0 | batch_norm | BatchNorm1d | 2.0 K  | train
1 | hidden     | Linear      | 524 K  | train
2 | bn_mid     | BatchNorm1d | 1.0 K  | train
3 | bn_emo     | BatchNorm1d | 1.0 K  | train
4 | bn_cls     | BatchNorm1d | 1.0 K  | train
5 | hidden_mid | Linear      | 131 K  | train
6 | hidden_emo | Linear      | 131 K  | train
7 | hidden_cls | Linear      | 131 K  | train
8 | out        | ModuleDict  | 9.5 K  | train
---------------------------------------------------
933 K     Trainable params
0         Non-trainable params
933 K     Total params
3.734     Total estimated model params size (MB)
28        Modules in train mode
0         Modules in eval mode
/homes/lm004/.conda/envs/autocritical/lib/python3.13/site-packages/pytorch_lightning

cls target_of_toy_ad = 0.67
cls voice_age = 0.77
cls voice_gender = 0.83
mid Strong beat/Weak beat = 0.73
mid Electric/Acoustic = 0.76
mid Distorted/Clear = 0.77
mid Loud/Soft = 0.82
mid Heavy/Light = 0.86
mid High pitch/Low pitch = 0.75
mid Punchy/Smooth = 0.71
mid Harmonious/Disharmonious = 0.72
mid Dense/Sparse = 0.82
emo Happy = 0.70
emo Beauty = 0.81
emo Calm = 0.76
emo Angry = 0.77


In [5]:
unseen_datapoints_df = pd.read_csv(
    "files/unseen_transcripts.csv", index_col=0
)

# run the model on all unseen datapoints
for idx in tqdm(
    unseen_datapoints_df.index, 
):    
    # compute audio description for current datapoint
    y_mid_label, y_emo_label, y_cls_label, y_logits = id_to_labels(
        music_model, 
        music_config_training, # music_config_inference,
        idx,
        embeddings_dir="files/clap_embeddings",
    )

    # save audio description to file
    os.makedirs("results/complete_binary_music_predictions", exist_ok=True)
    with open(os.path.join("results/complete_binary_music_predictions", f"{idx}_music_pred.json"), "w") as f:
        json.dump({"mid": y_mid_label, "emo": y_emo_label, "cls": y_cls_label}, f)

  0%|          | 0/2041 [00:00<?, ?it/s]