# Setup

In [1]:
import os
import sys

sys.path.append(os.path.abspath("../.."))

In [2]:
from pathlib import Path

In [3]:
REPETITIONS = 1
FOLDS = 5

In [4]:
from src.experiment.sets.metric_sets import metrics_for_correlation_analysis
metrics_for_correlation_analysis

['macro_accuracy',
 'micro_accuracy',
 'macro_precision',
 'micro_precision',
 'macro_recall',
 'micro_recall',
 'macro_f1',
 'micro_f1',
 'MSE',
 'LogLoss',
 'AUNU',
 'micro_ROC-AUC']

In [5]:
from src.experiment.helpers.variables import report_output_root_dir
report_output_root_dir

WindowsPath('C:/VisualStudioRepositories/MUSIC_DATA/metric_analysis/output')

In [6]:
from src.experiment.sets.multilabel_balanced_datasets import multilabel_balanced_datasets
multilabel_balanced_datasets

{'bookmarks_balanced': {'path': WindowsPath('c:/VisualStudioRepositories/MUSIC_DATA/datasets/multilabel/bookmarks_balanced'),
  'classification_type': 'multilabel',
  'class_balance': 'balanced',
  'dataset_name': 'bookmarks_balanced'},
 'cal500_balanced': {'path': WindowsPath('c:/VisualStudioRepositories/MUSIC_DATA/datasets/multilabel/cal500_balanced'),
  'classification_type': 'multilabel',
  'class_balance': 'balanced',
  'dataset_name': 'cal500_balanced'},
 'corel16k009_balanced': {'path': WindowsPath('c:/VisualStudioRepositories/MUSIC_DATA/datasets/multilabel/corel16k009_balanced'),
  'classification_type': 'multilabel',
  'class_balance': 'balanced',
  'dataset_name': 'corel16k009_balanced'},
 'delicious_balanced': {'path': WindowsPath('c:/VisualStudioRepositories/MUSIC_DATA/datasets/multilabel/delicious_balanced'),
  'classification_type': 'multilabel',
  'class_balance': 'balanced',
  'dataset_name': 'delicious_balanced'},
 'emotions_balanced': {'path': WindowsPath('c:/VisualSt

# Get report.json files and group by dataset

### Load all report files

In [7]:
report_files = list(report_output_root_dir.rglob('report.json'))
report_files = sorted(report_files, key=lambda x: x.name)
report_files

[WindowsPath('C:/VisualStudioRepositories/MUSIC_DATA/metric_analysis/output/multilabel/balanced/yelp_balanced/logistic_regression/0/fold_0/report.json'),
 WindowsPath('C:/VisualStudioRepositories/MUSIC_DATA/metric_analysis/output/multilabel/balanced/yelp_balanced/logistic_regression/0/fold_1/report.json'),
 WindowsPath('C:/VisualStudioRepositories/MUSIC_DATA/metric_analysis/output/multilabel/balanced/yelp_balanced/logistic_regression/0/fold_2/report.json'),
 WindowsPath('C:/VisualStudioRepositories/MUSIC_DATA/metric_analysis/output/multilabel/balanced/yelp_balanced/logistic_regression/0/fold_3/report.json'),
 WindowsPath('C:/VisualStudioRepositories/MUSIC_DATA/metric_analysis/output/multilabel/balanced/yelp_balanced/logistic_regression/0/fold_4/report.json'),
 WindowsPath('C:/VisualStudioRepositories/MUSIC_DATA/metric_analysis/output/multilabel/balanced/yeast_balanced/logistic_regression/0/fold_0/report.json'),
 WindowsPath('C:/VisualStudioRepositories/MUSIC_DATA/metric_analysis/output

In [8]:
len(report_files) # 2 datasets * 3 models * 2 repetitions * 5 CV = 60 reports

65

### Group by dataset

In [9]:
from collections import defaultdict
import json

# Group report files by "dataset name"
dataset_reports = defaultdict(list)
for file in report_files:
    with open(file, 'r') as f:
        data = json.load(f)
        dataset_name = data.get('dataset name')
        if dataset_name:
            dataset_reports[dataset_name].append(file)

# Convert defaultdict to regular dict for JSON serialization
grouped_by_dataset = dict(dataset_reports)

In [10]:
grouped_by_dataset

{'yelp_balanced': [WindowsPath('C:/VisualStudioRepositories/MUSIC_DATA/metric_analysis/output/multilabel/balanced/yelp_balanced/logistic_regression/0/fold_0/report.json'),
  WindowsPath('C:/VisualStudioRepositories/MUSIC_DATA/metric_analysis/output/multilabel/balanced/yelp_balanced/logistic_regression/0/fold_1/report.json'),
  WindowsPath('C:/VisualStudioRepositories/MUSIC_DATA/metric_analysis/output/multilabel/balanced/yelp_balanced/logistic_regression/0/fold_2/report.json'),
  WindowsPath('C:/VisualStudioRepositories/MUSIC_DATA/metric_analysis/output/multilabel/balanced/yelp_balanced/logistic_regression/0/fold_3/report.json'),
  WindowsPath('C:/VisualStudioRepositories/MUSIC_DATA/metric_analysis/output/multilabel/balanced/yelp_balanced/logistic_regression/0/fold_4/report.json')],
 'yeast_balanced': [WindowsPath('C:/VisualStudioRepositories/MUSIC_DATA/metric_analysis/output/multilabel/balanced/yeast_balanced/logistic_regression/0/fold_0/report.json'),
  WindowsPath('C:/VisualStudioRep

In [11]:
# 3 models * 2 repetitions * 5 CV = 30 reports per dataset
for dataset_name, files in grouped_by_dataset.items():
    print(f"{dataset_name}: {len(files)}")

yelp_balanced: 5
yeast_balanced: 5
tmc2007500_balanced: 5
scene_balanced: 5
nuswidebow_balanced: 5
ng20: 5
mediamill_balanced: 5
imdb_balanced: 5
emotions_balanced: 5
delicious_balanced: 5
corel16k009_balanced: 5
cal500_balanced: 5
bookmarks_balanced: 5


# Transform report data

### Create dfs

In [12]:
# Get all metrics from reports grouped by dataset
metrics_grouped_by_dataset = dict()

for dataset_name, files in grouped_by_dataset.items():
    metrics_grouped_by_dataset[dataset_name] = []
    for file in files:
        with open(file, 'r') as f:
            data = json.load(f)
            metrics_grouped_by_dataset[dataset_name].append(data['metrics'])

metrics_grouped_by_dataset

{'yelp_balanced': [{'macro_accuracy': 0.7819148936170213,
   'micro_accuracy': 0.7819148936170213,
   'accuracy_per_class': {'IsFoodGood': 0.8202127659574469,
    'IsAmbianceGood': 0.7510638297872341,
    'IsServiceGood': 0.7638297872340426,
    'IsPriceGood': 0.7925531914893617},
   'macro_precision': 0.806601605723039,
   'micro_precision': 0.807109252483011,
   'precision_per_class': {'IsFoodGood': 0.8483033932135728,
    'IsAmbianceGood': 0.7876857749469215,
    'IsServiceGood': 0.7868852459016393,
    'IsPriceGood': 0.8035320088300221},
   'macro_recall': 0.7736312774900157,
   'micro_recall': 0.7739348370927318,
   'recall_per_class': {'IsFoodGood': 0.8204633204633205,
    'IsAmbianceGood': 0.7346534653465346,
    'IsServiceGood': 0.7649402390438247,
    'IsPriceGood': 0.774468085106383},
   'macro_f1': 0.7897217500801316,
   'micro_f1': 0.7901740020470829,
   'f1_per_class': {'IsFoodGood': 0.8341511285574092,
    'IsAmbianceGood': 0.7602459016393442,
    'IsServiceGood': 0.77575

In [13]:
# Leave only metrics meant for correlation analysis
filtered_metrics_grouped_by_dataset = {
    dataset: [
        {k: m[k] for k in metrics_for_correlation_analysis if k in m}
        for m in metrics_list
    ]
    for dataset, metrics_list in metrics_grouped_by_dataset.items()
}

filtered_metrics_grouped_by_dataset


{'yelp_balanced': [{'macro_accuracy': 0.7819148936170213,
   'micro_accuracy': 0.7819148936170213,
   'macro_precision': 0.806601605723039,
   'micro_precision': 0.807109252483011,
   'macro_recall': 0.7736312774900157,
   'micro_recall': 0.7739348370927318,
   'macro_f1': 0.7897217500801316,
   'micro_f1': 0.7901740020470829,
   'MSE': 0.15994133055210114,
   'LogLoss': 0.5190487504005432,
   'AUNU': 0.8443337678909302,
   'micro_ROC-AUC': 0.8456090688705444},
  {'macro_accuracy': 0.7816753926701571,
   'micro_accuracy': 0.7816753926701571,
   'macro_precision': 0.7991508056871744,
   'micro_precision': 0.7995873097755997,
   'macro_recall': 0.7765764552245492,
   'micro_recall': 0.7767476822851416,
   'macro_f1': 0.7876618472294188,
   'micro_f1': 0.788002033553635,
   'MSE': 0.16220518946647644,
   'LogLoss': 0.5294665098190308,
   'AUNU': 0.8402114510536194,
   'micro_ROC-AUC': 0.8411623239517212},
  {'macro_accuracy': 0.7803615303615303,
   'micro_accuracy': 0.7803615303615303,
  

In [14]:
# Transform report data into DataFrames for each dataset
import pandas as pd

tables_by_dataset = {}

for dataset, metrics_list in filtered_metrics_grouped_by_dataset.items():
    df = pd.DataFrame(metrics_list)
    tables_by_dataset[dataset] = df.T

In [15]:
# Show example table for a specific dataset
tables_by_dataset['emotions_balanced']

Unnamed: 0,0,1,2,3,4
macro_accuracy,0.74505,0.757353,0.761254,0.76747,0.764535
micro_accuracy,0.74505,0.757353,0.761254,0.76747,0.764535
macro_precision,0.646064,0.657904,0.658772,0.665992,0.663144
micro_precision,0.650407,0.662698,0.659631,0.667323,0.666667
macro_recall,0.568577,0.592056,0.594881,0.606753,0.60047
micro_recall,0.571429,0.596429,0.598086,0.609712,0.603448
macro_f1,0.604385,0.620829,0.623848,0.634269,0.629225
micro_f1,0.608365,0.62782,0.627353,0.637218,0.633484
MSE,0.184338,0.181109,0.174454,0.169011,0.167637
LogLoss,0.628546,0.607995,0.57934,0.567157,0.556252


In [16]:
# Show example table for a specific dataset
tables_by_dataset['ng20']

Unnamed: 0,0,1,2,3,4
macro_accuracy,0.973761,0.974121,0.974122,0.97426,0.974446
micro_accuracy,0.973761,0.974121,0.974122,0.97426,0.974446
macro_precision,0.811364,0.819705,0.819292,0.818083,0.820322
micro_precision,0.81768,0.825045,0.824385,0.823424,0.825314
macro_recall,0.632711,0.632331,0.632585,0.63638,0.638415
micro_recall,0.632638,0.632327,0.63252,0.636329,0.638364
macro_f1,0.709397,0.712704,0.712926,0.714847,0.716954
micro_f1,0.713354,0.715944,0.715818,0.717887,0.7199
MSE,0.020737,0.020519,0.020503,0.020389,0.020266
LogLoss,0.081603,0.081403,0.080874,0.080196,0.079632


### Add headers

In [27]:
MODEL_HEADERS = ["logistic_regression"]
REPETITION_HEADERS = [f"R{i}" for i in reversed(range(REPETITIONS))]
CV_HEADERS =  [f"CV{i}" for i in range(FOLDS)]

In [21]:
REPETITION_HEADERS

['R0']

In [22]:
CV_HEADERS

['CV0', 'CV1', 'CV2', 'CV3', 'CV4']

In [23]:
for dataset, table in tables_by_dataset.items():
    repeated_headers = CV_HEADERS * (table.shape[1] // len(CV_HEADERS))
    tables_by_dataset[dataset].columns = repeated_headers

In [24]:
for dataset, table in tables_by_dataset.items():
    n_reps = len(REPETITION_HEADERS)
    n_folds = FOLDS
    n_cols = table.shape[1]
  
    rep_headers = []
    fold_headers = []
    for rep in REPETITION_HEADERS:
        rep_headers.extend([rep] * n_folds)
        fold_headers.extend(CV_HEADERS)

    repeats = (n_cols + n_reps * n_folds - 1) // (n_reps * n_folds)
    rep_headers = (rep_headers * repeats)[:n_cols]
    fold_headers = (fold_headers * repeats)[:n_cols]
    table.columns = pd.MultiIndex.from_arrays([rep_headers, fold_headers])

In [28]:
for dataset, table in tables_by_dataset.items():
    n_models = len(MODEL_HEADERS)
    n_cols_per_model = REPETITIONS * FOLDS
    new_top_level = []
    for model in MODEL_HEADERS:
        new_top_level.extend([model] * n_cols_per_model)

    if len(new_top_level) < table.shape[1]:
        repeats = (table.shape[1] + n_cols_per_model - 1) // n_cols_per_model
        new_top_level = (MODEL_HEADERS * repeats)[:table.shape[1]]

    table.columns = pd.MultiIndex.from_arrays([new_top_level, table.columns.get_level_values(0), table.columns.get_level_values(1)])

### Display

In [31]:
tables_by_dataset["emotions_balanced"]

Unnamed: 0_level_0,logistic_regression,logistic_regression,logistic_regression,logistic_regression,logistic_regression
Unnamed: 0_level_1,R0,R0,R0,R0,R0
Unnamed: 0_level_2,CV0,CV1,CV2,CV3,CV4
macro_accuracy,0.74505,0.757353,0.761254,0.76747,0.764535
micro_accuracy,0.74505,0.757353,0.761254,0.76747,0.764535
macro_precision,0.646064,0.657904,0.658772,0.665992,0.663144
micro_precision,0.650407,0.662698,0.659631,0.667323,0.666667
macro_recall,0.568577,0.592056,0.594881,0.606753,0.60047
micro_recall,0.571429,0.596429,0.598086,0.609712,0.603448
macro_f1,0.604385,0.620829,0.623848,0.634269,0.629225
micro_f1,0.608365,0.62782,0.627353,0.637218,0.633484
MSE,0.184338,0.181109,0.174454,0.169011,0.167637
LogLoss,0.628546,0.607995,0.57934,0.567157,0.556252


In [30]:
tables_by_dataset["ng20"]

Unnamed: 0_level_0,logistic_regression,logistic_regression,logistic_regression,logistic_regression,logistic_regression
Unnamed: 0_level_1,R0,R0,R0,R0,R0
Unnamed: 0_level_2,CV0,CV1,CV2,CV3,CV4
macro_accuracy,0.973761,0.974121,0.974122,0.97426,0.974446
micro_accuracy,0.973761,0.974121,0.974122,0.97426,0.974446
macro_precision,0.811364,0.819705,0.819292,0.818083,0.820322
micro_precision,0.81768,0.825045,0.824385,0.823424,0.825314
macro_recall,0.632711,0.632331,0.632585,0.63638,0.638415
micro_recall,0.632638,0.632327,0.63252,0.636329,0.638364
macro_f1,0.709397,0.712704,0.712926,0.714847,0.716954
micro_f1,0.713354,0.715944,0.715818,0.717887,0.7199
MSE,0.020737,0.020519,0.020503,0.020389,0.020266
LogLoss,0.081603,0.081403,0.080874,0.080196,0.079632


# Calculate correlation matrixes

### Calculate

In [32]:
pearson_correlations_by_dataset_metrics = {
    dataset: table.T.corr(method='pearson')
    for dataset, table in tables_by_dataset.items()
}

In [33]:
spearman_correlations_by_dataset_metrics = {
    dataset: table.T.corr(method='spearman')
    for dataset, table in tables_by_dataset.items()
}

### Display examples

##### Pearson

In [34]:
pearson_correlations_by_dataset_metrics["emotions_balanced"]


Unnamed: 0,macro_accuracy,micro_accuracy,macro_precision,micro_precision,macro_recall,micro_recall,macro_f1,micro_f1,MSE,LogLoss,AUNU,micro_ROC-AUC
macro_accuracy,1.0,1.0,0.992671,0.94554,0.99463,0.989918,0.996713,0.984085,-0.912524,-0.931461,0.742313,0.793176
micro_accuracy,1.0,1.0,0.992671,0.94554,0.99463,0.989918,0.996713,0.984085,-0.912524,-0.931461,0.742313,0.793176
macro_precision,0.992671,0.992671,1.0,0.975329,0.997416,0.995334,0.998175,0.99628,-0.889902,-0.903181,0.713879,0.773726
micro_precision,0.94554,0.94554,0.975329,1.0,0.963851,0.966915,0.960622,0.982555,-0.83531,-0.848524,0.656951,0.732893
macro_recall,0.99463,0.99463,0.997416,0.963851,1.0,0.999079,0.9971,0.995898,-0.872892,-0.893372,0.683278,0.742829
micro_recall,0.989918,0.989918,0.995334,0.966915,0.999079,1.0,0.993159,0.997485,-0.85333,-0.877522,0.653928,0.717114
macro_f1,0.996713,0.996713,0.998175,0.960622,0.9971,0.993159,1.0,0.990548,-0.901827,-0.913126,0.73164,0.785962
micro_f1,0.984085,0.984085,0.99628,0.982555,0.995898,0.997485,0.990548,1.0,-0.853525,-0.875136,0.658263,0.725529
MSE,-0.912524,-0.912524,-0.889902,-0.83531,-0.872892,-0.85333,-0.901827,-0.853525,1.0,0.989117,-0.949757,-0.97026
LogLoss,-0.931461,-0.931461,-0.903181,-0.848524,-0.893372,-0.877522,-0.913126,-0.875136,0.989117,1.0,-0.908634,-0.935539


In [35]:
pearson_correlations_by_dataset_metrics["ng20"]

Unnamed: 0,macro_accuracy,micro_accuracy,macro_precision,micro_precision,macro_recall,micro_recall,macro_f1,micro_f1,MSE,LogLoss,AUNU,micro_ROC-AUC
macro_accuracy,1.0,1.0,0.864949,0.855617,0.785157,0.788786,0.993948,0.981135,-0.996771,-0.899723,0.214885,0.205839
micro_accuracy,1.0,1.0,0.864949,0.855617,0.785157,0.788786,0.993948,0.981135,-0.996771,-0.899723,0.214885,0.205839
macro_precision,0.864949,0.864949,1.0,0.9993,0.369293,0.374885,0.805271,0.754518,-0.824187,-0.581733,-0.289932,-0.300769
micro_precision,0.855617,0.855617,0.9993,1.0,0.3535,0.35938,0.793837,0.743743,-0.813042,-0.561208,-0.311969,-0.3221
macro_recall,0.785157,0.785157,0.369293,0.3535,1.0,0.999953,0.847193,0.888201,-0.828231,-0.944137,0.757239,0.754207
micro_recall,0.788786,0.788786,0.374885,0.35938,0.999953,1.0,0.85014,0.891093,-0.831316,-0.944039,0.751947,0.749015
macro_f1,0.993948,0.993948,0.805271,0.793837,0.847193,0.85014,1.0,0.995044,-0.999281,-0.940008,0.31998,0.311271
micro_f1,0.981135,0.981135,0.754518,0.743743,0.888201,0.891093,0.995044,1.0,-0.990905,-0.94922,0.386941,0.379909
MSE,-0.996771,-0.996771,-0.824187,-0.813042,-0.828231,-0.831316,-0.999281,-0.990905,1.0,0.930856,-0.287768,-0.278739
LogLoss,-0.899723,-0.899723,-0.581733,-0.561208,-0.944137,-0.944039,-0.940008,-0.94922,0.930856,1.0,-0.598499,-0.588581


##### Spearman

In [36]:
spearman_correlations_by_dataset_metrics["emotions_balanced"]

Unnamed: 0,macro_accuracy,micro_accuracy,macro_precision,micro_precision,macro_recall,micro_recall,macro_f1,micro_f1,MSE,LogLoss,AUNU,micro_ROC-AUC
macro_accuracy,1.0,1.0,1.0,0.9,1.0,1.0,1.0,0.9,-0.9,-0.9,0.8,0.8
micro_accuracy,1.0,1.0,1.0,0.9,1.0,1.0,1.0,0.9,-0.9,-0.9,0.8,0.8
macro_precision,1.0,1.0,1.0,0.9,1.0,1.0,1.0,0.9,-0.9,-0.9,0.8,0.8
micro_precision,0.9,0.9,0.9,1.0,0.9,0.9,0.9,1.0,-0.8,-0.8,0.6,0.6
macro_recall,1.0,1.0,1.0,0.9,1.0,1.0,1.0,0.9,-0.9,-0.9,0.8,0.8
micro_recall,1.0,1.0,1.0,0.9,1.0,1.0,1.0,0.9,-0.9,-0.9,0.8,0.8
macro_f1,1.0,1.0,1.0,0.9,1.0,1.0,1.0,0.9,-0.9,-0.9,0.8,0.8
micro_f1,0.9,0.9,0.9,1.0,0.9,0.9,0.9,1.0,-0.8,-0.8,0.6,0.6
MSE,-0.9,-0.9,-0.9,-0.8,-0.9,-0.9,-0.9,-0.8,1.0,1.0,-0.9,-0.9
LogLoss,-0.9,-0.9,-0.9,-0.8,-0.9,-0.9,-0.9,-0.8,1.0,1.0,-0.9,-0.9


In [37]:
spearman_correlations_by_dataset_metrics["ng20"]

Unnamed: 0,macro_accuracy,micro_accuracy,macro_precision,micro_precision,macro_recall,micro_recall,macro_f1,micro_f1,MSE,LogLoss,AUNU,micro_ROC-AUC
macro_accuracy,1.0,1.0,0.6,0.6,0.7,0.7,1.0,0.9,-1.0,-1.0,0.4,0.4
micro_accuracy,1.0,1.0,0.6,0.6,0.7,0.7,1.0,0.9,-1.0,-1.0,0.4,0.4
macro_precision,0.6,0.6,1.0,1.0,0.1,0.1,0.6,0.7,-0.6,-0.6,0.0,0.0
micro_precision,0.6,0.6,1.0,1.0,0.1,0.1,0.6,0.7,-0.6,-0.6,0.0,0.0
macro_recall,0.7,0.7,0.1,0.1,1.0,1.0,0.7,0.6,-0.7,-0.7,0.9,0.9
micro_recall,0.7,0.7,0.1,0.1,1.0,1.0,0.7,0.6,-0.7,-0.7,0.9,0.9
macro_f1,1.0,1.0,0.6,0.6,0.7,0.7,1.0,0.9,-1.0,-1.0,0.4,0.4
micro_f1,0.9,0.9,0.7,0.7,0.6,0.6,0.9,1.0,-0.9,-0.9,0.3,0.3
MSE,-1.0,-1.0,-0.6,-0.6,-0.7,-0.7,-1.0,-0.9,1.0,1.0,-0.4,-0.4
LogLoss,-1.0,-1.0,-0.6,-0.6,-0.7,-0.7,-1.0,-0.9,1.0,1.0,-0.4,-0.4


# Save

In [40]:
def save_correlation_matrices(correlations_by_dataset, corr_type, filename, datasets):
    for dataset_name, corr_matrix in correlations_by_dataset.items():
        dataset_info = datasets[dataset_name]
        output_dir = report_output_root_dir / dataset_info["classification_type"] / dataset_info["class_balance"] / dataset_info["dataset_name"]
        output_path = output_dir / filename

        output = {
            "dataset_name": dataset_info["dataset_name"],
            "classification_type": dataset_info["classification_type"],
            "class_balance": dataset_info["class_balance"],
            "correlation_type": corr_type,
            "correlation_matrix": corr_matrix.to_dict()
        }

        output_path.parent.mkdir(parents=True, exist_ok=True)
        with open(output_path, "w", encoding="utf-8") as f:
            json.dump(output, f, indent=2)

In [43]:
save_correlation_matrices(pearson_correlations_by_dataset_metrics, "pearson", "pearson_correlation.json", multilabel_balanced_datasets)
save_correlation_matrices(spearman_correlations_by_dataset_metrics, "spearman", "spearman_correlation.json", multilabel_balanced_datasets)