# Setup

In [6]:
import os
import sys

sys.path.append(os.path.abspath("../.."))

In [7]:
from pathlib import Path

In [8]:
# Number of repetitions per model and dataset; used further down to build
# the "R<i>" column headers.
REPETITIONS = 2
# Number of cross-validation folds per repetition; used further down to build
# the "CV<i>" column headers.
FOLDS = 5

In [9]:
from src.experiment.sets.metric_sets import metrics_for_correlation_analysis
metrics_for_correlation_analysis

['macro_accuracy',
 'micro_accuracy',
 'macro_precision',
 'micro_precision',
 'macro_recall',
 'micro_recall',
 'macro_f1',
 'micro_f1',
 'MSE',
 'LogLoss',
 'AUNU',
 'micro_ROC-AUC']

In [10]:
from src.experiment.helpers.variables import report_output_root_dir
report_output_root_dir

WindowsPath('C:/VisualStudioRepositories/MUSIC_DATA/metric_analysis/output')

In [46]:
from src.experiment.sets.data_sets import multilabel_datasets
multilabel_datasets

{'bibtex_trimmed': {'path': WindowsPath('c:/VisualStudioRepositories/MUSIC_DATA/datasets/multilabel/bibtex_trimmed'),
  'classification_type': 'multilabel',
  'class_balance': 'balanced',
  'dataset_name': 'bibtex_trimmed'},
 'emotions_trimmed': {'path': WindowsPath('c:/VisualStudioRepositories/MUSIC_DATA/datasets/multilabel/emotions_trimmed'),
  'classification_type': 'multilabel',
  'class_balance': 'balanced',
  'dataset_name': 'emotions_trimmed'}}

# Get report.json files and group by dataset

### Load all report files

In [11]:
# Recursively collect every file named 'report.json' under the output root.
report_files = list(report_output_root_dir.rglob('report.json'))
# NOTE(review): every match has the same `.name` ('report.json'), so this
# stable sort leaves the rglob traversal order unchanged. The header cells
# further down label columns purely by position, so they implicitly depend on
# that traversal order - consider sorting on explicit path components and
# deriving the model / repetition / fold labels from the path instead.
report_files = sorted(report_files, key=lambda x: x.name)
report_files

[WindowsPath('C:/VisualStudioRepositories/MUSIC_DATA/metric_analysis/output/multilabel/balanced/emotions_trimmed/logistic_regression/1/fold_0/report.json'),
 WindowsPath('C:/VisualStudioRepositories/MUSIC_DATA/metric_analysis/output/multilabel/balanced/emotions_trimmed/logistic_regression/1/fold_1/report.json'),
 WindowsPath('C:/VisualStudioRepositories/MUSIC_DATA/metric_analysis/output/multilabel/balanced/emotions_trimmed/logistic_regression/1/fold_2/report.json'),
 WindowsPath('C:/VisualStudioRepositories/MUSIC_DATA/metric_analysis/output/multilabel/balanced/emotions_trimmed/logistic_regression/1/fold_3/report.json'),
 WindowsPath('C:/VisualStudioRepositories/MUSIC_DATA/metric_analysis/output/multilabel/balanced/emotions_trimmed/logistic_regression/1/fold_4/report.json'),
 WindowsPath('C:/VisualStudioRepositories/MUSIC_DATA/metric_analysis/output/multilabel/balanced/emotions_trimmed/logistic_regression/0/fold_0/report.json'),
 WindowsPath('C:/VisualStudioRepositories/MUSIC_DATA/metri

In [12]:
len(report_files) # 2 datasets * 3 models * 2 repetitions * 5 CV = 60 reports

60

### Group by dataset

In [13]:
from collections import defaultdict
import json

# Group report files by the "dataset name" field stored inside each report.
dataset_reports = defaultdict(list)
for file in report_files:
    # Read the JSON as UTF-8 explicitly instead of relying on the platform
    # default encoding (which is not UTF-8 on every system).
    with open(file, 'r', encoding='utf-8') as f:
        data = json.load(f)
    # Reports with a missing or empty "dataset name" are left out of the grouping.
    dataset_name = data.get('dataset name')
    if dataset_name:
        dataset_reports[dataset_name].append(file)

# Freeze into a plain dict so that looking up an unknown dataset later raises
# KeyError instead of silently creating an empty entry.
grouped_by_dataset = dict(dataset_reports)

In [14]:
grouped_by_dataset

{'emotions_trimmed': [WindowsPath('C:/VisualStudioRepositories/MUSIC_DATA/metric_analysis/output/multilabel/balanced/emotions_trimmed/logistic_regression/1/fold_0/report.json'),
  WindowsPath('C:/VisualStudioRepositories/MUSIC_DATA/metric_analysis/output/multilabel/balanced/emotions_trimmed/logistic_regression/1/fold_1/report.json'),
  WindowsPath('C:/VisualStudioRepositories/MUSIC_DATA/metric_analysis/output/multilabel/balanced/emotions_trimmed/logistic_regression/1/fold_2/report.json'),
  WindowsPath('C:/VisualStudioRepositories/MUSIC_DATA/metric_analysis/output/multilabel/balanced/emotions_trimmed/logistic_regression/1/fold_3/report.json'),
  WindowsPath('C:/VisualStudioRepositories/MUSIC_DATA/metric_analysis/output/multilabel/balanced/emotions_trimmed/logistic_regression/1/fold_4/report.json'),
  WindowsPath('C:/VisualStudioRepositories/MUSIC_DATA/metric_analysis/output/multilabel/balanced/emotions_trimmed/logistic_regression/0/fold_0/report.json'),
  WindowsPath('C:/VisualStudioRe

In [15]:
# Sanity check: 3 models * 2 repetitions * 5 CV folds = 30 reports are expected per dataset
for dataset_name in grouped_by_dataset:
    files = grouped_by_dataset[dataset_name]
    print(f"{dataset_name}: {len(files)}")

emotions_trimmed: 30
bibtex_trimmed: 30


# Transform report data

### Create dfs

In [16]:
# Collect the "metrics" section of every report, grouped by dataset.
# The list order for each dataset follows the file order in `grouped_by_dataset`.
metrics_grouped_by_dataset = dict()

for dataset_name, files in grouped_by_dataset.items():
    metrics_grouped_by_dataset[dataset_name] = []
    for file in files:
        # Read the JSON as UTF-8 explicitly instead of relying on the platform
        # default encoding (which is not UTF-8 on every system).
        with open(file, 'r', encoding='utf-8') as f:
            data = json.load(f)
        # A report without a "metrics" section raises KeyError here on purpose:
        # skipping it would shift the positional column labels assigned later.
        metrics_grouped_by_dataset[dataset_name].append(data['metrics'])

metrics_grouped_by_dataset

{'emotions_trimmed': [{'macro_accuracy': 0.7808181315644003,
   'micro_accuracy': 0.7808181315644003,
   'accuracy_per_class': {'amazed-suprised': 0.7230514096185738,
    'happy-pleased': 0.7197346600331676,
    'relaxing-calm': 0.7330016583747927,
    'quiet-still': 0.900497512437811,
    'sad-lonely': 0.8192371475953566,
    'angry-aggresive': 0.7893864013266998},
   'macro_precision': 0.6620516519811628,
   'micro_precision': 0.6675925925925926,
   'precision_per_class': {'amazed-suprised': 0.5879120879120879,
    'happy-pleased': 0.5298507462686567,
    'relaxing-calm': 0.6231884057971014,
    'quiet-still': 0.8095238095238095,
    'sad-lonely': 0.72,
    'angry-aggresive': 0.7018348623853211},
   'macro_recall': 0.623414415922039,
   'micro_recall': 0.6242424242424243,
   'recall_per_class': {'amazed-suprised': 0.5376884422110553,
    'happy-pleased': 0.4011299435028249,
    'relaxing-calm': 0.6084905660377359,
    'quiet-still': 0.864406779661017,
    'sad-lonely': 0.617142857142

In [17]:
# Keep only the metrics selected for correlation analysis, in the order given by
# `metrics_for_correlation_analysis`. A metric absent from a report is simply
# omitted from that report's entry.
filtered_metrics_grouped_by_dataset = {}
for dataset_key, all_report_metrics in metrics_grouped_by_dataset.items():
    selected_per_report = []
    for report_metrics in all_report_metrics:
        selected_per_report.append(
            {k: report_metrics[k] for k in metrics_for_correlation_analysis if k in report_metrics}
        )
    filtered_metrics_grouped_by_dataset[dataset_key] = selected_per_report

filtered_metrics_grouped_by_dataset


{'emotions_trimmed': [{'macro_accuracy': 0.7808181315644003,
   'micro_accuracy': 0.7808181315644003,
   'macro_precision': 0.6620516519811628,
   'micro_precision': 0.6675925925925926,
   'macro_recall': 0.623414415922039,
   'micro_recall': 0.6242424242424243,
   'macro_f1': 0.6402336063073225,
   'micro_f1': 0.6451901565995526,
   'MSE': 0.15418820083141327,
   'LogLoss': 0.5047345757484436,
   'AUNU': 0.8282828330993652,
   'micro_ROC-AUC': 0.8425180912017822},
  {'macro_accuracy': 0.7812648597242036,
   'micro_accuracy': 0.7812648597242036,
   'macro_precision': 0.6663157208243017,
   'micro_precision': 0.6714628297362111,
   'macro_recall': 0.6220386963263516,
   'micro_recall': 0.6226834692364714,
   'macro_f1': 0.6419905915134955,
   'micro_f1': 0.6461538461538462,
   'MSE': 0.15408913791179657,
   'LogLoss': 0.5031600594520569,
   'AUNU': 0.8287929892539978,
   'micro_ROC-AUC': 0.8429752588272095},
  {'macro_accuracy': 0.7813283208020051,
   'micro_accuracy': 0.781328320802005

In [18]:
# Transform report data into DataFrames for each dataset
import pandas as pd

# One table per dataset. The frame built from the list of per-report dicts is
# transposed, so rows are metrics and columns are reports (in list order).
tables_by_dataset = {
    dataset: pd.DataFrame(metrics_list).T
    for dataset, metrics_list in filtered_metrics_grouped_by_dataset.items()
}

In [19]:
# Show example table for a specific dataset
tables_by_dataset['emotions_trimmed']

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,20,21,22,23,24,25,26,27,28,29
macro_accuracy,0.780818,0.781265,0.781328,0.780007,0.784667,0.775081,0.774557,0.774701,0.777639,0.778333,...,0.779721,0.778558,0.779603,0.780057,0.780333,0.77799,0.777626,0.778236,0.779281,0.779267
micro_accuracy,0.780818,0.781265,0.781328,0.780007,0.784667,0.775081,0.774557,0.774701,0.777639,0.778333,...,0.779721,0.778558,0.779603,0.780057,0.780333,0.77799,0.777626,0.778236,0.779281,0.779267
macro_precision,0.662052,0.666316,0.66647,0.667298,0.672867,0.644896,0.637272,0.643061,0.657976,0.658561,...,0.681014,0.677332,0.680925,0.681159,0.68104,0.680878,0.679819,0.677374,0.680408,0.680701
micro_precision,0.667593,0.671463,0.672462,0.672285,0.677654,0.643617,0.638814,0.648897,0.664345,0.665193,...,0.693842,0.68925,0.692168,0.69209,0.691824,0.697506,0.69486,0.692373,0.694833,0.694109
macro_recall,0.623414,0.622039,0.625444,0.619465,0.627841,0.62516,0.616874,0.609502,0.616739,0.622883,...,0.559665,0.560955,0.562781,0.566052,0.568361,0.543865,0.546111,0.552678,0.555523,0.557497
micro_recall,0.624242,0.622683,0.626701,0.620392,0.629149,0.626943,0.618799,0.611785,0.618677,0.624481,...,0.561939,0.563137,0.564943,0.568157,0.570539,0.545836,0.548091,0.554804,0.557788,0.559751
macro_f1,0.640234,0.641991,0.643984,0.641392,0.648504,0.630627,0.624368,0.623835,0.634621,0.638471,...,0.610006,0.609659,0.612253,0.614496,0.615859,0.598515,0.600176,0.603212,0.606348,0.608047
micro_f1,0.64519,0.646154,0.648776,0.645297,0.652501,0.635171,0.628647,0.629795,0.640698,0.644195,...,0.620963,0.619843,0.622118,0.62403,0.625355,0.61242,0.61281,0.616001,0.618814,0.619731
MSE,0.154188,0.154089,0.155177,0.15602,0.153231,0.153097,0.157442,0.155555,0.154623,0.155597,...,0.152663,0.15348,0.153065,0.152778,0.152837,0.153236,0.153762,0.153225,0.152713,0.153006
LogLoss,0.504735,0.50316,0.510308,0.512698,0.504911,0.482261,0.496046,0.490032,0.498256,0.507489,...,1.240509,1.373849,1.450793,1.527112,1.626046,0.592988,0.77119,0.88146,0.983813,1.164115


In [20]:
# Show example table for a specific dataset
tables_by_dataset['bibtex_trimmed']

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,20,21,22,23,24,25,26,27,28,29
macro_accuracy,0.973244,0.972998,0.972995,0.972834,0.972888,0.971901,0.971902,0.972584,0.97282,0.972979,...,0.972218,0.97195,0.971825,0.971681,0.971552,0.973472,0.973227,0.97298,0.972647,0.97243
micro_accuracy,0.973244,0.972998,0.972995,0.972834,0.972888,0.971901,0.971902,0.972584,0.97282,0.972979,...,0.972218,0.97195,0.971825,0.971681,0.971552,0.973472,0.973227,0.97298,0.972647,0.97243
macro_precision,0.722535,0.718005,0.722787,0.701757,0.693701,0.77451,0.723431,0.723401,0.739693,0.721596,...,0.547497,0.540905,0.542123,0.541558,0.544006,0.583803,0.567754,0.560529,0.557165,0.553231
micro_precision,0.84375,0.842466,0.849398,0.830287,0.829787,0.8,0.835443,0.855856,0.855346,0.849515,...,0.67709,0.649398,0.64451,0.631096,0.623951,0.77619,0.757024,0.737418,0.709318,0.695914
macro_recall,0.122381,0.118754,0.119041,0.119028,0.118267,0.09434,0.106804,0.104572,0.113073,0.117784,...,0.137811,0.134951,0.131017,0.128241,0.125407,0.157928,0.153466,0.148645,0.143949,0.14048
micro_recall,0.122727,0.119475,0.119694,0.119729,0.119145,0.09396,0.109272,0.107466,0.115352,0.118805,...,0.138494,0.135649,0.131748,0.128881,0.12616,0.158304,0.154048,0.149424,0.144435,0.141073
macro_f1,0.176313,0.172896,0.172492,0.172255,0.169939,0.12923,0.149202,0.147154,0.161214,0.16915,...,0.19232,0.188381,0.184086,0.180699,0.177514,0.21558,0.211124,0.205492,0.200074,0.196048
micro_f1,0.214286,0.209273,0.209821,0.209279,0.20837,0.168168,0.193265,0.190955,0.203288,0.208457,...,0.229952,0.22442,0.218775,0.214049,0.209882,0.262974,0.256002,0.248495,0.24,0.23459
MSE,0.023361,0.023622,0.023565,0.02363,0.023605,0.02462,0.024477,0.023963,0.023691,0.023577,...,0.026036,0.026457,0.026692,0.027049,0.027389,0.023495,0.024066,0.024602,0.025136,0.02564
LogLoss,0.101021,0.102188,0.101772,0.101577,0.101628,0.105749,0.105158,0.102742,0.102072,0.101795,...,0.541482,0.596411,0.64696,0.694264,0.743255,0.191617,0.272628,0.349577,0.41942,0.48863


### Add headers

In [21]:
# Building blocks for the column labels; the header cells below assign them
# to the table columns by position.
MODEL_HEADERS = [
    "logistic_regression",
    "linear_svm",
    "knn",
]
# Repetition labels in descending order: "R<REPETITIONS - 1>", ..., "R0".
REPETITION_HEADERS = [f"R{i}" for i in range(REPETITIONS - 1, -1, -1)]
# Fold labels in ascending order: "CV0", ..., "CV<FOLDS - 1>".
CV_HEADERS = list(f"CV{i}" for i in range(FOLDS))

In [22]:
REPETITION_HEADERS

['R1', 'R0']

In [23]:
CV_HEADERS

['CV0', 'CV1', 'CV2', 'CV3', 'CV4']

In [24]:
# Provisional flat labelling: tile the fold labels across the columns of every
# table. The tile count uses floor division, so the column count is expected to
# be a multiple of the number of fold labels.
for table in tables_by_dataset.values():
    tiles = table.shape[1] // len(CV_HEADERS)
    table.columns = CV_HEADERS * tiles

In [25]:
# Replace the flat labels with a two-level (repetition, fold) column index.
for dataset, table in tables_by_dataset.items():
    total_cols = table.shape[1]
    # Number of columns covered by one pass over all repetitions and folds.
    block_len = len(REPETITION_HEADERS) * FOLDS

    # One block: each repetition label is held for FOLDS columns while the
    # fold labels restart under every repetition.
    rep_block = [rep for rep in REPETITION_HEADERS for _ in range(FOLDS)]
    fold_block = [cv for _ in REPETITION_HEADERS for cv in CV_HEADERS]

    # Tile the block often enough to cover every column (ceiling division),
    # then cut both label lists to the exact column count.
    tiles = -(-total_cols // block_len)
    rep_level = (rep_block * tiles)[:total_cols]
    fold_level = (fold_block * tiles)[:total_cols]
    table.columns = pd.MultiIndex.from_arrays([rep_level, fold_level])

In [26]:
# Add the model name as the outermost column level: (model, repetition, fold).
for dataset, table in tables_by_dataset.items():
    n_models = len(MODEL_HEADERS)
    n_cols = table.shape[1]
    n_cols_per_model = REPETITIONS * FOLDS

    # One contiguous run of `n_cols_per_model` columns per model, in
    # MODEL_HEADERS order.
    new_top_level = []
    for model in MODEL_HEADERS:
        new_top_level.extend([model] * n_cols_per_model)

    if len(new_top_level) < n_cols:
        # More columns than one pass over all models: tile the block-expanded
        # labels (not the bare model names, which would alternate models on
        # every column) and cut to the exact column count.
        block_len = n_models * n_cols_per_model
        repeats = (n_cols + block_len - 1) // block_len
        new_top_level = (new_top_level * repeats)[:n_cols]

    # Read the repetition and fold labels from the LAST two levels. On the
    # first run these are levels 0 and 1; after a re-run of this cell the index
    # already has three levels, and negative positions still pick
    # (repetition, fold) instead of (model, repetition).
    table.columns = pd.MultiIndex.from_arrays([
        new_top_level,
        table.columns.get_level_values(-2),
        table.columns.get_level_values(-1),
    ])

### Display

In [31]:
tables_by_dataset["bibtex_trimmed"]

Unnamed: 0_level_0,logistic_regression,logistic_regression,logistic_regression,logistic_regression,logistic_regression,logistic_regression,logistic_regression,logistic_regression,logistic_regression,logistic_regression,...,knn,knn,knn,knn,knn,knn,knn,knn,knn,knn
Unnamed: 0_level_1,R1,R1,R1,R1,R1,R0,R0,R0,R0,R0,...,R1,R1,R1,R1,R1,R0,R0,R0,R0,R0
Unnamed: 0_level_2,CV0,CV1,CV2,CV3,CV4,CV0,CV1,CV2,CV3,CV4,...,CV0,CV1,CV2,CV3,CV4,CV0,CV1,CV2,CV3,CV4
macro_accuracy,0.973244,0.972998,0.972995,0.972834,0.972888,0.971901,0.971902,0.972584,0.97282,0.972979,...,0.972218,0.97195,0.971825,0.971681,0.971552,0.973472,0.973227,0.97298,0.972647,0.97243
micro_accuracy,0.973244,0.972998,0.972995,0.972834,0.972888,0.971901,0.971902,0.972584,0.97282,0.972979,...,0.972218,0.97195,0.971825,0.971681,0.971552,0.973472,0.973227,0.97298,0.972647,0.97243
macro_precision,0.722535,0.718005,0.722787,0.701757,0.693701,0.77451,0.723431,0.723401,0.739693,0.721596,...,0.547497,0.540905,0.542123,0.541558,0.544006,0.583803,0.567754,0.560529,0.557165,0.553231
micro_precision,0.84375,0.842466,0.849398,0.830287,0.829787,0.8,0.835443,0.855856,0.855346,0.849515,...,0.67709,0.649398,0.64451,0.631096,0.623951,0.77619,0.757024,0.737418,0.709318,0.695914
macro_recall,0.122381,0.118754,0.119041,0.119028,0.118267,0.09434,0.106804,0.104572,0.113073,0.117784,...,0.137811,0.134951,0.131017,0.128241,0.125407,0.157928,0.153466,0.148645,0.143949,0.14048
micro_recall,0.122727,0.119475,0.119694,0.119729,0.119145,0.09396,0.109272,0.107466,0.115352,0.118805,...,0.138494,0.135649,0.131748,0.128881,0.12616,0.158304,0.154048,0.149424,0.144435,0.141073
macro_f1,0.176313,0.172896,0.172492,0.172255,0.169939,0.12923,0.149202,0.147154,0.161214,0.16915,...,0.19232,0.188381,0.184086,0.180699,0.177514,0.21558,0.211124,0.205492,0.200074,0.196048
micro_f1,0.214286,0.209273,0.209821,0.209279,0.20837,0.168168,0.193265,0.190955,0.203288,0.208457,...,0.229952,0.22442,0.218775,0.214049,0.209882,0.262974,0.256002,0.248495,0.24,0.23459
MSE,0.023361,0.023622,0.023565,0.02363,0.023605,0.02462,0.024477,0.023963,0.023691,0.023577,...,0.026036,0.026457,0.026692,0.027049,0.027389,0.023495,0.024066,0.024602,0.025136,0.02564
LogLoss,0.101021,0.102188,0.101772,0.101577,0.101628,0.105749,0.105158,0.102742,0.102072,0.101795,...,0.541482,0.596411,0.64696,0.694264,0.743255,0.191617,0.272628,0.349577,0.41942,0.48863


In [27]:
tables_by_dataset["emotions_trimmed"]

Unnamed: 0_level_0,logistic_regression,logistic_regression,logistic_regression,logistic_regression,logistic_regression,logistic_regression,logistic_regression,logistic_regression,logistic_regression,logistic_regression,...,knn,knn,knn,knn,knn,knn,knn,knn,knn,knn
Unnamed: 0_level_1,R1,R1,R1,R1,R1,R0,R0,R0,R0,R0,...,R1,R1,R1,R1,R1,R0,R0,R0,R0,R0
Unnamed: 0_level_2,CV0,CV1,CV2,CV3,CV4,CV0,CV1,CV2,CV3,CV4,...,CV0,CV1,CV2,CV3,CV4,CV0,CV1,CV2,CV3,CV4
macro_accuracy,0.780818,0.781265,0.781328,0.780007,0.784667,0.775081,0.774557,0.774701,0.777639,0.778333,...,0.779721,0.778558,0.779603,0.780057,0.780333,0.77799,0.777626,0.778236,0.779281,0.779267
micro_accuracy,0.780818,0.781265,0.781328,0.780007,0.784667,0.775081,0.774557,0.774701,0.777639,0.778333,...,0.779721,0.778558,0.779603,0.780057,0.780333,0.77799,0.777626,0.778236,0.779281,0.779267
macro_precision,0.662052,0.666316,0.66647,0.667298,0.672867,0.644896,0.637272,0.643061,0.657976,0.658561,...,0.681014,0.677332,0.680925,0.681159,0.68104,0.680878,0.679819,0.677374,0.680408,0.680701
micro_precision,0.667593,0.671463,0.672462,0.672285,0.677654,0.643617,0.638814,0.648897,0.664345,0.665193,...,0.693842,0.68925,0.692168,0.69209,0.691824,0.697506,0.69486,0.692373,0.694833,0.694109
macro_recall,0.623414,0.622039,0.625444,0.619465,0.627841,0.62516,0.616874,0.609502,0.616739,0.622883,...,0.559665,0.560955,0.562781,0.566052,0.568361,0.543865,0.546111,0.552678,0.555523,0.557497
micro_recall,0.624242,0.622683,0.626701,0.620392,0.629149,0.626943,0.618799,0.611785,0.618677,0.624481,...,0.561939,0.563137,0.564943,0.568157,0.570539,0.545836,0.548091,0.554804,0.557788,0.559751
macro_f1,0.640234,0.641991,0.643984,0.641392,0.648504,0.630627,0.624368,0.623835,0.634621,0.638471,...,0.610006,0.609659,0.612253,0.614496,0.615859,0.598515,0.600176,0.603212,0.606348,0.608047
micro_f1,0.64519,0.646154,0.648776,0.645297,0.652501,0.635171,0.628647,0.629795,0.640698,0.644195,...,0.620963,0.619843,0.622118,0.62403,0.625355,0.61242,0.61281,0.616001,0.618814,0.619731
MSE,0.154188,0.154089,0.155177,0.15602,0.153231,0.153097,0.157442,0.155555,0.154623,0.155597,...,0.152663,0.15348,0.153065,0.152778,0.152837,0.153236,0.153762,0.153225,0.152713,0.153006
LogLoss,0.504735,0.50316,0.510308,0.512698,0.504911,0.482261,0.496046,0.490032,0.498256,0.507489,...,1.240509,1.373849,1.450793,1.527112,1.626046,0.592988,0.77119,0.88146,0.983813,1.164115


# Calculate correlation matrices

### Calculate

In [40]:
# Pearson correlation between metrics, one matrix per dataset.
# The tables hold metrics as rows, and DataFrame.corr correlates columns,
# so each table is transposed first.
pearson_correlations_by_dataset_metrics = {}
for dataset, table in tables_by_dataset.items():
    metrics_as_columns = table.T
    pearson_correlations_by_dataset_metrics[dataset] = metrics_as_columns.corr(method='pearson')

In [None]:
# Spearman (rank) correlation between metrics, one matrix per dataset.
# The tables hold metrics as rows, and DataFrame.corr correlates columns,
# so each table is transposed first.
spearman_correlations_by_dataset_metrics = {}
for dataset, table in tables_by_dataset.items():
    metrics_as_columns = table.T
    spearman_correlations_by_dataset_metrics[dataset] = metrics_as_columns.corr(method='spearman')

### Display examples

##### Pearson

In [42]:
pearson_correlations_by_dataset_metrics["bibtex_trimmed"]


Unnamed: 0,macro_accuracy,micro_accuracy,macro_precision,micro_precision,macro_recall,micro_recall,macro_f1,micro_f1,MSE,LogLoss,AUNU,micro_ROC-AUC
macro_accuracy,1.0,1.0,0.076114,0.595768,0.581411,0.582631,0.596273,0.670123,-0.888015,-0.721323,0.605005,0.648119
micro_accuracy,1.0,1.0,0.076114,0.595768,0.581411,0.582631,0.596273,0.670123,-0.888015,-0.721323,0.605005,0.648119
macro_precision,0.076114,0.076114,1.0,0.817963,-0.744523,-0.743331,-0.741489,-0.665856,-0.477345,-0.680667,0.796379,0.758945
micro_precision,0.595768,0.595768,0.817963,1.0,-0.272951,-0.267145,-0.263512,-0.15365,-0.878861,-0.95857,0.984702,0.979391
macro_recall,0.581411,0.581411,-0.744523,-0.272951,1.0,0.99945,0.992776,0.991871,-0.199421,0.062046,-0.239409,-0.178664
micro_recall,0.582631,0.582631,-0.743331,-0.267145,0.99945,1.0,0.991893,0.993051,-0.201913,0.059308,-0.236734,-0.175898
macro_f1,0.596273,0.596273,-0.741489,-0.263512,0.992776,0.991893,1.0,0.987302,-0.202796,0.068247,-0.238451,-0.17991
micro_f1,0.670123,0.670123,-0.665856,-0.15365,0.991871,0.993051,0.987302,1.0,-0.312851,-0.053337,-0.124572,-0.063074
MSE,-0.888015,-0.888015,-0.477345,-0.878861,-0.199421,-0.201913,-0.202796,-0.312851,1.0,0.958227,-0.899109,-0.923318
LogLoss,-0.721323,-0.721323,-0.680667,-0.95857,0.062046,0.059308,0.068247,-0.053337,0.958227,1.0,-0.981917,-0.99181


In [41]:
pearson_correlations_by_dataset_metrics["emotions_trimmed"]

Unnamed: 0,macro_accuracy,micro_accuracy,macro_precision,micro_precision,macro_recall,micro_recall,macro_f1,micro_f1,MSE,LogLoss,AUNU,micro_ROC-AUC
macro_accuracy,1.0,1.0,0.4961,0.356663,0.187539,0.183713,0.432041,0.487507,-0.346455,0.027515,0.330141,0.156864
micro_accuracy,1.0,1.0,0.4961,0.356663,0.187539,0.183713,0.432041,0.487507,-0.346455,0.027515,0.330141,0.156864
macro_precision,0.4961,0.4961,1.0,0.983043,-0.737756,-0.740783,-0.531922,-0.475012,-0.734244,0.419785,-0.513866,-0.697391
micro_precision,0.356663,0.356663,0.983043,1.0,-0.833391,-0.836099,-0.658777,-0.604053,-0.701654,0.386718,-0.619425,-0.775385
macro_recall,0.187539,0.187539,-0.737756,-0.833391,1.0,0.999945,0.963096,0.943374,0.566583,-0.411451,0.891208,0.93857
micro_recall,0.183713,0.183713,-0.740783,-0.836099,0.999945,1.0,0.962002,0.941923,0.56675,-0.408476,0.889066,0.937755
macro_f1,0.432041,0.432041,-0.531922,-0.658777,0.963096,0.962002,1.0,0.99641,0.434246,-0.33332,0.90062,0.890094
micro_f1,0.487507,0.487507,-0.475012,-0.604053,0.943374,0.941923,0.99641,1.0,0.395412,-0.347297,0.906813,0.882774
MSE,-0.346455,-0.346455,-0.734244,-0.701654,0.566583,0.56675,0.434246,0.395412,1.0,-0.470974,0.24716,0.416283
LogLoss,0.027515,0.027515,0.419785,0.386718,-0.411451,-0.408476,-0.33332,-0.347297,-0.470974,1.0,-0.458826,-0.560044


##### Spearman

In [None]:
spearman_correlations_by_dataset_metrics["bibtex_trimmed"]

Unnamed: 0,macro_accuracy,micro_accuracy,macro_precision,micro_precision,macro_recall,micro_recall,macro_f1,micro_f1,MSE,LogLoss,AUNU,micro_ROC-AUC
macro_accuracy,1.0,1.0,0.115462,0.216018,0.661402,0.659177,0.664516,0.71168,-0.92703,-0.822024,0.173749,0.179088
micro_accuracy,1.0,1.0,0.115462,0.216018,0.661402,0.659177,0.664516,0.71168,-0.92703,-0.822024,0.173749,0.179088
macro_precision,0.115462,0.115462,1.0,0.944383,-0.618242,-0.620022,-0.619132,-0.560845,-0.335261,-0.412236,0.931034,0.931924
micro_precision,0.216018,0.216018,0.944383,1.0,-0.553281,-0.555061,-0.553281,-0.49277,-0.413126,-0.474527,0.955061,0.959511
macro_recall,0.661402,0.661402,-0.618242,-0.553281,1.0,0.999555,0.998665,0.994661,-0.454505,-0.329922,-0.577308,-0.576418
micro_recall,0.659177,0.659177,-0.620022,-0.555061,0.999555,1.0,0.99822,0.994216,-0.452725,-0.330812,-0.575973,-0.575083
macro_f1,0.664516,0.664516,-0.619132,-0.553281,0.998665,0.99822,1.0,0.993326,-0.453615,-0.326363,-0.582202,-0.581313
micro_f1,0.71168,0.71168,-0.560845,-0.49277,0.994661,0.994216,0.993326,1.0,-0.518131,-0.392659,-0.522136,-0.521246
MSE,-0.92703,-0.92703,-0.335261,-0.413126,-0.454505,-0.452725,-0.453615,-0.518131,1.0,0.958621,-0.384205,-0.386874
LogLoss,-0.822024,-0.822024,-0.412236,-0.474527,-0.329922,-0.330812,-0.326363,-0.392659,0.958621,1.0,-0.474082,-0.476752


In [None]:
spearman_correlations_by_dataset_metrics["emotions_trimmed"]

Unnamed: 0,macro_accuracy,micro_accuracy,macro_precision,micro_precision,macro_recall,micro_recall,macro_f1,micro_f1,MSE,LogLoss,AUNU,micro_ROC-AUC
macro_accuracy,1.0,1.0,0.094994,-0.113237,0.400222,0.380645,0.533259,0.56307,-0.103003,0.220022,0.502113,0.288988
micro_accuracy,1.0,1.0,0.094994,-0.113237,0.400222,0.380645,0.533259,0.56307,-0.103003,0.220022,0.502113,0.288988
macro_precision,0.094994,0.094994,1.0,0.835818,-0.726808,-0.730812,-0.665851,-0.657842,-0.745495,0.45317,-0.585317,-0.7802
micro_precision,-0.113237,-0.113237,0.835818,1.0,-0.916796,-0.922581,-0.866518,-0.850056,-0.535929,0.09366,-0.687653,-0.764627
macro_recall,0.400222,0.400222,-0.726808,-0.916796,1.0,0.99822,0.971079,0.962625,0.437152,-0.061624,0.85317,0.854505
micro_recall,0.380645,0.380645,-0.730812,-0.922581,0.99822,1.0,0.967075,0.957731,0.430033,-0.066518,0.849611,0.849166
macro_f1,0.533259,0.533259,-0.665851,-0.866518,0.971079,0.967075,1.0,0.995106,0.430923,0.011791,0.853615,0.828254
micro_f1,0.56307,0.56307,-0.657842,-0.850056,0.962625,0.957731,0.995106,1.0,0.41624,-0.010456,0.875417,0.836263
MSE,-0.103003,-0.103003,-0.745495,-0.535929,0.437152,0.430033,0.430923,0.41624,1.0,-0.398443,0.268966,0.450501
LogLoss,0.220022,0.220022,0.45317,0.09366,-0.061624,-0.066518,0.011791,-0.010456,-0.398443,1.0,-0.19733,-0.389099


# Save

In [55]:
def save_correlation_matrices(correlations_by_dataset, corr_type, filename):
    """Write one JSON file per dataset containing its correlation matrix.

    Each file is written to
    ``report_output_root_dir / <classification_type> / <class_balance> / <dataset_name> / filename``,
    where the three path parts come from the dataset's entry in
    ``multilabel_datasets``. Missing directories are created and an existing
    file is overwritten.

    Args:
        correlations_by_dataset: Mapping of dataset name to a correlation
            matrix exposing ``to_dict()`` (the notebook passes DataFrames).
        corr_type: Label stored under ``"correlation_type"`` in the output.
        filename: Name of the JSON file created for each dataset.

    Raises:
        KeyError: If a dataset name has no entry in ``multilabel_datasets``.
    """
    described_fields = ("dataset_name", "classification_type", "class_balance")

    for name, matrix in correlations_by_dataset.items():
        info = multilabel_datasets[name]
        target = report_output_root_dir.joinpath(
            info["classification_type"],
            info["class_balance"],
            info["dataset_name"],
            filename,
        )

        # Dataset description first, then the correlation type and the matrix.
        payload = {field: info[field] for field in described_fields}
        payload["correlation_type"] = corr_type
        payload["correlation_matrix"] = matrix.to_dict()

        target.parent.mkdir(parents=True, exist_ok=True)
        with open(target, "w", encoding="utf-8") as f:
            json.dump(payload, f, indent=2)

In [None]:
# Persist both correlation variants next to each dataset's report output.
save_correlation_matrices(pearson_correlations_by_dataset_metrics, "pearson", "pearson_correlation.json")
# The Spearman matrices live in `spearman_correlations_by_dataset_metrics`;
# no `rank_correlations_by_dataset_metrics` is defined anywhere in the notebook.
save_correlation_matrices(spearman_correlations_by_dataset_metrics, "spearman", "spearman_correlation.json")