# Setup

In [1]:
import os
import sys

# Put the directory two levels above the current working directory on sys.path.
# Presumably this is the repository root, needed for the `src.` imports in the
# cells below — TODO confirm.
sys.path.append(os.path.abspath("../.."))

In [2]:
from pathlib import Path
import json
import numpy as np


In [3]:
# Root directory that is searched for correlation reports further down and that
# receives the averaged matrices; displayed to confirm the resolved path.
from src.experiment.helpers.variables import report_output_root_dir
report_output_root_dir

WindowsPath('C:/VisualStudioRepositories/MUSIC_DATA/metric_analysis/output')

### Datasets

In [13]:
from src.experiment.sets.binary_balanced_datasets import binary_balanced_datasets

# Keys of the balanced binary registry, in registry order.
binary_balanced_dataset_names = [*binary_balanced_datasets.keys()]
binary_balanced_dataset_names

['all-in-one_sentiment_balanced',
 'amazon-reviews-0.25_balanced',
 'ceas_balanced',
 'colon-0.5_balanced',
 'fake-news_balanced',
 'news-sarcasm_balanced',
 'philippine_balanced',
 'santander-customer-satisfaction_balanced',
 'spambase_balanced',
 'vehicle-sensit_balanced']

In [14]:
from src.experiment.sets.binary_imbalanced_datasets import binary_imbalanced_datasets

# Keys of the imbalanced binary registry, in registry order.
binary_imbalanced_dataset_names = [*binary_imbalanced_datasets.keys()]
binary_imbalanced_dataset_names

['all-in-one_sentiment_imbalanced',
 'amazon-reviews-0.25_imbalanced',
 'ceas_imbalanced',
 'colon-0.5_imbalanced',
 'fake-news_imbalanced',
 'news-sarcasm_imbalanced',
 'philippine_imbalanced',
 'santander-customer-satisfaction_imbalanced',
 'spambase_imbalanced',
 'vehicle-sensit_imbalanced']

In [15]:
from src.experiment.sets.multiclass_balanced_datasets import multiclass_balanced_datasets

# Keys of the balanced multiclass registry, in registry order.
multiclass_balanced_dataset_names = [*multiclass_balanced_datasets.keys()]
multiclass_balanced_dataset_names

['ag-news_balanced',
 'dbpedia-ontology_balanced',
 'gas-drift_balanced',
 'gtsrb-huelist_balanced',
 'irish-times_balanced',
 'mfeat-karhunen_balanced',
 'news-category_balanced',
 'nyt-comments-april17_balanced',
 'usps_balanced',
 'volkert_balanced']

In [16]:
from src.experiment.sets.multiclass_imbalanced_datasets import multiclass_imbalanced_datasets

# Keys of the imbalanced multiclass registry, in registry order.
multiclass_imbalanced_dataset_names = [*multiclass_imbalanced_datasets.keys()]
multiclass_imbalanced_dataset_names

['ag-news_imbalanced',
 'dbpedia-ontology_imbalanced',
 'gas-drift_imbalanced',
 'gtsrb-huelist_imbalanced',
 'irish-times_imbalanced',
 'mfeat-karhunen_imbalanced',
 'news-category_imbalanced',
 'nyt-comments-april17_imbalanced',
 'usps_imbalanced',
 'volkert_imbalanced']

In [17]:
from src.experiment.sets.multilabel_balanced_datasets import multilabel_balanced_datasets

# Keys of the balanced multilabel registry, in registry order.
multilabel_balanced_dataset_names = [*multilabel_balanced_datasets.keys()]
multilabel_balanced_dataset_names

['bookmarks_balanced',
 'emotions_balanced',
 'imdb_balanced',
 'mediamill_balanced',
 'ng20_balanced',
 'nuswidevlad_balanced',
 'scene_balanced',
 'tmc2007500_balanced',
 'yeast_balanced',
 'yelp_balanced']

In [18]:
from src.experiment.sets.multilabel_imbalanced_datasets import multilabel_imbalanced_datasets

# Keys of the imbalanced multilabel registry, in registry order.
multilabel_imbalanced_dataset_names = [*multilabel_imbalanced_datasets.keys()]
multilabel_imbalanced_dataset_names

['bookmarks_imbalanced',
 'emotions_imbalanced',
 'imdb_imbalanced',
 'mediamill_imbalanced',
 'ng20_imbalanced',
 'nuswidevlad_imbalanced',
 'scene_imbalanced',
 'tmc2007500_imbalanced',
 'yeast_imbalanced',
 'yelp_imbalanced']

In [19]:
# Every dataset name, grouped as binary -> multiclass -> multilabel, with the
# balanced variant of each group listed before the imbalanced one.
all_dataset_names = [
    *binary_balanced_dataset_names,
    *binary_imbalanced_dataset_names,
    *multiclass_balanced_dataset_names,
    *multiclass_imbalanced_dataset_names,
    *multilabel_balanced_dataset_names,
    *multilabel_imbalanced_dataset_names,
]
all_dataset_names

['all-in-one_sentiment_balanced',
 'amazon-reviews-0.25_balanced',
 'ceas_balanced',
 'colon-0.5_balanced',
 'fake-news_balanced',
 'news-sarcasm_balanced',
 'philippine_balanced',
 'santander-customer-satisfaction_balanced',
 'spambase_balanced',
 'vehicle-sensit_balanced',
 'all-in-one_sentiment_imbalanced',
 'amazon-reviews-0.25_imbalanced',
 'ceas_imbalanced',
 'colon-0.5_imbalanced',
 'fake-news_imbalanced',
 'news-sarcasm_imbalanced',
 'philippine_imbalanced',
 'santander-customer-satisfaction_imbalanced',
 'spambase_imbalanced',
 'vehicle-sensit_imbalanced',
 'ag-news_balanced',
 'dbpedia-ontology_balanced',
 'gas-drift_balanced',
 'gtsrb-huelist_balanced',
 'irish-times_balanced',
 'mfeat-karhunen_balanced',
 'news-category_balanced',
 'nyt-comments-april17_balanced',
 'usps_balanced',
 'volkert_balanced',
 'ag-news_imbalanced',
 'dbpedia-ontology_imbalanced',
 'gas-drift_imbalanced',
 'gtsrb-huelist_imbalanced',
 'irish-times_imbalanced',
 'mfeat-karhunen_imbalanced',
 'news-c

In [20]:
# Sanity check: six groups of ten names each (60 in the recorded run).
len(all_dataset_names)

60

### Imbalanced datasets by degree of skew

Binary

In [4]:
# Binary imbalanced datasets assigned to the mildly skewed group.
# Hard-coded; the criterion separating mild from strong skew is not shown in this notebook.
binary_mildly_imbalanced_dataset_names = [
    "amazon-reviews-0.25_imbalanced",
    "colon-0.5_imbalanced",
    "news-sarcasm_imbalanced",
    "philippine_imbalanced",
    "spambase_imbalanced"
]

In [5]:
# Binary imbalanced datasets assigned to the strongly skewed group.
# Hard-coded; the criterion separating mild from strong skew is not shown in this notebook.
binary_strongly_imbalanced_dataset_names = [
    "all-in-one_sentiment_imbalanced",
    "ceas_imbalanced",
    "fake-news_imbalanced",
    "santander-customer-satisfaction_imbalanced",
    "vehicle-sensit_imbalanced"
]

Multiclass

In [6]:
# Multiclass imbalanced datasets assigned to the mildly skewed group.
# Hard-coded; the criterion separating mild from strong skew is not shown in this notebook.
multiclass_mildly_imbalanced_dataset_names = [
    "ag-news_imbalanced",
    "dbpedia-ontology_imbalanced",
    "gas-drift_imbalanced",
    "mfeat-karhunen_imbalanced",
    "usps_imbalanced"
]

In [7]:
# Multiclass imbalanced datasets assigned to the strongly skewed group.
# Hard-coded; the criterion separating mild from strong skew is not shown in this notebook.
multiclass_strongly_imbalanced_dataset_names = [
    "gtsrb-huelist_imbalanced",
    "irish-times_imbalanced",
    "news-category_imbalanced",
    "nyt-comments-april17_imbalanced",
    "volkert_imbalanced"
]

Multilabel

In [8]:
# Multilabel imbalanced datasets assigned to the mildly skewed group.
# Hard-coded; the criterion separating mild from strong skew is not shown in this notebook.
multilabel_mildly_imbalanced_dataset_names = [
    "emotions_imbalanced",
    "imdb_imbalanced",
    "scene_imbalanced",
    "yeast_imbalanced",
    "yelp_imbalanced"
]

In [9]:
# Multilabel imbalanced datasets assigned to the strongly skewed group.
# Hard-coded; the criterion separating mild from strong skew is not shown in this notebook.
multilabel_strongly_imbalanced_dataset_names = [
    "bookmarks_imbalanced",
    "mediamill_imbalanced",
    "ng20_imbalanced",
    "nuswidevlad_imbalanced",
    "tmc2007500_imbalanced"
]

All

In [10]:
# All mildly skewed datasets: binary first, then multiclass, then multilabel.
mildly_imbalanced_dataset_names = [
    *binary_mildly_imbalanced_dataset_names,
    *multiclass_mildly_imbalanced_dataset_names,
    *multilabel_mildly_imbalanced_dataset_names,
]
mildly_imbalanced_dataset_names

['amazon-reviews-0.25_imbalanced',
 'colon-0.5_imbalanced',
 'news-sarcasm_imbalanced',
 'philippine_imbalanced',
 'spambase_imbalanced',
 'ag-news_imbalanced',
 'dbpedia-ontology_imbalanced',
 'gas-drift_imbalanced',
 'mfeat-karhunen_imbalanced',
 'usps_imbalanced',
 'emotions_imbalanced',
 'imdb_imbalanced',
 'scene_imbalanced',
 'yeast_imbalanced',
 'yelp_imbalanced']

In [11]:
# Sanity check: three groups of five names each (15 in the recorded run).
len(mildly_imbalanced_dataset_names)

15

In [12]:
# All strongly skewed datasets: binary first, then multiclass, then multilabel.
strongly_imbalanced_dataset_names = [
    *binary_strongly_imbalanced_dataset_names,
    *multiclass_strongly_imbalanced_dataset_names,
    *multilabel_strongly_imbalanced_dataset_names,
]
strongly_imbalanced_dataset_names

['all-in-one_sentiment_imbalanced',
 'ceas_imbalanced',
 'fake-news_imbalanced',
 'santander-customer-satisfaction_imbalanced',
 'vehicle-sensit_imbalanced',
 'gtsrb-huelist_imbalanced',
 'irish-times_imbalanced',
 'news-category_imbalanced',
 'nyt-comments-april17_imbalanced',
 'volkert_imbalanced',
 'bookmarks_imbalanced',
 'mediamill_imbalanced',
 'ng20_imbalanced',
 'nuswidevlad_imbalanced',
 'tmc2007500_imbalanced']

In [13]:
# Sanity check: three groups of five names each (15 in the recorded run).
len(strongly_imbalanced_dataset_names)

15

# Get all correlation JSONs

In [14]:
# Collect every correlation report below the output root.
# The patterns contain no wildcard, so every match has the same file name and
# sorting by `.name` would leave the order to the filesystem. Sorting the paths
# themselves gives a reproducible order.
pearson_correlation_files = sorted(report_output_root_dir.rglob('pearson_correlation.json'))
spearman_correlation_files = sorted(report_output_root_dir.rglob('spearman_correlation.json'))
correlation_files = pearson_correlation_files + spearman_correlation_files


In [15]:
# One count per line: Pearson files, Spearman files, both combined.
print(
    len(pearson_correlation_files),
    len(spearman_correlation_files),
    len(correlation_files),
    sep="\n",
)

60
60
120


# Functions

In [18]:
def average_corr_matrices(files, classification_types, class_balances, datasets, correlation):
    """Average, cell by cell, the correlation matrices of all matching report files.

    Every path in ``files`` is parsed as JSON. A file is selected when its
    ``classification_type``, ``class_balance`` and ``dataset_name`` fields are
    among the requested values and its ``correlation_type`` equals
    ``correlation``. The ``correlation_matrix`` of each selected file (a nested
    ``{row: {column: value}}`` dict) is then averaged with ``np.mean``; the
    row/column layout is taken from the first selected matrix.

    Args:
        files: Iterable of paths to JSON report files.
        classification_types: Accepted ``classification_type`` values.
        class_balances: Accepted ``class_balance`` values.
        datasets: Accepted ``dataset_name`` values.
        correlation: Required ``correlation_type`` value.

        Each of the three filters may be a collection or a single string. A
        single string is treated as a one-element collection.

    Returns:
        ``None`` when no file matches. Otherwise a dict with the keys
        ``classification_types``, ``class_balances``, ``correlation_type`` and
        ``datasets`` (the arguments exactly as passed in) plus ``matrix``, the
        averaged ``{row: {column: float}}`` dict.

    Raises:
        KeyError: If a selected file has no ``correlation_matrix`` or lacks a
            cell that the first selected matrix contains.
    """
    def _as_collection(value):
        # On a bare string, `x in value` is a substring test: "balanced" would
        # match "imbalanced", and a missing field (None) would raise TypeError.
        return [value] if isinstance(value, str) else value

    wanted_types = _as_collection(classification_types)
    wanted_balances = _as_collection(class_balances)
    wanted_datasets = _as_collection(datasets)

    correlation_matrices = []

    for file_path in files:
        with open(file_path, 'r') as f:
            data = json.load(f)

        if (
            data.get("classification_type") in wanted_types and
            data.get("class_balance") in wanted_balances and
            data.get("dataset_name") in wanted_datasets and
            data.get("correlation_type") == correlation
        ):
            correlation_matrices.append(data["correlation_matrix"])

    if not correlation_matrices:
        return None

    # The first matrix defines which rows and columns are averaged.
    reference = correlation_matrices[0]
    averaged_matrix = {
        metric: {
            k: float(np.mean([m[metric][k] for m in correlation_matrices]))
            for k in reference[metric].keys()
        }
        for metric in reference.keys()
    }

    # Metadata echoes the caller's arguments unchanged so existing consumers of
    # the saved JSON keep seeing the same shape.
    return {
        "classification_types": classification_types,
        "class_balances": class_balances,
        "correlation_type": correlation,
        "datasets": datasets,
        "matrix": averaged_matrix
    }

In [None]:
def average_and_save_matrix(files, classification_types, class_balances, datasets, suffix, root_dir=report_output_root_dir):
    """Average the matching matrices per correlation type and save each result as JSON.

    For "pearson" and then "spearman", ``average_corr_matrices`` is called with
    the given filters. A falsy result (nothing matched) is skipped. Any other
    result is written to ``root_dir / "averaged_<type>_correlation_<suffix>.json"``
    with two-space indentation. Returns ``None``.
    """
    filters = dict(
        files=files,
        classification_types=classification_types,
        class_balances=class_balances,
        datasets=datasets,
    )

    for kind in ("pearson", "spearman"):
        averaged = average_corr_matrices(correlation=kind, **filters)

        if averaged:
            target = root_dir / f"averaged_{kind}_correlation_{suffix}.json"
            with open(target, "w") as out:
                json.dump(averaged, out, indent=2)

# Create matrices

All

In [None]:
# Every dataset, every classification type, both balance settings.
average_and_save_matrix(
    files=correlation_files,
    classification_types=["binary", "multiclass", "multilabel"],
    class_balances=["balanced", "imbalanced"],
    datasets=all_dataset_names,
    suffix="all",
)

By classification type

In [None]:
# Binary datasets, both balance settings.
average_and_save_matrix(
    files=correlation_files,
    classification_types=["binary"],
    class_balances=["balanced", "imbalanced"],
    datasets=[*binary_balanced_dataset_names, *binary_imbalanced_dataset_names],
    suffix="binary",
)

In [None]:
# Multiclass datasets, both balance settings.
average_and_save_matrix(
    files=correlation_files,
    classification_types=["multiclass"],
    class_balances=["balanced", "imbalanced"],
    datasets=[*multiclass_balanced_dataset_names, *multiclass_imbalanced_dataset_names],
    suffix="multiclass",
)

In [None]:
# Multilabel datasets, both balance settings.
average_and_save_matrix(
    files=correlation_files,
    classification_types=["multilabel"],
    class_balances=["balanced", "imbalanced"],
    datasets=[*multilabel_balanced_dataset_names, *multilabel_imbalanced_dataset_names],
    suffix="multilabel",
)

By class balance

In [None]:
# Balanced datasets of every classification type.
# class_balances is a list: on a bare string, `in` is a substring test
# ("balanced" in "imbalanced" is True).
average_and_save_matrix(
    files=correlation_files,
    classification_types=["binary", "multiclass", "multilabel"],
    class_balances=["balanced"],
    datasets=binary_balanced_dataset_names + multiclass_balanced_dataset_names + multilabel_balanced_dataset_names,
    suffix="balanced",
)

In [None]:
# Imbalanced datasets of every classification type.
# class_balances is a list: on a bare string, `in` is a substring test
# ("balanced" in "imbalanced" is True).
average_and_save_matrix(
    files=correlation_files,
    classification_types=["binary", "multiclass", "multilabel"],
    class_balances=["imbalanced"],
    datasets=binary_imbalanced_dataset_names + multiclass_imbalanced_dataset_names + multilabel_imbalanced_dataset_names,
    suffix="imbalanced",
)

Both classification type and class balance

In [None]:
# Binary, balanced.
# class_balances is a list: on a bare string, `in` is a substring test
# ("balanced" in "imbalanced" is True).
average_and_save_matrix(
    files=correlation_files,
    classification_types=["binary"],
    class_balances=["balanced"],
    datasets=binary_balanced_dataset_names,
    suffix="binary_balanced",
)

In [None]:
# Binary, imbalanced.
# class_balances is a list: on a bare string, `in` is a substring test
# ("balanced" in "imbalanced" is True).
average_and_save_matrix(
    files=correlation_files,
    classification_types=["binary"],
    class_balances=["imbalanced"],
    datasets=binary_imbalanced_dataset_names,
    suffix="binary_imbalanced",
)

In [None]:
# Multiclass, balanced.
# class_balances is a list: on a bare string, `in` is a substring test
# ("balanced" in "imbalanced" is True).
average_and_save_matrix(
    files=correlation_files,
    classification_types=["multiclass"],
    class_balances=["balanced"],
    datasets=multiclass_balanced_dataset_names,
    suffix="multiclass_balanced",
)

In [None]:
# Multiclass, imbalanced.
# class_balances is a list: on a bare string, `in` is a substring test
# ("balanced" in "imbalanced" is True).
average_and_save_matrix(
    files=correlation_files,
    classification_types=["multiclass"],
    class_balances=["imbalanced"],
    datasets=multiclass_imbalanced_dataset_names,
    suffix="multiclass_imbalanced",
)

In [None]:
# Multilabel, balanced.
# class_balances is a list: on a bare string, `in` is a substring test
# ("balanced" in "imbalanced" is True).
average_and_save_matrix(
    files=correlation_files,
    classification_types=["multilabel"],
    class_balances=["balanced"],
    datasets=multilabel_balanced_dataset_names,
    suffix="multilabel_balanced",
)

In [None]:
# Multilabel, imbalanced.
# class_balances is a list: on a bare string, `in` is a substring test
# ("balanced" in "imbalanced" is True).
average_and_save_matrix(
    files=correlation_files,
    classification_types=["multilabel"],
    class_balances=["imbalanced"],
    datasets=multilabel_imbalanced_dataset_names,
    suffix="multilabel_imbalanced",
)

### Imbalanced datasets by degree of skew

Binary

In [None]:
# Binary, mildly skewed subset.
# class_balances is a list: on a bare string, `in` is a substring test
# ("balanced" in "imbalanced" is True).
average_and_save_matrix(
    files=correlation_files,
    classification_types=["binary"],
    class_balances=["imbalanced"],
    datasets=binary_mildly_imbalanced_dataset_names,
    suffix="binary_imbalanced_mildly",
)

In [None]:
# Binary, strongly skewed subset.
# class_balances is a list: on a bare string, `in` is a substring test
# ("balanced" in "imbalanced" is True).
average_and_save_matrix(
    files=correlation_files,
    classification_types=["binary"],
    class_balances=["imbalanced"],
    datasets=binary_strongly_imbalanced_dataset_names,
    suffix="binary_imbalanced_strongly",
)

Multiclass

In [None]:
# Multiclass, mildly skewed subset.
# class_balances is a list: on a bare string, `in` is a substring test
# ("balanced" in "imbalanced" is True).
average_and_save_matrix(
    files=correlation_files,
    classification_types=["multiclass"],
    class_balances=["imbalanced"],
    datasets=multiclass_mildly_imbalanced_dataset_names,
    suffix="multiclass_imbalanced_mildly",
)

In [None]:
# Multiclass, strongly skewed subset.
# class_balances is a list: on a bare string, `in` is a substring test
# ("balanced" in "imbalanced" is True).
average_and_save_matrix(
    files=correlation_files,
    classification_types=["multiclass"],
    class_balances=["imbalanced"],
    datasets=multiclass_strongly_imbalanced_dataset_names,
    suffix="multiclass_imbalanced_strongly",
)

Multilabel

In [None]:
# Multilabel, mildly skewed subset.
# class_balances is a list: on a bare string, `in` is a substring test
# ("balanced" in "imbalanced" is True).
average_and_save_matrix(
    files=correlation_files,
    classification_types=["multilabel"],
    class_balances=["imbalanced"],
    datasets=multilabel_mildly_imbalanced_dataset_names,
    suffix="multilabel_imbalanced_mildly",
)

In [None]:
# Multilabel, strongly skewed subset.
# class_balances is a list: on a bare string, `in` is a substring test
# ("balanced" in "imbalanced" is True).
average_and_save_matrix(
    files=correlation_files,
    classification_types=["multilabel"],
    class_balances=["imbalanced"],
    datasets=multilabel_strongly_imbalanced_dataset_names,
    suffix="multilabel_imbalanced_strongly",
)

All

In [None]:
# Mildly skewed datasets of every classification type.
# class_balances is a list: on a bare string, `in` is a substring test
# ("balanced" in "imbalanced" is True).
average_and_save_matrix(
    files=correlation_files,
    classification_types=["binary", "multiclass", "multilabel"],
    class_balances=["imbalanced"],
    datasets=mildly_imbalanced_dataset_names,
    suffix="imbalanced_mildly",
)

In [None]:
# Strongly skewed datasets of every classification type.
# class_balances is a list: on a bare string, `in` is a substring test
# ("balanced" in "imbalanced" is True).
average_and_save_matrix(
    files=correlation_files,
    classification_types=["binary", "multiclass", "multilabel"],
    class_balances=["imbalanced"],
    datasets=strongly_imbalanced_dataset_names,
    suffix="imbalanced_strongly",
)

# Calculate differences

In [44]:
# Averaged Pearson matrices found below the output root, ordered by file name.
pearson_avg_files = list(report_output_root_dir.rglob('averaged_pearson_correlation_*.json'))
pearson_avg_files.sort(key=lambda path: path.name)
pearson_avg_files

[WindowsPath('C:/VisualStudioRepositories/MUSIC_DATA/metric_analysis/output/averaged_pearson_correlation_all.json'),
 WindowsPath('C:/VisualStudioRepositories/MUSIC_DATA/metric_analysis/output/averaged_pearson_correlation_balanced.json'),
 WindowsPath('C:/VisualStudioRepositories/MUSIC_DATA/metric_analysis/output/averaged_pearson_correlation_binary.json'),
 WindowsPath('C:/VisualStudioRepositories/MUSIC_DATA/metric_analysis/output/averaged_pearson_correlation_binary_balanced.json'),
 WindowsPath('C:/VisualStudioRepositories/MUSIC_DATA/metric_analysis/output/averaged_pearson_correlation_binary_imbalanced.json'),
 WindowsPath('C:/VisualStudioRepositories/MUSIC_DATA/metric_analysis/output/averaged_pearson_correlation_binary_imbalanced_mildly.json'),
 WindowsPath('C:/VisualStudioRepositories/MUSIC_DATA/metric_analysis/output/averaged_pearson_correlation_binary_imbalanced_strongly.json'),
 WindowsPath('C:/VisualStudioRepositories/MUSIC_DATA/metric_analysis/output/averaged_pearson_correlatio

In [45]:
# Number of averaged Pearson files found (20 in the recorded run).
len(pearson_avg_files)

20

In [46]:
# For each averaged Pearson file, locate the matrix cell where it deviates most
# (in absolute value) from the Spearman file with the mirrored name.
results = []

for pearson_path in pearson_avg_files:
    spearman_path = pearson_path.with_name(pearson_path.name.replace("pearson", "spearman"))
    if not spearman_path.exists():
        # No counterpart on disk: nothing to compare against.
        continue

    with open(pearson_path, "r") as f:
        pearson_json = json.load(f)
    with open(spearman_path, "r") as f:
        spearman_json = json.load(f)

    pearson_matrix = pearson_json["matrix"]
    spearman_matrix = spearman_json["matrix"]

    # The strict ">" keeps the first cell encountered when several cells tie.
    max_diff, max_pair = -float("inf"), (None, None)
    for row_key, pearson_row in pearson_matrix.items():
        for col_key, pearson_value in pearson_row.items():
            diff = abs(pearson_value - spearman_matrix[row_key][col_key])
            if diff > max_diff:
                max_diff, max_pair = diff, (row_key, col_key)

    results.append(dict(
        pearson_file=str(pearson_path),
        spearman_file=str(spearman_path),
        max_difference=max_diff,
        max_pair=max_pair,
    ))

results

[{'pearson_file': 'C:\\VisualStudioRepositories\\MUSIC_DATA\\metric_analysis\\output\\averaged_pearson_correlation_all.json',
  'spearman_file': 'C:\\VisualStudioRepositories\\MUSIC_DATA\\metric_analysis\\output\\averaged_spearman_correlation_all.json',
  'max_difference': 0.06457750662595774,
  'max_pair': ('micro_precision', 'AUNU')},
 {'pearson_file': 'C:\\VisualStudioRepositories\\MUSIC_DATA\\metric_analysis\\output\\averaged_pearson_correlation_balanced.json',
  'spearman_file': 'C:\\VisualStudioRepositories\\MUSIC_DATA\\metric_analysis\\output\\averaged_spearman_correlation_balanced.json',
  'max_difference': 0.09125347012269569,
  'max_pair': ('macro_precision', 'LogLoss')},
 {'pearson_file': 'C:\\VisualStudioRepositories\\MUSIC_DATA\\metric_analysis\\output\\averaged_pearson_correlation_binary.json',
  'spearman_file': 'C:\\VisualStudioRepositories\\MUSIC_DATA\\metric_analysis\\output\\averaged_spearman_correlation_binary.json',
  'max_difference': 0.11642491535211885,
  'max_p

In [47]:
# Report the single largest Pearson-vs-Spearman gap across all averaged files.
# Guarding on `results` matters: the previous scan never assigned `pearson_file`
# when nothing was compared, so the print either raised NameError or showed a
# value left over from an earlier kernel run.
if results:
    # max() returns the first entry on ties, like a strict ">" scan would.
    worst_result = max(results, key=lambda result: result["max_difference"])
    overall_max_diff = worst_result["max_difference"]
    overall_max_pair = worst_result["max_pair"]
    pearson_file = worst_result["pearson_file"]
    print(f"Overall max difference: {overall_max_diff} between {overall_max_pair[0]} and {overall_max_pair[1]} for file\n{pearson_file}")
else:
    overall_max_diff = -float("inf")
    overall_max_pair = (None, None)
    pearson_file = None
    print("No averaged Pearson/Spearman file pairs were found to compare.")

Overall max difference: 0.17181748186867218 between macro_recall and LogLoss for file
C:\VisualStudioRepositories\MUSIC_DATA\metric_analysis\output\averaged_pearson_correlation_binary_imbalanced_mildly.json
