In [None]:
import pandas as pd
import numpy as np
from scipy.stats import binomtest
import json
import operator

import metrics

# --- Notebook settings -------------------------------------------------------
results_path = "../results/"   # root folder of the per-task result CSV files
time_results_column = "mae"    # CSV column read for time-series results
p_baseline = 0.5  # baseline for binomial test
round_precision = 4            # precision passed to metrics.safe_round for p-values

# How a test's "condition" is read:
#
#     more parameters <condition> fewer parameters
#
# i.e. the comparison cells call condition(metric of the entry sorted first,
# metric of the entry sorted later), with entries sorted by "sort_criteria"
# in descending order.
#
# Comparison operators that can be plugged in:
#     ">"  -> operator.gt
#     "<"  -> operator.lt
#     "==" -> operator.eq
#     "!=" -> operator.ne
#     ">=" -> operator.ge
#     "<=" -> operator.le

# One entry per binomial test:
#   name          - label printed in the first column of the result table
#   sort_criteria - callable that ranks the models of a comparison group
#   metric        - callable applied to each model's result distribution
#   condition     - two-argument comparison counted as a "success"
bionm_tests = [
    dict(
        name="Range",
        sort_criteria=metrics.parameters,
        metric=metrics.range,
        condition=operator.lt,
    ),
    dict(
        name="Standard Deviation",
        sort_criteria=metrics.parameters,
        metric=metrics.standard_deviation,
        condition=operator.lt,
    ),
    dict(
        name="Skewness",
        sort_criteria=metrics.parameters,
        metric=metrics.absolute_value_skew,
        condition=operator.lt,
    ),
    dict(
        name="Lower Fence Outliers",
        sort_criteria=metrics.parameters,
        metric=metrics.lower_fence_outliers,
        condition=operator.lt,
    ),
    dict(
        name="Upper Fence Outliers",
        sort_criteria=metrics.parameters,
        metric=metrics.upper_fence_outliers,
        condition=operator.lt,
    ),
    dict(
        name="All Outliers",
        sort_criteria=metrics.parameters,
        metric=metrics.all_outliers,
        condition=operator.lt,
    ),
]

In [None]:
def get_distribution(file, return_error):
    """Load the list of result values stored in a results CSV.

    The kind of result file is inferred from the path itself:

    * a path containing "image_classification" yields the "test_accuracy"
      column, optionally converted with ``metrics.accuracy_to_error``;
    * a path containing "time_series" yields the column named by the
      module-level ``time_results_column``.

    Args:
        file: Path to the CSV file. Must contain either
            "image_classification" or "time_series".
        return_error: Only consulted for image-classification files. When
            truthy, the accuracies are passed through
            ``metrics.accuracy_to_error`` before being returned. It is not
            used for time-series files (presumably because the configured
            column is already an error measure - confirm).

    Returns:
        list: The values of the selected column, in file order.

    Raises:
        ValueError: If the path matches neither known result kind.
            Previously this case fell through and returned ``None``, which
            only surfaced later as an unrelated error in the caller.
    """
    if "image_classification" in file:
        accuracies = pd.read_csv(file)["test_accuracy"].tolist()
        if return_error:
            return metrics.accuracy_to_error(accuracies)
        return accuracies

    if "time_series" in file:
        return pd.read_csv(file)[time_results_column].tolist()

    raise ValueError(
        f"Cannot infer the result type from path {file!r}: expected it to "
        "contain 'image_classification' or 'time_series'."
    )


In [None]:
def calculate_k_same_total(test_results, test_metric, condition):
    """Count pairwise metric comparisons between all entries of a group.

    Every entry is compared with every entry that follows it in
    ``test_results``, so the order of the input decides which value ends up
    on the left-hand side of ``condition``.

    Args:
        test_results: Sequence of dicts, each holding a "distribution" entry
            that ``test_metric`` accepts.
        test_metric: Callable mapping a distribution to a comparable value.
        condition: Two-argument predicate, called as
            ``condition(earlier_value, later_value)``.

    Returns:
        tuple: ``(k, same, total)`` where ``k`` is the number of pairs that
        satisfy ``condition``, ``same`` the number of remaining pairs whose
        metric values are equal, and ``total`` the number of pairs compared.
    """
    # With fewer than two entries there is no pair to compare; returning
    # here also keeps test_metric uncalled in that case.
    if len(test_results) < 2:
        return 0, 0, 0

    # Evaluate the metric once per entry rather than once per pair: the
    # value for a given entry is the same in every pair it takes part in.
    metric_values = [test_metric(entry["distribution"]) for entry in test_results]

    k = 0
    same = 0
    total = 0

    for position, earlier_value in enumerate(metric_values):
        for later_value in metric_values[position + 1:]:
            if condition(earlier_value, later_value):
                k += 1
            elif earlier_value == later_value:
                # Only counted as a tie when the condition itself did not hold.
                same += 1

            total += 1

    return k, same, total

In [None]:
# Image Classification Intra-architecture Binomial Comparison
#
# For every test in bionm_tests, the models of each group listed in the JSON
# file are ordered by their sort value (largest first) and compared pairwise
# on the test metric. The counts pooled over all groups feed two one-sided
# binomial tests, and one LaTeX table row is printed per test.
with open("image_classification_intra_architecture.json", "r") as model_tests_file:
    # Context manager so the file handle is closed once the JSON is parsed.
    image_classification_model_tests = json.load(model_tests_file)

print("Test & # Larger Models & # Smaller Models & # Tied Models & # Total Models & Baseline p-value & Larger p-value & Smaller p-value \\\\")

for bionm_test in bionm_tests:

    total = 0
    k = 0
    same = 0

    for model_test in image_classification_model_tests:

        image_classification_test_results = []

        for model in model_test["models"]:
            # Get the file name
            file = f"{model}-{model_test['dataset']}-idun-A100-PyTorch-ngc2312"
            if model_test["pretrained"]:
                file += "-pretrained"
            file += ".csv"

            # Get the distribution
            # NOTE(review): return_error=False compares raw test_accuracy values,
            # whereas the time-series cell compares an error column. Directional
            # metrics (lower/upper fence outliers) may therefore point in
            # opposite directions between the two tables - confirm intended.
            distribution = get_distribution(results_path + "image_classification/" + file, return_error=False)

            # Get the sort_value from the sort_criteria
            if bionm_test["sort_criteria"] == metrics.parameters:
                # metrics.parameters is looked up by model name, not computed
                # from the distribution like the other criteria.
                sort_value = metrics.parameters(model)
            else:
                sort_value = bionm_test["sort_criteria"](distribution)

            image_classification_test_results.append({
                "test": file,
                "distribution": distribution,
                "sort": float(sort_value),
            })

        # Sort the test results by the sort key descending order (largest sort value first)
        image_classification_test_results = sorted(image_classification_test_results, key=lambda x: x["sort"], reverse=True)

        x, y, z = calculate_k_same_total(image_classification_test_results, bionm_test["metric"], bionm_test["condition"])

        k += x
        same += y
        total += z

    # "greater": pairs where the condition held with the larger-sort model on
    # the left; "less": pairs that neither met the condition nor tied.
    greater = k
    less = total - k - same

    # Discard the trial: You can remove the tied trial from your analysis and only include clear successes or failures.
    # From here on "total" is the number of non-tied pairwise comparisons
    # (this is the value printed in the "# Total Models" column).
    total = total - same

    if total > 0:
        binom_results = binomtest(k=greater, n=total, p=p_baseline, alternative='greater')

        geater_p_value = metrics.safe_round(binom_results.pvalue, round_precision)

        binom_results = binomtest(k=less, n=total, p=p_baseline, alternative='greater')

        less_p_value = metrics.safe_round(binom_results.pvalue, round_precision)
    else:
        # binomtest rejects n < 1, which happens when every comparison tied
        # (or there was nothing to compare). Report the p-values as undefined
        # instead of aborting the whole table.
        geater_p_value = less_p_value = float("nan")

    print(f"{bionm_test['name']} & {greater} & {less} & {same} & {total} & {p_baseline} & {geater_p_value} & {less_p_value} \\\\")

In [None]:
# Image Classification Cross-architecture Binomial Comparison
#
# For every test in bionm_tests, the models of each group listed in the JSON
# file are ordered by their sort value (largest first) and compared pairwise
# on the test metric. The counts pooled over all groups feed two one-sided
# binomial tests, and one LaTeX table row is printed per test.
with open("image_classification_cross_architecture.json", "r") as model_tests_file:
    # Context manager so the file handle is closed once the JSON is parsed.
    image_classification_model_tests = json.load(model_tests_file)

print("Test & # Larger Models & # Smaller Models & # Tied Models & # Total Models & Baseline p-value & Larger p-value & Smaller p-value \\\\")

for bionm_test in bionm_tests:

    total = 0
    k = 0
    same = 0

    for model_test in image_classification_model_tests:

        image_classification_test_results = []

        for model in model_test["models"]:
            # Get the file name
            file = f"{model}-{model_test['dataset']}-idun-A100-PyTorch-ngc2312"
            if model_test["pretrained"]:
                file += "-pretrained"
            file += ".csv"

            # Get the distribution
            # NOTE(review): return_error=False compares raw test_accuracy values,
            # whereas the time-series cell compares an error column. Directional
            # metrics (lower/upper fence outliers) may therefore point in
            # opposite directions between the two tables - confirm intended.
            distribution = get_distribution(results_path + "image_classification/" + file, return_error=False)

            # Get the sort_value from the sort_criteria
            if bionm_test["sort_criteria"] == metrics.parameters:
                # metrics.parameters is looked up by model name, not computed
                # from the distribution like the other criteria.
                sort_value = metrics.parameters(model)
            else:
                sort_value = bionm_test["sort_criteria"](distribution)

            image_classification_test_results.append({
                "test": file,
                "distribution": distribution,
                "sort": float(sort_value),
            })

        # Sort the test results by the sort key descending order (largest sort value first)
        image_classification_test_results = sorted(image_classification_test_results, key=lambda x: x["sort"], reverse=True)

        x, y, z = calculate_k_same_total(image_classification_test_results, bionm_test["metric"], bionm_test["condition"])

        k += x
        same += y
        total += z

    # "greater": pairs where the condition held with the larger-sort model on
    # the left; "less": pairs that neither met the condition nor tied.
    greater = k
    less = total - k - same

    # Discard the trial: You can remove the tied trial from your analysis and only include clear successes or failures.
    # From here on "total" is the number of non-tied pairwise comparisons
    # (this is the value printed in the "# Total Models" column).
    total = total - same

    if total > 0:
        binom_results = binomtest(k=greater, n=total, p=p_baseline, alternative='greater')

        geater_p_value = metrics.safe_round(binom_results.pvalue, round_precision)

        binom_results = binomtest(k=less, n=total, p=p_baseline, alternative='greater')

        less_p_value = metrics.safe_round(binom_results.pvalue, round_precision)
    else:
        # binomtest rejects n < 1, which happens when every comparison tied
        # (or there was nothing to compare). Report the p-values as undefined
        # instead of aborting the whole table.
        geater_p_value = less_p_value = float("nan")

    print(f"{bionm_test['name']} & {greater} & {less} & {same} & {total} & {p_baseline} & {geater_p_value} & {less_p_value} \\\\")

In [None]:
# Time Series Cross-architecture Binomial Comparison
#
# For every test in bionm_tests, the models of each group listed in the JSON
# file are ordered by their sort value (largest first) and compared pairwise
# on the test metric. The counts pooled over all groups feed two one-sided
# binomial tests, and one LaTeX table row is printed per test.
with open("time_series_model_tests.json", "r") as model_tests_file:
    # Context manager so the file handle is closed once the JSON is parsed.
    time_series_model_tests = json.load(model_tests_file)

print("Test & # Larger Models & # Smaller Models & # Tied Models & # Total Models & Baseline p-value & Larger p-value & Smaller p-value \\\\")

for bionm_test in bionm_tests:

    total = 0
    k = 0
    same = 0

    for model_test in time_series_model_tests:

        time_series_test_results = []

        for model in model_test["models"]:
            # Get the file name
            file = f"{model}_{model_test['dataset']}_96_{model_test['horizon']}_0_100.csv"

            # Get the distribution
            # get_distribution does not consult return_error for time-series
            # files; the configured time_results_column is returned as stored.
            distribution = get_distribution(results_path + "time_series/" + file, return_error=True)

            # Get the sort_value from the sort_criteria
            if bionm_test["sort_criteria"] == metrics.parameters:
                # metrics.parameters is looked up from the model, dataset and
                # horizon, not computed from the distribution like the other
                # criteria.
                sort_value = metrics.parameters(model, model_test['dataset'], model_test['horizon'])
            else:
                sort_value = bionm_test["sort_criteria"](distribution)

            time_series_test_results.append({
                "test": file,
                "distribution": distribution,
                "horizon": model_test["horizon"],
                "sort": float(sort_value),
            })

        # Sort the test results by the sort key descending order (largest sort value first)
        time_series_test_results = sorted(time_series_test_results, key=lambda x: x["sort"], reverse=True)

        x, y, z = calculate_k_same_total(time_series_test_results, bionm_test["metric"], bionm_test["condition"])

        k += x
        same += y
        total += z

    # "greater": pairs where the condition held with the larger-sort model on
    # the left; "less": pairs that neither met the condition nor tied.
    greater = k
    less = total - k - same

    # Discard the trial: You can remove the tied trial from your analysis and only include clear successes or failures.
    # From here on "total" is the number of non-tied pairwise comparisons
    # (this is the value printed in the "# Total Models" column).
    total = total - same

    if total > 0:
        binom_results = binomtest(k=greater, n=total, p=p_baseline, alternative='greater')

        geater_p_value = metrics.safe_round(binom_results.pvalue, round_precision)

        binom_results = binomtest(k=less, n=total, p=p_baseline, alternative='greater')

        less_p_value = metrics.safe_round(binom_results.pvalue, round_precision)
    else:
        # binomtest rejects n < 1, which happens when every comparison tied
        # (or there was nothing to compare). Report the p-values as undefined
        # instead of aborting the whole table.
        geater_p_value = less_p_value = float("nan")

    print(f"{bionm_test['name']} & {greater} & {less} & {same} & {total} & {p_baseline} & {geater_p_value} & {less_p_value} \\\\")