In [1]:
import os

import pandas as pd
from IPython.display import display

pd.options.display.float_format = '{:.2f}'.format
import warnings
from itertools import combinations

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from scipy.stats import *
from statsmodels.stats.multicomp import pairwise_tukeyhsd

warnings.filterwarnings("ignore")

In [2]:
file_paths = [f"error/hirasaki_{i}_errors.csv" for i in range(10)]
dataframes = [pd.read_csv(file_path) for file_path in file_paths]

In [3]:
frame_wise_avg_mpjpe_combinations_revised = {n: [] for n in range(2, 6)}

for n in range(5):
    for combo_indices in combinations(range(5), n + 2):
        for df in dataframes:
            combo_col_name = f"{''.join(map(str, combo_indices))}_mpjpe"
            if combo_col_name in df.columns:
                frame_wise_avg_mpjpe_combinations_revised[n + 2].extend(df[combo_col_name].values)

print("Number of combinations for each n:")
for n, values in frame_wise_avg_mpjpe_combinations_revised.items():
    print(f"n={n}: {len(values)}")

Number of combinations for each n:
n=2: 18440
n=3: 18440
n=4: 9220
n=5: 1844


In [4]:
raw_combination_data = []
for df in dataframes:
    for col in df.columns:
        if "_mpjpe" in col or "_angle" in col:
            error_type = "MPJPE" if "_mpjpe" in col else col.split('_')[1] + " " + col.split('_')[2]
            if len(col.split('_')) == 6:
                error_type += " " + col.split('_')[3]
            error_type = error_type.replace("NKEE", "KNEE")
            for value in df[col].values:
                raw_combination_data.append({"Combination": col.split('_')[0], "Error Type": error_type, "Error Value": value})

raw_combination_df = pd.DataFrame(raw_combination_data)
raw_combination_df.head()

Unnamed: 0,Combination,Error Type,Error Value
0,1,MPJPE,31.56
1,1,MPJPE,32.11
2,1,MPJPE,35.39
3,1,MPJPE,33.97
4,1,MPJPE,30.75


In [5]:
camera04_two_camera_data = raw_combination_df[
    (raw_combination_df["Error Type"] == "MPJPE") &
    (raw_combination_df["Combination"].str.len() == 2) &
    (raw_combination_df["Combination"].str.contains("04"))
]["Error Value"]

other_two_camera_data = raw_combination_df[
    (raw_combination_df["Error Type"] == "MPJPE") &
    (raw_combination_df["Combination"].str.len() == 2) &
    (~raw_combination_df["Combination"].str.contains("04"))
]["Error Value"]

ks_stat_two_camera, ks_pvalue_two_camera = ks_2samp(camera04_two_camera_data, other_two_camera_data)
print(f"Two Camera Combination (04 vs Others):")
print(f"KS Statistic: {ks_stat_two_camera:.4f}, p-value: {ks_pvalue_two_camera:.4f}")

Two Camera Combination (04 vs Others):
KS Statistic: 0.5493, p-value: 0.0000


In [6]:
stat_df = pd.DataFrame()
stat_df['Camera 04'] = camera04_two_camera_data.describe()
stat_df['Others'] = other_two_camera_data.describe()
display(stat_df)

Unnamed: 0,Camera 04,Others
count,1844.0,16596.0
mean,65.91,33.85
std,51.71,9.95
min,26.66,19.04
25%,37.82,27.85
50%,46.76,31.52
75%,75.99,36.55
max,458.91,157.03


In [7]:
mpjpe_df = raw_combination_df[(raw_combination_df["Error Type"] == "MPJPE") & (raw_combination_df["Combination"] != "04")]
display(mpjpe_df)

Unnamed: 0,Combination,Error Type,Error Value
0,01,MPJPE,31.56
1,01,MPJPE,32.11
2,01,MPJPE,35.39
3,01,MPJPE,33.97
4,01,MPJPE,30.75
...,...,...,...
334439,01234,MPJPE,34.52
334440,01234,MPJPE,36.56
334441,01234,MPJPE,40.43
334442,01234,MPJPE,41.69


In [8]:
mean_values = [mpjpe_df[mpjpe_df["Combination"].str.len() == n]["Error Value"].mean() for n in range(2, 6)]
std_values = [mpjpe_df[mpjpe_df["Combination"].str.len() == n]["Error Value"].std() for n in range(2, 6)]

print("Mean values for each n:")
for n, mean, std in zip(range(2, 6), mean_values, std_values):
    print(f"n={n}: {mean:.2f} ± {std:.2f}")

reduction_rates = [(mean_values[n] - mean_values[n + 1]) / mean_values[n] * 100 for n in range(3)]
print(f"Decreasing rates for each n:")
for n, rate in enumerate(reduction_rates):
    print(f"{n + 2} -> {n + 3}: {rate:.2f}")

diminishing_rates = [reduction_rates[n] - reduction_rates[n + 1] for n in range(2)]
print(f"Diminishing rates for each n:")
for n, rate in enumerate(diminishing_rates):
    print(f"{n + 2} -> {n + 3} -> {n + 4}: {rate:.2f}")

Mean values for each n:
n=2: 33.85 ± 9.95
n=3: 29.58 ± 7.26
n=4: 27.35 ± 6.34
n=5: 26.18 ± 6.18
Decreasing rates for each n:
2 -> 3: 12.61
3 -> 4: 7.53
4 -> 5: 4.30
Diminishing rates for each n:
2 -> 3 -> 4: 5.09
3 -> 4 -> 5: 3.22


In [9]:
values_2 = mpjpe_df[mpjpe_df["Combination"].str.len() == 2]["Error Value"]
values_3 = mpjpe_df[mpjpe_df["Combination"].str.len() == 3]["Error Value"]
values_4 = mpjpe_df[mpjpe_df["Combination"].str.len() == 4]["Error Value"]
values_5 = mpjpe_df[mpjpe_df["Combination"].str.len() == 5]["Error Value"]

stat_levene, p_levene = levene(values_2, values_3, values_4, values_5)
print(f"Levene's test: stat={stat_levene:.4f}, p={p_levene:.4f}")

stat_fligner, p_fligner = fligner(values_2, values_3, values_4, values_5)
print(f"Fligner-Killeen test: stat={stat_fligner:.4f}, p={p_fligner:.4f}")

stat_kruskal, p_kruskal = kruskal(values_2, values_3, values_4, values_5)

def cliffs_delta(a, b):
    m, n = len(a), len(b)
    rank = np.argsort(np.concatenate([a, b]))
    a_ranks = np.sum(rank[:m])
    b_ranks = np.sum(rank[m:])
    return (2 * a_ranks / m - (m + 1)) / n - 0.5

cliffs_effects = {
    f"{n1} vs {n2}": cliffs_delta(
        mpjpe_df[mpjpe_df["Combination"].str.len() == n1]["Error Value"],
        mpjpe_df[mpjpe_df["Combination"].str.len() == n2]["Error Value"],
    )
    for n1, n2 in [(2, 3), (3, 4), (4, 5)]
}

print(f"Kruskal-Wallis test: stat={stat_kruskal:.4f}, p={p_kruskal:.4f}")
print("\nCliff's Delta Effect Sizes (pairwise comparisons):")
for pair, delta in cliffs_effects.items():
    print(f"{pair}: {delta:.4f}")

Levene's test: stat=513.0655, p=0.0000
Fligner-Killeen test: stat=2921.6424, p=0.0000
Kruskal-Wallis test: stat=7211.5310, p=0.0000

Cliff's Delta Effect Sizes (pairwise comparisons):
2 vs 3: 0.7236
3 vs 4: 0.6997
4 vs 5: 0.6161


In [10]:
stat_df = pd.DataFrame()
stat_df["0"] = mpjpe_df[mpjpe_df.Combination.str.contains("0")]["Error Value"].describe()
stat_df["1"] = mpjpe_df[mpjpe_df.Combination.str.contains("1")]["Error Value"].describe()
stat_df["2"] = mpjpe_df[mpjpe_df.Combination.str.contains("2")]["Error Value"].describe()
stat_df["3"] = mpjpe_df[mpjpe_df.Combination.str.contains("3")]["Error Value"].describe()
stat_df["4"] = mpjpe_df[mpjpe_df.Combination.str.contains("4")]["Error Value"].describe()
display(stat_df)

Unnamed: 0,0,1,2,3,4
count,25816.0,27660.0,27660.0,27660.0,25816.0
mean,30.66,30.32,28.83,29.39,29.8
std,8.64,8.54,6.94,8.04,8.35
min,17.98,18.1,17.98,17.98,18.35
25%,25.9,25.6,25.12,25.18,25.39
50%,29.15,28.55,27.78,27.87,28.02
75%,32.81,32.48,31.16,31.19,31.98
max,157.03,150.89,141.94,157.03,145.66


In [11]:
min_comb_2 = pd.DataFrame(mpjpe_df[mpjpe_df["Combination"].str.len() == 2].groupby("Combination")["Error Value"].mean()).sort_values("Error Value").index[0]
min_comb_3 = pd.DataFrame(mpjpe_df[mpjpe_df["Combination"].str.len() == 3].groupby("Combination")["Error Value"].mean()).sort_values("Error Value").index[0]
min_comb_4 = pd.DataFrame(mpjpe_df[mpjpe_df["Combination"].str.len() == 4].groupby("Combination")["Error Value"].mean()).sort_values("Error Value").index[0]
min_comb_5 = pd.DataFrame(mpjpe_df[mpjpe_df["Combination"].str.len() == 5].groupby("Combination")["Error Value"].mean()).sort_values("Error Value").index[0]

print(f"Minimum combinations for each n:")
print(f"n=2: {min_comb_2}")
print(f"n=3: {min_comb_3}")
print(f"n=4: {min_comb_4}")
print(f"n=5: {min_comb_5}")

values_2_min = mpjpe_df[mpjpe_df["Combination"] == min_comb_2]["Error Value"]
values_3_min = mpjpe_df[mpjpe_df["Combination"] == min_comb_3]["Error Value"]
values_4_min = mpjpe_df[mpjpe_df["Combination"] == min_comb_4]["Error Value"]
values_5_min = mpjpe_df[mpjpe_df["Combination"] == min_comb_5]["Error Value"]

print("\nMean values for minimum combinations:")
print(f"n=2: {values_2_min.mean():.2f}")
print(f"n=3: {values_3_min.mean():.2f}")
print(f"n=4: {values_4_min.mean():.2f}")
print(f"n=5: {values_5_min.mean():.2f}")

stat_kruskal_min, p_kruskal_min = kruskal(values_2_min, values_3_min, values_4_min, values_5_min)
print(f"Kruskal-Wallis test (minimum combinations): stat={stat_kruskal_min:.4f}, p={p_kruskal_min:.4f}")

cliffs_effects_min = {
    f"{n1} vs {n2}": cliffs_delta(
        mpjpe_df[mpjpe_df["Combination"] == globals()[f"min_comb_{n1}"]]["Error Value"],
        mpjpe_df[mpjpe_df["Combination"] == globals()[f"min_comb_{n2}"]]["Error Value"],
    )
    for n1, n2 in [(2, 3), (3, 4), (4, 5)]
}

print("\nCliff's Delta Effect Sizes (minimum combinations):")
for pair, delta in cliffs_effects_min.items():
    print(f"{pair}: {delta:.4f}")

Minimum combinations for each n:
n=2: 23
n=3: 123
n=4: 1234
n=5: 01234

Mean values for minimum combinations:
n=2: 27.92
n=3: 27.05
n=4: 25.86
n=5: 26.18
Kruskal-Wallis test (minimum combinations): stat=506.6030, p=0.0000

Cliff's Delta Effect Sizes (minimum combinations):
2 vs 3: 0.6746
3 vs 4: 0.7644
4 vs 5: 0.5827
