# Compare initialization methods for different datasets
In particular, compare beta initialization with normal initialization that uses mu and sigma taken from the beta distribution.

In [1]:
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns

from scipy.stats import friedmanchisquare, wilcoxon, mannwhitneyu, kruskal

from itertools import combinations

import os

In [2]:
# Experiment grid. Each (dataset, optimizer, preprocessing, init) combination
# is looked up as one CSV file below ../reports/results/.
datasets = ['kdd', 'covtype', 'rice', 'glass_identification']
optimizers = ['cobyla', 'spsa']
preprocessings = ['pca', 'lda']
inits = ['beta', 'normal', 'normal_beta_dist', 'beta_mu', 'uniform', 'zero']

results_dir = "../reports/results/"
name_template = "{dataset}___{optimizer}_{preprocessing}_{init}_results.csv"

res = []

for dataset in datasets:
    for optimizer in optimizers:
        for preprocessing in preprocessings:
            # The rice/lda combination is skipped.
            if (dataset, preprocessing) == ('rice', 'lda'):
                continue
            for init in inits:
                file_name = name_template.format(dataset=dataset, optimizer=optimizer, preprocessing=preprocessing, init=init)

                # Report absent files and move on instead of failing.
                if not os.path.exists(results_dir + file_name):
                    print("Missing " + file_name)
                    continue

                r = pd.read_csv(results_dir + file_name, index_col=0)

                # Tag every row with the grid coordinates it was loaded for.
                r['dataset'] = dataset
                r['initialization'] = init
                r['optimizer'] = optimizer.upper()

                res.append(r)

# One long table, indexed by dataset name.
res = pd.concat(res).set_index('dataset')

# Substring markers found in the raw 'ansatz' values, mapped to display names.
ansatz_markers = (
    ('get_realamplitudes', 'RealAmplitudes'),
    ('get_efficientsu2', 'EfficientSU2'),
    ('get_paulitwodesign', 'PauliTwoDesign'),
)


def _featuremap_label(raw):
    """Map a raw 'featuremap' value to its display name."""
    if 'get_zfeaturemap' in raw:
        return 'ZFeatureMap'
    return 'ZZFeatureMap'


def _ansatz_label(raw):
    """Map a raw 'ansatz' value to its display name; unmatched values become TwoLocal."""
    for marker, label in ansatz_markers:
        if marker in raw:
            return label
    return "TwoLocal"


res['featuremap'] = res['featuremap'].apply(_featuremap_label)
res['ansatz'] = res['ansatz'].apply(_ansatz_label)

In [3]:
# Configuration columns. The analysis cells pass this list to set_index() and
# then sort_index() before grouping, so rows are ordered by configuration.
index = ['ansatz', 'optimizer', 'featuremap', 'entanglement', 'entanglement_featuremap', 'preprocessing']

In [4]:
# Preview the first rows of the combined results table.
res.head()

Unnamed: 0_level_0,accuracy,f1,time,ansatz,optimizer,featuremap,entanglement,entanglement_featuremap,iterations,preprocessing,initialization
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
kdd,0.552,0.399874,2563.761526,EfficientSU2,COBYLA,ZFeatureMap,full,,397,pca,beta
kdd,0.664,0.562332,2813.074599,EfficientSU2,COBYLA,ZFeatureMap,linear,,500,pca,beta
kdd,0.588,0.465773,2881.298857,EfficientSU2,COBYLA,ZFeatureMap,circular,,500,pca,beta
kdd,0.652,0.552719,2836.419739,EfficientSU2,COBYLA,ZFeatureMap,sca,,500,pca,beta
kdd,0.844,0.847934,4903.255884,EfficientSU2,COBYLA,ZZFeatureMap,full,full,500,pca,beta


In [5]:
# Mean and standard deviation of accuracy and F1 for every
# (dataset, initialization) pair, ordered by dataset and then by mean
# accuracy, both descending.
res.sort_values(by=['dataset','accuracy', 'f1', 'time'], ascending=False).groupby(['dataset', 'initialization'])[['accuracy', 'f1']]\
    .agg(['mean', 'std']).sort_values(by=['dataset', ('accuracy', 'mean')], ascending=False)

Unnamed: 0_level_0,Unnamed: 1_level_0,accuracy,accuracy,f1,f1
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,mean,std
dataset,initialization,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
rice,beta_mu,0.694286,0.093786,0.689656,0.09522
rice,beta,0.692214,0.100151,0.687422,0.102238
rice,normal_beta_dist,0.686071,0.099749,0.680435,0.10174
rice,uniform,0.51919,0.034771,0.507013,0.043813
rice,zero,0.518357,0.030417,0.508765,0.035464
rice,normal,0.516095,0.029734,0.507574,0.033373
kdd,beta,0.864357,0.100169,0.848749,0.126497
kdd,normal_beta_dist,0.85919,0.10668,0.842729,0.136233
kdd,beta_mu,0.859048,0.10271,0.843104,0.127567
kdd,uniform,0.665571,0.074873,0.672754,0.073212


In [6]:
# Per dataset: Friedman test over the initialization groups, followed by
# pairwise Wilcoxon signed-rank tests when the Friedman test is significant.
# Table legend: "~p" no significant difference, "+p" row group significantly
# greater than column group, "-p" row group significantly smaller.
alpha = 0.05

for dataset_name in pd.unique(res.index):
    print(f"\n=== {dataset_name} ===")

    # Rows of this dataset, ordered by configuration.
    df = res[res.index == dataset_name].set_index(index).sort_index()
    grouped_data = df.groupby('initialization')

    # One accuracy array per initialization group.
    group_values = [group['accuracy'].to_numpy() for _, group in grouped_data]

    statistic, p_value = friedmanchisquare(*group_values)
    print(f"Friedman Test Statistic: {statistic}")
    print(f"P-value: {p_value}")

    if not p_value < alpha:
        print("Fail to reject the null hypothesis. There are no significant differences among the groups.")
        continue

    print("Reject the null hypothesis. There are significant differences among the groups.")

    group_names = list(grouped_data.groups.keys())
    accuracy_of = {
        name: grouped_data.get_group(name)['accuracy'].to_numpy()
        for name in group_names
    }

    # Square table of pairwise outcomes; the diagonal stays empty.
    results_table = pd.DataFrame(index=group_names, columns=group_names)

    for first, second in combinations(group_names, 2):
        acc_first = accuracy_of[first]
        acc_second = accuracy_of[second]

        # Two-sided test first: is there any difference at all?
        _, p_two_sided = wilcoxon(acc_first, acc_second)
        if p_two_sided > alpha:
            label = "~" + str(p_two_sided.round(4))
            results_table.at[first, second] = label
            results_table.at[second, first] = label
            continue

        # One-sided tests in both directions to find which group is greater.
        _, p_first_greater = wilcoxon(acc_first, acc_second, alternative='greater')
        if p_first_greater < alpha:
            shown = str(p_first_greater.round(4))
            results_table.at[first, second] = '+' + shown
            results_table.at[second, first] = '-' + shown

        _, p_second_greater = wilcoxon(acc_second, acc_first, alternative='greater')
        if p_second_greater < alpha:
            shown = str(p_second_greater.round(4))
            results_table.at[first, second] = '-' + shown
            results_table.at[second, first] = '+' + shown

    print("\nPairwise Comparisons:")
    results_table = results_table.fillna(" ")
    display(results_table)


=== kdd ===
Friedman Test Statistic: 809.6711317190449
P-value: 9.354866919177094e-173
Reject the null hypothesis. There are significant differences among the groups.

Pairwise Comparisons:


Unnamed: 0,beta,beta_mu,normal,normal_beta_dist,uniform,zero
beta,,~0.448,+0.0,~0.3452,+0.0,+0.0
beta_mu,~0.448,,+0.0,~0.6928,+0.0,+0.0
normal,-0.0,-0.0,,-0.0,~0.309,~0.7339
normal_beta_dist,~0.3452,~0.6928,+0.0,,+0.0,+0.0
uniform,-0.0,-0.0,~0.309,-0.0,,~0.1912
zero,-0.0,-0.0,~0.7339,-0.0,~0.1912,



=== covtype ===
Friedman Test Statistic: 1103.4088383622532
P-value: 2.4429740095366e-236
Reject the null hypothesis. There are significant differences among the groups.

Pairwise Comparisons:


Unnamed: 0,beta,beta_mu,normal,normal_beta_dist,uniform,zero
beta,,~0.5478,+0.0,-0.0007,+0.0,+0.0
beta_mu,~0.5478,,+0.0,-0.0001,+0.0,+0.0
normal,-0.0,-0.0,,-0.0,~0.8276,~0.3577
normal_beta_dist,+0.0007,+0.0001,+0.0,,+0.0,+0.0
uniform,-0.0,-0.0,~0.8276,-0.0,,~0.0818
zero,-0.0,-0.0,~0.3577,-0.0,~0.0818,



=== rice ===
Friedman Test Statistic: 621.6909871244634
P-value: 4.157115618510485e-132
Reject the null hypothesis. There are significant differences among the groups.

Pairwise Comparisons:


Unnamed: 0,beta,beta_mu,normal,normal_beta_dist,uniform,zero
beta,,~0.2156,+0.0,~0.541,+0.0,+0.0
beta_mu,~0.2156,,+0.0,~0.1227,+0.0,+0.0
normal,-0.0,-0.0,,-0.0,~0.2469,~0.3636
normal_beta_dist,~0.541,~0.1227,+0.0,,+0.0,+0.0
uniform,-0.0,-0.0,~0.2469,-0.0,,~0.6467
zero,-0.0,-0.0,~0.3636,-0.0,~0.6467,



=== glass_identification ===
Friedman Test Statistic: 914.2155149820915
P-value: 2.2306905698539016e-195
Reject the null hypothesis. There are significant differences among the groups.

Pairwise Comparisons:


Unnamed: 0,beta,beta_mu,normal,normal_beta_dist,uniform,zero
beta,,~0.8052,+0.0,~0.8353,+0.0,+0.0
beta_mu,~0.8052,,+0.0,~0.8078,+0.0,+0.0
normal,-0.0,-0.0,,-0.0,~0.4071,~0.9591
normal_beta_dist,~0.8353,~0.8078,+0.0,,+0.0,+0.0
uniform,-0.0,-0.0,~0.4071,-0.0,,~0.3997
zero,-0.0,-0.0,~0.9591,-0.0,~0.3997,


In [7]:
# Second experiment grid: adds the Nelder-Mead optimizer and a noise setting
# ("_" in the file name, or the ibm_perth label) and restricts the
# initializations to three.
datasets = ['kdd', 'covtype', 'rice', 'glass_identification']
optimizers = ['cobyla', 'spsa', 'neldermead']
preprocessings = ['pca', 'lda']
noise = ["_", "ibm_perth"]
inits = ['beta', 'normal', 'uniform']

results_dir = "../reports/results/"
name_template = "{dataset}_{n}_{optimizer}_{preprocessing}_{init}_results.csv"

res = []

for dataset in datasets:
    for optimizer in optimizers:
        for preprocessing in preprocessings:
            # The rice/lda combination is skipped.
            if dataset == 'rice' and preprocessing == 'lda':
                continue
            for init in inits:
                for n in noise:
                    file_name = name_template.format(dataset=dataset, n=n, optimizer=optimizer, preprocessing=preprocessing, init=init)

                    # Report absent files and move on instead of failing.
                    if not os.path.exists(results_dir + file_name):
                        print("Missing " + file_name)
                        continue

                    r = pd.read_csv(results_dir + file_name, index_col=0)

                    # Tag every row with the grid coordinates it was loaded for;
                    # the "_" placeholder is stored as the string "None".
                    r['dataset'] = dataset
                    r['initialization'] = init
                    r['optimizer'] = optimizer.upper()
                    r['noise'] = "None" if n == "_" else n

                    res.append(r)

# One long table, indexed by dataset name.
res = pd.concat(res).set_index('dataset')

# Substring markers found in the raw 'ansatz' values, mapped to display names;
# values matching none of them become "TwoLocal".
ansatz_names = [
    ('get_realamplitudes', 'RealAmplitudes'),
    ('get_efficientsu2', 'EfficientSU2'),
    ('get_paulitwodesign', 'PauliTwoDesign'),
]

res['featuremap'] = res['featuremap'].apply(
    lambda x: 'ZZFeatureMap' if 'get_zfeaturemap' not in x else 'ZFeatureMap'
)
res['ansatz'] = res['ansatz'].apply(
    lambda x: next((label for marker, label in ansatz_names if marker in x), "TwoLocal")
)

In [8]:
# Per dataset and noise setting: Friedman test over the initialization groups,
# followed by pairwise Wilcoxon signed-rank tests when it is significant.
# Table legend: "~p" no significant difference, "+p" row group significantly
# greater than column group, "-p" row group significantly smaller.
alpha = 0.05  # significance level for the omnibus and the pairwise tests

for dataset_name in pd.unique(res.index):
    print(f"\n=== {dataset_name} ===")
    dataset_rows = res[res.index == dataset_name]

    # The loop variable is named noise_level so that the module-level `noise`
    # list defined by the loading cell is not overwritten with a string.
    for noise_level in pd.unique(dataset_rows['noise']):
        print(f"\n=== {noise_level} ===")

        # Rows of this dataset/noise setting, ordered by configuration.
        df = dataset_rows[dataset_rows['noise'] == noise_level].set_index(index).sort_index()
        grouped_data = df.groupby('initialization')

        # One accuracy array per group, extracted once and reused by all tests.
        group_names = list(grouped_data.groups.keys())
        accuracy_of = {
            name: grouped_data.get_group(name)['accuracy'].to_numpy()
            for name in group_names
        }

        # NOTE(review): the samples are matched purely by position after
        # sort_index(); this presumably relies on every group containing the
        # same configurations - confirm no result file was reported missing.
        statistic, p_value = friedmanchisquare(*accuracy_of.values())

        print(f"Friedman Test Statistic: {statistic}")
        print(f"P-value: {p_value}")

        if not p_value < alpha:
            print("Fail to reject the null hypothesis. There are no significant differences among the groups.")
            continue

        print("Reject the null hypothesis. There are significant differences among the groups.")

        # Square table of pairwise outcomes; the diagonal stays empty.
        results_table = pd.DataFrame(index=group_names, columns=group_names)

        for group1, group2 in combinations(group_names, 2):
            acc1 = accuracy_of[group1]
            acc2 = accuracy_of[group2]

            # Two-sided test first: is there any difference at all?
            _, p_two_sided = wilcoxon(acc1, acc2)
            if p_two_sided > alpha:
                label = "~" + str(p_two_sided.round(4))
                results_table.at[group1, group2] = label
                results_table.at[group2, group1] = label
                continue

            # One-sided tests in both directions to find which group is greater.
            _, p_greater = wilcoxon(acc1, acc2, alternative='greater')
            if p_greater < alpha:
                shown = str(p_greater.round(4))
                results_table.at[group1, group2] = '+' + shown
                results_table.at[group2, group1] = '-' + shown

            _, p_less = wilcoxon(acc2, acc1, alternative='greater')
            if p_less < alpha:
                shown = str(p_less.round(4))
                results_table.at[group1, group2] = '-' + shown
                results_table.at[group2, group1] = '+' + shown

        print("\nPairwise Comparisons:")
        results_table = results_table.fillna(" ")
        display(results_table)


=== kdd ===

=== None ===
Friedman Test Statistic: 190.5581162324643
P-value: 4.176676311761817e-42
Reject the null hypothesis. There are significant differences among the groups.

Pairwise Comparisons:


Unnamed: 0,beta,normal,uniform
beta,,+0.0,+0.0
normal,-0.0,,~0.3375
uniform,-0.0,~0.3375,



=== ibm_perth ===
Friedman Test Statistic: 47.21227364185119
P-value: 5.5973831954889735e-11
Reject the null hypothesis. There are significant differences among the groups.

Pairwise Comparisons:


Unnamed: 0,beta,normal,uniform
beta,,0.0,0.0
normal,-0.0,,-0.0
uniform,-0.0,0.0,



=== covtype ===

=== None ===
Friedman Test Statistic: 279.2883744338195
P-value: 2.255797244549553e-61
Reject the null hypothesis. There are significant differences among the groups.

Pairwise Comparisons:


Unnamed: 0,beta,normal,uniform
beta,,+0.0,+0.0
normal,-0.0,,~0.2777
uniform,-0.0,~0.2777,



=== ibm_perth ===
Friedman Test Statistic: 312.42849924204137
P-value: 1.4355337088002226e-68
Reject the null hypothesis. There are significant differences among the groups.

Pairwise Comparisons:


Unnamed: 0,beta,normal,uniform
beta,,+0.0,+0.0
normal,-0.0,,~0.5102
uniform,-0.0,~0.5102,



=== rice ===

=== None ===
Friedman Test Statistic: 135.3421052631579
P-value: 4.081645264529754e-30
Reject the null hypothesis. There are significant differences among the groups.

Pairwise Comparisons:


Unnamed: 0,beta,normal,uniform
beta,,+0.0,+0.0
normal,-0.0,,~0.4354
uniform,-0.0,~0.4354,



=== ibm_perth ===
Friedman Test Statistic: 106.61088709677401
P-value: 7.07522102603349e-24
Reject the null hypothesis. There are significant differences among the groups.

Pairwise Comparisons:


Unnamed: 0,beta,normal,uniform
beta,,+0.0,+0.0
normal,-0.0,,~0.4412
uniform,-0.0,~0.4412,



=== glass_identification ===

=== None ===
Friedman Test Statistic: 252.08450704225325
P-value: 1.8219850500195392e-55
Reject the null hypothesis. There are significant differences among the groups.

Pairwise Comparisons:


Unnamed: 0,beta,normal,uniform
beta,,+0.0,+0.0
normal,-0.0,,~0.3384
uniform,-0.0,~0.3384,



=== ibm_perth ===
Friedman Test Statistic: 141.635588705381
P-value: 1.7547800652605816e-31
Reject the null hypothesis. There are significant differences among the groups.

Pairwise Comparisons:


Unnamed: 0,beta,normal,uniform
beta,,+0.0,+0.0
normal,-0.0,,~0.6722
uniform,-0.0,~0.6722,


In [9]:
# Per dataset and noise setting: Friedman test over the optimizer groups,
# followed by pairwise Wilcoxon signed-rank tests when it is significant.
# Table legend: "~p" no significant difference, "+p" row group significantly
# greater than column group, "-p" row group significantly smaller.
alpha = 0.05  # significance level for the omnibus and the pairwise tests

for dataset_name in pd.unique(res.index):
    print(f"\n=== {dataset_name} ===")
    dataset_rows = res[res.index == dataset_name]

    # The loop variable is named noise_level so that the module-level `noise`
    # list defined by the loading cell is not overwritten with a string.
    for noise_level in pd.unique(dataset_rows['noise']):
        print(f"\n=== {noise_level} ===")

        # Rows of this dataset/noise setting, ordered by configuration.
        df = dataset_rows[dataset_rows['noise'] == noise_level].set_index(index).sort_index()
        grouped_data = df.groupby('optimizer')

        # One accuracy array per optimizer, extracted once and reused by all tests.
        group_names = list(grouped_data.groups.keys())
        accuracy_of = {
            name: grouped_data.get_group(name)['accuracy'].to_numpy()
            for name in group_names
        }

        # NOTE(review): the samples are matched purely by position after
        # sort_index(); this presumably relies on every optimizer having been
        # run on the same configurations - confirm no result file is missing.
        statistic, p_value = friedmanchisquare(*accuracy_of.values())

        print(f"Friedman Test Statistic: {statistic}")
        print(f"P-value: {p_value}")

        if not p_value < alpha:
            print("Fail to reject the null hypothesis. There are no significant differences among the groups.")
            continue

        print("Reject the null hypothesis. There are significant differences among the groups.")

        # Square table of pairwise outcomes; the diagonal stays empty.
        results_table = pd.DataFrame(index=group_names, columns=group_names)

        for group1, group2 in combinations(group_names, 2):
            acc1 = accuracy_of[group1]
            acc2 = accuracy_of[group2]

            # Two-sided test first: is there any difference at all?
            _, p_two_sided = wilcoxon(acc1, acc2)
            if p_two_sided > alpha:
                label = "~" + str(p_two_sided.round(4))
                results_table.at[group1, group2] = label
                results_table.at[group2, group1] = label
                continue

            # One-sided tests in both directions to find which group is greater.
            _, p_greater = wilcoxon(acc1, acc2, alternative='greater')
            if p_greater < alpha:
                shown = str(p_greater.round(4))
                results_table.at[group1, group2] = '+' + shown
                results_table.at[group2, group1] = '-' + shown

            _, p_less = wilcoxon(acc2, acc1, alternative='greater')
            if p_less < alpha:
                shown = str(p_less.round(4))
                results_table.at[group1, group2] = '-' + shown
                results_table.at[group2, group1] = '+' + shown

        print("\nPairwise Comparisons:")
        results_table = results_table.fillna(" ")
        display(results_table)


=== kdd ===

=== None ===


Friedman Test Statistic: 702.8231738035262
P-value: 2.4203988153594256e-153
Reject the null hypothesis. There are significant differences among the groups.

Pairwise Comparisons:


Unnamed: 0,COBYLA,NELDERMEAD,SPSA
COBYLA,,0.0,0.0
NELDERMEAD,-0.0,,-0.0
SPSA,-0.0,0.0,



=== ibm_perth ===
Friedman Test Statistic: 738.3039999999999
P-value: 4.778850192384588e-161
Reject the null hypothesis. There are significant differences among the groups.

Pairwise Comparisons:


Unnamed: 0,COBYLA,NELDERMEAD,SPSA
COBYLA,,0.0,~0.3574
NELDERMEAD,-0.0,,-0.0
SPSA,~0.3574,0.0,



=== covtype ===

=== None ===
Friedman Test Statistic: 491.75454545454494
P-value: 1.6476206602272606e-107
Reject the null hypothesis. There are significant differences among the groups.

Pairwise Comparisons:


Unnamed: 0,COBYLA,NELDERMEAD,SPSA
COBYLA,,0.0,-0.0
NELDERMEAD,-0.0,,-0.0
SPSA,0.0,0.0,



=== ibm_perth ===
Friedman Test Statistic: 399.8302172814545
P-value: 1.5065080901775364e-87
Reject the null hypothesis. There are significant differences among the groups.

Pairwise Comparisons:


Unnamed: 0,COBYLA,NELDERMEAD,SPSA
COBYLA,,0.0,-0.0092
NELDERMEAD,-0.0,,-0.0
SPSA,0.0092,0.0,



=== rice ===

=== None ===
Friedman Test Statistic: 65.02443991853356
P-value: 7.587911855049771e-15
Reject the null hypothesis. There are significant differences among the groups.

Pairwise Comparisons:


Unnamed: 0,COBYLA,NELDERMEAD,SPSA
COBYLA,,0.0,~0.1202
NELDERMEAD,-0.0,,-0.0
SPSA,~0.1202,0.0,



=== ibm_perth ===
Friedman Test Statistic: 59.17694641051569
P-value: 1.412177731991149e-13
Reject the null hypothesis. There are significant differences among the groups.

Pairwise Comparisons:


Unnamed: 0,COBYLA,NELDERMEAD,SPSA
COBYLA,,0.0,~0.4649
NELDERMEAD,-0.0,,-0.0
SPSA,~0.4649,0.0,



=== glass_identification ===

=== None ===
Friedman Test Statistic: 119.160876536611
P-value: 1.3321233188336085e-26
Reject the null hypothesis. There are significant differences among the groups.

Pairwise Comparisons:


Unnamed: 0,COBYLA,NELDERMEAD,SPSA
COBYLA,,0.0,-0.0004
NELDERMEAD,-0.0,,-0.0
SPSA,0.0004,0.0,



=== ibm_perth ===
Friedman Test Statistic: 101.46375266524453
P-value: 9.277395093296631e-23
Reject the null hypothesis. There are significant differences among the groups.

Pairwise Comparisons:


Unnamed: 0,COBYLA,NELDERMEAD,SPSA
COBYLA,,0.0,~0.5149
NELDERMEAD,-0.0,,-0.0
SPSA,~0.5149,0.0,


In [10]:
def _pairwise_labels(sample_a, sample_b, test, alpha):
    """Return the (a-vs-b, b-vs-a) outcome labels for one pair of samples.

    ``test`` is called as ``test(x, y)`` for the two-sided comparison and as
    ``test(x, y, alternative='greater')`` for the directional ones. Labels are
    ``"~p"`` (two-sided p above ``alpha``), ``"+p"`` (first sample
    significantly greater) or ``"-p"`` (first sample significantly smaller),
    with ``p`` rounded to four decimals. NaN is returned for both directions
    when the two-sided test is significant but neither one-sided test is.
    """
    _, p_two_sided = test(sample_a, sample_b)
    if p_two_sided > alpha:
        label = "~" + str(p_two_sided.round(4))
        return label, label

    label_ab = label_ba = float("nan")

    _, p_a_greater = test(sample_a, sample_b, alternative='greater')
    if p_a_greater < alpha:
        shown = str(p_a_greater.round(4))
        label_ab, label_ba = '+' + shown, '-' + shown

    _, p_b_greater = test(sample_b, sample_a, alternative='greater')
    if p_b_greater < alpha:
        shown = str(p_b_greater.round(4))
        label_ab, label_ba = '-' + shown, '+' + shown

    return label_ab, label_ba


def get_significance_res(column, paired = True, alpha = 0.05):
    """Run pairwise significance tests on accuracy between the groups of ``column``.

    Reads the module-level ``res`` table and ``index`` column list. For every
    dataset in ``res.index`` and every noise value present for that dataset,
    the matching rows are indexed by ``index``, sorted, grouped by ``column``,
    and every pair of groups is compared on its 'accuracy' values.

    Parameters
    ----------
    column : str
        Name of the column (or index level) whose groups are compared.
    paired : bool
        If True, use the Wilcoxon signed-rank test; otherwise use the
        Mann-Whitney U test.
    alpha : float
        Significance level. It used to be read from a global ``alpha`` that
        only existed after one of the earlier analysis cells had been run;
        the default matches the value those cells assign.

    Returns
    -------
    list of list
        Two entries per pair of groups, one for each direction:
        ``[dataset, noise, group_a, group_b, label]``, where ``label`` is
        ``"~p"``, ``"+p"``, ``"-p"`` or NaN (see ``_pairwise_labels``).
    """
    # NOTE(review): with paired=True the samples are matched purely by
    # position after sort_index(); this presumably relies on every group
    # containing the same configurations - confirm against the loaded files.
    test = wilcoxon if paired else mannwhitneyu

    results = []
    for dataset_name in pd.unique(res.index):
        dataset_rows = res[res.index == dataset_name]

        for noise in pd.unique(dataset_rows['noise']):
            df = dataset_rows[dataset_rows['noise'] == noise].set_index(index).sort_index()
            grouped_data = df.groupby(column)

            # One accuracy array per group, extracted once and reused for every pair.
            accuracy_of = {
                name: grouped_data.get_group(name)['accuracy'].to_numpy()
                for name in grouped_data.groups.keys()
            }

            for group1, group2 in combinations(accuracy_of, 2):
                label_12, label_21 = _pairwise_labels(
                    accuracy_of[group1], accuracy_of[group2], test, alpha
                )
                results.append([dataset_name, noise, group1, group2, label_12])
                results.append([dataset_name, noise, group2, group1, label_21])

    return results

In [11]:
# Pairwise optimizer comparison per dataset and noise setting, printed as a
# colour-coded LaTeX table.
df = pd.DataFrame(get_significance_res('optimizer'), columns=['dataset', 'noise', 'optimizer1', 'optimizer2', 'p-value'])
order = ['kdd', 'covtype', 'glass_identification', 'rice']
table = pd.pivot_table(df, index=['optimizer1', 'noise'], columns=['optimizer2', 'dataset'], values='p-value', aggfunc='first').fillna(" ").reindex(order, level=1, axis=1)

# Raw strings keep the LaTeX backslashes literal ("\c" is not a valid Python
# escape sequence). The "-" marker is replaced only where it starts a cell
# ("& -"), so the hyphen inside rules such as \cline{1-14} is left intact.
latex = (
    table.to_latex()
    .replace("+", r"\cellcolor{Green}")
    .replace("& -", r"& \cellcolor{BrickRed}")
    .replace("~", r"\cellcolor{Yellow}")
    .replace("None", "P")
    .replace("ibm_perth", "N")
    .replace("kdd", "K")
    .replace("covtype", "C")
    .replace("glass_identification", "G")
    .replace("rice", "R")
    .replace("{r}", "{c|}")
)
print(latex)

\begin{tabular}{llllllllllllll}
\toprule
 & optimizer2 & \multicolumn{4}{c|}{COBYLA} & \multicolumn{4}{c|}{NELDERMEAD} & \multicolumn{4}{c|}{SPSA} \\
 & dataset & K & C & G & R & K & C & G & R & K & C & G & R \\
optimizer1 & noise &  &  &  &  &  &  &  &  &  &  &  &  \\
\midrule
\multirow[t]{2}{*}{COBYLA} & P &   &   &   &   & \cellcolor{Green}0.0 & \cellcolor{Green}0.0 & \cellcolor{Green}0.0 & \cellcolor{Green}0.0 & \cellcolor{Green}0.0 & \cellcolor{BrickRed}0.0 & \cellcolor{BrickRed}0.0004 & \cellcolor{Yellow}0.1202 \\
 & N &   &   &   &   & \cellcolor{Green}0.0 & \cellcolor{Green}0.0 & \cellcolor{Green}0.0 & \cellcolor{Green}0.0 & \cellcolor{Yellow}0.3574 & \cellcolor{BrickRed}0.0092 & \cellcolor{Yellow}0.5149 & \cellcolor{Yellow}0.4649 \\
\cline{1\cellcolor{BrickRed}14}
\multirow[t]{2}{*}{NELDERMEAD} & P & \cellcolor{BrickRed}0.0 & \cellcolor{BrickRed}0.0 & \cellcolor{BrickRed}0.0 & \cellcolor{BrickRed}0.0 &   &   &   &   & \cellcolor{BrickRed}0.0 & \cellcolor{BrickRed}0.0 & \cellco

In [12]:
# Pairwise ansatz comparison (unpaired test) per dataset and noise setting,
# printed as a colour-coded LaTeX table. The 'optimizer1'/'optimizer2' column
# names are reused here for the two ansatz values being compared.
df = pd.DataFrame(get_significance_res('ansatz', paired=False), columns=['dataset', 'noise', 'optimizer1', 'optimizer2', 'p-value'])
order = ['kdd', 'covtype', 'glass_identification', 'rice']
table = pd.pivot_table(df, index=['optimizer1', 'noise'], columns=['optimizer2', 'dataset'], values='p-value', aggfunc='first').fillna(" ").reindex(order, level=1, axis=1)

# Raw strings keep the LaTeX backslashes literal ("\c" is not a valid Python
# escape sequence). The "-" marker is replaced only where it starts a cell
# ("& -"), so the hyphen inside \cline{...} rules is left intact.
latex = (
    table.to_latex()
    .replace("+", r"\cellcolor{Green}")
    .replace("& -", r"& \cellcolor{BrickRed}")
    .replace("~", r"\cellcolor{Yellow}")
    .replace("None", "P")
    .replace("ibm_perth", "N")
    .replace("kdd", "K")
    .replace("covtype", "C")
    .replace("glass_identification", "G")
    .replace("rice", "R")
    .replace("{r}", "{c|}")
)
print(latex)

\begin{tabular}{llllllllllllllllll}
\toprule
 & optimizer2 & \multicolumn{4}{c|}{EfficientSU2} & \multicolumn{4}{c|}{PauliTwoDesign} & \multicolumn{4}{c|}{RealAmplitudes} & \multicolumn{4}{c|}{TwoLocal} \\
 & dataset & K & C & G & R & K & C & G & R & K & C & G & R & K & C & G & R \\
optimizer1 & noise &  &  &  &  &  &  &  &  &  &  &  &  &  &  &  &  \\
\midrule
\multirow[t]{2}{*}{EfficientSU2} & P &   &   &   &   & \cellcolor{Yellow}0.2987 & \cellcolor{Yellow}0.3504 & \cellcolor{Yellow}0.1533 & \cellcolor{Yellow}0.8715 & \cellcolor{Yellow}0.8032 & \cellcolor{Yellow}0.5991 & \cellcolor{Yellow}0.6753 & \cellcolor{Yellow}0.1564 & \cellcolor{Yellow}0.4778 & \cellcolor{Yellow}0.0605 & \cellcolor{Yellow}0.7428 & \cellcolor{Yellow}0.19 \\
 & N &   &   &   &   & \cellcolor{Yellow}0.3055 & \cellcolor{Yellow}0.7642 & \cellcolor{Yellow}0.6053 & \cellcolor{Yellow}0.8493 & \cellcolor{Yellow}0.9429 & \cellcolor{Yellow}0.5566 & \cellcolor{Yellow}0.2516 & \cellcolor{Yellow}0.7142 & \cellcolor{Yellow}0.

In [13]:
# Pairwise feature-map comparison (unpaired test) per dataset and noise
# setting, printed as a colour-coded LaTeX table. The 'optimizer1'/'optimizer2'
# column names are reused here for the two feature maps being compared.
df = pd.DataFrame(get_significance_res('featuremap', paired=False), columns=['dataset', 'noise', 'optimizer1', 'optimizer2', 'p-value'])
order = ['kdd', 'covtype', 'glass_identification', 'rice']
table = pd.pivot_table(df, index=['optimizer1', 'noise'], columns=['optimizer2', 'dataset'], values='p-value', aggfunc='first').fillna(" ").reindex(order, level=1, axis=1)

# Raw strings keep the LaTeX backslashes literal ("\c" is not a valid Python
# escape sequence). The "-" marker is replaced only where it starts a cell
# ("& -"), so the hyphen inside rules such as \cline{1-10} is left intact.
latex = (
    table.to_latex()
    .replace("+", r"\cellcolor{Green}")
    .replace("& -", r"& \cellcolor{BrickRed}")
    .replace("~", r"\cellcolor{Yellow}")
    .replace("None", "P")
    .replace("ibm_perth", "N")
    .replace("kdd", "K")
    .replace("covtype", "C")
    .replace("glass_identification", "G")
    .replace("rice", "R")
    .replace("{r}", "{c|}")
)
print(latex)

\begin{tabular}{llllllllll}
\toprule
 & optimizer2 & \multicolumn{4}{c|}{ZFeatureMap} & \multicolumn{4}{c|}{ZZFeatureMap} \\
 & dataset & K & C & G & R & K & C & G & R \\
optimizer1 & noise &  &  &  &  &  &  &  &  \\
\midrule
\multirow[t]{2}{*}{ZFeatureMap} & P &   &   &   &   & \cellcolor{Green}0.0131 & \cellcolor{Green}0.0 & \cellcolor{Green}0.0 & \cellcolor{Yellow}0.5866 \\
 & N &   &   &   &   & \cellcolor{Yellow}0.1692 & \cellcolor{Green}0.0 & \cellcolor{Green}0.0 & \cellcolor{Yellow}0.1756 \\
\cline{1\cellcolor{BrickRed}10}
\multirow[t]{2}{*}{ZZFeatureMap} & P & \cellcolor{BrickRed}0.0131 & \cellcolor{BrickRed}0.0 & \cellcolor{BrickRed}0.0 & \cellcolor{Yellow}0.5866 &   &   &   &   \\
 & N & \cellcolor{Yellow}0.1692 & \cellcolor{BrickRed}0.0 & \cellcolor{BrickRed}0.0 & \cellcolor{Yellow}0.1756 &   &   &   &   \\
\cline{1\cellcolor{BrickRed}10}
\bottomrule
\end{tabular}



In [16]:
# Pairwise comparison of the feature-map entanglement settings per dataset and
# noise setting, printed as two colour-coded LaTeX tables. The
# 'optimizer1'/'optimizer2' column names are reused here for the two
# entanglement values being compared.
df = pd.DataFrame(get_significance_res('entanglement_featuremap'), columns=['dataset', 'noise', 'optimizer1', 'optimizer2', 'p-value'])
order = ['kdd', 'covtype', 'glass_identification', 'rice']
intermediate = pd.pivot_table(df, index=['optimizer1', 'noise'], columns=['optimizer2', 'dataset'], values='p-value', aggfunc='first').fillna(" ").reindex(order, level=1, axis=1)

# Split the table in two: everything except the last 8 columns, then the last
# 8 columns, so each part is printed as its own tabular.
part1 = intermediate.iloc[:, :-8]
part2 = intermediate.iloc[:, -8:]

# Raw strings keep the LaTeX backslashes literal ("\c" is not a valid Python
# escape sequence). The "-" marker is replaced only where it starts a cell
# ("& -"), so the hyphen inside rules such as \cline{1-14} is left intact.
for elem in [part1, part2]:
    latex = (
        elem.to_latex()
        .replace("+", r"\cellcolor{Green}")
        .replace("& -", r"& \cellcolor{BrickRed}")
        .replace("~", r"\cellcolor{Yellow}")
        .replace("None", "P")
        .replace("ibm_perth", "N")
        .replace("kdd", "K")
        .replace("covtype", "C")
        .replace("glass_identification", "G")
        .replace("rice", "R")
        .replace("{r}", "{c|}")
    )
    print(latex)

\begin{tabular}{llllllllllllll}
\toprule
 & optimizer2 & \multicolumn{4}{c|}{circular} & \multicolumn{4}{c|}{full} & \multicolumn{4}{c|}{linear} \\
 & dataset & K & C & G & R & K & C & G & R & K & C & G & R \\
optimizer1 & noise &  &  &  &  &  &  &  &  &  &  &  &  \\
\midrule
\multirow[t]{2}{*}{circular} & P &   &   &   &   & \cellcolor{Green}0.0001 & \cellcolor{Green}0.018 & \cellcolor{Yellow}0.2119 & \cellcolor{Yellow}0.7301 & \cellcolor{Yellow}0.1493 & \cellcolor{BrickRed}0.0 & \cellcolor{Yellow}0.4048 & \cellcolor{Yellow}0.5988 \\
 & N &   &   &   &   & \cellcolor{Green}0.0 & \cellcolor{Yellow}0.5443 & \cellcolor{Yellow}0.2441 & \cellcolor{Yellow}0.1297 & \cellcolor{Yellow}0.1537 & \cellcolor{BrickRed}0.0 & \cellcolor{Yellow}0.4214 & \cellcolor{Yellow}0.8146 \\
\cline{1\cellcolor{BrickRed}14}
\multirow[t]{2}{*}{full} & P & \cellcolor{BrickRed}0.0001 & \cellcolor{BrickRed}0.018 & \cellcolor{Yellow}0.2119 & \cellcolor{Yellow}0.7301 &   &   &   &   & \cellcolor{BrickRed}0.0 & \cellcol

In [15]:
# Pairwise comparison of the entanglement settings (unpaired test) per dataset
# and noise setting, printed as two colour-coded LaTeX tables. The
# 'optimizer1'/'optimizer2' column names are reused here for the two
# entanglement values being compared.
df = pd.DataFrame(get_significance_res('entanglement', paired=False), columns=['dataset', 'noise', 'optimizer1', 'optimizer2', 'p-value'])
order = ['kdd', 'covtype', 'glass_identification', 'rice']
intermediate = pd.pivot_table(df, index=['optimizer1', 'noise'], columns=['optimizer2', 'dataset'], values='p-value', aggfunc='first').fillna(" ").reindex(order, level=1, axis=1)

# Split the table in two: everything except the last 8 columns, then the last
# 8 columns, so each part is printed as its own tabular.
part1 = intermediate.iloc[:, :-8]
part2 = intermediate.iloc[:, -8:]

# Raw strings keep the LaTeX backslashes literal ("\c" is not a valid Python
# escape sequence). The "-" marker is replaced only where it starts a cell
# ("& -"), so the hyphen inside rules such as \cline{1-14} is left intact.
for elem in [part1, part2]:
    latex = (
        elem.to_latex()
        .replace("+", r"\cellcolor{Green}")
        .replace("& -", r"& \cellcolor{BrickRed}")
        .replace("~", r"\cellcolor{Yellow}")
        .replace("None", "P")
        .replace("ibm_perth", "N")
        .replace("kdd", "K")
        .replace("covtype", "C")
        .replace("glass_identification", "G")
        .replace("rice", "R")
        .replace("{r}", "{c|}")
    )
    print(latex)

\begin{tabular}{llllllllllllll}
\toprule
 & optimizer2 & \multicolumn{4}{c|}{circular} & \multicolumn{4}{c|}{full} & \multicolumn{4}{c|}{linear} \\
 & dataset & K & C & G & R & K & C & G & R & K & C & G & R \\
optimizer1 & noise &  &  &  &  &  &  &  &  &  &  &  &  \\
\midrule
\multirow[t]{2}{*}{circular} & P &   &   &   &   & \cellcolor{Yellow}0.7625 & \cellcolor{Yellow}0.1441 & \cellcolor{Yellow}0.0594 & \cellcolor{BrickRed}0.0042 & \cellcolor{Yellow}0.9804 & \cellcolor{Yellow}0.7872 & \cellcolor{Yellow}0.0623 & \cellcolor{Yellow}0.2437 \\
 & N &   &   &   &   & \cellcolor{Yellow}0.9271 & \cellcolor{Yellow}0.2133 & \cellcolor{Yellow}0.6949 & \cellcolor{BrickRed}0.0 & \cellcolor{Yellow}0.4918 & \cellcolor{Yellow}0.3285 & \cellcolor{Yellow}0.5024 & \cellcolor{BrickRed}0.0085 \\
\cline{1\cellcolor{BrickRed}14}
\multirow[t]{2}{*}{full} & P & \cellcolor{Yellow}0.7625 & \cellcolor{Yellow}0.1441 & \cellcolor{Yellow}0.0594 & \cellcolor{Green}0.0042 &   &   &   &   & \cellcolor{Yellow}0.8075 &

In [204]:
# Pairwise preprocessing comparison per dataset and noise setting, printed as
# a colour-coded LaTeX table. The 'optimizer1'/'optimizer2' column names are
# reused here for the two preprocessing methods being compared.
df = pd.DataFrame(get_significance_res('preprocessing'), columns=['dataset', 'noise', 'optimizer1', 'optimizer2', 'p-value'])
order = ['kdd', 'covtype', 'glass_identification', 'rice']
table = pd.pivot_table(df, index=['optimizer1', 'noise'], columns=['optimizer2', 'dataset'], values='p-value', aggfunc='first').fillna(" ").reindex(order, level=1, axis=1)

# Raw strings keep the LaTeX backslashes literal ("\c" is not a valid Python
# escape sequence). The "-" marker is replaced only where it starts a cell
# ("& -"), so the hyphen inside rules such as \cline{1-8} is left intact.
latex = (
    table.to_latex()
    .replace("+", r"\cellcolor{Green}")
    .replace("& -", r"& \cellcolor{BrickRed}")
    .replace("~", r"\cellcolor{Yellow}")
    .replace("None", "P")
    .replace("ibm_perth", "N")
    .replace("kdd", "K")
    .replace("covtype", "C")
    .replace("glass_identification", "G")
    .replace("rice", "R")
    .replace("{r}", "{c|}")
)
print(latex)

\begin{tabular}{llllllll}
\toprule
 & optimizer2 & \multicolumn{3}{c|}{lda} & \multicolumn{3}{c|}{pca} \\
 & dataset & K & C & G & K & C & G \\
optimizer1 & noise &  &  &  &  &  &  \\
\midrule
\multirow[t]{2}{*}{lda} & P &   &   &   & \cellcolor{BrickRed}0.0 & \cellcolor{Green}0.0 & \cellcolor{Green}0.0008 \\
 & N &   &   &   & \cellcolor{BrickRed}0.0 & \cellcolor{Green}0.0 & \cellcolor{Green}0.0 \\
\cline{1\cellcolor{BrickRed}8}
\multirow[t]{2}{*}{pca} & P & \cellcolor{Green}0.0 & \cellcolor{BrickRed}0.0 & \cellcolor{BrickRed}0.0008 &   &   &   \\
 & N & \cellcolor{Green}0.0 & \cellcolor{BrickRed}0.0 & \cellcolor{BrickRed}0.0 &   &   &   \\
\cline{1\cellcolor{BrickRed}8}
\bottomrule
\end{tabular}

