In [1]:
import matplotlib.pyplot as plt
import io
from PIL import Image, ImageChops

white = (255, 255, 255, 255)

def latex_to_img(tex):
    buf = io.BytesIO()
    plt.rc('text', usetex=True)
    plt.rc('font', family='serif')
    plt.axis('off')
    plt.text(0.05, 0.5, f'${tex}$', size=40)
    plt.savefig(buf, format='png')
    plt.close()

    im = Image.open(buf)
    bg = Image.new(im.mode, im.size, white)
    diff = ImageChops.difference(im, bg)
    diff = ImageChops.add(diff, diff, 2.0, -100)
    bbox = diff.getbbox()
    return im.crop(bbox)

In [4]:
from scipy.stats import wilcoxon
import pandas as pd

pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
pd.options.display.float_format = '{:.5f}'.format

def wilcoxon_test(results_best,results_compare):
    pvalue = wilcoxon(x=results_best,y=results_compare).pvalue
    if pvalue<=0.001:
        return {}
    elif pvalue>0.001 and pvalue<0.05:
        return {'dag':'--rwrap'}
    else:
        return {'ddag':'--rwrap'}

In [2]:
import pandas as pd
import os
from functools import partial

export_dir = '/media/nas/pgonzalez/a2032'

path = "results"

def show_results_table(experiment_names,base_path='', include_std=False, row_names=None,error_measures=['AE','RAE']):
    table = pd.DataFrame(columns=error_measures,dtype='float')
    results_error = {}
    for error_measure in error_measures:
        results_error[error_measure] = dict()


    for experiment in experiment_names:
        results = pd.read_csv(os.path.join(base_path,path,experiment+'_errors.txt'))
        for error_measure in error_measures:
            results_error[error_measure][experiment] =results[error_measure]
            table.loc[experiment,error_measure] = results[error_measure].mean()

    #table = table.style.highlight_min(color = 'yellow', axis = 0)

    #get best method for ae and best method for rae
    best_method = {}
    for error_measure in error_measures:
        best_method[error_measure] = table.index[table[error_measure].argmin()]

    table_style = table.style

    #make wixcolom testing MAE
    for experiment in experiment_names:
        for error_measure in error_measures:
            if experiment!=best_method[error_measure]:
                table_style.set_properties(subset=(experiment,error_measure),**wilcoxon_test(results_error[error_measure][experiment],results_error[error_measure][best_method[error_measure]]))
        
    for error_measure in error_measures:    
        table_style = table_style.highlight_min(axis=0,props='textbf:--rwrap;',subset=error_measure)
    #table = table.style.highlight_min(color = 'yellow', axis = 0)

    def add_deviation(x,std):
        return "{:.5f}".format(x)+' $\pm$ ' +"{:.5f}".format(std)

    use_f = lambda x: partial(add_deviation, std=x)
    formatters = [add_deviation]

    if include_std:
        for experiment_name in experiment_names:
            for error_measure in error_measures:
                std_value = results_error[error_measure][experiment_name].std()
                table_style = table_style.format(formatter=use_f(std_value),subset=(experiment_name,error_measure))
            

    latex_code = table_style.to_latex(hrules=True,column_format="r|"+"r"*len(table.columns))
    if row_names is not None:
        for experiment_name,row_name in zip(experiment_names,row_names):
            latex_code = latex_code.replace(experiment_name,row_name)

    return latex_code.replace("_","\\_"), results_error

  return "{:.5f}".format(x)+' $\pm$ ' +"{:.5f}".format(std)


## Results T1A

In [5]:
experiment_names = ["CC_T1A","PCC_T1A","ACC_T1A","PACC_T1A","HDy_T1A","QuaNet_T1A","EMQ_BCTS_T1A","EMQ_NoCal_T1A","deepsets_avg_T1A","deepsets_max_T1A","deepsets_median_T1A","settransformers_T1A","histnet_hard_T1A"]
row_names = ["CC","PCC","ACC","PACC","HDy","QuaNet","EMQ-BCTS","EMQ-NoCal","Deepsets (avg)","Deepsets (max)","Deepsets (median)","SetTransformers","HistNetQ"]
t1a_table,_ = show_results_table(experiment_names=experiment_names, base_path='', include_std=True,error_measures=['AE','RAE'], row_names=row_names)
print(t1a_table)
with open(os.path.join(export_dir,'tables/t1a_table.tex'),'w') as f:
    f.write(t1a_table)


\begin{tabular}{r|rr}
\toprule
 & AE & RAE \\
\midrule
CC & 0.09160 $\pm$ 0.05540 & 1.08400 $\pm$ 4.31090 \\
PCC & 0.11664 $\pm$ 0.06978 & 1.39402 $\pm$ 5.62123 \\
ACC & 0.03716 $\pm$ 0.02936 & 0.17020 $\pm$ 0.50800 \\
PACC & 0.02985 $\pm$ 0.02258 & 0.15218 $\pm$ 0.46440 \\
HDy & 0.02814 $\pm$ 0.02212 & 0.14514 $\pm$ 0.45621 \\
QuaNet & 0.03418 $\pm$ 0.02528 & 0.31764 $\pm$ 1.35237 \\
EMQ-BCTS & 0.02689 $\pm$ 0.02094 & 0.11828 $\pm$ 0.25065 \\
EMQ-NoCal & 0.02359 $\pm$ 0.01845 & 0.10878 $\pm$ 0.26668 \\
Deepsets (avg) & 0.02779 $\pm$ 0.02105 & 0.12686 $\pm$ 0.22817 \\
Deepsets (max) & 0.04991 $\pm$ 0.04167 & 0.21830 $\pm$ 0.48828 \\
Deepsets (median) & 0.02919 $\pm$ 0.02273 & 0.13887 $\pm$ 0.25631 \\
SetTransformers & \ddag{0.02246 $\pm$ 0.01717} & \ddag{0.10958 $\pm$ 0.26205} \\
HistNetQ & \textbf{0.02236 $\pm$ 0.01709} & \textbf{0.10707 $\pm$ 0.23312} \\
\bottomrule
\end{tabular}



## Results T1B

In [6]:
experiment_names = ["deepsets_avg_T1B","deepsets_max_T1B","deepsets_median_T1B","settransformers_T1B","histnet_hard_T1B_64bins","CC_T1B","PCC_T1B","ACC_T1B","PACC_T1B","EMQ_BCTS_T1B","EMQ_NoCal_T1B"]
row_names = ["Deepsets (avg)","Deepsets (max)","Deepsets (median)","SetTransformers","HistNetQ","CC","PCC","ACC","PACC","EMQ-BCTS","EMQ-NoCal"]
t1b_table,_=show_results_table(experiment_names,include_std=True,row_names=row_names)
print(t1b_table)
with open(os.path.join(export_dir,'tables/t1b_table.tex'),'w') as f:
    f.write(t1b_table)

\begin{tabular}{r|rr}
\toprule
 & AE & RAE \\
\midrule
Deepsets (avg) & 0.01283 $\pm$ 0.00379 & 0.99542 $\pm$ 0.65778 \\
Deepsets (max) & 0.02766 $\pm$ 0.00515 & 1.46464 $\pm$ 1.02644 \\
Deepsets (median) & 0.01429 $\pm$ 0.00432 & 0.84427 $\pm$ 0.54286 \\
SetTransformers & 0.03847 $\pm$ 0.00779 & 1.67475 $\pm$ 1.42750 \\
HistNetQ & \textbf{0.01070 $\pm$ 0.00367} & \textbf{0.75739 $\pm$ 0.48891} \\
CC & 0.01406 $\pm$ 0.00295 & 1.89365 $\pm$ 1.18732 \\
PCC & 0.01711 $\pm$ 0.00332 & 2.26462 $\pm$ 1.41627 \\
ACC & 0.01841 $\pm$ 0.00437 & 1.42134 $\pm$ 1.26971 \\
PACC & 0.01578 $\pm$ 0.00379 & 1.30538 $\pm$ 0.98837 \\
EMQ-BCTS & 0.01174 $\pm$ 0.00305 & 0.93721 $\pm$ 0.81732 \\
EMQ-NoCal & 0.01177 $\pm$ 0.00285 & 0.87802 $\pm$ 0.75120 \\
\bottomrule
\end{tabular}



## Results T1

In [7]:
experiment_names = ["CC_T1","PCC_T1","ACC_T1","PACC_T1","HDy_T1","QuaNet_T1","EMQ_BCTS_T1","EMQ_NoCal_T1","deepsets_avg_T1","deepsets_max_T1","deepsets_median_T1","settransformers_T1","histnet_hard_T1"]
row_names = ["CC","PCC","ACC","PACC","HDy","QuaNet","EMQ-BCTS","EMQ-NoCal","Deepsets (avg)","Deepsets (max)","Deepsets (median)","SetTransformers","HistNetQ"]
t1_table,_ = show_results_table(experiment_names=experiment_names, base_path='', include_std=True,error_measures=['AE','RAE'], row_names=row_names)
print(t1_table)
with open(os.path.join(export_dir,'tables/t1_table.tex'),'w') as f:
    f.write(t1_table)

FileNotFoundError: [Errno 2] No such file or directory: 'results/CC_T1_errors.txt'

## Results T2

In [10]:
#experiment_names = ["deepsets_avg_T2","deepsets_max_T2","deepsets_median_T2","settransformers_T2","histnet_hard_T2_64bins","CC_T2","PCC_T2","ACC_T2","PACC_T2","EMQ_BCTS_T2","EMQ_NoCal_T2"]
#row_names = ["Deepsets (avg)","Deepsets (max)","Deepsets (median)","SetTransformers","HistNetQ","CC","PCC","ACC","PACC","EMQ-BCTS","EMQ-NoCal"]
experiment_names = ["deepsets_avg_T2","deepsets_max_T2","deepsets_median_T2","settransformers_T2","histnet_hard_T2_64bins"]
row_names = ["Deepsets (avg)","Deepsets (max)","Deepsets (median)","SetTransformers","HistNetQ"]
t2_table,_=show_results_table(experiment_names,include_std=True,row_names=row_names)
print(t2_table)
with open(os.path.join(export_dir,'tables/t2_table.tex'),'w') as f:
    f.write(t2_table)

\begin{tabular}{r|rr}
\toprule
 & AE & RAE \\
\midrule
Deepsets (avg) & 0.04082 $\pm$ 0.00972 & 1.69817 $\pm$ 2.26256 \\
Deepsets (max) & 0.02188 $\pm$ 0.00366 & 2.42173 $\pm$ 1.87929 \\
Deepsets (median) & 0.02089 $\pm$ 0.00591 & 1.23527 $\pm$ 0.89104 \\
SetTransformers & 0.03836 $\pm$ 0.01308 & 3.62753 $\pm$ 4.21755 \\
HistNetQ & \textbf{0.01808 $\pm$ 0.00553} & \textbf{0.95084 $\pm$ 0.57574} \\
\bottomrule
\end{tabular}



## Análisis por número de bins

In [12]:
import numpy as np
import seaborn as sns
sns.set_theme()
experiment_names = ["histnet_hard_T1B_8bins","histnet_hard_T1B_16bins","histnet_hard_T1B","histnet_hard_T1B_64bins"]
row_names = ["HistNet (8 bins)","HistNet (16 bins)","HistNet (32 bins)","HistNet (64 bins)"]
bins_comp, results_error=show_results_table(experiment_names, row_names = row_names, include_std=True)
print(bins_comp)
with open(os.path.join(export_dir,'tables/bins_comp.tex'),'w') as f:
    f.write(bins_comp)

\begin{tabular}{r|rr}
\toprule
 & AE & RAE \\
\midrule
HistNet (8 bins) & 0.02966 $\pm$ 0.00820 & 1.28782 $\pm$ 0.99960 \\
HistNet (16 bins) & 0.02120 $\pm$ 0.00710 & 1.05719 $\pm$ 0.73762 \\
HistNet (32 bins) & 0.01212 $\pm$ 0.00482 & 0.78514 $\pm$ 0.51994 \\
HistNet (32 bins)\_64bins & \textbf{0.01070 $\pm$ 0.00367} & \textbf{0.75739 $\pm$ 0.48891} \\
\bottomrule
\end{tabular}



## Resultados FASHIONMNIST

In [22]:
experiment_names = ["settransformers_ae_fashionmnist","settransformers_rae_fashionmnist",
"deepsets_avg_ae_fashionmnist","deepsets_avg_ae_fashionmnist","deepsets_avg_rae_fashionmnist",
"deepsets_median_ae_fashionmnist","deepsets_median_rae_fashionmnist","deepsets_max_ae_fashionmnist",
"deepsets_max_rae_fashionmnist",
"histnet_hard_ae_fashionmnist","histnet_hard_rae_fashionmnist",
"CC_fashionmnist","PCC_fashionmnist","AC_fashionmnist","PAC_fashionmnist","HDy_fashionmnist",
"EM_fashionmnist","EM-BCTS_fashionmnist"]
#,

fashionmnist_results,_=show_results_table(experiment_names,base_path="fashionmnist",include_std=True,error_measures=['AE','RAE'])
print(fashionmnist_results)
with open(os.path.join(export_dir,'tables/fashionmnist.tex'),'w') as f:
    f.write(fashionmnist_results)

\begin{tabular}{r|rr}
\toprule
 & AE & RAE \\
\midrule
settransformers\_ae\_fashionmnist & 0.01043 $\pm$ 0.00328 & 0.37164 $\pm$ 0.44702 \\
settransformers\_rae\_fashionmnist & 0.12947 $\pm$ 0.02279 & 2.20175 $\pm$ 1.19007 \\
deepsets\_avg\_ae\_fashionmnist & 0.00829 $\pm$ 0.00254 & 0.29700 $\pm$ 0.34086 \\
deepsets\_avg\_rae\_fashionmnist & 0.02494 $\pm$ 0.01429 & 0.32826 $\pm$ 0.23251 \\
deepsets\_median\_ae\_fashionmnist & 0.00942 $\pm$ 0.00288 & 0.35496 $\pm$ 0.42178 \\
deepsets\_median\_rae\_fashionmnist & 0.06554 $\pm$ 0.01760 & 0.71946 $\pm$ 0.58579 \\
deepsets\_max\_ae\_fashionmnist & 0.02185 $\pm$ 0.00699 & 0.41466 $\pm$ 0.34917 \\
deepsets\_max\_rae\_fashionmnist & 0.02502 $\pm$ 0.01115 & 0.35195 $\pm$ 0.32316 \\
histnet\_hard\_ae\_fashionmnist & \textbf{0.00602 $\pm$ 0.00206} & 0.23270 $\pm$ 0.28885 \\
histnet\_hard\_rae\_fashionmnist & 0.00685 $\pm$ 0.00239 & \dag{0.15923 $\pm$ 0.17085} \\
CC\_fashionmnist & 0.01634 $\pm$ 0.00738 & 0.58279 $\pm$ 0.72314 \\
PCC\_fashionmnist

## Comparación con baselines la competición LEQUA

In [None]:
from dlquantification.utils.lossfunc import MRAE
import torch

def compute_errors(baselines,baseline_path,test_prevalences,dataset,loss_mrae):
    test_prevalences = pd.read_csv(test_prevalences)
    for baseline in baselines:
        export_path = os.path.join("results/", baseline + "_"+dataset+"_errors.txt")
        if not os.path.exists(export_path):
            errors = pd.DataFrame(columns=('AE','RAE'),index=range(5000))
            results = pd.read_csv(os.path.join(baseline_path,baseline)+'.csv')
            for i in range(len(results)):
                errors.iloc[i]['RAE']=loss_mrae(torch.FloatTensor(test_prevalences.iloc[i,1:].to_numpy()), torch.FloatTensor(results.iloc[i,1:].to_numpy())).numpy()
                errors.iloc[i]['AE']=torch.nn.functional.l1_loss(torch.FloatTensor(test_prevalences.iloc[i,1:].to_numpy()), torch.FloatTensor(results.iloc[i,1:].to_numpy())).numpy()
            errors.to_csv(export_path, index_label="id")


baselines=['CC','ACC','HDy','PACC','PCC','QuaNet','SLD']
#For this experiments we need to compute the errors as we only have the predictions

loss_mrae = MRAE(eps=1.0 / (2 * 250), n_classes=2).MRAE
baseline_path_T1A = os.path.join(path,'codalab/T1A')
test_prevalences = '/media/nas/pgonzalez/lequa/T1A/public/test_prevalences.txt'
compute_errors(baselines,baseline_path_T1A,test_prevalences,'T1A',loss_mrae)
baselines=['CC','ACC','PACC','PCC','SLD']
loss_mrae = MRAE(eps=1.0 / (2 * 1000), n_classes=28).MRAE
baseline_path_T1B = os.path.join(path,'codalab/T1B')
test_prevalences = '/media/nas/pgonzalez/lequa/T1B/public/test_prevalences.txt'
compute_errors(baselines,baseline_path_T1B,test_prevalences,'T1B',loss_mrae)