In [1]:
import matplotlib.pyplot as plt
import io
from PIL import Image, ImageChops

white = (255, 255, 255, 255)

def latex_to_img(tex):
    buf = io.BytesIO()
    plt.rc('text', usetex=True)
    plt.rc('font', family='serif')
    plt.axis('off')
    plt.text(0.05, 0.5, f'${tex}$', size=40)
    plt.savefig(buf, format='png')
    plt.close()

    im = Image.open(buf)
    bg = Image.new(im.mode, im.size, white)
    diff = ImageChops.difference(im, bg)
    diff = ImageChops.add(diff, diff, 2.0, -100)
    bbox = diff.getbbox()
    return im.crop(bbox)

In [2]:
from scipy.stats import wilcoxon
import pandas as pd

pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
pd.options.display.float_format = '{:.5f}'.format

def wilcoxon_test(results_best,results_compare):
    pvalue = wilcoxon(x=results_best,y=results_compare).pvalue
    if pvalue<=0.001:
        return {}
    elif pvalue>0.001 and pvalue<0.05:
        return {'dag':'--rwrap'}
    else:
        return {'ddag':'--rwrap'}

In [3]:
import pandas as pd
import os
from functools import partial

export_dir = '/media/nas/pgonzalez/histnetq'

path = "results"

def show_results_table(experiment_names,base_path='', include_std=False, row_names=None,error_measures=['AE','RAE']):
    table = pd.DataFrame(columns=error_measures,dtype='float')
    results_error = {}
    for error_measure in error_measures:
        results_error[error_measure] = dict()


    for experiment in experiment_names:
        results = pd.read_csv(os.path.join(base_path,path,experiment+'_errors.txt'))
        for error_measure in error_measures:
            results_error[error_measure][experiment] =results[error_measure]
            table.loc[experiment,error_measure] = results[error_measure].mean()

    #table = table.style.highlight_min(color = 'yellow', axis = 0)

    #get best method for ae and best method for rae
    best_method = {}
    for error_measure in error_measures:
        best_method[error_measure] = table.index[table[error_measure].argmin()]

    table_style = table.style

    #make wixcolom testing MAE
    for experiment in experiment_names:
        for error_measure in error_measures:
            if experiment!=best_method[error_measure]:
                table_style.set_properties(subset=(experiment,error_measure),**wilcoxon_test(results_error[error_measure][experiment],results_error[error_measure][best_method[error_measure]]))
        
    for error_measure in error_measures:    
        table_style = table_style.highlight_min(axis=0,props='textbf:--rwrap;',subset=error_measure)
    #table = table.style.highlight_min(color = 'yellow', axis = 0)

    def add_deviation(x,std):
        return "{:.4f}".format(x)+' $\pm$ ' +"{:.3f}".format(std)

    use_f = lambda x: partial(add_deviation, std=x)
    formatters = [add_deviation]

    if include_std:
        for experiment_name in experiment_names:
            for error_measure in error_measures:
                std_value = results_error[error_measure][experiment_name].std()
                table_style = table_style.format(formatter=use_f(std_value),subset=(experiment_name,error_measure))
            

    latex_code = table_style.to_latex(hrules=True,column_format="r|"+"r"*len(table.columns))
    if row_names is not None:
        for experiment_name,row_name in zip(experiment_names,row_names):
            latex_code = latex_code.replace(experiment_name,row_name)

    return latex_code.replace("_","\\_"), results_error

  return "{:.4f}".format(x)+' $\pm$ ' +"{:.3f}".format(std)


## Results T1A

In [4]:
experiment_names = ["CC_T1A","PCC_T1A","ACC_T1A","PACC_T1A","HDy_T1A","QuaNet_T1A","EMQ_BCTS_T1A","EMQ_NoCal_T1A","deepsets_avg_T1A","deepsets_max_T1A","deepsets_median_T1A","settransformers_T1A","histnet_hard_T1A"]
row_names = ["CC","PCC","ACC","PACC","HDy","QuaNet","EMQ-BCTS","EMQ-NoCal","Deepsets (avg)","Deepsets (max)","Deepsets (median)","SetTransformers","HistNetQ"]
t1a_table,_ = show_results_table(experiment_names=experiment_names, base_path='', include_std=True,error_measures=['AE','RAE'], row_names=row_names)
print(t1a_table)
with open(os.path.join(export_dir,'tables/t1a_table.tex'),'w') as f:
    f.write(t1a_table)


\begin{tabular}{r|rr}
\toprule
 & AE & RAE \\
\midrule
CC & 0.0916 $\pm$ 0.055 & 1.0840 $\pm$ 4.311 \\
PCC & 0.1166 $\pm$ 0.070 & 1.3940 $\pm$ 5.621 \\
ACC & 0.0372 $\pm$ 0.029 & 0.1702 $\pm$ 0.508 \\
PACC & 0.0298 $\pm$ 0.023 & 0.1522 $\pm$ 0.464 \\
HDy & 0.0281 $\pm$ 0.022 & 0.1451 $\pm$ 0.456 \\
QuaNet & 0.0342 $\pm$ 0.025 & 0.3176 $\pm$ 1.352 \\
EMQ-BCTS & 0.0269 $\pm$ 0.021 & 0.1183 $\pm$ 0.251 \\
EMQ-NoCal & 0.0236 $\pm$ 0.018 & 0.1088 $\pm$ 0.267 \\
Deepsets (avg) & 0.0278 $\pm$ 0.021 & 0.1269 $\pm$ 0.228 \\
Deepsets (max) & 0.0499 $\pm$ 0.042 & 0.2183 $\pm$ 0.488 \\
Deepsets (median) & 0.0292 $\pm$ 0.023 & 0.1389 $\pm$ 0.256 \\
SetTransformers & \ddag{0.0225 $\pm$ 0.017} & \ddag{0.1096 $\pm$ 0.262} \\
HistNetQ & \textbf{0.0224 $\pm$ 0.017} & \textbf{0.1071 $\pm$ 0.233} \\
\bottomrule
\end{tabular}



## Results T1B

In [5]:
experiment_names = ["deepsets_avg_T1B","deepsets_max_T1B","deepsets_median_T1B","settransformers_T1B","histnet_hard_T1B_64bins","CC_T1B","PCC_T1B","ACC_T1B","PACC_T1B","EMQ_BCTS_T1B","EMQ_NoCal_T1B"]
row_names = ["Deepsets (avg)","Deepsets (max)","Deepsets (median)","SetTransformers","HistNetQ","CC","PCC","ACC","PACC","EMQ-BCTS","EMQ-NoCal"]
t1b_table,_=show_results_table(experiment_names,include_std=True,row_names=row_names)
print(t1b_table)
with open(os.path.join(export_dir,'tables/t1b_table.tex'),'w') as f:
    f.write(t1b_table)

\begin{tabular}{r|rr}
\toprule
 & AE & RAE \\
\midrule
Deepsets (avg) & 0.0128 $\pm$ 0.004 & 0.9954 $\pm$ 0.658 \\
Deepsets (max) & 0.0277 $\pm$ 0.005 & 1.4646 $\pm$ 1.026 \\
Deepsets (median) & 0.0143 $\pm$ 0.004 & 0.8443 $\pm$ 0.543 \\
SetTransformers & 0.0385 $\pm$ 0.008 & 1.6748 $\pm$ 1.428 \\
HistNetQ & \textbf{0.0107 $\pm$ 0.004} & \textbf{0.7574 $\pm$ 0.489} \\
CC & 0.0141 $\pm$ 0.003 & 1.8936 $\pm$ 1.187 \\
PCC & 0.0171 $\pm$ 0.003 & 2.2646 $\pm$ 1.416 \\
ACC & 0.0184 $\pm$ 0.004 & 1.4213 $\pm$ 1.270 \\
PACC & 0.0158 $\pm$ 0.004 & 1.3054 $\pm$ 0.988 \\
EMQ-BCTS & 0.0117 $\pm$ 0.003 & 0.9372 $\pm$ 0.817 \\
EMQ-NoCal & 0.0118 $\pm$ 0.003 & 0.8780 $\pm$ 0.751 \\
\bottomrule
\end{tabular}



## Results T1

In [6]:
experiment_names = ["CC_T1","PCC_T1","ACC_T1","PACC_T1","HDy_T1","Quanet_T1","SLD-BCTS_T1","SLD_T1","deepsets_avg_T1","deepsets_median_T1","deepsets_max_T1","settransformers_T1","histnet_hard_T1"]
row_names = ["CC","PCC","ACC","PACC","HDy","Quanet","EMQ-BCTS","EMQ-NoCal","Deepsets (avg)","Deepsets (median)","Deepsets (max)","SetTransformers","HistNetQ"]
t1_table,_ = show_results_table(experiment_names=experiment_names, base_path='', include_std=True,error_measures=['AE','RAE'], row_names=row_names)
print(t1_table)
with open(os.path.join(export_dir,'tables/t1_table.tex'),'w') as f:
    f.write(t1_table)

\begin{tabular}{r|rr}
\toprule
 & AE & RAE \\
\midrule
CC & 0.0796 $\pm$ 0.048 & 0.9774 $\pm$ 3.919 \\
PCC & 0.1017 $\pm$ 0.060 & 1.2656 $\pm$ 5.113 \\
ACC & 0.0264 $\pm$ 0.020 & 0.1644 $\pm$ 0.603 \\
PACC & 0.0240 $\pm$ 0.018 & 0.1339 $\pm$ 0.463 \\
HDy & 0.0221 $\pm$ 0.017 & \textbf{0.1067 $\pm$ 0.290} \\
Quanet & 0.0243 $\pm$ 0.018 & 0.2640 $\pm$ 1.284 \\
EMQ-BCTS & 0.0221 $\pm$ 0.017 & \ddag{0.1097 $\pm$ 0.324} \\
EMQ-NoCal & 0.0211 $\pm$ 0.017 & 0.1110 $\pm$ 0.367 \\
Deepsets (avg) & 0.0208 $\pm$ 0.016 & 0.1096 $\pm$ 0.331 \\
Deepsets (median) & 0.0237 $\pm$ 0.018 & 0.1235 $\pm$ 0.324 \\
Deepsets (max) & 0.0447 $\pm$ 0.037 & 0.2191 $\pm$ 0.575 \\
SetTransformers & \textbf{0.0202 $\pm$ 0.016} & 0.1114 $\pm$ 0.374 \\
HistNetQ & \ddag{0.0204 $\pm$ 0.016} & 0.1069 $\pm$ 0.312 \\
\bottomrule
\end{tabular}



## Results T2

In [7]:
experiment_names = ["CC_T2","PCC_T2","ACC_T2","PACC_T2","SLD-BCTS_T2","SLD_T2","deepsets_avg_T2","deepsets_median_T2","deepsets_max_T2","settransformers_T2","histnet_hard_T2_64bins"]
row_names = ["CC","PCC","ACC","PACC","EMQ-BCTS","EMQ","Deepsets (avg)","Deepsets (med)","Deepsets (max)","SetTransformers","HistNetQ"]
t2_table,_=show_results_table(experiment_names,include_std=True,row_names=row_names)
print(t2_table)
with open(os.path.join(export_dir,'tables/t2_table.tex'),'w') as f:
    f.write(t2_table)

\begin{tabular}{r|rr}
\toprule
 & AE & RAE \\
\midrule
CC & 0.0166 $\pm$ 0.003 & 2.3096 $\pm$ 1.383 \\
PCC & 0.0193 $\pm$ 0.003 & 2.6751 $\pm$ 1.605 \\
ACC & 0.0164 $\pm$ 0.004 & 1.3479 $\pm$ 1.161 \\
PACC & 0.0155 $\pm$ 0.004 & 1.1942 $\pm$ 1.135 \\
EMQ-BCTS & 0.0138 $\pm$ 0.004 & 1.1500 $\pm$ 0.978 \\
EMQ & \textbf{0.0134 $\pm$ 0.003} & 1.1616 $\pm$ 0.991 \\
Deepsets (avg) & 0.0408 $\pm$ 0.010 & 1.6982 $\pm$ 2.263 \\
Deepsets (med) & 0.0209 $\pm$ 0.006 & 1.2353 $\pm$ 0.891 \\
Deepsets (max) & 0.0219 $\pm$ 0.004 & 2.4217 $\pm$ 1.879 \\
SetTransformers & 0.0384 $\pm$ 0.013 & 3.6275 $\pm$ 4.218 \\
HistNetQ & 0.0181 $\pm$ 0.006 & \textbf{0.9508 $\pm$ 0.576} \\
\bottomrule
\end{tabular}



## Análisis por número de bins

In [8]:
import numpy as np
experiment_names = ["histnet_hard_T1B_8bins","histnet_hard_T1B_16bins","histnet_hard_T1B","histnet_hard_T1B_64bins"]
row_names = ["HistNet (8 bins)","HistNet (16 bins)","HistNet (32 bins)","HistNet (64 bins)"]
bins_comp, results_error=show_results_table(experiment_names, row_names = row_names, include_std=True)
print(bins_comp)
with open(os.path.join(export_dir,'tables/bins_comp.tex'),'w') as f:
    f.write(bins_comp)

\begin{tabular}{r|rr}
\toprule
 & AE & RAE \\
\midrule
HistNet (8 bins) & 0.0297 $\pm$ 0.008 & 1.2878 $\pm$ 1.000 \\
HistNet (16 bins) & 0.0212 $\pm$ 0.007 & 1.0572 $\pm$ 0.738 \\
HistNet (32 bins) & 0.0121 $\pm$ 0.005 & 0.7851 $\pm$ 0.520 \\
HistNet (32 bins)\_64bins & \textbf{0.0107 $\pm$ 0.004} & \textbf{0.7574 $\pm$ 0.489} \\
\bottomrule
\end{tabular}



## Resultados FASHIONMNIST

In [9]:
experiment_names = ["settransformers_ae_fashionmnist","settransformers_rae_fashionmnist",
"deepsets_avg_ae_fashionmnist","deepsets_avg_ae_fashionmnist","deepsets_avg_rae_fashionmnist",
"deepsets_median_ae_fashionmnist","deepsets_median_rae_fashionmnist","deepsets_max_ae_fashionmnist",
"deepsets_max_rae_fashionmnist",
"histnet_hard_ae_fashionmnist","histnet_hard_rae_fashionmnist",
"CC_fashionmnist","PCC_fashionmnist","AC_fashionmnist","PAC_fashionmnist","HDy_fashionmnist",
"EM_fashionmnist","EM-BCTS_fashionmnist"]
#,

fashionmnist_results,_=show_results_table(experiment_names,base_path="fashionmnist",include_std=True,error_measures=['AE','RAE'])
print(fashionmnist_results)
with open(os.path.join(export_dir,'tables/fashionmnist.tex'),'w') as f:
    f.write(fashionmnist_results)

\begin{tabular}{r|rr}
\toprule
 & AE & RAE \\
\midrule
settransformers\_ae\_fashionmnist & 0.0104 $\pm$ 0.003 & 0.3716 $\pm$ 0.447 \\
settransformers\_rae\_fashionmnist & 0.1295 $\pm$ 0.023 & 2.2017 $\pm$ 1.190 \\
deepsets\_avg\_ae\_fashionmnist & 0.0083 $\pm$ 0.003 & 0.2970 $\pm$ 0.341 \\
deepsets\_avg\_rae\_fashionmnist & 0.0249 $\pm$ 0.014 & 0.3283 $\pm$ 0.233 \\
deepsets\_median\_ae\_fashionmnist & 0.0094 $\pm$ 0.003 & 0.3550 $\pm$ 0.422 \\
deepsets\_median\_rae\_fashionmnist & 0.0655 $\pm$ 0.018 & 0.7195 $\pm$ 0.586 \\
deepsets\_max\_ae\_fashionmnist & 0.0219 $\pm$ 0.007 & 0.4147 $\pm$ 0.349 \\
deepsets\_max\_rae\_fashionmnist & 0.0250 $\pm$ 0.011 & 0.3520 $\pm$ 0.323 \\
histnet\_hard\_ae\_fashionmnist & \textbf{0.0060 $\pm$ 0.002} & 0.2327 $\pm$ 0.289 \\
histnet\_hard\_rae\_fashionmnist & 0.0069 $\pm$ 0.002 & \dag{0.1592 $\pm$ 0.171} \\
CC\_fashionmnist & 0.0163 $\pm$ 0.007 & 0.5828 $\pm$ 0.723 \\
PCC\_fashionmnist & 0.0204 $\pm$ 0.008 & 0.7817 $\pm$ 0.974 \\
AC\_fashionmnist & 0

## Comparación con baselines la competición LEQUA

In [10]:
from dlquantification.utils.lossfunc import MRAE
import torch

def compute_errors(baselines,baseline_path,test_prevalences,dataset,loss_mrae):
    test_prevalences = pd.read_csv(test_prevalences)
    for baseline in baselines:
        export_path = os.path.join("results/", baseline + "_"+dataset+"_errors.txt")
        if not os.path.exists(export_path):
            errors = pd.DataFrame(columns=('AE','RAE'),index=range(5000))
            results = pd.read_csv(os.path.join(baseline_path,baseline)+'.csv')
            for i in range(len(results)):
                errors.iloc[i]['RAE']=loss_mrae(torch.FloatTensor(test_prevalences.iloc[i,1:].to_numpy()), torch.FloatTensor(results.iloc[i,1:].to_numpy())).numpy()
                errors.iloc[i]['AE']=torch.nn.functional.l1_loss(torch.FloatTensor(test_prevalences.iloc[i,1:].to_numpy()), torch.FloatTensor(results.iloc[i,1:].to_numpy())).numpy()
            errors.to_csv(export_path, index_label="id")


baselines=['CC','ACC','HDy','PACC','PCC','QuaNet','SLD']
#For this experiments we need to compute the errors as we only have the predictions

loss_mrae = MRAE(eps=1.0 / (2 * 250), n_classes=2).MRAE
baseline_path_T1A = os.path.join(path,'codalab/T1A')
test_prevalences = '/media/nas/pgonzalez/histnetq/experiments/paper/lequa/T1A/public/test_prevalences.txt'
compute_errors(baselines,baseline_path_T1A,test_prevalences,'T1A',loss_mrae)
baselines=['CC','ACC','PACC','PCC','SLD']
loss_mrae = MRAE(eps=1.0 / (2 * 1000), n_classes=28).MRAE
baseline_path_T1B = os.path.join(path,'codalab/T1B')
test_prevalences = '/media/nas/pgonzalez/histnetq/experiments/paper/lequa/T1B/public/test_prevalences.txt'
compute_errors(baselines,baseline_path_T1B,test_prevalences,'T1B',loss_mrae)
baselines=['CC','ACC','PACC','PCC','HDy','Quanet','SLD','SLD-BCTS']
loss_mrae = MRAE(eps=1.0 / (2 * 250), n_classes=2).MRAE
baseline_path_T1 = os.path.join(path,'codalab/T1')
test_prevalences = '/media/nas/pgonzalez/histnetq/experiments/paper/lequa/T1/public/test_prevalences.txt'
compute_errors(baselines,baseline_path_T1,test_prevalences,'T1',loss_mrae)
baselines=['CC','ACC','PACC','PCC','SLD','SLD-BCTS']
loss_mrae = MRAE(eps=1.0 / (2 * 1000), n_classes=28).MRAE
baseline_path_T2 = os.path.join(path,'codalab/T2')
test_prevalences = '/media/nas/pgonzalez/histnetq/experiments/paper/lequa/T2/public/test_prevalences.txt'
compute_errors(baselines,baseline_path_T2,test_prevalences,'T2',loss_mrae)