In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the raw simulation results and attach an "error" column holding the
# absolute value of each estimate.
# NOTE(review): using |estimate| as the error presumes the target value is 0 -- confirm.
experiments = pd.read_parquet("results_bernoulli_symmetrical.parquet")
experiments = experiments.assign(error=experiments["estimate"].abs())

In [2]:
# Show the loaded results frame (one row per individual estimate, including the derived "error" column).
experiments

Unnamed: 0,n,method,estimate,delta,distribution,c_eta,error
0,100,mean,-0.073612,0.100,Gaussian,0.5,0.073612
1,100,mean,-0.073713,0.100,Gaussian,0.5,0.073713
2,100,mean,-0.160986,0.100,Gaussian,0.5,0.160986
3,100,mean,-0.019979,0.100,Gaussian,0.5,0.019979
4,100,mean,0.149599,0.100,Gaussian,0.5,0.149599
...,...,...,...,...,...,...,...
149995,500,tm exp,0.050865,0.001,St df=2.01,2.0,0.050865
149996,500,tm exp,0.051008,0.001,St df=2.01,2.0,0.051008
149997,500,tm exp,0.058377,0.001,St df=2.01,2.0,0.058377
149998,500,tm exp,-0.079589,0.001,St df=2.01,2.0,0.079589


In [3]:
# Grids of experimental settings found in the results.
# np.unique already returns its values in ascending order.
range_ns = np.unique(experiments["n"])
range_deltas = -np.sort(-np.unique(experiments["delta"]))  # descending order
range_distributions = np.unique(experiments["distribution"])
range_c_etas = np.unique(experiments["c_eta"])

# Columns identifying one experimental configuration (everything except the method).
id_cols = ["n", "delta", "distribution", "c_eta"]

# For every confidence level delta, keep only the runs made at that delta and
# take the empirical (1 - delta)-quantile of the error per configuration and method.
df_grouped = pd.concat(
    [
        experiments.loc[experiments["delta"] == delta]
        .groupby(["n", "delta", "method", "distribution", "c_eta"])["error"]
        .quantile(1 - delta)
        .reset_index()
        for delta in range_deltas
    ]
)

# Wide layout: one row per configuration, one column per method.
df_grouped_pivot = df_grouped.pivot(
    index=id_cols, columns="method", values="error"
).reset_index()

# Relative difference of each method's error quantile w.r.t. the "mean" column
# (so the "mean" column itself becomes 0).
relative_errors = df_grouped_pivot.copy()
est_cols = [col for col in df_grouped_pivot.columns if col not in id_cols]
for est in est_cols:
    reference = df_grouped_pivot["mean"]
    relative_errors[est] = (df_grouped_pivot[est] - reference) / reference

relative_errors

method,n,delta,distribution,c_eta,mean,mean atm,mean exp,mean lv,mean win,mom,mom atm,mom exp,mom lv,mom win,tm,tm atm,tm exp,tm lv,tm win
0,100,0.001,Gaussian,0.5,0.0,0.019782,0.01536602,0.01536901,0.015734,0.0920714,0.021179,-0.009305029,-0.00384287,0.019372,0.010677,0.020469,0.01336595,0.015366,0.032692
1,100,0.001,Gaussian,1.0,0.0,0.032704,0.009918823,0.01272792,0.013638,0.2687376,0.12185,0.008280786,0.01980129,0.042837,0.022161,0.060853,0.008984647,0.012262,0.020676
2,100,0.001,Gaussian,2.0,0.0,0.059229,0.008397566,-0.009305029,-0.006061,0.1550153,0.171777,0.04472327,0.02562982,0.044555,0.042556,0.085771,0.02492738,0.071492,0.035008
3,100,0.001,St df=2.01,0.5,0.0,-0.754471,-0.5744758,-0.4568705,-0.791735,-0.6782153,-0.809532,-0.8380164,-0.8345188,-0.812233,-0.822779,-0.805324,-0.8433383,-0.847263,-0.815904
4,100,0.001,St df=2.01,1.0,0.0,-0.710082,-0.3794714,-0.2060109,-0.698138,-0.7204041,-0.816893,-0.8208888,-0.8296296,-0.820277,-0.836227,-0.823713,-0.8336003,-0.851192,-0.831074
5,100,0.001,St df=2.01,2.0,0.0,-0.481431,-0.1929805,-0.0513577,-0.646645,-0.7597032,-0.829342,-0.5521633,-0.7504643,-0.827041,-0.850094,-0.854184,-0.5516755,-0.750368,-0.854456
6,100,0.01,Gaussian,0.5,0.0,0.099574,-0.005461924,-0.006584187,0.009805,0.1779478,0.159381,-0.005842656,-0.006627451,0.004087,0.000796,0.105965,-0.006575065,-0.006001,0.009411
7,100,0.01,Gaussian,1.0,0.0,0.080405,0.0007824873,0.01003047,0.012088,0.2200704,0.16805,0.009276669,0.00272974,0.045722,0.010398,0.099724,0.00139258,0.016441,0.026792
8,100,0.01,Gaussian,2.0,0.0,0.034815,0.01209952,0.02879402,0.013671,0.1925192,0.071054,0.03631067,0.03949866,0.080236,0.024427,0.057652,0.02514824,0.046935,0.044872
9,100,0.01,St df=2.01,0.5,0.0,-0.549279,-0.5814704,-0.5877641,-0.553562,-0.2481407,-0.557174,-0.6082471,-0.6163171,-0.562221,-0.57429,-0.560492,-0.6258909,-0.633697,-0.571921


In [4]:
# Repeat the quantile / pivot / relative-error pipeline on the "_split" results
# file, then check that it covers exactly the same experimental grid as the
# non-split results so that both can be printed side by side.
experiments_split = pd.read_parquet("results_bernoulli_symmetrical_split.parquet")
# NOTE(review): |estimate| as the error presumes the target value is 0 -- confirm.
experiments_split["error"] = np.abs(experiments_split["estimate"])

range_ns_split = np.unique(experiments_split["n"])
range_deltas_split = -np.sort(-np.unique(experiments_split["delta"]))  # descending order
range_distributions_split = np.unique(experiments_split["distribution"])
range_c_etas_split = np.sort(np.unique(experiments_split["c_eta"]))

# organize results: empirical (1 - delta)-quantile of the error, computed only
# on the runs made at that delta, per configuration and method
df_grouped_split = [
    experiments_split[experiments_split["delta"] == delta]
    .groupby(["n", "delta", "method", "distribution", "c_eta"])["error"]
    .quantile(1 - delta)
    .reset_index()
    for delta in range_deltas_split
]
df_grouped_split = pd.concat(df_grouped_split)

# one row per configuration, one column per method
df_grouped_pivot_split = df_grouped_split.pivot(
    index=["n", "delta", "distribution", "c_eta"], columns="method", values="error"
).reset_index()

# relative difference of every method w.r.t. the "mean" column
relative_errors_split = df_grouped_pivot_split.copy()
est_cols_split = [
    col
    for col in df_grouped_pivot_split.columns
    if col not in ["n", "delta", "distribution", "c_eta"]
]
for est in est_cols_split:
    relative_errors_split[est] = (
        df_grouped_pivot_split[est] - df_grouped_pivot_split["mean"]
    ) / df_grouped_pivot_split["mean"]

# np.array_equal compares shape as well as values, so a grid of a different
# length fails the assertion cleanly instead of broadcasting or raising
# inside the `==` comparison.
assert np.array_equal(range_ns_split, range_ns), "split / non-split runs use different n"
assert np.array_equal(range_deltas_split, range_deltas), "split / non-split runs use different delta"
assert np.array_equal(range_distributions_split, range_distributions), "split / non-split runs use different distributions"
assert np.array_equal(range_c_etas_split, range_c_etas), "split / non-split runs use different c_eta"
assert est_cols_split == est_cols, "split / non-split runs report different estimators"


In [8]:
# Print one LaTeX table per (distribution, n). Each table has one row per
# estimator and, for every (delta, c_eta) pair, two columns: the relative error
# from `relative_errors` and the one from `relative_errors_split`. In each
# (delta, c_eta) column group the two estimators with the smallest relative
# error in `relative_errors` are set in bold.

# Row labels. Composite estimator names have the form "<first> <second>"
# (e.g. "tm exp") and are labelled by their second token; single-token names
# ("mean", "tm", "mom") are labelled by their own entry.
name_dict = {
    "mean": "$\\overline{X}$",
    "tm": "TM",
    "mom": "MoM",
    "win": "$1 \\wedge  t^{-1}$",
    "lv": "$(1-t^2)_+$ ",
    "exp": "$e^{-t^2}$ ",
    "atm": "$\\mathbf{1}_{t < 1}$",
}


def _config_row(frame, n, delta, distribution, c_eta):
    """Return the first row of `frame` matching one (n, delta, distribution, c_eta) setting.

    Raises IndexError when no row matches.
    """
    rows = frame[
        (frame["n"] == n)
        & (frame["delta"] == delta)
        & (frame["distribution"] == distribution)
        & (frame["c_eta"] == c_eta)
    ]
    return rows.iloc[0]


def _two_lowest(row, columns):
    """Return the names of the two entries of `row[columns]` with the smallest values."""
    scores = row[columns].to_numpy(dtype=float)
    # A stable sort keeps ties in column order, so the result is deterministic.
    return [columns[i] for i in np.argsort(scores, kind="stable")[:2]]


def _pct(value, bold=False):
    """Format a relative change as a whole-number percentage in LaTeX inline math."""
    text = f"{value*100:.0f}"
    return f"\\(\\mathbf{{{text}}}\\)" if bold else f"\\({text}\\)"


def _table_header(deltas, c_etas):
    """Build the `tabular` preamble and the three header rows for the given grids.

    The header is derived from the same grids the table body iterates over, so
    header and body columns cannot drift apart.
    """
    n_pairs = len(deltas) * len(c_etas)
    delta_cells = []
    c_eta_cells = []
    for i, delta in enumerate(deltas):
        final_group = i == len(deltas) - 1
        group_align = "c" if final_group else "c|"
        delta_cells.append(
            f"\\multicolumn{{{2 * len(c_etas)}}}{{{group_align}}}{{\\( \\delta = {delta:g} \\)}}"
        )
        for j, c_eta in enumerate(c_etas):
            # A vertical rule closes every delta group except the last one.
            closes_group = j == len(c_etas) - 1 and not final_group
            align = "c|" if closes_group else "c"
            c_eta_cells.append(
                f"\\multicolumn{{2}}{{{align}}}{{\\( c_\\eta = {c_eta:g} \\)}}"
            )
    return [
        "\\begin{tabular}{l" + "|cc" * n_pairs + "}",
        "& " + " & ".join(delta_cells) + " \\\\",
        "& " + " & ".join(c_eta_cells) + " \\\\",
        "Estimator" + " & \\( \\ovoid \\) & \\(\\oslash\\)" * n_pairs + " \\\\",
    ]


header_lines = _table_header(range_deltas, range_c_etas)

# NOTE: `distribution`, `n`, `delta` and `c_eta` are deliberately kept as
# module-level loop variables; a scratch cell further down reads their final values.
for distribution in range_distributions:
    print()
    for n in range_ns:
        print()
        print("\\begin{table}[t!]")
        print("\t\\centering")
        print("\\setlength\\tabcolsep{3pt}")

        for header_line in header_lines:
            print(header_line)

        # Two best estimators per (delta, c_eta) column group of this table,
        # computed here so the cell does not depend on state built by other cells.
        top_two = {
            (delta, c_eta): _two_lowest(
                _config_row(relative_errors, n, delta, distribution, c_eta), est_cols
            )
            for delta in range_deltas
            for c_eta in range_c_etas
        }

        for est in est_cols:
            composite = " " in est
            if composite:
                line = name_dict[est.split(" ")[1]]
            else:
                line = name_dict[est]
                # Every single-token estimator starts a new group of rows.
                print("\\hline")
            for delta in range_deltas:
                for c_eta in range_c_etas:
                    best = est in top_two[(delta, c_eta)]
                    change = _config_row(relative_errors, n, delta, distribution, c_eta)[est]
                    change_split = _config_row(relative_errors_split, n, delta, distribution, c_eta)[est]
                    if composite:
                        # Only the first (non-split) value is ever set in bold.
                        line += f" & {_pct(change, best)} & {_pct(change_split)}"
                    else:
                        # Single-token estimators are printed once across both
                        # columns, which is only valid if both frames agree.
                        assert change == change_split, (
                            f"{est!r}: split and non-split values differ "
                            f"(n={n}, delta={delta}, distribution={distribution}, c_eta={c_eta})"
                        )
                        last_column = c_eta == range_c_etas[-1] and delta == range_deltas[-1]
                        align = "c" if last_column else "c|"
                        line += f" & \\multicolumn{{2}}{{{align}}}{{{_pct(change, best)}}}"
            line += "\\\\"
            print(line)
        print("\\hline")
        print("\\end{tabular}")
        print(f"\\caption{{Relative difference (in \\%) of the empirical \\( 1-\\delta  \\) confidence interval error of the indicated estimators with respect to that of the empirical mean under a {distribution} with \\(n={n}\\).}}")
        print(f"\\label{{tab:experiment{distribution}_n={n}}}")
        print("\\end{table}")




\begin{table}[t!]
	\centering
\setlength\tabcolsep{3pt}
\begin{tabular}{l|cc|cc|cc|cc|cc|cc|cc|cc|cc}
& \multicolumn{6}{c|}{\( \delta = 0.1 \)} & \multicolumn{6}{c|}{\( \delta = 0.01 \)} & \multicolumn{6}{c}{\( \delta = 0.001 \)} \\
& \multicolumn{2}{c}{\( c_\eta = 0.5 \)} & \multicolumn{2}{c}{\( c_\eta = 1 \)} & \multicolumn{2}{c|}{\( c_\eta = 2 \)} & \multicolumn{2}{c}{\( c_\eta = 0.5 \)} & \multicolumn{2}{c}{\( c_\eta = 1 \)} & \multicolumn{2}{c|}{\( c_\eta = 2 \)} & \multicolumn{2}{c}{\( c_\eta = 0.5 \)} & \multicolumn{2}{c}{\( c_\eta = 1 \)} & \multicolumn{2}{c}{\( c_\eta = 2 \)} \\
Estimator & \( \ovoid \) & \(\oslash\) & \( \ovoid \) & \(\oslash\) & \( \ovoid \) & \(\oslash\) & \( \ovoid \) & \(\oslash\) & \( \ovoid \) & \(\oslash\) & \( \ovoid \) & \(\oslash\) & \( \ovoid \) & \(\oslash\) & \( \ovoid \) & \(\oslash\) & \( \ovoid \) & \(\oslash\) \\
\hline
$\overline{X}$ & \multicolumn{2}{c|}{\(\mathbf{0}\)} & \multicolumn{2}{c|}{\(\mathbf{0}\)} & \multicolumn{2}{c|}{\(\mathbf

In [6]:
# Inspect the distinct c_eta values found in the results.
range_c_etas

array([0.5, 1. , 2. ])

In [9]:
# Inspect the distinct sample sizes n found in the results.
range_ns

array([ 50, 500])

In [None]:
# Scratch cell (never executed): shows the raw results frame again.
experiments

In [13]:
# Scratch lookup for a single configuration: select its row of relative errors,
# transpose it and drop the first four entries (n, delta, distribution, c_eta),
# leaving one value per estimator.
# NOTE: n, delta, distribution and c_eta are not set in this cell; they are
# whatever values earlier executed loops left behind.
filtered = relative_errors[
    (relative_errors.n == n)
    & (relative_errors.delta == delta)
    & (relative_errors.distribution == distribution)
    & (relative_errors.c_eta == c_eta)
].T[4:]


In [30]:
# Names of the two estimators with the smallest relative error in `filtered`.
filtered.index[np.argsort(filtered.to_numpy().ravel())[:2]].tolist()

['tm lv', 'tm exp']

In [6]:
# Nested lookup delta -> n -> distribution -> c_eta -> names of the two
# estimators with the smallest relative error for that configuration.
best_dicts = {}

for delta in range_deltas:
    by_n = best_dicts[delta] = {}
    for n in range_ns:
        by_distribution = by_n[n] = {}
        for distribution in range_distributions:
            by_c_eta = by_distribution[distribution] = {}
            for c_eta in range_c_etas:
                # Single matching row, transposed, without the four
                # configuration entries: one value per estimator.
                filtered = relative_errors[
                    (relative_errors.n == n)
                    & (relative_errors.delta == delta)
                    & (relative_errors.distribution == distribution)
                    & (relative_errors.c_eta == c_eta)
                ].T[4:]
                two_smallest = np.argsort(np.array(filtered).flatten())[:2]
                by_c_eta[c_eta] = list(filtered.iloc[two_smallest].index)
                

In [32]:
# Inspect the nested lookup (delta -> n -> distribution -> c_eta -> two estimator names).
best_dicts

{np.float64(0.1): {np.int64(100): {'Gaussian': {np.float64(0.5): ['mean',
     'mom'],
    np.float64(1.0): ['mean', 'mean exp'],
    np.float64(2.0): ['mean', 'mean exp']},
   'St df=2.01': {np.float64(0.5): ['tm lv', 'mom lv'],
    np.float64(1.0): ['tm lv', 'tm exp'],
    np.float64(2.0): ['tm lv', 'tm exp']}},
  np.int64(500): {'Gaussian': {np.float64(0.5): ['tm lv', 'mean'],
    np.float64(1.0): ['tm lv', 'mean lv'],
    np.float64(2.0): ['tm lv', 'mean lv']},
   'St df=2.01': {np.float64(0.5): ['tm lv', 'mean lv'],
    np.float64(1.0): ['tm lv', 'mean lv'],
    np.float64(2.0): ['tm lv', 'mom lv']}}},
 np.float64(0.01): {np.int64(100): {'Gaussian': {np.float64(0.5): ['mom lv',
     'mean lv'],
    np.float64(1.0): ['mean', 'mean exp'],
    np.float64(2.0): ['mean', 'mean exp']},
   'St df=2.01': {np.float64(0.5): ['tm lv', 'tm exp'],
    np.float64(1.0): ['tm lv', 'tm exp'],
    np.float64(2.0): ['tm lv', 'tm']}},
  np.int64(500): {'Gaussian': {np.float64(0.5): ['mean win', 'tm w