# Trimmed sample means for robust uniform mean estimation and regression
## Notebook companion on experiments and algorithms

We start by importing a few libraries, together with the helper functions used to plot our experiments.

In [1]:
import numpy as np
import pandas as pd
import json
import itertools

from src.plot import *

### Comparison between different cross-validation strategies



In [2]:
# Experiment configuration for the comparison of algorithms and
# cross-validation strategies.
# NOTE(review): apart from the three factor lists iterated below, none of
# these settings is read inside this cell; presumably they mirror the
# configuration that produced the stored results -- confirm.
d = 5
beta = np.ones(d) / d

n = 1000

r = n // 100  # one percent of the sample size
n_contaminateds = [2 * r, 6 * r, 10 * r, 14 * r]
error_types = [0, 1]
skews = [False, True]
heteroscedasticitys = [False, True]

# One parameter per contamination level: 2 * nc + 1 plus a margin of 20.
mom_params = [2 * nc + 1 + 20 for nc in n_contaminateds]
# (p - 1) / (2 * n) simplifies to (nc + 10) / n for the matching nc.
tm_params = [(p - 1) / (2 * n) for p in mom_params]

algorithms = ["plugin", "gd"]

selection_strategies = ["max_slope", "min_loss"]
fold_Ks = ["maxK/V", "K/V"]

# Read the precomputed results; the context manager closes the file handle
# as soon as the JSON has been parsed.
with open("results/comparison_algorithms_strategies.json", "r") as results_file:
    results = json.load(results_file)

# One figure per (error_type, skew, heteroscedasticity) combination.
for error_type, skew, heteroscedasticity in itertools.product(
    error_types, skews, heteroscedasticitys
):
    print(error_type, skew, heteroscedasticity)
    plot_alg_cv_comparison(
        results, error_type, skew, heteroscedasticity, savefig=True
    )

0 False False
0 False True


  plt.tight_layout()


0 True False


  plt.tight_layout()


0 True True


  plt.tight_layout()


1 False False


  plt.tight_layout()


1 False True


  plt.tight_layout()


1 True False


  plt.tight_layout()


1 True True


  plt.tight_layout()


<Figure size 1000x400 with 0 Axes>

### Setup A experiments



In [3]:
# Experiment configuration for Setup A.
# NOTE(review): apart from the three factor lists iterated below, none of
# these settings is read inside this cell; presumably they mirror the
# configuration that produced the stored results -- confirm.
d = 5
beta = np.ones(d) / d

n = 1000

r = n // 100  # one percent of the sample size
n_contaminateds = [0, 2 * r, 4 * r, 6 * r, 8 * r, 10 * r, 12 * r, 14 * r]
error_types = [0, 1, 2]
skews = [True, False]
heteroscedasticitys = [True, False]

# One parameter per contamination level: 2 * nc + 1 plus a margin of 20.
mom_params = [2 * nc + 1 + 20 for nc in n_contaminateds]
# (p - 1) / (2 * n) simplifies to (nc + 10) / n for the matching nc.
tm_params = [(p - 1) / (2 * n) for p in mom_params]

# Read the precomputed results; the context manager closes the file handle
# as soon as the JSON has been parsed.
with open("results/setupA.json", "r") as results_file:
    results = json.load(results_file)

# itertools.product varies the last factor fastest, i.e. it visits the
# combinations in the same order as three nested for-loops would.
for error_type, skew, heteroscedasticity in itertools.product(
    error_types, skews, heteroscedasticitys
):
    print(f"error={error_type}, skew={skew}, h={heteroscedasticity}")
    make_plot_setup(
        results,
        {
            "error_type": error_type,
            "skew": skew,
            "heteroscedasticity": heteroscedasticity,
        },
        savefig=True,
    )


error=0, skew=True, h=True
error=0, skew=True, h=False


  plt.tight_layout()


error=0, skew=False, h=True


  plt.tight_layout()


error=0, skew=False, h=False


  plt.tight_layout()


error=1, skew=True, h=True


  plt.tight_layout()


error=1, skew=True, h=False


  plt.tight_layout()


error=1, skew=False, h=True


  plt.tight_layout()


error=1, skew=False, h=False


  plt.tight_layout()


error=2, skew=True, h=True


  plt.tight_layout()


error=2, skew=True, h=False


  plt.tight_layout()


error=2, skew=False, h=True


  plt.tight_layout()


error=2, skew=False, h=False


  plt.tight_layout()


<Figure size 500x400 with 0 Axes>

### Setup B experiments

In [4]:
# Experiment configuration for Setup B.
# NOTE(review): apart from `ps`, none of these settings is read inside this
# cell; presumably they mirror the configuration that produced the stored
# results -- confirm.
d = 5
beta = np.ones(d) / d

n = 1000

r = n // 100  # one percent of the sample size
n_contaminateds = [0, 2 * r, 4 * r, 6 * r, 8 * r, 10 * r, 12 * r, 14 * r]
ps = [.01, .03, .05, .07, .09, .11, .13]

# One parameter per contamination level: 2 * nc + 1 plus a margin of 20.
mom_params = [2 * nc + 1 + 20 for nc in n_contaminateds]
# (p - 1) / (2 * n) simplifies to (nc + 10) / n for the matching nc.
tm_params = [(p - 1) / (2 * n) for p in mom_params]

# Read the precomputed results; the context manager closes the file handle
# as soon as the JSON has been parsed.
with open("results/setupB.json", "r") as results_file:
    results = json.load(results_file)

for p in ps:
    print(f"p={p}")
    # NOTE(review): 50 * p presumably converts p into the x-axis units used by
    # make_plot_setup (the contamination levels above are spaced 2% of n
    # apart, and 1 / 0.02 = 50) -- confirm against src.plot.
    make_plot_setup(
        results,
        {"p": p},
        vertical_at=50*p,
        vertical_label=r"$\varepsilon=p$",
        savefig=True,
    )


p=0.01
p=0.03


  plt.tight_layout()


p=0.05


  plt.tight_layout()


p=0.07


  plt.tight_layout()


p=0.09


  plt.tight_layout()


p=0.11


  plt.tight_layout()


p=0.13


  plt.tight_layout()


<Figure size 500x400 with 0 Axes>