In [2]:
import pandas as pd
import numpy as np
from glob import glob
from scipy import stats

In [3]:
# Epsilon values of the experiments; each one appears in the
# 'training_log_eps{eps}N{N}' directory names read further down.
eps = [0.125, 2.0, 8.0]
# N values of the experiments: 12, 24, ..., 120 (multiples of 12).
N = list(range(12, 121, 12))

In [4]:
def computeCI(data_list, alpha=0.05):
    '''
    Compute the sample mean and the half-width of the two-sided
    100*(1-alpha)% Student-t confidence interval for the true mean.

    Parameters
    ----------
    data_list : array-like of numbers
        The n observations.
    alpha : float, optional
        Significance level, strictly between 0 and 1
        (default 0.05, i.e. a 95% confidence interval).

    Returns
    -------
    mean : float
        Sample mean of the observations.
    margin : float
        Half-width of the interval, i.e. the interval is mean +/- margin.
        NaN when there is a single observation, because the sample
        standard deviation is undefined in that case.

    Raises
    ------
    ValueError
        If data_list is empty or alpha is not strictly between 0 and 1.
    '''
    if not 0 < alpha < 1:
        raise ValueError(f'alpha must be strictly between 0 and 1, got {alpha!r}')

    data = np.asarray(data_list, dtype=float)
    # Count every element: np.mean/np.std below reduce over the whole array,
    # so n has to agree with them even if the input is not 1-D.
    n = data.size
    if n == 0:
        raise ValueError('computeCI needs at least one observation')

    mean = np.mean(data)
    if n == 1:
        # With ddof=1 the standard deviation divides by n - 1 = 0; report an
        # undefined margin explicitly instead of via NumPy runtime warnings.
        return mean, np.nan

    # Standard error of the mean from the sample std (Bessel's correction).
    std = np.std(data, ddof=1)
    se = std / np.sqrt(n)

    # Two-sided critical value of the t-distribution with n - 1 degrees of freedom.
    t_critical = stats.t.ppf(1 - alpha/2, n - 1)
    margin = t_critical * se

    return mean, margin

In [5]:
# Table 3 - error with N=120
for ep in eps:
    # Load the logs of the 10 runs (k = 0..9) for this epsilon, in order.
    run_logs = [
        pd.read_csv(f'training_log_eps{ep}N{120}/training_log_{k}.csv')
        for k in range(10)
    ]

    # Square root of the first / last logged value of each run.
    InitialModel_relativeOODerror = [
        np.sqrt(log['Relative_OOD_Error'].loc[0]) for log in run_logs
    ]
    OptimizedModel_relativeOODerror = [
        np.sqrt(log['Relative_OOD_Error'].iloc[-1]) for log in run_logs
    ]
    benchmark_relativeOODerror = [
        np.sqrt(log['Relative_Benchmark_OOD'].loc[0]) for log in run_logs
    ]
    # Last entry of the 'Time (s)' column of each run.
    times = [log['Time (s)'].iloc[-1] for log in run_logs]

    # Mean and confidence-interval margin over the 10 runs.
    init_mean, init_margin = computeCI(InitialModel_relativeOODerror)
    op_mean, op_margin = computeCI(OptimizedModel_relativeOODerror)
    bench_mean, bench_margin = computeCI(benchmark_relativeOODerror)
    time_mean, _ = computeCI(times)

    # Reduction of the mean error from the initial to the optimized model, in percent.
    reduction_pct = round(100*(init_mean - op_mean)/init_mean, 2)
    # 'Time (s)' is in seconds; report the mean in minutes.
    minutes_mean = round(time_mean/60, 1)

    print(f'EPSILON {ep} - Initial: {init_mean:.2e} pm {init_margin:.2e}',
          f'Optimized: {op_mean:.2e} pm {op_margin:.2e}',
          f'Benchmark: {bench_mean:.2e} pm {bench_margin:.2e}')
    print('Relative Error Reduction:', reduction_pct, '%')
    print('Average Running Time:', minutes_mean, "min.")

EPSILON 0.125 - Initial: 8.65e-02 pm 8.18e-03 Optimized: 1.55e-02 pm 9.72e-04 Benchmark: 1.62e-02 pm 5.34e-03
Relative Error Reduction: 82.09 %
Average Running Time: 20.6 min.
EPSILON 2.0 - Initial: 8.76e-02 pm 8.63e-03 Optimized: 1.29e-02 pm 1.20e-03 Benchmark: 1.24e-02 pm 2.69e-03
Relative Error Reduction: 85.26 %
Average Running Time: 26.7 min.
EPSILON 8.0 - Initial: 9.09e-02 pm 4.39e-03 Optimized: 1.13e-02 pm 1.33e-03 Benchmark: 1.16e-02 pm 2.26e-03
Relative Error Reduction: 87.53 %
Average Running Time: 26.1 min.


In [6]:
# Total experiment runtime
# The experiments for the different epsilon values ran in parallel;
# the epsilon=2 experiment took the longest, at 27 hours.
total_compute_time = 0
for n in N:
    # Final 'Time (s)' entry of each of the 10 runs for epsilon = eps[1] and this n.
    times = [
        pd.read_csv(f'training_log_eps{eps[1]}N{n}/training_log_{k}.csv')['Time (s)'].iloc[-1]
        for k in range(10)
    ]
    total_compute_time = total_compute_time + sum(times)

# Seconds -> hours, rounded to the nearest whole hour.
total_hours = round(total_compute_time/3600)
print('Total Compute Time:', total_hours, 'hrs')

Total Compute Time: 27 hrs
