In [1]:
from pathlib import Path

import numpy as np
import pandas as pd
from multiprocess import Pool

from benchmarking.multiprocessing.functions import evaluate_ar, evaluate_multiplier_iid, evaluate_ma
from bstrapping.synthetic_time_series.log_ma import LogMovingAverage

In [2]:
# Result accumulators. Each benchmark cell below appends one entry per bootstrap
# method to the last three lists, in the order given by `list_name_weights`.
# `benchmark` is only a placeholder here; it is rebuilt from these lists later.
benchmark, means_of_variance, std_of_variance, coverage_probability = [], [], [], []

# Labels used for the per-method columns of the summary table.
list_name_weights = ['AR', 'Multiplier', 'MA']

In [3]:
# Simulation settings: `sample_size` is the argument passed to `generate_samples`,
# `runs` is the number of samples that get generated and evaluated.
# NOTE(review): no random seed is set in the visible cells; if LogMovingAverage
# draws from NumPy's global RNG, seeding here would make the run reproducible -- confirm.
sample_size = 10_000
runs = 250

# Process parameters handed to LogMovingAverage: 0.5**1 and 0.5**2.
parameters = 0.5 ** np.arange(1, 3)

In [4]:
# Generate `runs` samples, each from a freshly constructed LogMovingAverage
# and requested with `sample_size` (presumably the series length -- confirm).
samples = [
    LogMovingAverage(parameters=parameters, mu=0).generate_samples(sample_size)
    for _run in range(runs)
]

In [5]:
# Reference instance of the same process, used only for its summary attributes.
time_series = LogMovingAverage(mu=0, parameters=parameters)

# `mean` is forwarded to every evaluate_* call below (presumably the true process mean).
mean = time_series.mean

# Displayed as the cell output; the same value goes into the summary row later.
time_series.asymptotic_variance

18.65153850269085

## Benchmark bootstraps

In [6]:
%%time
%%capture
# Benchmark the AR bootstrap: evaluate every simulated sample in parallel.
# The pool is used as a context manager so its worker processes are shut down
# as soon as `map` has returned, instead of lingering after the cell finishes.
with Pool() as pool:
    evaluations_ar = pool.map(lambda sample: evaluate_ar(sample, alpha=0.1, mean=mean), samples)

# One row per sample; column 0 feeds the variance statistics and column 1 the
# coverage frequency (presumably a 0/1 "mean inside the interval" flag -- confirm).
result = np.array(evaluations_ar)

coverage_probability.append(np.sum(result.T[1]) / runs)
means_of_variance.append(np.mean(result.T[0]))
std_of_variance.append(np.std(result.T[0]))


CPU times: user 52.5 ms, sys: 29.5 ms, total: 82 ms
Wall time: 1min 55s


In [7]:
%%time
%%capture
# Benchmark the multiplier (iid) bootstrap: evaluate every simulated sample in parallel.
# The pool is used as a context manager so its worker processes are shut down
# as soon as `map` has returned, instead of lingering after the cell finishes.
with Pool() as pool:
    evaluations_multiplier_iid = pool.map(
        lambda sample: evaluate_multiplier_iid(sample, alpha=0.1, mean=mean), samples
    )

# One row per sample; column 0 feeds the variance statistics and column 1 the
# coverage frequency (presumably a 0/1 "mean inside the interval" flag -- confirm).
result = np.array(evaluations_multiplier_iid)

coverage_probability.append(np.sum(result.T[1]) / runs)
means_of_variance.append(np.mean(result.T[0]))
std_of_variance.append(np.std(result.T[0]))



CPU times: user 52.9 ms, sys: 38.5 ms, total: 91.3 ms
Wall time: 2.57 s


In [8]:
%%time
%%capture
# Benchmark the MA bootstrap: evaluate every simulated sample in parallel.
# The pool is used as a context manager so its worker processes are shut down
# as soon as `map` has returned, instead of lingering after the cell finishes.
with Pool() as pool:
    evaluations_ma = pool.map(lambda sample: evaluate_ma(sample, alpha=0.1, mean=mean), samples)

# One row per sample; column 0 feeds the variance statistics and column 1 the
# coverage frequency (presumably a 0/1 "mean inside the interval" flag -- confirm).
result = np.array(evaluations_ma)

coverage_probability.append(np.sum(result.T[1]) / runs)
means_of_variance.append(np.mean(result.T[0]))
std_of_variance.append(np.std(result.T[0]))



CPU times: user 179 ms, sys: 86.7 ms, total: 266 ms
Wall time: 51min


## Concatenated results

In [9]:
# Flatten all results into a single row. The order must match the column labels
# used when the DataFrame is built: asymptotic variance, sample size, per-method
# means, per-method standard deviations, nominal confidence level (1 - alpha with
# alpha = 0.1, the value passed to the evaluate_* calls), per-method coverage.
benchmark = [
    time_series.asymptotic_variance,
    sample_size,
    *means_of_variance,
    *std_of_variance,
    1 - 0.1,
    *coverage_probability,
]

In [10]:
# Turn the flat `benchmark` row into a one-row table with two-level column labels
# (statistic group, method name) and use the sample size as the row index.
benchmark = pd.DataFrame(
    [benchmark],
    columns=pd.MultiIndex.from_tuples([
        ("mean", "Asymptotic variance"),
        ("Sample size", ""),
        *(("mean", name) for name in list_name_weights),
        *(("std", name) for name in list_name_weights),
        ("In confidence interval", "Confidence level"),
        *(("In confidence interval", name) for name in list_name_weights),
    ]),
).set_index(["Sample size"])

In [11]:
# Show the one-row summary table as the cell output.
benchmark

Unnamed: 0_level_0,mean,mean,mean,mean,std,std,std,In confidence interval,In confidence interval,In confidence interval,In confidence interval
Unnamed: 0_level_1,Asymptotic variance,AR,Multiplier,MA,AR,Multiplier,MA,Confidence level,AR,Multiplier,MA
Sample size,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2
10000,18.651539,18.363996,9.890292,18.185969,3.753501,1.97988,3.564705,0.9,0.88,0.736,0.872


In [12]:
# Persist the summary table as CSV and as pickle. The target directory is created
# first so that a missing folder cannot fail the notebook after the long benchmark
# cells have already finished.
output_dir = Path("./data/log-ma")
output_dir.mkdir(parents=True, exist_ok=True)

benchmark.to_csv(output_dir / f"benchmark_{sample_size}_log_ma.csv")
benchmark.to_pickle(output_dir / f"benchmark_{sample_size}_log_ma.pkl")