In [None]:
import matplotlib as mpl

# Select the pgf backend *before* pyplot is imported, so pyplot starts up with
# the pgf backend instead of having the backend switched after the fact.
mpl.use('pgf')

from matplotlib import pyplot as plt
import pandas as pd
# Imported only for its side effect; presumably this registers the
# 'science' / 'ieee' styles passed to plt.style.use below -- TODO confirm.
import scienceplots

# Performance validation

In [None]:
# Lift the column limit so wide benchmark tables are displayed in full.
pd.options.display.max_columns = None

In [None]:
# Values of the "Stochastic process" column; also used in the pickle file names
# and as subplot titles.
names_dependence_coefficients = ["iid", "MA(2)", "MA(20)"]

In [None]:
# Legend entries for the bootstrap variants; every legend prepends "Baseline".
list_name_weights = [
    "AR bootstrap",
    "IID bootstrap",
    "MA bootstrap",
]

In [None]:
# Read one pickled benchmark table per (process, sample size) pair and stack
# them into a single frame (process is the outer loop, sample size the inner).
# NOTE: pd.read_pickle can execute arbitrary code while unpickling -- only
# load files that come from a trusted run.
benchmark = pd.concat([
    pd.read_pickle(f"./data/ma-process/benchmark_{size}_" + process_name + ".pkl")
    for process_name in names_dependence_coefficients
    for size in (1000, 2000, 5000, 10000)
])

In [None]:
# Bare expression: shows the concatenated MA-process benchmark table as the cell output.
benchmark

In [None]:
# Figure layout: one column per stochastic process. The top row shows the
# bootstrapped variance with error bars, the bottom row the coverage probability.
plt.style.use(['science', 'ieee'])
plt.rcParams.update({'font.size': 18})
fig, a = plt.subplots(2, len(names_dependence_coefficients), figsize=(18, 12))

for column, process_name in enumerate(names_dependence_coefficients):
    variance_ax = a[0][column]
    coverage_ax = a[1][column]
    # Keep only the rows that belong to the current process.
    process_rows = benchmark[benchmark["Stochastic process"] == process_name]

    process_rows["mean"].plot(
        ax=variance_ax,
        yerr=process_rows["std"],
        capsize=4,
        xlabel="",
        sharex=False,
        legend=False,
    )
    process_rows["In confidence interval"].plot(
        ax=coverage_ax,
        xlabel="",
        ylim=[0.6, 1],
        legend=False,
    )
    variance_ax.set_title(process_name)

# Shared labels: y-labels on the leftmost column, x-label under the middle column,
# legend in the top-right panel.
middle_column = len(names_dependence_coefficients) // 2
a[0][0].set_ylabel("Bootstrapped variance")
a[1][middle_column].set_xlabel("Sample size")
a[1][0].set_ylabel("Coverage probability")
a[0][-1].legend(["Baseline"] + list_name_weights)

# Export is disabled; to write the figure use:
#   plt.savefig('benchmark.pgf', format='pgf')

In [None]:
# One panel per stochastic process showing the bootstrapped variance with error bars.
plt.style.use(['science', 'ieee'])
plt.rcParams.update({'font.size': 18})
fig1, a1 = plt.subplots(1, len(names_dependence_coefficients), figsize=(18, 6))

for column, process_name in enumerate(names_dependence_coefficients):
    variance_ax = a1[column]
    # Keep only the rows that belong to the current process.
    process_rows = benchmark[benchmark["Stochastic process"] == process_name]

    process_rows["mean"].plot(
        ax=variance_ax,
        yerr=process_rows["std"],
        capsize=4,
        xlabel="",
        sharex=False,
        legend=False,
    )
    variance_ax.set_title(process_name)

# x-label under the middle panel, y-label on the first, legend on the last.
middle_column = len(names_dependence_coefficients) // 2
a1[middle_column].set_xlabel("Sample size")
a1[0].set_ylabel("Bootstrapped variance")
a1[-1].legend(["Baseline"] + list_name_weights, loc='center right', bbox_to_anchor=(1, 0.4))

# Export is disabled; to write the figure use:
#   plt.savefig('benchmark-variance.pgf', format='pgf')

In [None]:
# One panel per stochastic process showing the coverage probability,
# with the y-axis fixed to [0.6, 1].
plt.style.use(['science', 'ieee'])
plt.rcParams.update({'font.size': 18})
fig2, a2 = plt.subplots(1, len(names_dependence_coefficients), figsize=(18, 6))

for column, process_name in enumerate(names_dependence_coefficients):
    coverage_ax = a2[column]
    # Keep only the rows that belong to the current process.
    process_rows = benchmark[benchmark["Stochastic process"] == process_name]

    process_rows["In confidence interval"].plot(
        ax=coverage_ax,
        xlabel="",
        ylim=[0.6, 1],
        legend=False,
    )
    coverage_ax.set_title(process_name)

# Legend on the last panel, x-label under the middle one, y-label on the first.
middle_column = len(names_dependence_coefficients) // 2
a2[-1].legend(["Baseline"] + list_name_weights)
a2[middle_column].set_xlabel("Sample size")
a2[0].set_ylabel("Coverage probability")

# Export is disabled; to write the figure use:
#   plt.savefig('benchmark-confidence.pgf', format='pgf')

# MA-GARCH process

In [None]:
# Stack the pickled MA-GARCH benchmark tables, one per sample size.
# NOTE: pd.read_pickle can execute arbitrary code while unpickling -- only
# load files that come from a trusted run.
benchmark_garch = pd.concat([
    pd.read_pickle(f"./data/magarch-process/benchmark_{size}_magarch11.pkl")
    for size in (1000, 2000, 5000, 10000)
])

In [None]:
# Bare expression: shows the concatenated MA-GARCH benchmark table as the cell output.
benchmark_garch

In [None]:
# Two panels: bootstrapped variance with error bars (left) and coverage
# probability (right) for the MA-GARCH benchmark.
plt.style.use(['science', 'ieee'])
plt.rcParams.update({'font.size': 18})
fig, a = plt.subplots(1, 2, figsize=(12, 6))
variance_ax, coverage_ax = a

benchmark_garch["mean"].plot(
    ax=variance_ax,
    yerr=benchmark_garch["std"],
    capsize=4,
    xlabel="",
    sharex=False,
    legend=False,
)
benchmark_garch["In confidence interval"].plot(
    ax=coverage_ax,
    xlabel="",
    ylim=[0.6, 1],
    legend=False,
)

variance_ax.set_ylabel("Bootstrapped variance")
variance_ax.set_xlabel("Sample size")
variance_ax.set_title("MA(2)-GARCH(1,1)")
coverage_ax.set_xlabel("Sample size")
coverage_ax.set_ylabel("Coverage probability")
coverage_ax.legend(["Baseline"] + list_name_weights)

# Write the current figure to disk in pgf format.
plt.savefig('benchmark_ma-garch.pgf', format='pgf')

# Delta method

In [None]:
# Stack the pickled delta-method ("exp") benchmark tables, one per sample size.
# NOTE: pd.read_pickle can execute arbitrary code while unpickling -- only
# load files that come from a trusted run.
benchmark_delta_exp = pd.concat([
    pd.read_pickle(f"./data/delta_method/benchmark_{size}_exp.pkl")
    for size in (1000, 2000, 5000, 10000)
])

In [None]:
# Bare expression: shows the concatenated delta-method ("exp") benchmark table as the cell output.
benchmark_delta_exp

In [None]:
# Two panels: bootstrapped variance with error bars (left) and coverage
# probability (right) for the "exp" delta-method benchmark.
plt.style.use(['science', 'ieee'])
plt.rcParams.update({'font.size': 18})
fig, a = plt.subplots(1, 2, figsize=(12, 6))
variance_ax, coverage_ax = a

benchmark_delta_exp["mean"].plot(
    ax=variance_ax,
    yerr=benchmark_delta_exp["std"],
    capsize=4,
    xlabel="",
    sharex=False,
    legend=False,
)
benchmark_delta_exp["In confidence interval"].plot(
    ax=coverage_ax,
    xlabel="",
    ylim=[0.6, 1],
    legend=False,
)

variance_ax.set_ylabel("Bootstrapped variance")
variance_ax.set_title("Exponential of sample average")
variance_ax.set_xlabel("Sample size")
coverage_ax.set_xlabel("Sample size")
coverage_ax.set_ylabel("Coverage probability")
coverage_ax.legend(["Baseline"] + list_name_weights)

# Export is disabled; to write the figure use:
#   plt.savefig('benchmark_exp_ma.pgf', format='pgf')

In [None]:
# Stack the pickled delta-method benchmark tables, one per sample size.
# NOTE: pd.read_pickle can execute arbitrary code while unpickling -- only
# load files that come from a trusted run.
benchmark_delta = pd.concat([
    pd.read_pickle(f"./data/delta_method/benchmark_{size}.pkl")
    for size in (1000, 2000, 5000, 10000)
])

In [None]:
# Bare expression: shows the concatenated delta-method benchmark table as the cell output.
benchmark_delta

In [None]:
# Two panels: bootstrapped variance with error bars (left) and coverage
# probability (right) for the LogMeanExp delta-method benchmark.
plt.style.use(['science', 'ieee'])
plt.rcParams.update({'font.size': 18})
fig, a = plt.subplots(1, 2, figsize=(12, 6))
variance_ax, coverage_ax = a

benchmark_delta["mean"].plot(
    ax=variance_ax,
    yerr=benchmark_delta["std"],
    capsize=4,
    xlabel="",
    sharex=False,
    legend=False,
)
benchmark_delta["In confidence interval"].plot(
    ax=coverage_ax,
    xlabel="",
    ylim=[0.6, 1],
    legend=False,
)

variance_ax.set_ylabel("Bootstrapped variance")
variance_ax.set_title("LogMeanExp of MA(2)")
variance_ax.set_xlabel("Sample size")
coverage_ax.set_xlabel("Sample size")
coverage_ax.set_ylabel("Coverage probability")
coverage_ax.legend(["Baseline"] + list_name_weights)

# Write the current figure to disk in pgf format.
plt.savefig('benchmark_log_log_ma.pgf', format='pgf')

# log-MA

In [None]:
# Stack the pickled log-MA benchmark tables, one per sample size.
# NOTE: pd.read_pickle can execute arbitrary code while unpickling -- only
# load files that come from a trusted run.
benchmark_log_ma = pd.concat([
    pd.read_pickle(f"./data/log-ma/benchmark_{size}_log_ma.pkl")
    for size in (1000, 2000, 5000, 10000)
])

In [None]:
# Bare expression: shows the concatenated log-MA benchmark table as the cell output.
benchmark_log_ma

In [None]:
# Two panels: bootstrapped variance with error bars (left) and coverage
# probability (right) for the log-MA benchmark.
plt.style.use(['science', 'ieee'])
plt.rcParams.update({'font.size': 18})
fig, a = plt.subplots(1, 2, figsize=(12, 6))
variance_ax, coverage_ax = a

benchmark_log_ma["mean"].plot(
    ax=variance_ax,
    yerr=benchmark_log_ma["std"],
    capsize=4,
    xlabel="",
    sharex=False,
    legend=False,
)
benchmark_log_ma["In confidence interval"].plot(
    ax=coverage_ax,
    xlabel="",
    ylim=[0.6, 1],
    legend=False,
)

variance_ax.set_ylabel("Bootstrapped variance")
variance_ax.set_title("log-MA")
variance_ax.set_xlabel("Sample size")
coverage_ax.set_xlabel("Sample size")
coverage_ax.set_ylabel("Coverage probability")
coverage_ax.legend(["Baseline"] + list_name_weights)

# Export is disabled; to write the figure use:
#   plt.savefig('benchmark_log-ma.pgf', format='pgf')

# Different hyperparameters beta

In [None]:
# Load the pickled results of the beta hyperparameter benchmark.
# NOTE: pd.read_pickle can execute arbitrary code while unpickling -- only
# load files that come from a trusted run.
benchmark_beta = pd.read_pickle("./data/ma-process/benchmark_beta.pkl")

In [None]:
# Use a smaller font for this figure, then plot the "bias-variance trade-off"
# entry of the transposed "Beta" table.
plt.rcParams['font.size'] = 8
beta_tradeoff = benchmark_beta["Beta"].T["bias-variance trade-off"]
beta_tradeoff.plot(
    xlabel="Hyperparameter beta",
    ylabel="Mean squared error",
)
# Export is disabled; to write the figure use:
#   plt.savefig('benchmark-beta.pgf', format='pgf')

# Time analysis

In [None]:
# Load the pickled timing benchmark.
# NOTE: pd.read_pickle can execute arbitrary code while unpickling -- only
# load files that come from a trusted run.
benchmark_time = pd.read_pickle("./data/ma-process/benchmark_time.pkl")

In [None]:
# Show summary statistics of the timing benchmark as the cell output.
benchmark_time.describe()

In [None]:
# Plot the timing benchmark on a logarithmic y-axis.
benchmark_time.plot(
    xlabel="Sample index",
    ylabel="Time per update [sec]",
    logy=True,
)
# Export is disabled; to write the figure use:
#   plt.savefig('benchmark-time.pgf', format='pgf')