# Table 2 - Comparison between one-way and two-way bootstrap on ASP

This is the notebook that generates the bootstrap standard error / vanilla sample standard error ratios that appear in Table 2 of the paper.

In [1]:
from matplotlib import pyplot as plt
import matplotlib.lines as mlines
import pandas as pd
from typing import List
from oce_ecomm_abv_calculation.plots.vanilla_vs_bootstrap_se import (
    get_plot_statistics_from_experiment_results_file
)

## Load the plot statistics

We reuse the routine that creates the DataFrame containing the statistics required to plot the vanilla vs bootstrap SE (see notebook F02-03), and filter for the relevant numbers that go into Table 2.

In [2]:
# File-name slugs of the two dataset views used in the consolidated
# experiment result files under ./data.
_UCI_VIEW_SLUG = "uci-online-retail-ii-customer-order-item-view"
_OLIST_VIEW_SLUG = "olist-brazilian-ecommerce-customer-order-item-view"


def _load_asp_plot_statistics(dataset_view_slug: str, bootstrap_method: str):
    """Build the plot-statistics table for one dataset / bootstrap method pair.

    Reads the consolidated vanilla and bootstrap experiment results for the
    ``r-SellingPrice`` response from ``./data`` and passes both to
    ``get_plot_statistics_from_experiment_results_file`` with
    ``normalize_se=True``.

    Args:
        dataset_view_slug: Dataset view name as it appears in the result file
            names (e.g. ``"uci-online-retail-ii-customer-order-item-view"``).
        bootstrap_method: Bootstrap variant as it appears in the result file
            names (``"oneway"`` or ``"twoway"``).

    Returns:
        Whatever ``get_plot_statistics_from_experiment_results_file`` returns
        (used downstream as a pandas DataFrame).
    """
    def _results_path(method: str) -> str:
        # All result files share the same naming pattern; only the method
        # ("vanilla" / "oneway" / "twoway") and the dataset view differ.
        return (
            f"./data/expt_{method}_{dataset_view_slug}_"
            "r-SellingPrice_consolidated.parquet"
        )

    # The vanilla results are deliberately re-read on every call so that each
    # computation receives its own fresh DataFrame, as in the original cell.
    return get_plot_statistics_from_experiment_results_file(
        vanilla_experiment_results_df=pd.read_parquet(_results_path("vanilla")),
        bootstrap_experiment_results_df=pd.read_parquet(
            _results_path(bootstrap_method)),
        normalize_se=True,
    )


plot_statistics_uci_asp_oneway_df = _load_asp_plot_statistics(
    _UCI_VIEW_SLUG, "oneway")

plot_statistics_uci_asp_twoway_df = _load_asp_plot_statistics(
    _UCI_VIEW_SLUG, "twoway")

plot_statistics_olist_asp_oneway_df = _load_asp_plot_statistics(
    _OLIST_VIEW_SLUG, "oneway")

plot_statistics_olist_asp_twoway_df = _load_asp_plot_statistics(
    _OLIST_VIEW_SLUG, "twoway")

In [3]:
def get_bootstrap_standard_error_mean_stddev_ratio(
    plot_statistics_df: pd.DataFrame,
    end_times: List[str]
) -> pd.DataFrame:
    """Extract the Table 2 figures for the requested experiment end times.

    Args:
        plot_statistics_df: Plot-statistics table; must contain the
            identifying columns and the three
            ``bootstrap_standard_error_*`` columns selected below.
        end_times: Values of ``end_time`` whose rows should be kept.

    Returns:
        The rows of ``plot_statistics_df`` whose ``end_time`` is in
        ``end_times``, restricted to the identifying columns plus the
        bootstrap standard error mean ratio, stddev ratio and count.
    """
    table_columns = [
        'dataset_name',
        'response_col',
        'start_time',
        'end_time',
        'bootstrap_standard_error_mean_ratio',
        'bootstrap_standard_error_stddev_ratio',
        'bootstrap_standard_error_count',
    ]
    # Keep only the rows belonging to the requested end times.
    has_requested_end_time = plot_statistics_df['end_time'].isin(end_times)
    return plot_statistics_df.loc[has_requested_end_time, table_columns]

## Defining Experiment End Times

In this set of experiments, the end dates used on the UCI Online Retail II dataset are:

* `datetime(2010, 2, 13)` - 74 days (10% of dataset duration)
* `datetime(2010, 12, 6)` - 370 days (50% of dataset duration)
* `datetime(2012, 1, 1)` - Full dataset duration

The end dates used on the Olist Brazilian e-Commerce dataset are:

* `datetime(2017, 1, 28)` - 146 days (20% of dataset duration)
* `datetime(2017, 9, 4)` - 365 days (50% of dataset duration / 1 year)
* `datetime(2018, 9, 4)` - Full dataset duration

In [4]:
# Experiment end dates for the UCI Online Retail II dataset.
uci_end_times = [
    "2010-02-13",  # 74 days (10% of dataset duration)
    "2010-12-06",  # 370 days (50% of dataset duration)
    "2012-01-01",  # full dataset duration
]
# Experiment end dates for the Olist Brazilian e-Commerce dataset.
olist_end_times = [
    "2017-01-28",  # 146 days (20% of dataset duration)
    "2017-09-04",  # 365 days (50% of dataset duration / 1 year)
    "2018-09-04",  # full dataset duration
]

## UCI - One-way

In [5]:
# Table 2 figures: UCI dataset, one-way bootstrap, at the UCI end times.
get_bootstrap_standard_error_mean_stddev_ratio(
    plot_statistics_uci_asp_oneway_df,
    end_times=uci_end_times,
)

Unnamed: 0,dataset_name,response_col,start_time,end_time,bootstrap_standard_error_mean_ratio,bootstrap_standard_error_stddev_ratio,bootstrap_standard_error_count
8,uci_online_retail_ii_customer_order_item_view,r_SellingPrice,2009-12-01,2010-02-13,64.450864,1.941167,30
15,uci_online_retail_ii_customer_order_item_view,r_SellingPrice,2009-12-01,2010-12-06,87.949526,2.816707,30
20,uci_online_retail_ii_customer_order_item_view,r_SellingPrice,2009-12-01,2012-01-01,69.822919,1.369853,31


## UCI - Two-way

In [6]:
# Table 2 figures: UCI dataset, two-way bootstrap, at the UCI end times.
get_bootstrap_standard_error_mean_stddev_ratio(
    plot_statistics_uci_asp_twoway_df,
    end_times=uci_end_times,
)

Unnamed: 0,dataset_name,response_col,start_time,end_time,bootstrap_standard_error_mean_ratio,bootstrap_standard_error_stddev_ratio,bootstrap_standard_error_count
0,uci_online_retail_ii_customer_order_item_view,r_SellingPrice,2009-12-01,2010-02-13,72.833891,2.196337,54
1,uci_online_retail_ii_customer_order_item_view,r_SellingPrice,2009-12-01,2010-12-06,105.150605,3.330223,15
2,uci_online_retail_ii_customer_order_item_view,r_SellingPrice,2009-12-01,2012-01-01,94.327675,2.276844,18


## Olist - One-way

In [7]:
# Table 2 figures: Olist dataset, one-way bootstrap, at the Olist end times.
get_bootstrap_standard_error_mean_stddev_ratio(
    plot_statistics_olist_asp_oneway_df,
    end_times=olist_end_times,
)

Unnamed: 0,dataset_name,response_col,start_time,end_time,bootstrap_standard_error_mean_ratio,bootstrap_standard_error_stddev_ratio,bootstrap_standard_error_count
5,olist_brazilian_ecommerce_customer_order_item_...,r_SellingPrice,2016-09-04,2017-01-28,1.103773,0.035482,50
13,olist_brazilian_ecommerce_customer_order_item_...,r_SellingPrice,2016-09-04,2017-09-04,1.05817,0.035993,50
18,olist_brazilian_ecommerce_customer_order_item_...,r_SellingPrice,2016-09-04,2018-09-04,1.075852,0.03729,52


## Olist - Two-way

In [8]:
# Table 2 figures: Olist dataset, two-way bootstrap, at the Olist end times.
get_bootstrap_standard_error_mean_stddev_ratio(
    plot_statistics_olist_asp_twoway_df,
    end_times=olist_end_times,
)

Unnamed: 0,dataset_name,response_col,start_time,end_time,bootstrap_standard_error_mean_ratio,bootstrap_standard_error_stddev_ratio,bootstrap_standard_error_count
0,olist_brazilian_ecommerce_customer_order_item_...,r_SellingPrice,2016-09-04,2017-01-28,2.083072,0.07533,50
1,olist_brazilian_ecommerce_customer_order_item_...,r_SellingPrice,2016-09-04,2017-09-04,2.531421,0.066323,50
2,olist_brazilian_ecommerce_customer_order_item_...,r_SellingPrice,2016-09-04,2018-09-04,3.363627,0.109007,54
