In [None]:
import os, sys
# Make packages under ./src (relative to the current working directory) importable.
# The membership check keeps this cell idempotent: re-running it does not
# append a duplicate entry to sys.path each time.
_src_dir = os.path.join(os.getcwd(), 'src/')
if _src_dir not in sys.path:
    sys.path.append(_src_dir)

from datetime import datetime
import glob

In [None]:
from oce_ecomm_abv_calculation.sample_statistics.twoway_bootstrap import TwowayBootstrapStatistics
from oce_ecomm_abv_calculation.utils.experiment_data import consolidate_experiment_data_files

# UCI Online Retail II - ASP

Some end dates used in our experiments:

* `datetime(2010, 2, 13)` - 74 days (10% of dataset duration)
* `datetime(2010, 12, 6)` - 370 days (50% of dataset duration)
* `datetime(2012, 1, 1)` - Full dataset duration

These end dates define the expanding windows, which simulate different experiment durations.

**WARNING**: These experiments take a long time to run. Using a single thread on an AMD Ryzen 9 5950X CPU, a 74-day experiment takes 6 hours, and a full-duration experiment takes 75+ hours.

In [None]:
# Bootstrap settings: how many times standard_error() is invoked, and the
# num_bootstrap_means value handed to each invocation.
num_bootstrap_means = 500
num_std_error_samples = 1

# Bounds passed as start_time / end_time to the statistics object.
start_date = datetime(2009, 12, 1)
end_date = datetime(2010, 2, 13)

# Twoway bootstrap statistics
uci_retail_coi_asp_twoway = TwowayBootstrapStatistics(
    dataset="uci_online_retail_ii_customer_order_item_view",
    response_col="r_SellingPrice",
    start_time=start_date,
    end_time=end_date,
)

# Each iteration calls standard_error() and then saves the latest result
# before starting the next run.
for i in range(1, num_std_error_samples + 1):
    uci_retail_coi_asp_twoway.standard_error(
        num_bootstrap_means=num_bootstrap_means,
        verbose=True,
    )
    uci_retail_coi_asp_twoway.save_latest_result_as_pd_df()
    # NOTE(review): the trailing spaces presumably overwrite a progress line
    # emitted by verbose=True - confirm before trimming them.
    print(f"Twoway bootstrap run {i} saved.              ")

# Olist Brazilian e-Commerce - ASP

Some end dates used in our experiments:

* `datetime(2017, 1, 28)` - 146 days (20% of dataset duration)
* `datetime(2017, 9, 4)` - 365 days (50% of dataset duration / 1 year)
* `datetime(2018, 9, 4)` - Full dataset duration

These end dates define the expanding windows, which simulate different experiment durations.

In [None]:
# Bootstrap settings: how many times standard_error() is invoked, and the
# num_bootstrap_means value handed to each invocation.
num_bootstrap_means = 500
num_std_error_samples = 5

# Bounds passed as start_time / end_time to the statistics object.
start_date = datetime(2016, 9, 4)
end_date = datetime(2017, 1, 28)

# Twoway bootstrap statistics
olist_ecommerce_coi_asp_twoway = TwowayBootstrapStatistics(
    dataset="olist_brazilian_ecommerce_customer_order_item_view",
    response_col="r_SellingPrice",
    start_time=start_date,
    end_time=end_date,
)

# Each iteration calls standard_error() and then saves the latest result
# before starting the next run.
for i in range(1, num_std_error_samples + 1):
    olist_ecommerce_coi_asp_twoway.standard_error(
        num_bootstrap_means=num_bootstrap_means,
        verbose=True,
    )
    olist_ecommerce_coi_asp_twoway.save_latest_result_as_pd_df()
    # NOTE(review): the trailing spaces presumably overwrite a progress line
    # emitted by verbose=True - confirm before trimming them.
    print(f"Twoway bootstrap run {i} saved.                   ")

# Utilities

The following cell consolidates the many parquet files generated by each `save_latest_result_as_pd_df()` call into a single dataframe stored in one parquet file.

In [None]:
# Directory passed as dir_path to the consolidation helper.
expt_data_dir_path = "./data/"

# Selectors for the experiment outputs to consolidate; the dataset and
# response column use the same values as the UCI cell in this notebook.
dataset = "uci_online_retail_ii_customer_order_item_view" # e.g. "olist_brazilian_ecommerce_customer_order_item_view"
response_col = "r_SellingPrice"
expt_method = "twoway"

# NOTE(review): cleanup_files=True presumably removes the per-run files once
# they have been merged - confirm against the helper before relying on it.
consolidate_experiment_data_files(
    dir_path=expt_data_dir_path,
    dataset=dataset,
    response_col=response_col,
    expt_method=expt_method,
    cleanup_files=True,
)