In [1]:
import os, sys
# Make the project package importable by adding <current working directory>/src/
# to the module search path. This depends on the directory the kernel was started in.
sys.path.append(os.path.join(os.getcwd(), 'src/'))

# NOTE(review): VanillaSampleStatistics is not used by any cell visible in this
# notebook excerpt; it may be used elsewhere — confirm before removing.
from oce_ecomm_abv_calculation.sample_statistics.vanilla import VanillaSampleStatistics
from oce_ecomm_abv_calculation.sample_statistics.oneway_bootstrap import OnewayBootstrapStatistics
from datetime import datetime

# UCI Online Retail II Dataset

Some end dates used in our experiments:

* `datetime(2009, 12, 15)` - 14 days
* `datetime(2009, 12, 29)` - 28 days
* `datetime(2010, 1, 12)` - 42 days / 6 weeks
* `datetime(2010, 1, 26)` - 56 days / 8 weeks
* `datetime(2010, 2, 13)` - 74 days (10% of dataset duration)
* `datetime(2010, 4, 28)` - 148 days (20% of dataset duration)
* `datetime(2010, 7, 11)` - 222 days (30% of dataset duration)
* `datetime(2010, 9, 23)` - 296 days (40% of dataset duration)
* `datetime(2010, 12, 6)` - 370 days (50% of dataset duration)
* `datetime(2011, 2, 18)` - 444 days (60% of dataset duration)
* `datetime(2011, 5, 3)` - 518 days (70% of dataset duration)
* `datetime(2011, 7, 16)` - 592 days (80% of dataset duration)
* `datetime(2011, 9, 28)` - 666 days (90% of dataset duration)
* `datetime(2012, 1, 1)` - Full dataset duration

Each end date, paired with the fixed start date `datetime(2009, 12, 1)`, defines an expanding window that simulates a different experiment duration.

## UCI Online Retail II - ABV (Oneway bootstrap estimation)

In [None]:
# How many times the standard-error estimate is repeated, and how many
# bootstrap means are requested for each repetition.
num_std_error_samples = 10
num_bootstrap_means = 500

# Window bounds passed to the estimator (the 28-day window from the list above).
start_date = datetime(2009, 12, 1)
end_date = datetime(2009, 12, 29)

# ABV: customer-order view with the basket value column as the response.
uci_retail_co_abv_oneway = OnewayBootstrapStatistics(
    dataset="uci_online_retail_ii_customer_order_view",
    response_col="r_BasketValue",
    start_time=start_date,
    end_time=end_date,
)

for i in range(1, num_std_error_samples + 1):
    # One estimation run, followed by saving that run's result.
    uci_retail_co_abv_oneway.standard_error(
        num_bootstrap_means=num_bootstrap_means,
        verbose=True,
    )
    uci_retail_co_abv_oneway.save_latest_result_as_pd_df()
    # Trailing spaces are kept as-is; presumably they blank out the progress
    # text emitted by verbose=True — confirm.
    print(f"Run {i} saved.                              ")

## UCI Online Retail II - ABS (Oneway bootstrap estimation)

In [None]:
# Window bounds passed to the estimator (the 28-day window from the list above).
start_date = datetime(2009, 12, 1)
end_date = datetime(2009, 12, 29)

# How many times the standard-error estimate is repeated, and how many
# bootstrap means are requested for each repetition.
num_std_error_samples = 10
num_bootstrap_means = 500

# ABS: the response must be the basket *size* column. The previous value,
# 'r_BasketValue', duplicated the ABV cell and made this section recompute ABV.
# 'r_BasketSize' matches the Olist ABS cell in this notebook.
uci_retail_co_abs_oneway = OnewayBootstrapStatistics(
    dataset="uci_online_retail_ii_customer_order_view",
    response_col="r_BasketSize",
    start_time=start_date,
    end_time=end_date,
)

for i in range(1, num_std_error_samples + 1):
    # One estimation run, followed by saving that run's result.
    uci_retail_co_abs_oneway.standard_error(
        num_bootstrap_means=num_bootstrap_means,
        verbose=True,
    )
    uci_retail_co_abs_oneway.save_latest_result_as_pd_df()
    # Trailing spaces are kept as-is; presumably they blank out the progress
    # text emitted by verbose=True — confirm.
    print(f"Run {i} saved.                              ")

## UCI Online Retail II - ASP (Oneway bootstrap estimation)

In [None]:
# NOTE(review): every other cell in this notebook repeats the estimate 10 times;
# this one runs it once. Confirm that a single run is intentional here.
num_std_error_samples = 1
num_bootstrap_means = 500

# Window bounds passed to the estimator (the 28-day window from the list above).
start_date = datetime(2009, 12, 1)
end_date = datetime(2009, 12, 29)

# ASP: customer-order-item view with the selling price column as the response.
uci_retail_coi_asp_oneway = OnewayBootstrapStatistics(
    dataset="uci_online_retail_ii_customer_order_item_view",
    response_col="r_SellingPrice",
    start_time=start_date,
    end_time=end_date,
)

for i in range(1, num_std_error_samples + 1):
    # One estimation run, followed by saving that run's result.
    uci_retail_coi_asp_oneway.standard_error(
        num_bootstrap_means=num_bootstrap_means,
        verbose=True,
    )
    uci_retail_coi_asp_oneway.save_latest_result_as_pd_df()
    # Trailing spaces are kept as-is; presumably they blank out the progress
    # text emitted by verbose=True — confirm.
    print(f"Run {i} saved.                               ")

# Olist Brazilian e-Commerce Dataset

Some end dates used in our experiments:

* `datetime(2016, 10, 18)` - first month (with only 2 orders) + 14 days
* `datetime(2016, 11, 1)` - first month (with only 2 orders) + 28 days
* `datetime(2016, 11, 15)` - first month (with only 2 orders) + 42 days / 6 weeks
* `datetime(2016, 11, 29)` - first month (with only 2 orders) + 56 days / 8 weeks
* `datetime(2017, 1, 28)` - 146 days (20% of dataset duration)
* `datetime(2017, 4, 11)` - 219 days (30% of dataset duration)
* `datetime(2017, 6, 23)` - 292 days (40% of dataset duration)
* `datetime(2017, 9, 4)` - 365 days (50% of dataset duration / 1 year)
* `datetime(2017, 11, 16)` - 438 days (60% of dataset duration)
* `datetime(2018, 1, 28)` - 511 days (70% of dataset duration)
* `datetime(2018, 4, 11)` - 584 days (80% of dataset duration)
* `datetime(2018, 6, 23)` - 657 days (90% of dataset duration)
* `datetime(2018, 9, 4)` - Full dataset duration

Each end date, paired with the fixed start date `datetime(2016, 9, 4)`, defines an expanding window that simulates a different experiment duration.

## Olist Brazilian e-Commerce - ABV (Oneway bootstrap estimation)

In [3]:
# How many times the standard-error estimate is repeated, and how many
# bootstrap means are requested for each repetition.
num_std_error_samples = 10
num_bootstrap_means = 500

# Window bounds passed to the estimator ("first month + 28 days" in the list above).
start_date = datetime(2016, 9, 4)
end_date = datetime(2016, 11, 1)

# ABV: customer-order view with the basket value column as the response.
olist_ecommerce_co_abv_oneway = OnewayBootstrapStatistics(
    dataset="olist_brazilian_ecommerce_customer_order_view",
    response_col="r_BasketValue",
    start_time=start_date,
    end_time=end_date,
)

for i in range(1, num_std_error_samples + 1):
    # One estimation run, followed by saving that run's result.
    olist_ecommerce_co_abv_oneway.standard_error(
        num_bootstrap_means=num_bootstrap_means,
        verbose=True,
    )
    olist_ecommerce_co_abv_oneway.save_latest_result_as_pd_df()
    # Trailing spaces are kept as-is; presumably they blank out the progress
    # text emitted by verbose=True — confirm.
    print(f"Run {i} saved.                                   ")

Run 1 saved.                                   
Run 2 saved.                                   
Run 3 saved.                                   
Run 4 saved.                                   
Run 5 saved.                                   
Run 6 saved.                                   
Run 7 saved.                                   
Run 8 saved.                                   
Run 9 saved.                                   
Run 10 saved.                                   


## Olist Brazilian e-Commerce - ABS (Oneway bootstrap estimation)

In [4]:
# How many times the standard-error estimate is repeated, and how many
# bootstrap means are requested for each repetition.
num_std_error_samples = 10
num_bootstrap_means = 500

# Window bounds passed to the estimator ("first month + 28 days" in the list above).
start_date = datetime(2016, 9, 4)
end_date = datetime(2016, 11, 1)

# ABS: customer-order view with the basket size column as the response.
olist_ecommerce_co_abs_oneway = OnewayBootstrapStatistics(
    dataset="olist_brazilian_ecommerce_customer_order_view",
    response_col="r_BasketSize",
    start_time=start_date,
    end_time=end_date,
)

for i in range(1, num_std_error_samples + 1):
    # One estimation run, followed by saving that run's result.
    olist_ecommerce_co_abs_oneway.standard_error(
        num_bootstrap_means=num_bootstrap_means,
        verbose=True,
    )
    olist_ecommerce_co_abs_oneway.save_latest_result_as_pd_df()
    # Trailing spaces are kept as-is; presumably they blank out the progress
    # text emitted by verbose=True — confirm.
    print(f"Run {i} saved.                                   ")

Run 1 saved.                                   
Run 2 saved.                                   
Run 3 saved.                                   
Run 4 saved.                                   
Run 5 saved.                                   
Run 6 saved.                                   
Run 7 saved.                                   
Run 8 saved.                                   
Run 9 saved.                                   
Run 10 saved.                                   


## Olist Brazilian e-Commerce - ASP (Oneway bootstrap estimation)

In [5]:
# How many times the standard-error estimate is repeated, and how many
# bootstrap means are requested for each repetition.
num_std_error_samples = 10
num_bootstrap_means = 500

# Window bounds passed to the estimator ("first month + 28 days" in the list above).
start_date = datetime(2016, 9, 4)
end_date = datetime(2016, 11, 1)

# ASP: customer-order-item view with the selling price column as the response.
olist_ecommerce_coi_asp_oneway = OnewayBootstrapStatistics(
    dataset="olist_brazilian_ecommerce_customer_order_item_view",
    response_col="r_SellingPrice",
    start_time=start_date,
    end_time=end_date,
)

for i in range(1, num_std_error_samples + 1):
    # One estimation run, followed by saving that run's result.
    olist_ecommerce_coi_asp_oneway.standard_error(
        num_bootstrap_means=num_bootstrap_means,
        verbose=True,
    )
    olist_ecommerce_coi_asp_oneway.save_latest_result_as_pd_df()
    # Trailing spaces are kept as-is; presumably they blank out the progress
    # text emitted by verbose=True — confirm.
    print(f"Run {i} saved.                                    ")

Run 1 saved.                                    
Run 2 saved.                                    
Run 3 saved.                                    
Run 4 saved.                                    
Run 5 saved.                                    
Run 6 saved.                                    
Run 7 saved.                                    
Run 8 saved.                                    
Run 9 saved.                                    
Run 10 saved.                                    
