In [1]:
import numpy as np
import pandas as pd

from ols_bootstrap.pairs import PairsBootstrap
from ols_bootstrap.residual import ResidualBootstrap
from ols_bootstrap.wild import WildBootstrap

# Display floats in a 20-character-wide field with thousands separators and 5 decimal places.
pd.set_option('display.float_format', '{:20,.5f}'.format)

In [2]:
# Load the raw file once and keep it untouched so this cell can be re-run safely.
# low_memory=False makes pandas infer each column's dtype from the whole file
# rather than chunk by chunk. NOTE(review): the truncated warning left in this
# cell's output looks like the mixed-type DtypeWarning this avoids; confirm.
balance_raw = pd.read_csv('./balance2018.csv', low_memory=False)

# Keep rows with non-zero sales, restrict to the dependent variable (first
# column) plus the two regressors, and drop rows where a regressor is missing.
model_cols = ['sales_clean', 'tanass_clean', 'tax']
df = (
    balance_raw.loc[balance_raw['sales_clean'] != 0, model_cols]
    .dropna(subset=['tanass_clean', 'tax'])
)

# log(1 + x) transform of every column. np.log1p is vectorised over the whole
# frame (DataFrame.applymap is deprecated in pandas >= 2.1 and calls a Python
# lambda per element) and is more accurate for values close to zero.
df_scaled = np.log1p(df)

  exec(code_obj, self.user_global_ns, self.user_ns)


In [3]:
# Draw a fixed-size subsample without replacement. random_state pins the draw so
# a fresh Restart & Run All works on the same rows every time. The bootstrap
# resampling in the cells below is NOT seeded by this.
df_sample = df_scaled.sample(n=10000, replace=False, random_state=42)

# The first column is the dependent variable, the remaining columns are the
# regressors. List-based iloc keeps the single column as a DataFrame, so no
# pd.DataFrame(...) wrapper is needed.
Y_data = df_sample.iloc[:, [0]]
X_data = df_sample.iloc[:, 1:]

## Default SE on the original OLS is HC3, default CI on bootstrapped parameter is BC. 

That is, by default `se_type='hc3'` and `ci_type='bc'`.

In [4]:
# Pairs bootstrap. 'hc3' and 'bc' are already the default values of se_type and
# ci_type; they are spelled out here only to show the arguments.
psb = PairsBootstrap(
    Y_data,
    X_data,
    reps=1000,
    se_type='hc3',
    ci_type='bc',
)
psb.fit()

In [5]:
# Residual bootstrap, leaving se_type and ci_type at their defaults.
rsb = ResidualBootstrap(
    Y_data,
    X_data,
    reps=1000,
)
rsb.fit()

In [6]:
# Wild bootstrap with from_distro set to "standard_normal".
wb_stdn = WildBootstrap(
    Y_data,
    X_data,
    reps=1000,
    from_distro="standard_normal",
)
wb_stdn.fit()

In [7]:
# Wild bootstrap with from_distro set to "rademacher".
wb_rad = WildBootstrap(
    Y_data,
    X_data,
    reps=1000,
    from_distro="rademacher",
)
wb_rad.fit()

In [8]:
# Wild bootstrap with from_distro set to "mammen".
wb_mam = WildBootstrap(
    Y_data,
    X_data,
    reps=1000,
    from_distro="mammen",
)
wb_mam.fit()

In [9]:
# Show the summary table for the pairs bootstrap fit.
psb.summary()

+----------------------------------------------------------------------------------------------------------------------------------+
|                     Pairs Bootstrap results with 10000 obs and 1000 BS reps using HC3 SE-s and 95.00% BC CI                      |
+--------------+------------+------------------+--------+---------------+--------------+--------------+------------------+---------+
|     Var      | OLS Params | Avg of BS Params |  Bias  | OLS Params SE | BS Params SE | % of SE Diff |        CI        | CI Diff |
+--------------+------------+------------------+--------+---------------+--------------+--------------+------------------+---------+
|    const     |   6.2077   |      6.2085      | 0.0007 |     0.0456    |    0.0422    |     7.40     | [6.1300, 6.2913] |  0.1613 |
+--------------+------------+------------------+--------+---------------+--------------+--------------+------------------+---------+
| tanass_clean |   0.1747   |      0.1748      | 0.0001 |     0.0051 

In [10]:
# Show the summary table for the residual bootstrap fit.
rsb.summary()

+----------------------------------------------------------------------------------------------------------------------------------+
|                    Residual Bootstrap results with 10000 obs and 1000 BS reps using HC3 SE-s and 95.00% BC CI                    |
+--------------+------------+------------------+--------+---------------+--------------+--------------+------------------+---------+
|     Var      | OLS Params | Avg of BS Params |  Bias  | OLS Params SE | BS Params SE | % of SE Diff |        CI        | CI Diff |
+--------------+------------+------------------+--------+---------------+--------------+--------------+------------------+---------+
|    const     |   6.2077   |      6.2077      | 0.0001 |     0.0456    |    0.0351    |    22.95     | [6.1405, 6.2740] |  0.1334 |
+--------------+------------+------------------+--------+---------------+--------------+--------------+------------------+---------+
| tanass_clean |   0.1747   |      0.1749      | 0.0002 |     0.0051 

In [11]:
# Show the summary table for the wild bootstrap fit (standard normal variant).
wb_stdn.summary()

+----------------------------------------------------------------------------------------------------------------------------------+
|           Wild Bootstrap with Standard Normal results with 10000 obs and 1000 BS reps using HC3 SE-s and 95.00% BC CI            |
+--------------+------------+------------------+--------+---------------+--------------+--------------+------------------+---------+
|     Var      | OLS Params | Avg of BS Params |  Bias  | OLS Params SE | BS Params SE | % of SE Diff |        CI        | CI Diff |
+--------------+------------+------------------+--------+---------------+--------------+--------------+------------------+---------+
|    const     |   6.2077   |      6.2065      | 0.0012 |     0.0456    |    0.0351    |    22.91     | [6.1422, 6.2789] |  0.1366 |
+--------------+------------+------------------+--------+---------------+--------------+--------------+------------------+---------+
| tanass_clean |   0.1747   |      0.1749      | 0.0002 |     0.0051 

In [12]:
# Show the summary table for the wild bootstrap fit (Rademacher variant).
wb_rad.summary()

+----------------------------------------------------------------------------------------------------------------------------------+
|              Wild Bootstrap with Rademacher results with 10000 obs and 1000 BS reps using HC3 SE-s and 95.00% BC CI              |
+--------------+------------+------------------+--------+---------------+--------------+--------------+------------------+---------+
|     Var      | OLS Params | Avg of BS Params |  Bias  | OLS Params SE | BS Params SE | % of SE Diff |        CI        | CI Diff |
+--------------+------------+------------------+--------+---------------+--------------+--------------+------------------+---------+
|    const     |   6.2077   |      6.2065      | 0.0012 |     0.0456    |    0.0360    |    20.94     | [6.1345, 6.2813] |  0.1468 |
+--------------+------------+------------------+--------+---------------+--------------+--------------+------------------+---------+
| tanass_clean |   0.1747   |      0.1746      | 0.0001 |     0.0051 

In [13]:
# Show the summary table for the wild bootstrap fit (Mammen variant).
wb_mam.summary()

+----------------------------------------------------------------------------------------------------------------------------------+
|                Wild Bootstrap with Mammen results with 10000 obs and 1000 BS reps using HC3 SE-s and 95.00% BC CI                |
+--------------+------------+------------------+--------+---------------+--------------+--------------+------------------+---------+
|     Var      | OLS Params | Avg of BS Params |  Bias  | OLS Params SE | BS Params SE | % of SE Diff |        CI        | CI Diff |
+--------------+------------+------------------+--------+---------------+--------------+--------------+------------------+---------+
|    const     |   6.2077   |      6.2092      | 0.0015 |     0.0456    |    0.0350    |    23.19     | [6.1415, 6.2789] |  0.1374 |
+--------------+------------+------------------+--------+---------------+--------------+--------------+------------------+---------+
| tanass_clean |   0.1747   |      0.1747      | 0.0000 |     0.0051 

## Some useful methods were implemented (let's try them on the `wb_mam` object, i.e. the Wild Bootstrap with Mammen)

- What the following three methods have in common is that either a string (to select a single variable or CI type) or a 1-D array-like can be passed to `which_var` and/or `which_ci` (the latter only if it exists for that object class).

### get_ci() method

#### Vanilla version: only the object's current CI type is used, with all independent variables

In [14]:
# No arguments: lower/upper CI bounds for every variable.
wb_mam.get_ci()

Unnamed: 0_level_0,bc,bc
Unnamed: 0_level_1,lwb,upb
const,6.14149,6.27892
tanass_clean,0.16728,0.18186
tax,0.50932,0.53428


#### However, any combination of the CI types (`'bc'`, `'bca'`, `'percentile'`) and any combination of independent variables can be selected with the optional `which_ci` and `which_var` arguments, respectively.

Please note that choosing `'bca'` can take a while, as it uses jackknife resampling to calculate the acceleration factor.

In [15]:
# Two CI types for a chosen subset of variables, passed as lists.
wb_mam.get_ci(which_ci=['bc', 'percentile'], which_var=['tax', 'const'])

Unnamed: 0_level_0,bc,bc,percentile,percentile
Unnamed: 0_level_1,lwb,upb,lwb,upb
tax,0.50932,0.53428,0.50898,0.53389
const,6.14149,6.27892,6.14185,6.27977


In [16]:
# A single CI type and a single variable can be passed as plain strings.
wb_mam.get_ci(which_ci='bca', which_var='tanass_clean')

Unnamed: 0_level_0,bca,bca
Unnamed: 0_level_1,lwb,upb
tanass_clean,0.16729,0.1819


### get_all_se() method

The following SE-s are calculated: 
- bootstrapped - standard error of the bootstrapped parameters
- constant - model-based OLS Standard Errors, that is, constant variance is assumed 
- HC0, HC1, HC2, HC3, HC4, HC4m, HC5 - Heteroskedasticity-Consistent Standard Errors (HCE) using sandwich estimators 

#### Vanilla version: using all independent variables.

In [17]:
# No arguments: all SE types for every variable.
wb_mam.get_all_se()

Unnamed: 0,bootstrapped,constant,hc0,hc1,hc2,hc3,hc4,hc4m,hc5
const,0.03502,0.03534,0.04557,0.04558,0.04559,0.0456,0.0456,0.0456,0.0456
tanass_clean,0.00379,0.0039,0.00508,0.00509,0.00509,0.00509,0.00509,0.00509,0.00509
tax,0.00636,0.00656,0.00927,0.00927,0.00928,0.00928,0.00928,0.00928,0.00928


#### A subset of independent variables can be chosen with the `which_var` argument to calculate the above-mentioned 9 SE-s

In [18]:
# Restrict the SE table to a list of variables.
wb_mam.get_all_se(which_var=['tanass_clean', 'tax'])

Unnamed: 0,bootstrapped,constant,hc0,hc1,hc2,hc3,hc4,hc4m,hc5
tanass_clean,0.00379,0.0039,0.00508,0.00509,0.00509,0.00509,0.00509,0.00509,0.00509
tax,0.00636,0.00656,0.00927,0.00927,0.00928,0.00928,0.00928,0.00928,0.00928


In [19]:
# A single variable can be passed as a plain string.
wb_mam.get_all_se(which_var='tax')

Unnamed: 0,bootstrapped,constant,hc0,hc1,hc2,hc3,hc4,hc4m,hc5
tax,0.00636,0.00656,0.00927,0.00927,0.00928,0.00928,0.00928,0.00928,0.00928


### get_bootstrap_params() method

#### Vanilla version: returns a dataframe with the parameter estimates of ALL independent variables from each (wild) bootstrap replication.

In [20]:
# No arguments: one row per bootstrap replication, one column per variable.
wb_mam.get_bootstrap_params()

Unnamed: 0,const,tanass_clean,tax
0,6.18135,0.17343,0.53010
1,6.20670,0.17552,0.52219
2,6.16693,0.17764,0.52024
3,6.20038,0.16977,0.52742
4,6.18001,0.17605,0.52605
...,...,...,...
995,6.22621,0.17621,0.51617
996,6.24430,0.17121,0.52315
997,6.21609,0.17924,0.51128
998,6.24108,0.17879,0.51032


#### As usual, the desired independent variable can be chosen with 'which_var' argument

In [21]:
# Bootstrapped estimates for a single variable, passed as a string.
wb_mam.get_bootstrap_params(which_var='tax')

Unnamed: 0,tax
0,0.53010
1,0.52219
2,0.52024
3,0.52742
4,0.52605
...,...
995,0.51617
996,0.52315
997,0.51128
998,0.51032


In [22]:
# Bootstrapped estimates for several variables; here the names are passed as a tuple.
wb_mam.get_bootstrap_params(which_var=('const', 'tanass_clean'))

Unnamed: 0,const,tanass_clean
0,6.18135,0.17343
1,6.20670,0.17552
2,6.16693,0.17764
3,6.20038,0.16977
4,6.18001,0.17605
...,...,...
995,6.22621,0.17621
996,6.24430,0.17121
997,6.21609,0.17924
998,6.24108,0.17879
