In [1]:
# Enable IPython's autoreload extension in mode 2: imported modules are
# reloaded before each cell executes, so edits to source files on disk are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

import sys
# Add the parent directory to the module search path. The relative path is
# resolved against the kernel's working directory, and because it is appended
# it is searched last.
# NOTE(review): presumably this lets `korr` be imported from the repository
# when it is not installed -- an installed copy would still win; confirm.
sys.path.append("../")

In [2]:
from korr import bootcorr, pearson
import numpy as np

# Load Dataset

In [3]:
from sklearn.datasets import load_diabetes

# Keep only the feature matrix of the returned dataset; the other fields of
# the loader's result are not used in this notebook.
X = load_diabetes()["data"]

# Correlation Estimate

In [4]:
# Correlation matrix estimated once on the full sample; the second return
# value of pearson() is discarded.
cmat, _ = pearson(X)
print(np.round(cmat, 2))

[[ 1.    0.17  0.19  0.34  0.26  0.22 -0.08  0.2   0.27  0.3 ]
 [ 0.17  1.    0.09  0.24  0.04  0.14 -0.38  0.33  0.15  0.21]
 [ 0.19  0.09  1.    0.4   0.25  0.26 -0.37  0.41  0.45  0.39]
 [ 0.34  0.24  0.4   1.    0.24  0.19 -0.18  0.26  0.39  0.39]
 [ 0.26  0.04  0.25  0.24  1.    0.9   0.05  0.54  0.52  0.33]
 [ 0.22  0.14  0.26  0.19  0.9   1.   -0.2   0.66  0.32  0.29]
 [-0.08 -0.38 -0.37 -0.18  0.05 -0.2   1.   -0.74 -0.4  -0.27]
 [ 0.2   0.33  0.41  0.26  0.54  0.66 -0.74  1.    0.62  0.42]
 [ 0.27  0.15  0.45  0.39  0.52  0.32 -0.4   0.62  1.    0.46]
 [ 0.3   0.21  0.39  0.39  0.33  0.29 -0.27  0.42  0.46  1.  ]]


# Bootstrap Estimation

In [5]:
# Repeat the Pearson estimate on 30 draws with subsample=0.7 and replace=True.
# NOTE(review): no random seed is set anywhere in this notebook, so these
# numbers presumably change from run to run -- confirm how bootcorr draws its
# samples and seed accordingly.
r3, _, _ = bootcorr(
    X,
    corr_fn=pearson,
    n_draws=30,
    subsample=0.7,
    replace=True,
)
# Average over the first axis of r3 (assumed to index the draws -- TODO confirm).
cmat_bmean = np.mean(r3, axis=0)
print(np.round(cmat_bmean, 2))

[[ 1.    0.17  0.19  0.35  0.28  0.25 -0.08  0.21  0.27  0.3 ]
 [ 0.17  1.    0.08  0.23  0.04  0.15 -0.38  0.33  0.14  0.21]
 [ 0.19  0.08  1.    0.39  0.26  0.27 -0.36  0.4   0.43  0.38]
 [ 0.35  0.23  0.39  1.    0.25  0.2  -0.17  0.25  0.38  0.39]
 [ 0.28  0.04  0.26  0.25  1.    0.9   0.04  0.55  0.52  0.33]
 [ 0.25  0.15  0.27  0.2   0.9   1.   -0.21  0.67  0.33  0.3 ]
 [-0.08 -0.38 -0.36 -0.17  0.04 -0.21  1.   -0.74 -0.39 -0.26]
 [ 0.21  0.33  0.4   0.25  0.55  0.67 -0.74  1.    0.61  0.4 ]
 [ 0.27  0.14  0.43  0.38  0.52  0.33 -0.39  0.61  1.    0.46]
 [ 0.3   0.21  0.38  0.39  0.33  0.3  -0.26  0.4   0.46  1.  ]]


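How stable are the estimates? The standard deviation of each correlation coefficient across the bootstrap draws is shown below; smaller values mean a more stable estimate.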

In [6]:
# Spread of every coefficient along the first axis of r3, using NumPy's
# default ddof=0 (population standard deviation).
cmat_bstd = np.std(r3, axis=0)
print(np.round(cmat_bstd, 2))

[[0.   0.04 0.06 0.05 0.05 0.06 0.04 0.04 0.05 0.05]
 [0.04 0.   0.06 0.05 0.05 0.05 0.04 0.04 0.05 0.04]
 [0.06 0.06 0.   0.04 0.04 0.04 0.04 0.04 0.04 0.04]
 [0.05 0.05 0.04 0.   0.04 0.04 0.06 0.05 0.04 0.05]
 [0.05 0.05 0.04 0.04 0.   0.01 0.05 0.04 0.04 0.04]
 [0.06 0.05 0.04 0.04 0.01 0.   0.05 0.04 0.05 0.04]
 [0.04 0.04 0.04 0.06 0.05 0.05 0.   0.01 0.05 0.04]
 [0.04 0.04 0.04 0.05 0.04 0.04 0.01 0.   0.04 0.04]
 [0.05 0.05 0.04 0.04 0.04 0.05 0.05 0.04 0.   0.04]
 [0.05 0.04 0.04 0.05 0.04 0.04 0.04 0.04 0.04 0.  ]]


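How different is the bootstrap estimate from the full-sample estimate? The absolute element-wise difference between the bootstrap mean and the full-sample correlation matrix is shown below.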

In [7]:
# Element-wise absolute gap between the bootstrap mean and the full-sample matrix.
print(np.round(np.absolute(cmat_bmean - cmat), 2))

[[0.   0.   0.   0.01 0.02 0.03 0.   0.01 0.   0.  ]
 [0.   0.   0.01 0.01 0.   0.   0.   0.   0.01 0.  ]
 [0.   0.01 0.   0.   0.01 0.01 0.01 0.01 0.01 0.01]
 [0.01 0.01 0.   0.   0.01 0.01 0.01 0.01 0.02 0.  ]
 [0.02 0.   0.01 0.01 0.   0.   0.01 0.01 0.   0.01]
 [0.03 0.   0.01 0.01 0.   0.   0.01 0.01 0.01 0.01]
 [0.   0.   0.01 0.01 0.01 0.01 0.   0.   0.   0.01]
 [0.01 0.   0.01 0.01 0.01 0.01 0.   0.   0.01 0.01]
 [0.   0.01 0.01 0.02 0.   0.01 0.   0.01 0.   0.01]
 [0.   0.   0.01 0.   0.01 0.01 0.01 0.01 0.01 0.  ]]
