In [1]:
import os
# Echo `os.path` to show which platform-specific path module this kernel resolves to.
os.path

<module 'ntpath' from 'd:\\anaconda3\\envs\\simu\\lib\\ntpath.py'>

In [2]:
# Import libraries
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy import linalg as LA
from sklearn.datasets import make_sparse_spd_matrix
from utils.covest import CovEstWithNetwork
from utils.adpt_correlation_threshold import AdptCorrThreshold
from importlib import reload
from wlpy.gist import heatmap

In [3]:
# Notebook-level seeded generator.
# NOTE(review): the helper functions visible in this notebook construct their own
# RandomState objects, so confirm that this generator is actually consumed anywhere.
rng = np.random.RandomState(19260817)  # we specify a random seed for replication

In [4]:
# Problem size N (reported as 'Number of Assets:' further down) and threshold tau
# (reported as 'Observing threshold:').
# The triple-quoted string is an inert small-scale alternative (N = 10, tau = 0.1);
# together with the trailing "# '''" it works as a manual switch between the two setups.
'''
N = 10
tau = 0.1
'''
N = 500
tau = 0.8 
# '''

In [5]:
# Random sparse symmetric positive-definite matrix, seeded for replication.
# (Earlier alternative, left disabled: make_spd_matrix(N, random_state=5).)
S = make_sparse_spd_matrix(N, random_state=100)

# 0/1 indicator of the off-diagonal entries whose absolute value exceeds tau.
G = (np.abs(S - np.diag(np.diag(S))) > tau).astype(int)

# Settings of this run, gathered for reporting.
# (Disabled in the original as well: df = pd.DataFrame(param_list).)
param_list = dict([
    ('Number of Assets:', N),
    ('Generating Method', 'Randomly generate sparse p.d. matrix'),
    ('Observing threshold:', tau),
])

print((S != 0).sum())  # count of non-zero entries of S
print(G.sum())         # count of off-diagonal entries with |.| > tau
print(param_list)

88484
5166
{'Number of Assets:': 500, 'Generating Method': 'Randomly generate sparse p.d. matrix', 'Observing threshold:': 0.8}


In [6]:
# Same setting as in BL2008 cov regularization
# The model is AR(1)
# N = 400
# rho = 0.7
# S = np.zeros(shape=[N, N])
# for j in range(0, N):
#     S = S + np.diag(np.ones(N-j)*(rho**j), -j) + \
#         np.diag(np.ones(N-j)*(rho**j), j)
# G = (S >= 0.49) * 1

In [7]:
# AR(1)
def gen_S(rho = 0.8,N = 500):
    """Build the N x N AR(1)-type matrix whose (i, j) entry is ``rho ** |i - j|``.

    Parameters
    ----------
    rho : float
        Base of the geometric decay away from the main diagonal.
    N : int
        Number of rows and columns.

    Returns
    -------
    numpy.ndarray
        Symmetric float array of shape (N, N): ones on the diagonal and
        ``rho ** k`` on the k-th sub- and super-diagonals.
    """
    # One power per lag, looked up by |i - j|. This gives the same entries as
    # summing 2N banded N x N matrices, but in O(N^2) rather than O(N^3) work.
    powers = np.array([rho ** lag for lag in range(N)], dtype=float)
    idx = np.arange(N)
    lags = np.abs(idx[:, None] - idx[None, :])
    return powers[lags]

# Rebind S to the AR(1) matrix (rho = 0.8, 500 x 500) and preview its top-left 5 x 5 corner.
S = gen_S(rho=0.8, N=500)
S[0:5, 0:5]

array([[1.    , 0.8   , 0.64  , 0.512 , 0.4096],
       [0.8   , 1.    , 0.8   , 0.64  , 0.512 ],
       [0.64  , 0.8   , 1.    , 0.8   , 0.64  ],
       [0.512 , 0.64  , 0.8   , 1.    , 0.8   ],
       [0.4096, 0.512 , 0.64  , 0.8   , 1.    ]])

In [8]:
def gen_G(S, scale, seed=103):
    """Generate a noisy, symmetric, zero-diagonal observation of ``S``.

    Off-diagonal entries are ``S`` plus Gaussian noise; the diagonal is 0.

    Parameters
    ----------
    S : numpy.ndarray
        Square (N, N) matrix used as the mean of the noisy draw.
    scale : float
        Standard deviation of the noise drawn for each entry. With ``scale=0``
        the result is the symmetrised ``S`` with its diagonal set to zero.
    seed : int or None, optional
        Seed of the private ``RandomState``. The default 103 reproduces the
        draws of the earlier hard-coded version.

    Returns
    -------
    numpy.ndarray
        Symmetric (N, N) array with zeros on the diagonal.
    """
    # Private generator: repeated calls with the same seed give the same G and
    # the global NumPy random state is left untouched.
    rng = np.random.RandomState(seed)
    N = S.shape[0]
    G = rng.normal(S.reshape(-1), scale=scale).reshape(N, N)
    # Entries (i, j) and (j, i) get independent noise; averaging with the
    # transpose restores symmetry, so for symmetric S the effective
    # off-diagonal noise s.d. is scale / sqrt(2).
    G = 0.5 * (G + G.transpose())
    # Remove the diagonal.
    G = G - np.diag(np.diag(G))
    return G

# Noisy observation of S with noise scale 0.2; print a 5 x 5 corner and the largest entry.
G = gen_G(S, 0.2)
print(G[0:5, 0:5])
print(G.max())

[[0.         0.92841126 0.58898673 0.62281627 0.48375082]
 [0.92841126 0.         0.83896895 0.57805038 0.20554933]
 [0.58898673 0.83896895 0.         0.56399136 0.64265429]
 [0.62281627 0.57805038 0.56399136 0.         0.74234067]
 [0.48375082 0.20554933 0.64265429 0.74234067 0.        ]]
1.165907438691144


In [9]:
# G2: binary indicator of the large off-diagonal entries of S
def gen_G2(S, tau):
    """Return a 0/1 matrix marking entries of ``S`` (diagonal removed) above ``tau``.

    The comparison is on the signed value (no absolute value is taken), so
    large negative entries are not marked.

    Parameters
    ----------
    S : numpy.ndarray
        Square matrix.
    tau : float
        Strict lower bound an entry must exceed to be marked with 1.

    Returns
    -------
    numpy.ndarray
        Integer array of the same shape as ``S`` holding 0s and 1s.
    """
    diagonal_part = np.diag(np.diag(S))
    exceeds = (S - diagonal_part) > tau
    return exceeds * 1
# Binary observation of S at threshold 0.7; preview the top-left 5 x 5 corner.
G2 = gen_G2(S, tau=0.7)
G2[0:5, 0:5]

array([[0, 1, 0, 0, 0],
       [1, 0, 1, 0, 0],
       [0, 1, 0, 1, 0],
       [0, 0, 1, 0, 1],
       [0, 0, 0, 1, 0]])

In [10]:
def generate_sample(S, T = 200, is_random = False):
    """Draw ``T`` observations from a zero-mean multivariate normal with covariance ``S``.

    Parameters
    ----------
    S : numpy.ndarray
        (N, N) covariance matrix.
    T : int
        Number of rows (observations) to draw.
    is_random : bool
        If False, a fresh ``RandomState(100)`` is used, so every call returns
        the same sample for the same inputs. If True, the draw comes from the
        global ``np.random`` state.

    Returns
    -------
    numpy.ndarray
        Array of shape (T, N).
    """
    source = np.random if is_random else np.random.RandomState(100)
    dim = S.shape[0]
    return source.multivariate_normal(mean=np.zeros(dim), cov=S, size=T)
# Fixed-seed sample of 200 observations from N(0, S); preview the top-left 5 x 5 corner.
X1 = generate_sample(S, 200)
X1[0:5, 0:5]

array([[ 0.85562752,  0.85271085, -1.10093045, -1.38497397, -0.06303737],
       [-2.69609797, -2.49696008, -2.69361054, -3.01477429, -1.33323753],
       [-0.91323525, -1.12412896, -1.10301585, -0.31873741, -0.51136806],
       [-1.04047859, -0.70663271, -1.27260778, -1.51379285, -0.28734216],
       [ 0.54281987,  0.65436482,  0.44288667, -0.29582842,  0.19589774]])

In [None]:
def estimate(G, X1):
    """Fit the adaptive correlation-threshold estimator on sample ``X1`` with side matrix ``G``.

    Steps, in order: wrap ``X1`` in a DataFrame and build an
    ``AdptCorrThreshold`` model with ``G``; ask the model for its smallest
    threshold "for pd"; choose parameters via ``params_by_cv('pd', ...)``;
    fit with those parameters.
    NOTE(review): "pd" presumably stands for positive definiteness — confirm
    in ``utils.adpt_correlation_threshold``.

    Returns
    -------
    tuple
        ``(model, fitted_estimate, chosen_params)``.
    """
    model = AdptCorrThreshold(pd.DataFrame(X1), G)
    smallest = model.find_smallest_threshold_for_pd()
    cv_params = model.params_by_cv('pd', smallest)
    fitted = model.fit_adaptive_corr_threshold(cv_params)
    return model, fitted, cv_params
# Fit once on the noisy observation G and the fixed sample X1, then show a 5 x 5 corner
# of the estimate and the selected parameters. The closing string is the author's note
# on how the parameters enter the threshold.
m, S_new, params = estimate(G, X1)
print(S_new[0:5, 0:5])
print(params) 
'''
When the parameters are default, the threshold
tau_ij = (params[0] + params[1] * G_ij) * scaling_factor
''' 

In [None]:
# Scratch cell cleared: it held only the unfinished identifier `find_smallest_`,
# which raises NameError on Restart & Run All.

In [None]:
# Without cross-validation: fit with the fixed parameters [2, 0] instead of the
# cross-validated ones.
# NOTE(review): going by the threshold note in this notebook, params[1] = 0 would make
# the threshold independent of G — confirm against AdptCorrThreshold.
S1 = m.fit_adaptive_corr_threshold(params = [2,0])

In [None]:
def dd_rslt(S, m, norm_type = 'fro'):
    """Collect the norm of ``S`` and the error norms of three benchmark estimators.

    Parameters
    ----------
    S : numpy.ndarray
        Reference matrix the estimates are compared against.
    m : object
        Model exposing ``sample_cov()``, ``lw_lin_shrink()`` and
        ``nonlin_shrink()``, each returning a matrix shaped like ``S``.
    norm_type : {'fro', 1, 2, ...}
        Passed as ``ord`` to ``scipy.linalg.norm``.

    Returns
    -------
    dict
        Keys ``"S"``, ``"Sample Cov"``, ``"Linear Shrinkage"`` and
        ``"Nonlinear Shrinkage"``, in that order.
    """
    benchmarks = (
        ("Sample Cov", "sample_cov"),
        ("Linear Shrinkage", "lw_lin_shrink"),
        ("Nonlinear Shrinkage", "nonlin_shrink"),
    )
    summary = {"S": LA.norm(S, ord=norm_type)}
    for label, method_name in benchmarks:
        estimate_matrix = getattr(m, method_name)()
        summary[label] = LA.norm(estimate_matrix - S, ord=norm_type)
    return summary

In [None]:
def print_rslt(norm_type='fro'):
    """Print the norm of ``S`` and the error norm of every estimate under ``norm_type``.

    Reads the notebook-level names ``S``, ``m``, ``S_new`` and ``S1``, so it
    must run after the cells that define them. ``norm_type`` is passed as
    ``ord`` to ``scipy.linalg.norm``. Ends with an empty line.
    """
    def error_norm(estimate_matrix):
        return LA.norm(estimate_matrix - S, ord=norm_type)

    print('Norm', norm_type)
    print('true', LA.norm(S, ord=norm_type))
    # Benchmarks provided by the fitted model: (printed label, method name).
    for label, method_name in (
        ('sample_cov', 'sample_cov'),
        ('lin_shrk', 'lw_lin_shrink'),
        ('nonlin', 'nonlin_shrink'),
    ):
        print(label, error_norm(getattr(m, method_name)()))
    print('S_new', error_norm(S_new))
    print('S1', error_norm(S1))
    print()

In [None]:
# Report the errors under the matrix 1-norm, the 2-norm and the Frobenius norm.
print_rslt(1)
print_rslt(2)
print_rslt('fro')

In [None]:
# -> Created on 21 November 2020
# -> Author: Weiguang Liu

In [None]:
# Heatmap of the reference matrix S.
heatmap(S)

In [None]:
# Heatmaps of the cross-validated estimate (S_new) and the fixed-parameter estimate (S1).
heatmap(S_new)
heatmap(S1)

In [None]:
# Heatmaps of the three benchmark estimators exposed by the fitted model.
heatmap(m.sample_cov())
heatmap(m.lw_lin_shrink())
heatmap(m.nonlin_shrink())

In [None]:
# Repeat the estimation on fresh samples drawn from the global NumPy random state.
# NOTE(review): the list of (m, S_new, params) tuples is not assigned to a name, so it is
# only echoed as the cell output.
repeat = 2 # 100
[estimate(G, generate_sample(S, is_random=True)) for i in range(repeat)]

In [None]:
# Sweep over the AR(1) parameter rho and the noise scale of G; each combination yields one
# record of 1-norm errors (benchmarks from dd_rslt plus the adaptive thresholding estimate).
# NOTE(review): the loop rebinds the notebook-level names S, G, X1, m, S_new and params,
# so cells that read them afterwards see the values of the last combination.
# NOTE(review): with the default arguments used here, gen_G and generate_sample reseed with
# fixed seeds on every call, so raising range(1) would only repeat identical draws.
rslt = []
for i in range(1):
    for rho in [0.8, 0.9, 0.95, 0.99]:
        for scale in np.linspace(0, 0.4, 5):
            S = gen_S(rho, N = 500)
            G = gen_G(S, scale)
            X1 = generate_sample(S, T = 200)
            m, S_new, params = estimate(G, X1)
            dct = dd_rslt(S, m, 1)
            dct["Adapt Corr Thresholding"] = LA.norm(S_new - S, ord=1)
            dct["rho"] = rho
            dct["scale"] = scale
            rslt += [dct]
            # print(rho)

In [None]:
# Same sweep as for the noisy observation, but with the binary observation gen_G2(S, tau=l):
# rho varies the AR(1) decay and l the threshold that defines G2. Errors are 1-norms.
# NOTE(review): this loop also rebinds the notebook-level names S, G2, X1, m, S_new and params.
# NOTE(review): generate_sample is called with its fixed default seed, so raising range(1)
# would only repeat identical draws.
rslt2 = []
for i in range(1):
    for rho in [0.8, 0.9, 0.95, 0.99]:
        for l in [0.5, 0.6, 0.7, 0.8, 0.9]:
            S = gen_S(rho, N=500)
            G2 = gen_G2(S, tau=l)
            X1 = generate_sample(S, T=200)
            m, S_new, params = estimate(G2, X1)
            dct = dd_rslt(S, m, 1)
            dct["Adapt Corr Thresholding"] = LA.norm(S_new - S, ord=1)
            dct["rho"] = rho
            dct["l"] = l
            rslt2 += [dct]

In [None]:
# Tabulate the noise-scale sweep, indexed by (rho, scale), and export it as LaTeX.
df = pd.DataFrame(rslt)
# Use the full option key: on recent pandas the bare pattern "precision" also matches
# "styler.format.precision", and set_option raises OptionError for ambiguous patterns.
# NOTE(review): confirm that display.precision still affects to_latex() on the installed
# pandas; if not, pass float_format to to_latex() explicitly.
pd.set_option("display.precision", 2)
df = df.set_index(['rho', 'scale'])
# NOTE(review): the file receives LaTeX source despite its .json name; the name is kept
# so that anything already reading 'rslt.json' keeps working.
with open('rslt.json', 'w') as f:
    f.write(df.to_latex())