
# Covariance estimation using DAG model

In [1]:
import numpy as np
import pandas as pd

### First, define the true covariance structure among the assets

In [2]:
# Asset universe; an asset's position in this list is its row/column index
# in every matrix below.
keys = ['SP500', 'TECH', 'AMZN', 'GOOG', 'GS']

# Per-asset volatilities and one common pairwise correlation.
vol = np.array([1, 1.5, 2, 3, 1.2])
n_asset = vol.size
rho = 0.5

# Equicorrelation matrix: 1 on the diagonal, rho everywhere else.
corr = (1 - rho) * np.eye(n_asset) + rho * np.ones((n_asset, n_asset))

# cov[i, j] = corr[i, j] * vol[i] * vol[j]
cov = corr * vol[:, np.newaxis] * vol

cov_df = pd.DataFrame(cov, columns=keys)
cov_df

Unnamed: 0,SP500,TECH,AMZN,GOOG,GS
0,1.0,0.75,1.0,1.5,0.6
1,0.75,2.25,1.5,2.25,0.9
2,1.0,1.5,4.0,3.0,1.2
3,1.5,2.25,3.0,9.0,1.8
4,0.6,0.9,1.2,1.8,1.44


### Let's define two functions: 

In [3]:
# Return regression result given covariance matrix

def run_reg(cov, y_key, xs_key, keys):
    """Compute regression coefficients of one asset on others from a covariance matrix.

    Solves ``cov[xs, xs] @ beta = cov[y, xs]`` for ``beta``; no intercept is
    computed and no sample data is involved.

    Parameters
    ----------
    cov : array-like, shape (len(keys), len(keys))
        Covariance matrix whose i-th row/column corresponds to ``keys[i]``.
        Anything ``np.asarray`` can turn into a 2-D float array is accepted
        (e.g. an ndarray or a DataFrame).
    y_key : hashable
        Label of the dependent asset; must be in ``keys``.
    xs_key : list
        Labels of the regressors; each must be in ``keys``.
    keys : sequence
        Asset labels, in the same order as the rows/columns of ``cov``.

    Returns
    -------
    pandas.DataFrame
        A single-row frame with one column per regressor (in ``xs_key``
        order) holding the coefficients.

    Raises
    ------
    KeyError
        If ``y_key`` or any entry of ``xs_key`` is not in ``keys``.
    numpy.linalg.LinAlgError
        If the regressor covariance block is singular.
    """
    # Positional indexing below needs a plain array; a DataFrame would
    # otherwise fail on cov[y_ind, xs_ind].
    cov = np.asarray(cov, dtype=float)

    position = {key: i for i, key in enumerate(keys)}
    y_ind = position[y_key]
    xs_ind = [position[x] for x in xs_key]

    cov_y = cov[y_ind, xs_ind]
    cov_xs = cov[np.ix_(xs_ind, xs_ind)]

    beta = np.linalg.solve(cov_xs, cov_y)

    return pd.DataFrame([beta], columns=xs_key)

# Estimate covariance from actual covar and dag

def dag_cov_est(cov, keys, dag):
    """Estimate covariance and precision matrices implied by a DAG.

    Each node listed in ``dag`` is regressed on its parents using the
    entries of ``cov``; the regression coefficients and residual variances
    define ``A @ x = eps`` with independent residuals, from which

        precision  = A.T @ diag(1 / var_eps) @ A
        covariance = inv(A) @ diag(var_eps) @ inv(A).T

    Only the diagonal of ``cov`` and the blocks ``cov[k, parents]`` /
    ``cov[parents, parents]`` are read; all other entries are ignored.

    Parameters
    ----------
    cov : array-like, shape (len(keys), len(keys))
        Covariance matrix whose i-th row/column corresponds to ``keys[i]``.
        It is converted to a float ndarray, so integer arrays and
        DataFrames are accepted. The input is not modified.
    keys : sequence
        Node labels, in the same order as the rows/columns of ``cov``.
    dag : dict
        Maps a node label to the list of its parent labels. Nodes that are
        missing from ``dag`` or have an empty parent list are treated as
        roots (residual variance equal to their own variance).
        NOTE: acyclicity of ``dag`` is not checked here.

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        ``(cov_est, prec_est)``, both with ``keys`` as column labels and a
        default integer index.

    Raises
    ------
    KeyError
        If a label in ``dag`` is not in ``keys``.
    numpy.linalg.LinAlgError
        If a parent covariance block is singular.
    """
    # Force float: with an integer-dtype input, eps_vec would inherit the
    # integer dtype and the in-place update below would silently truncate.
    cov = np.asarray(cov, dtype=float)
    position = {key: i for i, key in enumerate(keys)}

    # Resolve every label up front so unknown labels fail before any math.
    dag_ind = {
        position[child]: sorted(position[parent] for parent in parents)
        for child, parents in dag.items()
    }

    a_mat = np.eye(len(keys))
    eps_vec = np.diag(cov).copy()  # residual variances; roots keep their own variance
    for k, ind_pa in dag_ind.items():
        if not ind_pa:
            # No parents: nothing to regress on, node stays a root.
            continue
        # neg_beta = -(regression coefficients of node k on its parents)
        neg_beta = np.linalg.solve(cov[np.ix_(ind_pa, ind_pa)], -cov[k, ind_pa])
        a_mat[k, ind_pa] = neg_beta
        # var(eps_k) = cov[k, k] - cov[k, pa] @ beta
        eps_vec[k] += cov[k, ind_pa] @ neg_beta

    prec_est = a_mat.T @ np.diag(1 / eps_vec) @ a_mat
    a_inv = np.linalg.inv(a_mat)
    cov_est = a_inv @ np.diag(eps_vec) @ a_inv.T

    return pd.DataFrame(cov_est, columns=keys), pd.DataFrame(prec_est, columns=keys)

### Let's regress 'GOOG' on other assets

In [5]:
# GOOG on SP500 alone, using the true covariance.
run_reg(cov=cov, y_key='GOOG', xs_key=['SP500'], keys=keys)

Unnamed: 0,SP500
0,1.5


In [6]:
# GOOG on SP500 and TECH, using the true covariance.
run_reg(cov=cov, y_key='GOOG', xs_key=['SP500', 'TECH'], keys=keys)

Unnamed: 0,SP500,TECH
0,1.0,0.666667


In [7]:
# GOOG on SP500, TECH and AMZN, using the true covariance.
run_reg(cov=cov, y_key='GOOG', xs_key=['SP500', 'TECH', 'AMZN'], keys=keys)

Unnamed: 0,SP500,TECH,AMZN
0,0.75,0.5,0.375


### Let's regress 'GS' on other assets

In [8]:
# GS on SP500 alone, using the true covariance.
run_reg(cov=cov, y_key='GS', xs_key=['SP500'], keys=keys)

Unnamed: 0,SP500
0,0.6


In [9]:
# GS on SP500 and TECH, using the true covariance.
run_reg(cov=cov, y_key='GS', xs_key=['SP500', 'TECH'], keys=keys)

Unnamed: 0,SP500,TECH
0,0.4,0.266667


In [10]:
## Now let's estimate a new covariance matrix from DAG
### Let's give a DAG first

In [12]:
# Parent list for each non-root node. SP500 has no entry, so it has no parents.
dag = {
    'TECH': ['SP500'],
    'AMZN': ['SP500', 'TECH'],
    'GOOG': ['SP500', 'TECH'],
    'GS': ['SP500'],
}

In [13]:
# Fit the DAG-implied covariance and precision from the true covariance.
cov_est_df, prec_est_df = dag_cov_est(cov, keys, dag)

# Raw array form of the covariance estimate.
cov_est = cov_est_df.values
# NOTE(review): prec_est stays a DataFrame here (no .values), unlike cov_est.
prec_est = prec_est_df

# Divide out the volatilities to obtain the implied correlation matrix.
corr_est = cov_est / vol[:, np.newaxis] / vol

print(cov_est_df)
# Non-zero entries mark the pairs whose correlation the DAG does not reproduce.
print(corr_est - corr)

   SP500  TECH  AMZN  GOOG    GS
0   1.00  0.75   1.0  1.50  0.60
1   0.75  2.25   1.5  2.25  0.45
2   1.00  1.50   4.0  2.00  0.60
3   1.50  2.25   2.0  9.00  0.90
4   0.60  0.45   0.6  0.90  1.44
[[ 0.          0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.         -0.25      ]
 [ 0.          0.          0.         -0.16666667 -0.25      ]
 [ 0.          0.         -0.16666667  0.         -0.25      ]
 [ 0.         -0.25       -0.25       -0.25        0.        ]]


In [14]:
# GOOG on SP500 alone, now using the DAG-implied covariance.
run_reg(cov=cov_est, y_key='GOOG', xs_key=['SP500'], keys=keys)

Unnamed: 0,SP500
0,1.5


In [15]:
# GOOG on its two DAG parents (SP500, TECH), using the DAG-implied covariance.
run_reg(cov=cov_est, y_key='GOOG', xs_key=['SP500', 'TECH'], keys=keys)

Unnamed: 0,SP500,TECH
0,1.0,0.666667


In [16]:
# AMZN is not a parent of GOOG in the DAG, so under the DAG-implied covariance
# its coefficient is zero up to floating-point noise.
run_reg(cov=cov_est, y_key='GOOG', xs_key=['SP500', 'TECH', 'AMZN'], keys=keys)

Unnamed: 0,SP500,TECH,AMZN
0,1.0,0.666667,1.0408340000000001e-17


In [17]:
# GS on its only DAG parent (SP500), using the DAG-implied covariance.
run_reg(cov=cov_est, y_key='GS', xs_key=['SP500'], keys=keys)

Unnamed: 0,SP500
0,0.6


In [18]:
# TECH is not a parent of GS in the DAG, so under the DAG-implied covariance
# its coefficient is zero up to floating-point noise.
run_reg(cov=cov_est, y_key='GS', xs_key=['SP500', 'TECH'], keys=keys)

Unnamed: 0,SP500,TECH
0,0.6,-1.6447750000000002e-17


### Let's check the conditionally independent pairs and tweak them a bit

In [19]:
# Display the DAG-implied covariance estimate again for reference.
cov_est_df

Unnamed: 0,SP500,TECH,AMZN,GOOG,GS
0,1.0,0.75,1.0,1.5,0.6
1,0.75,2.25,1.5,2.25,0.45
2,1.0,1.5,4.0,2.0,0.6
3,1.5,2.25,2.0,9.0,0.9
4,0.6,0.45,0.6,0.9,1.44


In [20]:
# Overwrite two symmetric off-diagonal pairs of the true covariance:
# (AMZN, GOOG) -> -2 and (TECH, GS) -> -3. Neither pair is joined by an edge in the DAG.
# NOTE(review): this mutates `cov` in place, so earlier cells that read `cov`
# give different results if they are re-run after this one.
cov[2, 3] = -2
cov[3, 2] = -2
cov[1, 4] = -3
cov[4, 1] = -3

In [21]:
# Display the covariance matrix after the in-place tweak.
cov

array([[ 1.  ,  0.75,  1.  ,  1.5 ,  0.6 ],
       [ 0.75,  2.25,  1.5 ,  2.25, -3.  ],
       [ 1.  ,  1.5 ,  4.  , -2.  ,  1.2 ],
       [ 1.5 ,  2.25, -2.  ,  9.  ,  1.8 ],
       [ 0.6 , -3.  ,  1.2 ,  1.8 ,  1.44]])

### The estimated covariance from the DAG doesn't change, which means the tweaked entries of the covariance matrix are never used by the estimator

In [22]:
# Re-fit the DAG estimate, this time from the tweaked covariance matrix.
cov_est_df2, prec_est_df2 = dag_cov_est(cov=cov, keys=keys, dag=dag)

In [23]:
# Element-wise difference between the re-fitted estimate and the original one.
cov_est_df2 - cov_est_df

Unnamed: 0,SP500,TECH,AMZN,GOOG,GS
0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0
