![question](img/spring17q3.png)

In [76]:
import numpy as np
import pandas as pd
import pyquantlib as pq
from IPython.display import display
import matplotlib.pyplot as plt
%matplotlib inline
from importlib import reload

from scipy.linalg import cho_solve

In [10]:
# Price series supplied by the question: ten observations per ticker.
# Each ticker lives in its own one-entry dict so the series can be merged by key later.
JPM = dict(JPM=np.array([39.0, 35.8, 36.9, 35.7, 35.6, 34.8, 35.6, 36.7, 36.2, 37.6]))
GS = dict(GS=np.array([118.3, 112.7, 108.5, 104.2, 102.6, 101.5, 100.2, 98.8, 96.3, 99.1]))
MS = dict(MS=np.array([17.2, 16.4, 14.6, 13.1, 14.6, 13.7, 12.8, 12.7, 12.1, 11.3]))
BAC = dict(BAC=np.array([8.7, 8.8, 8.5, 8.2, 7.8, 7.3, 7.5, 7.2, 7.3, 7.2]))

# Column order used when the prices are laid out as a table.
col_order = [
    'JPM',
    'GS',
    'MS',
    'BAC',
]

In [11]:
# Merge the four one-entry dicts into a single ticker -> price-array mapping.
px_dict = {}
for ticker_series in (JPM, GS, MS, BAC):
    px_dict.update(ticker_series)
px_dict

{'BAC': array([ 8.7,  8.8,  8.5,  8.2,  7.8,  7.3,  7.5,  7.2,  7.3,  7.2]),
 'GS': array([ 118.3,  112.7,  108.5,  104.2,  102.6,  101.5,  100.2,   98.8,
          96.3,   99.1]),
 'JPM': array([ 39. ,  35.8,  36.9,  35.7,  35.6,  34.8,  35.6,  36.7,  36.2,  37.6]),
 'MS': array([ 17.2,  16.4,  14.6,  13.1,  14.6,  13.7,  12.8,  12.7,  12.1,  11.3])}

In [12]:
# Tabulate the prices (one column per ticker), arranged in the order given by col_order.
px_frame = pd.DataFrame(px_dict)
df = px_frame.loc[:, col_order]
df

Unnamed: 0,JPM,GS,MS,BAC
0,39.0,118.3,17.2,8.7
1,35.8,112.7,16.4,8.8
2,36.9,108.5,14.6,8.5
3,35.7,104.2,13.1,8.2
4,35.6,102.6,14.6,7.8
5,34.8,101.5,13.7,7.3
6,35.6,100.2,12.8,7.5
7,36.7,98.8,12.7,7.2
8,36.2,96.3,12.1,7.3
9,37.6,99.1,11.3,7.2


#### Part i - Log returns

In [21]:
# Log return = log(P_t) - log(P_{t-1}); the first row has no predecessor, so drop it.
log_px = np.log(df)
df_log = log_px.diff().iloc[1:]
df_log

Unnamed: 0,JPM,GS,MS,BAC
1,-0.085614,-0.048494,-0.047628,0.011429
2,0.030264,-0.037979,-0.11626,-0.034686
3,-0.033061,-0.040438,-0.108409,-0.035932
4,-0.002805,-0.015474,0.108409,-0.05001
5,-0.022728,-0.010779,-0.063626,-0.066249
6,0.022728,-0.012891,-0.067951,0.027029
7,0.030431,-0.014071,-0.007843,-0.040822
8,-0.013718,-0.025629,-0.048397,0.013793
9,0.037945,0.028661,-0.068403,-0.013793


#### Part i - Percentage returns

In [22]:
# Simple (percentage) return between consecutive rows; the first row is NaN and is dropped.
pct_with_nan = df.pct_change()
df_pct = pct_with_nan.iloc[1:]
df_pct

Unnamed: 0,JPM,GS,MS,BAC
1,-0.082051,-0.047337,-0.046512,0.011494
2,0.030726,-0.037267,-0.109756,-0.034091
3,-0.03252,-0.039631,-0.10274,-0.035294
4,-0.002801,-0.015355,0.114504,-0.04878
5,-0.022472,-0.010721,-0.061644,-0.064103
6,0.022989,-0.012808,-0.065693,0.027397
7,0.030899,-0.013972,-0.007813,-0.04
8,-0.013624,-0.025304,-0.047244,0.013889
9,0.038674,0.029076,-0.066116,-0.013699


#### Part ii - sample covariance matrix

In [25]:
# Centre each ticker's returns by subtracting its own column mean.
col_means = df_pct.mean()
t_bar = df_pct.sub(col_means, axis='columns')
t_bar

Unnamed: 0,JPM,GS,MS,BAC
1,-0.078698,-0.02808,-0.002843,0.031848
2,0.03408,-0.018009,-0.066088,-0.013737
3,-0.029167,-0.020374,-0.059072,-0.01494
4,0.000552,0.003903,0.158172,-0.028426
5,-0.019118,0.008537,-0.017976,-0.043749
6,0.026342,0.00645,-0.022025,0.047751
7,0.034252,0.005286,0.035856,-0.019646
8,-0.010271,-0.006046,-0.003576,0.034243
9,0.042027,0.048334,-0.022448,0.006655


In [44]:
# Sample covariance: (1 / (N - 1)) * T_bar^T T_bar, with N the number of return rows.
n_returns = len(t_bar)
gram = t_bar.T.dot(t_bar)
cov_mat = (1 / (n_returns - 1)) * gram
cov_mat

Unnamed: 0,JPM,GS,MS,BAC
JPM,0.001539,0.000559,-1.7e-05,-0.000151
GS,0.000559,0.000507,0.00024,-6.4e-05
MS,-1.7e-05,0.00024,0.004437,-0.000505
BAC,-0.000151,-6.4e-05,-0.000505,0.001004


In [56]:
# Check against NumPy: np.cov expects one variable per row, hence the transpose.
np.cov(df_pct.T)

array([[  1.53877700e-03,   5.59201443e-04,  -1.66569264e-05,
         -1.50640136e-04],
       [  5.59201443e-04,   5.07275785e-04,   2.40187149e-04,
         -6.35138665e-05],
       [ -1.66569264e-05,   2.40187149e-04,   4.43675878e-03,
         -5.04755609e-04],
       [ -1.50640136e-04,  -6.35138665e-05,  -5.04755609e-04,
          1.00390508e-03]])

#### Part ii - Correlation matrix

In [51]:
# Diagonal matrix holding 1 / (standard deviation) of each ticker's returns.
variances = np.diag(cov_mat)
std_devs = np.sqrt(variances)
D_inv = np.diag(1 / std_devs)
D_inv

array([[ 25.49248411,   0.        ,   0.        ,   0.        ],
       [  0.        ,  44.39948514,   0.        ,   0.        ],
       [  0.        ,   0.        ,  15.0129864 ,   0.        ],
       [  0.        ,   0.        ,   0.        ,  31.56121214]])

In [54]:
# Correlation = D^{-1} * Cov * D^{-1}: scale the rows first, then the columns.
row_scaled = D_inv.dot(cov_mat)
corr_mat = row_scaled.dot(D_inv)
corr_mat

array([[ 1.        ,  0.63293393, -0.00637491, -0.12120109],
       [ 0.63293393,  1.        ,  0.16010128, -0.08900208],
       [-0.00637491,  0.16010128,  1.        , -0.23916737],
       [-0.12120109, -0.08900208, -0.23916737,  1.        ]])

In [55]:
# Check against NumPy: np.corrcoef treats each row as one variable, so the
# returns are passed transposed (one ticker per row).
# `.values` is used instead of `DataFrame.as_matrix()`, which was deprecated in
# pandas 0.23 and removed in pandas 1.0 (calling it now raises AttributeError).
np.corrcoef(df_pct.values.transpose())

array([[ 1.        ,  0.63293393, -0.00637491, -0.12120109],
       [ 0.63293393,  1.        ,  0.16010128, -0.08900208],
       [-0.00637491,  0.16010128,  1.        , -0.23916737],
       [-0.12120109, -0.08900208, -0.23916737,  1.        ]])

#### Part iii - Linear regression

In [60]:
# Re-display the percentage returns; the cells below take their regression inputs from these columns.
df_pct

Unnamed: 0,JPM,GS,MS,BAC
1,-0.082051,-0.047337,-0.046512,0.011494
2,0.030726,-0.037267,-0.109756,-0.034091
3,-0.03252,-0.039631,-0.10274,-0.035294
4,-0.002801,-0.015355,0.114504,-0.04878
5,-0.022472,-0.010721,-0.061644,-0.064103
6,0.022989,-0.012808,-0.065693,0.027397
7,0.030899,-0.013972,-0.007813,-0.04
8,-0.013624,-0.025304,-0.047244,0.013889
9,0.038674,0.029076,-0.066116,-0.013699


In [67]:
# Regression target: GS returns as an (n, 1) column vector.
gs_ts = df_pct[['GS']].values
gs_ts

array([[-0.04733728],
       [-0.03726708],
       [-0.03963134],
       [-0.01535509],
       [-0.01072125],
       [-0.01280788],
       [-0.01397206],
       [-0.02530364],
       [ 0.0290758 ]])

In [68]:
# Design matrix: a leading column of ones (the intercept term) followed by
# the JPM, MS and BAC return series, one row per observation.
n_rows = len(gs_ts)
ones = np.array([1] * n_rows)
jpm_ts, ms_ts, bac_ts = (df_pct[ticker].values for ticker in ('JPM', 'MS', 'BAC'))
A = np.column_stack((ones, jpm_ts, ms_ts, bac_ts))
A

array([[ 1.        , -0.08205128, -0.04651163,  0.01149425],
       [ 1.        ,  0.03072626, -0.1097561 , -0.03409091],
       [ 1.        , -0.03252033, -0.10273973, -0.03529412],
       [ 1.        , -0.00280112,  0.11450382, -0.04878049],
       [ 1.        , -0.02247191, -0.06164384, -0.06410256],
       [ 1.        ,  0.02298851, -0.06569343,  0.02739726],
       [ 1.        ,  0.03089888, -0.0078125 , -0.04      ],
       [ 1.        , -0.01362398, -0.04724409,  0.01388889],
       [ 1.        ,  0.03867403, -0.0661157 , -0.01369863]])

In [69]:
# Ingredients of the normal equations (A^T A) x = A^T y.
A_t = A.T
AtA = A_t.dot(A)
Aty = A_t.dot(gs_ts)

In [78]:
# Solve the normal equations (A^T A) x = A^T y through a Cholesky factorisation.
# np.linalg.cholesky returns the lower-triangular factor, so cho_solve is told lower=True.
Ut = np.linalg.cholesky(AtA)
is_lower = True
x = cho_solve((Ut, is_lower), Aty)
x

array([[-0.01508047],
       [ 0.36606523],
       [ 0.05787188],
       [ 0.02076033]])

In [89]:
# Cross-check the Cholesky solution against scikit-learn's least-squares fit.
from sklearn import linear_model as lm
# A already contains the column of ones, so sklearn must not add an intercept of its own.
m = lm.LinearRegression(fit_intercept=False).fit(A, gs_ts)
m.coef_.reshape(-1, 1)

array([[-0.01508047],
       [ 0.36606523],
       [ 0.05787188],
       [ 0.02076033]])