In [26]:
from importlib import reload

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pyquantlib as pq
from IPython.display import display
from scipy.linalg import cho_solve
from sklearn import linear_model as lm

%matplotlib inline

In [27]:
# Price series given in the question. Each ticker is kept in its own
# single-key dict so the series can be merged with ** unpacking afterwards.
JPM = dict(JPM=np.array([39.0, 36.8, 36.9, 36.7, 36.6, 35.8, 36.6, 33.7]))
GS = dict(GS=np.array([116.3, 105.7, 104.5, 103.2, 102.6, 100.5, 101.2, 93.8]))
MS = dict(MS=np.array([17.1, 15.0, 14.6, 14.5, 14.6, 13.8, 13.5, 12.7]))
BAC = dict(BAC=np.array([8.8, 8.0, 8.2, 8.0, 7.7, 7.4, 7.3, 7.1]))

# Column order used when the price table is assembled.
col_order = [
    'JPM',
    'GS',
    'MS',
    'BAC',
]

In [28]:
# Merge the per-ticker dicts into one ticker -> price-array mapping,
# inserting them in the order JPM, GS, MS, BAC.
px_dict = {}
for ticker_prices in (JPM, GS, MS, BAC):
    px_dict.update(ticker_prices)
px_dict

{'BAC': array([ 8.8,  8. ,  8.2,  8. ,  7.7,  7.4,  7.3,  7.1]),
 'GS': array([ 116.3,  105.7,  104.5,  103.2,  102.6,  100.5,  101.2,   93.8]),
 'JPM': array([ 39. ,  36.8,  36.9,  36.7,  36.6,  35.8,  36.6,  33.7]),
 'MS': array([ 17.1,  15. ,  14.6,  14.5,  14.6,  13.8,  13.5,  12.7])}

In [29]:
# Build the price table, then select the columns in the order given by col_order.
px_table = pd.DataFrame(px_dict)
df = px_table.loc[:, col_order]
df

Unnamed: 0,JPM,GS,MS,BAC
0,39.0,116.3,17.1,8.8
1,36.8,105.7,15.0,8.0
2,36.9,104.5,14.6,8.2
3,36.7,103.2,14.5,8.0
4,36.6,102.6,14.6,7.7
5,35.8,100.5,13.8,7.4
6,36.6,101.2,13.5,7.3
7,33.7,93.8,12.7,7.1


#### Part i - Log returns (alternative)

In [None]:
# Log return = log(p_t) - log(p_{t-1}); the first row has no predecessor
# (it is NaN), so it is dropped.
df_log = np.log(df).diff().iloc[1:]
df_log

#### Part i - Percentage returns

In [30]:
# Period-over-period percentage change; the first row has no previous
# price to compare against, so it is sliced off.
pct_all_rows = df.pct_change()
df_pct = pct_all_rows.iloc[1:]
df_pct

Unnamed: 0,JPM,GS,MS,BAC
1,-0.05641,-0.091144,-0.122807,-0.090909
2,0.002717,-0.011353,-0.026667,0.025
3,-0.00542,-0.01244,-0.006849,-0.02439
4,-0.002725,-0.005814,0.006897,-0.0375
5,-0.021858,-0.020468,-0.054795,-0.038961
6,0.022346,0.006965,-0.021739,-0.013514
7,-0.079235,-0.073123,-0.059259,-0.027397


#### Part ii - sample covariance matrix

In [31]:
# Demean every column: subtract each ticker's average return from its series.
col_means = df_pct.mean()
t_bar = df_pct.sub(col_means, axis='columns')
t_bar

Unnamed: 0,JPM,GS,MS,BAC
1,-0.036327,-0.061518,-0.082061,-0.061242
2,0.022801,0.018272,0.014079,0.054667
3,0.014663,0.017185,0.033896,0.005277
4,0.017359,0.023811,0.047642,-0.007833
5,-0.001774,0.009157,-0.014049,-0.009294
6,0.04243,0.03659,0.019006,0.016154
7,-0.059152,-0.043497,-0.018514,0.00227


In [32]:
# Sample covariance: (1 / (N - 1)) * T^T T, where T holds the demeaned returns
# and N is the number of return observations (rows of t_bar).
inv_dof = 1 / (len(t_bar) - 1)
cross_products = t_bar.T.dot(t_bar)
cov_mat = inv_dof * cross_products
cov_mat

Unnamed: 0,JPM,GS,MS,BAC
JPM,0.001276,0.001238,0.001092,0.000663
GS,0.001238,0.001383,0.001399,0.000846
MS,0.001092,0.001399,0.001875,0.000999
BAC,0.000663,0.000846,0.000999,0.001197


In [33]:
# Check: NumPy's covariance estimator (one variable per row, hence the
# transpose) must agree with the hand-computed cov_mat. The assertion makes
# the cell fail loudly on a mismatch instead of relying on a visual comparison.
cov_check = np.cov(df_pct.transpose())
np.testing.assert_allclose(cov_check, cov_mat)
cov_check

array([[ 0.00127637,  0.00123765,  0.00109209,  0.00066337],
       [ 0.00123765,  0.00138257,  0.00139909,  0.0008463 ],
       [ 0.00109209,  0.00139909,  0.0018754 ,  0.00099942],
       [ 0.00066337,  0.0008463 ,  0.00099942,  0.00119679]])

#### Part ii - Correlation matrix

In [9]:
# Diagonal scaling matrix whose entries are 1 / sigma_i, where sigma_i is the
# square root of the i-th diagonal entry (variance) of cov_mat.
std_devs = np.sqrt(np.diag(cov_mat))
D_inv = np.diag(1 / std_devs)
D_inv

array([[ 27.99061481,   0.        ,   0.        ,   0.        ],
       [  0.        ,  26.89406287,   0.        ,   0.        ],
       [  0.        ,   0.        ,  23.09154883,   0.        ],
       [  0.        ,   0.        ,   0.        ,  28.90620011]])

In [10]:
# Correlation = D^-1 * Cov * D^-1: scale the rows first, then the columns.
row_scaled = D_inv.dot(cov_mat)
corr_mat = row_scaled.dot(D_inv)
corr_mat

array([[ 1.        ,  0.93168228,  0.70587094,  0.53673298],
       [ 0.93168228,  1.        ,  0.86887333,  0.65791811],
       [ 0.70587094,  0.86887333,  1.        ,  0.66710157],
       [ 0.53673298,  0.65791811,  0.66710157,  1.        ]])

In [11]:
# Check: NumPy's correlation estimator (one variable per row, hence the
# transpose) must agree with the hand-computed corr_mat.
# DataFrame.as_matrix() was removed in pandas 1.0; .values yields the same
# ndarray and is what the rest of this notebook already uses.
corr_check = np.corrcoef(df_pct.values.transpose())
np.testing.assert_allclose(corr_check, corr_mat)
corr_check

array([[ 1.        ,  0.93168228,  0.70587094,  0.53673298],
       [ 0.93168228,  1.        ,  0.86887333,  0.65791811],
       [ 0.70587094,  0.86887333,  1.        ,  0.66710157],
       [ 0.53673298,  0.65791811,  0.66710157,  1.        ]])

#### Part iii - Linear regression

In [13]:
# Re-display the percentage returns that feed the regression below.
df_pct

Unnamed: 0,JPM,GS,MS,BAC
1,-0.05641,-0.091144,-0.122807,-0.090909
2,0.002717,-0.011353,-0.026667,0.025
3,-0.00542,-0.01244,-0.006849,-0.02439
4,-0.002725,-0.005814,0.006897,-0.0375
5,-0.021858,-0.020468,-0.054795,-0.038961
6,0.022346,0.006965,-0.021739,-0.013514
7,-0.079235,-0.073123,-0.059259,-0.027397


In [20]:
# Regression target: JPM returns as an (N, 1) column vector.
jpm_returns = df_pct['JPM'].values
jpm_ts = jpm_returns[:, np.newaxis]
jpm_ts

array([[-0.05641026],
       [ 0.00271739],
       [-0.00542005],
       [-0.0027248 ],
       [-0.02185792],
       [ 0.02234637],
       [-0.07923497]])

In [21]:
# Design matrix: one column per explanatory ticker (GS, MS, BAC) followed by a
# column of ones that carries the intercept term.
gs_ts, ms_ts, bac_ts = (df_pct[ticker].values for ticker in ('GS', 'MS', 'BAC'))
ones = np.ones(len(jpm_ts), dtype=int)
A = np.column_stack((gs_ts, ms_ts, bac_ts, ones))
A

array([[-0.09114359, -0.12280702, -0.09090909,  1.        ],
       [-0.01135289, -0.02666667,  0.025     ,  1.        ],
       [-0.01244019, -0.00684932, -0.02439024,  1.        ],
       [-0.00581395,  0.00689655, -0.0375    ,  1.        ],
       [-0.02046784, -0.05479452, -0.03896104,  1.        ],
       [ 0.00696517, -0.02173913, -0.01351351,  1.        ],
       [-0.07312253, -0.05925926, -0.02739726,  1.        ]])

In [22]:
# Normal-equation terms for least squares: (A^T A) x = A^T y.
AtA = A.T.dot(A)
Aty = A.T.dot(jpm_ts)

In [23]:
# Solve the normal equations AtA x = Aty through a Cholesky factorisation.
# np.linalg.cholesky returns the lower-triangular factor, so cho_solve is told
# that the factor is lower (the True flag in the tuple).
Ut = np.linalg.cholesky(AtA)
factor_is_lower = True
x = cho_solve((Ut, factor_is_lower), Aty)
x

array([[ 1.26903478],
       [-0.32715898],
       [-0.06989255],
       [ 0.00210802]])

In [25]:
# Check the regression against scikit-learn. The intercept is already carried
# by the ones column of A, so the model is fitted with fit_intercept=False.
# `lm` (sklearn.linear_model) is imported in the notebook's top import cell.
m = lm.LinearRegression(fit_intercept=False)
m.fit(A, jpm_ts)
sk_coef = m.coef_.reshape(-1, 1)
# Fail loudly if the Cholesky solution and scikit-learn disagree, rather than
# relying on a visual comparison of the two printed arrays.
np.testing.assert_allclose(sk_coef, x, rtol=1e-6)
sk_coef

array([[ 1.26903478],
       [-0.32715898],
       [-0.06989255],
       [ 0.00210802]])