In [30]:
import numpy as np
import pandas as pd
from scipy.linalg import cho_solve

import matplotlib.pyplot as plt
%matplotlib inline

In [20]:
# Read the spreadsheet, index the rows by 'Date' in ascending order, and
# shorten the two verbose column labels to plain tickers.
fn = 'financials2012.xlsx'
df = (
    pd.read_excel(fn, parse_dates=['Date'])
    .set_index('Date')
    .sort_index()
    .rename(columns={'RY (RBC)': 'RBC', 'BCS (Barclays)': 'BARC'})
)

### Part i) Pct Change

In [47]:
# Row-over-row fractional change of every column.  The first row has no
# predecessor to compare against, so it is skipped.  A constant column of 1s
# is appended so that regressions selecting 'ones' can fit an intercept.
dfp = df.pct_change().iloc[1:].assign(ones=1)
dfp.head()

Unnamed: 0_level_0,JPM,GS,MS,BAC,RBS,CS,UBS,RBC,BARC,ones
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2012-01-17,0.040137,0.098813,0.106254,0.069909,0.16712,0.15071,0.147929,0.044201,0.127273,1
2012-01-23,-0.004105,0.027928,0.011526,0.03125,0.01397,0.028264,0.025773,-0.004854,0.010264,1
2012-01-30,0.028854,0.051531,0.094411,0.075758,0.043628,0.044135,0.04379,0.02478,0.071843,1
2012-02-06,-0.017628,-0.029024,-0.03173,0.029449,-0.035204,-0.076752,-0.05227,-0.003427,-0.018957,1
2012-02-13,0.02311,0.0157,-0.025602,-0.006219,0.009122,0.042169,0.030479,-0.001337,0.070393,1


### Part ii) Linear Regression

In [38]:
# Design matrix A: the eight peer-bank return columns followed by the constant
# 'ones' column (intercept term).  Target b: the JPM returns, kept 2-D with a
# single column because a one-element column list is selected.
#
# DataFrame.as_matrix() was deprecated in pandas 0.23 and removed in 1.0, so it
# raises AttributeError on current pandas.  `.values` yields the same ndarray
# and exists on both old and new versions.
A = dfp[['GS', 'MS', 'BAC', 'RBS', 'CS', 'UBS', 'RBC', 'BARC', 'ones']].values
b = dfp[['JPM']].values

In [42]:
# Least squares via the normal equations (A^T A) x = A^T b.
At = A.T
AtA = At @ A
Atb = At @ b
# Factor A^T A with Cholesky, then solve against that factor.  The flag True
# tells cho_solve the supplied factor is lower-triangular, which is the form
# np.linalg.cholesky returns.
chol_lower = np.linalg.cholesky(AtA)
x = cho_solve((chol_lower, True), Atb)

**Solved Coefficients:**

In [43]:
# Display the solved coefficient column vector x.  Each row corresponds to the
# column of A in the same position, so with the A built for Part ii the last
# row is the coefficient of the constant 'ones' column.
print(x)

[[ 0.7662948 ]
 [-0.07776546]
 [ 0.29795058]
 [ 0.28117032]
 [-0.05761416]
 [-0.41404465]
 [ 0.14759435]
 [-0.00880578]
 [-0.00335278]]


In [45]:
# Fitted values from the solved coefficients, and the residual left over
# after subtracting them from the observed target.
pred = A @ x
err = np.subtract(b, pred)

**Approximation Error (2-norm of the residual):**

In [46]:
# Euclidean (2-) norm of the residual, sqrt(err^T err).  err is a column
# vector, so the product is a 1x1 array and is printed as such.
print(np.sqrt(err.T @ err))

[[ 0.1294512]]


### Part iii) Linear Regression - Financials

In [48]:
# Regress JPM on GS, MS and BAC plus an intercept (the constant 'ones' column).
#
# DataFrame.as_matrix() was deprecated in pandas 0.23 and removed in 1.0, so it
# raises AttributeError on current pandas.  `.values` yields the same ndarray
# and exists on both old and new versions.
A = dfp[['GS', 'MS', 'BAC', 'ones']].values
b = dfp[['JPM']].values

# Normal equations (A^T A) x = A^T b.
AtA = np.matmul(A.transpose(), A)
Atb = np.matmul(A.transpose(), b)

# np.linalg.cholesky returns the lower-triangular factor; the True passed to
# cho_solve is its `lower` flag and must match that.
Ut = np.linalg.cholesky(AtA)
x = cho_solve((Ut, True), Atb)

# Fitted values and residual for the error norm printed further down.
pred = np.matmul(A, x)
err = b - pred

**Solved Coefficients:**

In [49]:
# Display the solved coefficient column vector x.  Rows follow the column
# order of A; for the Part iii fit that is GS, MS, BAC, then the constant
# 'ones' column.
print(x)

[[ 0.65371049]
 [-0.03750398]
 [ 0.26128533]
 [-0.00139422]]


**Approximation Error (2-norm of the residual):**

In [50]:
# Euclidean (2-) norm of the residual, sqrt(err^T err).  err is a column
# vector, so the product is a 1x1 array and is printed as such.
print(np.sqrt(err.T @ err))

[[ 0.14988401]]


### Part iv) Linear Regression - Other Stocks

In [51]:
# Regress JPM on RBS, CS, UBS, RBC and BARC plus an intercept (the constant
# 'ones' column).
#
# DataFrame.as_matrix() was deprecated in pandas 0.23 and removed in 1.0, so it
# raises AttributeError on current pandas.  `.values` yields the same ndarray
# and exists on both old and new versions.
A = dfp[['RBS', 'CS', 'UBS', 'RBC', 'BARC', 'ones']].values
b = dfp[['JPM']].values

# Normal equations (A^T A) x = A^T b.
AtA = np.matmul(A.transpose(), A)
Atb = np.matmul(A.transpose(), b)

# np.linalg.cholesky returns the lower-triangular factor; the True passed to
# cho_solve is its `lower` flag and must match that.
Ut = np.linalg.cholesky(AtA)
x = cho_solve((Ut, True), Atb)

# Fitted values and residual for the error norm printed further down.
pred = np.matmul(A, x)
err = b - pred

**Solved Coefficients:**

In [52]:
# Display the solved coefficient column vector x.  Rows follow the column
# order of A; for the Part iv fit that is RBS, CS, UBS, RBC, BARC, then the
# constant 'ones' column.
print(x)

[[ 0.3132172 ]
 [ 0.06125269]
 [ 0.05407512]
 [ 0.48829307]
 [-0.0079035 ]
 [ 0.00071738]]


**Approximation Error (2-norm of the residual):**

In [53]:
# Euclidean (2-) norm of the residual, sqrt(err^T err).  err is a column
# vector, so the product is a 1x1 array and is printed as such.
print(np.sqrt(err.T @ err))

[[ 0.17541378]]


### Comments:

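The approximation error was highest when regressing JPM against only the non-American banks (≈ 0.175) and lowest when regressing it against all of the stocks (≈ 0.129); the fit against only GS, MS and BAC fell in between (≈ 0.150). This ordering is expected: the full model contains every regressor used by the two smaller models, so its least-squares residual cannot be larger than either of theirs.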