In [None]:
#Direct Implementation of Linear Regression and PCA 
import numpy as np
from scipy import linalg as la
import matplotlib.pyplot as plt

# Linear Regression
Linear regression fits a linear model to a set of data such that the sum of squares error is minimised. 

- Often formulated separately for the 1 dimentional case, and yet again for multiple regression and multivariate regression (the most general form)


In [5]:
#Begin with Linear Regression: (multiple regression case)
def Fit_Linear_Model(X, y):
    #Through formulating the sum of squares loss function and taking the derivative with respect to the parameters,
    #one can obtain the well known linear regression matrix equation: inv(X^T@X)@X^T@y 
    return np.linalg.inv(np.transpose(X)@X)@np.transpose(X)@y

#generate some toy data to test the regression. 

d = 2

#generate example slope and offset to try predict (case of dim(y)=1)
true_params = np.random.randn(d)
samples = 10000
X = np.random.rand(samples, d)
y = [true_params@x for x in X] 
y_noisy = np.array([y_i + np.random.randn() for y_i in y])

#plt.plot(x_vals, X_noisy)
#plt.show()

res = Fit_Linear_Model(X, y_noisy)
fitted_values = [res@x for x in X]

def squared_loss(x, y):
    return np.sum([(x[i] - y[i])**2 for i in range(0,len(x))])


#check to see how close the estimated parameters are
print("Fitted parameters: " + str(res))
print("True parameters: " + str(true_params))

#find loss for fit
loss_reg = squared_loss(y, fitted_values)
print("Squared loss: " + str(loss_reg))


Fitted parameters: [1.65795389 0.09025386]
True parameters: [1.70870116 0.05053263]
Squared loss: 3.83359231972194



# PCA Notes
Decomposes a matrix into a new coordinate system, where each axis is orthogonal (uncorrelated). The axes are constructed such that the original data projected onto the first axis will have maximum variance, with the remaining axes being in decreasing order of their explained variance. Can then project onto a smaller set of axes while minimising the amount of variance which is lost.   

- Is an example of a biplot, in the sense that the data is represented with respect to 2 entities: the principal axes and the component score. 
- Is formulated in terms of maximising covariance for each component, but is equivalent to a singular value decomposition (and is implemented as such for numerical stability)
- Requires normalisation as, for example, different covariates will be on different scales (and so have different variance, which would effect the influence they have on how the axes are derived). This point explains why centering is a large issue in many of the CoDA papers.


In [98]:
#implement PCA
def PCA(X, d=X.shape[1]):
    #normalise the data
    X = [(x_i - np.mean(x_i))/np.std(x_i) for x_i in X]
    
    #easiest method: compute the singular value decomposition of X
    #the right singular vectors will be the principal axes, and the matrix US will be the scores  
    #(since S is the scaling of the unit directions corresponding to normalised matrix U)
    
    U, S, V_T = la.svd(X, full_matrices=False)
    Sigma_d = np.diag(S[:d])
    
    #score matrix T_d in terms of truncated U_d and Sigma_d
    
    T_d = U[:,:d]@Sigma_d
    return T_d