In [1]:
#Importing Libraries
import numpy as np
import pandas as pd
import scipy.io as scp

In [12]:
# Performs PCA and returns projected matrix into k dimensions
def pca(X,k):
    """ PCA dimensionality Reduction

    Runs an SVD on the second-moment matrix X^T X / m and projects X onto the
    first k left singular vectors. X is used exactly as given (no centring or
    scaling happens here), so normalise it beforehand if that is wanted.

    Works for plain ndarrays and for np.matrix inputs; the outputs have the
    same array type as X.

    Args:
        X(numpy array or matrix): Design matrix of shape (m, n), one sample per row
        k(int): number of principal components, 0 <= k <= n

    Returns:
        (Z,U): Tuple of the projected design matrix Z, shape (m, k), and the
            full (n, n) unitary matrix U from the SVD (all columns, not only
            the first k)

    Raises:
        ValueError: if X has no rows or k lies outside 0..n

    """
    m, n = X.shape
    if m == 0:
        raise ValueError("X must contain at least one sample")
    if not 0 <= k <= n:
        # A negative or oversized k would otherwise be silently mis-sliced.
        raise ValueError("k must be between 0 and %d, got %r" % (n, k))

    # np.dot is a true matrix product for ndarray and np.matrix alike, whereas
    # '*' is element-wise on ndarrays. float(m) avoids integer division
    # truncating 1/m to 0 on Python 2.
    sigma = np.dot(X.T, X) / float(m)
    U, _, _ = np.linalg.svd(sigma)
    U_reduce = U[:, 0:k]
    Z = np.dot(X, U_reduce)
    return (Z,U)

In [4]:
# Normalize Design matrix
def featureNormalize(X):
    """Standardise every column of X to zero mean and unit spread.

    Args:
        X(numpy array or matrix): Design matrix, one sample per row

    Returns:
        X_norm: X with the column mean subtracted and the result divided by
            the column standard deviation (NumPy's default, ddof=0).
            Columns that are constant come back as all zeros.

    """
    mu = X.mean(0)
    sigma = X.std(0)
    # A constant column has zero standard deviation; dividing by it would give
    # 0/0 = NaN and poison every later computation. Divide by 1 instead.
    sigma = np.where(sigma == 0, 1.0, sigma)
    X_norm = (X - mu)/sigma
    return X_norm


In [3]:
# Recover data from PCA
def recoverdata(Z,U,k):
    """Recovers Data after PCA

    Maps the reduced representation back to the original feature space by
    multiplying Z with the transpose of the first k columns of U. Works for
    plain ndarrays and for np.matrix inputs.

    Args:
        Z(Numpy array or matrix): Transformed design matrix, shape (m, k)
        U(Numpy array or matrix): Unitary Matrix from SVD, shape (n, n)
        k(int): number of principal components

    Returns:
        X_rec: Recovered Design matrix, shape (m, n)

    """
    U_reduce = U[:,0:k]
    # np.dot performs a matrix product for both ndarray and np.matrix; with
    # '*' an ndarray input would be multiplied element-wise instead.
    X_rec = np.dot(Z, U_reduce.T)
    return X_rec


In [2]:
# Toy design matrix: 3 samples x 4 integer features drawn from {0, ..., 4}.
# A dedicated, seeded generator makes the values identical on every run
# without touching NumPy's global random state. randint already returns an
# ndarray, so no extra np.array() copy is needed.
data = np.random.RandomState(0).randint(5, size=(3,4))

In [4]:
# Toy target column: 3 integer responses drawn from {0, ..., 4}, shape (3, 1).
# Seeded locally so the notebook is reproducible; randint already returns an
# ndarray, so no extra np.array() copy is needed.
y = np.random.RandomState(1).randint(5, size=(3,1))

In [2]:
#Partial Least Squares algorithm
def pls(X,y,k):
    """Partial Least Squares: extract k score directions one at a time.

    For m = 1..k, using the current (deflated) copy of X:
        w       = X^T y                        (weight vector)
        z_m     = X w                          (score vector)
        theta_m = <z_m, y> / <z_m, z_m>
        ym[:,m] = ym[:,m-1] + theta_m * z_m
        X       = X - z_m (z_m^T X) / <z_m, z_m>   (remove z_m from X)

    X and y are used exactly as passed in; no centring or scaling is done here.

    Args:
        X(numpy array or matrix): Design matrix, shape (rows, n)
        y(numpy array or matrix): Targets, shape (rows, 1) or (rows,)
        k(int): number of components to extract, k <= n

    Returns:
        (ym,zm,thetam): Results are stored at index m for component m, so
            index 0 of zm and thetam stays zero.
            ym (ndarray, rows x c): column 0 is y, column m the running sum
                after component m
            zm (np.matrix, rows x c): column m is score vector z_m
            thetam (np.matrix, c x 1): row m is coefficient theta_m
            c is n, or n + 1 when k == n so that the last component fits.

    Raises:
        ValueError: if X is not 2-D, y does not have one value per row of X,
            or k > n

    """
    # Private float copy: it is deflated in place below and the caller's X
    # must not change. Only the current deflated X is kept.
    X_cur = np.array(X, dtype=float)
    if X_cur.ndim != 2:
        raise ValueError("X must be 2-dimensional")
    rows, n = X_cur.shape

    y_vec = np.asarray(y, dtype=float).reshape(-1)
    if y_vec.shape[0] != rows:
        raise ValueError("y must have one value per row of X")
    if k > n:
        raise ValueError("k must not exceed the number of columns (%d), got %r" % (n, k))

    # Component m lives at index m, so k components need k + 1 slots.
    width = max(n, k + 1)
    ym = np.zeros((rows, width))
    # NOTE(review): textbook PLS usually starts the fitted values from the
    # mean of y; this keeps the existing behaviour of starting from y itself.
    # Confirm which is intended.
    ym[:, 0] = y_vec
    zm = np.zeros((rows, width))
    thetam = np.zeros((width, 1))

    for m in range(1, k + 1):
        w = np.dot(X_cur.T, y_vec)
        z = np.dot(X_cur, w)
        zz = np.dot(z, z)
        if zz == 0:
            # Nothing left to extract (e.g. y is all zeros or X is fully
            # deflated). Carry the last fit forward instead of dividing 0/0.
            ym[:, m:k + 1] = ym[:, [m - 1]]
            break

        # np.dot on 1-D vectors already yields a scalar, so np.asscalar
        # (removed from recent NumPy releases) is not needed.
        theta = np.dot(z, y_vec) / zz
        zm[:, m] = z
        thetam[m, 0] = theta
        ym[:, m] = ym[:, m - 1] + theta * z

        X_cur -= np.outer(z, np.dot(z, X_cur)) / zz

    # zm and thetam were np.matrix objects before; keep that for callers.
    return (ym, np.asmatrix(zm), np.asmatrix(thetam))
        
        
    
    
    

In [57]:
# Extract three PLS components from the toy data and targets
ym, zm, thetam = pls(data, y, 3)

(3,)
1
(3,)
(1, 1)
(3, 1)
2
(3,)
(1, 1)
(3, 1)
3
(3,)
(1, 1)
(3, 1)


In [13]:
# Read the "X" array from ex7data1.mat, normalise its columns and
# project the result onto a single principal component.
mat = scp.loadmat("ex7data1.mat")
X = np.matrix(mat["X"])
X_norm = featureNormalize(X)
z, u = pca(X_norm, 1)