In [1]:
import numpy as np
import matplotlib.pyplot as plt
import time
from data_utils import load_dataset

In [2]:
def lin_regress_SVD(x_train, y_train, x_test, rcond=None):
    """Fit a least-squares linear model via the SVD and predict on ``x_test``.

    A column of ones is prepended to both input matrices so that the first
    weight acts as the bias/offset term. With the economy-size SVD
    ``X = U @ diag(sigma) @ V_t`` of the augmented training matrix, the
    weights are the minimum-norm least-squares solution
    ``w = V_t.T @ diag(1 / sigma) @ U.T @ y_train``.

    Singular values that are numerically zero are dropped (their reciprocal
    is taken as 0) instead of being inverted. Explicitly inverting
    ``diag(sigma)`` raises ``LinAlgError`` for an exactly singular matrix and
    produces arbitrarily large weights for a nearly singular one, which
    happens whenever features are duplicated, constant, or otherwise
    linearly dependent.

    Parameters
    ----------
    x_train : ndarray, shape (N, D)
        N training points with D features each.
    y_train : ndarray, shape (N, K)
        Targets: one column for regression, one column per class for
        one-hot encoded classification targets.
    x_test : ndarray, shape (n, D)
        n test points.
    rcond : float, optional
        Relative cutoff; singular values ``<= rcond * max(sigma)`` are treated
        as zero. Defaults to machine epsilon times ``max(N, D + 1)``.

    Returns
    -------
    ndarray, shape (n, K)
        Predictions for ``x_test``.
    """
    # add dummy feature (for bias/offset term):
    # x_train becomes (N, D+1) and x_test becomes (n, D+1)
    x_train = np.hstack((np.ones((x_train.shape[0], 1)), x_train))
    x_test = np.hstack((np.ones((x_test.shape[0], 1)), x_test))

    # economy-size SVD: because full_matrices=False, U is (N, D+1),
    # sigma is the (D+1,) array of singular values and V_t is (D+1, D+1)
    U, sigma, V_t = np.linalg.svd(x_train, full_matrices=False)

    # reciprocal of the singular values, zeroing out those below the cutoff
    if rcond is None:
        rcond = np.finfo(sigma.dtype).eps * max(x_train.shape)
    cutoff = rcond * sigma.max(initial=0.0)
    keep = sigma > cutoff
    sigma_inv = np.zeros_like(sigma)
    sigma_inv[keep] = 1.0 / sigma[keep]

    # w = V @ diag(sigma_inv) @ U.T @ y, scaling rows instead of building the
    # dense diagonal matrix (the transposes make this work for 1-D y as well)
    projected = U.T @ y_train
    w = V_t.T @ (projected.T * sigma_inv).T

    # compute prediction
    y_predict = np.dot(x_test, w)

    return y_predict

In [3]:
# Seed NumPy's global random number generator so later random draws repeat across runs.
# NOTE(review): no cell visible here draws random numbers directly; presumably this is
# for randomness inside load_dataset — confirm.
np.random.seed(100)

Run on regression datasets:

In [None]:
# mauna_loa dataset
x_train, x_valid, x_test, y_train, y_valid, y_test = load_dataset('mauna_loa')
# fit on the validation and training splits stacked together (validation rows first)
x_train = np.vstack((x_valid, x_train))
y_train = np.vstack((y_valid, y_train))

# predict the test split and report the root-mean-square error
y_predict = lin_regress_SVD(x_train, y_train, x_test)
RMSE = np.sqrt(np.mean((y_test - y_predict) ** 2))
print("RMSE = {}".format(round(RMSE, 6)))

In [None]:
# rosenbrock dataset
x_train, x_valid, x_test, y_train, y_valid, y_test = load_dataset('rosenbrock', n_train=1000, d=2)
# fit on the validation and training splits stacked together (validation rows first)
x_train = np.vstack((x_valid, x_train))
y_train = np.vstack((y_valid, y_train))

# predict the test split and report the root-mean-square error
y_predict = lin_regress_SVD(x_train, y_train, x_test)
RMSE = np.sqrt(np.mean((y_test - y_predict) ** 2))
print("RMSE = {}".format(round(RMSE, 6)))

In [None]:
# pumadyn32nm dataset
x_train, x_valid, x_test, y_train, y_valid, y_test = load_dataset('pumadyn32nm')
# fit on the validation and training splits stacked together (validation rows first)
x_train = np.vstack((x_valid, x_train))
y_train = np.vstack((y_valid, y_train))

# predict the test split and report the root-mean-square error
y_predict = lin_regress_SVD(x_train, y_train, x_test)
RMSE = np.sqrt(np.mean((y_test - y_predict) ** 2))
print("RMSE = {}".format(round(RMSE, 6)))

Run on classification datasets:

In [20]:
# iris dataset
x_train, x_valid, x_test, y_train, y_valid, y_test = load_dataset('iris')
# fit on the validation and training splits stacked together (validation rows first)
x_train = np.vstack((x_valid, x_train))
y_train = np.vstack((y_valid, y_train))

y_predict = lin_regress_SVD(x_train, y_train, x_test)

# predicted class = column holding the largest score, kept as an (n, 1) column
y_predict = np.argmax(y_predict, axis=1).reshape((len(y_predict), -1))
# collapse the one-hot test labels to class numbers in the same (n, 1) layout
y_test = np.argmax(y_test, axis=1).reshape((len(y_test), -1))

# fraction of test points whose predicted class matches the true class
acc = (y_predict == y_test).mean()
print("Accuracy = {}".format(round(acc, 6)))

Accuracy = 0.866667


In [21]:
# mnist_small dataset
x_train, x_valid, x_test, y_train, y_valid, y_test = load_dataset('mnist_small')
# fit on the validation and training splits stacked together (validation rows first)
x_train = np.vstack((x_valid, x_train))
y_train = np.vstack((y_valid, y_train))

y_predict = lin_regress_SVD(x_train, y_train, x_test)

# predicted class = column holding the largest score, kept as an (n, 1) column
y_predict = np.argmax(y_predict, axis=1).reshape((len(y_predict), -1))
# collapse the one-hot test labels to class numbers in the same (n, 1) layout
y_test = np.argmax(y_test, axis=1).reshape((len(y_test), -1))

# fraction of test points whose predicted class matches the true class
acc = (y_predict == y_test).mean()
print("Accuracy = {}".format(round(acc, 6)))

Accuracy = 0.855


In [None]:
# Load the rosenbrock splits with n_train=1000 and d=4.
# Note: the next cell rebinds all six names to the iris splits, so nothing
# visible in this notebook uses the data loaded here.
x_train, x_valid, x_test, y_train, y_valid, y_test = load_dataset('rosenbrock', n_train=1000, d=4)

In [4]:
# Load the iris splits; the cells below walk through lin_regress_SVD step by step
# on this data (here the training split is used without the validation split).
x_train, x_valid, x_test, y_train, y_valid, y_test = load_dataset('iris')

In [5]:
# show the shape and contents of the training inputs, then of the test inputs
print(x_train.shape, x_train, sep="\n")
print(x_test.shape, x_test, sep="\n")

(104, 4)
[[ 1.89829664e-01 -3.62176246e-01  4.21733708e-01  3.95774101e-01]
 [-9.00681170e-01  5.58610819e-01 -1.16971425e+00 -9.20547742e-01]
 [-2.94841818e-01 -1.31979479e-01  1.94384000e-01  1.32509732e-01]
 [-1.74885626e+00  3.28414053e-01 -1.39706395e+00 -1.31544430e+00]
 [ 2.24968346e+00 -1.31979479e-01  1.33113254e+00  1.44883158e+00]
 [-1.02184904e+00  1.01900435e+00 -1.39706395e+00 -1.18381211e+00]
 [-9.00681170e-01  1.01900435e+00 -1.34022653e+00 -1.18381211e+00]
 [ 6.74501145e-01  9.82172869e-02  9.90107977e-01  7.90670654e-01]
 [ 5.53333275e-01 -3.62176246e-01  1.04694540e+00  7.90670654e-01]
 [-1.73673948e-01 -1.28296331e+00  7.05920842e-01  1.05393502e+00]
 [ 5.53333275e-01 -8.22569778e-01  6.49083415e-01  7.90670654e-01]
 [ 2.24968346e+00 -1.05276654e+00  1.78583195e+00  1.44883158e+00]
 [ 1.03800476e+00 -1.28296331e+00  1.16062026e+00  7.90670654e-01]
 [ 6.86617933e-02 -1.31979479e-01  2.51221427e-01  3.95774101e-01]
 [ 6.74501145e-01 -3.62176246e-01  3.08058854e-01  1.

In [6]:
# reference predictions from the packaged function, kept for comparison with
# the step-by-step computation below
y_predict_func = lin_regress_SVD(x_train, y_train, x_test)
print(y_predict_func.shape, y_predict_func, sep="\n")

# root-mean-square error of the raw predictions against y_test
RMSE = np.sqrt(np.mean((y_test - y_predict_func) ** 2))
print(RMSE)

(15, 3)
[[-0.3398239   0.70066758  0.63915632]
 [ 0.97942561  0.14174983 -0.12117544]
 [ 0.88080068  0.26154896 -0.14234964]
 [ 0.02344624 -0.03427389  1.01082765]
 [ 1.23436376 -0.26874751  0.03438375]
 [ 0.8458851   0.32815345 -0.17403855]
 [ 0.14620878  0.7049016   0.14888962]
 [ 0.17794564  0.33079872  0.49125564]
 [ 0.11676886 -0.10779554  0.99102668]
 [ 1.15133864 -0.28529438  0.13395574]
 [-0.27380999  0.52799376  0.74581623]
 [-0.09538763  0.46082961  0.63455802]
 [ 0.11444264  0.57922571  0.30633165]
 [ 0.18535025  0.51901817  0.29563158]
 [ 0.2246223   0.66629083  0.10908687]]
0.2886676216080215


In [7]:
# Same steps as the first half of lin_regress_SVD, run on the notebook-level arrays.
# x_train: (N, D) array of N training points with D features
# y_train: (N, 1) array
# (classification datasets are assumed to be converted to number classes)

# Prepend a dummy feature (column of ones): x_train becomes (N, D+1) and
# x_test becomes (n, D+1), where n is the number of test points.
# Note: x_train and x_test are rebound here, so running this cell a second
# time prepends another column of ones.
temp = np.ones((len(x_train), 1))
x_train = np.concatenate((temp, x_train), axis=1)
temp = np.ones((len(x_test), 1))
x_test = np.concatenate((temp, x_test), axis=1)

# economy-size SVD (compute_uv and hermitian are left at their defaults)
U, sigma, V_t = np.linalg.svd(x_train, full_matrices=False)
# because full_matrices=False: U is (N, D+1), sigma is the (D+1,) array of
# singular values, and V_t is (D+1, D+1)


In [8]:
# augmented inputs: shape followed by contents
print(x_train.shape, x_train, sep="\n")
print(x_test.shape, x_test, sep="\n")

# SVD factors: shape followed by contents
print(U.shape, U, sep="\n")
print(sigma.shape, sigma, sep="\n")
print(V_t.shape, V_t, sep="\n")

(104, 5)
[[ 1.00000000e+00  1.89829664e-01 -3.62176246e-01  4.21733708e-01
   3.95774101e-01]
 [ 1.00000000e+00 -9.00681170e-01  5.58610819e-01 -1.16971425e+00
  -9.20547742e-01]
 [ 1.00000000e+00 -2.94841818e-01 -1.31979479e-01  1.94384000e-01
   1.32509732e-01]
 [ 1.00000000e+00 -1.74885626e+00  3.28414053e-01 -1.39706395e+00
  -1.31544430e+00]
 [ 1.00000000e+00  2.24968346e+00 -1.31979479e-01  1.33113254e+00
   1.44883158e+00]
 [ 1.00000000e+00 -1.02184904e+00  1.01900435e+00 -1.39706395e+00
  -1.18381211e+00]
 [ 1.00000000e+00 -9.00681170e-01  1.01900435e+00 -1.34022653e+00
  -1.18381211e+00]
 [ 1.00000000e+00  6.74501145e-01  9.82172869e-02  9.90107977e-01
   7.90670654e-01]
 [ 1.00000000e+00  5.53333275e-01 -3.62176246e-01  1.04694540e+00
   7.90670654e-01]
 [ 1.00000000e+00 -1.73673948e-01 -1.28296331e+00  7.05920842e-01
   1.05393502e+00]
 [ 1.00000000e+00  5.53333275e-01 -8.22569778e-01  6.49083415e-01
   7.90670654e-01]
 [ 1.00000000e+00  2.24968346e+00 -1.05276654e+00  1.785

In [9]:
# sanity check: rebuild x_train from its SVD factors, U @ diag(sigma) @ V_t
original = np.linalg.multi_dot((U, np.diag(sigma), V_t))
print(original.shape, original, sep="\n")

# norm of the reconstruction error (Frobenius norm for a 2-D array)
err = np.linalg.norm(x_train - original)
print(err)

(104, 5)
[[ 1.00000000e+00  1.89829664e-01 -3.62176246e-01  4.21733708e-01
   3.95774101e-01]
 [ 1.00000000e+00 -9.00681170e-01  5.58610819e-01 -1.16971425e+00
  -9.20547742e-01]
 [ 1.00000000e+00 -2.94841818e-01 -1.31979479e-01  1.94384000e-01
   1.32509732e-01]
 [ 1.00000000e+00 -1.74885626e+00  3.28414053e-01 -1.39706395e+00
  -1.31544430e+00]
 [ 1.00000000e+00  2.24968346e+00 -1.31979479e-01  1.33113254e+00
   1.44883158e+00]
 [ 1.00000000e+00 -1.02184904e+00  1.01900435e+00 -1.39706395e+00
  -1.18381211e+00]
 [ 1.00000000e+00 -9.00681170e-01  1.01900435e+00 -1.34022653e+00
  -1.18381211e+00]
 [ 1.00000000e+00  6.74501145e-01  9.82172869e-02  9.90107977e-01
   7.90670654e-01]
 [ 1.00000000e+00  5.53333275e-01 -3.62176246e-01  1.04694540e+00
   7.90670654e-01]
 [ 1.00000000e+00 -1.73673948e-01 -1.28296331e+00  7.05920842e-01
   1.05393502e+00]
 [ 1.00000000e+00  5.53333275e-01 -8.22569778e-01  6.49083415e-01
   7.90670654e-01]
 [ 1.00000000e+00  2.24968346e+00 -1.05276654e+00  1.785

In [10]:
# Least-squares weights from the SVD factors: w = V @ inv(diag(sigma)) @ U^T @ y_train.
# NOTE(review): explicitly inverting diag(sigma) assumes every singular value is
# nonzero; np.linalg.inv raises LinAlgError on an exactly singular matrix.
w = np.linalg.multi_dot([V_t.T,np.linalg.inv(np.diag(sigma)),U.T,y_train])
print(w.shape)
print(w)

(5, 3)
[[ 0.33978478  0.32058889  0.33962632]
 [ 0.08456662 -0.09853985  0.01397323]
 [ 0.09392187 -0.1710376   0.07711573]
 [-0.5070418   0.6337345  -0.1266927 ]
 [ 0.03191232 -0.53063949  0.49872717]]


In [11]:
# raw predictions: augmented test inputs times the fitted weights
y_predict = x_test.dot(w)
print(y_predict.shape, y_predict, sep="\n")

(15, 3)
[[-0.3398239   0.70066758  0.63915632]
 [ 0.97942561  0.14174983 -0.12117544]
 [ 0.88080068  0.26154896 -0.14234964]
 [ 0.02344624 -0.03427389  1.01082765]
 [ 1.23436376 -0.26874751  0.03438375]
 [ 0.8458851   0.32815345 -0.17403855]
 [ 0.14620878  0.7049016   0.14888962]
 [ 0.17794564  0.33079872  0.49125564]
 [ 0.11676886 -0.10779554  0.99102668]
 [ 1.15133864 -0.28529438  0.13395574]
 [-0.27380999  0.52799376  0.74581623]
 [-0.09538763  0.46082961  0.63455802]
 [ 0.11444264  0.57922571  0.30633165]
 [ 0.18535025  0.51901817  0.29563158]
 [ 0.2246223   0.66629083  0.10908687]]


In [12]:
# collapse each row to the index of its largest entry, kept as an (n, 1) column;
# both names are rebound, so the raw scores and original y_test are replaced
y_predict = np.argmax(y_predict, axis=1).reshape((len(y_predict), -1))
y_test = np.argmax(y_test, axis=1).reshape((len(y_test), -1))

print(y_predict.shape, y_predict, sep="\n")
print(y_test.shape, y_test, sep="\n")

(15, 1)
[[1]
 [0]
 [0]
 [2]
 [0]
 [0]
 [1]
 [2]
 [2]
 [0]
 [2]
 [2]
 [1]
 [1]
 [1]]
(15, 1)
[[2]
 [0]
 [0]
 [2]
 [0]
 [0]
 [1]
 [1]
 [2]
 [0]
 [2]
 [2]
 [1]
 [1]
 [1]]


In [13]:
# fraction of test points whose predicted class equals the true class
acc = (y_predict == y_test).mean()
print(acc)

0.8666666666666667


In [None]:
# Root-mean-square difference between y_test and y_predict.
# NOTE(review): the argmax cell above rebinds y_test and y_predict to (n, 1)
# class-index arrays, so when run after it this measures the error between
# label indices rather than between the original targets and the raw
# predictions — confirm that is intended.
RMSE = np.sqrt(np.mean(np.square(y_test-y_predict)))
print(RMSE)

In [None]:
# Element-wise gap between the step-by-step predictions and lin_regress_SVD's output.
# The raw scores are recomputed from the bias-augmented x_test and w because
# y_predict was rebound to (n, 1) argmax class indices in an earlier cell;
# subtracting those from the (n, K) scores would broadcast labels against scores.
print(np.dot(x_test, w) - y_predict_func)

In [None]:
# Norm of the gap between the step-by-step predictions and lin_regress_SVD's output
# (expected to be ~0). The raw scores are recomputed from the bias-augmented x_test
# and w because y_predict was rebound to (n, 1) argmax class indices in an earlier
# cell, which would otherwise be broadcast against the (n, K) scores.
print(np.linalg.norm(np.dot(x_test, w) - y_predict_func))