In [1]:
import numpy as np
from scipy.io import loadmat
import matplotlib.pyplot as plt 

# Read the MATLAB file into a dictionary keyed by variable name.
in_data = loadmat('movie.mat')
# List the dictionary keys to see which variables the file provides.
print(list(in_data.keys()))

['__header__', '__version__', '__globals__', 'X']


In [2]:

def gram_schmidt(B, tol=1e-10):
    """Orthonormalize the columns of B with the modified Gram-Schmidt process.

    Parameters
    ----------
    B : array_like, shape (m, n)
        Matrix whose columns are the vectors to orthogonalize. Integer
        input is accepted; all arithmetic is done on a float copy, so the
        caller's array is never modified.
    tol : float, optional
        A column whose remainder has norm <= tol after its components along
        the earlier basis vectors are removed is treated as linearly
        dependent on them. Defaults to 1e-10.

    Returns
    -------
    U : ndarray of float, shape (m, n)
        Column j holds the unit-norm basis vector produced from column j of
        B, or all zeros when that column was found to be dependent.
    """
    # Work in floating point: an in-place float subtraction on an integer
    # column would otherwise raise a casting error.
    B = np.asarray(B, dtype=float)
    m, n = B.shape
    # Matrix that receives the orthonormal basis, one vector per column.
    U = np.zeros((m, n))
    for j in range(n):
        v = B[:, j].copy()
        for k in range(j):
            # Modified Gram-Schmidt: project the *running* remainder v rather
            # than the original column. This is the same in exact arithmetic
            # but loses less orthogonality to round-off.
            v -= np.dot(U[:, k], v) * U[:, k]
        norm = np.linalg.norm(v)
        # Leave the column at zero when nothing independent remains.
        if norm > tol:
            U[:, j] = v / norm
    return U

if __name__ == '__main__':
    # Rank-deficient example: the first two columns of B1 are identical, so
    # the second column of the result is expected to be all zeros.
    B1 = np.array([[1.0, 1.0, 0.0], [2.0, 2.0, 0.0], [2.0, 2.0, 1.0]])
    A1 = gram_schmidt(B1)
    print(A1)
    # Product of a 4x2 and a 2x5 random matrix, i.e. a 4x5 matrix of rank at
    # most 2. A fixed seed keeps the printed check reproducible across
    # kernel restarts.
    rng = np.random.default_rng(0)
    A2 = gram_schmidt(rng.random((4, 2)) @ rng.random((2, 5)))
    # Gram matrix of the result: ones on the diagonal for the columns that
    # were kept, zeros everywhere else.
    print(A2.transpose() @ A2)

[[ 0.33333333  0.         -0.2981424 ]
 [ 0.66666667  0.         -0.59628479]
 [ 0.66666667  0.          0.74535599]]
[[1.00000000e+00 3.88578059e-16 0.00000000e+00 0.00000000e+00
  0.00000000e+00]
 [3.88578059e-16 1.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00]
 [0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00]
 [0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00]
 [0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00]]


In [3]:
# Pull the data matrix X out of the loaded .mat dictionary.
x = in_data['X']

# Prepend a column of ones. The row count is taken from x itself instead of
# being hard-coded, so the cell keeps working if the data set changes size.
x_aug = np.hstack((np.ones((x.shape[0], 1)), x))

# Orthonormalize the columns of the augmented matrix and display the result.
T = gram_schmidt(x_aug)
T

array([[ 4.47213595e-01, -3.65148372e-01, -6.32455532e-01,
        -5.16397779e-01,  0.00000000e+00,  0.00000000e+00,
         0.00000000e+00,  1.26565425e-14],
       [ 4.47213595e-01,  5.47722558e-01,  3.16227766e-01,
        -3.87298335e-01,  0.00000000e+00,  0.00000000e+00,
         0.00000000e+00,  5.00000000e-01],
       [ 4.47213595e-01, -3.65148372e-01,  2.80866677e-16,
         6.45497224e-01,  0.00000000e+00,  0.00000000e+00,
         0.00000000e+00,  5.00000000e-01],
       [ 4.47213595e-01,  5.47722558e-01, -3.16227766e-01,
         3.87298335e-01,  0.00000000e+00,  0.00000000e+00,
         0.00000000e+00, -5.00000000e-01],
       [ 4.47213595e-01, -3.65148372e-01,  6.32455532e-01,
        -1.29099445e-01,  0.00000000e+00,  0.00000000e+00,
         0.00000000e+00, -5.00000000e-01]])

# 3a) Yes! The first basis vector we obtain is equal to t1.

In [4]:
# First column of T: the first orthonormal basis vector.
t1 = T[:, 0]
t1


array([0.4472136, 0.4472136, 0.4472136, 0.4472136, 0.4472136])

In [5]:
# Display the raw data matrix X that was loaded from movie.mat.
x

array([[ 4,  7,  2,  8,  7,  4,  2],
       [ 9,  3,  5,  6, 10,  5,  5],
       [ 4,  8,  3,  7,  6,  4,  1],
       [ 9,  2,  6,  5,  9,  5,  4],
       [ 4,  9,  2,  8,  7,  4,  1]], dtype=uint8)

# 3b) Finding the rank-1 approximation using $t_1^T x$

In [6]:

# Weights of the rank-1 model: the product of t1 with the data matrix,
# giving one weight per column of x.
w = np.matmul(t1, x)
print("W is ", w, sep="\n")

W is 
[13.41640786 12.96919427  8.04984472 15.20526225 17.44133022  9.8386991
  5.81377674]


In [7]:
# Residual of the rank-1 approximation. w is one-dimensional here, so
# transposing it has no effect and it is passed to np.outer directly.
residual = x - np.outer(t1, w)
print("the residual error is", residual, sep="\n")

the residual error is
[[-2.   1.2 -1.6  1.2 -0.8 -0.4 -0.6]
 [ 3.  -2.8  1.4 -0.8  2.2  0.6  2.4]
 [-2.   2.2 -0.6  0.2 -1.8 -0.4 -1.6]
 [ 3.  -3.8  2.4 -1.8  1.2  0.6  1.4]
 [-2.   3.2 -1.6  1.2 -0.8 -0.4 -1.6]]


### 3c

In [8]:
# First two columns of T: the basis used for the rank-2 approximation.
t2 = T[:, [0, 1]]
t2


array([[ 0.4472136 , -0.36514837],
       [ 0.4472136 ,  0.54772256],
       [ 0.4472136 , -0.36514837],
       [ 0.4472136 ,  0.54772256],
       [ 0.4472136 , -0.36514837]])

# The weights w for the rank-2 approximation are

In [9]:
# Rank-2 weights: one row per column of x, one column per basis vector in t2.
w2 = x.transpose() @ t2
w2


array([[13.41640786,  5.47722558],
       [12.96919427, -6.02494813],
       [ 8.04984472,  3.46890953],
       [15.20526225, -2.37346442],
       [17.44133022,  3.10376116],
       [ 9.8386991 ,  1.09544512],
       [ 5.81377674,  3.46890953]])

In [10]:
# Sanity-check the dimensions of the basis and the weights before multiplying.
print(t2.shape, w2.shape, sep="\n")

(5, 2)
(7, 2)




# The residual error of the rank-2 approximation is

In [11]:
# Residual left after subtracting the rank-2 reconstruction from the data.
residual = x - (t2 @ w2.transpose())
residual

array([[ 4.44089210e-16, -1.00000000e+00, -3.33333333e-01,
         3.33333333e-01,  3.33333333e-01,  4.44089210e-16,
         6.66666667e-01],
       [ 1.77635684e-15,  5.00000000e-01, -5.00000000e-01,
         5.00000000e-01,  5.00000000e-01,  8.88178420e-16,
         5.00000000e-01],
       [ 4.44089210e-16,  0.00000000e+00,  6.66666667e-01,
        -6.66666667e-01, -6.66666667e-01,  4.44089210e-16,
        -3.33333333e-01],
       [ 1.77635684e-15, -5.00000000e-01,  5.00000000e-01,
        -5.00000000e-01, -5.00000000e-01,  8.88178420e-16,
        -5.00000000e-01],
       [ 4.44089210e-16,  1.00000000e+00, -3.33333333e-01,
         3.33333333e-01,  3.33333333e-01,  4.44089210e-16,
        -3.33333333e-01]])

In [12]:

# Print the rank-2 reconstruction itself for comparison with x.
print(t2 @ w2.transpose())

[[4.         8.         2.33333333 7.66666667 6.66666667 4.
  1.33333333]
 [9.         2.5        5.5        5.5        9.5        5.
  4.5       ]
 [4.         8.         2.33333333 7.66666667 6.66666667 4.
  1.33333333]
 [9.         2.5        5.5        5.5        9.5        5.
  4.5       ]
 [4.         8.         2.33333333 7.66666667 6.66666667 4.
  1.33333333]]


# 3c

### 3c) t2 appears to be the taste vector that captures the preference for sci-fi versus romantic movies. Those with a preference for sci-fi movies have a higher number in rows 1, 3, and 5.

## 3d) The more the rank is increased, the smaller the residual becomes.

In [13]:
# First three columns of T: the basis used for the rank-3 approximation.
t3 = T[:, 0:3]
t3

array([[ 4.47213595e-01, -3.65148372e-01, -6.32455532e-01],
       [ 4.47213595e-01,  5.47722558e-01,  3.16227766e-01],
       [ 4.47213595e-01, -3.65148372e-01,  2.80866677e-16],
       [ 4.47213595e-01,  5.47722558e-01, -3.16227766e-01],
       [ 4.47213595e-01, -3.65148372e-01,  6.32455532e-01]])

In [14]:
# Show arrays with three decimals and no scientific notation from here on.
np.set_printoptions(suppress=True, precision=3)
# Rank-3 weights: one row per column of x, one column per basis vector in t3.
w3 = x.transpose() @ t3
w3


array([[13.416,  5.477,  0.   ],
       [12.969, -6.025,  1.581],
       [ 8.05 ,  3.469, -0.316],
       [15.205, -2.373,  0.316],
       [17.441,  3.104,  0.316],
       [ 9.839,  1.095,  0.   ],
       [ 5.814,  3.469, -0.316]])

In [15]:
# Residual left after subtracting the rank-3 reconstruction from the data.
residual = x - (t3 @ w3.transpose())
residual

array([[ 0.   ,  0.   , -0.533,  0.533,  0.533,  0.   ,  0.467],
       [ 0.   , -0.   , -0.4  ,  0.4  ,  0.4  ,  0.   ,  0.6  ],
       [ 0.   ,  0.   ,  0.667, -0.667, -0.667,  0.   , -0.333],
       [ 0.   ,  0.   ,  0.4  , -0.4  , -0.4  ,  0.   , -0.6  ],
       [-0.   , -0.   , -0.133,  0.133,  0.133, -0.   , -0.133]])