In [None]:
import scipy
from scipy import linalg
import numpy as np
import math
import pickle
from tqdm import tqdm_notebook
import time

In [None]:
# Set NumPy's array-printing `threshold` option to 10000, so arrays with up to
# that many elements are printed in full instead of being summarized with "...".
np.set_printoptions(threshold = 10000)

In [None]:
def svd(A):
    """Compute a singular value decomposition of ``A`` via the eigenpairs of ``A @ A.T``.

    The left singular vectors are the eigenvectors of the Gram matrix
    ``A @ A.T``, the singular values are the square roots of its eigenvalues,
    and each right singular vector is recovered as ``A.T @ u_i / s_i``.

    Parameters
    ----------
    A : array_like, shape (m, n)
        Real matrix to decompose.

    Returns
    -------
    u : ndarray, shape (m, m)
        Left singular vectors stored as columns, ordered by decreasing
        singular value. Columns belonging to a numerically zero eigenvalue
        are set to all zeros.
    s : ndarray, shape (min(m, n),)
        Singular values in decreasing order.
    v : ndarray, shape (n, min(m, n))
        Right singular vectors stored as columns. Columns belonging to a
        zero singular value are left as zeros.

    Notes
    -----
    ``np.matmul(u[:, :len(s)] * s, v.T)`` reconstructs ``A``.
    """
    A = np.asarray(A, dtype=float)
    n_rows, n_cols = A.shape
    rank_cap = min(n_rows, n_cols)

    # Build the Gram matrix from the argument itself (not from a notebook global).
    gram = np.matmul(A, A.T)

    # A @ A.T is symmetric positive semi-definite, so the symmetric solver
    # applies: it returns real eigenvalues and orthonormal real eigenvectors.
    eigvals, eigvecs = scipy.linalg.eigh(gram)

    # eigh returns eigenvalues in ascending order; we want the largest first.
    eigvals = eigvals[::-1]
    eigvecs = eigvecs[:, ::-1]

    # Eigenvalues that are zero up to round-off (including tiny negatives)
    # carry no information: zero them and their eigenvectors so the square
    # root below never sees a negative number.
    negligible = np.isclose(eigvals, 0) | (eigvals < 0)
    eigvals = np.where(negligible, 0.0, eigvals)
    u = np.where(negligible, 0.0, eigvecs)  # mask broadcasts across columns

    # At most min(m, n) singular values can be non-zero.
    s = np.sqrt(eigvals[:rank_cap])

    # One right singular vector (length n) per retained singular value.
    v = np.zeros((n_cols, rank_cap), dtype=float)
    usable = ~np.isclose(s, 0)
    v[:, usable] = np.matmul(A.T, u[:, :rank_cap][:, usable]) / s[usable]

    return u, s, v

In [None]:
def energize_me(s, threshold):
    """Return the leading singular values needed to retain a fraction of the energy.

    Energy is the sum of squared values. Values are taken from the front of
    ``s`` until their cumulative energy reaches ``threshold`` times the total.

    Parameters
    ----------
    s : array_like, 1-D
        Singular values, expected in decreasing order.
    threshold : float
        Fraction of the total energy to retain (e.g. ``0.90``).

    Returns
    -------
    ndarray
        The shortest prefix of ``s`` whose energy is at least
        ``threshold * total_energy``. Empty when the target energy is not
        positive; all of ``s`` when the target cannot be reached
        (e.g. ``threshold > 1``) instead of indexing past the end.
    """
    s = np.asarray(s)
    if s.size == 0:
        return np.array([])

    # Running energy after each value; the last entry is the total energy.
    cumulative_energy = np.cumsum(s ** 2)
    activation_energy = cumulative_energy[-1] * threshold

    # Nothing is needed to "reach" a non-positive target.
    if not activation_energy > 0:
        return np.array(s[:0])

    reached = np.nonzero(cumulative_energy >= activation_energy)[0]
    # Keep up to and including the first value that reaches the target;
    # if the target is never reached, keep everything.
    keep = reached[0] + 1 if reached.size else s.size

    return np.array(s[:keep])

In [None]:
def evaluate(prediction, actual):
    """Score a predicted matrix against the reference matrix.

    Both metrics are averaged over ``actual.shape[0] * actual.shape[1]``
    entries.

    Parameters
    ----------
    prediction : ndarray
        Predicted values.
    actual : ndarray, 2-D
        Reference values.

    Returns
    -------
    tuple
        ``(rmse, mae)``: root-mean-square error and mean absolute error.
    """
    n_rows, n_cols = actual.shape[0], actual.shape[1]
    entry_count = n_rows * n_cols

    # The element-wise residual feeds both metrics.
    residual = prediction - actual
    squared_error_total = np.sum(residual ** 2)
    absolute_error_total = np.sum(abs(residual))

    return (squared_error_total / entry_count) ** 0.5, absolute_error_total / entry_count

In [None]:
if __name__ == '__main__':

    # Load the ratings matrix to decompose.
    # NOTE(security): pickle.load can execute arbitrary code while
    # deserializing; only load this file if it comes from a trusted source.
    with open("ratingsMatrix_noZeros.pickle", 'rb') as file:
        rating_matrix = pickle.load(file)

    # Reference decomposition, kept for comparison:
    # uSVD, sSVD, vhSVD = np.linalg.svd(rating_matrix, full_matrices=True)

    # Full-rank decomposition and reconstruction, timed end to end.
    start = time.time()
    u, s, v = svd(rating_matrix)
    tmp = np.multiply(u[:, :len(s)], s)  # scale each left vector by its singular value
    reconstructed = np.matmul(tmp, v.T)
    print(time.time() - start) # 189.43088269233704

    # Print the metrics: a bare expression inside an `if` body is neither
    # displayed by the notebook nor stored.
    print(evaluate(reconstructed, rating_matrix))
    # (3.187548670659706e-12, 2.2279925927069606e-12)

    # Truncated reconstruction keeping the leading values that hold 90% of the energy.
    s90 = energize_me(s, 0.90)
    tmp = np.multiply(u[:, :len(s90)], s90)
    reconstructed90 = np.matmul(tmp, v.T[:len(s90), :])

    print(evaluate(reconstructed90, rating_matrix))
    # (0.250737665891037, 0.14061418179187507)
    
#     with open('u.pickle', 'wb') as file:
#         pickle.dump(u,file)
#     with open('s.pickle', 'wb') as file:
#         pickle.dump(s,file)
#     with open('v.pickle', 'wb') as file:
#         pickle.dump(v,file)
        
#     with open('u.pickle', 'rb') as file:
#         u1 = pickle.load(file)
#     with open('s.pickle', 'rb') as file:
#         s1 = pickle.load(file)
#     with open('v.pickle', 'rb') as file:
#         v1 = pickle.load(file)