In [1]:
import scipy.io
import numpy as np
from scipy.sparse import csr_matrix
from projL1 import projL1
from time import time

from projL1 import *

In [2]:
def projNuc(Z, kappa):
    p = Z.shape[0]
    # Compute SVD decomposition
    U , s, Vt = scipy.linalg.svd(Z) 
    
    # Project into l1 norm ball
    s_proj = projL1(s, kappa)
    
    # Generate diagonal matrix of size [dim(U), dim(Vt)]
    S = scipy.sparse.spdiags(s_proj, 0, U.shape[0], Vt.shape[0]) 
    
    #print U.shape, S.shape, Vt.shape
    return U.dot(S.toarray()).dot(Vt)

In [7]:
data = scipy.io.loadmat('./dataset/ml-100k/ub_base')  # load 100k dataset

Rating = data['Rating'].flatten()
UserID = data['UserID'].flatten() - 1  # Python indexing starts from 0 whereas Matlab from 1
MovID = data['MovID'].flatten() - 1    # Python indexing starts from 0 whereas Matlab from 1

nM = np.amax(data['MovID'])
nU = np.amax(data['UserID'])

Z = csr_matrix((Rating, (MovID, UserID)),shape=(nM, nU),dtype=float).toarray()
kappa = 5000

def small_projection(Z, kappa):
    tstart = time()
    Z_proj = projNuc(Z, kappa)
    elapsed = time() - tstart
    print "proj for 100k data takes ",elapsed," sec"
    return elapsed

In [8]:
time_100k = []

for i in range(5):
    time_100k.append(small_projection(Z,kappa))

print "mean time for 100k data takes", np.mean(np.array(time_100k)), "sec"
    

proj for 100k data takes  1.63733196259  sec
proj for 100k data takes  1.59432601929  sec
proj for 100k data takes  1.71913385391  sec
proj for 100k data takes  1.62035608292  sec
proj for 100k data takes  1.66920495033  sec
mean time for 100k data takes 1.6480705738067627 sec


In [9]:
# NOTE: This one can take few minutes!
data = scipy.io.loadmat('./dataset/ml-1m/ml1m_base')  # load 1M dataset

Rating = data['Rating'].flatten()
UserID = data['UserID'].flatten() - 1  # Python indexing starts from 0 whereas Matlab from 1
MovID = data['MovID'].flatten() - 1    # Python indexing starts from 0 whereas Matlab from 1

nM = np.amax(data['MovID'])
nU = np.amax(data['UserID'])

Z = csr_matrix((Rating, (MovID, UserID)),shape=(nM, nU),dtype=float).toarray()
kappa = 5000

def large_projection(Z, kappa):
    tstart = time()
    Z_proj = projNuc(Z, kappa)
    elapsed = time() - tstart
    print "proj for 1M data takes ",elapsed," sec"
    return elapsed

In [10]:
time_1M = []

for i in range(5):
    time_1M.append(large_projection(Z,kappa))

print "mean time for 1M data takes", np.mean(np.array(time_1M)), "sec"

proj for 1M data takes  94.6625452042  sec
proj for 1M data takes  93.8041598797  sec
proj for 1M data takes  94.7760810852  sec
proj for 1M data takes  89.1428310871  sec
proj for 1M data takes  90.9789209366  sec
mean time for 1M data takes 92.6729076385498 sec
