In [1]:
import scipy.io
import numpy as np
from scipy.sparse import csr_matrix
from scipy.sparse import linalg
from time import time

In [2]:
def sharpNuc(Z, kappa):
    """Scale the product of Z's top singular vectors by ``-kappa``.

    Parameters
    ----------
    Z : matrix accepted by ``scipy.sparse.linalg.svds``
        Matrix whose top singular vector pair is requested.
    kappa : scalar
        Multiplier applied (with a minus sign) to the product.

    Returns
    -------
    The dense product ``U.dot(Vt)`` of the single left and right singular
    vectors returned by ``svds``, multiplied by ``-kappa``.
    """
    # k=1 requests a single singular triplet; the singular value is not needed.
    triplet = scipy.sparse.linalg.svds(Z, k=1)
    left_vec, right_vec_t = triplet[0], triplet[2]

    rank_one = left_vec.dot(right_vec_t)
    return -kappa * rank_one

In [3]:
# Load the 100k ratings file and build the sparse (movie x user) rating matrix.
data = scipy.io.loadmat('./dataset/ml-100k/ub_base')  # load 100k dataset

Rating = data['Rating'].flatten()
# Cast the IDs to integers explicitly: loadmat keeps whatever dtype the file
# stored (presumably MATLAB doubles -- TODO confirm), while the sparse
# constructor needs integer indices and an integer shape.
# Python indexing starts from 0 whereas Matlab from 1, hence the "- 1".
UserID = data['UserID'].flatten().astype(np.int64) - 1
MovID = data['MovID'].flatten().astype(np.int64) - 1

# The original IDs are 1-based, so each dimension is (largest 0-based index + 1).
nM = int(np.amax(MovID)) + 1
nU = int(np.amax(UserID)) + 1

Z = csr_matrix((Rating, (MovID, UserID)), shape=(nM, nU), dtype=float)
kappa = 5000

def small_sharp(Z, kappa):
    """Time a single ``sharpNuc(Z, kappa)`` call, reported under the 100k label.

    Prints the elapsed wall-clock time and returns it in seconds.
    The value computed by ``sharpNuc`` is discarded; only the duration is kept.
    """
    tstart = time()
    sharpNuc(Z, kappa)  # result intentionally unused -- only the runtime matters
    elapsed = time() - tstart
    # One pre-formatted string prints the same layout under the Python 2 print
    # statement and the Python 3 print function.
    print("sharp of 100k data takes  %s  sec" % elapsed)
    return elapsed

In [4]:
# Repeat the timed run five times and collect each elapsed time (seconds).
time_100k = [small_sharp(Z, kappa) for _ in range(5)]

# np.mean accepts the list directly. A single pre-formatted string prints the
# same layout under both the Python 2 print statement and Python 3 print().
print("mean time for 100k data takes %s sec" % np.mean(time_100k))

sharp of 100k data takes  0.0914299488068  sec
sharp of 100k data takes  0.0138380527496  sec
sharp of 100k data takes  0.0145871639252  sec
sharp of 100k data takes  0.0139410495758  sec
sharp of 100k data takes  0.0139210224152  sec
mean time for 100k data takes 0.029543447494506835 sec


In [5]:
# NOTE: This one can take a few minutes!
# Load the 1M ratings file and build the sparse (movie x user) rating matrix.
data = scipy.io.loadmat('./dataset/ml-1m/ml1m_base')  # load 1M dataset

Rating = data['Rating'].flatten()
# Cast the IDs to integers explicitly: loadmat keeps whatever dtype the file
# stored (presumably MATLAB doubles -- TODO confirm), while the sparse
# constructor needs integer indices and an integer shape.
# Python indexing starts from 0 whereas Matlab from 1, hence the "- 1".
UserID = data['UserID'].flatten().astype(np.int64) - 1
MovID = data['MovID'].flatten().astype(np.int64) - 1

# The original IDs are 1-based, so each dimension is (largest 0-based index + 1).
nM = int(np.amax(MovID)) + 1
nU = int(np.amax(UserID)) + 1

Z = csr_matrix((Rating, (MovID, UserID)), shape=(nM, nU), dtype=float)
kappa = 5000

def large_sharp(Z, kappa):
    """Time a single ``sharpNuc(Z, kappa)`` call, reported under the 1M label.

    Prints the elapsed wall-clock time and returns it in seconds.
    The value computed by ``sharpNuc`` is discarded; only the duration is kept.
    """
    tstart = time()
    sharpNuc(Z, kappa)  # result intentionally unused -- only the runtime matters
    elapsed = time() - tstart
    # One pre-formatted string prints the same layout under the Python 2 print
    # statement and the Python 3 print function.
    print("sharp of 1M data takes  %s  sec" % elapsed)
    return elapsed

In [6]:
# Repeat the timed run five times and collect each elapsed time (seconds).
# Use large_sharp here: small_sharp would label these 1M runs as "100k".
time_1M = [large_sharp(Z, kappa) for _ in range(5)]

# np.mean accepts the list directly. A single pre-formatted string prints the
# same layout under both the Python 2 print statement and Python 3 print().
print("mean time for 1M data takes %s sec" % np.mean(time_1M))

sharp of 100k data takes  0.280818939209  sec
sharp of 100k data takes  0.259896039963  sec
sharp of 100k data takes  0.283408164978  sec
sharp of 100k data takes  0.28193116188  sec
sharp of 100k data takes  0.301149845123  sec
mean time for 1M data takes 0.28144083023071287 sec
