nzhiltsov/Ext-RESCAL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Merge branch 'sampling' into develop

commit 1566d70a209706dd4acd0ea3e8a80d26a342da5b 2 parents ac9aec1 + 16109ab
authored
12 commonFunctions.py
 @@ -1,4 +1,6 @@ from numpy import dot +from numpy.random import randint +from itertools import ifilter def squareFrobeniusNormOfSparse(M): """ @@ -15,4 +17,12 @@ def fitNorm(row, col, Xi, ARk, A): Computes i,j element of the squared Frobenius norm of the fitting matrix """ ARAtValue = dot(ARk[row,:], A[col,:]) - return (Xi[row, col] - ARAtValue)**2 + return (Xi[row, col] - ARAtValue)**2 + +def reservoir(it, k): + ls = [next(it) for _ in range(k)] + for i, x in enumerate(it, k + 1): + j = randint(0, i) + if j < k: + ls[j] = x + return ls
16 commonFunctionsTest.py
 @@ -1,9 +1,10 @@ from numpy import ones, dot import numpy as np from scipy.sparse import coo_matrix -from commonFunctions import squareFrobeniusNormOfSparse, fitNorm +from commonFunctions import squareFrobeniusNormOfSparse, fitNorm, reservoir from numpy.linalg.linalg import norm from nose.tools import assert_almost_equal +from itertools import product def testSquareFrobeniusNorm(): zeroCount = 2 @@ -28,6 +29,17 @@ def testFitNorm(): for i in xrange(n): for j in xrange(n): fits.append(fitNorm(i, j, X, ARk, A)) - assert_almost_equal(sum(fits), expectedNorm) + assert_almost_equal(sum(fits), expectedNorm) +def testSampling(): + xs = range(0, 3) + ys = range(0, 4) + size = int(0.9 * len(xs) * len(ys)) + sampledElements = reservoir(product(xs, ys), size) + assert len(sampledElements) == size + checkedElements = [] + for i in xrange(size): + assert checkedElements.count(sampledElements[i]) == 0 + checkedElements.append(sampledElements[i]) + assert len(checkedElements) == len(sampledElements)
48 extrescal.py
 @@ -1,5 +1,5 @@ import logging, time, argparse -from numpy import dot, zeros, empty, kron, array, eye, argmin, ones, savetxt, loadtxt +from numpy import dot, zeros, kron, array, eye, ones, savetxt, loadtxt from numpy.linalg import qr, pinv, norm, inv from numpy.random import rand from scipy import sparse @@ -8,9 +8,7 @@ import os import fnmatch from commonFunctions import squareFrobeniusNormOfSparse, fitNorm -from extrescalFunctions import updateA, updateV, matrixFitNormElement - -__version__ = "0.1" +from extrescalFunctions import updateA, updateV, matrixFitNormElement, checkingIndices __DEF_MAXITER = 50 __DEF_PREHEATNUM = 1 @@ -19,6 +17,8 @@ __DEF_CONV = 1e-5 __DEF_LMBDA = 0 __DEF_EXACT_FIT = False +__DEF_MATRIX_FIT_SAMPLE_RATIO = 1 +__DEF_TENSOR_SLICE_FIT_SAMPLE_RATIO = 0.1 def rescal(X, D, rank, **kwargs): """ @@ -82,6 +82,8 @@ def rescal(X, D, rank, **kwargs): lmbda = kwargs.pop('lmbda', __DEF_LMBDA) preheatnum = kwargs.pop('preheatnum', __DEF_PREHEATNUM) exactfit = kwargs.pop('exactfit', __DEF_EXACT_FIT) + matrixSampleRatio = kwargs.pop('matrixSampleRation', __DEF_MATRIX_FIT_SAMPLE_RATIO) + tensorSliceSampleRatio = kwargs.pop('tensorSliceSampleRation', __DEF_TENSOR_SLICE_FIT_SAMPLE_RATIO) if not len(kwargs) == 0: raise ValueError( 'Unknown keywords (%s)' % (kwargs.keys()) ) @@ -128,8 +130,17 @@ def rescal(X, D, rank, **kwargs): # compute factorization fit = fitchange = fitold = 0 exectimes = [] - - for iter in xrange(maxIter): + + # prepare the checking indices to compute the fit + if exactfit: + matrixFitIndices = [] + tensorFitIndices = [] + else : + matrixFitIndices = checkingIndices(D, ratio = matrixSampleRatio) + tensorFitIndices = [checkingIndices(M, ratio = tensorSliceSampleRatio) for M in X] + _log.debug('[Algorithm] Finished sampling of indices to compute the fit values.') + + for iterNum in xrange(maxIter): tic = time.clock() A = updateA(X, A, R, V, D, lmbda) @@ -149,7 +160,7 @@ def rescal(X, D, rank, **kwargs): extRegularizedFit = 0 regRFit = 0 
fitDAV = 0 - if iter > preheatnum: + if iterNum > preheatnum: if lmbda != 0: for i in xrange(len(R)): regRFit += norm(R[i])**2 @@ -158,20 +169,21 @@ def rescal(X, D, rank, **kwargs): extRegularizedFit = lmbda*(norm(V)**2) if exactfit: fitDAV = norm(D - dot(A,V))**2 - else : - Drow, Dcol = D.nonzero() - for ff in xrange(len(Drow)): - fitDAV += matrixFitNormElement(Drow[ff], Dcol[ff], D, A, V) + else : + for ff in xrange(len(matrixFitIndices)): + x, y = matrixFitIndices[ff] + fitDAV += matrixFitNormElement(x, y, D, A, V) if exactfit: for i in xrange(len(R)): tensorFit += norm(X[i] - dot(A,dot(R[i], A.T)))**2 else : for i in xrange(len(R)): - ARk = dot(A, R[i]) - Xrow, Xcol = X[i].nonzero() - for rr in xrange(len(Xrow)): - tensorFit += fitNorm(Xrow[rr], Xcol[rr], X[i], ARk, A) + ARk = dot(A, R[i]) + iTensorFitIndices = tensorFitIndices[i] + for rr in xrange(len(iTensorFitIndices)): + m, l = iTensorFitIndices[rr] + tensorFit += fitNorm(m, l, X[i], ARk, A) fit = 0.5*tensorFit fit += regularizedFit @@ -184,13 +196,13 @@ def rescal(X, D, rank, **kwargs): toc = time.clock() exectimes.append( toc - tic ) fitchange = abs(fitold - fit) - _log.debug('[%3d] total fit: %.10f | tensor fit: %.10f | matrix fit: %.10f | delta: %.10f | secs: %.5f' % (iter, + _log.debug('[%3d] total fit: %.10f | tensor fit: %.10f | matrix fit: %.10f | delta: %.10f | secs: %.5f' % (iterNum, fit, tensorFit, fitDAV, fitchange, exectimes[-1])) fitold = fit - if iter > preheatnum and fitchange < conv: + if iterNum > preheatnum and fitchange < conv: break - return A, R, fit, iter+1, array(exectimes), V + return A, R, fit, iterNum+1, array(exectimes), V def __updateR(X, A, lmbda): r = A.shape[1]
20 extrescalFunctions.py
 @@ -1,6 +1,9 @@ import numpy as np from numpy import dot, zeros, eye, empty from numpy.linalg import inv +from commonFunctions import reservoir +from itertools import product +from numpy.random import random_integers def updateA(X, A, R, V, D, lmbda): n, rank = A.shape @@ -31,3 +34,20 @@ def matrixFitNormElement(i, j, D, A, V): Computes i,j element of the fitting matrix Frobenius norm ||D - A*V|| """ return (D[i,j] - dot(A[i,:], V[:, j]))**2 + +def checkingIndices(M, ratio = 1): + """ + Returns the indices for computing fit values + based on non-zero values as well as sample indices + (the sample size is proportional to the given ratio ([0,1]) and number of matrix columns) + """ + rowSize, colSize = M.shape + nonzeroRows, nonzeroCols = M.nonzero() + nonzeroIndices = [(nonzeroRows[i], nonzeroCols[i]) for i in range(len(nonzeroRows))] + sampledRows = random_integers(0, rowSize - 1, round(ratio*colSize)) + sampledCols = random_integers(0, colSize - 1, round(ratio*colSize)) + sampledIndices = zip(sampledRows, sampledCols) + indices = list(set(sampledIndices + nonzeroIndices)) + return indices + +
7 extrescalFunctionsTest.py
 @@ -1,7 +1,8 @@ from scipy.sparse import coo_matrix from numpy import ones, dot, eye import numpy as np -from extrescalFunctions import updateA, updateV, matrixFitNormElement +from extrescalFunctions import updateA, updateV, matrixFitNormElement,\ + checkingIndices from nose.tools import assert_almost_equal from numpy.linalg import inv from numpy.linalg.linalg import norm @@ -80,4 +81,8 @@ def testMatrixFitNorm(): fit += matrixFitNormElement(i, j, D, A, V) assert_almost_equal(fit, expectedNorm) +def testCheckingIndices(): + D = coo_matrix((ones(6),([0, 1, 2, 3, 4, 5], [0, 1, 1, 2, 3, 3])), shape=(6, 4), dtype=np.uint8).tocsr() + indices = checkingIndices(D) + assert len(indices) >= 6