Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP

Comparing changes

Choose two branches to see what's changed or to start a new pull request. If you need to, you can also compare across forks.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks.
base: 9817b6d826
...
compare: 96079e1576
Checking mergeability… Don't worry, you can still create the pull request.
  • 6 commits
  • 6 files changed
  • 0 commit comments
  • 1 contributor
View
29 README.md
@@ -4,18 +4,18 @@ Ext-RESCAL
Scalable Tensor Factorization
------------------------------
-Ext-RESCAL is a memory efficient implementation of the [RESCAL algorithm](http://www.cip.ifi.lmu.de/~nickel/data/slides-icml2011.pdf). It is written in Python and relies on the SciPy Sparse module.
+Ext-RESCAL is a memory efficient implementation of [RESCAL](http://www.cip.ifi.lmu.de/~nickel/data/slides-icml2011.pdf), a state-of-the-art algorithm for DEDICOM tensor factorization. Ext-RESCAL is written in Python and relies on the SciPy Sparse module.
Current Version
------------
-[0.1.3](https://github.com/nzhiltsov/Ext-RESCAL/archive/0.1.3.zip)
+[0.2](https://github.com/nzhiltsov/Ext-RESCAL/archive/0.2.zip)
Features
------------
* 3-D sparse tensor factorization [1]
* Joint 3-D sparse tensor and 2-D sparse matrix factorization (extended version) [2]
-* The implementation scales well to the domains with millions of nodes on the affordable hardware
+* The implementation provably scales well to domains with millions of nodes on affordable hardware
* Handy input format
[1] M. Nickel, V. Tresp, H. Kriegel. A Three-way Model for Collective Learning on Multi-relational Data // Proceedings of the 28th International Conference on Machine Learning (ICML'2011). - 2011.
@@ -47,17 +47,28 @@ The test data set represents a tiny entity graph of 3 adjacency matrices (tensor
<pre>python extrescal.py --latent 2 --lmbda 0.001 --input tiny-mixed-example --outputentities entity.embeddings.csv --outputterms term.embeddings.csv --log extrescal.log</pre>
-
-Credit
+Development and Contribution
----------------------
-The original algorithms are an intellectual property of the authors of the cited papers.
+This is a fork of the original code base provided by [Maximilian Nickel](http://www.cip.ifi.lmu.de/~nickel/). Ext-RESCAL has been developed by [Nikita Zhiltsov](http://cll.niimm.ksu.ru/cms/lang/en_US/main/people/zhiltsov). Ext-RESCAL may contain bugs; if you find any, feel free to contribute patches via pull requests against the _develop_ branch.
-Development and Contribution
-----------------------
-This is a fork of the original code base provided by [Maximilian Nickel](http://www.cip.ifi.lmu.de/~nickel/). Ext-RESCAL has been developed by [Nikita Zhiltsov](http://cll.niimm.ksu.ru/cms/lang/en_US/main/people/zhiltsov). Ext-RESCAL may contain some bugs, so, if you find any of them, feel free to contribute the patches via pull requests.
+Release Notes
+------------
+0.2 (February 26, 2013):
+
+* Add an option to approximate the objective function via random sampling
+* Bug fixes
+* Change the default settings
+
+0.1 (January 31, 2013):
+* Basic implementation of both algorithms
+
+Credit
+----------------------
+
+The original algorithms are the intellectual property of the authors of the cited papers.
Disclaimer
---------------------
View
12 commonFunctions.py
@@ -1,4 +1,6 @@
from numpy import dot
+from numpy.random import randint
+from itertools import ifilter
def squareFrobeniusNormOfSparse(M):
"""
@@ -15,4 +17,12 @@ def fitNorm(row, col, Xi, ARk, A):
Computes i,j element of the squared Frobenius norm of the fitting matrix
"""
ARAtValue = dot(ARk[row,:], A[col,:])
- return (Xi[row, col] - ARAtValue)**2
+ return (Xi[row, col] - ARAtValue)**2
+
+def reservoir(it, k):
+ ls = [next(it) for _ in range(k)]
+ for i, x in enumerate(it, k + 1):
+ j = randint(0, i)
+ if j < k:
+ ls[j] = x
+ return ls
View
16 commonFunctionsTest.py
@@ -1,9 +1,10 @@
from numpy import ones, dot
import numpy as np
from scipy.sparse import coo_matrix
-from commonFunctions import squareFrobeniusNormOfSparse, fitNorm
+from commonFunctions import squareFrobeniusNormOfSparse, fitNorm, reservoir
from numpy.linalg.linalg import norm
from nose.tools import assert_almost_equal
+from itertools import product
def testSquareFrobeniusNorm():
zeroCount = 2
@@ -28,6 +29,17 @@ def testFitNorm():
for i in xrange(n):
for j in xrange(n):
fits.append(fitNorm(i, j, X, ARk, A))
- assert_almost_equal(sum(fits), expectedNorm)
+ assert_almost_equal(sum(fits), expectedNorm)
+def testSampling():
+ xs = range(0, 3)
+ ys = range(0, 4)
+ size = int(0.9 * len(xs) * len(ys))
+ sampledElements = reservoir(product(xs, ys), size)
+ assert len(sampledElements) == size
+ checkedElements = []
+ for i in xrange(size):
+ assert checkedElements.count(sampledElements[i]) == 0
+ checkedElements.append(sampledElements[i])
+ assert len(checkedElements) == len(sampledElements)
View
48 extrescal.py
@@ -1,5 +1,5 @@
import logging, time, argparse
-from numpy import dot, zeros, empty, kron, array, eye, argmin, ones, savetxt, loadtxt
+from numpy import dot, zeros, kron, array, eye, ones, savetxt, loadtxt
from numpy.linalg import qr, pinv, norm, inv
from numpy.random import rand
from scipy import sparse
@@ -8,9 +8,7 @@
import os
import fnmatch
from commonFunctions import squareFrobeniusNormOfSparse, fitNorm
-from extrescalFunctions import updateA, updateV, matrixFitNormElement
-
-__version__ = "0.1"
+from extrescalFunctions import updateA, updateV, matrixFitNormElement, checkingIndices
__DEF_MAXITER = 50
__DEF_PREHEATNUM = 1
@@ -19,6 +17,8 @@
__DEF_CONV = 1e-5
__DEF_LMBDA = 0
__DEF_EXACT_FIT = False
+__DEF_MATRIX_FIT_SAMPLE_RATIO = 1
+__DEF_TENSOR_SLICE_FIT_SAMPLE_RATIO = 0.1
def rescal(X, D, rank, **kwargs):
"""
@@ -82,6 +82,8 @@ def rescal(X, D, rank, **kwargs):
lmbda = kwargs.pop('lmbda', __DEF_LMBDA)
preheatnum = kwargs.pop('preheatnum', __DEF_PREHEATNUM)
exactfit = kwargs.pop('exactfit', __DEF_EXACT_FIT)
+ matrixSampleRatio = kwargs.pop('matrixSampleRation', __DEF_MATRIX_FIT_SAMPLE_RATIO)
+ tensorSliceSampleRatio = kwargs.pop('tensorSliceSampleRation', __DEF_TENSOR_SLICE_FIT_SAMPLE_RATIO)
if not len(kwargs) == 0:
raise ValueError( 'Unknown keywords (%s)' % (kwargs.keys()) )
@@ -128,8 +130,17 @@ def rescal(X, D, rank, **kwargs):
# compute factorization
fit = fitchange = fitold = 0
exectimes = []
-
- for iter in xrange(maxIter):
+
+ # prepare the checking indices to compute the fit
+ if exactfit:
+ matrixFitIndices = []
+ tensorFitIndices = []
+ else :
+ matrixFitIndices = checkingIndices(D, ratio = matrixSampleRatio)
+ tensorFitIndices = [checkingIndices(M, ratio = tensorSliceSampleRatio) for M in X]
+ _log.debug('[Algorithm] Finished sampling of indices to compute the fit values.')
+
+ for iterNum in xrange(maxIter):
tic = time.clock()
A = updateA(X, A, R, V, D, lmbda)
@@ -149,7 +160,7 @@ def rescal(X, D, rank, **kwargs):
extRegularizedFit = 0
regRFit = 0
fitDAV = 0
- if iter > preheatnum:
+ if iterNum > preheatnum:
if lmbda != 0:
for i in xrange(len(R)):
regRFit += norm(R[i])**2
@@ -158,20 +169,21 @@ def rescal(X, D, rank, **kwargs):
extRegularizedFit = lmbda*(norm(V)**2)
if exactfit:
fitDAV = norm(D - dot(A,V))**2
- else :
- Drow, Dcol = D.nonzero()
- for ff in xrange(len(Drow)):
- fitDAV += matrixFitNormElement(Drow[ff], Dcol[ff], D, A, V)
+ else :
+ for ff in xrange(len(matrixFitIndices)):
+ x, y = matrixFitIndices[ff]
+ fitDAV += matrixFitNormElement(x, y, D, A, V)
if exactfit:
for i in xrange(len(R)):
tensorFit += norm(X[i] - dot(A,dot(R[i], A.T)))**2
else :
for i in xrange(len(R)):
- ARk = dot(A, R[i])
- Xrow, Xcol = X[i].nonzero()
- for rr in xrange(len(Xrow)):
- tensorFit += fitNorm(Xrow[rr], Xcol[rr], X[i], ARk, A)
+ ARk = dot(A, R[i])
+ iTensorFitIndices = tensorFitIndices[i]
+ for rr in xrange(len(iTensorFitIndices)):
+ m, l = iTensorFitIndices[rr]
+ tensorFit += fitNorm(m, l, X[i], ARk, A)
fit = 0.5*tensorFit
fit += regularizedFit
@@ -184,13 +196,13 @@ def rescal(X, D, rank, **kwargs):
toc = time.clock()
exectimes.append( toc - tic )
fitchange = abs(fitold - fit)
- _log.debug('[%3d] total fit: %.10f | tensor fit: %.10f | matrix fit: %.10f | delta: %.10f | secs: %.5f' % (iter,
+ _log.debug('[%3d] total fit: %.10f | tensor fit: %.10f | matrix fit: %.10f | delta: %.10f | secs: %.5f' % (iterNum,
fit, tensorFit, fitDAV, fitchange, exectimes[-1]))
fitold = fit
- if iter > preheatnum and fitchange < conv:
+ if iterNum > preheatnum and fitchange < conv:
break
- return A, R, fit, iter+1, array(exectimes), V
+ return A, R, fit, iterNum+1, array(exectimes), V
def __updateR(X, A, lmbda):
r = A.shape[1]
View
20 extrescalFunctions.py
@@ -1,6 +1,9 @@
import numpy as np
from numpy import dot, zeros, eye, empty
from numpy.linalg import inv
+from commonFunctions import reservoir
+from itertools import product
+from numpy.random import random_integers
def updateA(X, A, R, V, D, lmbda):
n, rank = A.shape
@@ -31,3 +34,20 @@ def matrixFitNormElement(i, j, D, A, V):
Computes i,j element of the fitting matrix Frobenius norm ||D - A*V||
"""
return (D[i,j] - dot(A[i,:], V[:, j]))**2
+
+def checkingIndices(M, ratio = 1):
+ """
+ Returns the indices for computing fit values
+ based on non-zero values as well as sample indices
+ (the sample size is proportional to the given ratio ([0,1]) and number of matrix columns)
+ """
+ rowSize, colSize = M.shape
+ nonzeroRows, nonzeroCols = M.nonzero()
+ nonzeroIndices = [(nonzeroRows[i], nonzeroCols[i]) for i in range(len(nonzeroRows))]
+ sampledRows = random_integers(0, rowSize - 1, round(ratio*colSize))
+ sampledCols = random_integers(0, colSize - 1, round(ratio*colSize))
+ sampledIndices = zip(sampledRows, sampledCols)
+ indices = list(set(sampledIndices + nonzeroIndices))
+ return indices
+
+
View
7 extrescalFunctionsTest.py
@@ -1,7 +1,8 @@
from scipy.sparse import coo_matrix
from numpy import ones, dot, eye
import numpy as np
-from extrescalFunctions import updateA, updateV, matrixFitNormElement
+from extrescalFunctions import updateA, updateV, matrixFitNormElement,\
+ checkingIndices
from nose.tools import assert_almost_equal
from numpy.linalg import inv
from numpy.linalg.linalg import norm
@@ -80,4 +81,8 @@ def testMatrixFitNorm():
fit += matrixFitNormElement(i, j, D, A, V)
assert_almost_equal(fit, expectedNorm)
+def testCheckingIndices():
+ D = coo_matrix((ones(6),([0, 1, 2, 3, 4, 5], [0, 1, 1, 2, 3, 3])), shape=(6, 4), dtype=np.uint8).tocsr()
+ indices = checkingIndices(D)
+ assert len(indices) >= 6

No commit comments for this range

Something went wrong with that request. Please try again.