Skip to content

Commit

Permalink
Dataset for dealing with precomputed kernels.
Browse files Browse the repository at this point in the history
  • Loading branch information
fullung committed Jul 14, 2006
1 parent 6c5f3df commit 69cd19a
Show file tree
Hide file tree
Showing 7 changed files with 59 additions and 52 deletions.
66 changes: 39 additions & 27 deletions Lib/sandbox/svm/dataset.py
Expand Up @@ -44,34 +44,31 @@ def __init__(self, kernel, origdata=None):

self.iddatamap = {}

# Create Gram matrix as a list of vectors that have extra
# entries for id and end of record marker.
# Create Gram matrix as a list of vectors with an extra entry
# for the id field.
n = len(origdata)
grammat = [N.empty((n+2,), dtype=libsvm.svm_node_dtype)
grammat = [N.empty((n+1,), dtype=libsvm.svm_node_dtype)
for i in range(n)]
self.grammat = grammat

# Calculate Gram matrix. Refer to Kernel::kernel_precomputed
# in svm.cpp to see how this precomputed setup works.
for i, (y1, x1) in enumerate(origdata):
for i, (yi, xi) in enumerate(origdata):
id = i + 1
# XXX possible numpy bug
#grammat[i][[0,-1]] = (0, id), (-1, 0.0)
grammat[i][0] = 0, id
grammat[i][-1] = -1, 0.0
for j, (y2, x2) in enumerate(origdata[i:]):
# Map id to original vector so that we can find it again
# after the model has been trained. libsvm essentially
# provides the ids of the support vectors.
self.iddatamap[id] = xi
for j, (yj, xj) in enumerate(origdata[i:]):
# Gram matrix is symmetric, so calculate dot product
# once and store it in both required locations
z = kernel(x1, x2, svm_node_dot)
z = self.kernel(xi, xj, svm_node_dot)
# fix index so we assign to the right place
j += i
grammat[i][j+1] = 0, z
grammat[j][i+1] = 0, z
# Map id to original vector so that we can find it again
# after the model has been trained. libsvm essentially
# provides the ids of the support vectors.
self.iddatamap[id] = x1

grammat[i][j + 1] = 0, z
grammat[j][i + 1] = 0, z

def getdata(self):
return zip(map(lambda x: x[0], self.origdata), self.grammat)
data = property(getdata)
Expand All @@ -89,30 +86,45 @@ def combine(self, dataset):
Combine this dataset with another dataset by extending the
Gram matrix with the new inner products into a new matrix.
"""
n = len(self.origdata) + len(dataset.data)
n = len(self.origdata) + len(dataset.data) + 1
newgrammat = []

# copy original Gram matrix
for i in range(len(self.origdata)):
row = N.empty((n,), dtype=libsvm.svm_node_dtype)
row[:-1] = self.grammat[i]
newgrammat.append(row)

# copy id->vector map
newiddatamap = dict(self.iddatamap.items())
newrow = N.zeros((n,), dtype=libsvm.svm_node_dtype)
oldrow = self.grammat[i]
newrow[:len(oldrow)] = oldrow
newgrammat.append(newrow)

# prepare Gram matrix for new data
for i in range(len(dataset.data)):
id = i + len(self.origdata) + 1
row = N.empty((n,), dtype=libsvm.svm_node_dtype)
row[[0,-1]] = (0, id), (-1, 0.0)
row = N.zeros((n,), dtype=libsvm.svm_node_dtype)
newgrammat.append(row)
newiddatamap[id] = dataset.data[i][1]

newiddatamap = dict(self.iddatamap.items())
m = len(self.origdata)
for i, (yi, xi) in enumerate(dataset.data):
i += m
for j, (yj, xj) in enumerate(self.origdata):
z = self.kernel(xi, xj, svm_node_dot)
newgrammat[i][j + 1] = 0, z
newgrammat[j][i + 1] = 0, z
for i, (yi, xi) in enumerate(dataset.data):
k = m + i
id = k + 1
newgrammat[k][0] = 0, id
newiddatamap[id] = xi
for j, (yj, xj) in enumerate(dataset.data[i:]):
z = self.kernel(xi, xj, svm_node_dot)
j += k
newgrammat[k][j + 1] = 0, z
newgrammat[j][k + 1] = 0, z

newdataset = self.__class__(self.kernel)
newdataset.origdata = self.origdata + dataset.data
newdataset.iddatamap = newiddatamap
newdataset.grammat = newgrammat
return newdataset

class LibSvmRegressionDataSet(LibSvmDataSet):
def __init__(self, origdata):
Expand Down
4 changes: 1 addition & 3 deletions Lib/sandbox/svm/tests/test_classification.py
@@ -1,13 +1,11 @@
from numpy.testing import *
import numpy as N

from numpy.testing import *
set_local_path('../..')

from svm.classification import *
from svm.dataset import LibSvmClassificationDataSet
from svm.dataset import LibSvmTestDataSet
from svm.kernel import *

restore_path()

class test_classification(NumpyTestCase):
Expand Down
25 changes: 15 additions & 10 deletions Lib/sandbox/svm/tests/test_dataset.py
@@ -1,13 +1,11 @@
from numpy.testing import *
import numpy as N

from numpy.testing import *
set_local_path('../..')

from svm.dataset import *
from svm.kernel import *
from svm.dataset import convert_to_svm_node, svm_node_dot
from svm.libsvm import svm_node_dtype

restore_path()

class test_dataset(NumpyTestCase):
Expand Down Expand Up @@ -95,23 +93,30 @@ def check_precompute(self):
# get a new dataset containing the precomputed data
pcdata = origdata.precompute(kernel)
for i, row in enumerate(pcdata.grammat):
valuerow = row[1:-1]['value']
valuerow = row[1:]['value']
assert_array_almost_equal(valuerow, expt_grammat[i])

def check_combine(self):
kernel = LinearKernel()

y1 = N.random.randn(2)
x1 = N.random.randn(len(y1), 2)
y1 = N.random.randn(10)
x1 = N.random.randn(len(y1), 10)
origdata = LibSvmRegressionDataSet(zip(y1, x1))
pcdata = origdata.precompute(kernel)

y2 = N.random.randn(1)
y2 = N.random.randn(5)
x2 = N.random.randn(len(y2), x1.shape[1])
moredata = LibSvmRegressionDataSet(zip(y2, x2))

#pcdata.combine(moredata)
#pcdata.copy_and_extend(moredata)
morepcdata = pcdata.combine(moredata)

expt_grammat = N.empty((len(y1) + len(y2),)*2)
x = N.vstack([x1,x2])
for i, xi in enumerate(x):
for j, xj in enumerate(x):
expt_grammat[i, j] = kernel(xi, xj, N.dot)
for i, row in enumerate(morepcdata.grammat):
valuerow = row[1:]['value']
assert_array_almost_equal(valuerow, expt_grammat[i])

if __name__ == '__main__':
NumpyTest().run()
4 changes: 1 addition & 3 deletions Lib/sandbox/svm/tests/test_kernel.py
@@ -1,10 +1,8 @@
from numpy.testing import *
import numpy as N

from numpy.testing import *
set_local_path('../..')

from svm.kernel import *

restore_path()

class test_kernel(NumpyTestCase):
Expand Down
4 changes: 1 addition & 3 deletions Lib/sandbox/svm/tests/test_libsvm.py
@@ -1,10 +1,8 @@
from numpy.testing import *
import numpy as N

from numpy.testing import *
set_local_path('../..')

import svm.libsvm as libsvm

restore_path()

class test_libsvm(NumpyTestCase):
Expand Down
4 changes: 1 addition & 3 deletions Lib/sandbox/svm/tests/test_oneclass.py
@@ -1,13 +1,11 @@
from numpy.testing import *
import numpy as N

from numpy.testing import *
set_local_path('../..')

from svm.oneclass import *
from svm.dataset import LibSvmOneClassDataSet
from svm.dataset import LibSvmTestDataSet
from svm.kernel import *

restore_path()

class test_oneclass(NumpyTestCase):
Expand Down
4 changes: 1 addition & 3 deletions Lib/sandbox/svm/tests/test_regression.py
@@ -1,13 +1,11 @@
from numpy.testing import *
import numpy as N

from numpy.testing import *
set_local_path('../..')

from svm.regression import *
from svm.dataset import LibSvmRegressionDataSet
from svm.dataset import LibSvmTestDataSet
from svm.kernel import *

restore_path()

class test_regression(NumpyTestCase):
Expand Down

0 comments on commit 69cd19a

Please sign in to comment.