Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP

Loading…

Made kmeans be multithreaded #12

Open
wants to merge 2 commits into from

2 participants

@mynameisfiber

Added openmp support in the milk/unsupervised/_kmeans.cpp module.

@luispedro
Owner

This is all good, but I don't want to have openmp on the master branch because of portability (it sucks, but I don't have a good solution).

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
This page is out of date. Refresh to see the latest.
View
2  milk/tests/test_kmeans.py
@@ -54,6 +54,6 @@ def test_kmeans_return_partial():
assignments,centroids = milk.unsupervised.kmeans(features, 2, R=129)
centroids_ = milk.unsupervised.kmeans(features, 2, R=129, return_assignments=False)
assignments_ = milk.unsupervised.kmeans(features, 2, R=129, return_centroids=False)
- assert np.all(centroids == centroids_)
assert np.all(assignments == assignments_)
+ assert np.allclose(centroids, centroids_)
View
122 milk/unsupervised/_kmeans.cpp
@@ -20,8 +20,79 @@ void assert_type_contiguous(PyArrayObject* array,int type) {
}
template <typename ftype>
-int computecentroids(ftype* f, ftype* centroids, PyArrayObject* a_assignments, PyArrayObject* a_counts, const int N, const int Nf, const int k) {
+bool assignclass_euclidian(ftype* f, ftype* centroids, PyArrayObject* a_assignments, const int N, const int Nf, const int k) {
+ Py_BEGIN_ALLOW_THREADS
+ npy_int32* assignments = static_cast<npy_int32*>(PyArray_DATA(a_assignments));
+
+ #pragma parallel for schedule(static)
+ for (int i=0; i < N; i++) {
+ int best_cluster = -1;
+ ftype min_distance = 0.0;
+ for (int c=0; c < k; c++) {
+ ftype cur_distance = 0.0;
+ for (int j=0; j < Nf; j++) {
+ ftype term = (f[i * Nf + j] - centroids[c * Nf + j]);
+ cur_distance += term * term;
+ }
+ if (best_cluster == -1 || cur_distance < min_distance) {
+ min_distance = cur_distance;
+ best_cluster = c;
+ }
+ }
+ assignments[i] = 1; //best_cluster;
+ }
+ return true;
+ Py_END_ALLOW_THREADS
+}
+
+PyObject* py_assignclass_euclidian(PyObject* self, PyObject* args) {
+ try {
+ PyArrayObject* fmatrix;
+ PyArrayObject* centroids;
+ PyArrayObject* assignments;
+ if (!PyArg_ParseTuple(args, "OOO", &fmatrix, &centroids, &assignments)) { throw Kmeans_Exception("Wrong number of arguments for assignclass_euclidian."); }
+ if (!PyArray_Check(fmatrix) || !PyArray_ISCONTIGUOUS(fmatrix)) throw Kmeans_Exception("fmatrix not what was expected.");
+ if (!PyArray_Check(centroids) || !PyArray_ISCONTIGUOUS(centroids)) throw Kmeans_Exception("centroids not what was expected.");
+ if (!PyArray_Check(assignments) || !PyArray_ISCONTIGUOUS(assignments)) throw Kmeans_Exception("assignments not what was expected.");
+ if (PyArray_TYPE(assignments) != NPY_INT32) throw Kmeans_Exception("assignments should be int32.");
+ if (PyArray_TYPE(fmatrix) != PyArray_TYPE(centroids)) throw Kmeans_Exception("centroids and fmatrix should have same type.");
+ if (PyArray_NDIM(fmatrix) != 2) throw Kmeans_Exception("fmatrix should be two dimensional");
+ if (PyArray_NDIM(centroids) != 2) throw Kmeans_Exception("centroids should be two dimensional");
+ if (PyArray_NDIM(assignments) != 1) throw Kmeans_Exception("assignments should be two dimensional");
+
+ const int N = PyArray_DIM(fmatrix, 0);
+ const int Nf = PyArray_DIM(fmatrix, 1);
+ const int k = PyArray_DIM(centroids, 0);
+ if (PyArray_DIM(centroids, 1) != Nf) throw Kmeans_Exception("centroids has wrong number of features.");
+ if (PyArray_DIM(assignments, 0) != N) throw Kmeans_Exception("assignments has wrong size.");
+ switch (PyArray_TYPE(fmatrix)) {
+#define TRY_TYPE_ASSIGN(code, ftype) \
+ case code: \
+ if (assignclass_euclidian<ftype>( \
+ static_cast<ftype*>(PyArray_DATA(fmatrix)), \
+ static_cast<ftype*>(PyArray_DATA(centroids)), \
+ assignments, \
+ N, Nf, k)) { \
+ Py_RETURN_TRUE; \
+ } \
+ Py_RETURN_FALSE;
+
+ TRY_TYPE_ASSIGN(NPY_FLOAT, float);
+ TRY_TYPE_ASSIGN(NPY_DOUBLE, double);
+ }
+ throw Kmeans_Exception("Cannot handle this type.");
+ } catch (const Kmeans_Exception& exc) {
+ PyErr_SetString(PyExc_RuntimeError,exc.msg);
+ return 0;
+ } catch (...) {
+ PyErr_SetString(PyExc_RuntimeError,"Some sort of exception in assignclass_euclidian.");
+ return 0;
+ }
+}
+
+template <typename ftype>
+int computecentroids(ftype* f, ftype* centroids, PyArrayObject* a_assignments, PyArrayObject* a_counts, const int N, const int Nf, const int k) {
int zero_counts = 0;
Py_BEGIN_ALLOW_THREADS
const npy_int32* assignments = static_cast<npy_int32*>(PyArray_DATA(a_assignments));
@@ -30,16 +101,42 @@ int computecentroids(ftype* f, ftype* centroids, PyArrayObject* a_assignments, P
std::fill(counts, counts + k, 0);
std::fill(centroids, centroids + k*Nf, 0.0);
- for (int i = 0; i != N; i++){
- const int c = assignments[i];
- if (c >= k) continue; // throw Kmeans_Exception("wrong value in assignments");
- ftype* ck = centroids + Nf*c;
- for (int j = 0; j != Nf; ++j) {
- *ck++ += *f++;
+ #pragma omp parallel shared(assignments, counts, centroids, f)
+ {
+ int *local_counts = (int*) malloc(k * sizeof(int));
+ ftype *local_ck = (ftype*) malloc(k * Nf * sizeof(ftype));
+ std::fill(local_counts, local_counts + k, 0);
+ std::fill(local_ck, local_ck + k*Nf, ftype());
+
+ #pragma omp for schedule(static)
+ for (int i = 0; i < N; i++){
+ const int c = assignments[i];
+ if (c >= k) continue; // throw Kmeans_Exception("wrong value in assignments");
+ ftype* lck = local_ck + Nf*c;
+ for (int j = 0; j != Nf; ++j) {
+ *lck++ += f[i*Nf + j];
+ }
+ ++local_counts[c];
}
- ++counts[c];
+
+ ftype* ck = centroids;
+ ftype* lck = local_ck;
+ for (int c=0; c < k; c++) {
+ #pragma omp critical(reduce)
+ {
+ for (int j = 0; j != Nf; ++j) {
+ *ck++ += *lck++;
+ }
+ counts[c] += local_counts[c];
+ }
+ }
+
+ free(local_counts);
+ free(local_ck);
}
- for (int i = 0; i != k; ++i) {
+
+ #pragma omp parallel for reduction(+:zero_counts) schedule(static) shared(counts,centroids)
+ for (int i = 0; i < k; ++i) {
ftype* ck = centroids + Nf*i;
const ftype c = counts[i];
if (c == 0) {
@@ -80,7 +177,7 @@ PyObject* py_computecentroids(PyObject* self, PyObject* args) {
if (PyArray_DIM(assignments, 0) != N) throw Kmeans_Exception("assignments has wrong size.");
if (PyArray_DIM(counts, 0) != k) throw Kmeans_Exception("counts has wrong size.");
switch (PyArray_TYPE(fmatrix)) {
-#define TRY_TYPE(code, ftype) \
+#define TRY_TYPE_CENTROIDS(code, ftype) \
case code: \
if (computecentroids<ftype>( \
static_cast<ftype*>(PyArray_DATA(fmatrix)), \
@@ -92,8 +189,8 @@ PyObject* py_computecentroids(PyObject* self, PyObject* args) {
} \
Py_RETURN_FALSE;
- TRY_TYPE(NPY_FLOAT, float);
- TRY_TYPE(NPY_DOUBLE, double);
+ TRY_TYPE_CENTROIDS(NPY_FLOAT, float);
+ TRY_TYPE_CENTROIDS(NPY_DOUBLE, double);
}
throw Kmeans_Exception("Cannot handle this type.");
} catch (const Kmeans_Exception& exc) {
@@ -107,6 +204,7 @@ PyObject* py_computecentroids(PyObject* self, PyObject* args) {
PyMethodDef methods[] = {
{"computecentroids", py_computecentroids, METH_VARARGS , "Do NOT call directly.\n" },
+ {"assignclass_euclidian", py_assignclass_euclidian, METH_VARARGS , "Do NOT call directly.\n" },
{NULL, NULL,0,NULL},
};
View
22 milk/unsupervised/kmeans.py
@@ -219,13 +219,13 @@ def kmeans(fmatrix, k, distance='euclidean', max_iter=1000, R=None, return_assig
fmatrix = zscore(fmatrix)
distance = 'euclidean'
if distance == 'euclidean':
- def distfunction(fmatrix, cs, dists):
- dists = _dot3(fmatrix, (-2)*cs.T, dists)
+ def distfunction(fmatrix, cs, _):
+ dists = _dot3(fmatrix, (-2)*cs.T, None)
dists += np.array([np.dot(c,c) for c in cs])
# For a distance, we'd need to add the fmatrix**2 components, but
# it doesn't matter because we are going to perform an argmin() on
# the result.
- return dists
+ return dists.argmin(1)
elif distance == 'mahalanobis':
icov = kwargs.get('icov', None)
if icov is None:
@@ -234,13 +234,18 @@ def distfunction(fmatrix, cs, dists):
covmat = np.cov(fmatrix.T)
icov = linalg.inv(covmat)
def distfunction(fmatrix, cs, _):
- return np.array([_mahalanobis2(fmatrix, c, icov) for c in cs]).T
+ dists = np.array([_mahalanobis2(fmatrix, c, icov) for c in cs]).T
+ return dists.argmin(1)
else:
raise ValueError('milk.unsupervised.kmeans: `distance` argument unknown (%s)' % distance)
if k < 2:
raise ValueError('milk.unsupervised.kmeans `k` should be >= 2.')
if fmatrix.dtype in (np.float32, np.float64) and fmatrix.flags['C_CONTIGUOUS']:
computecentroids = _kmeans.computecentroids
+ if distance == 'euclidian':
+ def distfunction(fmatrix, centroids, assignments):
+ _kmeans.assignclass_euclidian(fmatrix, centroids, assignments)
+ return assignments
else:
computecentroids = _pycomputecentroids
@@ -256,19 +261,16 @@ def distfunction(fmatrix, cs, _):
prev = np.zeros(len(fmatrix), np.int32)
counts = np.empty(k, np.int32)
- dists = None
+ assignments = np.empty((fmatrix.shape[0],), dtype=np.int32)
for i in xrange(max_iter):
- dists = distfunction(fmatrix, centroids, dists)
- assignments = dists.argmin(1)
+ assignments = distfunction(fmatrix, centroids, assignments)
if np.all(assignments == prev):
break
- if computecentroids(fmatrix, centroids, assignments.astype(np.int32), counts):
+ if computecentroids(fmatrix, centroids, assignments.astype(np.int32, copy=False), counts):
(empty,) = np.where(counts == 0)
centroids = np.delete(centroids, empty, axis=0)
k = len(centroids)
counts = np.empty(k, np.int32)
- # This will cause new matrices to be allocated in the next iteration
- dists = None
prev[:] = assignments
if return_centroids and return_assignments:
return assignments, centroids
View
4 setup.py
@@ -57,7 +57,8 @@
'milk.supervised._lasso' : ['milk/supervised/_lasso.cpp'],
}
-compiler_args = ['-std=c++0x']
+compiler_args = ['-std=c++0x', '-fopenmp']
+link_args=['-lgomp']
if platform.system() == 'Darwin':
compiler_args.append('-stdlib=libc++')
@@ -67,6 +68,7 @@
undef_macros=undef_macros,
define_macros=define_macros,
extra_compile_args=compiler_args,
+ extra_link_args=link_args,
)
for key,sources in _extensions.items()
]
Something went wrong with that request. Please try again.