ENH Preliminary LASSO implementation

Accepts missing values (as NaNs)
1 parent 01f6d40 commit 6bc8474796ebfa010b83f08fa23afd53e985e082 @luispedro committed
Showing with 151 additions and 1 deletion.
  1. +1 −0 .gitignore
  2. +124 −0 milk/supervised/_lasso.cpp
  3. +20 −0 milk/supervised/lasso.py
  4. +6 −1 setup.py
1 .gitignore
@@ -2,6 +2,7 @@
milk/supervised/_svm.so
milk/supervised/_tree.so
milk/supervised/_perceptron.so
+milk/supervised/_lasso.so
milk/unsupervised/_som.so
milk/unsupervised/_kmeans.so
build
124 milk/supervised/_lasso.cpp
@@ -0,0 +1,124 @@
+// Copyright (C) 2012, Luis Pedro Coelho <luis@luispedro.org>
+// License: MIT
+
+#include <iostream>
+#include <memory>
+#include <cmath>
+#include <random>
+#include <cassert>
+#include <eigen3/Eigen/Dense>
+
+using namespace Eigen;
+
+extern "C" {
+ #include <Python.h>
+ #include <numpy/ndarrayobject.h>
+}
+
+
+namespace {
+
+template <typename T>
+int random_int(T& random, const int max) {
+    std::uniform_int_distribution<int> dist(0, max - 1);
+    return dist(random);
+}
+
+
+int coordinate_descent(const MatrixXf& X, const MatrixXf& Y, MatrixXf& B, const int max_iter, const float lam, int maxnops=-1, const float eps=1e-7) {
+    std::mt19937 r;
+    MatrixXf residuals = Y - B*X;
+    if (maxnops == -1) {
+        maxnops = 2*B.size();
+    }
+    int nops = 0;
+    for (int it = 0; it != max_iter; ++it) {
+        const int i = random_int(r, B.rows());
+        const int j = random_int(r, B.cols());
+        // Given everything else as fixed, this comes down to a very simple
+        // 1-dimensional problem:
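+        // the update below fits Y(i,.) against X(j,.) over the non-NaN
+        // samples, i.e. the ordinary least-squares slope
+        //     best = (sum(x*y) - sum(x)*sum(y)/n) / (sum(x*x) - sum(x)^2/n).
+        // Neither the residuals of the other coefficients nor the L1 penalty
+        // (lam) enter this preliminary form of the update.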
+        const float prev = B(i,j); // remember the current value
+        int n = 0;
+        float x = 0.0;
+        float xy = 0.0;
+        float y = 0.0;
+        float x2 = 0.0;
+        for (int k = 0; k != Y.cols(); ++k) {
+            if (std::isnan(Y(i,k))) continue; // skip missing values
+            x += X(j,k);
+            y += Y(i,k);
+            x2 += X(j,k)*X(j,k);
+            xy += X(j,k)*Y(i,k);
+            ++n;
+        }
+        if (n == 0) continue; // every target in this output row is missing
+        const float best = (x2 == 0. ? 0.0 : (xy - x*y/n) / (x2 - x*x/n));
+        if (std::fabs(best - prev) < eps) {
+            ++nops;
+            if (nops > maxnops) return it;
+        } else {
+            nops = 0;
+            B(i,j) = best;
+        }
+    }
+    return max_iter;
+}
+
+// The numpy buffers are C-contiguous (row-major), so map them with a
+// row-major Eigen type to keep the element order intact.
+typedef Matrix<float, Dynamic, Dynamic, RowMajor> RowMatrixXf;
+Map<RowMatrixXf> as_eigen(PyArrayObject* arr) {
+    assert(PyArray_EquivTypenums(PyArray_TYPE(arr), NPY_FLOAT32));
+    return Map<RowMatrixXf>(
+        static_cast<float*>(PyArray_DATA(arr)),
+        PyArray_DIM(arr, 0),
+        PyArray_DIM(arr, 1));
+}
+
+const char* errmsg = "INTERNAL ERROR";
+PyObject* py_lasso(PyObject* self, PyObject* args) {
+    PyArrayObject* Y;
+    PyArrayObject* X;
+    PyArrayObject* B;
+    int max_iter;
+    float lam;
+    float eps;
+    if (!PyArg_ParseTuple(args, "OOOiff", &X, &Y, &B, &max_iter, &lam, &eps)) return NULL;
+    if (!PyArray_Check(X) || !PyArray_ISCARRAY_RO(X) ||
+        !PyArray_Check(Y) || !PyArray_ISCARRAY_RO(Y) ||
+        !PyArray_Check(B) || !PyArray_ISCARRAY(B) ||
+        !PyArray_EquivTypenums(PyArray_TYPE(X), NPY_FLOAT32) ||
+        !PyArray_EquivTypenums(PyArray_TYPE(Y), NPY_FLOAT32) ||
+        !PyArray_EquivTypenums(PyArray_TYPE(B), NPY_FLOAT32)) {
+        PyErr_SetString(PyExc_RuntimeError, errmsg);
+        return 0;
+    }
+    MatrixXf mX = as_eigen(X);
+    MatrixXf mY = as_eigen(Y);
+    MatrixXf mB = as_eigen(B);
+    const int iters = coordinate_descent(mX, mY, mB, max_iter, lam, -1, eps);
+    // copy the updated coefficients back into the numpy buffer (C order)
+    float* rB = static_cast<float*>(PyArray_DATA(B));
+    for (int y = 0; y != mB.rows(); ++y) {
+        for (int x = 0; x != mB.cols(); ++x) {
+            *rB++ = mB(y,x);
+        }
+    }
+
+    return Py_BuildValue("i", iters);
+}
+
+PyMethodDef methods[] = {
+ {"lasso", py_lasso, METH_VARARGS , "Do NOT call directly.\n" },
+ {NULL, NULL,0,NULL},
+};
+
+const char * module_doc =
+ "Internal Module.\n"
+ "\n"
+ "Do NOT use directly!\n";
+
+} // namespace
+
+extern "C"
+void init_lasso()
+ {
+ import_array();
+ (void)Py_InitModule3("_lasso", methods, module_doc);
+ }
+
20 milk/supervised/lasso.py
@@ -0,0 +1,20 @@
+import numpy as np
+import _lasso
+
+def lasso(X, Y, B=None, lam=1., max_iter=None, eps=None):
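+    '''
+    B = lasso(X, Y, B=None, lam=1., max_iter=None, eps=None)
+
+    Fit coefficients B for Y ~ B*X by randomized coordinate descent
+    (preliminary LASSO implementation). X is (d_x, n), Y is (d_y, n),
+    and the returned B is (d_y, d_x); all arrays are used as float32.
+    Entries of Y that are NaN are treated as missing and skipped.
+    '''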
+    X = np.ascontiguousarray(X, dtype=np.float32)
+    Y = np.ascontiguousarray(Y, dtype=np.float32)
+    if B is None:
+        B = np.zeros((Y.shape[0], X.shape[0]), np.float32)
+    else:
+        B = np.ascontiguousarray(B, dtype=np.float32)
+    if max_iter is None:
+        max_iter = 1024
+    if eps is None:
+        eps = 1e-5
+    if X.shape[0] != B.shape[1] or \
+            Y.shape[0] != B.shape[0] or \
+            X.shape[1] != Y.shape[1]:
+        raise ValueError('milk.supervised.lasso: Dimensions do not match')
+    _lasso.lasso(X, Y, B, max_iter, float(lam), float(eps))
+    return B
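
For context, a minimal usage sketch of the new wrapper (not part of the commit); the shapes follow the dimension checks above, and marking missing targets as NaN is the convention this commit adds:

    import numpy as np
    from milk.supervised import lasso

    X = np.random.rand(5, 100)        # 5 features x 100 samples
    Y = np.random.rand(3, 100)        # 3 outputs  x 100 samples
    Y[0, ::7] = np.nan                # missing targets, skipped during fitting
    B = lasso.lasso(X, Y, lam=0.5)    # (3, 5) float32 coefficient matrix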
7 setup.py
@@ -39,7 +39,10 @@
if os.environ.get('DEBUG'):
    undef_macros = ['NDEBUG']
    if os.environ.get('DEBUG') == '2':
-        define_macros = [('_GLIBCXX_DEBUG','1')]
+        define_macros = [
+            ('_GLIBCXX_DEBUG','1'),
+            ('EIGEN_INTERNAL_DEBUGGING', '1'),
+        ]
_extensions = {
@@ -49,12 +52,14 @@
    'milk.supervised._svm' : ['milk/supervised/_svm.cpp'],
    'milk.supervised._tree' : ['milk/supervised/_tree.cpp'],
    'milk.supervised._perceptron' : ['milk/supervised/_perceptron.cpp'],
+    'milk.supervised._lasso' : ['milk/supervised/_lasso.cpp'],
}
ext_modules = [
    Extension(key,
        sources=sources,
        undef_macros=undef_macros,
        define_macros=define_macros,
+        extra_compile_args=['--std=c++0x'],
        )
    for key,sources in _extensions.items()
]
