Browse files

ENH Add perceptron algorithm

An oldie, but a goodie. Simple, with O(N) learning per pass.
  • Loading branch information...
1 parent e9fa7f2 commit 9a142e1876c822344c7479e692d764a217b2ec3f Luis Pedro Coelho committed Jul 29, 2011
Showing with 158 additions and 0 deletions.
  1. +1 −0 .gitignore
  2. +1 −0 ChangeLog
  3. +91 −0 milk/supervised/_perceptron.cpp
  4. +39 −0 milk/supervised/perceptron.py
  5. +25 −0 milk/tests/test_perceptron.py
  6. +1 −0 setup.py
View
1 .gitignore
@@ -1,6 +1,7 @@
*.pyc
milk/supervised/_svm.so
milk/supervised/_tree.so
+milk/supervised/_perceptron.so
milk/unsupervised/_som.so
milk/unsupervised/_kmeans.so
build
View
1 ChangeLog
@@ -1,4 +1,5 @@
Version 0.3.10+
+ * Add perceptron learner
* Set random seed in random forest learner
* Add warning to milk/__init__.py if import fails
* Add return value to ``gridminimise``
View
91 milk/supervised/_perceptron.cpp
@@ -0,0 +1,91 @@
+// Copyright (C) 2011, Luis Pedro Coelho <luis@luispedro.org>
+// License: MIT
+
+#include <iostream>
+#include <memory>
+#include <cmath>
+#include <cassert>
+extern "C" {
+ #include <Python.h>
+ #include <numpy/ndarrayobject.h>
+}
+
+
+namespace {
+
+template <typename T>
+int perceptron(PyArrayObject* data_arr, const int* labels, PyArrayObject* weights_arr, double eta) {
+ const T* data = reinterpret_cast<T*>(PyArray_DATA(data_arr));
+ T* weights = reinterpret_cast<T*>(PyArray_DATA(weights_arr));
+ const int N0 = PyArray_DIM(data_arr, 0);
+ const int N1 = PyArray_DIM(data_arr, 1);
+ int nr_errors = 0;
+ for (int i = 0; i != N0; ++i, data += N1, ++labels) {
+ T val = weights[0];
+ for (int j = 0; j != N1; ++j) {
+ val += weights[j+1] * data[j];
+ }
+ int ell = (val > 0);
+ if (ell != *labels) {
+ int pm = (*labels ? +1 : -1);
+ ++nr_errors;
+ T error = pm * eta * std::abs(pm-val);
+ weights[0] += error;
+ for (int j = 0; j != N1; ++j) {
+ weights[j+1] += error*data[j];
+ }
+ }
+ }
+ return nr_errors;
+}
+
+PyObject* py_perceptron(PyObject* self, PyObject* args) {
+ const char* errmsg = "Arguments were not what was expected for perceptron.\n"
+ "This is an internal function: Do not call directly unless you know exactly what you're doing.\n";
+ PyArrayObject* data;
+ PyArrayObject* labels;
+ PyArrayObject* weights;
+ double eta;
+ if (!PyArg_ParseTuple(args, "OOOd", &data, &labels, &weights, &eta)) {
+ PyErr_SetString(PyExc_RuntimeError,errmsg);
+ return 0;
+ }
+ if (!PyArray_Check(data) || !PyArray_ISCONTIGUOUS(data) ||
+ !PyArray_Check(weights) || !PyArray_ISCONTIGUOUS(weights) ||
+ !PyArray_Check(labels) || !PyArray_ISCONTIGUOUS(labels) || !PyArray_EquivTypenums(PyArray_TYPE(labels), NPY_INT) ||
+ PyArray_TYPE(data) != PyArray_TYPE(weights)||
+ PyArray_NDIM(data) != 2 || PyArray_NDIM(weights) != 1 || PyArray_DIM(data,1) + 1 != PyArray_DIM(weights,0)) {
+ PyErr_SetString(PyExc_RuntimeError,errmsg);
+ return 0;
+ }
+ int nr_errors;
+ if (PyArray_TYPE(data) == NPY_FLOAT) {
+ nr_errors = perceptron<float>(data, reinterpret_cast<const int*>(PyArray_DATA(labels)), weights, eta);
+ } else if (PyArray_TYPE(data) == NPY_DOUBLE) {
+ nr_errors = perceptron<double>(data, reinterpret_cast<const int*>(PyArray_DATA(labels)), weights, eta);
+ } else {
+ PyErr_SetString(PyExc_RuntimeError, errmsg);
+ return 0;
+ }
+ return PyLong_FromLong(nr_errors);
+}
+
// Method table exposed to Python: a single raw entry point.
PyMethodDef methods[] = {
  {"perceptron", py_perceptron, METH_VARARGS , "Do NOT call directly.\n" },
  {NULL, NULL,0,NULL},  // sentinel terminating the table
};

// Module docstring: this module is an implementation detail of
// milk.supervised.perceptron.
const char * module_doc =
    "Internal Module.\n"
    "\n"
    "Do NOT use directly!\n";
+
+} // namespace
+
+extern "C"
+void init_perceptron()
+ {
+ import_array();
+ (void)Py_InitModule3("_perceptron", methods, module_doc);
+ }
+
View
39 milk/supervised/perceptron.py
@@ -0,0 +1,39 @@
+# -*- coding: utf-8 -*-
+# Copyright (C) 2011, Luis Pedro Coelho <luis@luispedro.org>
+# vim: set ts=4 sts=4 sw=4 expandtab smartindent:
+#
+# License: MIT. See COPYING.MIT file in the milk distribution
+
+import numpy as np
+from .classifier import normaliselabels
+from .base import supervised_model
+from . import _perceptron
+
class perceptron_model(supervised_model):
    '''
    Model produced by ``perceptron_learner``.

    Holds the weight vector ``w``, where ``w[0]`` is the bias term and
    ``w[1:]`` are the per-feature weights.
    '''
    def __init__(self, w):
        self.w = w

    def apply(self, f):
        '''
        Classify a single feature vector.

        Returns True when the biased linear activation is positive.
        '''
        features = np.asanyarray(f)
        activation = np.dot(features, self.w[1:]) + self.w[0]
        return activation > 0
+
class perceptron_learner(object):
    '''
    learner = perceptron_learner(eta=.1, max_iters=128)
    model = learner.train(features, labels)

    Perceptron learner: sweeps over the data applying the perceptron
    update rule until a sweep makes no mistakes or ``max_iters`` sweeps
    have been performed.

    Parameters
    ----------
    eta : float, optional
        learning rate (default: 0.1)
    max_iters : int, optional
        maximum number of passes over the data (default: 128)
    '''
    def __init__(self, eta=.1, max_iters=128):
        self.eta = eta
        self.max_iters = max_iters

    def train(self, features, labels, normalisedlabels=False, **kwargs):
        if not normalisedlabels:
            labels, _ = normaliselabels(labels)
        features = np.asanyarray(features)
        if features.dtype not in (np.float32, np.float64):
            features = features.astype(np.float64)
        # The C extension requires a C-contiguous float matrix and a
        # C-contiguous array of C ints for the labels. Without the explicit
        # conversion below, int64 labels (the default on most 64-bit
        # platforms) or a non-contiguous feature matrix would make
        # _perceptron.perceptron raise a RuntimeError.
        features = np.ascontiguousarray(features)
        labels = np.ascontiguousarray(labels, dtype=np.intc)
        weights = np.zeros(features.shape[1]+1, features.dtype)
        for i in xrange(self.max_iters):
            errors = _perceptron.perceptron(features, labels, weights, self.eta)
            if not errors:
                # Converged: the last sweep made no mistakes.
                break
        return perceptron_model(weights)
+
+
View
25 milk/tests/test_perceptron.py
@@ -0,0 +1,25 @@
+import numpy as np
+from milk.supervised.perceptron import perceptron_learner
+from milk.supervised import _perceptron
+from milksets.yeast import load
+
def test_raw():
    # Drive the C extension directly on a linearly separable-ish problem:
    # two clusters of 50 points, the second shifted by +.5 in every feature.
    np.random.seed(23)
    data = np.random.random((100,10))
    data[50:] += .5
    # BUGFIX: _perceptron.perceptron requires labels to be an array of C
    # ints. np.repeat yields the platform default integer (int64 on most
    # 64-bit systems), which the extension rejects with a RuntimeError.
    labels = np.repeat((0,1), 50).astype(np.intc)
    weights = np.zeros((11))
    eta = 0.1
    for i in xrange(20):
        _perceptron.perceptron(data, labels, weights, eta)
    # After 20 passes the number of errors in one more pass should be small.
    errs = _perceptron.perceptron(data, labels, weights, eta)
    assert errs < 10
+
def test_wrapper():
    # Train through the public learner interface on the yeast dataset,
    # binarising the labels, and require clearly-better-than-chance accuracy.
    features, labels = load()
    labels = (labels >= 5)

    model = perceptron_learner().train(features, labels)
    predictions = [model.apply(f) for f in features]
    assert np.mean(labels != predictions) < .35
View
1 setup.py
@@ -40,6 +40,7 @@
'milk.supervised._svm' : ['milk/supervised/_svm.cpp'],
'milk.supervised._tree' : ['milk/supervised/_tree.cpp'],
+ 'milk.supervised._perceptron' : ['milk/supervised/_perceptron.cpp'],
}
ext_modules = [
Extension(ext, sources=sources) for ext,sources in _extensions.iteritems()

0 comments on commit 9a142e1

Please sign in to comment.