Merge pull request #2016 from khalednasr/develop

Basic neural networks, issue #1975
shogun-toolbox · Mar 25, 2014 · 2baecff · 2baecff
2 parents b493737 + acf5ccd
commit 2baecff
Show file tree

Hide file tree

Showing 14 changed files with 2,236 additions and 0 deletions.
diff --git a/doc/ipython-notebooks/neuralnets/neuralnets_digits.ipynb b/doc/ipython-notebooks/neuralnets/neuralnets_digits.ipynb
@@ -0,0 +1,86 @@
+{
+ "metadata": {
+  "name": "neuralnets_digits"
+ },
+ "nbformat": 3,
+ "nbformat_minor": 0,
+ "worksheets": [
+  {
+   "cells": [
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "# Neural Networks for digit classification\n",
+      "## by Khaled Nasr\n"
+     ]
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "This notebook illustrates how to use the NeuralNets module for digit classification. We'll use the USPS dataset of handwritten digits to train and test a neural network."
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "import numpy as np\n",
+      "from scipy.io import loadmat\n",
+      "from modshogun import*\n",
+      "\n",
+      "# load the dataset\n",
+      "dataset = loadmat('../../../data/multiclass/usps.mat')\n",
+      "\n",
+      "Xall = dataset['data']\n",
+      "# the usps dataset has the digits labeled from 1 to 10 \n",
+      "# we'll subtract 1 to make them in the 0-9 range instead\n",
+      "Yall = np.array(dataset['label'].squeeze(), dtype=np.double)-1 \n",
+      "\n",
+      "# use the first 5000 examples for training, all remaining examples (index 5000 onwards) for testing\n",
+      "Xtrain = RealFeatures(Xall[:,0:5000])\n",
+      "Ytrain = MulticlassLabels(Yall[0:5000])\n",
+      "Xtest = RealFeatures(Xall[:,5000:])\n",
+      "Ytest = MulticlassLabels(Yall[5000:])\n",
+      "\n",
+      "# setup the network's layers\n",
+      "layers = DynamicObjectArray()\n",
+      "layers.append_element(NeuralLogisticLayer(50)) # 50 neurons in the hidden layer\n",
+      "layers.append_element(NeuralLogisticLayer(10)) # 10 neurons in the output layer\n",
+      "\n",
+      "# create the network\n",
+      "net = NeuralNetwork()\n",
+      "net.initialize(256, layers) # 256 inputs, one for each pixel (images in the dataset are 16*16 pixels)\n",
+      "\n",
+      "# set training parameters\n",
+      "net.l2_coefficient = 0.001\n",
+      "\n",
+      "# train the network\n",
+      "net.set_labels(Ytrain)\n",
+      "net.train(Xtrain)\n",
+      "\n",
+      "# test the network\n",
+      "predictions = net.apply_multiclass(Xtest)\n",
+      "evaluator = MulticlassAccuracy()\n",
+      "accuracy = evaluator.evaluate(predictions, Ytest)\n",
+      "\n",
+      "print \"Accuracy on the test set =\", accuracy * 100"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": []
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [],
+     "language": "python",
+     "metadata": {},
+     "outputs": []
+    }
+   ],
+   "metadata": {}
+  }
+ ]
+}
diff --git a/src/interfaces/modular/NeuralNets.i b/src/interfaces/modular/NeuralNets.i
@@ -0,0 +1,15 @@
+/* %newobject marks the return value of these functions as newly allocated, so the target language takes ownership of it */
+%newobject apply(CFeatures* data);
+%newobject apply_multiclass(CFeatures* data);
+
+/* Remove the leading 'C' prefix from the class names exposed to the target language */
+%rename(NeuralNetwork) CNeuralNetwork;
+%rename(NeuralLayer) CNeuralLayer;
+%rename(NeuralLinearLayer) CNeuralLinearLayer;
+%rename(NeuralLogisticLayer) CNeuralLogisticLayer;
+
+/* Include the class headers so SWIG generates wrappers that make the classes visible from within the target language */
+%include <shogun/neuralnets/NeuralNetwork.h>
+%include <shogun/neuralnets/NeuralLayer.h>
+%include <shogun/neuralnets/NeuralLinearLayer.h>
+%include <shogun/neuralnets/NeuralLogisticLayer.h>
+
+
diff --git a/src/interfaces/modular/NeuralNets_includes.i b/src/interfaces/modular/NeuralNets_includes.i
@@ -0,0 +1,7 @@
+%{
+#include <shogun/neuralnets/NeuralNetwork.h>
+#include <shogun/neuralnets/NeuralLayer.h>
+#include <shogun/neuralnets/NeuralLinearLayer.h>
+#include <shogun/neuralnets/NeuralLogisticLayer.h>
+%}
+
diff --git a/src/interfaces/modular/modshogun.i b/src/interfaces/modular/modshogun.i
@@ -67,6 +67,7 @@
 %include "Metric_includes.i"
 %include "GaussianProcess_includes.i"
 %include "Ensemble_includes.i"
+%include "NeuralNets_includes.i"
 
 %include "SGBase.i"
 %include "Machine.i"
@@ -93,6 +94,7 @@
 %include "Metric.i"
 %include "GaussianProcess.i"
 %include "Ensemble.i"
+%include "NeuralNets.i"
 
 #if defined(SWIGPERL)
 %include "abstract_types_extension.i"

diff --git a/src/shogun/machine/Machine.h b/src/shogun/machine/Machine.h
@@ -85,6 +85,7 @@ enum EMachineType
 	CT_GAUSSIANPROCESSBINARY = 530,
 	CT_GAUSSIANPROCESSMULTICLASS = 540,
 	CT_STOCHASTICSOSVM = 550,
+	CT_NEURALNETWORK = 560,
 	CT_BAGGING
 };
 

diff --git a/src/shogun/neuralnets/NeuralLayer.cpp b/src/shogun/neuralnets/NeuralLayer.cpp
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2014, Shogun Toolbox Foundation
+ * All rights reserved.
+ * 
+ * Redistribution and use in source and binary forms, with or without 
+ * modification, are permitted provided that the following conditions are met:
+
+ * 1. Redistributions of source code must retain the above copyright notice, 
+ * this list of conditions and the following disclaimer.
+ * 
+ * 2. Redistributions in binary form must reproduce the above copyright notice, 
+ * this list of conditions and the following disclaimer in the documentation 
+ * and/or other materials provided with the distribution.
+ * 
+ * 3. Neither the name of the copyright holder nor the names of its 
+ * contributors may be used to endorse or promote products derived from this 
+ * software without specific prior written permission.
+
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
+ * POSSIBILITY OF SUCH DAMAGE.
+ * 
+ * Written (W) 2014 Khaled Nasr
+ */
+
+#include <shogun/base/Parameter.h>
+#include <shogun/neuralnets/NeuralLayer.h>
+
+using namespace shogun;
+
+/** default constructor: all setup is delegated to init() */
+CNeuralLayer::CNeuralLayer() : CSGObject()
+{
+	init();
+}
+
+
+/** constructor
+ *
+ * @param num_neurons number of neurons in this layer
+ */
+CNeuralLayer::CNeuralLayer(int32_t num_neurons) : CSGObject()
+{
+	// init() zeroes m_num_neurons, so it has to run before the assignment
+	init();
+	m_num_neurons = num_neurons;
+}
+
+/** destructor; the body is empty, no explicit cleanup is done here */
+CNeuralLayer::~CNeuralLayer() {}
+
+/** records the size of the previous layer
+ *
+ * Only the stored count is updated here; no buffers are allocated.
+ *
+ * @param previous_layer_num_neurons number of neurons in the previous layer
+ */
+void CNeuralLayer::initialize(int32_t previous_layer_num_neurons)
+{
+	m_previous_layer_num_neurons = previous_layer_num_neurons;
+}
+
+/** stores the batch size and re-allocates the layer's buffers to match it
+ *
+ * Any buffer that is currently allocated is freed first. Afterwards the
+ * activations and local gradients buffers hold
+ * m_num_neurons*batch_size elements each, and the input gradients buffer
+ * holds m_previous_layer_num_neurons*batch_size elements. The contents of
+ * the new buffers are not set by this function.
+ *
+ * @param batch_size new batch size
+ */
+void CNeuralLayer::set_batch_size(int32_t batch_size)
+{
+	m_batch_size = batch_size;
+
+	// step 1: drop the buffers left over from a previous call, if any
+	if (m_activations.vector)
+		SG_FREE(m_activations.vector);
+	if (m_input_gradients.vector)
+		SG_FREE(m_input_gradients.vector);
+	if (m_local_gradients.vector)
+		SG_FREE(m_local_gradients.vector);
+
+	// step 2: update the lengths for the new batch size
+	m_activations.vlen = m_num_neurons * m_batch_size;
+	m_input_gradients.vlen = m_previous_layer_num_neurons * m_batch_size;
+	m_local_gradients.vlen = m_num_neurons * m_batch_size;
+
+	// step 3: allocate fresh storage of exactly those lengths
+	m_activations.vector = SG_MALLOC(float64_t, m_activations.vlen);
+	m_input_gradients.vector = SG_MALLOC(float64_t, m_input_gradients.vlen);
+	m_local_gradients.vector = SG_MALLOC(float64_t, m_local_gradients.vlen);
+}
+
+/** shared constructor helper: zeroes the size members and registers all
+ * members through SG_ADD
+ */
+void CNeuralLayer::init()
+{
+	// a freshly constructed layer has no neurons and no batch size yet
+	m_batch_size = 0;
+	m_previous_layer_num_neurons = 0;
+	m_num_neurons = 0;
+
+	// registration order is kept as is: sizes first, then the buffers
+	SG_ADD(&m_num_neurons, "num_neurons", "Number of Neurons",
+	       MS_NOT_AVAILABLE);
+	SG_ADD(&m_previous_layer_num_neurons, "previous_layer_num_neurons",
+	       "Number of neurons in the previous layer", MS_NOT_AVAILABLE);
+	SG_ADD(&m_batch_size, "batch_size", "Batch Size", MS_NOT_AVAILABLE);
+
+	SG_ADD(&m_activations, "activations", "Activations", MS_NOT_AVAILABLE);
+	SG_ADD(&m_input_gradients, "input_gradients", "Input Gradients",
+	       MS_NOT_AVAILABLE);
+	SG_ADD(&m_local_gradients, "local_gradients", "Local Gradients",
+	       MS_NOT_AVAILABLE);
+}