basic neural networks

shogun-toolbox · Mar 23, 2014 · 77716f2 · 77716f2
1 parent adc9500
commit 77716f2
Show file tree

Hide file tree

Showing 12 changed files with 1,468 additions and 0 deletions.
diff --git a/doc/ipython-notebooks/neuralnets/neuralnets_digits.ipynb b/doc/ipython-notebooks/neuralnets/neuralnets_digits.ipynb
@@ -0,0 +1,108 @@
+{
+ "metadata": {
+  "name": "neuralnets_digits"
+ },
+ "nbformat": 3,
+ "nbformat_minor": 0,
+ "worksheets": [
+  {
+   "cells": [
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "#Neural Networks for digit classification\n",
+      "##by Khaled Nasr\n"
+     ]
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "This notebook illustrates how to use the NeuralNets module for digit classification. We'll use the USPS dataset of handwritten digits to train and test a neural network."
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "import numpy as np\n",
+      "from scipy.io import loadmat\n",
+      "from modshogun import NeuralNetwork\n",
+      "from modshogun import NeuralLogisticLayer\n",
+      "from modshogun import DynamicObjectArray\n",
+      "from modshogun import RealFeatures\n",
+      "\n",
+      "# load the dataset\n",
+      "dataset = loadmat('../../../data/multiclass/usps.mat')\n",
+      "\n",
+      "Xall = dataset['data']\n",
+      "# the usps dataset has the digits labeled from 1 to 10 \n",
+      "# we'll subtract 1 to make them in the 0-9 range instead\n",
+      "Yall = dataset['label']-1 \n",
+      "\n",
+      "# the neural network will have 10 neurons in its output layer, one for each digit\n",
+      "# therefore the label for each example needs to be a vector of 10 elements\n",
+      "Yall_expanded = np.eye(10)[:,np.squeeze(Yall)]\n",
+      "\n",
+      "# use the first 5000 examples for training, the rest will be used for testing\n",
+      "Xtrain = Xall[:,0:5000]\n",
+      "Ytrain = Yall_expanded[:,0:5000]\n",
+      "\n",
+      "# setup the network's layers\n",
+      "layers = DynamicObjectArray()\n",
+      "layers.append_element(NeuralLogisticLayer(50)) # 50 neurons in the hidden layer\n",
+      "layers.append_element(NeuralLogisticLayer(10)) # 10 neurons in the output layer\n",
+      "\n",
+      "# create the network\n",
+      "net = NeuralNetwork()\n",
+      "net.initialize(256, layers) # 256 inputs, one for each pixel (images in the dataset are 16*16 pixels)\n",
+      "\n",
+      "# turn on regularization to reduce overfitting\n",
+      "net.set_L2_regularization(0.001)\n",
+      "\n",
+      "# train the network, the error each iteration is printed to the console\n",
+      "net.train_gradient_descent(RealFeatures(Xtrain), \n",
+      "                           RealFeatures(Ytrain),\n",
+      "                           300, # number of iterations over the training set\n",
+      "                           1000); # mini-batch size\n",
+      "\n",
+      "# prepare the test set\n",
+      "Xtest = Xall[:,5001:-1]\n",
+      "Ytest = Yall[:,5001:-1]\n",
+      "\n",
+      "# apply the network to the test inputs\n",
+      "predictions = net.apply(RealFeatures(Xtest)).get_feature_matrix()\n",
+      "predictions = np.argmax(predictions, axis=0)\n",
+      "\n",
+      "# measure the test error\n",
+      "test_error = float(np.sum(predictions!=Ytest))/Ytest.shape[1] * 100\n",
+      "\n",
+      "print \"Test Error =\", test_error, \"%\""
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "output_type": "stream",
+       "stream": "stdout",
+       "text": [
+        "Test Error = 8.58938547486 %\n"
+       ]
+      }
+     ],
+     "prompt_number": 4
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [],
+     "language": "python",
+     "metadata": {},
+     "outputs": []
+    }
+   ],
+   "metadata": {}
+  }
+ ]
+}
diff --git a/src/interfaces/modular/NeuralNets.i b/src/interfaces/modular/NeuralNets.i
@@ -0,0 +1,23 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Written (W) Khaled Nasr
+ */
+
+/* apply() hands back a newly created object: mark it with %newobject so the
+ * generated wrapper lets the target language take ownership of the result
+ */
+%newobject apply();
+
+/* Remove C Prefix: expose each class to the target language without the
+ * leading 'C' used on the C++ side (e.g. CNeuralNetwork -> NeuralNetwork)
+ */
+%rename(NeuralNetwork) CNeuralNetwork;
+%rename(NeuralLayer) CNeuralLayer;
+%rename(NeuralLinearLayer) CNeuralLinearLayer;
+%rename(NeuralLogisticLayer) CNeuralLogisticLayer;
+
+/* Include Class Headers to make them visible from within the target language.
+ * These must come after the %rename directives above so that the renames
+ * apply to the declarations being parsed.
+ */
+%include <shogun/neuralnets/NeuralNetwork.h>
+%include <shogun/neuralnets/NeuralLayer.h>
+%include <shogun/neuralnets/NeuralLinearLayer.h>
+%include <shogun/neuralnets/NeuralLogisticLayer.h>
+
diff --git a/src/interfaces/modular/NeuralNets_includes.i b/src/interfaces/modular/NeuralNets_includes.i
@@ -0,0 +1,7 @@
+%{
+#include <shogun/neuralnets/NeuralNetwork.h>
+#include <shogun/neuralnets/NeuralLayer.h>
+#include <shogun/neuralnets/NeuralLinearLayer.h>
+#include <shogun/neuralnets/NeuralLogisticLayer.h>
+%}
+
diff --git a/src/interfaces/modular/modshogun.i b/src/interfaces/modular/modshogun.i
@@ -67,6 +67,7 @@
 %include "Metric_includes.i"
 %include "GaussianProcess_includes.i"
 %include "Ensemble_includes.i"
+%include "NeuralNets_includes.i"
 
 %include "SGBase.i"
 %include "Machine.i"
@@ -93,6 +94,7 @@
 %include "Metric.i"
 %include "GaussianProcess.i"
 %include "Ensemble.i"
+%include "NeuralNets.i"
 
 #if defined(SWIGPERL)
 %include "abstract_types_extension.i"

diff --git a/src/shogun/neuralnets/NeuralLayer.cpp b/src/shogun/neuralnets/NeuralLayer.cpp
@@ -0,0 +1,95 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Written (W) 2014 Khaled Nasr
+ */
+
+#include <shogun/base/Parameter.h>
+#include <shogun/neuralnets/NeuralLayer.h>
+
+using namespace shogun;
+
+/** Default constructor.
+ *
+ * Creates a layer with zero neurons. The size of the previous layer and the
+ * batch size start at zero as well, so that every field registered by init()
+ * holds a defined value until initialize() and set_batch_size() are called.
+ */
+CNeuralLayer::CNeuralLayer()
+: CSGObject(), m_num_neurons(0)
+{
+	// These are set in the body (not the initializer list) so the code does
+	// not depend on the member declaration order in the header.
+	m_previous_layer_num_neurons = 0;
+	m_batch_size = 0;
+
+	init();
+}
+
+
+/** Constructor.
+ *
+ * The size of the previous layer and the batch size start at zero, so that
+ * every field registered by init() holds a defined value until initialize()
+ * and set_batch_size() are called.
+ *
+ * @param num_neurons number of neurons in this layer
+ */
+CNeuralLayer::CNeuralLayer(int32_t num_neurons)
+: CSGObject(), m_num_neurons(num_neurons)
+{
+	// These are set in the body (not the initializer list) so the code does
+	// not depend on the member declaration order in the header.
+	m_previous_layer_num_neurons = 0;
+	m_batch_size = 0;
+
+	init();
+}
+
+/** Copy constructor.
+ *
+ * Takes over the fields of the given layer through shallow_copy() and then
+ * registers this object's own parameters.
+ *
+ * @param orig layer to copy from
+ */
+CNeuralLayer::CNeuralLayer(const CNeuralLayer& orig)
+: CSGObject()
+{
+	// copy first, then register: init() records the addresses of this
+	// object's members, not of orig's
+	this->shallow_copy(orig);
+	this->init();
+}
+
+
+/** Destructor. Intentionally empty: no explicit cleanup is performed here. */
+CNeuralLayer::~CNeuralLayer()
+{
+}
+
+/** Records the number of neurons in the layer that feeds into this one.
+ *
+ * The stored value is used by set_batch_size() to size the input gradients
+ * buffer.
+ *
+ * @param previous_layer_num_neurons number of neurons in the previous layer
+ */
+void CNeuralLayer::initialize(int32_t previous_layer_num_neurons)
+{
+	this->m_previous_layer_num_neurons = previous_layer_num_neurons;
+}
+
+/** Sets the batch size and (re)allocates the per-batch buffers.
+ *
+ * After the call:
+ * - the activations and local gradients buffers each hold
+ *   m_num_neurons*batch_size elements
+ * - the input gradients buffer holds
+ *   m_previous_layer_num_neurons*batch_size elements
+ *
+ * The contents of the new buffers are not set by this method.
+ *
+ * @param batch_size number of examples in a batch
+ */
+void CNeuralLayer::set_batch_size(int32_t batch_size)
+{
+	m_batch_size = batch_size;
+
+	// Assign freshly allocated vectors instead of freeing and re-pointing the
+	// raw buffers by hand. SGVector is reference counted and shallow_copy()
+	// lets several layers share one buffer, so a manual SG_FREE here would
+	// leave the other owners with a dangling pointer (and a later double
+	// free). Assignment releases the old buffer only when this layer was its
+	// last owner.
+	m_activations = SGVector<float64_t>(m_num_neurons*m_batch_size);
+	m_input_gradients =
+		SGVector<float64_t>(m_previous_layer_num_neurons*m_batch_size);
+	m_local_gradients = SGVector<float64_t>(m_num_neurons*m_batch_size);
+}
+
+/** Registers the layer's fields as parameters of this object.
+ *
+ * Each SG_ADD call passes the address of a member together with a name and a
+ * description. No member is assigned a value here; callers (the constructors)
+ * are responsible for that.
+ *
+ * NOTE(review): MS_NOT_AVAILABLE presumably marks the parameter as not
+ * available for model selection - confirm against the SG_ADD documentation.
+ */
+void CNeuralLayer::init()
+{
+	// layer dimensions
+	SG_ADD(&m_num_neurons, "num_neurons",
+	       "Number of Neurons", MS_NOT_AVAILABLE);
+	SG_ADD(&m_previous_layer_num_neurons, "previous_layer_num_neurons",
+	       "Number of neurons in the previous layer", MS_NOT_AVAILABLE);
+	SG_ADD(&m_batch_size, "batch_size",
+	       "Batch Size", MS_NOT_AVAILABLE);
+	// per-batch buffers, sized by set_batch_size()
+	SG_ADD(&m_activations, "activations",
+	       "Activations", MS_NOT_AVAILABLE);
+	SG_ADD(&m_input_gradients, "input_gradients",
+	       "Input Gradients", MS_NOT_AVAILABLE);
+	SG_ADD(&m_local_gradients, "local_gradients",
+	       "Local Gradients", MS_NOT_AVAILABLE);
+}
+
+/** Copies the fields of another layer into this one.
+ *
+ * The three size fields are copied by value. The buffers are taken over by
+ * SGVector assignment, which is expected to share the underlying memory with
+ * orig rather than duplicate it (hence "shallow").
+ *
+ * @param orig layer to copy from
+ */
+void CNeuralLayer::shallow_copy(const CNeuralLayer &orig)
+{
+	// per-batch buffers
+	m_local_gradients = orig.m_local_gradients;
+	m_input_gradients = orig.m_input_gradients;
+	m_activations = orig.m_activations;
+
+	// layer dimensions
+	m_batch_size = orig.m_batch_size;
+	m_previous_layer_num_neurons = orig.m_previous_layer_num_neurons;
+	m_num_neurons = orig.m_num_neurons;
+}
+
+
+
+
+
+
+
+
+
+