Added Neural HardTanh Layer and unit test #3037

Closed · wants to merge 3 commits
74 changes: 74 additions & 0 deletions src/shogun/neuralnets/NeuralHardTanhLayer.cpp
@@ -0,0 +1,74 @@
/*
* Copyright (c) 2016, Shogun Toolbox Foundation
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* Written (W) 2016 Arasu Arun
*/

#include <shogun/neuralnets/NeuralHardTanhLayer.h>
#include <shogun/mathematics/Math.h>
#include <shogun/lib/SGVector.h>

using namespace shogun;

CNeuralHardTanhLayer::CNeuralHardTanhLayer() : CNeuralLinearLayer()
{
	init();
}

CNeuralHardTanhLayer::CNeuralHardTanhLayer(int32_t num_neurons):
	CNeuralLinearLayer(num_neurons)
{
	init();
}

void CNeuralHardTanhLayer::compute_activations(
	SGVector<float64_t> parameters,
	CDynamicObjectArray* layers)
{
	CNeuralLinearLayer::compute_activations(parameters, layers);

	int32_t len = m_num_neurons*m_batch_size;
	for (int32_t i=0; i<len; i++)
	{
		m_activations[i] = m_activations[i] >= m_max_act ? m_max_act :
			CMath::max<float64_t>(m_activations[i], m_min_act);
	}
}

Review comment (Member) on the activation loop: parallelize
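A minimal sketch of that suggestion, assuming the build enables OpenMP (Shogun's actual threading setup may differ): the clamp is independent per element, so a single pragma over the existing loop is enough. Illustration only, not part of the PR.

```cpp
// element-wise clamp, parallelized across the num_neurons*batch_size entries
// (assumes OpenMP; mirrors the loop in compute_activations above)
int32_t len = m_num_neurons*m_batch_size;
#pragma omp parallel for
for (int32_t i=0; i<len; i++)
{
	m_activations[i] = m_activations[i] >= m_max_act ? m_max_act :
		CMath::max<float64_t>(m_activations[i], m_min_act);
}
```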

void CNeuralHardTanhLayer::init()
{
	m_min_act = -1.0;
	m_max_act = 1.0;

	SG_ADD(&m_min_act, "min_act",
		"Minimum Value", MS_NOT_AVAILABLE);
	SG_ADD(&m_max_act, "max_act",
		"Maximum Value", MS_NOT_AVAILABLE);
}

Review comment (Member) on the SG_ADD calls: Nice. Yeah if you find unregistered parameters, do add them. They are what makes Shogun classes serialisable. Also make sure that they are initialised properly, otherwise the automatically generated serialisation unit tests will fail in travis.
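To illustrate the reviewer's point about why SG_ADD matters, here is a hedged sketch of a serialisation round trip; the class and method names (CSerializableAsciiFile, save_serializable, load_serializable) follow Shogun's API of this era and should be treated as assumptions, not part of the PR.

```cpp
#include <shogun/io/SerializableAsciiFile.h>
#include <shogun/neuralnets/NeuralHardTanhLayer.h>

// save: only parameters registered via SG_ADD (min_act, max_act, ...)
// are walked by the generic serialisation machinery and written out
CNeuralHardTanhLayer* layer = new CNeuralHardTanhLayer(9);
CSerializableAsciiFile* out = new CSerializableAsciiFile("layer.txt", 'w');
layer->save_serializable(out);
out->close();
SG_UNREF(out);

// load: a default-constructed layer is repopulated from the same file,
// which is why the members must be initialised in init()
CNeuralHardTanhLayer* loaded = new CNeuralHardTanhLayer();
CSerializableAsciiFile* in = new CSerializableAsciiFile("layer.txt", 'r');
loaded->load_serializable(in);
in->close();
SG_UNREF(in);
SG_UNREF(layer);
SG_UNREF(loaded);
```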
127 changes: 127 additions & 0 deletions src/shogun/neuralnets/NeuralHardTanhLayer.h
@@ -0,0 +1,127 @@
/*
* Copyright (c) 2016 Shogun Toolbox Foundation
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* Written (W) 2016 Arasu Arun
*/

#ifndef __NEURALHARDTANHLAYER_H__
#define __NEURALHARDTANHLAYER_H__

#include <shogun/neuralnets/NeuralLinearLayer.h>

namespace shogun
{
/** @brief Neural layer with [hard tanh neurons]
 * (http://ronan.collobert.com/pub/matos/2004_links_icml.pdf)
 *
 * Activations are computed according to:
 * \f[
 * \begin{cases}
 * max\_act &\mbox{if } z > max\_act \\
 * min\_act &\mbox{if } z < min\_act \\
 * z &\mbox{otherwise}
 * \end{cases}
 * \f]
 * where
 * \f[ z=W*x+b \f]
 * and W is the weight matrix, b is the bias vector, x is the input vector,
 * and min_act, max_act are parameters.
 *
 * Default value of min_act is -1.0 and max_act is 1.0.
 *
 * When used as an output layer, a squared error measure is used.
 */

Review comment (Member) on the parameter description: The reference should be included in here, not in the PR description. Nobody reads PR descriptions ...
Reply (Author): Oh! I misunderstood your intention. Handled it. :P

Review comment (Member) on the squared error line: Is there any reference to cite where this is coming from and where one can find evidence that it is useful?
Reply (Author): I edited this PR's description to include some sources. Should've done so at the beginning, sorry about that.
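Concretely, the activation rule documented above is easy to sanity-check in isolation. A minimal standalone snippet using the layer's default bounds (not part of the PR):

```cpp
#include <algorithm>
#include <cstdio>

int main()
{
	// hard tanh with the layer's default bounds: clamp z into [min_act, max_act]
	const double min_act = -1.0, max_act = 1.0;
	const double z[] = {-2.5, -0.3, 0.7, 4.2};
	for (double v : z)
		std::printf("%5.2f -> %5.2f\n", v, std::min(max_act, std::max(v, min_act)));
	// prints: -2.50 -> -1.00, -0.30 -> -0.30, 0.70 -> 0.70, 4.20 -> 1.00
	return 0;
}
```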
class CNeuralHardTanhLayer : public CNeuralLinearLayer
{
public:
	/** default constructor */
	CNeuralHardTanhLayer();

	/** Constructor
	 *
	 * @param num_neurons Number of neurons in this layer
	 */
	CNeuralHardTanhLayer(int32_t num_neurons);

	virtual ~CNeuralHardTanhLayer() {}

	/** Sets the lower bound of the activation value
	 *
	 * @param min_act new value of min_act
	 */
	virtual void set_min_act(float64_t min_act) { m_min_act=min_act; }

	/** Gets the lower bound of the activation value
	 *
	 * @return min_act
	 */
	virtual float64_t get_min_act() { return m_min_act; }

	/** Sets the upper bound of the activation value
	 *
	 * @param max_act new value of max_act
	 */
	virtual void set_max_act(float64_t max_act) { m_max_act=max_act; }

	/** Gets the upper bound of the activation value
	 *
	 * @return max_act
	 */
	virtual float64_t get_max_act() { return m_max_act; }

	/** Computes the activations of the neurons in this layer. Results should
	 * be stored in m_activations. To be used only with non-input layers.
	 *
	 * @param parameters Vector of size get_num_parameters(), contains the
	 * parameters of the layer
	 *
	 * @param layers Array of layers that form the network that this layer is
	 * being used with
	 */
	virtual void compute_activations(SGVector<float64_t> parameters,
		CDynamicObjectArray* layers);

	virtual const char* get_name() const { return "NeuralHardTanhLayer"; }

private:
	void init();

protected:
	/** Parameter used to set lower bound of activation value */
	float64_t m_min_act;

	/** Parameter used to set upper bound of activation value */
	float64_t m_max_act;
};

}
#endif //__NEURALHARDTANHLAYER_H__
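For context, a hedged sketch of how the new layer would slot into a network. The method names (quick_connect, initialize_neural_network) follow Shogun's CNeuralNetwork of this era and are assumptions here, not something this PR adds.

```cpp
#include <shogun/neuralnets/NeuralNetwork.h>
#include <shogun/neuralnets/NeuralInputLayer.h>
#include <shogun/neuralnets/NeuralHardTanhLayer.h>
#include <shogun/neuralnets/NeuralLinearLayer.h>

// input -> hard tanh hidden layer -> linear output
CDynamicObjectArray* layers = new CDynamicObjectArray();
layers->append_element(new CNeuralInputLayer(10));
layers->append_element(new CNeuralHardTanhLayer(5));
layers->append_element(new CNeuralLinearLayer(1));

CNeuralNetwork* net = new CNeuralNetwork(layers);
net->quick_connect();              // connect each layer to the previous one
net->initialize_neural_network();  // allocate and initialise parameters
SG_UNREF(net);
```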
107 changes: 107 additions & 0 deletions tests/unit/neuralnets/NeuralHardTanhLayer_unittest.cc
@@ -0,0 +1,107 @@
/*
* Copyright (c) 2016 Shogun Toolbox Foundation
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* Written (W) 2016 Arasu Arun
*/

#include <shogun/neuralnets/NeuralHardTanhLayer.h>
#include <shogun/neuralnets/NeuralInputLayer.h>
#include <shogun/lib/SGVector.h>
#include <shogun/lib/SGMatrix.h>
#include <shogun/mathematics/Math.h>
#include <gtest/gtest.h>

using namespace shogun;

/** Compares the activations computed using the layer against manually computed
* activations
*/
TEST(NeuralHardTanhLayer, compute_activations)
{
	CNeuralHardTanhLayer layer(9);
	float64_t min_act = -1.0;
	float64_t max_act = 1.0;

	// initialize some random inputs
	CMath::init_random(100);
	SGMatrix<float64_t> x(12,3);
	for (int32_t i=0; i<x.num_rows*x.num_cols; i++)
		x[i] = CMath::random(-10.0,10.0);

	CNeuralInputLayer* input = new CNeuralInputLayer(x.num_rows);
	input->set_batch_size(x.num_cols);

	CDynamicObjectArray* layers = new CDynamicObjectArray();
	layers->append_element(input);

	SGVector<int32_t> input_indices(1);
	input_indices[0] = 0;

	// initialize the layer
	layer.initialize_neural_layer(layers, input_indices);
	SGVector<float64_t> params(layer.get_num_parameters());
	SGVector<bool> param_regularizable(layer.get_num_parameters());
	layer.initialize_parameters(params, param_regularizable, 1.0);
	layer.set_batch_size(x.num_cols);

	// compute the layer's activations
	input->compute_activations(x);
	layer.set_min_act(min_act);
	layer.set_max_act(max_act);
	layer.compute_activations(params, layers);
	SGMatrix<float64_t> A = layer.get_activations();

	// manually compute the layer's activations
	SGMatrix<float64_t> A_ref(layer.get_num_neurons(), x.num_cols);

	SGVector<float64_t> biases = params;
	SGVector<float64_t> weights(params.vector, params.vlen, layer.get_num_neurons());

	for (int32_t i=0; i<A_ref.num_rows; i++)
	{
		for (int32_t j=0; j<A_ref.num_cols; j++)
		{
			A_ref(i,j) = biases[i];

			for (int32_t k=0; k<x.num_rows; k++)
				A_ref(i,j) += weights[i+k*A_ref.num_rows]*x(k,j);

			A_ref(i,j) = A_ref(i,j) >= max_act ? max_act :
				CMath::max<float64_t>(A_ref(i,j), min_act);
		}
	}

Review comment (Member) on the outer loop: parallelize
Review comment (Member) on the inner loop: parallelize
Review comment (Member) on the dot-product loop: certainly simd
Review comment (Member) on the clamp: so many loops .... what about some eigen3/linalg dot products and or openmp?
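A hedged sketch of that eigen3 suggestion: the three nested loops compute A_ref = clamp(W*x + b, min_act, max_act), which maps to one matrix product plus an element-wise clamp. Dimensions follow the test (9 neurons, 12 inputs, batch of 3); the matrices below are hypothetical stand-ins rather than the test's SGMatrix objects.

```cpp
#include <Eigen/Dense>

Eigen::MatrixXd W = Eigen::MatrixXd::Random(9, 12);  // weights (num_neurons x num_inputs)
Eigen::MatrixXd X = Eigen::MatrixXd::Random(12, 3);  // input batch (num_inputs x batch_size)
Eigen::VectorXd b = Eigen::VectorXd::Random(9);      // biases

// one GEMM plus an element-wise clamp into [min_act, max_act] = [-1, 1]
Eigen::MatrixXd A_ref = ((W * X).colwise() + b).cwiseMax(-1.0).cwiseMin(1.0);
```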

	// compare
	EXPECT_EQ(A_ref.num_rows, A.num_rows);
	EXPECT_EQ(A_ref.num_cols, A.num_cols);
	for (int32_t i=0; i<A.num_rows*A.num_cols; i++)
		EXPECT_NEAR(A_ref[i], A[i], 1e-12);

	SG_UNREF(layers);
}