-
-
Notifications
You must be signed in to change notification settings. (Fork count: 1k)
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Added Neural HardTanh Layer and unit test #3037
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
/* | ||
* Copyright (c) 2016, Shogun Toolbox Foundation | ||
* All rights reserved. | ||
* | ||
* Redistribution and use in source and binary forms, with or without | ||
* modification, are permitted provided that the following conditions are met: | ||
|
||
* 1. Redistributions of source code must retain the above copyright notice, | ||
* this list of conditions and the following disclaimer. | ||
* | ||
* 2. Redistributions in binary form must reproduce the above copyright notice, | ||
* this list of conditions and the following disclaimer in the documentation | ||
* and/or other materials provided with the distribution. | ||
* | ||
* 3. Neither the name of the copyright holder nor the names of its | ||
* contributors may be used to endorse or promote products derived from this | ||
* software without specific prior written permission. | ||
|
||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | ||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | ||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE | ||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | ||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | ||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | ||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | ||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | ||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | ||
* POSSIBILITY OF SUCH DAMAGE. | ||
* | ||
* Written (W) 2016 Arasu Arun | ||
*/ | ||
|
||
#include <shogun/neuralnets/NeuralHardTanhLayer.h> | ||
#include <shogun/mathematics/Math.h> | ||
#include <shogun/lib/SGVector.h> | ||
|
||
using namespace shogun; | ||
|
||
/** Default constructor: delegates to the base linear layer and then
 * registers this layer's serializable parameters with their defaults.
 */
CNeuralHardTanhLayer::CNeuralHardTanhLayer() : CNeuralLinearLayer()
{
	init();
}
|
||
/** Constructor that forwards the neuron count to the base linear layer
 * and then registers this layer's serializable parameters.
 *
 * @param num_neurons number of neurons in this layer
 */
CNeuralHardTanhLayer::CNeuralHardTanhLayer(int32_t num_neurons):
	CNeuralLinearLayer(num_neurons)
{
	init();
}
|
||
/** Computes hard-tanh activations: first the base class fills
 * m_activations with the affine pre-activations z = W*x + b, then each
 * entry is clamped into the interval [m_min_act, m_max_act].
 *
 * @param parameters vector of size get_num_parameters() holding the
 *                   layer's parameters
 * @param layers     array of layers forming the network this layer
 *                   belongs to
 */
void CNeuralHardTanhLayer::compute_activations(
	SGVector<float64_t> parameters,
	CDynamicObjectArray* layers)
{
	// let the linear layer compute z = W*x + b into m_activations
	CNeuralLinearLayer::compute_activations(parameters, layers);

	// clamp every activation to [m_min_act, m_max_act]
	const int32_t num_activations = m_num_neurons*m_batch_size;
	for (int32_t i=0; i<num_activations; i++)
	{
		m_activations[i] = CMath::min<float64_t>(m_max_act,
			CMath::max<float64_t>(m_activations[i], m_min_act));
	}
}
|
||
void CNeuralHardTanhLayer::init() | ||
{ | ||
m_min_act = -1.0; | ||
m_max_act = 1.0; | ||
|
||
SG_ADD(&m_min_act, "min_act", | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nice. Yeah if you find unregistered parameters, do add them. They are what makes Shogun classes serialisable. |
||
"Minimum Value", MS_NOT_AVAILABLE); | ||
SG_ADD(&m_max_act, "max_act", | ||
"Maximum Value", MS_NOT_AVAILABLE); | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,127 @@ | ||
/* | ||
* Copyright (c) 2016 Shogun Toolbox Foundation | ||
* All rights reserved. | ||
* | ||
* Redistribution and use in source and binary forms, with or without | ||
* modification, are permitted provided that the following conditions are met: | ||
|
||
* 1. Redistributions of source code must retain the above copyright notice, | ||
* this list of conditions and the following disclaimer. | ||
* | ||
* 2. Redistributions in binary form must reproduce the above copyright notice, | ||
* this list of conditions and the following disclaimer in the documentation | ||
* and/or other materials provided with the distribution. | ||
* | ||
* 3. Neither the name of the copyright holder nor the names of its | ||
* contributors may be used to endorse or promote products derived from this | ||
* software without specific prior written permission. | ||
|
||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | ||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | ||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE | ||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | ||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | ||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | ||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | ||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | ||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | ||
* POSSIBILITY OF SUCH DAMAGE. | ||
* | ||
* Written (W) 2016 Arasu Arun | ||
*/ | ||
|
||
#ifndef __NEURALHARDTANHLAYER_H__ | ||
#define __NEURALHARDTANHLAYER_H__ | ||
|
||
#include <shogun/neuralnets/NeuralLinearLayer.h> | ||
|
||
namespace shogun | ||
{ | ||
/** @brief Neural layer with [hard tanh neurons] | ||
* (http://ronan.collobert.com/pub/matos/2004_links_icml.pdf) | ||
* | ||
* Activations are computed according to: | ||
* \f[ | ||
* \begin{cases} | ||
* max_act &\mbox{if } z > max_act \\ | ||
* min_act &\mbox{if } z < min_act \\ | ||
* z& \mbox{otherwise} | ||
* \end{cases} | ||
* \f] | ||
* where | ||
* \f[ z=W*x+b \f] | ||
* and W is the weight matrix, b is the bias vector, x is the input vector, | ||
* and min_act, max_act are parameters. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The reference should be included in here, not in the PR description. Nobody reads PR descriptions ... There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oh! I misunderstood your intention. Handled it. :P |
||
* Default value of min_act is -1.0 and max_act is 1.0. | ||
* | ||
* When used as an output layer, a squared error measure is used | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is there any reference to cite where this is coming from and where one can find evidence that it is useful? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I edited this PR's description to include some sources. Should've done so at the beginning, sorry about that. |
||
*/ | ||
class CNeuralHardTanhLayer : public CNeuralLinearLayer | ||
{ | ||
public: | ||
/** default constructor */ | ||
CNeuralHardTanhLayer(); | ||
|
||
/** Constuctor | ||
* | ||
* @param num_neurons Number of neurons in this layer | ||
*/ | ||
CNeuralHardTanhLayer(int32_t num_neurons); | ||
|
||
virtual ~CNeuralHardTanhLayer() {} | ||
|
||
/** Sets the lower bound of the activation value | ||
* | ||
* @param min_act new value of min_act | ||
*/ | ||
virtual void set_min_act(float64_t min_act) { m_min_act=min_act; } | ||
|
||
/** Gets the lower bound of the activation value | ||
* | ||
* @return min_act | ||
*/ | ||
virtual float64_t get_min_act() { return m_min_act; } | ||
|
||
/** Sets the upper bound of the activation value | ||
* | ||
* @param max_act new value of max_act | ||
*/ | ||
virtual void set_max_act(float64_t max_act) { m_max_act=max_act; } | ||
|
||
/** Gets the upper bound of the activation value | ||
* | ||
* @return max_act | ||
*/ | ||
virtual float64_t get_max_act() { return m_max_act; } | ||
|
||
/** Computes the activations of the neurons in this layer, results should | ||
* be stored in m_activations. To be used only with non-input layers | ||
* | ||
* @param parameters Vector of size get_num_parameters(), contains the | ||
* parameters of the layer | ||
* | ||
* @param layers Array of layers that form the network that this layer is | ||
* being used with | ||
* | ||
*/ | ||
virtual void compute_activations(SGVector<float64_t> parameters, | ||
CDynamicObjectArray* layers); | ||
|
||
virtual const char* get_name() const { return "NeuralHardTanhLayer"; } | ||
|
||
private: | ||
void init(); | ||
|
||
protected: | ||
/** Parameter used to set lower bound of activation value | ||
*/ | ||
float64_t m_min_act; | ||
|
||
/** Parameter used to set upper bound of activation value | ||
*/ | ||
float64_t m_max_act; | ||
}; | ||
|
||
} | ||
#endif //__NEURALHARDTANHLAYER_H__ |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
/* | ||
* Copyright (c) 2016 Shogun Toolbox Foundation | ||
* All rights reserved. | ||
* | ||
* Redistribution and use in source and binary forms, with or without | ||
* modification, are permitted provided that the following conditions are met: | ||
|
||
* 1. Redistributions of source code must retain the above copyright notice, | ||
* this list of conditions and the following disclaimer. | ||
* | ||
* 2. Redistributions in binary form must reproduce the above copyright notice, | ||
* this list of conditions and the following disclaimer in the documentation | ||
* and/or other materials provided with the distribution. | ||
* | ||
* 3. Neither the name of the copyright holder nor the names of its | ||
* contributors may be used to endorse or promote products derived from this | ||
* software without specific prior written permission. | ||
|
||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | ||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | ||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE | ||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | ||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | ||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | ||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | ||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | ||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | ||
* POSSIBILITY OF SUCH DAMAGE. | ||
|
||
* Written (W) 2016 Arasu Arun | ||
*/ | ||
|
||
#include <shogun/neuralnets/NeuralHardTanhLayer.h> | ||
#include <shogun/neuralnets/NeuralInputLayer.h> | ||
#include <shogun/lib/SGVector.h> | ||
#include <shogun/lib/SGMatrix.h> | ||
#include <shogun/mathematics/Math.h> | ||
#include <gtest/gtest.h> | ||
|
||
using namespace shogun; | ||
|
||
/** Compares the activations computed using the layer against manually computed | ||
* activations | ||
*/ | ||
TEST(NeuralHardTanhLayer, compute_activations) | ||
{ | ||
CNeuralHardTanhLayer layer(9); | ||
float64_t min_act = -1.0; | ||
float64_t max_act = 1.0; | ||
// initialize some random inputs | ||
CMath::init_random(100); | ||
SGMatrix<float64_t> x(12,3); | ||
for (int32_t i=0; i<x.num_rows*x.num_cols; i++) | ||
x[i] = CMath::random(-10.0,10.0); | ||
|
||
CNeuralInputLayer* input = new CNeuralInputLayer (x.num_rows); | ||
input->set_batch_size(x.num_cols); | ||
|
||
CDynamicObjectArray* layers = new CDynamicObjectArray(); | ||
layers->append_element(input); | ||
|
||
SGVector<int32_t> input_indices(1); | ||
input_indices[0] = 0; | ||
|
||
// initialize the layer | ||
layer.initialize_neural_layer(layers, input_indices); | ||
SGVector<float64_t> params(layer.get_num_parameters()); | ||
SGVector<bool> param_regularizable(layer.get_num_parameters()); | ||
layer.initialize_parameters(params, param_regularizable, 1.0); | ||
layer.set_batch_size(x.num_cols); | ||
|
||
// compute the layer's activations | ||
input->compute_activations(x); | ||
layer.set_min_act(min_act); | ||
layer.set_max_act(max_act); | ||
layer.compute_activations(params, layers); | ||
SGMatrix<float64_t> A = layer.get_activations(); | ||
|
||
// manually compute the layer's activations | ||
SGMatrix<float64_t> A_ref(layer.get_num_neurons(), x.num_cols); | ||
|
||
SGVector<float64_t> biases = params; | ||
SGVector<float64_t> weights(params.vector,params.vlen,layer.get_num_neurons()); | ||
|
||
for (int32_t i=0; i<A_ref.num_rows; i++) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. parallelize |
||
{ | ||
for (int32_t j=0; j<A_ref.num_cols; j++) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. parallelize |
||
{ | ||
A_ref(i,j) = biases[i]; | ||
|
||
for (int32_t k=0; k<x.num_rows; k++) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. certainly simd |
||
A_ref(i,j) += weights[i+k*A_ref.num_rows]*x(k,j); | ||
|
||
A_ref(i,j) = A_ref(i,j) >= max_act ? max_act : | ||
CMath::max<float64_t>(A_ref(i,j),min_act); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. so many loops .... what about some eigen3/linalg dot products and or openmp? |
||
} | ||
} | ||
|
||
// compare | ||
EXPECT_EQ(A_ref.num_rows, A.num_rows); | ||
EXPECT_EQ(A_ref.num_cols, A.num_cols); | ||
for (int32_t i=0; i<A.num_rows*A.num_cols; i++) | ||
EXPECT_NEAR(A_ref[i], A[i], 1e-12); | ||
|
||
SG_UNREF(layers); | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
parallelize