Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cost functions now support Stan Math; kept the previous classes for backward compatibility. #4294

Closed
wants to merge 39 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
220b11d
Changed ordering of includes to avoid boost issues
FaroukY May 22, 2018
ff2dd1a
Built a class on stan that can define any arbitrary cost function wit…
FaroukY May 22, 2018
3cbecec
Wrote two unit tests to the new class to demonstrate how to write a n…
FaroukY May 22, 2018
110337f
Changed the order of including the headers
FaroukY May 22, 2018
8829625
Clang format
FaroukY May 23, 2018
7737c07
Adjusted editor to get rid of all the noise
FaroukY May 24, 2018
74dddef
Required changes in the review addressed
FaroukY May 24, 2018
1e8002c
Added the required changes from iteration 2 on #4294, several changes…
FaroukY May 26, 2018
afc5327
Changed unit tests so it works with the new style including a few typ…
FaroukY May 26, 2018
bc16178
Changed file names from FirstOrderSAGCostFunctionInterface to StanFir…
FaroukY Jun 2, 2018
9dd2d19
Changed the way I check for empty vector
FaroukY Jun 2, 2018
34d8aaa
Got rid of setters for changing behavior of cost function and paramet…
FaroukY Jun 2, 2018
8d89e4a
Added safeguards in constructor of StanFirstOrderSAGCostFunction to m…
FaroukY Jun 2, 2018
1bc7318
Shortened the names of very long variables using template typedefs
FaroukY Jun 2, 2018
a926972
Clang formatting
FaroukY Jun 2, 2018
1c60921
Remove memory from headers as its not required
FaroukY Jun 2, 2018
4e8947b
Changed the interface of the Cost function so that it works fine with …
FaroukY Jun 8, 2018
c29fb06
Wrote new unit tests to test SGD Minimizer with the cost function tha…
FaroukY Jun 8, 2018
21a9b7f
updated old unit tests to work with new updated interface of cost fun…
FaroukY Jun 8, 2018
b719131
clang-formatting on all edited files [ci skip]
FaroukY Jun 8, 2018
cdadb8d
Addressed some changed from the reviews
FaroukY Jun 15, 2018
87a663e
Changed the unittest to suit the changed Interface of the cost functi…
FaroukY Jun 15, 2018
cf2e3f1
changed parent class of StanFirstOrderSAGCostFunction to FirstOrderSt…
FaroukY Jun 16, 2018
d79f878
Created a new class StanNeuralLayer which will be the base class for …
FaroukY Jun 17, 2018
6ef7f67
Defined StanMatrix just like StanVector
FaroukY Jun 17, 2018
11f8c1b
updated the API of StanNeuralLayer and got rid of gradient computatio…
FaroukY Jun 17, 2018
272144d
Wrote the class StanNeuralLinearLayer which is a linear layer in the …
FaroukY Jun 17, 2018
91cfef9
fix bug in StanNeuralLinearLayer class where m_stan_activations wasn'…
FaroukY Jun 17, 2018
cd56095
Created a Logistic layer for the stan Neural network, the class is ca…
FaroukY Jun 17, 2018
6592f3f
removed regularization temporarily from neural layer until we have a…
FaroukY Jun 20, 2018
5ba8e0e
Started creating the neural network class that uses the stan neural
FaroukY Jun 20, 2018
955a289
removed apply_multiclass since it's not needed in this case
FaroukY Jun 25, 2018
11f6dbd
Changed a typo in Stan and changed interface of compute_activations a…
FaroukY Jun 25, 2018
88b2c3c
adapted the logistic linear layer to the new API and fixed a few typos
FaroukY Jun 25, 2018
42c626f
Changed initialize parameters interface to remove regularization temp…
FaroukY Jun 25, 2018
8ae7fac
[ci skip] refactored some code in neural net, finished the implement…
FaroukY Jun 25, 2018
3398d7d
[ci skip] 1) Added set_batch_size and implemented it 2) Fixed all syn…
FaroukY Jun 26, 2018
a1a5b41
Added the input layer headers and implementation using stan
FaroukY Jul 2, 2018
aea5dfa
[ci skip] various updates to interfaces, neural network module is now…
FaroukY Jul 2, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
51 changes: 51 additions & 0 deletions src/shogun/neuralnets/StanNeuralInputLayer.cpp
@@ -0,0 +1,51 @@


#include <shogun/neuralnets/StanNeuralInputLayer.h>

using namespace shogun;

/** Default constructor: registers parameters and zero-initializes state
 * via init(); the base class handles the neuron bookkeeping.
 */
StanNeuralInputLayer::StanNeuralInputLayer() : StanNeuralLayer()
{
	init();
}

/** Constructs an input layer over a slice of the input features.
 *
 * @param num_neurons number of neurons in this layer
 * @param start_index index of the first input feature this layer reads
 */
StanNeuralInputLayer::StanNeuralInputLayer(int32_t num_neurons, int32_t start_index):
	StanNeuralLayer(num_neurons)
{
	// init() registers parameters and resets defaults; override afterwards.
	init();
	m_start_index = start_index;
}

/** Constructs an image input layer (for convolutional nets); the number of
 * neurons is width*height*num_channels.
 *
 * @param width image width
 * @param height image height
 * @param num_channels number of image channels
 * @param start_index index of the first input feature this layer reads
 */
StanNeuralInputLayer::StanNeuralInputLayer(int32_t width, int32_t height,
	int32_t num_channels, int32_t start_index): StanNeuralLayer(width*height*num_channels)
{
	init();
	// Record the image geometry; init() reset these to the base defaults.
	m_height = height;
	m_width = width;
	m_start_index = start_index;
}

/** Computes the layer activations as an affine transform of the inputs:
 * A = biases + W * inputs, where biases are the first m_num_neurons entries
 * of `parameters` and W is the following m_num_neurons*inputs.rows()
 * coefficients, interpreted column-major.
 *
 * NOTE(review): the header documents this as *copying*
 * inputs[start_index:start_index+num_neurons, :]; the implementation instead
 * performs a full affine transform and ignores m_start_index and
 * gaussian_noise — confirm which behavior is intended.
 *
 * @param inputs input features matrix, size num_features*num_cases
 * @param parameters flat parameter vector of the network (biases, then weights)
 */
void StanNeuralInputLayer::compute_activations(StanMatrix& inputs, StanVector& parameters)
{
	// Broadcast the bias column across the whole batch.
	auto biases = parameters.block(0, 0, m_num_neurons, 1);
	StanMatrix& A = m_stan_activations;
	A.resize(m_num_neurons, m_batch_size);
	A.colwise() = biases;

	int32_t weights_index_offset = m_num_neurons;

	// BUGFIX: the previous code took a (m_num_neurons*inputs.rows()) x 1
	// Block of `parameters` and then called resize() on it with a different
	// shape. Eigen Block expressions are not resizable — resize() on them is
	// only valid when the requested dimensions already match, so this
	// asserted at runtime. Map the raw coefficients as a matrix instead,
	// which reinterprets the same storage without copying.
	Eigen::Map<StanMatrix> W(
		parameters.data() + weights_index_offset, m_num_neurons, inputs.rows());

	A += W * inputs;
}

/** Sets default member values and registers this layer's parameters with
 * Shogun's serialization machinery. Called from every constructor.
 */
void StanNeuralInputLayer::init()
{
	// By default the layer reads the input features from the beginning.
	m_start_index = 0;
	// No denoising noise unless explicitly configured.
	gaussian_noise = 0;
	SG_ADD(&m_start_index, "start_index",
		"Start Index", MS_NOT_AVAILABLE);
	SG_ADD(&gaussian_noise, "gaussian_noise",
		"Gaussian Noise Standard Deviation", MS_NOT_AVAILABLE);
}
93 changes: 93 additions & 0 deletions src/shogun/neuralnets/StanNeuralInputLayer.h
@@ -0,0 +1,93 @@


#ifndef SHOGUN_STANNEURALINPUTLAYER_H
#define SHOGUN_STANNEURALINPUTLAYER_H

#include <shogun/neuralnets/StanNeuralLayer.h>
#include <shogun/lib/common.h>

namespace shogun
{
/** @brief Represents an input layer. The layer can be either
 * connected to all the input features that a network receives (default) or
 * connected to just a small part of those features
 */
class StanNeuralInputLayer : public StanNeuralLayer
{
public:
	/** default constructor */
	StanNeuralInputLayer();

	/** Constructor
	 *
	 * @param num_neurons Number of neurons in this layer
	 *
	 * @param start_index Index of the first feature that the layer connects to,
	 * i.e the activations of the layer are copied from
	 * input_features[start_index:start_index+num_neurons]
	 */
	StanNeuralInputLayer(int32_t num_neurons, int32_t start_index = 0);

	/** Constructs an input layer that deals with images (for convolutional nets).
	 * Sets the number of neurons to width*height*num_channels
	 *
	 * @param width Width of the image
	 *
	 * @param height Height of the image
	 *
	 * @param num_channels Number of channels
	 *
	 * @param start_index Index of the first feature that the layer connects to,
	 * i.e the activations of the layer are copied from
	 * input_features[start_index:start_index+num_neurons]
	 */
	StanNeuralInputLayer(int32_t width, int32_t height, int32_t num_channels,
		int32_t start_index = 0);

	virtual ~StanNeuralInputLayer() {}

	/** Returns true */
	virtual bool is_input() { return true; }

	/** Computes this layer's activations from the input features.
	 *
	 * NOTE(review): the current .cpp implementation computes an affine
	 * transform (biases + W*inputs) from `parameters` rather than copying
	 * inputs[start_index:start_index+num_neurons, :] — confirm which
	 * behavior is intended and keep this doc in sync.
	 *
	 * @param inputs Input features matrix, size num_features*num_cases
	 * @param parameters are the parameters of the neural network
	 */
	virtual void compute_activations(StanMatrix& inputs, StanVector& parameters);

	/** Gets the index of the first feature that the layer connects to,
	 * i.e the activations of the layer are copied from
	 * input_features[start_index:start_index+num_neurons]
	 */
	virtual int32_t get_start_index() { return m_start_index; }

	/** Sets the index of the first feature that the layer connects to,
	 * i.e the activations of the layer are copied from
	 * input_features[start_index:start_index+num_neurons]
	 */
	virtual void set_start_index(int32_t i) { m_start_index = i; }

	virtual const char* get_name() const { return "StanNeuralInputLayer"; }

private:
	void init();

public:
	/** Standard deviation of the gaussian noise added to the activations of
	 * the layer. Useful for denoising autoencoders. Default value is 0.0.
	 */
	float64_t gaussian_noise;

protected:
	/** Index of the first feature that the layer connects to,
	 * i.e the activations of the layer are copied from
	 * input_features[start_index:start_index+num_neurons]
	 */
	int32_t m_start_index;
};
}


#endif //SHOGUN_STANNEURALINPUTLAYER_H
112 changes: 112 additions & 0 deletions src/shogun/neuralnets/StanNeuralLayer.cpp
@@ -0,0 +1,112 @@
/*
* This software is distributed under BSD 3-clause license (see LICENSE file).
*
* Authors: Elfarouk, Khaled Nasr
*/

#include <shogun/neuralnets/StanNeuralLayer.h>
#include <shogun/base/Parameter.h>
#include <shogun/lib/SGVector.h>
#include <shogun/mathematics/Math.h>

using namespace shogun;

/** Default constructor: zero-initializes all state and registers the
 * layer's parameters via init().
 */
StanNeuralLayer::StanNeuralLayer()
	: CSGObject()
{
	init();
}


/** Constructs a layer with the given number of neurons.
 *
 * @param num_neurons number of neurons in this layer
 */
StanNeuralLayer::StanNeuralLayer(int32_t num_neurons)
	: CSGObject()
{
	init();
	m_num_neurons = num_neurons;
	// A plain (non-image) layer is laid out as a single row of neurons.
	m_height = 1;
	m_width = num_neurons;
}

/** Destructor: no resources to release beyond what members and the
 * CSGObject base clean up themselves.
 */
StanNeuralLayer::~StanNeuralLayer()
{
}

/** Records which layers feed into this one and caches their output sizes.
 *
 * @param layers array holding all layers of the network; entries at
 *        `input_indices` are expected to be StanNeuralLayer instances
 * @param input_indices indices (into `layers`) of this layer's inputs
 */
void StanNeuralLayer::initialize_neural_layer(CDynamicObjectArray* layers,
	SGVector< int32_t > input_indices)
{
	m_input_indices = input_indices;
	m_input_sizes = SGVector<int32_t>(input_indices.vlen);

	for (int32_t i = 0; i < m_input_sizes.vlen; i++)
	{
		// element() returns a new reference; release it once we are done.
		// static_cast instead of a C-style cast: intent-revealing and
		// greppable (the array is known to hold StanNeuralLayer objects).
		StanNeuralLayer* layer =
			static_cast<StanNeuralLayer*>(layers->element(m_input_indices[i]));
		m_input_sizes[i] = layer->get_num_neurons();
		SG_UNREF(layer);
	}
}

/** Resizes the per-batch buffers (stan activations and the dropout mask)
 * for a new number of cases.
 *
 * @param batch_size number of train/test cases the layer will process at once
 */
void StanNeuralLayer::set_batch_size(int32_t batch_size)
{
	m_batch_size = batch_size;

	m_stan_activations.resize(m_num_neurons, batch_size);
	m_dropout_mask = SGMatrix<bool>(m_num_neurons, batch_size);
}

void StanNeuralLayer::dropout_activations()
{
if (dropout_prop==0.0) return;

if (is_training)
{
for(int32_t i=0; i<m_num_neurons; ++i)
{
for(int32_t j = 0; j<m_batch_size; ++j)
{
m_dropout_mask(i,j) = CMath::random(0.0,1.0) >= dropout_prop;
m_stan_activations(i,j) *= m_dropout_mask(i,j);
}
}
}
else
{
for(int32_t i=0; i<m_num_neurons; ++i)
{
for(int32_t j = 0; j<m_batch_size; ++j)
{
m_stan_activations(i,j) *= (1.0 - dropout_prop);
}
}
}
}

/** Sets default member values and registers all layer parameters with
 * Shogun's serialization machinery. Called from every constructor.
 */
void StanNeuralLayer::init()
{
	m_num_neurons = 0;
	m_width = 0;
	m_height = 0;
	m_num_parameters = 0;
	m_batch_size = 0;
	// Dropout disabled by default; dropout_activations() is then a no-op.
	dropout_prop = 0.0;
	is_training = false;

	SG_ADD(&m_num_neurons, "num_neurons",
		"Number of Neurons", MS_NOT_AVAILABLE);
	SG_ADD(&m_width, "width",
		"Width", MS_NOT_AVAILABLE);
	SG_ADD(&m_height, "height",
		"Height", MS_NOT_AVAILABLE);
	SG_ADD(&m_input_indices, "input_indices",
		"Input Indices", MS_NOT_AVAILABLE);
	SG_ADD(&m_input_sizes, "input_sizes",
		"Input Sizes", MS_NOT_AVAILABLE);
	SG_ADD(&dropout_prop, "dropout_prop",
		"Dropout Probabilty", MS_NOT_AVAILABLE);
	SG_ADD(&is_training, "is_training",
		"is_training", MS_NOT_AVAILABLE);
	SG_ADD(&m_batch_size, "batch_size",
		"Batch Size", MS_NOT_AVAILABLE);
	SG_ADD(&m_dropout_mask, "dropout_mask",
		"Dropout mask", MS_NOT_AVAILABLE);

}