diff --git a/data b/data index 4dbe5d58b9f..ab406ce7f66 160000 --- a/data +++ b/data @@ -1 +1 @@ -Subproject commit 4dbe5d58b9f8e4432ccc34499b8890af7cdc9f14 +Subproject commit ab406ce7f66a84454753f31205f134179fd226c0 diff --git a/doc/cookbook/source/examples/neural_nets/convolutional_net_classification.rst b/doc/cookbook/source/examples/neural_nets/convolutional_net_classification.rst new file mode 100644 index 00000000000..beba627fec3 --- /dev/null +++ b/doc/cookbook/source/examples/neural_nets/convolutional_net_classification.rst @@ -0,0 +1,48 @@ +=============================================== +Convolutional Neural Network for Classification +=============================================== + +Convolutional neural networks are a class of deep, feed-forward artificial neural networks, most commonly applied to analyzing visual imagery. +The network is a directed acyclic graph composed of an input layer, a few hidden layers (which can be convolutional layers, pooling layers, fully connected layers or normalization layers) and a softmax output layer. +To compute the pre-nonlinearity input to some unit :math:`x_{ij}^{l}` in any layer, we can sum up the contributions from the previous layer cells: + +.. math:: + + x_{ij}^{l}=\sum_{a=0}^{m-1}\sum_{b=0}^{m-1}w_{ab}y_{(i+a)(j+b)}^{l-1} + +where :math:`x_{ij}^{l}` is the input to a neuron in layer :math:`l`, :math:`y_{ij}^{l}` is the output of that neuron and :math:`w_{ab}` are the weights of the :math:`m \times m` convolution filter. +See chapter 9 in :cite:`Goodfellow-et-al-2016-Book` for a detailed introduction. + +------- +Example +------- + +Imagine we have files with training and test data. We create CDenseFeatures (here 64 bit floats aka RealFeatures) and :sgclass:`CMulticlassLabels` as + +.. sgexample:: convolutional_net_classification.sg:create_features + +We create a :sgclass:`CNeuralNetwork` instance and randomly initialize the network parameters by sampling from a Gaussian distribution.
We also set appropriate parameters like regularization coefficient, dropout probabilities, learning rate, etc. as shown below. More parameters can be found in the documentation of :sgclass:`CNeuralNetwork`. + +.. sgexample:: convolutional_net_classification.sg:set_parameters + +.. sgexample:: convolutional_net_classification.sg:create_instance + +We create instances of :sgclass:`CNeuralInputLayer`, :sgclass:`CNeuralConvolutionalLayer` and :sgclass:`CNeuralSoftmaxLayer`, which are building blocks of :sgclass:`CNeuralNetwork`, and add them to the network. + +.. sgexample:: convolutional_net_classification.sg:add_layers + +We train the model and apply it to some test data. + +.. sgexample:: convolutional_net_classification.sg:train_and_apply + +Finally, we compute accuracy. + +.. sgexample:: convolutional_net_classification.sg:evaluate_accuracy + +---------- +References +---------- +:wiki:`Convolutional_neural_network` + +.. bibliography:: ../../references.bib + :filter: docname in docnames diff --git a/doc/cookbook/source/examples/neural_nets/feedforward_net_classification.rst b/doc/cookbook/source/examples/neural_nets/feedforward_net_classification.rst index ea72c8aebd3..845878bdf2f 100644 --- a/doc/cookbook/source/examples/neural_nets/feedforward_net_classification.rst +++ b/doc/cookbook/source/examples/neural_nets/feedforward_net_classification.rst @@ -22,17 +22,14 @@ Imagine we have files with training and test data. We create CDenseFeatures (her .. sgexample:: feedforward_net_classification.sg:create_features -We create instances of :sgclass:`CNeuralInputLayer`, :sgclass:`CNeuralLinearLayer` and :sgclass:`NeuralSoftmaxLayer` which are building blocks of :sgclass:`CNeuralNetwork` - -.. 
sgexample:: feedforward_net_classification.sg:add_layers +We create a :sgclass:`CNeuralNetwork` instance and randomly initialize the network parameters by sampling from a Gaussian distribution. We set appropriate parameters like regularization coefficient, dropout probabilities, learning rate, etc. as shown below. More parameters can be found in the documentation of :sgclass:`CNeuralNetwork`. -We create a :sgclass:`CNeuralNetwork` instance by using the above layers and randomly initialize the network parameters by sampling from a gaussian distribution. .. sgexample:: feedforward_net_classification.sg:create_instance -Before training, we need to set appropriate parameters like regularization coefficient, dropout probabilities, learning rate, etc. as shown below. More parameters can be found in the documentation of :sgclass:`CNeuralNetwork`. +We create instances of :sgclass:`CNeuralInputLayer`, :sgclass:`CNeuralRectifiedLinearLayer` and :sgclass:`CNeuralSoftmaxLayer`, which are building blocks of :sgclass:`CNeuralNetwork`, and add them to the network. -.. sgexample:: feedforward_net_classification.sg:set_parameters +.. sgexample:: feedforward_net_classification.sg:add_layers We train the model and apply it to some test data. diff --git a/doc/cookbook/source/examples/neural_nets/feedforward_net_regression.rst b/doc/cookbook/source/examples/neural_nets/feedforward_net_regression.rst index 7ca0f7d0272..065ce986383 100644 --- a/doc/cookbook/source/examples/neural_nets/feedforward_net_regression.rst +++ b/doc/cookbook/source/examples/neural_nets/feedforward_net_regression.rst @@ -12,17 +12,13 @@ Imagine we have files with training and test data. We create CDenseFeatures (her .. sgexample:: feedforward_net_regression.sg:create_features -We create instances of :sgclass:`CNeuralLayers` and add an input layer, hidden layer and output layer which are building blocks of :sgclass:`CNeuralNetwork` - -.. 
sgexample:: feedforward_net_regression.sg:add_layers - -We create a :sgclass:`CNeuralNetwork` instance by using the above layers and randomly initialize the network parameters by sampling from a gaussian distribution. +We create a :sgclass:`CNeuralNetwork` instance and randomly initialize the network parameters by sampling from a Gaussian distribution. We set appropriate parameters like regularization coefficient, number of epochs, learning rate, etc. as shown below. More parameters can be found in the documentation of :sgclass:`CNeuralNetwork`. .. sgexample:: feedforward_net_regression.sg:create_instance -Before training, we need to set appropriate parameters like regularization coefficient, number of epochs, learning rate, etc. as shown below. More parameters can be found in the documentation of :sgclass:`CNeuralNetwork`. +We create an input layer, a hidden layer and an output layer, which are instances of :sgclass:`CNeuralLayer` and the building blocks of :sgclass:`CNeuralNetwork`, and add them to the network. -.. sgexample:: feedforward_net_regression.sg:set_parameters +.. sgexample:: feedforward_net_regression.sg:add_layers We train the model and apply it to test data. 
diff --git a/examples/meta/src/neural_nets/convolutional_net_classification.sg b/examples/meta/src/neural_nets/convolutional_net_classification.sg new file mode 100644 index 00000000000..2522be2b836 --- /dev/null +++ b/examples/meta/src/neural_nets/convolutional_net_classification.sg @@ -0,0 +1,41 @@ +File f_feats_train = csv_file("../../data/mnist_3class_256d_features_train.dat") +File f_feats_test = csv_file("../../data/mnist_3class_256d_features_test.dat") +File f_labels_train = csv_file("../../data/mnist_3class_256d_labels_train.dat") +File f_labels_test = csv_file("../../data/mnist_3class_256d_labels_test.dat") + +Math:init_random(10) + +#![create_features] +Features features_train = features(f_feats_train) +Features features_test = features(f_feats_test) +Labels labels_train = labels(f_labels_train) +Labels labels_test = labels(f_labels_test) +#![create_features] + +#![create_instance] +Machine network = machine("NeuralNetwork", labels=labels_train, auto_quick_initialize=True, max_num_epochs=4, epsilon=0.0, optimization_method=enum ENNOptimizationMethod.NNOM_GRADIENT_DESCENT, gd_learning_rate=0.01, gd_mini_batch_size=3, max_norm=1.0, dropout_input=0.5) +#![create_instance] + +#![add_layers] +NeuralLayer input = layer("NeuralInputLayer", width=16, height=16, num_neurons=256) +network.add("layers", input) +NeuralLayer conv1 = layer("NeuralConvolutionalLayer", num_maps=3, radius_x=2, radius_y=2, pooling_width=2, pooling_height=2, stride_x=1, stride_y=1) +network.add("layers", conv1) +NeuralLayer conv2 = layer("NeuralConvolutionalLayer", num_maps=3, radius_x=2, radius_y=2, pooling_width=2, pooling_height=2, stride_x=1, stride_y=1) +network.add("layers", conv2) +NeuralLayer softmax = layer("NeuralSoftmaxLayer", num_neurons=3) +network.add("layers", softmax) +#![add_layers] + +#![train_and_apply] +network.train(features_train) +Labels labels_predict = network.apply(features_test) +#![train_and_apply] + +#![evaluate_accuracy] +Evaluation eval = 
evaluation("MulticlassAccuracy") +real accuracy = eval.evaluate(labels_predict, labels_test) +#![evaluate_accuracy] + +# additional integration testing variables +RealVector output = labels_predict.get_real_vector("labels") diff --git a/examples/meta/src/neural_nets/feedforward_net_classification.sg b/examples/meta/src/neural_nets/feedforward_net_classification.sg index e0edc4af9cf..2dc8b64b60f 100644 --- a/examples/meta/src/neural_nets/feedforward_net_classification.sg +++ b/examples/meta/src/neural_nets/feedforward_net_classification.sg @@ -1,55 +1,44 @@ -CSVFile f_feats_train("../../data/classifier_binary_2d_nonlinear_features_train.dat") -CSVFile f_feats_test("../../data/classifier_binary_2d_nonlinear_features_test.dat") -CSVFile f_labels_train("../../data/classifier_binary_2d_nonlinear_labels_train.dat") -CSVFile f_labels_test("../../data/classifier_binary_2d_nonlinear_labels_test.dat") +File f_feats_train = csv_file("../../data/classifier_binary_2d_nonlinear_features_train.dat") +File f_feats_test = csv_file("../../data/classifier_binary_2d_nonlinear_features_test.dat") +File f_labels_train = csv_file("../../data/classifier_binary_2d_nonlinear_labels_train.dat") +File f_labels_test = csv_file("../../data/classifier_binary_2d_nonlinear_labels_test.dat") Math:init_random(1) #![create_features] Features features_train = features(f_feats_train) Features features_test = features(f_feats_test) -BinaryLabels labels_train(f_labels_train) -BinaryLabels labels_test(f_labels_test) +Labels labels_train = labels(f_labels_train) +Labels labels_test = labels(f_labels_test) #![create_features] -#![add_layers] -int num_feats = features_train.get_int("num_features") -NeuralLayers layers() -layers.input(num_feats) -layers.rectified_linear(10) -layers.softmax(2) -DynamicObjectArray all_layers = layers.done() -#![add_layers] - #![create_instance] -NeuralNetwork network(all_layers) -network.quick_connect() -network.initialize_neural_network() +int num_feats = 
features_train.get_int("num_features") +Machine network = machine("NeuralNetwork", labels=labels_train, auto_quick_initialize=True, l2_coefficient=0.01, dropout_hidden=0.5, max_num_epochs=50, gd_mini_batch_size=num_feats, gd_learning_rate=0.1, gd_momentum=0.9) #![create_instance] -#![set_parameters] -network.set_l2_coefficient(0.01) -network.set_dropout_hidden(0.5) -network.set_max_num_epochs(50) -network.set_gd_mini_batch_size(num_feats) -network.set_gd_learning_rate(0.1) -network.set_gd_momentum(0.9) -#![set_parameters] +#![add_layers] +NeuralLayer input = layer("NeuralInputLayer", num_neurons=num_feats) +network.add("layers", input) +NeuralLayer relu = layer("NeuralRectifiedLinearLayer", num_neurons=10) +network.add("layers", relu) +NeuralLayer softmax = layer("NeuralSoftmaxLayer", num_neurons=2) +network.add("layers", softmax) +#![add_layers] #![train_and_apply] -network.set_labels(labels_train) network.train(features_train) -BinaryLabels labels_predict = network.apply_binary(features_test) +Labels labels_predict = network.apply(features_test) #![train_and_apply] #![get_params] -RealVector parameters = network.get_parameters() +RealVector parameters = network.get_real_vector("params") #![get_params] #![evaluate_accuracy] -AccuracyMeasure am() -real accuracy = am.evaluate(labels_predict, labels_test) +Evaluation eval = evaluation("AccuracyMeasure") +real accuracy = eval.evaluate(labels_predict, labels_test) #![evaluate_accuracy] # additional integration testing variables -RealVector output = labels_predict.get_labels() +RealVector output = labels_predict.get_real_vector("labels") diff --git a/examples/meta/src/neural_nets/feedforward_net_regression.sg b/examples/meta/src/neural_nets/feedforward_net_regression.sg index b62f6f52ac9..cb938c631eb 100644 --- a/examples/meta/src/neural_nets/feedforward_net_regression.sg +++ b/examples/meta/src/neural_nets/feedforward_net_regression.sg @@ -1,54 +1,44 @@ -CSVFile 
f_feats_train("../../data/regression_1d_sinc_features_train_with_9d_noise.dat") -CSVFile f_feats_test("../../data/regression_1d_sinc_features_test_with_9d_noise.dat") -CSVFile f_labels_train("../../data/regression_1d_sinc_labels_train.dat") -CSVFile f_labels_test("../../data/regression_1d_sinc_labels_test.dat") +File f_feats_train = csv_file("../../data/regression_1d_sinc_features_train_with_9d_noise.dat") +File f_feats_test = csv_file("../../data/regression_1d_sinc_features_test_with_9d_noise.dat") +File f_labels_train = csv_file("../../data/regression_1d_sinc_labels_train.dat") +File f_labels_test = csv_file("../../data/regression_1d_sinc_labels_test.dat") Math:init_random(1) #![create_features] Features features_train = features(f_feats_train) Features features_test = features(f_feats_test) -RegressionLabels labels_train(f_labels_train) -RegressionLabels labels_test(f_labels_test) +Labels labels_train = labels(f_labels_train) +Labels labels_test = labels(f_labels_test) #![create_features] -#![add_layers] -int dimensions = features_train.get_int("num_features") -NeuralLayers layers() -layers.input(dimensions) -layers.rectified_linear(20) -layers.linear(1) -DynamicObjectArray all_layers = layers.done() -#![add_layers] - #![create_instance] -NeuralNetwork network(all_layers) -network.quick_connect() -network.initialize_neural_network() +int num_feats = features_train.get_int("num_features") +Machine network = machine("NeuralNetwork", labels=labels_train, auto_quick_initialize=True, l2_coefficient=0.1, epsilon=0.0, max_num_epochs=40, gd_learning_rate=0.1, gd_momentum=0.9) #![create_instance] -#![set_parameters] -network.set_l2_coefficient(0.1) -network.set_max_num_epochs(40) -network.set_epsilon(0.0) -network.set_gd_learning_rate(0.1) -network.set_gd_momentum(0.9) -#![set_parameters] +#![add_layers] +NeuralLayer input = layer("NeuralInputLayer", num_neurons=num_feats) +network.add("layers", input) +NeuralLayer relu = layer("NeuralRectifiedLinearLayer", 
num_neurons=20) +network.add("layers", relu) +NeuralLayer linear = layer("NeuralLinearLayer", num_neurons=1) +network.add("layers", linear) +#![add_layers] #![train_and_apply] -network.set_labels(labels_train) network.train(features_train) -RegressionLabels labels_predict = network.apply_regression(features_test) +Labels labels_predict = network.apply(features_test) #![train_and_apply] #![get_params] -RealVector parameters = network.get_parameters() +RealVector parameters = network.get_real_vector("params") #![get_params] -#![evaluate_error] -MeanSquaredError err() -real mse = err.evaluate(labels_predict, labels_test) -#![evaluate_error] +#![evaluate_accuracy] +Evaluation eval = evaluation("MeanSquaredError") +real mse = eval.evaluate(labels_predict, labels_test) +#![evaluate_accuracy] # additional integration testing variables -RealVector output = labels_predict.get_labels() +RealVector output = labels_predict.get_real_vector("labels") diff --git a/src/interfaces/swig/shogun.i b/src/interfaces/swig/shogun.i index 0bc9fc5306e..424117d2258 100644 --- a/src/interfaces/swig/shogun.i +++ b/src/interfaces/swig/shogun.i @@ -256,6 +256,7 @@ PUT_ADD(CECOCDecoder) PUT_ADD(CMulticlassStrategy) PUT_ADD(CCombinationRule) PUT_ADD(CDifferentiableFunction) +PUT_ADD(CNeuralLayer) %template(kernel) kernel; %template(features) features; diff --git a/src/shogun/base/base_types.h b/src/shogun/base/base_types.h index 4be9e866613..4a091e92d4c 100644 --- a/src/shogun/base/base_types.h +++ b/src/shogun/base/base_types.h @@ -19,7 +19,7 @@ namespace shogun class CECOCEncoder; class CECOCDecoder; class CMulticlassStrategy; - + class CNeuralLayer; // type trait to enable certain methods only for shogun base types template struct is_sg_base @@ -31,7 +31,8 @@ namespace shogun std::is_same::value || std::is_same::value || std::is_same::value || - std::is_same::value> + std::is_same::value || + std::is_same::value> { }; } diff --git a/src/shogun/neuralnets/NeuralNetwork.cpp 
b/src/shogun/neuralnets/NeuralNetwork.cpp index d1a7ed99a5b..a5ca73c70af 100644 --- a/src/shogun/neuralnets/NeuralNetwork.cpp +++ b/src/shogun/neuralnets/NeuralNetwork.cpp @@ -60,10 +60,19 @@ void CNeuralNetwork::set_layers(CDynamicObjectArray* layers) SG_UNREF(m_layers); SG_REF(layers); m_layers = layers; + init_adj_matrix(); +} + +void CNeuralNetwork::connect(int32_t i, int32_t j) +{ + REQUIRE("iget_num_elements(); m_adj_matrix = SGMatrix(m_num_layers, m_num_layers); - m_adj_matrix.zero(); m_num_inputs = 0; for (int32_t i=0; iis_input()) m_num_inputs += get_layer(i)->get_num_neurons(); } -} - -void CNeuralNetwork::connect(int32_t i, int32_t j) -{ - REQUIRE("iis_input()) @@ -145,7 +150,7 @@ void CNeuralNetwork::initialize_neural_network(float64_t sigma) get_section(m_param_regularizable, i); get_layer(i)->initialize_parameters(layer_param, - layer_param_regularizable, sigma); + layer_param_regularizable, m_sigma); get_layer(i)->set_batch_size(m_batch_size); } @@ -229,6 +234,13 @@ CDenseFeatures< float64_t >* CNeuralNetwork::transform( bool CNeuralNetwork::train_machine(CFeatures* data) { + if (m_auto_quick_initialize) + { + quick_connect(); + initialize_neural_network(m_sigma); + } + + REQUIRE(m_max_num_epochs>=0, "Maximum number of epochs (%i) must be >= 0\n", m_max_num_epochs); @@ -772,7 +784,11 @@ void CNeuralNetwork::init() m_lbfgs_temp_inputs = NULL; m_lbfgs_temp_targets = NULL; m_is_training = false; - + m_auto_quick_initialize = false; + m_sigma = 0.01f; + m_layers=new CDynamicObjectArray(); + SG_REF(m_layers); + SG_ADD((machine_int_t*)&m_optimization_method, "optimization_method", "Optimization Method", MS_NOT_AVAILABLE); SG_ADD(&m_gd_mini_batch_size, "gd_mini_batch_size", @@ -813,9 +829,13 @@ void CNeuralNetwork::init() "Parameters", MS_NOT_AVAILABLE); SG_ADD(&m_param_regularizable, "param_regularizable", "Parameter Regularizable", MS_NOT_AVAILABLE); - SG_ADD((CSGObject**)&m_layers, "layers", - "DynamicObjectArray of NeuralNetwork objects", - 
MS_NOT_AVAILABLE); + SG_ADD( + &m_layers, "layers", "DynamicObjectArray of NeuralNetwork objects", + MS_NOT_AVAILABLE); + SG_ADD(&m_auto_quick_initialize, "auto_quick_initialize", "auto_quick_initialize", MS_NOT_AVAILABLE); SG_ADD(&m_is_training, "is_training", "is_training", MS_NOT_AVAILABLE); + SG_ADD( + &m_sigma, "sigma", "sigma", + MS_NOT_AVAILABLE); } diff --git a/src/shogun/neuralnets/NeuralNetwork.h b/src/shogun/neuralnets/NeuralNetwork.h index 55cd0973911..681ddada3b1 100644 --- a/src/shogun/neuralnets/NeuralNetwork.h +++ b/src/shogun/neuralnets/NeuralNetwork.h @@ -136,6 +136,10 @@ friend class CDeepBeliefNetwork; */ virtual void connect(int32_t i, int32_t j); + /** Initialize adjacency matrix + */ + virtual void init_adj_matrix(); + /** Connects each layer to the layer after it. That is, connects layer i to * as input to layer i+1 for all i. */ @@ -622,6 +626,16 @@ friend class CDeepBeliefNetwork; */ bool m_is_training; + /** True if the network layers are to be quick connected and initialized + * initial value is False + */ + bool m_auto_quick_initialize; + + /** Standard deviation of the gaussian used to randomly + * initialize the parameters + */ + float64_t m_sigma; + /** Optimization method, default is NNOM_LBFGS */ ENNOptimizationMethod m_optimization_method; diff --git a/src/shogun/util/factory.h b/src/shogun/util/factory.h index 48e08d34882..4a364353c46 100644 --- a/src/shogun/util/factory.h +++ b/src/shogun/util/factory.h @@ -21,6 +21,7 @@ #include #include #include +#include #include namespace shogun @@ -34,6 +35,7 @@ namespace shogun CECOCEncoder* ecoc_encoder(const std::string& name); CECOCDecoder* ecoc_decoder(const std::string& name); CTransformer* transformer(const std::string& name); + CNeuralLayer* layer(const std::string& name); #define BASE_CLASS_FACTORY(T, factory_name) \ T* factory_name(const std::string& name) \ @@ -53,6 +55,7 @@ namespace shogun BASE_CLASS_FACTORY(CECOCEncoder, ecoc_encoder) BASE_CLASS_FACTORY(CECOCDecoder, 
ecoc_decoder) BASE_CLASS_FACTORY(CTransformer, transformer) + BASE_CLASS_FACTORY(CNeuralLayer, layer) template CFeatures* features(SGMatrix mat)