Cost functions now support Stan Math; kept the previous classes for backward compatibility. #4294
@@ -0,0 +1,140 @@
/*
 * This software is distributed under BSD 3-clause license (see LICENSE file).
 *
 * Authors: Elfarouk
 */

#include <shogun/optimization/StanFirstOrderSAGCostFunction.h>
#include <shogun/base/range.h>
#include <shogun/mathematics/Math.h>

using namespace shogun;
using stan::math::var;
using std::function;
using Eigen::Matrix;
using Eigen::Dynamic;

StanFirstOrderSAGCostFunction::StanFirstOrderSAGCostFunction(
    SGMatrix<float64_t> X, SGMatrix<float64_t> y,
    StanVector* trainable_parameters,
Review comment: still don't get why we need pointers.
    StanFunctionsVector<float64_t>* cost_for_ith_point,
    FunctionReturnsStan<StanVector*>* total_cost)
{
    REQUIRE(X.size() > 0, "Empty X provided");
    REQUIRE(y.size() > 0, "Empty y provided");
    auto num_of_variables = trainable_parameters->rows();
    REQUIRE(
        num_of_variables > 0,
        "Provided %d variables in the parameters, at least 1 is required",
        num_of_variables);
    REQUIRE(cost_for_ith_point != NULL, "Cost for ith point is not provided");
    REQUIRE(total_cost != NULL, "Total cost function is not provided");
    m_X = X;
    m_y = y;
    m_trainable_parameters = trainable_parameters;
    m_cost_for_ith_point = cost_for_ith_point;
    m_total_cost = total_cost;
    m_ref_trainable_parameters = SGVector<float64_t>(num_of_variables);
    for (auto i : range(num_of_variables))
    {
        m_ref_trainable_parameters[i] = (*m_trainable_parameters)(i, 0).val();
    }
}
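For orientation, a minimal sketch of how this constructor might be driven for the least-squares cost from the class documentation. The data values, lambdas, and variable names below are hypothetical illustrations, not part of this patch:

```cpp
#include <shogun/optimization/StanFirstOrderSAGCostFunction.h>
using namespace shogun;

// Hypothetical wiring for f(w) = sum_i (y_i - w^T x_i)^2 / 2.
index_t n = 3, d = 2;
SGMatrix<float64_t> X(d, n); // one sample per column
SGMatrix<float64_t> y(1, n);
X.set_const(1.0);
y.set_const(1.0);

StanVector w(d); // Stan autodiff parameters
for (index_t k = 0; k < d; ++k)
    w(k, 0) = 0.0;

// Per-sample cost: each entry maps (parameters, sample index) -> var.
StanFunctionsVector<float64_t> cost_i(n);
for (index_t i = 0; i < n; ++i)
{
    cost_i(i, 0) = [X, y](StanVector* params, float64_t idx) {
        auto j = static_cast<index_t>(idx);
        stan::math::var wx = 0;
        for (index_t k = 0; k < X.num_rows; ++k)
            wx += (*params)(k, 0) * X(k, j);
        return (y(0, j) - wx) * (y(0, j) - wx) / 2.0;
    };
}

// Total cost: the sum of the per-sample Stan costs.
FunctionReturnsStan<StanVector*> total = [](StanVector* v) {
    stan::math::var sum = 0;
    for (index_t i = 0; i < v->rows(); ++i)
        sum += (*v)(i, 0);
    return sum;
};

// Pointers are passed, matching the constructor signature above.
StanFirstOrderSAGCostFunction cost(X, y, &w, &cost_i, &total);
```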

void StanFirstOrderSAGCostFunction::set_training_data(
    SGMatrix<float64_t> X_new, SGMatrix<float64_t> y_new)
{
    REQUIRE(X_new.size() > 0, "Empty X provided");
    REQUIRE(y_new.size() > 0, "Empty y provided");
    this->m_X = X_new;
    this->m_y = y_new;
}

StanFirstOrderSAGCostFunction::~StanFirstOrderSAGCostFunction()
{
}

void StanFirstOrderSAGCostFunction::begin_sample()
{
    m_index_of_sample = -1;
}

bool StanFirstOrderSAGCostFunction::next_sample()
{
    ++m_index_of_sample;
    return m_index_of_sample < get_sample_size();
}

void StanFirstOrderSAGCostFunction::update_stan_vectors_to_reference_values()
{
    auto num_of_variables = m_trainable_parameters->rows();
    for (auto i : range(num_of_variables))
    {
        (*m_trainable_parameters)(i, 0) = m_ref_trainable_parameters[i];
    }
}

SGVector<float64_t> StanFirstOrderSAGCostFunction::get_gradient()
{
    auto num_of_variables = m_trainable_parameters->rows();
    REQUIRE(
        num_of_variables > 0,
        "Number of variables must be greater than 0, you provided none");

    update_stan_vectors_to_reference_values();
    var f_i = (*m_cost_for_ith_point)(m_index_of_sample, 0)(
        m_trainable_parameters, m_index_of_sample);

    stan::math::set_zero_all_adjoints();
    f_i.grad();

    SGVector<float64_t>::EigenVectorXt gradients =
        m_trainable_parameters->unaryExpr(
            [](stan::math::var x) -> float64_t { return x.adj(); });
    // clone needed because gradients is a local variable
    return SGVector<float64_t>(gradients).clone();
Review comment: you could just simply wrap the EigenVectorXt with SGVector and not do the cloning... but I reckon it'd be better to simply just create an…

Reply: @vigsterkr But the data was being destroyed since the variable was local. I tried the second one, but when I returned gradients, it just contained garbage values?

Reply: if you return SGVector<float64_t>, that should trigger a copy-ctor that should ++ the ref counter and hence shouldn't delete the data itself.

Reply: there should be many examples where this is used in shogun... e.g. https://github.com/shogun-toolbox/shogun/blob/develop/src/shogun/machine/gp/GaussianLikelihood.cpp#L238
}
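To illustrate the suggestion in the thread above, a sketch of a clone-free return, assuming SGVector's copy into the return value bumps the reference count as described:

```cpp
// Sketch: fill an SGVector directly and return it by value; the copy-ctor
// increments the ref counter, so the buffer outlives this scope without
// an extra clone().
SGVector<float64_t> grad(m_trainable_parameters->rows());
for (index_t i = 0; i < grad.vlen; ++i)
    grad[i] = (*m_trainable_parameters)(i, 0).adj();
return grad;
```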

float64_t StanFirstOrderSAGCostFunction::get_cost()
{
    auto n = get_sample_size();
    StanVector cost_argument(n);

    update_stan_vectors_to_reference_values();
    for (auto i : range(n))
    {
        cost_argument(i, 0) =
            (*m_cost_for_ith_point)(i, 0)(m_trainable_parameters, i);
    }
    var cost = (*m_total_cost)(&cost_argument);
    return cost.val();
}
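Taken together, begin_sample(), next_sample(), get_gradient() and obtain_variable_reference() form the iteration protocol a stochastic minimizer drives. A hypothetical single gradient-descent pass, reusing the `cost` object from the earlier sketch, might look like:

```cpp
// Hypothetical driver: one stochastic-gradient pass over all samples.
float64_t learning_rate = 0.01;
SGVector<float64_t> w_ref = cost.obtain_variable_reference(); // shared buffer
cost.begin_sample();
while (cost.next_sample())
{
    SGVector<float64_t> g = cost.get_gradient();
    for (index_t j = 0; j < w_ref.vlen; ++j)
        w_ref[j] -= learning_rate * g[j]; // in-place update on the reference
}
// get_cost() copies the updated reference values back into the Stan
// parameters (via update_stan_vectors_to_reference_values) before evaluating.
float64_t current_cost = cost.get_cost();
```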

index_t StanFirstOrderSAGCostFunction::get_sample_size()
{
    return m_X.num_cols;
}

SGVector<float64_t> StanFirstOrderSAGCostFunction::get_average_gradient()
{
    int32_t params_num = m_trainable_parameters->rows();
Review comment: use `auto`.
    SGVector<float64_t> average_gradients(params_num);

    auto old_index_sample = m_index_of_sample;
    auto n = get_sample_size();
    REQUIRE(
        n > 0,
        "Number of samples must be greater than 0, you provided no samples");

    for (index_t i = 0; i < n; ++i)
    {
        m_index_of_sample = i;
Review comment: mmmm this feels very very strange... as this definitely makes the whole cost function not thread-safe. I'm not saying that it should be, but I'm not convinced atm that this is actually required, or the only way to solve what you want.

Reply: This is one of the things I'm discussing with you today.

Reply: This should be addressed in the next set of commits.
        average_gradients += get_gradient();
    }
    average_gradients.scale(1.0 / n);
Review comment: plz use…
    m_index_of_sample = old_index_sample;
    return average_gradients;
}
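On the thread-safety concern above: one conceivable refactor, purely hypothetical and not part of this patch, is an overload that takes the sample index explicitly instead of mutating m_index_of_sample. Note that Stan's global autodiff tape would still need care before any real concurrent use:

```cpp
// Hypothetical overload: no shared mutable sample index. The Stan autodiff
// tape itself is still global state, so this alone does not make concurrent
// calls safe.
SGVector<float64_t> StanFirstOrderSAGCostFunction::get_gradient(index_t index)
{
    update_stan_vectors_to_reference_values();
    var f_i = (*m_cost_for_ith_point)(index, 0)(m_trainable_parameters, index);
    stan::math::set_zero_all_adjoints();
    f_i.grad();
    SGVector<float64_t> grad(m_trainable_parameters->rows());
    for (index_t i = 0; i < grad.vlen; ++i)
        grad[i] = (*m_trainable_parameters)(i, 0).adj();
    return grad;
}
```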

SGVector<float64_t> StanFirstOrderSAGCostFunction::obtain_variable_reference()
{
    return m_ref_trainable_parameters;
}
@@ -0,0 +1,155 @@
/*
 * This software is distributed under BSD 3-clause license (see LICENSE file).
 *
 * Authors: Elfarouk
 */

#ifndef StanFirstOrderSAGCostFunction_H
#define StanFirstOrderSAGCostFunction_H

#include <stan/math.hpp>

#include <functional>

#include <shogun/lib/SGMatrix.h>
#include <shogun/lib/SGVector.h>
#include <shogun/lib/config.h>
#include <shogun/mathematics/eigen3.h>
#include <shogun/optimization/FirstOrderSAGCostFunction.h>

using StanVector = Eigen::Matrix<stan::math::var, Eigen::Dynamic, 1>;

template <class T>
using FunctionReturnsStan = std::function<stan::math::var(T)>;

template <class T>
using FunctionStanVectorArg = std::function<stan::math::var(StanVector*, T)>;

template <class S>
using StanFunctionsVector =
    Eigen::Matrix<FunctionStanVectorArg<S>, Eigen::Dynamic, 1>;
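To make the aliases concrete, a small hypothetical illustration of the two function types:

```cpp
// FunctionStanVectorArg<float64_t>: a callback mapping (parameters, sample
// index) to the Stan autodiff scalar for that sample's cost.
FunctionStanVectorArg<float64_t> one_cost =
    [](StanVector* params, float64_t /*i*/) -> stan::math::var {
        return (*params)(0, 0) * (*params)(0, 0); // e.g. w_0^2
    };

// StanFunctionsVector<float64_t>: an Eigen column vector of such callbacks,
// one per training sample.
StanFunctionsVector<float64_t> per_sample_costs(2);
per_sample_costs(0, 0) = one_cost;
per_sample_costs(1, 0) = one_cost;
```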
namespace shogun
{
    /** @brief The first order stochastic cost function base class for
     * implementing the SAG cost function
     *
     * The class gives the implementation used in first order stochastic
     * minimizers
     *
     * The cost function must be written as a finite sample-specific sum of
     * costs.
     * For example, the least squares cost function
     * \f[
     * f(w)=\frac{ \sum_i{ (y_i-w^T x_i)^2 } }{2}
     * \f]
     * where \f$(y_i,x_i)\f$ is the i-th sample,
     * \f$y_i\f$ is the label and \f$x_i\f$ is the features
     */
    class StanFirstOrderSAGCostFunction : public FirstOrderSAGCostFunction
Review comment: Not sure if this relationship between…

Reply: Agreed. I've changed the parent class to be FirstOrderStochasticCostFunction in the next set of commits, since all these implemented functions come directly from it. (FirstOrderSAGCostFunction also inherits from FirstOrderStochasticCostFunction, so it makes sense here to inherit from it, as the Stan version is an alternative to FirstOrderSAGCostFunction.)
    {
    public:
        StanFirstOrderSAGCostFunction(
            SGMatrix<float64_t> X, SGMatrix<float64_t> y,
            StanVector* trainable_parameters,
            StanFunctionsVector<float64_t>* cost_for_ith_point,
            FunctionReturnsStan<StanVector*>* total_cost);

        StanFirstOrderSAGCostFunction(){};

        /** Setter for the training data X and y */
        virtual void
        set_training_data(SGMatrix<float64_t> X_new, SGMatrix<float64_t> y_new);

        virtual ~StanFirstOrderSAGCostFunction();

        /** Initialize to generate a sample sequence
         */
        virtual void begin_sample();

        /** Get next sample
         *
         * @return false if the end of the sample sequence is reached
         */
        virtual bool next_sample();

        /** Get the sample gradient value wrt target variables
         *
         * WARNING
         * This method returns
         * \f$ \frac{\partial f_i(w) }{\partial w} \f$,
         * instead of
         * \f$\sum_i{ \frac{\partial f_i(w) }{\partial w} }\f$
         *
         * For the least squares cost function, that is the value of
         * \f$\frac{\partial f_i(w) }{\partial w}\f$ given \f$w\f$ is known,
         * where the index \f$i\f$ is obtained by next_sample()
         *
         * @return sample gradient of variables
         */
        virtual SGVector<float64_t> get_gradient();

        /** Get the cost given current target variables
         *
         * For least squares, that is the value of \f$f(w)\f$.
         *
         * @return cost
         */
        virtual float64_t get_cost();

        /** Get the sample size
         *
         * @return the sample size
         */
        virtual index_t get_sample_size();

        /** Get the average gradient value wrt target variables
         *
         * Note that the average gradient is the mean of the sample gradients
         * from get_gradient()
         * if samples are generated (uniformly) at random.
         *
         * WARNING
         * This method returns
         * \f$ \frac{\sum_i^n{ \frac{\partial f_i(w) }{\partial w} }}{n}\f$
         *
         * For least squares, that is the value of
         * \f$ \frac{\frac{\partial f(w) }{\partial w}}{n} \f$ given \f$w\f$ is
         * known,
         * where \f$f(w)=\frac{ \sum_i^n{ (y_i-w^T x_i)^2 } }{2}\f$
         *
         * @return average gradient of target variables
         */
        virtual SGVector<float64_t> get_average_gradient();

        virtual SGVector<float64_t> obtain_variable_reference();

        /** Updates m_trainable_parameters values to m_ref_trainable_parameters
         */
        void update_stan_vectors_to_reference_values();

    protected:
        /** X is the training data in column major matrix format */
        SGMatrix<float64_t> m_X;

        /** y is the ground truth, or the correct prediction */
        SGMatrix<float64_t> m_y;
Review comment: @iglesias In the unit test of FirstOrderSAGCostFunction, we implement an additional class called CRegressionExample that wraps FirstOrderSAGCostFunction and contains the training data. I've simply removed the necessity for that class, since it just acted as a wrapper around the cost function and data, and have included the data in the loss function, or at least a reference to the data.

Reply: We must be careful and we must not add unnecessary relationships. If it turns out that the training data is a member both in the cost function, and in the neural network, and in etc. etc., that is going to cause a lot of usage confusion. It does not sound unreasonable that the tests have a facility packaged to prepare input data and avoid code duplication (e.g. inside a class such as the CRegressionExample you are mentioning). What did you find wrong with it?

        /** trainable_parameters are the variables that are optimized for */
        StanVector* m_trainable_parameters;

        /** cost_for_ith_point is the cost contributed by each point in the
         * training data */
        StanFunctionsVector<float64_t>* m_cost_for_ith_point;

        /** total_cost is the total cost to be minimized; in this case it is a
         * sum over cost_for_ith_point */
        FunctionReturnsStan<StanVector*>* m_total_cost;

        /** Reference values for trainable_parameters so that minimizers can
         * perform in-place updates */
        SGVector<float64_t> m_ref_trainable_parameters;

        /** index_of_sample is the index of the column in X for the current
         * sample */
        index_t m_index_of_sample;
    };
}

#endif /* StanFirstOrderSAGCostFunction_H */
Review comment: As you are already touching this file, it would be nice to make a tiny extra effort and update the copyright header to the more modern, shorter version.