parallel sgd #603
@@ -4,6 +4,7 @@ set(DIRS
  aug_lagrangian
  lbfgs
  minibatch_sgd
  parallel_sgd
  rmsprop
  sa
  sdp
@@ -0,0 +1,13 @@
set(SOURCES
  sgdp.hpp
  sgdp_impl.hpp
  test_function.hpp
  test_function.cpp
)

set(DIR_SRCS)
foreach(file ${SOURCES})
  set(DIR_SRCS ${DIR_SRCS} ${CMAKE_CURRENT_SOURCE_DIR}/${file})
endforeach()

set(MLPACK_SRCS ${MLPACK_SRCS} ${DIR_SRCS} PARENT_SCOPE)
@@ -0,0 +1,85 @@
/**
 * @file sgdp.hpp
 * @author Ranjan Mondal
 *
 * Parallel Stochastic Gradient Descent (SGD).
 */
#ifndef __MLPACK_CORE_OPTIMIZERS_PARALLELSGD_SGDP_HPP
#define __MLPACK_CORE_OPTIMIZERS_PARALLELSGD_SGDP_HPP

#include <mlpack/core.hpp>
#include <omp.h>
#include <vector>

namespace mlpack {
namespace optimization {

template<typename DecomposableFunctionType>
class ParallelSGD
{
 public:
  /**
   * @param function Function to be optimized (minimized).
   * @param stepSize Step size for each iteration.
   * @param maxIterations Maximum number of iterations allowed (0 means no
   *     limit).
   * @param tolerance Maximum absolute tolerance to terminate the algorithm.
   */
  ParallelSGD(DecomposableFunctionType& function,
              const double stepSize = 0.01,
              const size_t maxIterations = 100000,
              const double tolerance = 1e-5);

  /**
   * Optimize the given function using parallel stochastic gradient descent.
   * The given starting point will be modified to store the finishing point of
   * the algorithm, and the final objective value is returned.
   *
   * @param iterate Starting point (will be modified).
   * @return Objective value of the final point.
   */
  double Optimize(arma::mat& iterate);

  //! Get the instantiated function to be optimized.
  const DecomposableFunctionType& Function() const { return function; }
  //! Modify the instantiated function.
  DecomposableFunctionType& Function() { return function; }

  //! Get the step size.
  double StepSize() const { return stepSize; }
  //! Modify the step size.
  double& StepSize() { return stepSize; }

  //! Get the maximum number of iterations (0 indicates no limit).
  size_t MaxIterations() const { return maxIterations; }
  //! Modify the maximum number of iterations (0 indicates no limit).
  size_t& MaxIterations() { return maxIterations; }

  //! Get the tolerance for termination.
  double Tolerance() const { return tolerance; }
  //! Modify the tolerance for termination.
  double& Tolerance() { return tolerance; }

 private:
  //! The instantiated function.
  DecomposableFunctionType& function;

  //! The step size for each example.
  double stepSize;

  //! The maximum number of allowed iterations.
  size_t maxIterations;

  //! The tolerance for termination.
  double tolerance;
};

} // namespace optimization
} // namespace mlpack

// Include implementation.
#include "sgdp_impl.hpp"

#endif
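For reviewers trying this out, here is a minimal usage sketch (my illustration, not part of the PR; it assumes PSGDTestFunction exposes NumFunctions() and a GetInitialPoint() member like mlpack's other SGD test functions):

#include <mlpack/core.hpp>
#include <iostream>
#include "sgdp.hpp"
#include "test_function.hpp"

using namespace mlpack::optimization;
using namespace mlpack::optimization::test;

int main()
{
  // Any class with NumFunctions(), Evaluate(), and Gradient() works here.
  PSGDTestFunction f;

  // Step size, iteration limit, and tolerance are illustrative values.
  ParallelSGD<PSGDTestFunction> optimizer(f, 0.01, 100000, 1e-5);

  // GetInitialPoint() is assumed to exist, as in mlpack's SGDTestFunction.
  arma::mat coordinates = f.GetInitialPoint();
  const double objective = optimizer.Optimize(coordinates);

  std::cout << "Final objective: " << objective << std::endl;
  return 0;
}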
@@ -0,0 +1,153 @@
/**
 * @file sgdp_impl.hpp
 * @author Ranjan Mondal
 *
 * Implementation of parallel stochastic gradient descent.
 */
#ifndef __MLPACK_CORE_OPTIMIZERS_PARALLELSGD_SGDP_IMPL_HPP
#define __MLPACK_CORE_OPTIMIZERS_PARALLELSGD_SGDP_IMPL_HPP

#include <mlpack/methods/regularized_svd/regularized_svd_function.hpp>
// In case it hasn't been included yet.
#include "sgdp.hpp"

namespace mlpack {
namespace optimization {

template<typename DecomposableFunctionType>
ParallelSGD<DecomposableFunctionType>::ParallelSGD(
    DecomposableFunctionType& function,
    const double stepSize,
    const size_t maxIterations,
    const double tolerance) :
    function(function),
    stepSize(stepSize),
    maxIterations(maxIterations),
    tolerance(tolerance)
{ /* Nothing to do. */ }

//! Optimize the function (minimize).
template<typename DecomposableFunctionType>
double ParallelSGD<DecomposableFunctionType>::Optimize(arma::mat& iterate)
{
  // Find the number of functions to use.
  const size_t numFunctions = function.NumFunctions();

  // To keep track of where we are and how things are going.
  double overallObjective = 0;
  double lastObjective = DBL_MAX;

  // Get the maximum number of threads that will be running; this is defined
  // by OMP_NUM_THREADS.
  size_t numThreads = omp_get_max_threads();

  // T: after every T iterations each thread goes for the tolerance check.
  // T could be defined in many ways (e.g. T = maxIterations / numThreads);
  // here T is taken as a constant.
  size_t T = 1000;

  // Vector of iterates; the length of tIterate equals the number of
  // available threads.
  std::vector<arma::mat> tIterate;

  // Initialize each element of tIterate with the initial iterate value.
  for (size_t i = 0; i < numThreads; i++)
    tIterate.push_back(iterate);

  // sumIterate keeps track of the sum of the iterates computed by the
  // threads.
  arma::mat sumIterate(iterate.n_rows, iterate.n_cols);
  arma::mat gradient(iterate.n_rows, iterate.n_cols); // Per-thread gradient.
  size_t it;
  bool halt = false;
  sumIterate.zeros();
  math::RandomSeed(std::time(NULL));

  #pragma omp parallel shared(sumIterate, halt) private(it, gradient)
  {
    it = 1;
    size_t selectedFunction;
    size_t threadId;
    while (it != maxIterations && !halt)
    {
      it++;

      // The index of the thread this iteration is running in.
      threadId = omp_get_thread_num();
      selectedFunction = (size_t) (numFunctions * math::Random());
      function.Gradient(tIterate[threadId], selectedFunction, gradient);
      tIterate[threadId] -= stepSize * gradient;

      // Check whether this iteration should go on to the tolerance check.
Review comment: T is added so that each thread runs T iterations independently, to increase speedup.
      if (it % T != 0)
        continue;

      // Each thread adds its local iterate into the shared sum.
      #pragma omp critical
      {
        sumIterate += tIterate[threadId];
      }

      // Wait until all threads have updated sumIterate.
      #pragma omp barrier

      // Run a single thread for the tolerance check.
      #pragma omp master
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I thought that the tolerance checking was done for each algorithm individually. As in, all you would need to do for this algorithm would be something like:
I think that this could be a lot simpler still. Since we already have the |
      {
        sumIterate = sumIterate / numThreads;
        overallObjective = 0;
        for (size_t i = 0; i < numFunctions; ++i)
          overallObjective += function.Evaluate(sumIterate, i);

        if (std::isnan(overallObjective) || std::isinf(overallObjective))
        {
          Log::Warn << "Parallel SGD: converged to " << overallObjective
              << "; terminating with failure.  Try a smaller step size?"
              << std::endl;
          halt = true;
        }

        if (std::abs(lastObjective - overallObjective) < tolerance)
        {
          Log::Info << "Parallel SGD: minimized within tolerance " << tolerance
              << "; terminating optimization." << std::endl;
          halt = true;
        }

        sumIterate.zeros();
        lastObjective = overallObjective;
      }
      #pragma omp barrier
    } // End of while loop.
  } // End of all threads.

  // Average the final iterates of all threads.
  sumIterate.zeros();
  for (size_t t = 0; t < numThreads; t++)
    sumIterate += tIterate[t];
  sumIterate = sumIterate / numThreads;

  iterate = sumIterate;
  overallObjective = 0;
  // Calculate the objective function with the computed iterate.
  for (size_t i = 0; i < numFunctions; ++i)
    overallObjective += function.Evaluate(iterate, i);

  return overallObjective;
}

} // namespace optimization
} // namespace mlpack

#endif
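To summarize what Optimize() implements (notation mine, not from the PR): each of the P threads runs independent SGD steps on its own copy w_p of the iterate, and every T iterations the master thread averages the copies and evaluates the full objective on that average, halting when the change falls below the tolerance:

w_p \leftarrow w_p - \eta \, \nabla f_{i_p}(w_p),
    \qquad i_p \sim \mathrm{Uniform}\{0, \dots, n - 1\}

\bar{w} = \frac{1}{P} \sum_{p=1}^{P} w_p,
    \qquad f(\bar{w}) = \sum_{i=1}^{n} f_i(\bar{w})

\text{halt when } \left| f(\bar{w}_{\mathrm{prev}}) - f(\bar{w}) \right| < \varepsilon

Note that the average is only used for the convergence check; each thread then continues from its own local copy, and the copies are averaged once more at the end to produce the returned iterate.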
@@ -0,0 +1,89 @@
/**
 * @file test_function.cpp
 * @author Ryan Curtin
 *
 * Implementation of very simple test functions for parallel stochastic
 * gradient descent (PSGD).
 */
#include "test_function.hpp"

using namespace mlpack;
using namespace mlpack::optimization;
using namespace mlpack::optimization::test;

double PSGDTestFunction::Evaluate(const arma::mat& coordinates, const size_t i)
    const
{
  switch (i)
  {
    case 0:
      return -std::exp(-std::abs(coordinates[0]));
    case 1:
      return std::pow(coordinates[1], 2);
    case 2:
      return std::pow(coordinates[2], 4) + 3 * std::pow(coordinates[2], 2);
    default:
      return 0;
  }
}

void PSGDTestFunction::Gradient(const arma::mat& coordinates,
                                const size_t i,
                                arma::mat& gradient) const
{
  gradient.zeros(3);
  switch (i)
  {
    case 0:
      if (coordinates[0] >= 0)
        gradient[0] = std::exp(-coordinates[0]);
      else
        gradient[0] = -std::exp(coordinates[0]);
      break;
    case 1:
      gradient[1] = 2 * coordinates[1];
      break;
    case 2:
      gradient[2] = 4 * std::pow(coordinates[2], 3) + 6 * coordinates[2];
      break;
  }
}

double BoothsFunction::Evaluate(const arma::mat& coordinates, const size_t i)
    const
{
  switch (i)
  {
    case 0:
      return std::pow(coordinates[0] + 2 * coordinates[1] - 7, 2);
    case 1:
      return std::pow(2 * coordinates[0] + coordinates[1] - 5, 2);
    default:
      return 0;
  }
}

void BoothsFunction::Gradient(const arma::mat& coordinates,
                              const size_t i,
                              arma::mat& gradient) const
{
  gradient.zeros(2);
  switch (i)
  {
    case 0:
      gradient[0] = 2 * (coordinates[0] + 2 * coordinates[1] - 7);
      gradient[1] = 4 * (coordinates[0] + 2 * coordinates[1] - 7);
      break;
    case 1:
      gradient[0] = 4 * (2 * coordinates[0] + coordinates[1] - 5);
      gradient[1] = 2 * (2 * coordinates[0] + coordinates[1] - 5);
      break;
  }
}
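The two pieces of BoothsFunction sum to f(x, y) = (x + 2y - 7)^2 + (2x + y - 5)^2, whose global minimum is f(1, 3) = 0, which makes it a convenient convergence test. A minimal check might look like this (my sketch, not part of the PR; it assumes BoothsFunction exposes NumFunctions() returning 2, as the decomposable-function API requires):

#include <mlpack/core.hpp>
#include "sgdp.hpp"
#include "test_function.hpp"

using namespace mlpack::optimization;
using namespace mlpack::optimization::test;

void CheckBooths()
{
  BoothsFunction f;

  // Hyperparameters here are illustrative, not tuned.
  ParallelSGD<BoothsFunction> optimizer(f, 0.01, 200000, 1e-7);

  arma::mat coordinates("0; 0");  // Arbitrary starting point.
  const double objective = optimizer.Optimize(coordinates);

  // With a small enough step size, coordinates should approach (1, 3) and
  // the objective should approach 0; exact accuracy depends on the settings.
  Log::Info << "objective: " << objective << ", point: " << coordinates.t();
}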
Review comment: T can be a function of the number of threads, maxIterations, and the current iteration value; here I have taken it constant. If T = 1, the tolerance is checked on every iteration, which slows the algorithm down.
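To make that concrete, one hypothetical way to derive T from the thread count and the iteration budget instead of hard-coding 1000 (the formula and bounds below are illustrative only, not something this PR specifies):

#include <algorithm>
#include <cstddef>

// Hypothetical helper: pick the synchronization interval T from the thread
// count and the iteration budget. T = 1 would check the tolerance on every
// iteration and serialize the threads; a very large T would rarely check it.
size_t SyncInterval(const size_t numThreads, const size_t maxIterations)
{
  // Give each thread an equal share of the iteration budget, clamped to
  // illustrative bounds so checks are neither too frequent nor too rare.
  const size_t share = maxIterations / std::max<size_t>(numThreads, 1);
  return std::min<size_t>(std::max<size_t>(share, 100), 10000);
}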