parallel sgd #603

Closed
wants to merge 9 commits into from
1 change: 1 addition & 0 deletions src/mlpack/core/optimizers/CMakeLists.txt
@@ -4,6 +4,7 @@ set(DIRS
aug_lagrangian
lbfgs
minibatch_sgd
parallel_sgd
rmsprop
sa
sdp
13 changes: 13 additions & 0 deletions src/mlpack/core/optimizers/parallel_sgd/CMakeLists.txt
@@ -0,0 +1,13 @@
set(SOURCES
sgdp.hpp
sgdp_impl.hpp
test_function.hpp
test_function.cpp
)

set(DIR_SRCS)
foreach(file ${SOURCES})
set(DIR_SRCS ${DIR_SRCS} ${CMAKE_CURRENT_SOURCE_DIR}/${file})
endforeach()

set(MLPACK_SRCS ${MLPACK_SRCS} ${DIR_SRCS} PARENT_SCOPE)
85 changes: 85 additions & 0 deletions src/mlpack/core/optimizers/parallel_sgd/sgdp.hpp
@@ -0,0 +1,85 @@
/**
* @file sgdp.hpp
* @author Ranjan Mondal
*
* Parallel Stochastic Gradient Descent (SGD).
*/
#ifndef __MLPACK_CORE_OPTIMIZERS_PARALLELSGD_SGDP_HPP
#define __MLPACK_CORE_OPTIMIZERS_PARALLELSGD_SGDP_HPP

#include <mlpack/core.hpp>
#include <omp.h>
#include <vector>

namespace mlpack {
namespace optimization {

template<typename DecomposableFunctionType>
class ParallelSGD
{
public:
/**
* @param function Function to be optimized (minimized).
* @param stepSize Step size for each iteration.
* @param maxIterations Maximum number of iterations allowed (0 means no
* limit).
* @param tolerance Maximum absolute tolerance to terminate algorithm.
*
**/
ParallelSGD(DecomposableFunctionType& function,
const double stepSize = 0.01,
const size_t maxIterations = 100000,
const double tolerance = 1e-5);

/**
* Optimize the given function using parallel stochastic gradient descent.
* The given starting point will be modified to store the finishing point of the
* algorithm, and the final objective value is returned.
* @param iterate Starting point (will be modified).
* @return Objective value of the final point.
*/
double Optimize(arma::mat& iterate);

//! Get the instantiated function to be optimized.
const DecomposableFunctionType& Function() const { return function; }
//! Modify the instantiated function.
DecomposableFunctionType& Function() { return function; }

//! Get the step size.
double StepSize() const { return stepSize; }
//! Modify the step size.
double& StepSize() { return stepSize; }

//! Get the maximum number of iterations (0 indicates no limit).
size_t MaxIterations() const { return maxIterations; }
//! Modify the maximum number of iterations (0 indicates no limit).
size_t& MaxIterations() { return maxIterations; }

//! Get the tolerance for termination.
double Tolerance() const { return tolerance; }
//! Modify the tolerance for termination.
double& Tolerance() { return tolerance; }


private:
//! The instantiated function.
DecomposableFunctionType& function;

//! The step size for each example.
double stepSize;

//! The maximum number of allowed iterations.
size_t maxIterations;

//! The tolerance for termination.
double tolerance;

};

} // namespace optimization
} // namespace mlpack

// Include implementation.
#include "sgdp_impl.hpp"

#endif
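For reference, a minimal usage sketch of the interface declared above (not part of this patch). The include paths follow the file locations added in this PR, and it assumes test_function.hpp declares the PSGDTestFunction implemented in test_function.cpp below:

#include <mlpack/core.hpp>
#include <mlpack/core/optimizers/parallel_sgd/sgdp.hpp>
#include <mlpack/core/optimizers/parallel_sgd/test_function.hpp>

using namespace mlpack;
using namespace mlpack::optimization;
using namespace mlpack::optimization::test;

int main()
{
  // The decomposable function to minimize; its parts are summed by Optimize().
  PSGDTestFunction f;

  // Arguments: function, stepSize, maxIterations, tolerance (see sgdp.hpp).
  ParallelSGD<PSGDTestFunction> s(f, 0.0003, 5000000, 1e-9);

  // Optimize() overwrites the starting point with the final point and returns
  // the final objective value.
  arma::mat coordinates("6; -45.6; 6.2");
  const double result = s.Optimize(coordinates);

  Log::Info << "Final objective: " << result << "." << std::endl;
  return 0;
}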
153 changes: 153 additions & 0 deletions src/mlpack/core/optimizers/parallel_sgd/sgdp_impl.hpp
@@ -0,0 +1,153 @@
/**
* @file sgdp_impl.hpp
* @author Ranjan Mondal
*
* Implementation of Parallel stochastic gradient descent.
*/
#ifndef __MLPACK_CORE_OPTIMIZERS_PARALLELSGD_SGDP_IMPL_HPP
#define __MLPACK_CORE_OPTIMIZERS_PARALLELSGD_SGDP_IMPL_HPP

#include <mlpack/methods/regularized_svd/regularized_svd_function.hpp>
// In case it hasn't been included yet.
#include "sgdp.hpp"
namespace mlpack {
namespace optimization {

template<typename DecomposableFunctionType>
ParallelSGD<DecomposableFunctionType>::ParallelSGD(DecomposableFunctionType& function,
const double stepSize,
const size_t maxIterations,
const double tolerance) :
function(function),
stepSize(stepSize),
maxIterations(maxIterations),
tolerance(tolerance)
{ /* Nothing to do. */ }



//! Optimize the function (minimize).
template<typename DecomposableFunctionType>
double ParallelSGD<DecomposableFunctionType>::Optimize(arma::mat& iterate)
{

// Find the number of functions to use.
const size_t numFunctions = function.NumFunctions();

// To keep track of where we are and how things are going.
double overallObjective = 0;
double lastObjective = DBL_MAX;

// Get the maximum number of threads that will be running; this is governed by
// OMP_NUM_THREADS.
size_t num_thread = omp_get_max_threads();

// T: every T iterations each thread takes part in the tolerance check.  T can
// be defined in many ways, for example:
// size_t T = maxIterations / num_thread;
// Here T is taken as a constant.
size_t T = 1000;
Contributor Author:

T can be a function of the number of threads, maxIterations, and the current iteration value. Here I have taken it as a constant. If T = 1, the tolerance is checked on every iteration, which slows the algorithm down.
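To make that concrete, one hypothetical way to derive T from the iteration budget and thread count instead of hard-coding it (illustrative only, not part of this patch; assumes <algorithm> is available for std::max):

// Hypothetical adaptive sync interval: check tolerance less often when the
// iteration budget is large, with a floor so it never degenerates to T = 1.
size_t T = std::max<size_t>(100, maxIterations / (10 * num_thread));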

// Vector of iterates; its length equals the number of available threads.
std::vector<arma::mat> tIterate;

// Initialize each element of tIterate with the initial iterate value.
for (size_t i = 0; i < num_thread; i++)
{
tIterate.push_back(iterate);
}

// sumIterate keeps track of the sum of the iterate values computed by each thread.
arma::mat sumIterate(iterate.n_rows, iterate.n_cols);
arma::mat gradient(iterate.n_rows, iterate.n_cols); // Gradient for each thread.
size_t it;
bool halt = false;
sumIterate.zeros();
math::RandomSeed(std::time(NULL));


#pragma omp parallel shared(sumIterate,halt) private(it,gradient)
{
it = 1;
int selectedFunction;
int th_num;
while (it != maxIterations && halt != true)
{
it++;

th_num = omp_get_thread_num(); // Index of the thread this block is running on.
selectedFunction = (int) (numFunctions * math::Random());
function.Gradient(tIterate[th_num], selectedFunction, gradient);
tIterate[th_num] -= stepSize * gradient;

// Check whether this iteration should take part in the tolerance check.
Contributor Author:

T is used so that each thread runs T iterations independently, to increase speedup. Here I have taken T as a constant (1000); it could instead be a function of maxIterations, the number of threads, and the current iteration value.
if (it % T != 0)
{
continue;
}


#pragma omp critical
{
sumIterate += tIterate[th_num];
}

// Wait until all threads have updated sumIterate.
#pragma omp barrier


// Run a single thread for the tolerance check.
#pragma omp master
Member:

I thought that the tolerance checking was done for each algorithm individually. As in, all you would need to do for this algorithm would be something like:

#pragma omp parallel
{
  SGD sgd(...);
  sgd.Optimize(...);
}

// Then combine results.

I think that this could be a lot simpler still. Since we already have the SGD class, why not use it? :) Let me know if I've misunderstood something.
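For concreteness, a rough sketch of what that suggestion could look like (illustrative only; it assumes the existing SGD class from mlpack/core/optimizers/sgd/sgd.hpp with a (function, stepSize, maxIterations, tolerance) constructor, and averages the per-thread results by hand, much like this patch does):

#include <mlpack/core/optimizers/sgd/sgd.hpp>

// Each thread runs an independent SGD instance on its own copy of the
// parameters; the copies are then averaged into the final iterate.
arma::mat combined(iterate.n_rows, iterate.n_cols, arma::fill::zeros);
#pragma omp parallel
{
  arma::mat localIterate = iterate;  // Thread-local copy of the parameters.
  SGD<DecomposableFunctionType> sgd(function, stepSize, maxIterations, tolerance);
  sgd.Optimize(localIterate);

  #pragma omp critical
  combined += localIterate;
}
iterate = combined / omp_get_max_threads();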

{

sumIterate = sumIterate / num_thread;
overallObjective = 0;
for (size_t i = 0; i < numFunctions; ++i)
{
overallObjective += function.Evaluate(sumIterate, i);
}

if (std::isnan(overallObjective) || std::isinf(overallObjective))
{
Log::Warn << "Parallel SGD: converged to " << overallObjective
    << "; terminating with failure.  Try a smaller step size?" << std::endl;
halt = true;
}

if (std::abs(lastObjective - overallObjective) < tolerance)
{
Log::Info << "Parallel SGD: minimized within tolerance " << tolerance
    << "; terminating optimization." << std::endl;
halt = true;
}

sumIterate.zeros();
lastObjective = overallObjective;

}
#pragma omp barrier

} // End of while loop.
} // End of parallel region.



sumIterate.zeros();
for (size_t t = 0; t < num_thread; t++)
{
sumIterate += tIterate[t];
}
sumIterate = sumIterate / num_thread;

iterate = sumIterate;
overallObjective = 0;

// Calculate the objective function value at the computed iterate.
for (size_t i = 0; i < numFunctions; ++i)
overallObjective += function.Evaluate(iterate, i);

return overallObjective;
}

} // namespace optimization
} // namespace mlpack

#endif
89 changes: 89 additions & 0 deletions src/mlpack/core/optimizers/parallel_sgd/test_function.cpp
@@ -0,0 +1,89 @@
/**
* @file test_function.cpp
* @author Ryan Curtin
*
* Implementation of very simple test functions for parallel stochastic gradient
* descent (PSGD).
*/
#include "test_function.hpp"

using namespace mlpack;
using namespace mlpack::optimization;
using namespace mlpack::optimization::test;

double PSGDTestFunction::Evaluate(const arma::mat& coordinates, const size_t i)
const
{
switch (i)
{
case 0:
return -std::exp(-std::abs(coordinates[0]));
case 1:
return std::pow(coordinates[1], 2);

case 2:
return std::pow(coordinates[2], 4) + 3 * std::pow(coordinates[2], 2);
default:
return 0;
}
}

void PSGDTestFunction::Gradient(const arma::mat& coordinates,
const size_t i,
arma::mat& gradient) const
{
gradient.zeros(3);
switch (i)
{
case 0:
if (coordinates[0] >= 0)
gradient[0] = std::exp(-coordinates[0]);
else
gradient[0] = -std::exp(coordinates[0]);
break;
case 1:
gradient[1] = 2 * coordinates[1];
break;

case 2:
gradient[2] = 4 * std::pow(coordinates[2], 3) + 6 * coordinates[2];
break;
}
}


double BoothsFunction::Evaluate(const arma::mat& coordinates, const size_t i) const
{
switch (i)
{
case 0:
return std::pow((coordinates[0] + 2 * coordinates[1] - 7), 2);
case 1:
return std::pow((2 * coordinates[0] + coordinates[1] - 5), 2);
default:
return 0;
}
}

void BoothsFunction::Gradient(const arma::mat& coordinates,
                              const size_t i,
                              arma::mat& gradient) const
{
gradient.zeros(2);
switch (i)
{
case 0:
gradient[0] = 2 * (coordinates[0] + 2 * coordinates[1] - 7);
gradient[1] = 4 * (coordinates[0] + 2 * coordinates[1] - 7);
break;
case 1:
gradient[0] = 4 * (2 * coordinates[0] + coordinates[1] - 5);
gradient[1] = 2 * (2 * coordinates[0] + coordinates[1] - 5);
break;
}
}
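As a sanity check (hypothetical test code, not part of this PR): Booth's function has its global minimum at (1, 3), where the objective is 0, so a test driver could look roughly like the following, assuming test_function.hpp gives BoothsFunction the NumFunctions() member that the DecomposableFunctionType interface requires:

// Sketch of a convergence check against the known minimum of Booth's function.
BoothsFunction f;
ParallelSGD<BoothsFunction> optimizer(f, 0.001, 500000, 1e-9);

arma::mat coordinates("-10; 10");
const double objective = optimizer.Optimize(coordinates);

// Expect coordinates to approach (1, 3) and the objective to approach 0.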