Some optimizations in the training routine.
Simon Pfreundschuh committed Jul 21, 2016
1 parent 7f559f7 commit 66587d0
Showing 2 changed files with 56 additions and 24 deletions.
45 changes: 38 additions & 7 deletions tmva/tmva/inc/TMVA/DNN/Minimizers.h
@@ -90,12 +90,19 @@ class TGradientDescent
Scalar_t Step(Net_t &net,
Matrix_t &input,
const Matrix_t &output);
/** Similar to Step(...) but only trains bias terms in the first layer. This is
* for compatibility with the previous implementation. */
/** Does not evaluate the loss and therefore does not trigger a possible synchronization
* with the device. Trains the weights of each layer, but only the bias terms of
* the first layer, for compatibility with the previous implementation. */
template <typename Net_t>
Scalar_t StepReducedWeights(Net_t &net,
Matrix_t &input,
const Matrix_t &output);
void StepReducedWeights(Net_t &net,
Matrix_t &input,
const Matrix_t &output);
/** Similar to StepReducedWeights(...) but also evaluates the loss. May trigger
* synchronization with the device. */
template <typename Net_t>
Scalar_t StepReducedWeightsLoss(Net_t &net,
Matrix_t &input,
const Matrix_t &output);
template <typename Net_t>
inline void TestError(Net_t &net,
Matrix_t &input,
@@ -212,10 +219,35 @@ template<typename Architecture_t>
return loss;
}

//______________________________________________________________________________
template<typename Architecture_t>
template <typename Net_t>
void inline TGradientDescent<Architecture_t>::StepReducedWeights(
Net_t & net,
Matrix_t &input,
const Matrix_t &output)
{
net.Forward(input);
net.Backward(input, output);

for (size_t i = 0; i < net.GetDepth(); i++)
{
auto &layer = net.GetLayer(i);
Architecture_t::ScaleAdd(layer.GetWeights(),
layer.GetWeightGradients(),
-fLearningRate);
if (i == 0) {
Architecture_t::ScaleAdd(layer.GetBiases(),
layer.GetBiasGradients(),
-fLearningRate);
}
}
}

//______________________________________________________________________________
template<typename Architecture_t>
template <typename Net_t>
auto inline TGradientDescent<Architecture_t>::StepReducedWeights(
auto inline TGradientDescent<Architecture_t>::StepReducedWeightsLoss(
Net_t & net,
Matrix_t &input,
const Matrix_t &output)
@@ -240,7 +272,6 @@ template<typename Architecture_t>
return loss;
}


//______________________________________________________________________________
template<typename Architecture_t>
template <typename Net_t>
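The split between StepReducedWeights and StepReducedWeightsLoss lets the caller pay for a loss evaluation, and the device synchronization it may force, only when the value is actually needed. A minimal sketch of such a loop, assuming the interfaces visible in the diff above; the testInterval parameter and the TrainLoopSketch name are illustrative and not part of this commit:

// Illustrative only -- not part of this commit. Combines the synchronization-free
// update path with the loss-evaluating path in a single training loop.
#include "TMVA/DNN/Minimizers.h"

template <typename Architecture_t, typename Net_t, typename DataLoader_t>
void TrainLoopSketch(TMVA::DNN::TGradientDescent<Architecture_t> &minimizer,
                     Net_t &net, DataLoader_t &trainingData, size_t testInterval)
{
   size_t stepCount = 0;
   while (!minimizer.HasConverged()) {
      // Evaluate the loss (and accept a possible device sync) only every
      // testInterval-th pass; otherwise take the cheap step.
      bool needLoss = (stepCount % testInterval == 0);
      for (auto batch : trainingData) {
         auto inputMatrix  = batch.GetInput();
         auto outputMatrix = batch.GetOutput();
         if (needLoss) {
            minimizer.StepReducedWeightsLoss(net, inputMatrix, outputMatrix);
         } else {
            minimizer.StepReducedWeights(net, inputMatrix, outputMatrix);
         }
      }
      ++stepCount;
   }
}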
35 changes: 18 additions & 17 deletions tmva/tmva/src/MethodDNN.cxx
@@ -749,28 +749,28 @@ void TMVA::MethodDNN::TrainGPU()
itSettings != itSettingsEnd; ++itSettings, ++idxSetting)
{

std::shared_ptr<TMVA::DNN::Settings> ptrSettings = *itSettings;
ptrSettings->setMonitoring (fMonitoring);
TMVA::DNN::Settings settings = **itSettings;
settings.setMonitoring (fMonitoring);

Log() << kINFO
<< "Training on GPU with learning rate = "
<< ptrSettings->learningRate ()
<< ", momentum = " << ptrSettings->momentum ()
<< ", repetitions = " << ptrSettings->repetitions ()
<< settings.learningRate ()
<< ", momentum = " << settings.momentum ()
<< ", repetitions = " << settings.repetitions ()
<< Endl;

ptrSettings->setProgressLimits ((idxSetting)*100.0/(fSettings.size ()),
settings.setProgressLimits ((idxSetting)*100.0/(fSettings.size ()),
(idxSetting+1)*100.0/(fSettings.size ()));

const std::vector<double>& dropConfig = ptrSettings->dropFractions ();
const std::vector<double>& dropConfig = settings.dropFractions ();
if (!dropConfig.empty ())
{
Log () << kINFO << "Drop configuration" << Endl
<< " drop repetitions = "
<< ptrSettings->dropRepetitions () << Endl;
<< settings.dropRepetitions () << Endl;
}

auto trainNet = GPUNet.CreateClone(ptrSettings->batchSize());
auto trainNet = GPUNet.CreateClone(settings.batchSize());
int idx = 0;
for (auto f : dropConfig)
{
@@ -780,7 +780,6 @@ void TMVA::MethodDNN::TrainGPU()
}
Log () << kINFO << Endl;

std::cout << "train samples: " << nTrainingSamples << std::endl;
using DataLoader_t = typename DNN::TCuda::DataLoader_t<DNN::TMVAInput_t>;
DataLoader_t trainingData(GetEventCollection(Types::kTraining),
nTrainingSamples,
@@ -797,9 +796,9 @@
DNN::TGradientDescent<DNN::TCuda> minimizer{};

minimizer.Reset();
minimizer.SetLearningRate(ptrSettings->learningRate());
minimizer.SetTestInterval(ptrSettings->testRepetitions());
minimizer.SetConvergenceSteps(ptrSettings->convergenceSteps());
minimizer.SetLearningRate(settings.learningRate());
minimizer.SetTestInterval(settings.testRepetitions());
minimizer.SetConvergenceSteps(settings.convergenceSteps());

bool converged = false;
size_t stepCount = 0;
@@ -818,7 +817,7 @@
for (auto batch : trainingData) {
auto inputMatrix = batch.GetInput();
auto outputMatrix = batch.GetOutput();
trainingError += minimizer.StepReducedWeights(
trainingError += minimizer.StepReducedWeightsLoss(
trainNet,
inputMatrix,
outputMatrix);
@@ -835,11 +834,13 @@
minimizer.GetTestError(),
(int) stepCount,
(int) minimizer.GetConvergenceCount ());
std::cout << convText << std::endl;
Double_t progress = minimizer.GetConvergenceCount()
/ settings.convergenceSteps();
settings.cycle(progress, convText);
converged = minimizer.HasConverged();
}
ptrSettings.reset ();
Log () << kINFO << Endl;
Log () << kINFO << Endl;
stepCount++;
}
fMonitoring = 0;
}
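For reference, Architecture_t::ScaleAdd(A, B, beta), as used by the new StepReducedWeights above, is assumed here to perform the elementwise update A += beta * B, so that passing -fLearningRate yields the plain gradient-descent step. A scalar sketch of that assumed semantics on std::vector buffers; the names are illustrative:

#include <cstddef>
#include <vector>

// Sketch of the assumed ScaleAdd semantics: A += beta * B, elementwise.
void ScaleAddSketch(std::vector<double> &A, const std::vector<double> &B, double beta)
{
   for (std::size_t i = 0; i < A.size(); ++i)
      A[i] += beta * B[i];
}

// Called with beta = -learningRate this realizes the per-layer update
//    weights += (-learningRate) * weightGradients
// which StepReducedWeights applies to the weights of every layer and to the
// bias terms of the first layer only.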
