From 6fb7fc4f31d78a34ca35c8a1d9d042a4451398f7 Mon Sep 17 00:00:00 2001 From: Ilya Matiach Date: Thu, 2 Dec 2021 00:54:10 -0500 Subject: [PATCH] perf: improve lightgbm training performance 4x-10x by setting num_threads to be cores-1 --- .../azure/synapse/ml/lightgbm/LightGBMBase.scala | 8 ++++++-- .../azure/synapse/ml/lightgbm/LightGBMClassifier.scala | 2 +- .../azure/synapse/ml/lightgbm/LightGBMRanker.scala | 2 +- .../azure/synapse/ml/lightgbm/LightGBMRegressor.scala | 2 +- 4 files changed, 9 insertions(+), 5 deletions(-) diff --git a/lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/LightGBMBase.scala b/lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/LightGBMBase.scala index 08f30a7c06..0ad6ac64de 100644 --- a/lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/LightGBMBase.scala +++ b/lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/LightGBMBase.scala @@ -247,8 +247,12 @@ trait LightGBMBase[TrainedModel <: Model[TrainedModel]] extends Estimator[Traine * * @return ExecutionParams object containing parameters related to LightGBM execution. */ - protected def getExecutionParams: ExecutionParams = { - ExecutionParams(getChunkSize, getMatrixType, getNumThreads, getUseSingleDatasetMode) + protected def getExecutionParams(numTasksPerExec: Int): ExecutionParams = { + val execNumThreads = + if (getUseSingleDatasetMode) get(numThreads).getOrElse(numTasksPerExec - 1) + else getNumThreads + + ExecutionParams(getChunkSize, getMatrixType, execNumThreads, getUseSingleDatasetMode) } protected def getColumnParams: ColumnParams = { diff --git a/lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/LightGBMClassifier.scala b/lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/LightGBMClassifier.scala index 887903dae3..1bbc397849 100644 --- a/lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/LightGBMClassifier.scala +++ b/lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/LightGBMClassifier.scala @@ -54,7 +54,7 @@ class LightGBMClassifier(override val uid: String) getIsUnbalance, getVerbosity, categoricalIndexes, actualNumClasses, getBoostFromAverage, getBoostingType, get(lambdaL1), get(lambdaL2), get(isProvideTrainingMetric), get(metric), get(minGainToSplit), get(maxDeltaStep), getMaxBinByFeature, get(minDataInLeaf), getSlotNames, - getDelegate, getDartParams, getExecutionParams, getObjectiveParams) + getDelegate, getDartParams, getExecutionParams(numTasksPerExec), getObjectiveParams) } def getModel(trainParams: TrainParams, lightGBMBooster: LightGBMBooster): LightGBMClassificationModel = { diff --git a/lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/LightGBMRanker.scala b/lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/LightGBMRanker.scala index 65d1dcb47c..3a18d26bdf 100644 --- a/lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/LightGBMRanker.scala +++ b/lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/LightGBMRanker.scala @@ -60,7 +60,7 @@ class LightGBMRanker(override val uid: String) getVerbosity, categoricalIndexes, getBoostingType, get(lambdaL1), get(lambdaL2), getMaxPosition, getLabelGain, get(isProvideTrainingMetric), get(metric), getEvalAt, get(minGainToSplit), get(maxDeltaStep), getMaxBinByFeature, get(minDataInLeaf), getSlotNames, getDelegate, getDartParams, - getExecutionParams, getObjectiveParams) + getExecutionParams(numTasksPerExec), getObjectiveParams) } def getModel(trainParams: TrainParams, lightGBMBooster: LightGBMBooster): LightGBMRankerModel = { diff --git a/lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/LightGBMRegressor.scala b/lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/LightGBMRegressor.scala index 99f5c12f71..6b070ccfe9 100644 --- a/lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/LightGBMRegressor.scala +++ b/lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/LightGBMRegressor.scala @@ -70,7 +70,7 @@ class LightGBMRegressor(override val uid: String) getBoostFromAverage, getBoostingType, get(lambdaL1), get(lambdaL2), get(isProvideTrainingMetric), get(metric), get(minGainToSplit), get(maxDeltaStep), getMaxBinByFeature, get(minDataInLeaf), getSlotNames, getDelegate, - getDartParams, getExecutionParams, getObjectiveParams) + getDartParams, getExecutionParams(numTasksPerExec), getObjectiveParams) } def getModel(trainParams: TrainParams, lightGBMBooster: LightGBMBooster): LightGBMRegressionModel = {