Skip to content

Commit

Permalink
Use per-BestSplitAllocs rand source to prevent contention
Browse files Browse the repository at this point in the history
* When learning on many threads, there is a _lot_ of contention
  on the mutex in the global rand source for no real reason.
* In my use case, this results in a speed-up from 4m30s to 33s
  on a 64-core system (7.7x faster)
  • Loading branch information
Vojtech Bocek committed Feb 29, 2016
1 parent bb6f91b commit 71df1c0
Show file tree
Hide file tree
Showing 4 changed files with 11 additions and 8 deletions.
6 changes: 2 additions & 4 deletions densecatfeature.go
Original file line number Diff line number Diff line change
Expand Up @@ -657,7 +657,7 @@ func (f *DenseCatFeature) BestCatSplit(target Target,
bits = i
if !useExhaustive {
//generate random partition
bits = rand.Int()
bits = allocs.Rnd.Int()
}

// //check the value of the j'th bit of i and
Expand Down Expand Up @@ -859,7 +859,6 @@ func (f *DenseCatFeature) BestCatSplitBig(target Target, cases *[]int, parentImp

bits := big.NewInt(1)

var randgn *rand.Rand
var maxPart *big.Int
useExhaustive := nCats <= maxEx
nPartitions := big.NewInt(2)
Expand All @@ -871,7 +870,6 @@ func (f *DenseCatFeature) BestCatSplitBig(target Target, cases *[]int, parentImp
nPartitions.Lsh(nPartitions, uint(maxEx-2))
maxPart = big.NewInt(2)
maxPart.Lsh(maxPart, uint(nCats-2))
randgn = rand.New(rand.NewSource(0))
}

//iteratively build a combination of categories until they
Expand All @@ -881,7 +879,7 @@ func (f *DenseCatFeature) BestCatSplitBig(target Target, cases *[]int, parentImp
bits.Set(i)
if !useExhaustive {
//generate random partition
bits.Rand(randgn, maxPart)
bits.Rand(allocs.Rnd, maxPart)
}

//check the value of the j'th bit of i and
Expand Down
2 changes: 1 addition & 1 deletion densenumfeature.go
Original file line number Diff line number Diff line change
Expand Up @@ -313,7 +313,7 @@ func (f *DenseNumFeature) BestNumSplit(target Target,
lasti := leafSize - 1

if randomSplit {
leafSize = leafSize + rand.Intn(stop-leafSize)
leafSize = leafSize + allocs.Rnd.Intn(stop-leafSize)
lasti = leafSize - 1
stop = leafSize + 1

Expand Down
2 changes: 1 addition & 1 deletion featurematrix.go
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,7 @@ func (fm *FeatureMatrix) BestSplitter(target Target,
if lcans > nDrawnConstants+lastSample {

randi = lastSample
randi += rand.Intn(lcans - nDrawnConstants - lastSample)
randi += allocs.Rnd.Intn(lcans - nDrawnConstants - lastSample)
//randi = lastSample + rand.Intn(nnonconstant-lastSample)
if randi >= lcans-nConstants {
nDrawnConstants++
Expand Down
9 changes: 7 additions & 2 deletions splitallocations.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
package CloudForest

import ()
import (
"math/rand"
)

//BestSplitAllocs contains reusable allocations for split searching and evaluation.
//Separate instances should be used in each goroutine doing learning.
Expand All @@ -27,6 +29,7 @@ type BestSplitAllocs struct {
SortVals []float64
Sorter *SortableFeature //for learning from numerical features
ContrastTarget Target
Rnd *rand.Rand //prevent contention on global rand source
}

//NewBestSplitAllocs initializes all of the reusable allocations for split
Expand Down Expand Up @@ -63,6 +66,8 @@ func NewBestSplitAllocs(nTotalCases int, target Target) (bsa *BestSplitAllocs) {
make([]float64, nTotalCases, nTotalCases),
&SortableFeature{make([]float64, nTotalCases, nTotalCases),
nil},
target.(Feature).Copy().(Target)}
target.(Feature).Copy().(Target),
rand.New(rand.NewSource(rand.Int63())),
}
return
}

0 comments on commit 71df1c0

Please sign in to comment.