-
Notifications
You must be signed in to change notification settings - Fork 0
/
Bandit.go
44 lines (37 loc) · 1.04 KB
/
Bandit.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
package main
import (
"math"
"math/rand"
)
// Bandit holds the configuration and the running statistics of a single
// slot-machine arm.
type Bandit struct {
	// id is an identifier for the bandit; presumably used by callers to
	// tell arms apart (it is not read by the methods defined on the type).
	id int
	// wins counts the pulls that ended in a win; pull increments it.
	wins int
	// games counts every pull made so far; pull increments it on each call.
	games int
	// probability is the threshold a uniform random draw is compared with
	// in pull: the pull is a win when the draw is below it.
	probability float32
	// reward is the multiplier applied by getGamesReward and
	// getDiscountedReward.
	reward float32
}
// pull plays the bandit once and reports whether the play was a win.
//
// Every call increments games. The play is a win when a uniform draw from
// rand.Float32 is strictly below probability; a win also increments wins.
func (bandit *Bandit) pull() bool {
	bandit.games++
	if rand.Float32() < bandit.probability {
		bandit.wins++
		return true
	}
	return false
}
// getPullsNumber returns the number of pulls needed to obtain at least one
// reward from a bandit that wins with probability bandit.probability, with
// confidence 1-alpha.
//
// With q = 1 - probability, the chance of seeing no win in n pulls is q^n,
// so the answer is the smallest integer n with q^n <= alpha, i.e.
// n = ceil(ln(alpha) / ln(q)).
//
// The result is always in [1, max int]:
//   - results below 1 (for example alpha >= 1 or probability == 1) and NaN
//     results (alpha < 0, probability > 1) are reported as 1;
//   - results too large for an int (for example alpha == 0) saturate at the
//     largest int;
//   - a bandit that can never win (probability <= 0) has no finite answer;
//     1 is returned so that callers iterating over the result terminate.
func (bandit *Bandit) getPullsNumber(alpha float64) int {
	const maxInt = int(^uint(0) >> 1)

	p := float64(bandit.probability)
	if p <= 0 {
		return 1
	}

	// Log1p(-p) is ln(1-p) computed in float64 without first rounding 1-p
	// to float32, which would turn very small probabilities into q == 1.
	// Rounding up (not truncating) is what guarantees q^n <= alpha.
	pulls := math.Ceil(math.Log(alpha) / math.Log1p(-p))

	// Converting NaN, ±Inf or out-of-range floats to int is
	// implementation-defined in Go, so clamp before converting.
	switch {
	case math.IsNaN(pulls), pulls < 1:
		return 1
	case pulls >= float64(maxInt):
		return maxInt
	}
	return int(pulls)
}
// getPosterior returns the observed win rate of the bandit: wins divided by
// games, as a float32.
//
// A bandit that has not been played yet (games == 0) reports 0 instead of
// the NaN that 0/0 would produce, so the value can be compared and
// multiplied safely.
func (bandit *Bandit) getPosterior() float32 {
	if bandit.games == 0 {
		return 0
	}
	return float32(bandit.wins) / float32(bandit.games)
}
// getGamesReward returns the bandit's reward scaled by the win rate
// reported by getPosterior, i.e. the average reward per game based on the
// games played so far.
func (bandit *Bandit) getGamesReward() float32 {
	winRate := bandit.getPosterior()
	return winRate * bandit.reward
}
// getDiscountedReward returns the bandit's reward multiplied by its
// configured win probability, i.e. the reward weighted by the chance of
// winning a single pull.
func (bandit *Bandit) getDiscountedReward() float32 {
	payout, chance := bandit.reward, bandit.probability
	return payout * chance
}