-
Notifications
You must be signed in to change notification settings - Fork 0
/
rollout.go
145 lines (131 loc) · 3.67 KB
/
rollout.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
package nrpa
import (
"math"
"math/rand"
"time"
"alda/entities"
"alda/utils"
)
// Data structure for a NRPA Rollout
type Rollout struct {
Tour []int
Length int
Score float64
Makespan float64
cost float64
Violations int
d *PlayoutData
}
type PlayoutData struct {
visited []bool
legalMovesPerStep [][]int
moveProb []float64
t *entities.TSPTW
randGen *rand.Rand
}
func (d *PlayoutData) Reset() {
for i := 1; i < d.t.N; i++ {
d.visited[i] = false
}
for state := 0; state < d.t.N-1; state++ {
d.legalMovesPerStep[state] = d.legalMovesPerStep[state][:0]
}
}
func NewPlayoutData(t *entities.TSPTW) *PlayoutData {
d := &PlayoutData{
legalMovesPerStep: make([][]int, t.N-1),
moveProb: make([]float64, t.N),
visited: make([]bool, t.N),
t: t,
randGen: rand.New(rand.NewSource(time.Now().UnixNano())),
}
for i := range d.legalMovesPerStep {
d.legalMovesPerStep[i] = make([]int, 0, t.N)
}
return d
}
func NewRollout(data *PlayoutData) *Rollout {
return &Rollout{
Tour: make([]int, data.t.N+1),
Length: 1,
Score: -math.MaxFloat64,
d: data,
}
}
// Do a Rollout for a TSPTW with the given policy
func (r *Rollout) Do(policy [][]float64) float64 {
u := 0 // starts at the depot
for step := 0; r.Length < r.d.t.N; step++ {
r.calculateLegalMoves(u, step)
v := r.pickMove(u, step, policy)
r.move(u, v)
u = v
}
r.move(u, 0) // go back to the depot
r.Score = -(float64(offset*r.Violations) + r.cost)
return r.Score
}
// Calculates the legal moves that can be made from u node
func (r *Rollout) calculateLegalMoves(u, step int) {
for v := 1; v < r.d.t.N; v++ {
if !r.d.visited[v] {
if r.Makespan+r.d.t.Distances[u][v] > r.d.t.WindowEnd[v] { // violation of the TW
// take that move since there's no other path u->k->v faster than u->v
r.d.legalMovesPerStep[step] = append(r.d.legalMovesPerStep[step], v) // that don't violate the TW constraint (Triangle Inequality)
}
}
}
if len(r.d.legalMovesPerStep[step]) == 0 {
for v := 1; v < r.d.t.N; v++ {
if !r.d.visited[v] {
impossibleMove := false
for k := 0; k < r.d.t.N && !impossibleMove; k++ {
if !r.d.visited[v] {
if r.Makespan <= r.d.t.WindowEnd[k] && r.Makespan+r.d.t.Distances[u][k] <= r.d.t.WindowEnd[k] && // is valid u->k
(r.Makespan+r.d.t.Distances[u][v] > r.d.t.WindowEnd[k] || r.d.t.WindowStart[v] > r.d.t.WindowEnd[k]) { //take u->v make impossible to go to k
impossibleMove = true
}
}
}
if !impossibleMove {
r.d.legalMovesPerStep[step] = append(r.d.legalMovesPerStep[step], v)
}
}
}
}
if len(r.d.legalMovesPerStep[step]) == 0 { // there's no moves that do not violate a TW constrain
for v := 1; v < r.d.t.N; v++ {
if !r.d.visited[v] {
r.d.legalMovesPerStep[step] = append(r.d.legalMovesPerStep[step], v)
}
}
}
}
// Picks a move from u to a random node according to the policy distribution
func (r *Rollout) pickMove(u, step int, policy [][]float64) int {
z := float64(0)
for i := range r.d.legalMovesPerStep[step] {
v := r.d.legalMovesPerStep[step][i]
r.d.moveProb[i] = utils.Exp(policy[u][v])
z += r.d.moveProb[i]
}
idx := 0
random := r.d.randGen.Float64() * z
probAcc := r.d.moveProb[idx]
for probAcc < random {
idx++
probAcc += r.d.moveProb[idx]
}
return r.d.legalMovesPerStep[step][idx]
}
//Make a move from u to v
func (r *Rollout) move(u, v int) {
r.Tour[r.Length] = v
r.Length++
r.d.visited[v] = true
r.cost += r.d.t.Distances[u][v]
r.Makespan = utils.Max(r.Makespan+r.d.t.Distances[u][v], r.d.t.WindowStart[v])
if r.Makespan > r.d.t.WindowEnd[v] {
r.Violations++
}
}