Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Newer
Older
100644 304 lines (260 sloc) 7.728 kb
302209cc » Timo Ewalds
2010-02-22 Commit the beginnings of a UCT MCTS player
1
2 #ifndef __PLAYER_H_
3 #define __PLAYER_H_
4
141e49fc » Timo Ewalds
2010-03-03 The player should first consult the solver for some tactics. Spend 20…
5 #include <stdint.h>
6 #include <cmath>
7
18c886f4 » Timo Ewalds
2010-02-26 add a simpler move struct, add a basic, though likely incomplete, RAV…
8 #include "move.h"
302209cc » Timo Ewalds
2010-02-22 Commit the beginnings of a UCT MCTS player
9 #include "board.h"
10 #include "time.h"
11 #include "timer.h"
0aa0c763 » Timo Ewalds
2010-02-28 Factor the stats out into a separate file, display some better stats
12 #include "depthstats.h"
141e49fc » Timo Ewalds
2010-03-03 The player should first consult the solver for some tactics. Spend 20…
13 #include "solver.h"
302209cc » Timo Ewalds
2010-02-22 Commit the beginnings of a UCT MCTS player
14
15 class Player {
ed6410cf » Timo Ewalds
2010-03-28 Deallocate the root properly, fixes a reverse memory leak
16 public:
302209cc » Timo Ewalds
2010-02-22 Commit the beginnings of a UCT MCTS player
17 struct Node {
d4de1e0b » Timo Ewalds
2010-03-04 Various tweaks to make to learn more from the proof number search ini…
18 float rave, score;
576fa842 » Timo Ewalds
2010-03-07 Fix a few bugs: use factor instead of scale. Stop assigning rave loss…
19 uint32_t ravevisits, visits;
6275a82c » Timo Ewalds
2010-02-27 Switch from explicit x,y to using Move in Player. Add some scaffoldin…
20 Move move;
302209cc » Timo Ewalds
2010-02-22 Commit the beginnings of a UCT MCTS player
21 uint16_t numchildren;
22 Node * children;
23
1ddb3de8 » Timo Ewalds
2010-03-08 Fix a warning...
24 Node(const Move & m, float s = 0, int v = 0) : rave(0), score(s), ravevisits(0), visits(v), move(m), numchildren(0), children(NULL) { }
25 Node(int x = 0, int y = 0, float s = 0, int v = 0) : rave(0), score(s), ravevisits(0), visits(v), move(Move(x,y)), numchildren(0), children(NULL) { }
141e49fc » Timo Ewalds
2010-03-03 The player should first consult the solver for some tactics. Spend 20…
26
ced4d1cf » Timo Ewalds
2010-03-28 Keep the tree between moves. For now that means turning off the solve…
27 void neuter(){
28 numchildren = 0;
29 children = NULL;
30 }
31
32 void print() const {
33 printf("Node: exp %.2f/%i, rave %.2f/%i, move %i,%i, %i children\n", score/visits, visits, rave/ravevisits, ravevisits, move.x, move.y, numchildren);
34 }
35
8b97aa58 » Timo Ewalds
2010-03-07 Include the proof tree in the player tree size
36 int construct(const Solver::PNSNode * n, int pnsscore){
12ee6f88 » Timo Ewalds
2010-03-09 Switch the solver to use Move instead of individual x and y for a few…
37 move = n->move;
141e49fc » Timo Ewalds
2010-03-03 The player should first consult the solver for some tactics. Spend 20…
38
39 rave = 0;
576fa842 » Timo Ewalds
2010-03-07 Fix a few bugs: use factor instead of scale. Stop assigning rave loss…
40 ravevisits = 0;
141e49fc » Timo Ewalds
2010-03-03 The player should first consult the solver for some tactics. Spend 20…
41
42 if(n->delta == 0){ //a win!
43 score = 2000;
44 visits = 1000;
45 }else if(n->phi == 0){ //a loss or tie
46 //set high but not insurmountable visits just in case it is a tie
47 score = 0;
48 visits = 100;
0d545c20 » Timo Ewalds
2010-03-09 Fix proof score initialization to be in the right range, and fix the …
49 }else if(pnsscore > 0){
50 if(n->phi >= n->delta)
51 score = pnsscore*(1 - n->delta/(2*n->phi));
52 else
53 score = pnsscore*(n->phi/(2*n->delta));
54
ff9a09be » Timo Ewalds
2010-03-05 Parameterize a few features
55 visits = pnsscore;
0d545c20 » Timo Ewalds
2010-03-09 Fix proof score initialization to be in the right range, and fix the …
56 }else{
57 score = 0;
58 visits = 0;
141e49fc » Timo Ewalds
2010-03-03 The player should first consult the solver for some tactics. Spend 20…
59 }
60
61 numchildren = n->numchildren;
62 children = NULL;
63
8b97aa58 » Timo Ewalds
2010-03-07 Include the proof tree in the player tree size
64 int num = 1;
141e49fc » Timo Ewalds
2010-03-03 The player should first consult the solver for some tactics. Spend 20…
65 if(numchildren){
66 children = new Node[numchildren];
67 for(int i = 0; i < numchildren; i++)
8b97aa58 » Timo Ewalds
2010-03-07 Include the proof tree in the player tree size
68 num += children[i].construct(& n->children[i], pnsscore);
141e49fc » Timo Ewalds
2010-03-03 The player should first consult the solver for some tactics. Spend 20…
69 }
8b97aa58 » Timo Ewalds
2010-03-07 Include the proof tree in the player tree size
70 return num;
141e49fc » Timo Ewalds
2010-03-03 The player should first consult the solver for some tactics. Spend 20…
71 }
72
302209cc » Timo Ewalds
2010-02-22 Commit the beginnings of a UCT MCTS player
73 ~Node(){
141e49fc » Timo Ewalds
2010-03-03 The player should first consult the solver for some tactics. Spend 20…
74 if(children)
302209cc » Timo Ewalds
2010-02-22 Commit the beginnings of a UCT MCTS player
75 delete[] children;
ced4d1cf » Timo Ewalds
2010-03-28 Keep the tree between moves. For now that means turning off the solve…
76 neuter();
302209cc » Timo Ewalds
2010-02-22 Commit the beginnings of a UCT MCTS player
77 }
78
79 int alloc(int num){
80 numchildren = num;
81 children = new Node[num];
82 return num;
83 }
84 int dealloc(){
85 int s = numchildren;
86 if(numchildren){
87 for(int i = 0; i < numchildren; i++)
88 s += children[i].dealloc();
89 delete[] children;
ced4d1cf » Timo Ewalds
2010-03-28 Keep the tree between moves. For now that means turning off the solve…
90 neuter();
302209cc » Timo Ewalds
2010-02-22 Commit the beginnings of a UCT MCTS player
91 }
92 return s;
93 }
94
ced4d1cf » Timo Ewalds
2010-03-28 Keep the tree between moves. For now that means turning off the solve…
95 //need to return a pointer to a new object due to extra copies and destructor calls made during function return... need a move constructor...
96 Node * make_move(Move m){
97 for(int i = 0; i < numchildren; i++){
98 if(children[i].move == m){
99 Node * ret = new Node(children[i]); //move the child
100 children[i].neuter();
101 return ret;
102 }
103 }
104 return new Node();
105 }
106
302209cc » Timo Ewalds
2010-02-22 Commit the beginnings of a UCT MCTS player
107 float winrate(){
d4de1e0b » Timo Ewalds
2010-03-04 Various tweaks to make to learn more from the proof number search ini…
108 return score/visits;
302209cc » Timo Ewalds
2010-02-22 Commit the beginnings of a UCT MCTS player
109 }
576fa842 » Timo Ewalds
2010-03-07 Fix a few bugs: use factor instead of scale. Stop assigning rave loss…
110 //*
214afe6a » Timo Ewalds
2010-03-05 Update to a more standard way of doing rave scores.
111 //new way, more standard way of changing over from rave scores to real scores
22038b14 » Timo Ewalds
2010-03-27 Major refactoring of UCT/RAVE. Pulled rave updates out of walk_tree. …
112 float value(float ravefactor, float fpurgency){
6beda471 » Timo Ewalds
2010-03-27 Make the minimum rave_factor a constant
113 if(ravefactor <= min_rave)
22038b14 » Timo Ewalds
2010-03-27 Major refactoring of UCT/RAVE. Pulled rave updates out of walk_tree. …
114 return (visits == 0 ? fpurgency : score/visits);
115
355cea0b » Timo Ewalds
2010-03-27 Minor optimizations
116 if(ravevisits == 0 && visits == 0)
8d2523bd » Timo Ewalds
2010-03-11 Allow the first play urgency to be configured
117 return fpurgency;
214afe6a » Timo Ewalds
2010-03-05 Update to a more standard way of doing rave scores.
118
22038b14 » Timo Ewalds
2010-03-27 Major refactoring of UCT/RAVE. Pulled rave updates out of walk_tree. …
119 float alpha = ravefactor/(ravefactor + visits);
120 // float alpha = sqrt(ravefactor/(ravefactor + 3*visits));
e2963828 » Timo Ewalds
2010-03-27 Add another possible rave balancing formula
121 // float alpha = (float)ravevisits/((float)visits + (float)ravevisits + 4.0*visits*ravevisits*ravefactor);
214afe6a » Timo Ewalds
2010-03-05 Update to a more standard way of doing rave scores.
122
7cc84b29 » Timo Ewalds
2010-03-06 Avoid a potential divide by 0, set the explore rate based on tests of…
123 float val = 0;
576fa842 » Timo Ewalds
2010-03-07 Fix a few bugs: use factor instead of scale. Stop assigning rave loss…
124 if(ravevisits) val += alpha*rave/ravevisits;
125 if(visits) val += (1-alpha)*score/visits;
7cc84b29 » Timo Ewalds
2010-03-06 Avoid a potential divide by 0, set the explore rate based on tests of…
126
127 return val;
214afe6a » Timo Ewalds
2010-03-05 Update to a more standard way of doing rave scores.
128 }
576fa842 » Timo Ewalds
2010-03-07 Fix a few bugs: use factor instead of scale. Stop assigning rave loss…
129 /*/
214afe6a » Timo Ewalds
2010-03-05 Update to a more standard way of doing rave scores.
130 //my understanding of how fuego does it
22038b14 » Timo Ewalds
2010-03-27 Major refactoring of UCT/RAVE. Pulled rave updates out of walk_tree. …
131 float value(float ravefactor, float fpurgency){
214afe6a » Timo Ewalds
2010-03-05 Update to a more standard way of doing rave scores.
132 float val = 0;
133 float weight = 0;
134 if(visits) {
135 val += score;
136 weight += visits;
137 }
576fa842 » Timo Ewalds
2010-03-07 Fix a few bugs: use factor instead of scale. Stop assigning rave loss…
138 if(ravevisits){
cd8a3a94 » Timo Ewalds
2010-03-25 Update the fuego based value function, fix a misunderstanding as well…
139 float bias = 1.0/(1.1 + ravevisits/20000.0);
140 val += rave*bias;
141 weight += ravevisits*bias;
214afe6a » Timo Ewalds
2010-03-05 Update to a more standard way of doing rave scores.
142 }
143 if(weight > 0)
144 return val / weight;
145 else
cd8a3a94 » Timo Ewalds
2010-03-25 Update the fuego based value function, fix a misunderstanding as well…
146 return fpurgency;
147 }
148
149 //based directly on fuego
22038b14 » Timo Ewalds
2010-03-27 Major refactoring of UCT/RAVE. Pulled rave updates out of walk_tree. …
150 float value(float ravefactor, float fpurgency){
cd8a3a94 » Timo Ewalds
2010-03-25 Update the fuego based value function, fix a misunderstanding as well…
151 float val = 0.f;
152 float weightSum = 0.f;
153 bool hasValue = false;
154 if(visits){
155 val += score;
156 weightSum += visits;
157 hasValue = true;
158 }
159 if(ravevisits){
160 float weight = ravevisits / ( 1.1 + ravevisits/20000.);
161 val += weight * rave / ravevisits;
162 weightSum += weight;
163 hasValue = true;
164 }
165 if(hasValue)
166 return val / weightSum;
167 else
168 return fpurgency;
214afe6a » Timo Ewalds
2010-03-05 Update to a more standard way of doing rave scores.
169 }
cd8a3a94 » Timo Ewalds
2010-03-25 Update the fuego based value function, fix a misunderstanding as well…
170
214afe6a » Timo Ewalds
2010-03-05 Update to a more standard way of doing rave scores.
171 //*/
302209cc » Timo Ewalds
2010-02-22 Commit the beginnings of a UCT MCTS player
172 };
173
7149ea6e » Timo Ewalds
2010-03-05 Add an option to make the rave scores scale from 2 down to 0
174 struct RaveMoveList {
609a9713 » Timo Ewalds
2010-03-09 Fix rave values so it updates wins to the winner and losses to the lo…
175 struct RaveMove {
176 Move move;
177 char player;
178 float score;
179
180 RaveMove(const Move & m) : move(m), player(0), score(0) { }
181
182 bool operator< (const RaveMove & b) const { return (move < b.move); }
183 bool operator<=(const RaveMove & b) const { return (move <= b.move); }
184 bool operator> (const RaveMove & b) const { return (move > b.move); }
185 bool operator>=(const RaveMove & b) const { return (move >= b.move); }
186 bool operator==(const RaveMove & b) const { return (move == b.move); }
187 bool operator!=(const RaveMove & b) const { return (move != b.move); }
188 };
189
190 vector<RaveMove> list;
7149ea6e » Timo Ewalds
2010-03-05 Add an option to make the rave scores scale from 2 down to 0
191
192 RaveMoveList(int s = 0){
193 list.reserve(s);
194 }
195
196 void add(const Move & move){
197 list.push_back(move);
198 }
199 void clear(){
200 list.clear();
201 }
22038b14 » Timo Ewalds
2010-03-27 Major refactoring of UCT/RAVE. Pulled rave updates out of walk_tree. …
202 unsigned int size() const {
7149ea6e » Timo Ewalds
2010-03-05 Add an option to make the rave scores scale from 2 down to 0
203 return list.size();
204 }
609a9713 » Timo Ewalds
2010-03-09 Fix rave values so it updates wins to the winner and losses to the lo…
205 const RaveMove & operator[](int i) const {
7149ea6e » Timo Ewalds
2010-03-05 Add an option to make the rave scores scale from 2 down to 0
206 return list[i];
207 }
208 //remove the moves that were played by the loser
209 //sort in y,x order
609a9713 » Timo Ewalds
2010-03-09 Fix rave values so it updates wins to the winner and losses to the lo…
210 void clean(int player, bool scale){
7149ea6e » Timo Ewalds
2010-03-05 Add an option to make the rave scores scale from 2 down to 0
211 float base, factor;
212
213 if(scale){
576fa842 » Timo Ewalds
2010-03-07 Fix a few bugs: use factor instead of scale. Stop assigning rave loss…
214 base = 2; //2 instead of 1 so the average of wins stays at 1
609a9713 » Timo Ewalds
2010-03-09 Fix rave values so it updates wins to the winner and losses to the lo…
215 factor = 2*2.0/(list.size()+1); //+1 to keep it from going negative, 4 = base*2 since half the values are skipped
7149ea6e » Timo Ewalds
2010-03-05 Add an option to make the rave scores scale from 2 down to 0
216 }else{
217 base = 1;
218 factor = 0;
219 }
220
576fa842 » Timo Ewalds
2010-03-07 Fix a few bugs: use factor instead of scale. Stop assigning rave loss…
221 //the wins get values, the losses stay at default=0
609a9713 » Timo Ewalds
2010-03-09 Fix rave values so it updates wins to the winner and losses to the lo…
222 for(unsigned int i = 0; i < list.size(); i++){
223 list[i].player = player;
576fa842 » Timo Ewalds
2010-03-07 Fix a few bugs: use factor instead of scale. Stop assigning rave loss…
224 list[i].score = base - i/2*factor;
609a9713 » Timo Ewalds
2010-03-09 Fix rave values so it updates wins to the winner and losses to the lo…
225 player = 3 - player;
226 }
7149ea6e » Timo Ewalds
2010-03-05 Add an option to make the rave scores scale from 2 down to 0
227
576fa842 » Timo Ewalds
2010-03-07 Fix a few bugs: use factor instead of scale. Stop assigning rave loss…
228 sort(list.begin(), list.end()); //sort in y,x order
7149ea6e » Timo Ewalds
2010-03-05 Add an option to make the rave scores scale from 2 down to 0
229 }
230 };
231
cb365ad3 » Timo Ewalds
2010-02-28 Allow the explore and ravefactor values to be set over gtp
232 public:
6beda471 » Timo Ewalds
2010-03-27 Make the minimum rave_factor a constant
233
234 static const float min_rave = 0.1;
235
18c886f4 » Timo Ewalds
2010-02-26 add a simpler move struct, add a basic, though likely incomplete, RAV…
236 float explore; //greater than one favours exploration, smaller than one favours exploitation
22038b14 » Timo Ewalds
2010-03-27 Major refactoring of UCT/RAVE. Pulled rave updates out of walk_tree. …
237 float ravefactor; //big numbers favour rave scores, small ignore it
7149ea6e » Timo Ewalds
2010-03-05 Add an option to make the rave scores scale from 2 down to 0
238 bool ravescale; //scale rave numbers from 2 down to 0 in decreasing order of move recency instead of always 1
2fd58ebd » Timo Ewalds
2010-03-09 Add the good opponent moves are good moves for me heuristic
239 bool opmoves; //take the opponents rave updates too, a good move for my opponent is a good move for me.
8d2523bd » Timo Ewalds
2010-03-11 Allow the first play urgency to be configured
240 int skiprave; //how often to skip rave, skip once in this many checks
dca42bd0 » Timo Ewalds
2010-03-29 Make keeping tree optional
241 bool keeptree; //reuse the tree from the previous move
8d2523bd » Timo Ewalds
2010-03-11 Allow the first play urgency to be configured
242 float fpurgency; //what value to return for a move that hasn't been played yet
ff9a09be » Timo Ewalds
2010-03-05 Parameterize a few features
243 float prooftime; //fraction of time spent in proof number search, looking for a provable win and losses to avoid
8d2523bd » Timo Ewalds
2010-03-11 Allow the first play urgency to be configured
244 int proofscore; //how many virtual rollouts to assign based on the proof number search values
ff9a09be » Timo Ewalds
2010-03-05 Parameterize a few features
245 bool rolloutpattern; //play the response to a virtual connection threat in rollouts
8a654dd8 » Timo Ewalds
2010-02-28 Allow the player to be reused multiple times
246
ced4d1cf » Timo Ewalds
2010-03-28 Keep the tree between moves. For now that means turning off the solve…
247 Node root;
248 Board rootboard;
249
302209cc » Timo Ewalds
2010-02-22 Commit the beginnings of a UCT MCTS player
250 int runs;
0aa0c763 » Timo Ewalds
2010-02-28 Factor the stats out into a separate file, display some better stats
251 DepthStats treelen, gamelen;
8a654dd8 » Timo Ewalds
2010-02-28 Allow the player to be reused multiple times
252 uint64_t nodes, maxnodes;
302209cc » Timo Ewalds
2010-02-22 Commit the beginnings of a UCT MCTS player
253 bool timeout;
254
bdc8b7da » Timo Ewalds
2010-02-25 Add some version of time control...
255 double time_used;
256
302209cc » Timo Ewalds
2010-02-22 Commit the beginnings of a UCT MCTS player
257 Player() {
ced4d1cf » Timo Ewalds
2010-03-28 Keep the tree between moves. For now that means turning off the solve…
258 nodes = 0;
8a654dd8 » Timo Ewalds
2010-02-28 Allow the player to be reused multiple times
259 time_used = 0;
141e49fc » Timo Ewalds
2010-03-03 The player should first consult the solver for some tactics. Spend 20…
260
e2759eff » Timo Ewalds
2010-03-11 Minor changes to the constants and descriptions
261 explore = 0.85;
05514b02 » Timo Ewalds
2010-03-09 Add a different possible formula, tweak the explore and rave paramete…
262 ravefactor = 50;
22038b14 » Timo Ewalds
2010-03-27 Major refactoring of UCT/RAVE. Pulled rave updates out of walk_tree. …
263 ravescale = false;
2fd58ebd » Timo Ewalds
2010-03-09 Add the good opponent moves are good moves for me heuristic
264 opmoves = false;
22038b14 » Timo Ewalds
2010-03-27 Major refactoring of UCT/RAVE. Pulled rave updates out of walk_tree. …
265 skiprave = 0;
dca42bd0 » Timo Ewalds
2010-03-29 Make keeping tree optional
266 keeptree = true;
e2759eff » Timo Ewalds
2010-03-11 Minor changes to the constants and descriptions
267 fpurgency = 1;
22038b14 » Timo Ewalds
2010-03-27 Major refactoring of UCT/RAVE. Pulled rave updates out of walk_tree. …
268 prooftime = 0;
269 proofscore = 0;
270 rolloutpattern = false;
302209cc » Timo Ewalds
2010-02-22 Commit the beginnings of a UCT MCTS player
271 }
272 void timedout(){ timeout = true; }
273
ced4d1cf » Timo Ewalds
2010-03-28 Keep the tree between moves. For now that means turning off the solve…
274 void set_board(const Board & board){
275 rootboard = board;
ed6410cf » Timo Ewalds
2010-03-28 Deallocate the root properly, fixes a reverse memory leak
276 nodes -= root.dealloc();
ced4d1cf » Timo Ewalds
2010-03-28 Keep the tree between moves. For now that means turning off the solve…
277 root = Node();
278 }
279 void move(const Move & m){
280 rootboard.move(m);
dca42bd0 » Timo Ewalds
2010-03-29 Make keeping tree optional
281 if(keeptree){
282 Node * child = root.make_move(m);
283 nodes -= root.dealloc();
284 root = *child;
285 child->neuter();
286 delete child;
287 }else{
288 nodes -= root.dealloc();
289 root = Node();
290 }
ced4d1cf » Timo Ewalds
2010-03-28 Keep the tree between moves. For now that means turning off the solve…
291 }
292
293 Move mcts(double time, int maxruns, int memlimit);
294 vector<Move> get_pv();
302209cc » Timo Ewalds
2010-02-22 Commit the beginnings of a UCT MCTS player
295
296 protected:
7149ea6e » Timo Ewalds
2010-03-05 Add an option to make the rave scores scale from 2 down to 0
297 int walk_tree(Board & board, Node * node, RaveMoveList & movelist, int depth);
9a10c57b » Timo Ewalds
2010-03-27 Factor move choice out into a separate function, mainly for profiling
298 Node * choose_move(const Node * node) const;
22038b14 » Timo Ewalds
2010-03-27 Major refactoring of UCT/RAVE. Pulled rave updates out of walk_tree. …
299 void update_rave(const Node * node, const RaveMoveList & movelist, int won, int toplay);
7149ea6e » Timo Ewalds
2010-03-05 Add an option to make the rave scores scale from 2 down to 0
300 int rand_game(Board & board, RaveMoveList & movelist, Move move, int depth);
ec26f286 » Timo Ewalds
2010-03-01 Switch the default explore/rave params to be consistent with the valu…
301 bool check_pattern(const Board & board, Move & move);
302209cc » Timo Ewalds
2010-02-22 Commit the beginnings of a UCT MCTS player
302 };
303
304 #endif
305
Something went wrong with that request. Please try again.