import torch.nn as nn
from pyagentlab import *
from gomoku_state import GomokuState
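
# Const, Environment, NeuralProfile, NeuralPlayer, FClayerSpec, and
# play_episodes all come from the pyagentlab star import above.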
# Runs many episodes of self-play with a single neural network. The environment
# is Gomoku on a 3x3 board with a win length of 3, which makes the game
# equivalent to Tic-Tac-Toe. First, Const is set to the desired specifications;
# then the environment is created, the neural profile is defined, and the
# players are created; finally, the episodes are run.
def main():
    Const.ENV_NAME = "Gomoku"
    width, height = 3, 3

    Const.N_PLAYERS = 2
    Const.CONV_INPUT_DIMS = (Const.N_PLAYERS + 1, width, height)
    Const.ADD_FC_INPUT_DIM = 0
    Const.DISCRETE_ACTION_DIMS = (width, height)
    Const.CONTINUOUS_ACTION_DIM = 0

    Const.WIN_ENDS_ENTIRE_ENV = True
    Const.WINS_BY_ENDING_LOSS = True
    Const.LOSES_BY_ENDING_WIN = True
    Const.ALL_LOSSES_IS_DRAW = True
    Const.finalize()
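    # Const.finalize() presumably validates and freezes the global
    # configuration; the assumption here is that it must run before the
    # environment and profile are constructed.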

    GomokuState.WIN_LENGTH = 3
    env = Environment(StateClass=GomokuState)
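
    # the profile below appears to configure a DQN-style learner:
    # epsilon-greedy exploration, a replay memory, and (with CONV_LAYER_SPECS
    # empty) a single 128-unit fully-connected layer operating on the
    # flattened board planes.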
    NEURAL_PROFILE = NeuralProfile(
        # outcome settings.
        WIN_REWARD=1.0,
        DRAW_REWARD=1.0,
        LOSS_REWARD=0.0,
        ILLEGAL_REWARD=-0.1,
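        # note that DRAW_REWARD equals WIN_REWARD: perfect Tic-Tac-Toe play
        # ends in a draw, so drawing is rewarded as generously as winning.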
        # epsilon settings.
        EPS_START=0.7,
        EPS_DEC=5e-6,
        EPS_END=0.05,
        # learn settings.
        ENFORCE_LEGALITY=False,
        ENFORCE_LEGALITY_ON_RANDOM=True,
        USE_TOTAL_RETURNS=False,
        GAMMA=0.8,
        MINIBATCH_SIZE=6,
        REPLACE_TARGET_INTERVAL=0,
        SET_Q_OF_ILLEGALS_IN_NEXTS_TO_ZERO=True,
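        # REPLACE_TARGET_INTERVAL=0 presumably disables the periodic
        # target-network replacement.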
        # learn rate settings.
        LR=0.001,
        LR_SCHEDULER_FACTOR=0.5,
        LR_SCHEDULER_PATIENCE=50000,
        # optimizer settings.
        ALPHA=0.0,
        GRADIENT_MOMENTUM=0.0,
        WEIGHT_DECAY=0.0,
        MIN_SQUARED_GRADIENT=1e-8,
        GRADIENTS_MAX_NORM=-1.0,
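        # the negative value here and the -1.0 regularization strengths below
        # appear to act as "disabled" sentinels.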
        # convolutional layer settings.
        CONV_LAYER_SPECS=[],
        # fully-connected layer settings.
        FC_LAYER_SPECS=[FClayerSpec(128, BIAS=True)],
        USE_DUELING_ARCHITECTURE=False,
        OUTPUT_USE_BATCH_NORM=False,
        OUTPUT_ACTIVATION_FUNC=None,
        # loss settings.
        LOSS=nn.MSELoss(),
        L1_REG_STRENGTH=-1.0,
        L2_REG_STRENGTH=-1.0,
        # memory settings.
        STARTING_MEM_COUNT=1000,
        MAX_MEM_SIZE=10000,
        CONTINUOUS_MEMORY=False,
        # save directory settings.
        ALGORITHM_NAME="neural",
    )
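
    # the same NeuralPlayer instance fills both seats, so the network trains
    # through self-play against its own latest weights.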
    neural_player = NeuralPlayer(NEURAL_PROFILE)
    players = [neural_player, neural_player]

    print("Beginning to run episodes.")
    play_episodes(100000, env, players, is_training=True)
    neural_player.save_checkpoints()

if __name__ == "__main__":
    main()
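
# a hypothetical follow-up for evaluating the trained agent (commented out
# because load_checkpoints() is only an assumed counterpart to the
# save_checkpoints() call above and may not exist in pyagentlab):
#
#     neural_player.load_checkpoints()
#     play_episodes(10, env, players, is_training=False)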