-
Notifications
You must be signed in to change notification settings - Fork 63
/
ddpg.py
112 lines (92 loc) · 2.79 KB
/
ddpg.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
# -*- coding: utf-8 -*-
"""Run module for DDPG on LunarLanderContinuous-v2.
- Author: Curt Park
- Contact: curt.park@medipixel.io
"""
import argparse
import gym
import torch
import torch.optim as optim
from algorithms.common.networks.mlp import MLP
from algorithms.common.noise import OUNoise
from algorithms.ddpg.agent import DDPGAgent
# select GPU 0 when CUDA is available, otherwise fall back to CPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# hyper parameters
hyper_params = {
"GAMMA": 0.99,  # discount factor for future rewards
"TAU": 5e-3,  # soft-update rate for target networks
"BUFFER_SIZE": int(1e4),  # replay buffer capacity (transitions)
"BATCH_SIZE": 64,  # minibatch size per learning update
"LR_ACTOR": 3e-4,  # actor optimizer learning rate
"LR_CRITIC": 3e-4,  # critic optimizer learning rate
"OU_NOISE_THETA": 0.0,  # OU noise mean-reversion rate
"OU_NOISE_SIGMA": 0.0,  # OU noise volatility
# NOTE(review): theta == sigma == 0.0 disables exploration noise entirely —
# confirm this is intentional and not a leftover from debugging.
"WEIGHT_DECAY": 1e-6,  # L2 penalty applied by both Adam optimizers
"INITIAL_RANDOM_ACTION": int(1e4),  # steps of random actions before policy acts
"MULTIPLE_LEARN": 1, # multiple learning updates
"GRADIENT_CLIP_AC": 0.5,  # gradient-norm clip for the actor
"GRADIENT_CLIP_CR": 1.0,  # gradient-norm clip for the critic
}
def _mlp_pair(**mlp_kwargs):
    """Build an MLP and its target network with identical initial weights.

    Args:
        **mlp_kwargs: keyword arguments forwarded verbatim to ``MLP``.

    Returns:
        tuple: (network, target_network), both moved to ``device`` and with
            the target's parameters copied from the network.
    """
    network = MLP(**mlp_kwargs).to(device)
    target = MLP(**mlp_kwargs).to(device)
    # hard-sync the target so both start from the same parameters
    target.load_state_dict(network.state_dict())
    return network, target


def run(env: gym.Env, args: argparse.Namespace, state_dim: int, action_dim: int):
    """Run training or test.

    Args:
        env (gym.Env): openAI Gym environment with continuous action space
        args (argparse.Namespace): arguments including training settings
        state_dim (int): dimension of states
        action_dim (int): dimension of actions
    """
    hidden_sizes_actor = [256, 256]
    hidden_sizes_critic = [256, 256]

    # create actor (and its target); tanh bounds actions to [-1, 1]
    actor, actor_target = _mlp_pair(
        input_size=state_dim,
        output_size=action_dim,
        hidden_sizes=hidden_sizes_actor,
        output_activation=torch.tanh,
    )

    # create critic (and its target); Q(s, a) takes state and action concatenated
    critic, critic_target = _mlp_pair(
        input_size=state_dim + action_dim,
        output_size=1,
        hidden_sizes=hidden_sizes_critic,
    )

    # create optimizers (targets are updated by soft copy, not by gradients)
    actor_optim = optim.Adam(
        actor.parameters(),
        lr=hyper_params["LR_ACTOR"],
        weight_decay=hyper_params["WEIGHT_DECAY"],
    )
    critic_optim = optim.Adam(
        critic.parameters(),
        lr=hyper_params["LR_CRITIC"],
        weight_decay=hyper_params["WEIGHT_DECAY"],
    )

    # exploration noise added to the deterministic policy's actions
    noise = OUNoise(
        action_dim,
        theta=hyper_params["OU_NOISE_THETA"],
        sigma=hyper_params["OU_NOISE_SIGMA"],
    )

    # make tuples to create an agent
    models = (actor, actor_target, critic, critic_target)
    optims = (actor_optim, critic_optim)

    # create an agent and run it
    agent = DDPGAgent(env, args, hyper_params, models, optims, noise)
    if args.test:
        agent.test()
    else:
        agent.train()