# Hindsight Experience Replay

Paper this research is based on: https://arxiv.org/abs/1707.01495

In [1]:
import torch
import gym
import gym.spaces
import rocket_lander_gym
import warnings
from agent.config import Config
from agent.td3_agent import TD3Agent
from agent.utils import seed_all, plot_scores

warnings.filterwarnings('ignore')

In [2]:
# RocketLander-v0 | LunarLanderContinuous-v2
env = gym.make('LunarLanderContinuous-v2')

In [3]:
config = Config()

config.seed = 0
config.env = env
config.env_solved = 200
config.times_solved = 100
config.buffer_size = int(1e6)
config.batch_size = 32
config.num_episodes = 2000
config.num_updates = 1 
config.max_steps = 2000
config.max_steps_reward = None
config.state_size = env.observation_space.shape[0]
config.action_size = env.action_space.shape[0]
config.gamma = 0.99
config.tau = 1e-34
config.lr_actor = 1e-4
config.lr_critic = 1e-3
config.hidden_actor = (64, 64)
config.hidden_critic = (64, 64)
config.activ_actor = torch.nn.ReLU()
config.activ_critic = torch.nn.ReLU()
config.optim_actor = torch.optim.Adam
config.optim_critic = torch.optim.Adam
config.grad_clip_actor = None
config.grad_clip_critic = None
config.use_huber_loss = False
config.update_every = 1
config.use_ou_noise = False
config.ou_mu = 0.0
config.ou_theta = 0.15
config.ou_sigma = 0.2
config.expl_noise = 0.1
config.noise_weight = 1.0
config.decay_noise = False
config.use_linear_decay = False
config.noise_linear_decay = 1e-6
config.noise_decay = 0.99
config.log_every = 100
config.policy_noise = 0.1
config.noise_clip = 0.1
config.policy_freq_update = 2
config.log_std_min=-20
config.log_std_max=2

In [4]:
seed_all(config.seed, env)

In [5]:
agent = TD3Agent(config)

In [6]:
agent.summary()

TD3 Agent:

Actor Network:
--------------
Actor(
  (activ): ReLU()
  (layers): ModuleList(
    (0): Linear(in_features=8, out_features=64, bias=True)
    (1): Linear(in_features=64, out_features=64, bias=True)
    (2): Linear(in_features=64, out_features=2, bias=True)
  )
)

Critic Network:
---------------
Critic(
  (activ): ReLU()
  (layers): ModuleList(
    (0): Linear(in_features=10, out_features=64, bias=True)
    (1): Linear(in_features=64, out_features=64, bias=True)
    (2): Linear(in_features=64, out_features=1, bias=True)
  )
)

Twin Network:
-------------
Critic(
  (activ): ReLU()
  (layers): ModuleList(
    (0): Linear(in_features=10, out_features=64, bias=True)
    (1): Linear(in_features=64, out_features=64, bias=True)
    (2): Linear(in_features=64, out_features=1, bias=True)
  )
)


In [None]:
scores = agent.train()

Episode 16	Avg Score: -248.57	Avg Actor Loss: 2.91	Avg Critic Loss: 172.61

In [None]:
plot_scores(scores, polyfit_deg=6)