In [1]:
from collections import deque

import gym
import imageio
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
from torch.distributions import Categorical 
from running_mean_std import RunningMeanStd 

In [2]:
class ActorCriticNet(nn.Module):
    """Actor-critic network with a shared trunk and two separate heads.

    The trunk (``head``) is one linear layer followed by Tanh. Its output is
    fed to ``pol``, which emits one logit per action, and to ``val``, which
    emits a single value per input row.

    Args:
        obs_space: Number of input features of the first linear layer.
        action_space: Number of outputs of the policy head.
    """

    def __init__(self, obs_space, action_space):
        super().__init__()
        hidden = 64

        # Layers are instantiated trunk -> policy -> value so that seeded
        # initialisation draws random numbers in a fixed order.
        trunk_layers = [nn.Linear(obs_space, hidden), nn.Tanh()]
        policy_layers = [
            nn.Linear(hidden, hidden),
            nn.Tanh(),
            nn.Linear(hidden, action_space),
        ]
        value_layers = [
            nn.Linear(hidden, hidden),
            nn.Tanh(),
            nn.Linear(hidden, 1),
        ]

        # Attribute names determine the state_dict keys; keep them stable.
        self.head = nn.Sequential(*trunk_layers)
        self.pol = nn.Sequential(*policy_layers)
        self.val = nn.Sequential(*value_layers)
        self.log_softmax = nn.LogSoftmax(dim=-1)

    def forward(self, x):
        """Return ``(log_p, v)`` for the input ``x``.

        ``log_p`` is the log-softmax of the policy logits, reshaped to
        ``(rows, -1)``; ``v`` is the value head output reshaped to
        ``(rows, 1)``, where ``rows`` is the first dimension of the trunk
        output.
        """
        features = self.head(x)
        rows = features.shape[0]

        policy_logits = self.pol(features).reshape(rows, -1)
        log_p = self.log_softmax(policy_logits)

        v = self.val(features).reshape(rows, 1)
        return log_p, v

In [3]:
def get_action_and_value(obs, old_net):
    """Sample one action from the policy and return it with the value estimate.

    Args:
        obs: A single observation (array-like of numbers). It is converted to
            a float32 tensor and given a leading batch dimension of size 1.
        old_net: Module whose forward pass returns ``(log_p, v)``, where
            ``log_p`` holds log-probabilities over the actions and ``v``
            holds a single value for the single input row.

    Returns:
        tuple: ``(action, value)`` as a Python int and a Python float.
    """
    old_net.eval()

    # Use the device the network itself lives on rather than a notebook-level
    # global, so the function works regardless of cell execution order.
    first_param = next(old_net.parameters(), None)
    net_device = (first_param.device if first_param is not None
                  else torch.device('cpu'))

    with torch.no_grad():
        # Convert through one contiguous float32 array; wrapping an ndarray in
        # a Python list before torch.tensor() is slow and triggers a warning.
        state = torch.tensor(
            np.asarray(obs, dtype=np.float32), device=net_device).unsqueeze(0)
        log_p, v = old_net(state)
        # The network returns log-probabilities; exponentiate to get probs.
        action = Categorical(probs=log_p.exp()).sample()

    return action.item(), v.item()

## Main

In [4]:
# set device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# make an environment (uncomment exactly one)
# env = gym.make('CartPole-v0')
env = gym.make('CartPole-v1')
# env = gym.make('MountainCar-v0')
# env = gym.make('LunarLander-v2')

# Seed every source of randomness: the environment, numpy, and torch
# (actions are sampled through torch, so seeding the env alone is not enough
# to make a run repeatable).
SEED = 0
env.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

obs_space = env.observation_space.shape[0]
action_space = env.action_space.n

OBS_NORM = False
n_episodes = 10000
# number of most recent episodes averaged when checking the reward threshold
n_eval = env.spec.trials

# global values
norm_obs = RunningMeanStd(shape=env.observation_space.shape)
total_steps = 0
obses = []
rewards = []
reward_eval = deque(maxlen=n_eval)

# load a model
# NOTE(review): the checkpoint file name refers to CartPole-v0 while the
# environment created above is CartPole-v1 - confirm this pairing is intended.
old_net = ActorCriticNet(obs_space, action_space).to(device)
# map_location lets a checkpoint saved on a GPU load on a CPU-only machine.
old_net.load_state_dict(torch.load(
    './saved_models/CartPole-v0_up90_clear_model_ppo_st.pt',
    map_location=device))

  result = entry_point.load(False)


In [None]:
# Show the max_episode_steps value stored on the environment's spec.
env.spec.max_episode_steps

500

In [None]:
# Show env.spec.trials, which the setup cell uses as n_eval
# (the maximum length of the reward_eval window).
env.spec.trials

100

In [None]:
# Show env.spec.reward_threshold, the level the play loop compares the mean of
# reward_eval against before reporting the environment as solved.
env.spec.reward_threshold

475.0

In [None]:
# play
# Roll out the loaded policy for up to n_episodes episodes, rendering each
# step, and stop early once the mean reward over the last n_eval episodes
# reaches env.spec.reward_threshold.
# frames = []
try:
    for i in range(1, n_episodes + 1):
        obs = env.reset()
        done = False
        ep_reward = 0
        while not done:
            # frames.append(env.render(mode = 'rgb_array'))
            env.render()
            if OBS_NORM:
                # Update the running statistics, then standardise and clip
                # the observation to [-5, 5] before feeding it to the policy.
                norm_obs.update(obs)
                obs_norm = np.clip(
                    (obs - norm_obs.mean) / np.sqrt(norm_obs.var),
                    -5, 5)
                action, _ = get_action_and_value(obs_norm, old_net)
            else:
                action, _ = get_action_and_value(obs, old_net)

            _obs, reward, done, _ = env.step(action)
            obs = _obs
            total_steps += 1
            ep_reward += reward

        # The while loop only exits once done is True, so the end-of-episode
        # bookkeeping needs no extra `if done:` guard.
        env.render()
        # NOTE(review): the final observation is added to the running
        # statistics even when OBS_NORM is False - confirm this is intended.
        norm_obs.update(_obs)
        rewards.append(ep_reward)
        reward_eval.append(ep_reward)
        print('{:3} Episode in {:5} steps, reward {:.2f}'.format(
            i, total_steps, ep_reward))
        # frames.append(env.render(mode = 'rgb_array'))
        # imageio.mimsave(f'{env.spec.id}.gif', frames,)

        # Only judge once the evaluation window is full.
        window_full = len(reward_eval) >= n_eval
        if window_full and np.mean(reward_eval) >= env.spec.reward_threshold:
            print('\n{} is sloved! {:3} Episode in {:3} steps'.format(
                env.spec.id, i, total_steps))
            print(np.mean(reward_eval))
            break
finally:
    # Always release the render window, including when the cell is
    # interrupted or an exception is raised mid-episode.
    env.close()

  1 Episode in    99 steps, reward 99.00
  2 Episode in   113 steps, reward 14.00
  3 Episode in   128 steps, reward 15.00
  4 Episode in   139 steps, reward 11.00
  5 Episode in   154 steps, reward 15.00
  6 Episode in   169 steps, reward 15.00
  7 Episode in   182 steps, reward 13.00
  8 Episode in   197 steps, reward 15.00
  9 Episode in   212 steps, reward 15.00
 10 Episode in   226 steps, reward 14.00
 11 Episode in   243 steps, reward 17.00
 12 Episode in   255 steps, reward 12.00
 13 Episode in   272 steps, reward 17.00
 14 Episode in   289 steps, reward 17.00
 15 Episode in   303 steps, reward 14.00
 16 Episode in   322 steps, reward 19.00
 17 Episode in   336 steps, reward 14.00
 18 Episode in   356 steps, reward 20.00
 19 Episode in   374 steps, reward 18.00
 20 Episode in   390 steps, reward 16.00
 21 Episode in   411 steps, reward 21.00
 22 Episode in   428 steps, reward 17.00
 23 Episode in   448 steps, reward 20.00
 24 Episode in   466 steps, reward 18.00
 25 Episode in  

201 Episode in  4807 steps, reward 29.00
202 Episode in  4836 steps, reward 29.00
203 Episode in  4860 steps, reward 24.00
204 Episode in  4888 steps, reward 28.00
205 Episode in  4913 steps, reward 25.00
206 Episode in  4937 steps, reward 24.00
207 Episode in  4957 steps, reward 20.00
208 Episode in  4989 steps, reward 32.00
209 Episode in  5013 steps, reward 24.00
210 Episode in  5047 steps, reward 34.00
211 Episode in  5076 steps, reward 29.00
212 Episode in  5100 steps, reward 24.00
213 Episode in  5127 steps, reward 27.00
214 Episode in  5158 steps, reward 31.00
215 Episode in  5187 steps, reward 29.00
216 Episode in  5210 steps, reward 23.00
217 Episode in  5244 steps, reward 34.00
218 Episode in  5268 steps, reward 24.00
219 Episode in  5296 steps, reward 28.00
220 Episode in  5328 steps, reward 32.00
221 Episode in  5361 steps, reward 33.00
222 Episode in  5391 steps, reward 30.00
223 Episode in  5429 steps, reward 38.00
224 Episode in  5464 steps, reward 35.00
225 Episode in  

401 Episode in 10657 steps, reward 38.00
402 Episode in 10686 steps, reward 29.00
403 Episode in 10713 steps, reward 27.00
404 Episode in 10737 steps, reward 24.00
405 Episode in 10763 steps, reward 26.00
406 Episode in 10788 steps, reward 25.00
407 Episode in 10816 steps, reward 28.00
408 Episode in 10846 steps, reward 30.00
409 Episode in 10872 steps, reward 26.00
410 Episode in 10901 steps, reward 29.00
411 Episode in 10927 steps, reward 26.00
412 Episode in 10957 steps, reward 30.00
413 Episode in 10983 steps, reward 26.00
414 Episode in 11012 steps, reward 29.00
415 Episode in 11051 steps, reward 39.00
416 Episode in 11079 steps, reward 28.00
417 Episode in 11104 steps, reward 25.00
418 Episode in 11135 steps, reward 31.00
419 Episode in 11162 steps, reward 27.00
420 Episode in 11187 steps, reward 25.00
421 Episode in 11224 steps, reward 37.00
422 Episode in 11262 steps, reward 38.00
423 Episode in 11284 steps, reward 22.00
424 Episode in 11310 steps, reward 26.00
425 Episode in 1

601 Episode in 16585 steps, reward 32.00
602 Episode in 16606 steps, reward 21.00
603 Episode in 16635 steps, reward 29.00
604 Episode in 16666 steps, reward 31.00
605 Episode in 16691 steps, reward 25.00
606 Episode in 16714 steps, reward 23.00
607 Episode in 16742 steps, reward 28.00
608 Episode in 16768 steps, reward 26.00
609 Episode in 16794 steps, reward 26.00
610 Episode in 16823 steps, reward 29.00
611 Episode in 16852 steps, reward 29.00
612 Episode in 16886 steps, reward 34.00
613 Episode in 16914 steps, reward 28.00
614 Episode in 16943 steps, reward 29.00
615 Episode in 16969 steps, reward 26.00
616 Episode in 16996 steps, reward 27.00
617 Episode in 17020 steps, reward 24.00
618 Episode in 17055 steps, reward 35.00
619 Episode in 17086 steps, reward 31.00
620 Episode in 17120 steps, reward 34.00
621 Episode in 17153 steps, reward 33.00
622 Episode in 17177 steps, reward 24.00
623 Episode in 17204 steps, reward 27.00
624 Episode in 17234 steps, reward 30.00
625 Episode in 1

801 Episode in 22190 steps, reward 24.00
802 Episode in 22220 steps, reward 30.00
803 Episode in 22248 steps, reward 28.00
804 Episode in 22280 steps, reward 32.00
805 Episode in 22312 steps, reward 32.00
806 Episode in 22343 steps, reward 31.00
807 Episode in 22381 steps, reward 38.00
808 Episode in 22411 steps, reward 30.00
809 Episode in 22436 steps, reward 25.00
810 Episode in 22464 steps, reward 28.00
811 Episode in 22492 steps, reward 28.00
812 Episode in 22516 steps, reward 24.00
813 Episode in 22537 steps, reward 21.00
814 Episode in 22629 steps, reward 92.00
815 Episode in 22663 steps, reward 34.00
816 Episode in 22694 steps, reward 31.00
817 Episode in 22722 steps, reward 28.00
818 Episode in 22752 steps, reward 30.00
819 Episode in 22779 steps, reward 27.00
820 Episode in 22803 steps, reward 24.00
821 Episode in 22835 steps, reward 32.00
822 Episode in 22862 steps, reward 27.00
823 Episode in 22893 steps, reward 31.00
824 Episode in 22925 steps, reward 32.00
825 Episode in 2

1001 Episode in 27927 steps, reward 32.00
1002 Episode in 27950 steps, reward 23.00
1003 Episode in 27977 steps, reward 27.00
1004 Episode in 28008 steps, reward 31.00
1005 Episode in 28032 steps, reward 24.00
1006 Episode in 28060 steps, reward 28.00
1007 Episode in 28095 steps, reward 35.00
1008 Episode in 28129 steps, reward 34.00
1009 Episode in 28160 steps, reward 31.00
1010 Episode in 28195 steps, reward 35.00
1011 Episode in 28220 steps, reward 25.00
1012 Episode in 28244 steps, reward 24.00
1013 Episode in 28290 steps, reward 46.00
1014 Episode in 28317 steps, reward 27.00
1015 Episode in 28343 steps, reward 26.00
1016 Episode in 28372 steps, reward 29.00
1017 Episode in 28402 steps, reward 30.00
1018 Episode in 28435 steps, reward 33.00
1019 Episode in 28460 steps, reward 25.00
1020 Episode in 28488 steps, reward 28.00
1021 Episode in 28512 steps, reward 24.00
1022 Episode in 28537 steps, reward 25.00
1023 Episode in 28571 steps, reward 34.00
1024 Episode in 28599 steps, rewar

1197 Episode in 33602 steps, reward 33.00
1198 Episode in 33634 steps, reward 32.00
1199 Episode in 33663 steps, reward 29.00
1200 Episode in 33690 steps, reward 27.00
1201 Episode in 33721 steps, reward 31.00
1202 Episode in 33750 steps, reward 29.00
1203 Episode in 33783 steps, reward 33.00
1204 Episode in 33808 steps, reward 25.00
1205 Episode in 33832 steps, reward 24.00
1206 Episode in 33866 steps, reward 34.00
1207 Episode in 33890 steps, reward 24.00
1208 Episode in 33926 steps, reward 36.00
1209 Episode in 33965 steps, reward 39.00
1210 Episode in 33989 steps, reward 24.00
1211 Episode in 34024 steps, reward 35.00
1212 Episode in 34055 steps, reward 31.00
1213 Episode in 34082 steps, reward 27.00
1214 Episode in 34111 steps, reward 29.00
1215 Episode in 34144 steps, reward 33.00
1216 Episode in 34171 steps, reward 27.00
1217 Episode in 34196 steps, reward 25.00
1218 Episode in 34226 steps, reward 30.00
1219 Episode in 34259 steps, reward 33.00
1220 Episode in 34282 steps, rewar

1394 Episode in 39278 steps, reward 30.00
1395 Episode in 39307 steps, reward 29.00
1396 Episode in 39330 steps, reward 23.00
1397 Episode in 39357 steps, reward 27.00
1398 Episode in 39387 steps, reward 30.00
1399 Episode in 39425 steps, reward 38.00
1400 Episode in 39445 steps, reward 20.00
1401 Episode in 39468 steps, reward 23.00
1402 Episode in 39494 steps, reward 26.00
1403 Episode in 39523 steps, reward 29.00
1404 Episode in 39555 steps, reward 32.00
1405 Episode in 39586 steps, reward 31.00
1406 Episode in 39613 steps, reward 27.00
1407 Episode in 39638 steps, reward 25.00
1408 Episode in 39665 steps, reward 27.00
1409 Episode in 39688 steps, reward 23.00
1410 Episode in 39714 steps, reward 26.00
1411 Episode in 39752 steps, reward 38.00
1412 Episode in 39779 steps, reward 27.00
1413 Episode in 39805 steps, reward 26.00
1414 Episode in 39829 steps, reward 24.00
1415 Episode in 39863 steps, reward 34.00
1416 Episode in 39892 steps, reward 29.00
1417 Episode in 39914 steps, rewar

1590 Episode in 44975 steps, reward 37.00
1591 Episode in 45010 steps, reward 35.00
1592 Episode in 45045 steps, reward 35.00
1593 Episode in 45079 steps, reward 34.00
1594 Episode in 45102 steps, reward 23.00
1595 Episode in 45137 steps, reward 35.00
1596 Episode in 45160 steps, reward 23.00
1597 Episode in 45189 steps, reward 29.00
1598 Episode in 45222 steps, reward 33.00
1599 Episode in 45254 steps, reward 32.00
1600 Episode in 45285 steps, reward 31.00
1601 Episode in 45308 steps, reward 23.00
1602 Episode in 45334 steps, reward 26.00
1603 Episode in 45357 steps, reward 23.00
1604 Episode in 45392 steps, reward 35.00
1605 Episode in 45415 steps, reward 23.00
1606 Episode in 45437 steps, reward 22.00
1607 Episode in 45465 steps, reward 28.00
1608 Episode in 45497 steps, reward 32.00
1609 Episode in 45522 steps, reward 25.00
1610 Episode in 45547 steps, reward 25.00
1611 Episode in 45580 steps, reward 33.00
1612 Episode in 45605 steps, reward 25.00
1613 Episode in 45632 steps, rewar

1786 Episode in 50732 steps, reward 31.00
1787 Episode in 50765 steps, reward 33.00
1788 Episode in 50795 steps, reward 30.00
1789 Episode in 50818 steps, reward 23.00
1790 Episode in 50841 steps, reward 23.00
1791 Episode in 50876 steps, reward 35.00
1792 Episode in 50909 steps, reward 33.00
1793 Episode in 50935 steps, reward 26.00
1794 Episode in 50965 steps, reward 30.00
1795 Episode in 50997 steps, reward 32.00
1796 Episode in 51028 steps, reward 31.00
1797 Episode in 51056 steps, reward 28.00
1798 Episode in 51091 steps, reward 35.00
1799 Episode in 51119 steps, reward 28.00
1800 Episode in 51149 steps, reward 30.00
1801 Episode in 51173 steps, reward 24.00
1802 Episode in 51192 steps, reward 19.00
1803 Episode in 51218 steps, reward 26.00
1804 Episode in 51246 steps, reward 28.00
1805 Episode in 51281 steps, reward 35.00
1806 Episode in 51307 steps, reward 26.00
1807 Episode in 51329 steps, reward 22.00
1808 Episode in 51362 steps, reward 33.00
1809 Episode in 51396 steps, rewar

1982 Episode in 56486 steps, reward 30.00
1983 Episode in 56519 steps, reward 33.00
1984 Episode in 56550 steps, reward 31.00
1985 Episode in 56579 steps, reward 29.00
1986 Episode in 56603 steps, reward 24.00
1987 Episode in 56630 steps, reward 27.00
1988 Episode in 56658 steps, reward 28.00
1989 Episode in 56692 steps, reward 34.00
1990 Episode in 56716 steps, reward 24.00
1991 Episode in 56752 steps, reward 36.00
1992 Episode in 56777 steps, reward 25.00
1993 Episode in 56808 steps, reward 31.00
1994 Episode in 56828 steps, reward 20.00
1995 Episode in 56853 steps, reward 25.00
1996 Episode in 56887 steps, reward 34.00
1997 Episode in 56924 steps, reward 37.00
1998 Episode in 56952 steps, reward 28.00
1999 Episode in 57052 steps, reward 100.00
2000 Episode in 57079 steps, reward 27.00
2001 Episode in 57108 steps, reward 29.00
2002 Episode in 57140 steps, reward 32.00
2003 Episode in 57163 steps, reward 23.00
2004 Episode in 57196 steps, reward 33.00
2005 Episode in 57232 steps, rewa

2178 Episode in 62321 steps, reward 26.00
2179 Episode in 62354 steps, reward 33.00
2180 Episode in 62377 steps, reward 23.00
2181 Episode in 62408 steps, reward 31.00
2182 Episode in 62437 steps, reward 29.00
2183 Episode in 62462 steps, reward 25.00
2184 Episode in 62486 steps, reward 24.00
2185 Episode in 62516 steps, reward 30.00
2186 Episode in 62545 steps, reward 29.00
2187 Episode in 62567 steps, reward 22.00
2188 Episode in 62603 steps, reward 36.00
2189 Episode in 62634 steps, reward 31.00
2190 Episode in 62662 steps, reward 28.00
2191 Episode in 62696 steps, reward 34.00
2192 Episode in 62730 steps, reward 34.00
2193 Episode in 62759 steps, reward 29.00
2194 Episode in 62792 steps, reward 33.00
2195 Episode in 62828 steps, reward 36.00
2196 Episode in 62856 steps, reward 28.00
2197 Episode in 62887 steps, reward 31.00


In [None]:
# Plot the total reward of every played episode, in play order.
fig, ax = plt.subplots(figsize=(15, 5))
ax.set_title('reward')
# Label both axes so the figure can be read on its own.
ax.set_xlabel('episode')
ax.set_ylabel('episode reward')
ax.plot(rewards)
plt.show()

In [None]:
# NOTE(review): undocumented per-environment records of the form
# (environment id, integer, float). The meaning of the two numeric fields is
# not stated anywhere in this notebook - confirm with the author.
[
    ('CartPole-v0', 412, 1),
    ('CartPole-v1', 452, 0.05),
    ('MountainCar-v0', 193, 0.1),
    ('LunarLander-v2', 260, 0.1)
]