# Run PPO on the Ant environment, based on [pat-coady/trpo](https://github.com/pat-coady/trpo)
#### Further details can be found in Patrick Coady's blog post on [gym and PPO](https://learningai.io/projects/2017/07/28/ai-gym-workout.html) and in his [notes on the Ant env](https://gist.github.com/pat-coady/bac60888f011199aad72d2f1e6f5a4fa)

In [1]:
# Imports and notebook-wide configuration for the PPO-on-Ant experiment.
import gym,mujoco_py,warnings,time,os,glob,shutil,csv,skvideo.io
# Raise gym's logger threshold to level 40
# (presumably gym.logger.ERROR -- confirm against the installed gym version).
gym.logger.set_level(40)
# Silence every Python warning for the rest of the session.
warnings.filterwarnings("ignore") 
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from gym.envs import mujoco
from datetime import datetime
# Project-local helpers (util.py, custom_ant.py, ppo.py next to this notebook).
from util import PID_class,Scaler,Logger,display_frames_as_gif
from custom_ant import AntEnvCustom
from ppo import NNValueFunction,Policy,run_episode,run_policy,add_value,discount,\
    add_disc_sum_rew,add_gae,build_train_set,log_batch_stats,run_episode_vid
# Print NumPy arrays with 2 decimals and allow lines up to 150 characters wide.
np.set_printoptions(precision=2,linewidth=150)
# Render matplotlib figures inline, using the high-resolution 'retina' format.
%matplotlib inline  
%config InlineBackend.figure_format = 'retina'
print ("Packages Loaded") 

Packages Loaded


### main

In [2]:
# ---- Environment ----
env = AntEnvCustom()
obs_dim, act_dim = env.observation_space.shape[0], env.action_space.shape[0]
env.reset()  # start from a fresh episode
# render_img = env.render(mode='rgb_array')
print ("obs_dim:[%d] act_dim:[%d]"%(obs_dim,act_dim))

# One extra observation feature carries the time step (see run_episode()).
obs_dim = obs_dim + 1

# ---- Logging ----
env_name = 'Ant'
# Timestamp used to create unique directories per run.
now = datetime.utcnow().strftime("%b-%d_%H:%M:%S")
logger = Logger(logName=env_name,now=now,_NOTUSE=True)
aigym_path = os.path.join('/tmp', env_name, now)

# ---- Observation scaler ----
scaler = Scaler(obs_dim)

# ---- Hyperparameters shared by the two networks ----
hid1_mult = 10
kl_targ = 0.003
policy_logvar = -1.0

# ---- Value function (critic) first, then the policy (actor) ----
val_func = NNValueFunction(obs_dim, hid1_mult)
policy = Policy(obs_dim, act_dim, kl_targ, hid1_mult, policy_logvar)

Custom Ant Environment made by SJ.
obs_dim:[111] act_dim:[8]
Value Params -- h1: 1120, h2: 74, h3: 5, lr: 0.00116
Policy Params -- h1: 1120, h2: 299, h3: 80, lr: 5.2e-05, logvar_speed: 16
setting up loss with KL penalty


### Run policy for the first time

In [3]:
# Hyperparameters for the return / advantage estimates.
gamma, lam = 0.995, 0.95  # discount factor, lambda for GAE

# Collect a handful of episodes and annotate each trajectory in place.
trajectories = run_policy(env, policy, scaler, logger, episodes=5)
add_value(trajectories, val_func)      # critic's value estimates
add_disc_sum_rew(trajectories, gamma)  # discounted sum of rewards
add_gae(trajectories, gamma, lam)      # advantages

# Show the array shapes stored in the first trajectory, one line per field.
_first_traj = trajectories[0]
for _label, _key in (
        ('observes shape:', 'observes'),
        ('actions shape:', 'actions'),
        ('rewards shape:', 'rewards'),
        ('unscaled_obs shape:', 'unscaled_obs'),
        ('values shape:', 'values'),
        ('disc_sum_rew shape:', 'disc_sum_rew'),
        ('advantages shape:', 'advantages')):
    print (_label, _first_traj[_key].shape)

observes shape: (94, 112)
actions shape: (94, 8)
rewards shape: (94,)
unscaled_obs shape: (94, 112)
values shape: (94,)
disc_sum_rew shape: (94,)
advantages shape: (94,)


### Loop

In [4]:
# Output switches read by the training loop for its periodic evaluation episode:
# SAVE_VID writes the frames to an .mp4 file, MAKE_GIF shows them inline as a GIF.
SAVE_VID, MAKE_GIF = True, False

In [5]:
# PPO training loop.
#
# Each epoch: collect `batchSize` episodes with the current policy, annotate
# them with value estimates, discounted returns and GAE advantages, update the
# policy and the value function, then print the mean per-step reward together
# with its four components. Every PLOT_EVERY epochs (and on the last epoch) one
# extra episode is recorded and optionally shown as a GIF / saved as a video.

# ---- Loop-invariant settings (defined once instead of on every epoch) ----
maxEpoch  = 1000
batchSize = 50
gamma = 0.995       # Discount factor
lam = 0.95          # Lambda for GAE
PLOT_EVERY = 20     # Record an evaluation episode every PLOT_EVERY epochs
DO_ANIMATE = False  # Not read anywhere in this cell; kept so the name stays defined

for _epoch in range(maxEpoch):
    # 1. Run policy
    trajectories = run_policy(env, policy, scaler, logger, episodes=batchSize)
    # 2. Get (predict) value from the critic network
    add_value(trajectories, val_func)  # add estimated values to episodes
    # 3. Get GAE
    add_disc_sum_rew(trajectories, gamma)  # calculated discounted sum of Rs
    add_gae(trajectories, gamma, lam)  # calculate advantage
    # Concatenate all episodes into single NumPy arrays
    observes, actions, advantages, disc_sum_rew = build_train_set(trajectories)
    # File logging is disabled in this notebook:
    # log_batch_stats(observes, actions, advantages, disc_sum_rew, logger, episode)
    # 4. Update
    policy.update(observes, actions, advantages, logger)  # update policy
    val_func.fit(observes, disc_sum_rew, logger)  # update value function
    # logger.write(display=True)  # write logger results to file and stdout

    # 5. Report the mean per-step reward over the whole batch.
    # A single concatenate avoids re-copying the growing array per trajectory.
    rTotal = np.concatenate([traj['rewards'] for traj in trajectories])
    avgRwd = rTotal.mean()

    # Per-step reward breakdown, flattened across all trajectories.
    rDetails = [step for traj in trajectories for step in traj['rDetails']]
    reward_contacts = [step['reward_contact'] for step in rDetails]
    reward_ctrls = [step['reward_ctrl'] for step in rDetails]
    reward_forwards = [step['reward_forward'] for step in rDetails]
    reward_survives = [step['reward_survive'] for step in rDetails]
    avgReward_contact = np.asarray(reward_contacts).mean()
    avgReward_ctrl = np.asarray(reward_ctrls).mean()
    avgReward_forward = np.asarray(reward_forwards).mean()
    avgReward_survive = np.asarray(reward_survives).mean()
    print ("[%d/%d](#total:%d) avgRwd:[%.3f](cntct:%.3f+ctrl:%.3f+fwd:%.3f+srv:%.3f)"%
           (_epoch,maxEpoch,(_epoch+1)*batchSize,avgRwd,
           avgReward_contact,avgReward_ctrl,avgReward_forward,avgReward_survive))

    # 6. Periodic evaluation episode (logical `or`, not bitwise `|`).
    if (_epoch % PLOT_EVERY) == 0 or _epoch == (maxEpoch - 1):
        ret = run_episode_vid(env, policy, scaler)
        print ("  [^] avgRwd:[%.3f] Xdisp:[%.3f] hDisp:[%.1f]"%
               (np.asarray(ret['rewards']).mean(),ret['xDisp'],ret['hDisp']))

        if MAKE_GIF:
            display_frames_as_gif(ret['frames'])
        if SAVE_VID:
            outputdata = np.asarray(ret['frames']).astype(np.uint8)
            vidName = 'vids/ant_ppo_epoch%03d.mp4'%(_epoch)
            # Make sure the output directory exists before writing; nothing
            # else in this notebook creates it.
            os.makedirs(os.path.dirname(vidName), exist_ok=True)
            skvideo.io.vwrite(vidName,outputdata)
            print ("[%s] saved."%(vidName))
print ("Done.")

[0/1000](#total:50) avgRwd:[-0.677](cntct:-0.001+ctrl:-1.711+fwd:0.036+srv:1.000)
Creating window glfw
  [^] avgRwd:[-0.922] Xdisp:[-0.909] hDisp:[-34.8]
[vids/ant_ppo_epoch000.mp4] saved.
[1/1000](#total:100) avgRwd:[-0.754](cntct:-0.001+ctrl:-1.748+fwd:-0.005+srv:1.000)
[2/1000](#total:150) avgRwd:[-0.744](cntct:-0.001+ctrl:-1.736+fwd:-0.007+srv:1.000)
[3/1000](#total:200) avgRwd:[-0.776](cntct:-0.001+ctrl:-1.742+fwd:-0.033+srv:1.000)
[4/1000](#total:250) avgRwd:[-0.648](cntct:-0.001+ctrl:-1.731+fwd:0.084+srv:1.000)
[5/1000](#total:300) avgRwd:[-0.717](cntct:-0.001+ctrl:-1.742+fwd:0.027+srv:1.000)
[6/1000](#total:350) avgRwd:[-0.702](cntct:-0.001+ctrl:-1.714+fwd:0.013+srv:1.000)
[7/1000](#total:400) avgRwd:[-0.724](cntct:-0.001+ctrl:-1.704+fwd:-0.018+srv:1.000)
[8/1000](#total:450) avgRwd:[-0.648](cntct:-0.001+ctrl:-1.686+fwd:0.040+srv:1.000)
[9/1000](#total:500) avgRwd:[-0.611](cntct:-0.001+ctrl:-1.650+fwd:0.040+srv:1.000)
[10/1000](#total:550) avgRwd:[-0.594](cntct:-0.001+ctrl:-1.6

[92/1000](#total:4650) avgRwd:[-0.133](cntct:-0.001+ctrl:-1.330+fwd:0.198+srv:1.000)
[93/1000](#total:4700) avgRwd:[-0.187](cntct:-0.001+ctrl:-1.357+fwd:0.171+srv:1.000)
[94/1000](#total:4750) avgRwd:[-0.155](cntct:-0.001+ctrl:-1.329+fwd:0.175+srv:1.000)
[95/1000](#total:4800) avgRwd:[-0.126](cntct:-0.001+ctrl:-1.325+fwd:0.200+srv:1.000)
[96/1000](#total:4850) avgRwd:[-0.106](cntct:-0.001+ctrl:-1.300+fwd:0.195+srv:1.000)
[97/1000](#total:4900) avgRwd:[-0.115](cntct:-0.001+ctrl:-1.288+fwd:0.175+srv:1.000)
[98/1000](#total:4950) avgRwd:[-0.061](cntct:-0.001+ctrl:-1.306+fwd:0.247+srv:1.000)
[99/1000](#total:5000) avgRwd:[-0.018](cntct:-0.001+ctrl:-1.299+fwd:0.282+srv:1.000)
[100/1000](#total:5050) avgRwd:[-0.054](cntct:-0.001+ctrl:-1.269+fwd:0.216+srv:1.000)
  [^] avgRwd:[-0.115] Xdisp:[1.141] hDisp:[-100.4]
[vids/ant_ppo_epoch100.mp4] saved.
[101/1000](#total:5100) avgRwd:[0.008](cntct:-0.001+ctrl:-1.291+fwd:0.300+srv:1.000)
[102/1000](#total:5150) avgRwd:[-0.104](cntct:-0.001+ctrl:-1.28

[184/1000](#total:9250) avgRwd:[0.265](cntct:-0.001+ctrl:-1.016+fwd:0.283+srv:1.000)
[185/1000](#total:9300) avgRwd:[0.342](cntct:-0.001+ctrl:-1.010+fwd:0.354+srv:1.000)
[186/1000](#total:9350) avgRwd:[0.323](cntct:-0.001+ctrl:-1.011+fwd:0.335+srv:1.000)
[187/1000](#total:9400) avgRwd:[0.296](cntct:-0.001+ctrl:-1.014+fwd:0.312+srv:1.000)
[188/1000](#total:9450) avgRwd:[0.348](cntct:-0.001+ctrl:-1.025+fwd:0.374+srv:1.000)
[189/1000](#total:9500) avgRwd:[0.259](cntct:-0.001+ctrl:-1.031+fwd:0.291+srv:1.000)
[190/1000](#total:9550) avgRwd:[0.320](cntct:-0.001+ctrl:-1.028+fwd:0.349+srv:1.000)
[191/1000](#total:9600) avgRwd:[0.422](cntct:-0.001+ctrl:-1.037+fwd:0.459+srv:1.000)
[192/1000](#total:9650) avgRwd:[0.325](cntct:-0.001+ctrl:-1.004+fwd:0.330+srv:1.000)
[193/1000](#total:9700) avgRwd:[0.280](cntct:-0.001+ctrl:-1.008+fwd:0.289+srv:1.000)
[194/1000](#total:9750) avgRwd:[0.336](cntct:-0.001+ctrl:-1.004+fwd:0.341+srv:1.000)
[195/1000](#total:9800) avgRwd:[0.320](cntct:-0.001+ctrl:-1.014+f

[276/1000](#total:13850) avgRwd:[0.536](cntct:-0.001+ctrl:-0.887+fwd:0.423+srv:1.000)
[277/1000](#total:13900) avgRwd:[0.568](cntct:-0.001+ctrl:-0.887+fwd:0.457+srv:1.000)
[278/1000](#total:13950) avgRwd:[0.598](cntct:-0.001+ctrl:-0.887+fwd:0.486+srv:1.000)
[279/1000](#total:14000) avgRwd:[0.503](cntct:-0.001+ctrl:-0.869+fwd:0.374+srv:1.000)
[280/1000](#total:14050) avgRwd:[0.599](cntct:-0.001+ctrl:-0.872+fwd:0.472+srv:1.000)
  [^] avgRwd:[0.639] Xdisp:[4.471] hDisp:[88.3]
[vids/ant_ppo_epoch280.mp4] saved.
[281/1000](#total:14100) avgRwd:[0.568](cntct:-0.001+ctrl:-0.876+fwd:0.445+srv:1.000)
[282/1000](#total:14150) avgRwd:[0.591](cntct:-0.001+ctrl:-0.883+fwd:0.476+srv:1.000)
[283/1000](#total:14200) avgRwd:[0.621](cntct:-0.001+ctrl:-0.867+fwd:0.488+srv:1.000)
[284/1000](#total:14250) avgRwd:[0.543](cntct:-0.001+ctrl:-0.872+fwd:0.416+srv:1.000)
[285/1000](#total:14300) avgRwd:[0.563](cntct:-0.001+ctrl:-0.865+fwd:0.430+srv:1.000)
[286/1000](#total:14350) avgRwd:[0.550](cntct:-0.001+ctrl

[367/1000](#total:18400) avgRwd:[0.811](cntct:-0.001+ctrl:-0.805+fwd:0.617+srv:1.000)
[368/1000](#total:18450) avgRwd:[0.837](cntct:-0.001+ctrl:-0.808+fwd:0.645+srv:1.000)
[369/1000](#total:18500) avgRwd:[0.799](cntct:-0.001+ctrl:-0.804+fwd:0.604+srv:1.000)
[370/1000](#total:18550) avgRwd:[0.849](cntct:-0.001+ctrl:-0.818+fwd:0.669+srv:1.000)
[371/1000](#total:18600) avgRwd:[0.895](cntct:-0.001+ctrl:-0.799+fwd:0.696+srv:1.000)
[372/1000](#total:18650) avgRwd:[0.888](cntct:-0.001+ctrl:-0.804+fwd:0.693+srv:1.000)
[373/1000](#total:18700) avgRwd:[0.760](cntct:-0.001+ctrl:-0.834+fwd:0.595+srv:1.000)
[374/1000](#total:18750) avgRwd:[0.823](cntct:-0.001+ctrl:-0.817+fwd:0.642+srv:1.000)
[375/1000](#total:18800) avgRwd:[0.869](cntct:-0.001+ctrl:-0.809+fwd:0.680+srv:1.000)
[376/1000](#total:18850) avgRwd:[0.859](cntct:-0.001+ctrl:-0.811+fwd:0.672+srv:1.000)
[377/1000](#total:18900) avgRwd:[0.849](cntct:-0.001+ctrl:-0.810+fwd:0.660+srv:1.000)
[378/1000](#total:18950) avgRwd:[0.868](cntct:-0.001+c

[459/1000](#total:23000) avgRwd:[1.140](cntct:-0.001+ctrl:-0.748+fwd:0.889+srv:1.000)
[460/1000](#total:23050) avgRwd:[1.154](cntct:-0.001+ctrl:-0.749+fwd:0.904+srv:1.000)
  [^] avgRwd:[0.951] Xdisp:[5.888] hDisp:[45.4]
[vids/ant_ppo_epoch460.mp4] saved.
[461/1000](#total:23100) avgRwd:[1.141](cntct:-0.001+ctrl:-0.745+fwd:0.887+srv:1.000)
[462/1000](#total:23150) avgRwd:[1.126](cntct:-0.001+ctrl:-0.750+fwd:0.877+srv:1.000)
[463/1000](#total:23200) avgRwd:[1.067](cntct:-0.001+ctrl:-0.743+fwd:0.812+srv:1.000)
[464/1000](#total:23250) avgRwd:[1.107](cntct:-0.001+ctrl:-0.757+fwd:0.865+srv:1.000)
[465/1000](#total:23300) avgRwd:[1.115](cntct:-0.001+ctrl:-0.747+fwd:0.863+srv:1.000)
[466/1000](#total:23350) avgRwd:[1.162](cntct:-0.001+ctrl:-0.753+fwd:0.917+srv:1.000)
[467/1000](#total:23400) avgRwd:[1.155](cntct:-0.001+ctrl:-0.755+fwd:0.911+srv:1.000)
[468/1000](#total:23450) avgRwd:[1.115](cntct:-0.001+ctrl:-0.742+fwd:0.858+srv:1.000)
[469/1000](#total:23500) avgRwd:[1.204](cntct:-0.001+ctrl

[550/1000](#total:27550) avgRwd:[1.475](cntct:-0.001+ctrl:-0.727+fwd:1.203+srv:1.000)
[551/1000](#total:27600) avgRwd:[1.489](cntct:-0.001+ctrl:-0.720+fwd:1.210+srv:1.000)
[552/1000](#total:27650) avgRwd:[1.512](cntct:-0.001+ctrl:-0.725+fwd:1.237+srv:1.000)
[553/1000](#total:27700) avgRwd:[1.424](cntct:-0.001+ctrl:-0.729+fwd:1.154+srv:1.000)
[554/1000](#total:27750) avgRwd:[1.482](cntct:-0.001+ctrl:-0.727+fwd:1.211+srv:1.000)
[555/1000](#total:27800) avgRwd:[1.523](cntct:-0.001+ctrl:-0.727+fwd:1.251+srv:1.000)
[556/1000](#total:27850) avgRwd:[1.454](cntct:-0.001+ctrl:-0.722+fwd:1.177+srv:1.000)
[557/1000](#total:27900) avgRwd:[1.446](cntct:-0.001+ctrl:-0.726+fwd:1.173+srv:1.000)
[558/1000](#total:27950) avgRwd:[1.518](cntct:-0.001+ctrl:-0.725+fwd:1.244+srv:1.000)
[559/1000](#total:28000) avgRwd:[1.420](cntct:-0.001+ctrl:-0.726+fwd:1.147+srv:1.000)
[560/1000](#total:28050) avgRwd:[1.463](cntct:-0.001+ctrl:-0.723+fwd:1.187+srv:1.000)
  [^] avgRwd:[1.322] Xdisp:[9.340] hDisp:[-76.2]
[vids

[641/1000](#total:32100) avgRwd:[1.757](cntct:-0.001+ctrl:-0.716+fwd:1.474+srv:1.000)
[642/1000](#total:32150) avgRwd:[1.757](cntct:-0.001+ctrl:-0.706+fwd:1.463+srv:1.000)
[643/1000](#total:32200) avgRwd:[1.731](cntct:-0.001+ctrl:-0.712+fwd:1.444+srv:1.000)
[644/1000](#total:32250) avgRwd:[1.808](cntct:-0.001+ctrl:-0.704+fwd:1.513+srv:1.000)
[645/1000](#total:32300) avgRwd:[1.792](cntct:-0.001+ctrl:-0.706+fwd:1.500+srv:1.000)
[646/1000](#total:32350) avgRwd:[1.792](cntct:-0.001+ctrl:-0.717+fwd:1.510+srv:1.000)
[647/1000](#total:32400) avgRwd:[1.762](cntct:-0.001+ctrl:-0.705+fwd:1.469+srv:1.000)
[648/1000](#total:32450) avgRwd:[1.809](cntct:-0.001+ctrl:-0.697+fwd:1.507+srv:1.000)
[649/1000](#total:32500) avgRwd:[1.695](cntct:-0.001+ctrl:-0.708+fwd:1.404+srv:1.000)
[650/1000](#total:32550) avgRwd:[1.801](cntct:-0.001+ctrl:-0.702+fwd:1.504+srv:1.000)
[651/1000](#total:32600) avgRwd:[1.766](cntct:-0.001+ctrl:-0.714+fwd:1.481+srv:1.000)
[652/1000](#total:32650) avgRwd:[1.855](cntct:-0.001+c

[733/1000](#total:36700) avgRwd:[2.031](cntct:-0.001+ctrl:-0.683+fwd:1.715+srv:1.000)
[734/1000](#total:36750) avgRwd:[1.983](cntct:-0.001+ctrl:-0.676+fwd:1.660+srv:1.000)
[735/1000](#total:36800) avgRwd:[2.043](cntct:-0.001+ctrl:-0.681+fwd:1.724+srv:1.000)
[736/1000](#total:36850) avgRwd:[1.972](cntct:-0.001+ctrl:-0.687+fwd:1.660+srv:1.000)
[737/1000](#total:36900) avgRwd:[1.936](cntct:-0.001+ctrl:-0.705+fwd:1.642+srv:1.000)
[738/1000](#total:36950) avgRwd:[2.015](cntct:-0.001+ctrl:-0.688+fwd:1.704+srv:1.000)
[739/1000](#total:37000) avgRwd:[2.076](cntct:-0.001+ctrl:-0.670+fwd:1.748+srv:1.000)
[740/1000](#total:37050) avgRwd:[1.987](cntct:-0.001+ctrl:-0.680+fwd:1.668+srv:1.000)
  [^] avgRwd:[2.261] Xdisp:[17.686] hDisp:[-33.1]
[vids/ant_ppo_epoch740.mp4] saved.
[741/1000](#total:37100) avgRwd:[2.035](cntct:-0.001+ctrl:-0.680+fwd:1.716+srv:1.000)
[742/1000](#total:37150) avgRwd:[2.058](cntct:-0.001+ctrl:-0.681+fwd:1.740+srv:1.000)
[743/1000](#total:37200) avgRwd:[2.077](cntct:-0.001+ct

[824/1000](#total:41250) avgRwd:[2.209](cntct:-0.001+ctrl:-0.671+fwd:1.881+srv:1.000)
[825/1000](#total:41300) avgRwd:[2.261](cntct:-0.001+ctrl:-0.684+fwd:1.947+srv:1.000)
[826/1000](#total:41350) avgRwd:[2.259](cntct:-0.001+ctrl:-0.663+fwd:1.923+srv:1.000)
[827/1000](#total:41400) avgRwd:[2.236](cntct:-0.001+ctrl:-0.679+fwd:1.916+srv:1.000)
[828/1000](#total:41450) avgRwd:[2.302](cntct:-0.001+ctrl:-0.671+fwd:1.974+srv:1.000)
[829/1000](#total:41500) avgRwd:[2.230](cntct:-0.001+ctrl:-0.665+fwd:1.896+srv:1.000)
[830/1000](#total:41550) avgRwd:[2.282](cntct:-0.001+ctrl:-0.660+fwd:1.943+srv:1.000)
[831/1000](#total:41600) avgRwd:[2.286](cntct:-0.001+ctrl:-0.664+fwd:1.951+srv:1.000)
[832/1000](#total:41650) avgRwd:[2.299](cntct:-0.001+ctrl:-0.670+fwd:1.970+srv:1.000)
[833/1000](#total:41700) avgRwd:[2.385](cntct:-0.001+ctrl:-0.666+fwd:2.052+srv:1.000)
[834/1000](#total:41750) avgRwd:[2.282](cntct:-0.001+ctrl:-0.684+fwd:1.966+srv:1.000)
[835/1000](#total:41800) avgRwd:[2.298](cntct:-0.001+c

[916/1000](#total:45850) avgRwd:[2.290](cntct:-0.001+ctrl:-0.607+fwd:1.899+srv:1.000)
[917/1000](#total:45900) avgRwd:[2.196](cntct:-0.001+ctrl:-0.617+fwd:1.814+srv:1.000)
[918/1000](#total:45950) avgRwd:[2.168](cntct:-0.001+ctrl:-0.603+fwd:1.772+srv:1.000)
[919/1000](#total:46000) avgRwd:[2.165](cntct:-0.001+ctrl:-0.613+fwd:1.779+srv:1.000)
[920/1000](#total:46050) avgRwd:[2.234](cntct:-0.001+ctrl:-0.608+fwd:1.843+srv:1.000)
  [^] avgRwd:[2.564] Xdisp:[19.328] hDisp:[-56.6]
[vids/ant_ppo_epoch920.mp4] saved.
[921/1000](#total:46100) avgRwd:[2.296](cntct:-0.001+ctrl:-0.608+fwd:1.905+srv:1.000)
[922/1000](#total:46150) avgRwd:[2.348](cntct:-0.001+ctrl:-0.609+fwd:1.959+srv:1.000)
[923/1000](#total:46200) avgRwd:[2.296](cntct:-0.001+ctrl:-0.608+fwd:1.904+srv:1.000)
[924/1000](#total:46250) avgRwd:[2.390](cntct:-0.001+ctrl:-0.586+fwd:1.977+srv:1.000)
[925/1000](#total:46300) avgRwd:[2.277](cntct:-0.001+ctrl:-0.602+fwd:1.880+srv:1.000)
[926/1000](#total:46350) avgRwd:[2.288](cntct:-0.001+ct

### Animate final motion

In [None]:
# Output switches for the final rollouts: SAVE_VID_FINAL writes each episode to
# an .mp4 file, MAKE_GIF_FINAL shows it inline as a GIF.
SAVE_VID_FINAL, MAKE_GIF_FINAL = True, False

In [None]:
# Roll out the trained policy three times and export each episode's frames.
for _i in range(3):
    ret = run_episode_vid(env, policy, scaler)
    if MAKE_GIF_FINAL:
        display_frames_as_gif(ret['frames'])
    if SAVE_VID_FINAL:
        outputdata = np.asarray(ret['frames']).astype(np.uint8)
        vidName = 'vids/ant_ppo_final_%d.mp4'%(_i)
        # Make sure the output directory exists before writing; nothing else
        # in this notebook creates it.
        os.makedirs(os.path.dirname(vidName), exist_ok=True)
        skvideo.io.vwrite(vidName,outputdata)
        print ("[%s] saved."%(vidName))

### Finished

In [None]:
# Guarded teardown: flip DO_CLOSE to True to close the logger and then the
# policy and value-function sessions, in that order.
DO_CLOSE = False # There is no turning back.
if DO_CLOSE:
    logger.close()
    for _net in (policy, val_func):
        _net.close_sess()