In [11]:
import Config as C
import numpy as np
np.random.seed(C.SEED)
import random
random.seed(C.SEED)
from RewardFnSpace import *
import pickle
import more_itertools
from AcrobotUtils import *
from scipy.spatial.distance import pdist, squareform
import os
import os.path as osp
from A2C import *
from PlotUtils import *
from Eval import *
from rlpyt.samplers.serial.sampler import SerialSampler
from rlpyt.samplers.parallel.gpu.sampler import GpuSampler
from datetime import datetime

In [2]:

def findSamplesInTrajs (stateSamples, trajs) : 
    """ 
    For each state sample, find all indices (i, k) such that
    the kth state in the ith trajectory is approximately the 
    state sample.

    Two states are considered approximately equal when their 
    Euclidean distance is strictly below C.STATE_SIMILARITY_THRESH.

    Parameters
    ----------
    stateSamples : 2-D array with one state per row.
    trajs : sequence of trajectories. Each trajectory is an 
        iterable of 3-tuples whose first element is the state; 
        the other two elements are ignored here.

    Returns
    -------
    A list with one entry per state sample. Entry j is a list of 
    (i, k) pairs, ordered by trajectory index i. The step index k 
    is a NumPy integer as produced by np.where.
    """
    # Imported locally because the notebook's import cell only brings
    # pdist and squareform into scope.
    from scipy.spatial.distance import cdist
    nSamples = stateSamples.shape[0]
    stateOccurenceIndices = [[] for _ in range(nSamples)]
    for i, traj in enumerate(trajs) : 
        trajStates = np.stack([s for s, _, _ in traj])
        # cdist yields the (nSamples x trajLen) cross-distance block 
        # directly, without the sample-sample and state-state 
        # distances that a pdist over the concatenation would compute.
        D = cdist(stateSamples, trajStates)
        for j, k in zip(*np.where(D < C.STATE_SIMILARITY_THRESH)) : 
            stateOccurenceIndices[j].append((i, k))
    return stateOccurenceIndices


In [3]:
def generateStateSamples (trajs, nSamples, nTrajs=10) : 
    """ 
    Draw nSamples states, without replacement, from the states 
    visited in the first nTrajs trajectories.

    States from every time step are pooled, not only the initial 
    state of each trajectory.

    Parameters
    ----------
    trajs : sequence of trajectories. Each trajectory is an 
        iterable of 3-tuples whose first element is the state.
    nSamples : number of states to draw.
    nTrajs : how many leading trajectories to pool states from 
        (default 10).

    Returns
    -------
    np.ndarray built from the list of sampled states.

    Raises
    ------
    ValueError (from random.sample) if nSamples exceeds the number 
    of pooled states.
    """
    candidateStates = [s for t in trajs[:nTrajs] for s, _, _ in t]
    states = random.sample(candidateStates, k=nSamples)
    return np.array(states)

In [4]:
def estimateValueFromTrajs (stateIndices, trajs, rewardFn) :
    """ 
    Estimate the value of each sampled state from the expert 
    trajectories.

    Parameters
    ----------
    stateIndices : one list per sampled state, holding (i, j) 
        pairs meaning "step j of trajectory i".
    trajs : sequence of trajectories of 3-tuples whose first 
        element is the state.
    rewardFn : callable applied to each state to get its reward.

    Returns
    -------
    A list with one value per entry of stateIndices: the mean, 
    over that state's occurrences, of the return computed on the 
    trajectory suffix starting at the occurrence. A state with no 
    occurrences yields nan, since np.mean of an empty list is nan.
    """
    def returnFromStep (trajIdx, stepIdx) : 
        # Rewards along the remainder of the trajectory from stepIdx on.
        rewards = [rewardFn(s) for s, _, _ in trajs[trajIdx][stepIdx:]]
        # Element 0 is presumably the discounted return from the first 
        # step of the suffix -- TODO confirm against computeReturns.
        return computeReturns(rewards, C.DISCOUNT)[0]

    values = []
    for occurrences in stateIndices : 
        suffixReturns = [returnFromStep(i, j) for i, j in occurrences]
        values.append(np.mean(suffixReturns))
    return values

In [5]:
def estimateValueFromAgent (stateSamples, agent, rewardFn) : 
    """
    Estimate the value of each sampled state by sampling the 
    given agent's policy from that state.

    For every state a new CategoricalPgAgent is created from 
    agent.state_dict() and a SerialSampler is built whose 
    environments receive s0=<state> and reward=rewardFn. One batch 
    is collected and the 'DiscountedReturn' entries of the 
    returned trajectory infos are averaged.

    Parameters
    ----------
    stateSamples : iterable of states passed as s0 to the env.
        Presumably s0 fixes the initial state -- TODO confirm 
        against rlpyt_make.
    agent : object exposing state_dict(), used to initialise the 
        per-state agent copy.
    rewardFn : reward function forwarded to the environment.

    Returns
    -------
    A list with one mean discounted return per state sample.
    """
    def rolloutReturn (startState) : 
        affinity = dict(
            cuda_idx=C.CUDA_IDX, 
            workers_cpus=list(range(C.N_PARALLEL)))
        # A new agent per state, initialised from the given weights.
        agentCopy = CategoricalPgAgent(
            AcrobotNet, 
            initial_model_state_dict=agent.state_dict())
        envKwargs = dict(
            id=C.ENV, 
            reward=rewardFn, 
            internalStateFn=C.INTERNAL_STATE_FN, 
            s0=startState)
        sampler = SerialSampler(
            EnvCls=rlpyt_make,
            env_kwargs=envKwargs,
            batch_T=C.HORIZON,
            batch_B=C.BATCH_B,
            max_decorrelation_steps=0,
        )
        sampler.initialize(
            agent=agentCopy,
            affinity=affinity,
            seed=C.SEED
        )
        _, trajInfos = sampler.obtain_samples(0)
        discounted = [info['DiscountedReturn'] for info in trajInfos]
        return np.mean(discounted)

    return [rolloutReturn(s) for s in stateSamples]


In [6]:
def getAllTraj () : 
    """ 
    Load every pickled trajectory stored in C.TRAJ_DIR.

    Returns
    -------
    A list with one unpickled object per directory entry, ordered 
    by file name so that the result is the same on every platform 
    and run.
    """
    def loadPickle (f) : 
        # NOTE(security): pickle.load can execute arbitrary code; only
        # point C.TRAJ_DIR at trajectory files you generated yourself.
        with open(osp.join(C.TRAJ_DIR, f), 'rb') as fd : 
            return pickle.load(fd)
    # os.listdir returns entries in arbitrary order. Sorting keeps 
    # downstream steps that depend on trajectory order (e.g. taking 
    # the first few trajectories) reproducible.
    return [loadPickle(f) for f in sorted(os.listdir(C.TRAJ_DIR))]

In [7]:
def irl (rewardFnSpace) :
    """
    Search the given space of reward functions for one that 
    explains the expert's policy.

    Each of the C.IRL_ITR iterations takes the space's current 
    reward function, trains an agent for it with findOptimalAgent, 
    estimates per-basis values for the expert (from trajectories) 
    and for the trained agent (from rollouts), and passes both to 
    rewardFnSpace.refine.

    Returns
    -------
    (agent, rewardFn) from the last iteration. rewardFn is the 
    function obtained at the start of that iteration, i.e. before 
    the final refine call.

    NOTE(review): if C.IRL_ITR is 0 the loop never runs and the 
    return statement raises UnboundLocalError.
    """
    trajs = getAllTraj()
    stateSamples = generateStateSamples(trajs, C.IRL_STATE_SAMPLES)
    indices = findSamplesInTrajs(stateSamples, trajs) 
    for _itr in range(C.IRL_ITR) : 
        rewardFn = rewardFnSpace.current()
        agent = findOptimalAgent(rewardFn)
        # NOTE(review): env is never read afterwards; kept because 
        # rlpyt_make may have side effects -- confirm before removing.
        env = rlpyt_make('Acrobot-v1', internalStateFn=C.INTERNAL_STATE_FN)
        # Value of the sampled states under the expert, one list per basis.
        expertValues = []
        for basis in rewardFnSpace.rewardBases : 
            expertValues.append(estimateValueFromTrajs(indices, trajs, basis))
        # Value of the same states under the freshly trained agent.
        inferiorValues = []
        for basis in rewardFnSpace.rewardBases : 
            inferiorValues.append(
                estimateValueFromAgent(stateSamples, agent, basis))
        rewardFnSpace.refine(expertValues, inferiorValues)
    return agent, rewardFn

In [8]:
# Run the IRL algorithm on the reward-function space built from
# acrobotRewardBases(pi/2, pi/2), keeping the returned agent and reward function.
agent, rewardFn = irl(RewardFnSpace(acrobotRewardBases(np.pi / 2, np.pi / 2)))

d:\ASUS\Trí tuệ nhân tạo nâng cao - CS211\Inverse Reinforcement Learning\IRL\a2c_acrobot-v1\run_0
D:\ASUS\Trí tuệ nhân tạo nâng cao - CS211\Inverse Reinforcement Learning\Inverse-Reinforcement-Learning\rlpyt\data
2024-11-14 10:29:22.303702  | a2c_acrobot-v1_0 Runner  master CPU affinity: [0, 1, 2, 3, 4, 5, 6, 7].
2024-11-14 10:29:22.304701  | a2c_acrobot-v1_0 Runner  master Torch threads: 4.
[32musing seed 3260[0m
2024-11-14 10:29:22.457821  | a2c_acrobot-v1_0 Sampler decorrelating envs, max steps: 400


  deprecation(
  deprecation(
  if not isinstance(terminated, (bool, np.bool8)):
  deprecation(


2024-11-14 10:29:24.930146  | a2c_acrobot-v1_0 Serial Sampler initialized.
2024-11-14 10:29:24.931144  | a2c_acrobot-v1_0 Running 1872 iterations of minibatch RL.
2024-11-14 10:29:28.617870  | a2c_acrobot-v1_0 Optimizing over 312 iterations.


0% [##############################] 100% | ETA: 00:00:00

2024-11-14 10:31:06.796075  | a2c_acrobot-v1_0 itr #311 saving snapshot...
2024-11-14 10:31:06.831336  | a2c_acrobot-v1_0 itr #311 saved
2024-11-14 10:31:06.831336  | -----------------------------  -------------
2024-11-14 10:31:06.843349  | Diagnostics/NewCompletedTrajs    199
2024-11-14 10:31:06.843349  | Diagnostics/StepsInTrajWindow  50000
2024-11-14 10:31:06.843349  | Diagnostics/Iteration            311
2024-11-14 10:31:06.843349  | Diagnostics/CumTime (s)           98.2115
2024-11-14 10:31:06.843349  | Diagnostics/CumSteps           99840
2024-11-14 10:31:06.843349  | Diagnostics/CumCompletedTrajs    199
2024-11-14 10:31:06.843349  | Diagnostics/CumUpdates           312
2024-11-14 10:31:06.843349  | Diagnostics/StepsPerSecond      1016.58
2024-11-14 10:31:06.843349  | Diagnostics/UpdatesPerSecond       3.17682
2024-11-14 10:31:06.843349  | Diagnostics/ReplayRatio            1
2024-11-14 10:31:06.858977  | Diagnostics/CumReplayRatio         1
2024-11-14 10:31:06.858977  | LengthA


Total time elapsed: 00:01:38
0% [##############################] 100% | ETA: 00:00:00

2024-11-14 10:32:48.289430  | a2c_acrobot-v1_0 itr #623 saving snapshot...
2024-11-14 10:32:48.299936  | a2c_acrobot-v1_0 itr #623 saved
2024-11-14 10:32:48.305446  | -----------------------------  --------------
2024-11-14 10:32:48.305446  | Diagnostics/NewCompletedTrajs     201
2024-11-14 10:32:48.305446  | Diagnostics/StepsInTrajWindow   50000
2024-11-14 10:32:48.305446  | Diagnostics/Iteration             623
2024-11-14 10:32:48.305446  | Diagnostics/CumTime (s)           199.686
2024-11-14 10:32:48.305446  | Diagnostics/CumSteps           199680
2024-11-14 10:32:48.305446  | Diagnostics/CumCompletedTrajs     400
2024-11-14 10:32:48.305446  | Diagnostics/CumUpdates            624
2024-11-14 10:32:48.305446  | Diagnostics/StepsPerSecond        983.896
2024-11-14 10:32:48.305446  | Diagnostics/UpdatesPerSecond        3.07468
2024-11-14 10:32:48.321072  | Diagnostics/ReplayRatio             1
2024-11-14 10:32:48.321072  | Diagnostics/CumReplayRatio          1
2024-11-14 10:32:48.32107


Total time elapsed: 00:01:41
0% [##############################] 100% | ETA: 00:00:00

2024-11-14 10:34:27.449196  | a2c_acrobot-v1_0 itr #935 saving snapshot...
2024-11-14 10:34:27.449196  | a2c_acrobot-v1_0 itr #935 saved
2024-11-14 10:34:27.464817  | -----------------------------  --------------
2024-11-14 10:34:27.464817  | Diagnostics/NewCompletedTrajs     199
2024-11-14 10:34:27.464817  | Diagnostics/StepsInTrajWindow   50000
2024-11-14 10:34:27.464817  | Diagnostics/Iteration             935
2024-11-14 10:34:27.464817  | Diagnostics/CumTime (s)           298.829
2024-11-14 10:34:27.464817  | Diagnostics/CumSteps           299520
2024-11-14 10:34:27.464817  | Diagnostics/CumCompletedTrajs     599
2024-11-14 10:34:27.464817  | Diagnostics/CumUpdates            936
2024-11-14 10:34:27.464817  | Diagnostics/StepsPerSecond       1007.02
2024-11-14 10:34:27.464817  | Diagnostics/UpdatesPerSecond        3.14695
2024-11-14 10:34:27.464817  | Diagnostics/ReplayRatio             1
2024-11-14 10:34:27.464817  | Diagnostics/CumReplayRatio          1
2024-11-14 10:34:27.464817


Total time elapsed: 00:01:39
0% [##############################] 100% | ETA: 00:00:00

2024-11-14 10:35:38.952898  | a2c_acrobot-v1_0 itr #1247 saving snapshot...
2024-11-14 10:35:38.952898  | a2c_acrobot-v1_0 itr #1247 saved
2024-11-14 10:35:38.968519  | -----------------------------  --------------
2024-11-14 10:35:38.968519  | Diagnostics/NewCompletedTrajs     201
2024-11-14 10:35:38.968519  | Diagnostics/StepsInTrajWindow   50000
2024-11-14 10:35:38.968519  | Diagnostics/Iteration            1247
2024-11-14 10:35:38.968519  | Diagnostics/CumTime (s)           370.333
2024-11-14 10:35:38.968519  | Diagnostics/CumSteps           399360
2024-11-14 10:35:38.968519  | Diagnostics/CumCompletedTrajs     800
2024-11-14 10:35:38.968519  | Diagnostics/CumUpdates           1248
2024-11-14 10:35:38.968519  | Diagnostics/StepsPerSecond       1396.29
2024-11-14 10:35:38.968519  | Diagnostics/UpdatesPerSecond        4.36341
2024-11-14 10:35:38.968519  | Diagnostics/ReplayRatio             1
2024-11-14 10:35:38.968519  | Diagnostics/CumReplayRatio          1
2024-11-14 10:35:38.9685


Total time elapsed: 00:01:11
0% [##############################] 100% | ETA: 00:00:00

2024-11-14 10:36:51.812500  | a2c_acrobot-v1_0 itr #1559 saving snapshot...
2024-11-14 10:36:51.812500  | a2c_acrobot-v1_0 itr #1559 saved
2024-11-14 10:36:51.812500  | -----------------------------  --------------
2024-11-14 10:36:51.812500  | Diagnostics/NewCompletedTrajs     199
2024-11-14 10:36:51.812500  | Diagnostics/StepsInTrajWindow   50000
2024-11-14 10:36:51.828121  | Diagnostics/Iteration            1559
2024-11-14 10:36:51.828121  | Diagnostics/CumTime (s)           443.193
2024-11-14 10:36:51.828121  | Diagnostics/CumSteps           499200
2024-11-14 10:36:51.828121  | Diagnostics/CumCompletedTrajs     999
2024-11-14 10:36:51.828121  | Diagnostics/CumUpdates           1560
2024-11-14 10:36:51.828121  | Diagnostics/StepsPerSecond       1370.31
2024-11-14 10:36:51.828121  | Diagnostics/UpdatesPerSecond        4.28221
2024-11-14 10:36:51.828121  | Diagnostics/ReplayRatio             1
2024-11-14 10:36:51.828121  | Diagnostics/CumReplayRatio          1
2024-11-14 10:36:51.8281


Total time elapsed: 00:01:12
0% [##############################] 100% | ETA: 00:00:00

2024-11-14 10:38:15.138075  | a2c_acrobot-v1_0 itr #1871 saving snapshot...
2024-11-14 10:38:15.138075  | a2c_acrobot-v1_0 itr #1871 saved
2024-11-14 10:38:15.138075  | -----------------------------  --------------
2024-11-14 10:38:15.154883  | Diagnostics/NewCompletedTrajs     200
2024-11-14 10:38:15.154883  | Diagnostics/StepsInTrajWindow   50000
2024-11-14 10:38:15.154883  | Diagnostics/Iteration            1871
2024-11-14 10:38:15.154883  | Diagnostics/CumTime (s)           526.518
2024-11-14 10:38:15.154883  | Diagnostics/CumSteps           599040
2024-11-14 10:38:15.154883  | Diagnostics/CumCompletedTrajs    1199
2024-11-14 10:38:15.154883  | Diagnostics/CumUpdates           1872
2024-11-14 10:38:15.154883  | Diagnostics/StepsPerSecond       1198.19
2024-11-14 10:38:15.154883  | Diagnostics/UpdatesPerSecond        3.74435
2024-11-14 10:38:15.154883  | Diagnostics/ReplayRatio             1
2024-11-14 10:38:15.154883  | Diagnostics/CumReplayRatio          1
2024-11-14 10:38:15.1709


Total time elapsed: 00:01:23
  deprecation(
  deprecation(
  if not isinstance(terminated, (bool, np.bool8)):


2024-11-14 10:38:16.033559  | Sampler decorrelating envs, max steps: 0
2024-11-14 10:38:16.034555  | Serial Sampler initialized.


  deprecation(


2024-11-14 10:38:20.075832  | Sampler decorrelating envs, max steps: 0
2024-11-14 10:38:20.077827  | Serial Sampler initialized.
2024-11-14 10:38:23.349060  | Sampler decorrelating envs, max steps: 0
2024-11-14 10:38:23.349060  | Serial Sampler initialized.
2024-11-14 10:38:27.445908  | Sampler decorrelating envs, max steps: 0
2024-11-14 10:38:27.445908  | Serial Sampler initialized.
2024-11-14 10:38:31.605656  | Sampler decorrelating envs, max steps: 0
2024-11-14 10:38:31.605656  | Serial Sampler initialized.
2024-11-14 10:38:34.688285  | Sampler decorrelating envs, max steps: 0
2024-11-14 10:38:34.688285  | Serial Sampler initialized.
2024-11-14 10:38:38.015845  | Sampler decorrelating envs, max steps: 0
2024-11-14 10:38:38.015845  | Serial Sampler initialized.
2024-11-14 10:38:41.354095  | Sampler decorrelating envs, max steps: 0
2024-11-14 10:38:41.354095  | Serial Sampler initialized.
2024-11-14 10:38:44.668806  | Sampler decorrelating envs, max steps: 0
2024-11-14 10:38:44.668806



2024-11-14 10:46:57.530539  | a2c_acrobot-v1_0 Serial Sampler initialized.
2024-11-14 10:46:57.530539  | a2c_acrobot-v1_0 Running 1872 iterations of minibatch RL.
2024-11-14 10:46:57.530539  | a2c_acrobot-v1_0 Optimizing over 312 iterations.


0% [##############################] 100% | ETA: 00:00:00

2024-11-14 10:48:37.374592  | a2c_acrobot-v1_0 itr #311 saving snapshot...
2024-11-14 10:48:37.395057  | a2c_acrobot-v1_0 itr #311 saved
2024-11-14 10:48:37.400040  | -----------------------------  --------------
2024-11-14 10:48:37.401038  | Diagnostics/NewCompletedTrajs    360
2024-11-14 10:48:37.402036  | Diagnostics/StepsInTrajWindow  47686
2024-11-14 10:48:37.403033  | Diagnostics/Iteration            311
2024-11-14 10:48:37.404031  | Diagnostics/CumTime (s)           99.8665
2024-11-14 10:48:37.405654  | Diagnostics/CumSteps           99840
2024-11-14 10:48:37.407654  | Diagnostics/CumCompletedTrajs    360
2024-11-14 10:48:37.408652  | Diagnostics/CumUpdates           312
2024-11-14 10:48:37.409650  | Diagnostics/StepsPerSecond       999.735
2024-11-14 10:48:37.410646  | Diagnostics/UpdatesPerSecond       3.12417
2024-11-14 10:48:37.411643  | Diagnostics/ReplayRatio            1
2024-11-14 10:48:37.412640  | Diagnostics/CumReplayRatio         1
2024-11-14 10:48:37.414137  | Lengt


Total time elapsed: 00:01:39
0% [##############################] 100% | ETA: 00:00:00

2024-11-14 10:50:13.945468  | a2c_acrobot-v1_0 itr #623 saving snapshot...
2024-11-14 10:50:13.955183  | a2c_acrobot-v1_0 itr #623 saved
2024-11-14 10:50:13.964733  | -----------------------------  ---------------
2024-11-14 10:50:13.966324  | Diagnostics/NewCompletedTrajs     201
2024-11-14 10:50:13.967329  | Diagnostics/StepsInTrajWindow   50000
2024-11-14 10:50:13.968323  | Diagnostics/Iteration             623
2024-11-14 10:50:13.973627  | Diagnostics/CumTime (s)           196.427
2024-11-14 10:50:13.975622  | Diagnostics/CumSteps           199680
2024-11-14 10:50:13.976621  | Diagnostics/CumCompletedTrajs     561
2024-11-14 10:50:13.978614  | Diagnostics/CumUpdates            624
2024-11-14 10:50:13.979611  | Diagnostics/StepsPerSecond       1033.97
2024-11-14 10:50:13.980609  | Diagnostics/UpdatesPerSecond        3.23115
2024-11-14 10:50:13.981606  | Diagnostics/ReplayRatio             1
2024-11-14 10:50:13.982604  | Diagnostics/CumReplayRatio          1
2024-11-14 10:50:13.98360


Total time elapsed: 00:01:36
0% [##############################] 100% | ETA: 00:00:00

2024-11-14 10:51:51.647361  | a2c_acrobot-v1_0 itr #935 saving snapshot...
2024-11-14 10:51:51.653590  | a2c_acrobot-v1_0 itr #935 saved
2024-11-14 10:51:51.658574  | -----------------------------  ----------------
2024-11-14 10:51:51.660569  | Diagnostics/NewCompletedTrajs     198
2024-11-14 10:51:51.660569  | Diagnostics/StepsInTrajWindow   50000
2024-11-14 10:51:51.662565  | Diagnostics/Iteration             935
2024-11-14 10:51:51.663563  | Diagnostics/CumTime (s)           294.125
2024-11-14 10:51:51.665038  | Diagnostics/CumSteps           299520
2024-11-14 10:51:51.666036  | Diagnostics/CumCompletedTrajs     759
2024-11-14 10:51:51.668031  | Diagnostics/CumUpdates            936
2024-11-14 10:51:51.668031  | Diagnostics/StepsPerSecond       1021.92
2024-11-14 10:51:51.669029  | Diagnostics/UpdatesPerSecond        3.1935
2024-11-14 10:51:51.671024  | Diagnostics/ReplayRatio             1
2024-11-14 10:51:51.671755  | Diagnostics/CumReplayRatio          1
2024-11-14 10:51:51.67231


Total time elapsed: 00:01:37
0% [##############################] 100% | ETA: 00:00:00

2024-11-14 10:53:40.777390  | a2c_acrobot-v1_0 itr #1247 saving snapshot...
2024-11-14 10:53:40.777390  | a2c_acrobot-v1_0 itr #1247 saved
2024-11-14 10:53:40.802321  | -----------------------------  ----------------
2024-11-14 10:53:40.804316  | Diagnostics/NewCompletedTrajs     202
2024-11-14 10:53:40.806312  | Diagnostics/StepsInTrajWindow   50000
2024-11-14 10:53:40.807308  | Diagnostics/Iteration            1247
2024-11-14 10:53:40.807972  | Diagnostics/CumTime (s)           403.264
2024-11-14 10:53:40.809936  | Diagnostics/CumSteps           399360
2024-11-14 10:53:40.810939  | Diagnostics/CumCompletedTrajs     961
2024-11-14 10:53:40.811935  | Diagnostics/CumUpdates           1248
2024-11-14 10:53:40.813931  | Diagnostics/StepsPerSecond        914.8
2024-11-14 10:53:40.814928  | Diagnostics/UpdatesPerSecond        2.85875
2024-11-14 10:53:40.815925  | Diagnostics/ReplayRatio             1
2024-11-14 10:53:40.816922  | Diagnostics/CumReplayRatio          1
2024-11-14 10:53:40.817


Total time elapsed: 00:01:49
0% [##############################] 100% | ETA: 00:00:00

2024-11-14 10:55:23.617036  | a2c_acrobot-v1_0 itr #1559 saving snapshot...
2024-11-14 10:55:23.622494  | a2c_acrobot-v1_0 itr #1559 saved
2024-11-14 10:55:23.634388  | -----------------------------  ----------------
2024-11-14 10:55:23.635478  | Diagnostics/NewCompletedTrajs     198
2024-11-14 10:55:23.636922  | Diagnostics/StepsInTrajWindow   50000
2024-11-14 10:55:23.638447  | Diagnostics/Iteration            1559
2024-11-14 10:55:23.639457  | Diagnostics/CumTime (s)           506.092
2024-11-14 10:55:23.640448  | Diagnostics/CumSteps           499200
2024-11-14 10:55:23.641444  | Diagnostics/CumCompletedTrajs    1159
2024-11-14 10:55:23.642441  | Diagnostics/CumUpdates           1560
2024-11-14 10:55:23.644172  | Diagnostics/StepsPerSecond        970.939
2024-11-14 10:55:23.645350  | Diagnostics/UpdatesPerSecond        3.03419
2024-11-14 10:55:23.646395  | Diagnostics/ReplayRatio             1
2024-11-14 10:55:23.647392  | Diagnostics/CumReplayRatio          1
2024-11-14 10:55:23.6


Total time elapsed: 00:01:42
0% [##############################] 100% | ETA: 00:00:00

2024-11-14 10:57:01.899444  | a2c_acrobot-v1_0 itr #1871 saving snapshot...
2024-11-14 10:57:01.915062  | a2c_acrobot-v1_0 itr #1871 saved
2024-11-14 10:57:01.915062  | -----------------------------  ----------------
2024-11-14 10:57:01.915062  | Diagnostics/NewCompletedTrajs     201
2024-11-14 10:57:01.915062  | Diagnostics/StepsInTrajWindow   50000
2024-11-14 10:57:01.915062  | Diagnostics/Iteration            1871
2024-11-14 10:57:01.930683  | Diagnostics/CumTime (s)           604.385
2024-11-14 10:57:01.930683  | Diagnostics/CumSteps           599040
2024-11-14 10:57:01.930683  | Diagnostics/CumCompletedTrajs    1360
2024-11-14 10:57:01.930683  | Diagnostics/CumUpdates           1872
2024-11-14 10:57:01.930683  | Diagnostics/StepsPerSecond       1015.74
2024-11-14 10:57:01.930683  | Diagnostics/UpdatesPerSecond        3.1742
2024-11-14 10:57:01.930683  | Diagnostics/ReplayRatio             1
2024-11-14 10:57:01.930683  | Diagnostics/CumReplayRatio          1
2024-11-14 10:57:01.930


Total time elapsed: 00:01:38


2024-11-14 10:57:02.604894  | Sampler decorrelating envs, max steps: 0
2024-11-14 10:57:02.604894  | Serial Sampler initialized.
2024-11-14 10:57:05.677603  | Sampler decorrelating envs, max steps: 0
2024-11-14 10:57:05.677603  | Serial Sampler initialized.
2024-11-14 10:57:08.970516  | Sampler decorrelating envs, max steps: 0
2024-11-14 10:57:08.970516  | Serial Sampler initialized.
2024-11-14 10:57:12.010988  | Sampler decorrelating envs, max steps: 0
2024-11-14 10:57:12.010988  | Serial Sampler initialized.
2024-11-14 10:57:15.130416  | Sampler decorrelating envs, max steps: 0
2024-11-14 10:57:15.130416  | Serial Sampler initialized.
2024-11-14 10:57:18.264017  | Sampler decorrelating envs, max steps: 0
2024-11-14 10:57:18.264017  | Serial Sampler initialized.
2024-11-14 10:57:21.447240  | Sampler decorrelating envs, max steps: 0
2024-11-14 10:57:21.447240  | Serial Sampler initialized.
2024-11-14 10:57:24.518330  | Sampler decorrelating envs, max steps: 0
2024-11-14 10:57:24.518330

0% [##############################] 100% | ETA: 00:00:00

2024-11-14 11:07:43.501683  | a2c_acrobot-v1_0 itr #311 saving snapshot...
2024-11-14 11:07:43.510851  | a2c_acrobot-v1_0 itr #311 saved
2024-11-14 11:07:43.516836  | -----------------------------  ------------
2024-11-14 11:07:43.518829  | Diagnostics/NewCompletedTrajs    847
2024-11-14 11:07:43.519829  | Diagnostics/StepsInTrajWindow   9498
2024-11-14 11:07:43.520825  | Diagnostics/Iteration            311
2024-11-14 11:07:43.523817  | Diagnostics/CumTime (s)          112.248
2024-11-14 11:07:43.524814  | Diagnostics/CumSteps           99840
2024-11-14 11:07:43.525812  | Diagnostics/CumCompletedTrajs    847
2024-11-14 11:07:43.526808  | Diagnostics/CumUpdates           312
2024-11-14 11:07:43.527806  | Diagnostics/StepsPerSecond       889.46
2024-11-14 11:07:43.528805  | Diagnostics/UpdatesPerSecond       2.77956
2024-11-14 11:07:43.529800  | Diagnostics/ReplayRatio            1
2024-11-14 11:07:43.530797  | Diagnostics/CumReplayRatio         1
2024-11-14 11:07:43.531797  | LengthAve


Total time elapsed: 00:01:52
0% [##############################] 100% | ETA: 00:00:00

2024-11-14 11:09:36.251846  | a2c_acrobot-v1_0 itr #623 saving snapshot...
2024-11-14 11:09:36.259827  | a2c_acrobot-v1_0 itr #623 saved
2024-11-14 11:09:36.268693  | -----------------------------  --------------
2024-11-14 11:09:36.269690  | Diagnostics/NewCompletedTrajs    1114
2024-11-14 11:09:36.270687  | Diagnostics/StepsInTrajWindow    9580
2024-11-14 11:09:36.271686  | Diagnostics/Iteration             623
2024-11-14 11:09:36.272746  | Diagnostics/CumTime (s)           224.996
2024-11-14 11:09:36.273755  | Diagnostics/CumSteps           199680
2024-11-14 11:09:36.274772  | Diagnostics/CumCompletedTrajs    1961
2024-11-14 11:09:36.275769  | Diagnostics/CumUpdates            624
2024-11-14 11:09:36.276765  | Diagnostics/StepsPerSecond        885.515
2024-11-14 11:09:36.278760  | Diagnostics/UpdatesPerSecond        2.76723
2024-11-14 11:09:36.279759  | Diagnostics/ReplayRatio             1
2024-11-14 11:09:36.280754  | Diagnostics/CumReplayRatio          1
2024-11-14 11:09:36.28307


Total time elapsed: 00:01:52
0% [##############################] 100% | ETA: 00:00:00

2024-11-14 11:11:22.559463  | a2c_acrobot-v1_0 itr #935 saving snapshot...
2024-11-14 11:11:22.559463  | a2c_acrobot-v1_0 itr #935 saved
2024-11-14 11:11:22.575085  | -----------------------------  --------------
2024-11-14 11:11:22.575085  | Diagnostics/NewCompletedTrajs    1058
2024-11-14 11:11:22.575085  | Diagnostics/StepsInTrajWindow    9636
2024-11-14 11:11:22.575085  | Diagnostics/Iteration             935
2024-11-14 11:11:22.575085  | Diagnostics/CumTime (s)           331.295
2024-11-14 11:11:22.575085  | Diagnostics/CumSteps           299520
2024-11-14 11:11:22.575085  | Diagnostics/CumCompletedTrajs    3019
2024-11-14 11:11:22.586094  | Diagnostics/CumUpdates            936
2024-11-14 11:11:22.586094  | Diagnostics/StepsPerSecond        939.241
2024-11-14 11:11:22.586094  | Diagnostics/UpdatesPerSecond        2.93513
2024-11-14 11:11:22.586094  | Diagnostics/ReplayRatio             1
2024-11-14 11:11:22.591103  | Diagnostics/CumReplayRatio          1
2024-11-14 11:11:22.59110


Total time elapsed: 00:01:46
0% [##############################] 100% | ETA: 00:00:00

2024-11-14 11:13:06.827318  | a2c_acrobot-v1_0 itr #1247 saving snapshot...
2024-11-14 11:13:06.836288  | a2c_acrobot-v1_0 itr #1247 saved
2024-11-14 11:13:06.838923  | -----------------------------  --------------
2024-11-14 11:13:06.838923  | Diagnostics/NewCompletedTrajs    1070
2024-11-14 11:13:06.838923  | Diagnostics/StepsInTrajWindow    8726
2024-11-14 11:13:06.838923  | Diagnostics/Iteration            1247
2024-11-14 11:13:06.838923  | Diagnostics/CumTime (s)           435.572
2024-11-14 11:13:06.838923  | Diagnostics/CumSteps           399360
2024-11-14 11:13:06.838923  | Diagnostics/CumCompletedTrajs    4089
2024-11-14 11:13:06.838923  | Diagnostics/CumUpdates           1248
2024-11-14 11:13:06.853931  | Diagnostics/StepsPerSecond        957.442
2024-11-14 11:13:06.853931  | Diagnostics/UpdatesPerSecond        2.99201
2024-11-14 11:13:06.856716  | Diagnostics/ReplayRatio             1
2024-11-14 11:13:06.857712  | Diagnostics/CumReplayRatio          1
2024-11-14 11:13:06.858


Total time elapsed: 00:01:44
0% [##############################] 100% | ETA: 00:00:00

2024-11-14 11:14:56.679949  | a2c_acrobot-v1_0 itr #1559 saving snapshot...
2024-11-14 11:14:56.687884  | a2c_acrobot-v1_0 itr #1559 saved
2024-11-14 11:14:56.697740  | -----------------------------  ---------------
2024-11-14 11:14:56.699738  | Diagnostics/NewCompletedTrajs    1066
2024-11-14 11:14:56.700471  | Diagnostics/StepsInTrajWindow    9465
2024-11-14 11:14:56.702471  | Diagnostics/Iteration            1559
2024-11-14 11:14:56.703467  | Diagnostics/CumTime (s)           545.423
2024-11-14 11:14:56.704466  | Diagnostics/CumSteps           499200
2024-11-14 11:14:56.705515  | Diagnostics/CumCompletedTrajs    5155
2024-11-14 11:14:56.707304  | Diagnostics/CumUpdates           1560
2024-11-14 11:14:56.708299  | Diagnostics/StepsPerSecond        908.871
2024-11-14 11:14:56.709298  | Diagnostics/UpdatesPerSecond        2.84022
2024-11-14 11:14:56.711291  | Diagnostics/ReplayRatio             1
2024-11-14 11:14:56.712289  | Diagnostics/CumReplayRatio          1
2024-11-14 11:14:56.71


Total time elapsed: 00:01:49
0% [##############################] 100% | ETA: 00:00:00

2024-11-14 11:16:43.615533  | a2c_acrobot-v1_0 itr #1871 saving snapshot...
2024-11-14 11:16:43.621045  | a2c_acrobot-v1_0 itr #1871 saved
2024-11-14 11:16:43.621045  | -----------------------------  ---------------
2024-11-14 11:16:43.621045  | Diagnostics/NewCompletedTrajs    1072
2024-11-14 11:16:43.621045  | Diagnostics/StepsInTrajWindow    9478
2024-11-14 11:16:43.636672  | Diagnostics/Iteration            1871
2024-11-14 11:16:43.636672  | Diagnostics/CumTime (s)           652.356
2024-11-14 11:16:43.636672  | Diagnostics/CumSteps           599040
2024-11-14 11:16:43.636672  | Diagnostics/CumCompletedTrajs    6227
2024-11-14 11:16:43.636672  | Diagnostics/CumUpdates           1872
2024-11-14 11:16:43.636672  | Diagnostics/StepsPerSecond        933.667
2024-11-14 11:16:43.636672  | Diagnostics/UpdatesPerSecond        2.91771
2024-11-14 11:16:43.636672  | Diagnostics/ReplayRatio             1
2024-11-14 11:16:43.636672  | Diagnostics/CumReplayRatio          1
2024-11-14 11:16:43.63


Total time elapsed: 00:01:46


2024-11-14 11:16:44.295361  | Sampler decorrelating envs, max steps: 0
2024-11-14 11:16:44.295361  | Serial Sampler initialized.
2024-11-14 11:16:47.636028  | Sampler decorrelating envs, max steps: 0
2024-11-14 11:16:47.636028  | Serial Sampler initialized.
2024-11-14 11:16:51.116675  | Sampler decorrelating envs, max steps: 0
2024-11-14 11:16:51.116675  | Serial Sampler initialized.
2024-11-14 11:16:54.408723  | Sampler decorrelating envs, max steps: 0
2024-11-14 11:16:54.408723  | Serial Sampler initialized.
2024-11-14 11:16:57.607709  | Sampler decorrelating envs, max steps: 0
2024-11-14 11:16:57.607709  | Serial Sampler initialized.
2024-11-14 11:17:00.695520  | Sampler decorrelating envs, max steps: 0
2024-11-14 11:17:00.695520  | Serial Sampler initialized.
2024-11-14 11:17:03.862059  | Sampler decorrelating envs, max steps: 0
2024-11-14 11:17:03.862059  | Serial Sampler initialized.
2024-11-14 11:17:07.045111  | Sampler decorrelating envs, max steps: 0
2024-11-14 11:17:07.045111

0% [##############################] 100% | ETA: 00:00:00

2024-11-14 11:26:55.717713  | a2c_acrobot-v1_0 itr #311 saving snapshot...
2024-11-14 11:26:55.725852  | a2c_acrobot-v1_0 itr #311 saved
2024-11-14 11:26:55.741854  | -----------------------------  -------------
2024-11-14 11:26:55.741854  | Diagnostics/NewCompletedTrajs    198
2024-11-14 11:26:55.741854  | Diagnostics/StepsInTrajWindow  50000
2024-11-14 11:26:55.741854  | Diagnostics/Iteration            311
2024-11-14 11:26:55.741854  | Diagnostics/CumTime (s)           92.7361
2024-11-14 11:26:55.741854  | Diagnostics/CumSteps           99840
2024-11-14 11:26:55.741854  | Diagnostics/CumCompletedTrajs    198
2024-11-14 11:26:55.741854  | Diagnostics/CumUpdates           312
2024-11-14 11:26:55.750044  | Diagnostics/StepsPerSecond      1076.6
2024-11-14 11:26:55.750044  | Diagnostics/UpdatesPerSecond       3.36438
2024-11-14 11:26:55.750044  | Diagnostics/ReplayRatio            1
2024-11-14 11:26:55.750044  | Diagnostics/CumReplayRatio         1
2024-11-14 11:26:55.750044  | LengthAv


Total time elapsed: 00:01:32
0% [##############################] 100% | ETA: 00:00:00

2024-11-14 11:28:29.431167  | a2c_acrobot-v1_0 itr #623 saving snapshot...
2024-11-14 11:28:29.439333  | a2c_acrobot-v1_0 itr #623 saved
2024-11-14 11:28:29.447339  | -----------------------------  --------------
2024-11-14 11:28:29.447339  | Diagnostics/NewCompletedTrajs     203
2024-11-14 11:28:29.447339  | Diagnostics/StepsInTrajWindow   50000
2024-11-14 11:28:29.447339  | Diagnostics/Iteration             623
2024-11-14 11:28:29.447339  | Diagnostics/CumTime (s)           186.442
2024-11-14 11:28:29.447339  | Diagnostics/CumSteps           199680
2024-11-14 11:28:29.455334  | Diagnostics/CumCompletedTrajs     401
2024-11-14 11:28:29.455334  | Diagnostics/CumUpdates            624
2024-11-14 11:28:29.455334  | Diagnostics/StepsPerSecond       1065.47
2024-11-14 11:28:29.455334  | Diagnostics/UpdatesPerSecond        3.32958
2024-11-14 11:28:29.455334  | Diagnostics/ReplayRatio             1
2024-11-14 11:28:29.455334  | Diagnostics/CumReplayRatio          1
2024-11-14 11:28:29.455334


Total time elapsed: 00:01:33
0% [##############################] 100% | ETA: 00:00:00

2024-11-14 11:30:02.681551  | a2c_acrobot-v1_0 itr #935 saving snapshot...
2024-11-14 11:30:02.689673  | a2c_acrobot-v1_0 itr #935 saved
2024-11-14 11:30:02.697702  | -----------------------------  --------------
2024-11-14 11:30:02.697702  | Diagnostics/NewCompletedTrajs     197
2024-11-14 11:30:02.697702  | Diagnostics/StepsInTrajWindow   50000
2024-11-14 11:30:02.697702  | Diagnostics/Iteration             935
2024-11-14 11:30:02.697702  | Diagnostics/CumTime (s)           279.692
2024-11-14 11:30:02.697702  | Diagnostics/CumSteps           299520
2024-11-14 11:30:02.697702  | Diagnostics/CumCompletedTrajs     598
2024-11-14 11:30:02.705700  | Diagnostics/CumUpdates            936
2024-11-14 11:30:02.705700  | Diagnostics/StepsPerSecond       1070.67
2024-11-14 11:30:02.705700  | Diagnostics/UpdatesPerSecond        3.34583
2024-11-14 11:30:02.705700  | Diagnostics/ReplayRatio             1
2024-11-14 11:30:02.705700  | Diagnostics/CumReplayRatio          1
2024-11-14 11:30:02.705700


Total time elapsed: 00:01:33
0% [##############################] 100% | ETA: 00:00:00

2024-11-14 11:31:37.256598  | a2c_acrobot-v1_0 itr #1247 saving snapshot...
2024-11-14 11:31:37.264600  | a2c_acrobot-v1_0 itr #1247 saved
2024-11-14 11:31:37.272600  | -----------------------------  --------------
2024-11-14 11:31:37.272600  | Diagnostics/NewCompletedTrajs     201
2024-11-14 11:31:37.272600  | Diagnostics/StepsInTrajWindow   50000
2024-11-14 11:31:37.280601  | Diagnostics/Iteration            1247
2024-11-14 11:31:37.280601  | Diagnostics/CumTime (s)           374.267
2024-11-14 11:31:37.280601  | Diagnostics/CumSteps           399360
2024-11-14 11:31:37.280601  | Diagnostics/CumCompletedTrajs     799
2024-11-14 11:31:37.280601  | Diagnostics/CumUpdates           1248
2024-11-14 11:31:37.280601  | Diagnostics/StepsPerSecond       1055.67
2024-11-14 11:31:37.288610  | Diagnostics/UpdatesPerSecond        3.29897
2024-11-14 11:31:37.288610  | Diagnostics/ReplayRatio             1
2024-11-14 11:31:37.288610  | Diagnostics/CumReplayRatio          1
2024-11-14 11:31:37.2886


Total time elapsed: 00:01:34
0% [##############################] 100% | ETA: 00:00:00

2024-11-14 11:33:11.652919  | a2c_acrobot-v1_0 itr #1559 saving snapshot...
2024-11-14 11:33:11.661153  | a2c_acrobot-v1_0 itr #1559 saved
2024-11-14 11:33:11.661153  | -----------------------------  --------------
2024-11-14 11:33:11.669154  | Diagnostics/NewCompletedTrajs     198
2024-11-14 11:33:11.669154  | Diagnostics/StepsInTrajWindow   50000
2024-11-14 11:33:11.669154  | Diagnostics/Iteration            1559
2024-11-14 11:33:11.669154  | Diagnostics/CumTime (s)           468.663
2024-11-14 11:33:11.669154  | Diagnostics/CumSteps           499200
2024-11-14 11:33:11.669154  | Diagnostics/CumCompletedTrajs     997
2024-11-14 11:33:11.677153  | Diagnostics/CumUpdates           1560
2024-11-14 11:33:11.677153  | Diagnostics/StepsPerSecond       1057.67
2024-11-14 11:33:11.677153  | Diagnostics/UpdatesPerSecond        3.30521
2024-11-14 11:33:11.677153  | Diagnostics/ReplayRatio             1
2024-11-14 11:33:11.677153  | Diagnostics/CumReplayRatio          1
2024-11-14 11:33:11.6771


Total time elapsed: 00:01:34
0% [##############################] 100% | ETA: 00:00:00

2024-11-14 11:34:45.620312  | a2c_acrobot-v1_0 itr #1871 saving snapshot...
2024-11-14 11:34:45.628313  | a2c_acrobot-v1_0 itr #1871 saved
2024-11-14 11:34:45.636314  | -----------------------------  -------------
2024-11-14 11:34:45.636314  | Diagnostics/NewCompletedTrajs     201
2024-11-14 11:34:45.636314  | Diagnostics/StepsInTrajWindow   50000
2024-11-14 11:34:45.636314  | Diagnostics/Iteration            1871
2024-11-14 11:34:45.644471  | Diagnostics/CumTime (s)           562.639
2024-11-14 11:34:45.644471  | Diagnostics/CumSteps           599040
2024-11-14 11:34:45.644471  | Diagnostics/CumCompletedTrajs    1198
2024-11-14 11:34:45.644471  | Diagnostics/CumUpdates           1872
2024-11-14 11:34:45.644471  | Diagnostics/StepsPerSecond       1062.41
2024-11-14 11:34:45.644471  | Diagnostics/UpdatesPerSecond        3.32003
2024-11-14 11:34:45.644471  | Diagnostics/ReplayRatio             1
2024-11-14 11:34:45.652473  | Diagnostics/CumReplayRatio          1
2024-11-14 11:34:45.65247


Total time elapsed: 00:01:33


2024-11-14 11:34:46.274457  | Sampler decorrelating envs, max steps: 0
2024-11-14 11:34:46.282461  | Serial Sampler initialized.
2024-11-14 11:34:49.401935  | Sampler decorrelating envs, max steps: 0
2024-11-14 11:34:49.401935  | Serial Sampler initialized.
2024-11-14 11:34:52.811161  | Sampler decorrelating envs, max steps: 0
2024-11-14 11:34:52.816382  | Serial Sampler initialized.
2024-11-14 11:34:56.236211  | Sampler decorrelating envs, max steps: 0
2024-11-14 11:34:56.236211  | Serial Sampler initialized.
2024-11-14 11:34:59.338092  | Sampler decorrelating envs, max steps: 0
2024-11-14 11:34:59.338092  | Serial Sampler initialized.
2024-11-14 11:35:02.368232  | Sampler decorrelating envs, max steps: 0
2024-11-14 11:35:02.368232  | Serial Sampler initialized.
2024-11-14 11:35:05.543083  | Sampler decorrelating envs, max steps: 0
2024-11-14 11:35:05.543083  | Serial Sampler initialized.
2024-11-14 11:35:08.637963  | Sampler decorrelating envs, max steps: 0
2024-11-14 11:35:08.637963

0% [##############################] 100% | ETA: 00:00:00

2024-11-14 11:45:10.109706  | a2c_acrobot-v1_0 itr #311 saving snapshot...
2024-11-14 11:45:10.121338  | a2c_acrobot-v1_0 itr #311 saved
2024-11-14 11:45:10.125852  | -----------------------------  ------------
2024-11-14 11:45:10.125852  | Diagnostics/NewCompletedTrajs    576
2024-11-14 11:45:10.125852  | Diagnostics/StepsInTrajWindow  10609
2024-11-14 11:45:10.125852  | Diagnostics/Iteration            311
2024-11-14 11:45:10.125852  | Diagnostics/CumTime (s)          106.008
2024-11-14 11:45:10.125852  | Diagnostics/CumSteps           99840
2024-11-14 11:45:10.133140  | Diagnostics/CumCompletedTrajs    576
2024-11-14 11:45:10.134148  | Diagnostics/CumUpdates           312
2024-11-14 11:45:10.134148  | Diagnostics/StepsPerSecond       941.816
2024-11-14 11:45:10.134148  | Diagnostics/UpdatesPerSecond       2.94318
2024-11-14 11:45:10.134148  | Diagnostics/ReplayRatio            1
2024-11-14 11:45:10.138250  | Diagnostics/CumReplayRatio         1
2024-11-14 11:45:10.138250  | LengthAv


Total time elapsed: 00:01:45
0% [##############################] 100% | ETA: 00:00:00

2024-11-14 11:47:14.123918  | a2c_acrobot-v1_0 itr #623 saving snapshot...
2024-11-14 11:47:14.134425  | a2c_acrobot-v1_0 itr #623 saved
2024-11-14 11:47:14.134425  | -----------------------------  --------------
2024-11-14 11:47:14.150055  | Diagnostics/NewCompletedTrajs    1072
2024-11-14 11:47:14.150055  | Diagnostics/StepsInTrajWindow    9318
2024-11-14 11:47:14.150055  | Diagnostics/Iteration             623
2024-11-14 11:47:14.150055  | Diagnostics/CumTime (s)           230.021
2024-11-14 11:47:14.150055  | Diagnostics/CumSteps           199680
2024-11-14 11:47:14.150055  | Diagnostics/CumCompletedTrajs    1648
2024-11-14 11:47:14.150055  | Diagnostics/CumUpdates            624
2024-11-14 11:47:14.150055  | Diagnostics/StepsPerSecond        805.08
2024-11-14 11:47:14.150055  | Diagnostics/UpdatesPerSecond        2.51587
2024-11-14 11:47:14.150055  | Diagnostics/ReplayRatio             1
2024-11-14 11:47:14.150055  | Diagnostics/CumReplayRatio          1
2024-11-14 11:47:14.150055


Total time elapsed: 00:02:03
0% [##############################] 100% | ETA: 00:00:00

2024-11-14 11:49:28.264116  | a2c_acrobot-v1_0 itr #935 saving snapshot...
2024-11-14 11:49:28.275916  | a2c_acrobot-v1_0 itr #935 saved
2024-11-14 11:49:28.275916  | -----------------------------  --------------
2024-11-14 11:49:28.275916  | Diagnostics/NewCompletedTrajs    1116
2024-11-14 11:49:28.275916  | Diagnostics/StepsInTrajWindow    9193
2024-11-14 11:49:28.287929  | Diagnostics/Iteration             935
2024-11-14 11:49:28.288327  | Diagnostics/CumTime (s)           364.162
2024-11-14 11:49:28.288327  | Diagnostics/CumSteps           299520
2024-11-14 11:49:28.288327  | Diagnostics/CumCompletedTrajs    2764
2024-11-14 11:49:28.288327  | Diagnostics/CumUpdates            936
2024-11-14 11:49:28.288327  | Diagnostics/StepsPerSecond        744.289
2024-11-14 11:49:28.288327  | Diagnostics/UpdatesPerSecond        2.3259
2024-11-14 11:49:28.288327  | Diagnostics/ReplayRatio             1
2024-11-14 11:49:28.296327  | Diagnostics/CumReplayRatio          1
2024-11-14 11:49:28.296327


Total time elapsed: 00:02:14
0% [##############################] 100% | ETA: 00:00:00

2024-11-14 11:51:34.386839  | a2c_acrobot-v1_0 itr #1247 saving snapshot...
2024-11-14 11:51:34.406701  | a2c_acrobot-v1_0 itr #1247 saved
2024-11-14 11:51:34.411904  | -----------------------------  --------------
2024-11-14 11:51:34.411904  | Diagnostics/NewCompletedTrajs    1078
2024-11-14 11:51:34.418949  | Diagnostics/StepsInTrajWindow    8978
2024-11-14 11:51:34.418949  | Diagnostics/Iteration            1247
2024-11-14 11:51:34.418949  | Diagnostics/CumTime (s)           490.293
2024-11-14 11:51:34.418949  | Diagnostics/CumSteps           399360
2024-11-14 11:51:34.418949  | Diagnostics/CumCompletedTrajs    3842
2024-11-14 11:51:34.418949  | Diagnostics/CumUpdates           1248
2024-11-14 11:51:34.418949  | Diagnostics/StepsPerSecond        791.559
2024-11-14 11:51:34.426956  | Diagnostics/UpdatesPerSecond        2.47362
2024-11-14 11:51:34.426956  | Diagnostics/ReplayRatio             1
2024-11-14 11:51:34.426956  | Diagnostics/CumReplayRatio          1
2024-11-14 11:51:34.426


Total time elapsed: 00:02:06
0% [##############################] 100% | ETA: 00:00:00

2024-11-14 11:54:15.657411  | a2c_acrobot-v1_0 itr #1559 saving snapshot...
2024-11-14 11:54:15.664977  | a2c_acrobot-v1_0 itr #1559 saved
2024-11-14 11:54:15.669967  | -----------------------------  --------------
2024-11-14 11:54:15.670966  | Diagnostics/NewCompletedTrajs    1065
2024-11-14 11:54:15.672960  | Diagnostics/StepsInTrajWindow    9408
2024-11-14 11:54:15.674078  | Diagnostics/Iteration            1559
2024-11-14 11:54:15.675074  | Diagnostics/CumTime (s)           651.552
2024-11-14 11:54:15.676072  | Diagnostics/CumSteps           499200
2024-11-14 11:54:15.677070  | Diagnostics/CumCompletedTrajs    4907
2024-11-14 11:54:15.678067  | Diagnostics/CumUpdates           1560
2024-11-14 11:54:15.680063  | Diagnostics/StepsPerSecond        619.127
2024-11-14 11:54:15.682056  | Diagnostics/UpdatesPerSecond        1.93477
2024-11-14 11:54:15.683052  | Diagnostics/ReplayRatio             1
2024-11-14 11:54:15.684563  | Diagnostics/CumReplayRatio          1
2024-11-14 11:54:15.684


Total time elapsed: 00:02:41
0% [##############################] 100% | ETA: 00:00:00

2024-11-14 11:56:02.698197  | a2c_acrobot-v1_0 itr #1871 saving snapshot...
2024-11-14 11:56:02.705176  | a2c_acrobot-v1_0 itr #1871 saved
2024-11-14 11:56:02.710592  | -----------------------------  --------------
2024-11-14 11:56:02.711586  | Diagnostics/NewCompletedTrajs    1079
2024-11-14 11:56:02.712583  | Diagnostics/StepsInTrajWindow    9296
2024-11-14 11:56:02.713582  | Diagnostics/Iteration            1871
2024-11-14 11:56:02.714579  | Diagnostics/CumTime (s)           758.592
2024-11-14 11:56:02.715576  | Diagnostics/CumSteps           599040
2024-11-14 11:56:02.716573  | Diagnostics/CumCompletedTrajs    5986
2024-11-14 11:56:02.717571  | Diagnostics/CumUpdates           1872
2024-11-14 11:56:02.718567  | Diagnostics/StepsPerSecond        932.734
2024-11-14 11:56:02.719565  | Diagnostics/UpdatesPerSecond        2.91479
2024-11-14 11:56:02.720563  | Diagnostics/ReplayRatio             1
2024-11-14 11:56:02.721561  | Diagnostics/CumReplayRatio          1
2024-11-14 11:56:02.722


Total time elapsed: 00:01:46


2024-11-14 11:56:03.357057  | Sampler decorrelating envs, max steps: 0
2024-11-14 11:56:03.358856  | Serial Sampler initialized.
2024-11-14 11:56:06.323993  | Sampler decorrelating envs, max steps: 0
2024-11-14 11:56:06.323993  | Serial Sampler initialized.
2024-11-14 11:56:09.397379  | Sampler decorrelating envs, max steps: 0
2024-11-14 11:56:09.397379  | Serial Sampler initialized.
2024-11-14 11:56:12.409612  | Sampler decorrelating envs, max steps: 0
2024-11-14 11:56:12.411607  | Serial Sampler initialized.
2024-11-14 11:56:15.317600  | Sampler decorrelating envs, max steps: 0
2024-11-14 11:56:15.318598  | Serial Sampler initialized.
2024-11-14 11:56:18.323441  | Sampler decorrelating envs, max steps: 0
2024-11-14 11:56:18.323441  | Serial Sampler initialized.
2024-11-14 11:56:21.260013  | Sampler decorrelating envs, max steps: 0
2024-11-14 11:56:21.261011  | Serial Sampler initialized.
2024-11-14 11:56:24.123325  | Sampler decorrelating envs, max steps: 0
2024-11-14 11:56:24.139893

In [12]:
# Imported at the top of the cell so a missing symbol fails before any
# plotting work is done. Kept explicit in case the wildcard import of `Eval`
# does not export this name -- TODO confirm and move to the notebook's
# import cell.
from Eval import simulateAgent_2

# Create a `results` directory under the current working directory.
# NOTE: os.getcwd() is the kernel's working directory, which may differ from
# the folder that contains this notebook.
current_dir = os.getcwd()
save_dir = osp.join(current_dir, 'results')
os.makedirs(save_dir, exist_ok=True)

# Grid over which the reward function is plotted: [-pi, pi) in steps of 0.1
# along both axes.
xRange = np.arange(-np.pi, np.pi, 0.1)
yRange = np.arange(-np.pi, np.pi, 0.1)


def toExternal(x, y):
    """
    Place (x, y) in the first two entries of a 4-vector whose remaining
    entries are 0 and convert it with toExternalStateRep.
    """
    return toExternalStateRep([x, y, 0, 0])


# Reward as a function of the two plotted coordinates only.
RFn = compose(rewardFn, toExternal)

# Draw the plot.
# NOTE(review): the recorded output of this cell shows
# "TypeError: FigureBase.gca() got an unexpected keyword argument
# 'projection'" and an empty figure. `gca` is not called in this cell, so the
# failure presumably originates inside `plotFunction` (PlotUtils); recent
# Matplotlib releases no longer accept keyword arguments in `gca()`. The fix
# belongs there (e.g. `fig.add_subplot(projection='3d')`) -- confirm.
plotFunction(RFn, xRange, yRange, 'theta1', 'theta2', 'R')

# Build a timestamped file name so earlier results are not overwritten.
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
filename = f'recovered_{timestamp}.png'
save_path = osp.join(save_dir, filename)

# Save the current figure in high quality. This must happen before
# plt.show(), since the figure may no longer be available afterwards.
plt.savefig(save_path, 
            dpi=300,              # high resolution
            bbox_inches='tight',  # trim surplus white border
            pad_inches=0.1,       # keep a small padding
            format='png')         # output file format

# Display the plot.
plt.show()

# Run the simulation with rendering enabled.
simulateAgent_2(agent, render=True)
# video_display, steps = simulateAgentFile(agent)

TypeError: FigureBase.gca() got an unexpected keyword argument 'projection'

<Figure size 640x480 with 0 Axes>