In [1]:
import os
import copy
import random
import gym
import pickle

import numpy as np
import pandas as pd
import tensorflow as tf
tf.keras.backend.set_floatx('float32')

from itertools import permutations
from sklearn.model_selection import KFold, GridSearchCV

from multiprocessing import set_start_method
import multiprocessing as mp

# `sys` is required for the sys.path manipulation below but is not imported
# anywhere else in this cell, so a fresh kernel would raise NameError here.
import sys

# Put the parent of the current working directory on the import path
# (presumably the repository root that contains the `seal` package imported
# right after this block -- confirm against the project layout).
path = os.path.abspath('..')
if path not in sys.path:
    sys.path.append(path)

from seal.agents.default_config import DEFAULT_CONFIG as config
# from seal.agents.dqn import DQNAgent
# from seal.agents.qr_dqn import QuantileAgent
from seal.agents.multi_head_dqn import MultiHeadDQNAgent
# from seal.agents.discrete_bcq import DiscreteBCQAgent

from seal.algos.kfold import CVS, KFoldCV
from seal.algos.advantage_learner import AdvantageLearner
from seal.algos.behavior_cloning import BehaviorCloning
from seal.algos.density_ratio import VisitationRatioModel
from seal.algos.fqe import FQE

def _build_split_agent(base_config, kf, idx, num_actions):
    """Construct the MultiHeadDQNAgent associated with split ``idx`` of ``kf``.

    The agent gets its own deep copy of ``base_config`` whose
    ``persistent_directory`` and ``checkpoint_path`` entries point at
    ``kf.agent_paths[idx]`` and ``kf.ckpt_paths[idx]`` respectively, so the
    caller's config is never modified.
    """
    split_config = copy.deepcopy(base_config)
    split_config['persistent_directory'] = kf.agent_paths[idx]
    split_config['checkpoint_path'] = kf.ckpt_paths[idx]
    return MultiHeadDQNAgent(num_actions=num_actions, config=split_config)


def one_step(seed):
    """Run one full replication of the experiment for a single random seed.

    For each of ``nfolds`` outer folds produced by ``CVS``:

    1. train one ``MultiHeadDQNAgent`` on the fold's training trajectories and
       one additional agent per inner split produced by ``KFoldCV``;
    2. fit a ``BehaviorCloning`` model on ``kf.trajs``;
    3. for every saved checkpoint, reload the agents, obtain ``qtildes`` from
       ``kf.update_q``, fit an ``AdvantageLearner`` on the row-centred
       ``qtildes``, and evaluate both the reloaded agent ('dqn') and the
       advantage learner ('seal') with ``FQE`` on the fold's test trajectories.

    Parameters
    ----------
    seed : int
        Seed applied to ``random``, NumPy and TensorFlow, and forwarded as
        ``random_state`` to ``CVS`` and ``KFoldCV``.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``(fold, ckpt)`` with columns ``'dqn'`` and ``'seal'``;
        each cell holds the ``values`` attribute of the corresponding fitted
        ``FQE`` object.
    """
    # Seed every RNG in scope. The stdlib generator is seeded as well in case
    # any downstream code draws from it (not verifiable from this file).
    random.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)

    data_path = './data/mh/rem/trajs_mh.pkl'
    nfolds = 5
    n_splits = 5
    ckpts = (np.arange(10) + 1)*5000  # 5000, 10000, ..., 50000

    num_actions = 5

    # Work on a private copy so the shared, module-level default config is not
    # mutated as a side effect of calling this function.
    cfg = copy.deepcopy(config)
    cfg['online'] = False
    cfg['lr'] = 5e-4
    cfg['decay_steps'] = 50000
    cfg['max_training_steps'] = 50000
    cfg['training_steps_to_checkpoint'] = 5000
    cfg['training_steps_to_eval'] = 100000
    cfg['hiddens'] = [64,64]
    cfg['double'] = False
    cfg['dueling'] = False
    cfg['num_heads'] = 200

    index = pd.MultiIndex.from_product([np.arange(nfolds), ckpts])
    columns = ['dqn',  'seal']
    rets = pd.DataFrame(index=index, columns=columns)

    # Both FQE evaluators share exactly the same hyper-parameters.
    fqe_kwargs = dict(num_actions=num_actions, activation='tanh',
                      hiddens=cfg['hiddens'], max_iter=100, eps=0.0015)

    print('-'*20, 'start', '-'*20)
    cvs = CVS(data_path, n_splits=nfolds, random_state=seed)
    cvs.split()
    for fold in range(nfolds):
        train_path = cvs.train_paths[fold] + 'trajs.pkl'
        kf = KFoldCV(train_path, n_trajs=None, n_splits=n_splits, shuffle=False, random_state=seed)
        kf.split()

        print('-'*20, 'training agent', '-'*20)
        # Agent trained with the fold-level directories exposed by `kf`.
        cfg['persistent_directory'] = kf.agent_path
        cfg['checkpoint_path'] = kf.ckpt_path
        agent = MultiHeadDQNAgent(num_actions=num_actions, config=cfg)
        agent.learn()

        print('-'*20, 'training agents', '-'*20)
        # agent_1, ..., agent_K: one agent per inner split.
        for idx in range(kf.n_splits):
            _build_split_agent(cfg, kf, idx, num_actions).learn()

        # Held-out trajectories used for fitted Q evaluation.
        # NOTE: pickle.load can execute arbitrary code; only use this on
        # trajectory files generated locally by this pipeline.
        test_path = cvs.test_paths[fold] + 'trajs.pkl'
        with open(test_path, 'rb') as f:
            trajs = pickle.load(f)

        print('-'*20, 'behavior cloning', '-'*20)
        # Behavior cloning on the (state, action) pairs of every transition.
        bc = BehaviorCloning(num_actions=num_actions)
        bc_states = np.array([transition[0] for traj in kf.trajs for transition in traj])
        bc_actions = np.array([transition[1] for traj in kf.trajs for transition in traj])
        bc.train(bc_states, bc_actions)

        for ckpt in ckpts:
            print('-'*20, 'ckpt: ', ckpt, '-'*20)
            ckpt_name = 'offline_rem_{}.ckpt'.format(ckpt)

            # Fresh agent objects restored to the weights saved at `ckpt`.
            agent = MultiHeadDQNAgent(num_actions=num_actions, config=cfg)
            agent.load(kf.ckpt_path + ckpt_name)

            agents = []
            for idx in range(kf.n_splits):
                agent_idx = _build_split_agent(cfg, kf, idx, num_actions)
                agent_idx.load(kf.ckpt_paths[idx] + ckpt_name)
                agents.append(agent_idx)
            # The second return value (raw q-values) is not used by the
            # methods reported below.
            states, _, qtildes = kf.update_q(agents, bc)

            print('-'*20, 'adv learner', '-'*20)
            # Centre each row so the targets sum to zero across actions.
            advs2 = qtildes - qtildes.mean(axis=1, keepdims=True)
            agent2 = AdvantageLearner(num_actions=num_actions)
            agent2._train(states, advs2)

            print('-'*20, 'fqe on dqn & seal', '-'*20)
            fqe_dqn = FQE(agent.greedy_actions, **fqe_kwargs)
            fqe_dqn.train(trajs)
            fqe_seal = FQE(agent2.greedy_actions, **fqe_kwargs)
            fqe_seal.train(trajs)

            rets.loc[(fold, ckpt), 'dqn'] = fqe_dqn.values
            rets.loc[(fold, ckpt), 'seal'] = fqe_seal.values

    return rets

In [2]:
save_path = './data/mh/rem/'

# Run one replication per seed (0..4) in five worker processes. Using the pool
# as a context manager guarantees the workers are shut down even when a
# replication raises; the previous close()-only version leaked them on error
# and never joined them on success.
with mp.Pool(5) as pool:
    # pool.map returns the per-seed results as a list in input (seed) order.
    rets = pool.map(one_step, range(5))

with open(os.path.join(save_path, 'rets_rem_mh.pkl'), 'wb') as f:
    pickle.dump(rets, f)

-------------------- --------------------start -------------------- start -------------------- --------------------
start
 --------------------
----------------------------------------  startstart -------------------- --------------------

-------------------- training agent --------------------
-------------------- training agent --------------------
-------------------- training agent --------------------
-------------------- training agent ----------------------------------------
 training agent --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/209652396/fold0/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/218175338/fold0/train/agent/trajs.pkl!Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/932136058/fold0/train/agent/trajs.pkl!

Refresh buffer every 1000000 sampling!Refresh buffer every 1000000 sampling!



To chang


saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/717354021/fold0/train/agent/ckpt/offline_rem_50000.ckpt
-------------------- training agents --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/717354021/fold0/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/218175338/fold0/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/209652396/fold0/train/agent0/ckpt/offline_rem_50000.ckpt
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/209652396/fold0/train/agent1/trajs1.pkl!
R




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/218175338/fold0/train/agent2/ckpt/offline_rem_5000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/717354021/fold0/train/agent2/ckpt/offline_rem_10000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/932136058/fold0/train/agent2/ckpt/offline_rem_15000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/798842024/fold0/train/agent2/ckpt/offline_rem_15000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/209652396/fold0/train/agent2/ckpt/offline_rem_5000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/218175338/fold0/train/age


saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/209652396/fold0/train/agent2/ckpt/offline_rem_40000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/218175338/fold0/train/agent2/ckpt/offline_rem_45000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/932136058/fold0/train/agent3/ckpt/offline_rem_5000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/717354021/fold0/train/agent2/ckpt/offline_rem_50000.ckpt
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/717354021/fold0/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by defaul


saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/798842024/fold0/train/agent4/ckpt/offline_rem_5000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/218175338/fold0/train/agent3/ckpt/offline_rem_50000.ckpt
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/218175338/fold0/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/798842024/fold0/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/798842024/fold0/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/798842024/fold0/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disa

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/932136058/fold0/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/932136058/fold0/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/932136058/fold0/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/932136058/fold0/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/932136058/fold0/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, 

-----iteration:  25 target diff:  0.0024833439458527293 values:  -60.617283 ----- 

-----iteration:  51 target diff:  0.0018593455394680177 values:  -58.949497 ----- 

-----iteration:  52 target diff:  0.0029575425810411783 values:  -58.955532 ----- 

-----iteration:  26 target diff:  0.002788698431927137 values:  -60.623722 ----- 

-----iteration:  53 target diff:  0.0017048069875908714 values:  -58.96209 ----- 

-----iteration:  54 target diff:  0.0016437984679717123 values:  -58.95609 ----- 

-----iteration:  55 target diff:  0.0015092051685619006 values:  -58.947838 ----- 

-----iteration:  27 target diff:  0.003029822025459828 values:  -60.58183 ----- 

-----iteration:  56 target diff:  0.0013704532098610088 values:  -58.9469 ----- 

-------------------- ckpt:  10000 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/798842024/fold0/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!
-----iteration: Loaded trajectories from 




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  29 target diff:  0.003016805235603603 values:  -60.522453 ----- 

-----iteration:  30 target diff:  0.0027395281950122093 values:  -60.55297 ----- 

-----iteration:  31 target diff:  0.0024275034887600564 values:  -60.56391 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  32 target diff:  

 




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  12 target diff:  0.002


-------------------- adv learner --------------------
-----iteration:  2 target diff:  0.0030446253824663676 values:  -55.75779 ----- 

-----iteration:  69 target diff:  0.0017681150976479537 values:  -58.925232 ----- 

-----iteration:  2 target diff:  0.0014420779350617528 values:  -52.93372 ----- 

-----iteration:  28 target diff:  0.0026157689904225878 values:  -59.705635 ----- 

-----iteration:  3 target diff:  0.0024787351940984905 values:  -55.741558 ----- 

-----iteration:  70 target diff:  0.0018460679527425288 values:  -58.89259 ----- 

-----iteration:  4 target diff:  0.0023633804620985304 values:  -55.69654 ----- 

-----iteration:  71 target diff:  0.0021975426624182646 values:  -58.870293 ----- 

-----iteration:  29 target diff:  0.0020016711591772525 values:  -59.688755 ----- 

-----iteration:  5 target diff:  0.0023860944670493363 values:  -55.707355 ----- 

-----iteration:  72 target diff:  0.002281683011006573 values:  -58.87686 ----- 

-----iteration:  30 target diff:

-----iteration:  8 target diff:  0.002527385399862898 values:  -53.534554 ----- 

-----iteration:  9 target diff:  0.0015958234789767605 values:  -53.420406 ----- 

-----iteration:  7 target diff:  0.0017862099182861793 values:  -51.97962 ----- 

-----iteration:  0 target diff:  0.922346066371538 values:  -58.44663 ----- 

-----iteration:  10 target diff:  0.002216530329407719 values:  -53.423504 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.
 target diff: 
 0.0039220592922810295 values:  -58.525494 ----- 

-----iteration:  11 target diff:  0.001412653166096684 values:  -53.269024 ----- 

-------------------- ckpt:  10000 --------------------
-----iteration:  Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/re

0.9202410993439233
 values:  -53.237576 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  9 target diff:  0.00136623852278

target diff:  0.002059191237591772 values:  -53.727093 ----- 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/218175338/fold0/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/218175338/fold0/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/218175338/fold0/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
-----iteration: Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/218175338/fold0/train/agent4/trajs4.pkl! 
7Refresh buffer every 1000000 sampling! target diff: 


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the b

-----iteration:  30 target diff:  0.002013606165017045 values:  -59.48383 ----- 

-----iteration:  12 target diff:  0.00386954872346143 values:  -53.91196 ----- 

-----iteration:  31 target diff:  0.002313950588831114 values:  -59.52825 ----- 

-----iteration:  0 target diff:  0.9195861208860503 values:  -61.666565 ----- 

-----iteration:  13 target diff:  0.001832818251157028 values:  -53.845787 ----- 

-----iteration:  32 target diff:  0.002326248208490128 values:  -59.546963 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  33 target diff:  0.0024847806627216534 values:  -59.522747 ----------iteration:   14 target diff: 

 0.0020969874434604682 values:  -53.74427 ----- 

-----iteration:  1 target diff:  0.0033846628920118356 values:  -61.679142 ----- 

-----iteration:  34 target diff:  0.002088074037851392 values:  -59.490856 ----- 

-----iteration:  15 target diff:  0.001812295283881145 values:  -53.76939 ----- 

-----iteration:  2 target diff:  0

-----iteration:  3 target diff:  0.002386903630694239 values:  -53.48419 ----- 

-----iteration:  44 target diff:  0.0018429382367336198 values:  -59.65534 ----- 

-----iteration:  2 target diff:  0.0021542370404684505 values:  -53.60728 ----- 

-----iteration:  4 target diff:  0.0019000169732954902 values:  -----iteration: -53.55249  45 ----- target diff: 
 
0.0017065946226201727 values:  -59.66217 ----- 

-----iteration:  3 target diff:  0.0020561504885815217 values:  -53.63848 ----- 

-----iteration:  5 target diff:  0.002414440139804723 values:  -53.595364 ----- -----iteration: 
 
46 target diff:  0.0018254280339599954 values:  -59.639492 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  4 target diff:  0.00185

-----iteration:  61 target diff:  0.0018281260892071562 values:  -59.767048 ----- 

-----iteration:  1 target diff:  0.0033286227454639543 values:  -54.414707 ----- 

-----iteration:  4 target diff:  0.001477642104884857 values:  -61.63258 ----- 

-----iteration:  1 target diff:  0.0023506153224737147 values:  -51.652386 ----- 

-----iteration:  62 target diff:  0.001897133176208942 values:  -59.736267 ----- 

-----iteration:  2 target diff:  0.002057488759181104 values:  -51.696495 ----- 

-----iteration:  2 target diff:  0.002507218786872628 values:  -54.439728 ----- 
-----iteration: 
 63 target diff:  0.0016663527686009442 values:  -59.739628 ----- 

-----iteration:  3 target diff:  0.0021412139657397376 values:  -51.726543 ----- 

-----iteration:  64 -----iteration:  target diff: 3  0.0017409814796644377 target diff: values:   0.0021954256194692346-59.698467  -----values:   -54.49115
 
----- 

-----iteration:  65 target diff:  0.0019022374375985318 values:  -59.72099 ----- 

-----i

-----iteration:  2 target diff:  0.0016328026961217345 values:  -60.96988 ----- 

-----iteration:  2 target diff:  0.002553171868347803 values:  -51.151752 ----- 

-----iteration:  17 target diff:  0.004104854982028589 values:  -55.372562 ----- 

-----iteration:  78 target diff:  0.0021189117540925226 values:  -59.80935 ----- 

-----iteration:  3 target diff:  0.0029010584056567627 values: -----iteration:   18-51.14738  target diff: ----- 0.0021787868544478214  

values:  -55.368965 ----- 

-----iteration:  79 target diff:  0.0016873982807719863 values:  -59.79308 ----- 

-----iteration:  3 target diff:  0.0025445002715657914 values:  -60.99355 ----- 

-----iteration:  4 target diff:  0.0025282460110745823 values:  -51.18166 ----- 

-----iteration:  19 target diff:  0.0019638395812583867 values:  -55.28549 ----- 

-----iteration:  80 target diff:  0.0017981493742851161 values:  -59.841953 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('floa


-----iteration:  0 target diff:  0.9202039335149708 values:  -58.437263 ----- 

-----iteration:  15-----iteration:  target diff:   0.002838824956063466534 values:   -61.332222target diff:   ----- 0.001999780132327671

 values:  -55.043888 ----- 

-----iteration:  1 target diff:  0.001998755288755342 values:  -53.588432 ----- 

-----iteration:  1 target diff:  0.00300269602813048 values:  -58.489704 ----- 

-----iteration:  16 -----iteration: target diff:   350.0016360179452703076  values: target diff:   0.0018635163226478535-61.311428 ----- values:   
-55.00103
 ----- 

-----iteration:  0 target diff:  0.9233613445449226 values:  -53.535095 ----- 

-----iteration:  2 target diff:  0.0031464942455238162 values:  -58.5254 ----- 

-----iteration:  2 target diff:  0.0025447459265552407 values:  -53.634033 ----- 

-----iteration:  36 target diff:  0.0014997285089327808 values:  -54.984703 ----- 

-----iteration: --------------------  ckpt: 17  target diff:  150000.0016925401685577326  valu




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.
target diff: 
 0.0025122028520254205 values:  -58.58966 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner 

-----iteration:  16 target diff:  -----iteration: 0.00198531855948113  15 values: target diff:   -58.86240.0016867646636544676  values: -----  
-53.69703 
----- 

-----iteration:  14 target diff:  0.002235683570250138 values:  -53.701748 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  17 target diff:  0.0018195196953749385 values:  -58.896732 ----- 

-----iteration:  15 target diff:  0.00212849901481647 values:  -53.70418 ----- 

-----iteration:  16 target diff:  0.0019811532729247993 values:  -53.74664 ----- 

-----iteration:  0 target diff:  0.9204788912008657 values:  -53.16265-----iteration:   -----16  

target diff:  0.0012356638791792543 values:  -53.65041 ----- 

-------------------- ckpt:  20000 -----iter





To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('flo


-----iteration:  26 target diff:  0.00236644607803728 values:  -59.223057 ----- 

-----iteration:  27 target diff:  0.00254200418010914 values:  -59.2037 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  28 target diff:  0.0025505507249345906 values:  -59.242878 ----- 

-----iteration:  0 target diff:  0.9197017581411528 values:  -59.79892 ----- 

-----iteration:  0 target diff:  0.9229223552492758 values:  -51.90463 -

27 target diff:  0.0024096737569019484 values:  -52.545757Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/932136058/fold0/train/agent0/trajs0.pkl! 
----- Refresh buffer every 1000000 sampling!


Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/932136058/fold0/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/932136058/fold0/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/932136058/fold0/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/932136058/fold0/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  32 target diff:  0.0022365365420138907 values:  -52.653667 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iterati

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/717354021/fold0/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
-----iteration:  8 target diff:  0.0027212158523096373 values: Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/717354021/fold0/train/agent2/trajs2.pkl! 
-51.900146Refresh buffer every 1000000 sampling!
 ----- 

-----iteration:  2 target diff:  0.0018336457137916777 values:  -59.359234 ----- 

-----iteration:  53 target diff:  0.0018602562582202865 values:  -51.704803 ----- 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/717354021/fold0/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/717354021/fold0/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just thi



-------------------- fqe on dqn & sale --------------------
-----iteration:  32 target diff:  0.0021234104576599955 values:  -58.564087 ----- 

-----iteration:  15 target diff:  0.001607940456802338 values:  -51.71137 ----- 

-----iteration:  33 target diff:  0.0020829141529199336 values:  -58.506268 ----- 

-----iteration:  34 target diff:  0.0023721593046559177 values:  -58.4745 ----- 

-----iteration:  16 target diff:  0.002024104981667867 values:  -51.80194 ----- 

-----iteration:  35 target diff:  0.002264887519950675 values:  -58.426727 ----- 

-----iteration:  17 target diff:  0.002391946806452574 values:  -51.74838 ----- 

-----iteration:  36 target diff:  0.0020115178732072352 values:  -58.40305 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/717354021/fold0/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/717354021/fold0/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
-----iteration:  -----iteration: 20  7target diff:   0.002358778779373937 target diff: values:  0.002077881713710909 Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/717354021/fold0/train/agent2/trajs2.pkl! 
-54.245876values: Refresh buffer every 1000000 sampling!
 ----- 

 -52.682407 ----- 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/717354021/fold0/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/717354021/fold0/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by de

-----iteration: -----iteration:   8645  target diff: target diff:  0.0021053246372677325 values:  0.002273561333505828  -58.13773values:   ----- -52.64013 
----- 


-----iteration:  25 target diff:  0.0012897311914006001 values:  -52.415596 ----- 

-------------------- ckpt:  30000 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/209652396/fold0/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/209652396/fold0/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
87 target diff:  -----iteration:  0.001818939935091036246  target diff: values:   -58.1015970.0022969259886832334  -----values:   -52.57669
 
-----Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/209652396/fold0/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  17-----iteration:   88 target diff: target diff:  0.0016773873653735964 0.0024257202506458393  values: values:   -58.107395-59.907066 ----- -----  


-----iteration:  
47 target diff:  0.001919222442350733 values:  -52.48712 ----- 

-----iteration:  48 target diff:  0.0022050673524333536 values:  -52.320435 ----- 

-----iter


-----iteration:  9 target diff:  0.002679283789319109 values:  -53.36874 ----- 

-----iteration:  60 target diff:  0.0021248648757099886 values:  -51.338867 ----- 

-----iteration:  10 target diff:  0.0024023632321699367 values:  -53.317364 ----- 

-----iteration:  61 target diff:  0.002074664029379233 values:  -51.26212 ----- 

-----iteration:  0 target diff:  0.9218604707117802 -----iteration: values:  11  target diff: -52.887302  0.00252843234566918-----  values:  -53.337543
 
----- 

-----iteration:  62 target diff:  0.0016303282797249868 values:  -51.19668 ----- 

-----iteration:  24 target diff:  0.0032673981549514275 values:  -60.140068 ----- 

-----iteration:  1 target diff:  0.0026780412762420465 values:  -52.91981 ----- 

-----iteration:  12 target diff:  0.0015065202652041396 values:  -53.431545 ----- 

-----iteration:  63 target diff:  0.002247161676082197 values:  -51.16609 ----- 

-----iteration:  2 target diff:  0.002071644551453825 values:  -52.99693 ----- 

-----itera




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  4 target diff:  0.0028144468778858924 values:  -53.08919 ----- 



To change al




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  3 target diff:  0.0026347





To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  19 target diff:  0.00190


-----iteration:  17 target diff:  0.0024056632876973145 values:  -60.816624 ----- 

-----iteration:  31 target diff:  0.002110942220668655 values:  -54.71654 ----- 

-----iteration:  38 target diff:  0.0028515928599608486 values:  -53.640728 ----- 

-----iteration:  50 target diff:  0.0019594446002077597 values:  -52.183353 ----- 

-----iteration:  0 target diff:  0.9213622140817258 values:  -57.16436 ----- 

-----iteration:  32 target diff:  0.0019994845447659864 values:  -54.64266 ----- 

-----iteration:  39 target diff:  0.0018422857644097323 values:  -53.54237 ----- 

-----iteration:  18 target diff:  0.002428198343524346 values:  -60.861626 ----- 

-----iteration:  1 target diff:  0.0030860168124389616 values:  -57.221333 ----- 

-----iteration:  40 target diff:  0.00284216325595562 values:  -53.482693 ----- 

-----iteration: -----iteration:   5133 target diff:   target diff: 0.00211628209701966  0.0018918181671942161 values:  -54.587833values:   -52.07456----- ----- 
 
-----iter

48 target diff:  0.0015294479226389828 values:  -54.029778 ----- 

-----iteration:  59 target diff:  0.002339934709221533 values:  -53.363415 ----- 

-----iteration:  30 target diff:  0.001987061326513156 values:  -60.984486 ----- 

-----iteration:  20 target diff:  0.002060331306097531 values:  -57.87492 ----- 

-----iteration:  60 target diff:  0.002504922647074338 values:  -53.423553 ----- 

-----iteration:  49 target diff:  0.0019596935037315515 values:  -54.050903 ----- 

-----iteration:  31 target diff:  0.0024739657194769323 values:  -61.028282 ----- 

-----iteration:  21 target diff:  0.0033232151452463926 values:  -57.89704-----iteration:   -----61  target diff: 
 
0.0024460353329951194 values:  -53.441414 ----- 

-------------------- fqe on dqn & sale --------------------
 values:  -53.97611 ----- 

-----iteration:  22 target diff:  0.00241932334255551-----iteration:   values:  62 target diff: -57.96894  0.0017318034616872742 values:  -53.487385-----  
----- 


-----iteration

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/798842024/fold0/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
-----iteration:  18 target diff:  0.0026679826932424753 values:  -53.448215 ----- 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/798842024/fold0/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
-----iteration:  59 target diff:  0.0020987608154393374Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/798842024/fold0/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling! 
values:  -59.101383 ----- 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/798842024/fold0/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/798842024/fold0/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/717354021/fold0/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
-----iteration:  77 target diff:  0.001541794788326081 values:  -57.910896 ----- Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/717354021/fold0/train/agent1/trajs1.pkl!


Refresh buffer every 1000000 sampling!
-----iteration:  5 target diff:  0.0018443053043832923 values: Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/717354021/fold0/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling! 
-58.766926 ----- 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/717354021/fold0/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/717354021/fold0/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by d

 ----- 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/932136058/fold0/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
----- 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/932136058/fold0/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/932136058/fold0/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/932136058/fold0/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
-----iteration:  7 target diff:  0.001806598656489367 values:  -54.663208 ----- Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/932136058/fold0/train/agent4/trajs4.pkl!


Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change ju

0.0017477107828386268
 values:  -53.346844 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.
 
----- 

-------------------- adv learner --------------------
-----iteration:  46 target diff:  0.002062191258593825 values:  -53.331688 ----- 

-----iteration:  8 target diff:  0.0021277270028927935 values:  -54.718697 ----- 

-----iteration:  8 target diff:  0.002334922846708918 values:  -53.323883 ----- 

-----iteration:  25 target diff:  0.002197961795968897 values:  -59.246914 ----- 

-----iteration:  47 target diff:  0.0016121429908012674 values:  -53.337288 ----- 

-----iteration:  9 target diff:  0.0022237426970343707 values:  -54.703114 ----- 

-----iteration:  9 target diff:  0.0019883542803122126 values:  -53.30718 ----- 

-----

 14 target diff:  0.0018824789108291683 values:  -54.971348 ----- 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/218175338/fold0/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/218175338/fold0/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/218175338/fold0/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/218175338/fold0/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/218175338/fold0/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='floa

-----iteration:  5 target diff:  0.0024230544190009507 values:  -52.751045 ----- 

-----iteration:  5 target diff:  0.002355858797070875 values:  -53.020336 ----- 

-----iteration:  6 target diff:  0.0018707574776515635 values:  -52.75614 ----- 

-----iteration:  -----iteration:  1648  target diff: target diff:  0.00208875477607151 0.0019028704195085712  values: values:   -59.040535-60.981976  ----------  



-----iteration:  6 target diff:  0.0020376528420101405 values:  -53.069675 ----- 

-----iteration:  7 target diff:  0.0026534959911786297 values:  -52.841587 ----- 

-----iteration:  49 target diff:  0.0021748585845711675 values:  -59.012268 ----- 

-----iteration:  7 target diff:  0.00253945245416272 values:  -53.153233 ----- 

-----iteration:  6 target diff:  0.0019000969509436992 values:  -52.76996 ----- 

-----iteration:  8 target diff:  0.002605667718853732 values:  -53.10599 ----- 

-----iteration:  17 target diff:  0.002057107108841408 values:  -60.980377 ----- 

-----itera

Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/209652396/fold0/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/209652396/fold0/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/209652396/fold0/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are 



Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/717354021/fold0/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
 values:  -52.285122 -----iteration: -----  35

 target diff:  0.0018813433276880008 values:  -59.09135 ----- Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/717354021/fold0/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!


Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/717354021/fold0/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/717354021/fold0/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/717354021/fold0/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  48 --------------------target diff:   adv learner0.0020261433594355545  --------------------values: 
 -52.52005 ----- 

-----iteration:  36 target diff:  0.0021908822320096805 values:  -59.102234 ----- 

-----iteration:  14 target diff:  0.0019087523846479887 values:  -52.27554 ----- 

-----iteration:  49 target diff:  0.0024024082826871445 values:  -52.51969 ----- 

-----iteration:  37 target diff:  0.0020043803940128497 values:  -59.042786 ----- 

-----iteration:  0 target diff:  0.9196365893155415 values:  -59.45583 ----- 

-----iteration:  50 target diff:  0.002402412311996834 values:  -52.526207 ----- 

-----iteration:  15 target diff:  0.0018492896394553722 values:  

-----iteration:  64 target diff:  0.002028867510821621 values:  -52.54248 ----- 

-----iteration:  0 target diff:  0.9183444255635276 values:  -51.980915 ----- 

-----iteration:  9 target diff:  0.0013999902049943423 values:  -59.374153 ----- 

-------------------- ckpt:  50000 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/932136058/fold0/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!
-----iteration:  65 target diff:  0.002094430035874536 Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/932136058/fold0/train/agent0/trajs0.pkl!
values:  Refresh buffer every 1000000 sampling!-52.51334


Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/932136058/fold0/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/932136058/fold0/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  25 target diff:  0.0022173481108506846 values:  -52.383415 ----- 


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.


-------------------- adv learner --------------------
-----iteration:  66 target diff:  0.001941172697554986 values:  -52.513176 ----- 

-----iteration:  2 target diff:  0.0031716998551970786 values:  -51.882046 ----- 

-----iteration:  67 target diff:  0.002136355573456674 values:  -52.525364 ----- 

-----itera

-----iteration:  14 target diff:  0.001987294285569936 values:  -52.8443 ----- 

-----iteration:  7 target diff:  0.0018892425035748077 values:  -59.588383 ----- 

-------------------- fqe on dqn & sale --------------------
 values:  -52.309597 ----- 

-----iteration:  8 target diff:  0.0015856842588512097 values:  -59.569275 ----- 

-----iteration:  15 target diff: -----iteration:  0.001796025383826211 1 target diff:  0.004416616311293209  values:  -59.882793 values: ----- 

 -52.83859 ----- 

-----iteration:  9 target diff:  0.001743346887249732 values:  -59.543736 ----- 

-----iteration:  16 target diff:  0.0021711348816172518 values:  -52.83678 ----- 
-----iteration:  
2 target diff:  0.0024110367027496593 values:  -59.901085 ----- 

-----iteration:  17 target diff:  0.0016464782766925032 values:  -52.8581 ----- 

-----iteration:  10 target diff:  0.0015520436176187425-----iteration:   values: 3 -59.56653  target diff: -----  0.002305906356394919
 
values:  -59.854836 ----- 

-----

-----iteration:  43 Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/798842024/fold0/train/agent0/trajs0.pkl!
target diff: Refresh buffer every 1000000 sampling! 0.0013324939392940762


Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/798842024/fold0/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/798842024/fold0/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/798842024/fold0/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/798842024/fold0/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the


-------------------- fqe on dqn & sale --------------------
-----iteration:  26 target diff:  0.0029363947854961995 values:  -59.67591 ----- 

-----iteration:  26 target diff:  0.002555503631210093 values:  -55.475086 -------------------- -----fqe on dqn & sale  --------------------


-----iteration:  0 target diff:  0.9198138401399771 values:  -50.942055 -----iteration: -----  
27
 target diff:  0.002468614891497116 values:  -59.728428 ----- 

-----iteration:  1 target diff:  0.004216958456721766 values:  -50.891598 ----- 

-----iteration:  27 target diff:  0.002288515560143545 values:  -55.496513 ----- 

-----iteration:  2 target diff:  0.0035276087647567265 values:  -51.018383 ----- 

-----iteration:  28-----iteration:  28  target diff:  target diff: 0.002361636315463413 0.0019685706309439423 values:  values:   -55.47561 -59.69382-----  -----
 


-----iteration:  3 target diff:  0.003104288549230457 values:  -51.105373 ----- 

-----iteration:  4 target diff:  0.0028313773096423655 

 49 target diff:  0.0019060281543118503 values:  -58.833828 ----- 

-----iteration:  61 target diff:  0.002325840962414267 values:  -56.493256 ----- 

-----iteration:  32 target diff:  0.0037061932442731373 values:  -52.486797 ----- 

-----iteration:  50 target diff:  0.0016333711681730053 values:  -58.84737 ----- 

-----iteration:  7 target diff:  0.0014179671316425168 values:  -57.13581 ----- 

-------------------- ckpt:  40000 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/798842024/fold0/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/798842024/fold0/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
-----iteration:  51 target diff:  0.0015262617602558063 Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/798842024/fold0/train/agent1/trajs1.pkl!
values: Refresh buffer every 1000000 sampling! -58.762268--




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  52 target diff:  0.0016909900213491185 values:  -58.718086 ----- 

-----iteration:  62 target diff:  0.0023052788378249787 values:  -56.50748 ----- 

-----iteration:  34 target diff:  0.00401924918207029 values:  -52.66753 ----- 

-----iteration:  53 target diff:  0.0011194889903715802 values:  -58.65769 ----- 

-----iterati


-----iteration:  10 target diff:  0.002595223244430088 values:  -60.89904 ----- 

-----iteration:  11 target diff:  0.0020840054038653256 values:  -60.93388 ----- 

-----iteration:  51 target diff:  0.0024447395036999733 values:  -53.57088 ----- 

-----iteration:  7 target diff:  0.0018876600946121816 values:  -59.640648 ----- 

-----iteration:  0 target diff:  0.9203124798765816 values:  -51.35065 ----- 

-----iteration:  12 target diff:  0.002388754467133441 values:  -61.004097 ----- 

-----iteration:  52-----iteration:  target diff:   80.002326320754105098  target diff: values:   0.0020329029032155523-53.610874  values: ----- -59.67641 ----- 

 

-----iteration:  1 target diff:  0.003546949022364151 values:  -51.431904 ----- 

-----iteration:  53 target diff:  -----iteration: 0.0028997946588712282 values:   -53.6665579 ----- target diff:   0.0027650327707677085
 
values:  -59.717674 ----- 

-----iteration:  2 target diff:  0.0025453054382675827 values:  -51.52103 ----- 

-----itera

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/209652396/fold2/train/agent1/ckpt/offline_rem_5000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/798842024/fold2/train/agent1/ckpt/offline_rem_10000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/218175338/fold2/train/agent0/ckpt/offline_rem_50000.ckpt
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/218175338/fold2/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor.




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/218175338/fold2/train/agent1/ckpt/offline_rem_35000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/798842024/fold2/train/agent1/ckpt/offline_rem_50000.ckpt
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/798842024/fold2/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructo


saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/218175338/fold2/train/agent2/ckpt/offline_rem_35000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/717354021/fold2/train/agent2/ckpt/offline_rem_25000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/209652396/fold2/train/agent2/ckpt/offline_rem_50000.ckpt
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/209652396/fold2/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructo


saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/932136058/fold2/train/agent4/ckpt/offline_rem_5000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/717354021/fold2/train/agent3/ckpt/offline_rem_30000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/218175338/fold2/train/agent3/ckpt/offline_rem_45000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/798842024/fold2/train/agent4/ckpt/offline_rem_10000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/209652396/fold2/train/agent4/ckpt/offline_rem_5000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/932136058/fold2/train/agent4/ckpt/offline_rem_10000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/717354021/fold2/train/agent3/ckpt/offline_rem_35000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/218175338/fold2/train/agent3/ckpt/offline_rem_50000.ckpt
Loaded trajectories from 




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/209652396/fold2/train/agent4/ckpt/offline_rem_45000.ckpt
-------------------- fqe on dqn & sale --------------------


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the la

-----iteration:  0 target diff:  0.9123878497989881 values:  -52.327293 ----- 

-----iteration:  1 target diff:  0.0038688759319967588 values:  -52.37926 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/932136058/fold2/train/agent4/ckpt/offline_rem_50000.ckpt
-------------------- behavior cloning --------------------
-----iteration:  2 target diff:  0.002429859640940856 values:  -52.436226 ----- 

-----iteration:  3 target diff:  0.0024549353797722194 values:  -52.516247 ----- 

-----iteration:  4 target diff:  0.0017676347032978695 values:  -52.48557 ----- 

-------------------- ckpt:  5000 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/932136058/fold2/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/932136058/fold2/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/ju

 




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  6 target diff:  0.0019312243064675042 values:  -52.312824 ----- 

-----iteration:  7 target diff:  0.0020589616422019372 values:  -52.353184 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/717354021/fold2/train/agent4/ckpt/offline_rem_25000.ckpt
-----iteration:  8 target diff:  0.00164646357917651

-------------------- ckpt:  15000 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/798842024/fold2/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!



Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/932136058/fold2/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/798842024/fold2/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!







Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/798842024/fold2/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/932136058/fold2/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!


Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/798842024/fold2/train/agent4/trajs4.pkl!Loaded trajectories from load path:


-------------------- fqe on dqn & sale --------------------


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

 adv learner --------------------


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

--

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/209652396/fold2/train/agent1/trajs1.pkl!
-----iteration:  15Refresh buffer every 1000000 sampling! 
target diff:  0.003087170643929494 values:  -51.972218 ----- 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/209652396/fold2/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/209652396/fold2/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/209652396/fold2/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer co

-----iteration:  21 target diff:  0.0020540507129536032 values:  -51.791348 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  22 target diff:  0.001828351128728461 values:  -51.79621 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  23 target diff:  0.002476404699829355 values:  -51.662106 ----- 

-----iteration:  0 target diff:  0.9130003219965953 values:  -52.932564 ----- 

-----iteration:  24 target diff:  0.0019079780587271963 values:  -51.54939 ----- 

-----iteration:  1 target diff:  0.002687851253982384 values:  -52.833736 ----- 

-----iteration:  2 target diff:  0.002062848496390241 values:  -52.902065 ----- 

-----iteration:  25 target diff:  0.001719606494280194 values: 

-----iteration:  5 target diff:  0.0020826575623333764 values:  -52.977192 ----- 

-----iteration:  6 target diff:  0.002033262373353197 values:  -52.931194 ----- 

-----iteration:  7 target diff:  0.0017086867683053413 values:  -52.91095 ----- 

-------------------- fqe on dqn & sale --------------------
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/218175338/fold2/train/agent4/ckpt/offline_rem_40000.ckpt
-----iteration:  8 target diff:  0.001664863956163272 values:  -52.843437 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  9 target diff:  0.0015808587027758738 values:  -52.797516 ----- 

-----iteration:  0 target diff:  0.9178794361614 values:  -63.23875 ----- 

-----iteration:  10 target di


-----iteration:  5 target diff:  0.0024588547258140667 values:  -55.18628 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-51.06734 ----- 

-------------------- ckpt:  20000 --------------------


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('floa

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/209652396/fold2/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/209652396/fold2/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/209652396/fold2/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
-----iteration:  0Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/209652396/fold2/train/agent3/trajs3.pkl! 
target diff: Refresh buffer every 1000000 sampling! 
0.9129188366849481 -----iteration: values:  -51.283855 6 ----- target diff:   
0.001609714424124319
 Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/209652396/fold2/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!values: 
 -62.576538 ----- 



To change all layers to have dtype float64 by defau


-------------------- adv learner --------------------
-----iteration:  5 target diff:  0.0018174856570167665 values:  -51.316525 ----- 

-----iteration:  6 target diff:  0.0019244062765536199 values:  -51.25469 ----- 

-----iteration:  7 target diff:  0.0021661205290495647 values:  -51.211597 ----- 

-----iteration:  8 target diff:  0.001891251810432488 values:  -51.22586 ----- 

-------------------- fqe on dqn & sale --------------------


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  9 target diff:  0.0022421584729738044 values:  -51.204197 ----- 

-----iteration:  10 target diff:  0.0024866194409017954 values:  -51.16389 ----- 

-----iteration:  0 target diff:  0.9129863905652762 values:  -54.76981 ----- 

-----itera


-----iteration:  31 target diff:  0.0021851892160210137 values:  -49.596252 ----- 

-----iteration:  9 target diff:  0.0019649312333634697 values:  -62.84597 ----- 

-----iteration:  32 target diff:  0.002105144399747693 values:  -49.43942 -----iteration: -----  10
 
target diff:  0.0017816185577487114 values:  -62.821186 ----- 

-----iteration:  0 target diff:  0.9115960254165563 values:  -50.217094 ----- 

-----iteration:  11 target diff:  0.0016188186243845859 values:  -62.865814 ----- 

-----iteration:  33 target diff:  0.0017402514221876678 values:  -49.202328 ----- 

-----iteration:  1 target diff:  0.003829067956915501 values:  -50.24975 ----- 

-----iteration:  12 target diff:  0.0013300126663420536 values:  -62.855762 ----- 

-------------------- ckpt:  30000 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/932136058/fold2/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!
-----iteration:  34 target diff:  0.00206107




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  35 target diff:  0.0020732485047976596 values:  -48.748146 ----- 

-----iteration:  3 target diff:  0.0019257946825346451 values:  -50.291355 ----- 

-----iteration:  36 target diff:  0.002590060777641726 values:  -48.592834 ----- 

-----iteration:  37 target diff:  0.0017656254276163907 values:  -48.471165 ----- 

-----iter

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/209652396/fold2/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/209652396/fold2/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/209652396/fold2/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/209652396/fold2/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/209652396/fold2/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, 



Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/932136058/fold2/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/932136058/fold2/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
-----iteration:  2 target diff:  0.002113250309945302 values:  -52.26047 ----- 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/932136058/fold2/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/932136058/fold2/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/932136058/fold2/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, p




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  3 target diff:  0.002581245767331403 values:  -54.84846 ----- 

-----iteration:  3 target diff:  0.0022498817685733885 values:  -52.28024 ----- 

-----iteration:  4 target diff:  0.003352460626706995 values:  -54.920044 ----- 

-----iteration:  5 target diff:  0.002381290716723188 values:  -54.90057 -----iteration: ----- 4 
 
target diff:  0.0018454256252900154 values:  -52.23348 ----- 

-----iteration:  6 target diff:  0.0023835367757050327 values:  -54.99401 ----- 

-----iteration:  5 target diff:  0.00128723483722576 values:  -52.26526 ----- 

-------------------- ckpt:  30000 --------------------
Loaded trajectories




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-------------------- fqe on dqn & sale ---

----- 


-----iteration:  23 target diff:  0.0025653622197368635 values:  -55.262592 ----- 

-----iteration:  24 target diff:  0.00241573259992782 values:  -55.209984 ----- 

-----iteration:  0 target diff:  0.9159951205933304 values:  -60.43149 ----- 

-----iteration:  25 target diff:  0.003245427126127098 values:  -55.160225 ----- 

-----iteration:  1 target diff:  0.00527317406984024 values:  -60.47332 ----- 

-----iteration:  26 target diff:  0.002686460057746116 values:  -55.07831 ----- 

-----iteration:  2 target diff:  0.004781919274421497 values:  -60.523464 ----- 

-----iteration:  27 target diff:  0.0022979323295594526 values:  -55.07784 ----- 

-----iteration:  28 target diff:  0.00250629570881731-----iteration:  values:  3 -55.02698  target diff: -----  

0.003278310872204684 values:  -60.572002 ----- 

-----iteration:  29 target diff:  0.0025929860959134853 -----iteration: values:   4-55.037224  target diff: -----  0.002399702617196827 
values: 
 -60.59713 ----- 



To cha

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/932136058/fold2/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
-----iteration:  46 target diff:  0.0023942605428488345 values:  -54.109562Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/932136058/fold2/train/agent1/trajs1.pkl! -----
 Refresh buffer every 1000000 sampling!


Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/932136058/fold2/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/932136058/fold2/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/932136058/fold2/train/agent4/trajs4.pkl!
-----iteration: Refresh buffer every 1000000 sampling! 


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change 

0.002631912548945426 values:  -51.46924 ----- Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/218175338/fold2/train/agent/trajs.pkl!


Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/218175338/fold2/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/218175338/fold2/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/218175338/fold2/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/218175338/fold2/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/218175338/fold2/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change 




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  0 target diff:  0.9174705576063014 values:  -62.548985 ----- 

-----iteration:  75 target diff:  0.002070744158145979 values:  -51.017735 ----- 

-----iteration:  3 target diff:  0.002418122435193112 values:  -51.812744 ----- 

-----iteration:  1 target diff:  0.005990273078024911 values:  -62.539894 ----- 

-----iteration:  76 target diff:  0.0027951372605066025 values:  -50.883423 ----- 
-----iteration: 
 4 target diff:  0.002316190248855784 values:  -51.81298 ----- 

-----iteration:  2 target diff:  0.0029808223982743364 values:  -62.524605 ----- 

-----iteration:  5 target diff:  0.002070146051240912 values:  -51.80



-----iteration:  89 target diff:  0.0025248808112789544 values:  -49.741238 ----- 

-----iteration:  17 target diff:  0.0022792789337572417 values:  -51.80976 ----- 

-----iteration:  5 target diff:  0.001496960844470899 values:  -58.80147 ----- 

-----iteration:  90 target diff:  0.003083357222007059 values:  -49.68518 ----- 

-----iteration:  91 target diff:  0.0024819746099647017 values:  -49.65528 ----- 

-----iteration:  18 target diff:  0.0030071969584367147 values:  -51.70316 ----- 

-----iteration:  92 target diff:  0.002453918664038631 values:  -49.643227 ----- 

-----iteration:  19 target diff:  0.0026006436788908044 values:  -51.609577 ----- 

-----iteration:  93 target diff:  0.0023805733789012337 values:  -49.605473 ----- 

-----iteration:  94 target diff:  0.0022931948049418256 values:  -49.605145 ----- 

-----iteration:  20 target diff:  0.0023236855601106933 values:  -51.430016 ----- 

-----iteration:  95 target diff:  0.0021822679030721573 values:  -49.62412 ----- 



-----iteration:  34 target diff:  0.002202834999389756 values:  -49.212368 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.
-----iteration:  
0.0022057731084784435
  35values:   target diff: -57.73315  0.002301195153582737-----  values: 
 
-49.057102 ----- 

-----iteration:  36 target diff:  0.00213525645609233 values:  -48.85197 ----- 

-----iteration:  0 target diff:  0.9129754047578494 values:  -----iteration: -53.073128  0-----  
tar

target diff:  
0.0037674158348555324-----iteration:   values: 57 -53.534973  -----target diff:   
0.0014680739877715012 values: 
 -45.142254 ----- 

-------------------- ckpt:  40000 --------------------


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/798842024/fold2/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/798842024/fold2/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/798842024/fold2/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from 




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  31 target diff:  0.0019036540914520006 values:  -57.124607 ----- 

-----iteration:  3 target diff:  0.002431943406766608 values:  -53.587727 ----- 

-----iteration:  4 target diff:  0.002061820292864998 values:  -53.62307 ----- 

-----iteration:  32 target diff:  0.0017172381698530048 values:  -56.963215 ----- 

-----iterati


-----iteration:  40 target diff:  0.001582599018482104 values:  -56.172993 -----iteration: -----  
1
 target diff:  0.003450545143131874 values:  -63.728394 ----- 

-----iteration:  41 target diff:  0.0011797938395803217 values:  -56.090096 ----- -----iteration: 
 
2--------------------  target diff: ckpt:   0.00253400844128689510000  --------------------values: 
 -63.745266 ----- 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/218175338/fold2/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/218175338/fold2/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
-----iteration:  0 target diff:  0.9128370750373201 values:  Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/218175338/fold2/train/agent1/trajs1.pkl!-50.452656
 Refresh buffer every 1000000 sampling!
----- 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/dat




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  1 target diff:  0.001908624421578232 values:  -50.498188 ----- 

-----iteration:  4 target diff:  0.001788089833076104 values:  -63.701702 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. I

 0.0013751859414289302 values:  -51.51435 ----- 

-------------------- ckpt:  40000 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/798842024/fold2/train/agent4/trajs4.pkl!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/209652396/fold2/train/agent/trajs.pkl!Refresh buffer every 1000000 sampling!
Refresh buffer every 1000000 sampling!



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base L




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/209652396/fold2/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
-------------------- adv learner --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/209652396/fold2/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/209652396/fold2/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/209652396/fold2/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all 


-----iteration:  12 target diff:  0.0022796108586072153 values:  -58.537556 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  13 target diff:  0.0021746560132783962 values:  -58.474514 ----- 

-----iteration:  14 target diff:  0.0018125760518625251 values:  -58.44281 ----- 

-----iteration:  0 target diff:  0.9127662487806484 values:  -50.85882 ----- 

-----iteration:  0 target diff:  0.9131283395064794 values:  -52.14632 ----- 

-----iteration:  15 target diff:  0.0017581852714359393 values:  -58.434933 ----- 

-----iteration:  1 target diff:  0.0012434181328674773 values:  -50.908638 ----- 

-----iteration:  1 target diff:  0.004016318734105943 values:  -52.146023 ----- 

-----iteration:  16 target diff:  0.0024













Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/209652396/fold2/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!




Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/798842024/fold2/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/798842024/fold2/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/798842024/fold2/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/209652396/fold2/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/209652396/fold2/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/

 
values:  -56.700188 ----- 

-------------------- adv learner --------------------
-----iteration:  35 target diff:  0.0017054420565985981 values:  -56.59681 ----- 

-----iteration:  36 target diff:  0.0016916917502628225 values:  -56.48392 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  37 target diff:  0.0016392469916766974 values:  -56.34515 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  38 target diff:  0.0026469701837264323 values:  -56.191895 ----- 

-----iteration:  39 target diff:  0.00234401763577715 values:  -55.924152 ----- 

-----iteration:  40 target diff:  0.0024718720070396804 values:  -55.687447 ----- 

-----iteration:  41 target diff:  0.0023514986505971973 values:  -55.463543 ----- 

-----iteration:  42 target diff:  0.0021107080522644772 values:  -55.28744 ----- 

-----iteration:  43 target diff:  0.0016717587702728587 values:  -55.122944 ----- 



To change all layers to have dtype float64 b




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  57 target diff:  0.0016592357629765585 values:  -52.21991 ----- 
--------------------
 adv learner --------------------
-----iteration: -----iteration:   558  target diff:  0.0015821364206818436 values: target diff:   0.002392479458969997-52.043064  values:  -51.688652 ---------- 

 

-----iteration:  59 target diff:  -----iteration: 0.0016591148278794396  6values:   target diff: -51.85953  0.0017101733604314156-----  
values:  
-51.688286 ----- 

-----iteration:  60 target diff:  0.0018140988155684722 values:  -51.61955 ----- 

-----iteration:  7 target diff:  0.001826640823780506 values:  -51.66571 ----- 

-------------------- fqe on dqn & sale --------------------
-----


-----iteration:  0 target diff:  0.9154003481307611 values:  -58.577553 ----- 

-----iteration:  1 target diff:  0.0019095674049245744 values:  -58.483727 ----- 

-----iteration:  2 target diff:  0.0013461259974239133 values:  -58.387474 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  0 target diff:  0.9135072930481901 values:  -52.60216 ----- 

-----iteration:  1 target diff:  0.0011467562933131707 values:  -52.54111 ----- 

-------------------- training agent --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/209652396/fold3/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/717354021/fold2/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/717354021/fold2/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/717354021/fold2/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/717354021/fold2/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/717354021/fold2/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, 


-------------------- adv learner --------------------
-----iteration:  4 target diff:  0.0018708080903746002 values:  -58.799038 ----- 

-----iteration:  5 target diff:  0.002166636431036377 values:  -58.73584 ----- 

-----iteration:  6 target diff:  0.0018781114361641608 values:  -58.8317 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  7 target diff:  0.002513149188776977 values:  -58.89993 ----- 

-----iteration:  8 target diff:  0.001988794777734501 values:  -58.83677 ----- 

-----iteration:  9 target diff:  0.0018390439965387973 values:  -58.91679 ----- 

-----iteration:  10 target diff:  0.0027122537938344023 values:  -58.84551 ----- 

-----iteration:  11 target diff:  0.0021404637870163145 values:  -58.88134 ----- 

-----iteration:  12 target diff:  0.002184175616275461 values:  -58.86233 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float

-----iteration:  13 target diff:  0.004504523630975705 values:  -58.846577 ----- 

-----iteration:  24 target diff:  0.0016659327422364127 values:  -63.019756 ----- 

-----iteration:  25 target diff:  0.0021884889758778943 values:  -63.04935 ----- 

-----iteration:  14 target diff:  0.002153366199563251 values:  -58.86529 ----- 

-----iteration:  26 target diff:  0.0017882188333443808 values:  -63.033504 ----- 

-----iteration:  15 target diff:  0.00213288762399782 values:  -58.82078 ----- 

-----iteration:  27 target diff:  0.001972961430923993 values:  -63.010967 ----- 

-----iteration:  28 target diff:  0.0019831256044520497 values:  -63.01441 ----- 

-----iteration:  16 target diff:  0.0025493438279790264 values:  -58.81064 ----- 

-----iteration:  29 target diff:  0.0024883229745551946 values:  -62.908474 ----- 

-----iteration:  17 target diff:  0.002269460758328638 values:  -58.649727 ----- 

-----iteration:  30 target diff:  0.0023539826523452574 values:  -62.84592 ----- 

----




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-------------------- fqe on dqn & sale ---

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/717354021/fold2/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/717354021/fold2/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/717354021/fold2/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/717354021/fold2/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/717354021/fold2/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, 


-------------------- adv learner --------------------
-------------------- fqe on dqn & sale --------------------


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  0 target diff:  0.9141059630985681 values:  -64.70681 ----- 

-----iteration:  1 target diff:  0.004153628939088128 values:  -64.74035 ----- 

-----iteration:  2 target diff:  0.003743617971094251 values:  -64.8134 ----- 

-----iteration:  3 target diff:  0.003767650916189637 values:  -64.823616 ----- 

-----iteration:  4 target diff:  0.003319914268315971 values:  -64.839424 ----- 

-----iteration:  5 target diff:  0.002843312727218527 values:  -64.90046 ----- 

-----iteration:  6 target diff:  0.003125934379832618 values:  -64.903625 ----- 

-----iteration:  

-----iteration:  55 target diff:  0.0026691126626541824 values:  -64.48079 ----- 

-----iteration:  56 target diff:  0.0024084254905319615 values:  -64.41327 ----- 

-----iteration:  57 target diff:  0.002256029115922786 values:  -64.384415 ----- 

-----iteration:  58 target diff:  0.002297419469842885 values:  -64.33277 ----- 

-----iteration:  59 target diff:  0.002282034059009281 values:  -64.195015 ----- 

-----iteration:  60 target diff:  0.0029062246276356065 values:  -64.112915 ----- 

-----iteration:  61 target diff:  0.003367581215123275 values:  -64.07996 ----- 

-----iteration:  62 target diff:  0.0027924954619621832 values:  -64.04496 ----- 

-----iteration:  63 target diff:  0.0027120574414224437 values:  -63.96705 ----- 

-----iteration:  64 target diff:  0.002342797672622459 values:  -63.895203 ----- 

-----iteration:  65 target diff:  0.0026120898585910894 values:  -63.82141 ----- 

-----iteration:  66 target diff:  0.0021191777807010323 values:  -63.759346 ----- 

----

-----iteration:  13 target diff:  0.0024428016721875735 values:  -64.43742 ----- 

-----iteration:  14 target diff:  0.0027938743348708823 values:  -64.408806 ----- 

-----iteration:  15 target diff:  0.003823913912213448 values:  -64.3705 ----- 

-----iteration:  16 target diff:  0.003410977163188797 values:  -64.3072 ----- 

-----iteration:  17 target diff:  0.0030691417832615286 values:  -64.34388 ----- 

-----iteration:  18 target diff:  0.0032449421521313555 values:  -64.24358 ----- 

-----iteration:  19 target diff:  0.003962903899375319 values:  -64.20025 ----- 

-----iteration:  20 target diff:  0.003850703570045503 values:  -64.18638 ----- 

-----iteration:  21 target diff:  0.002805962703823462 values:  -64.17069 ----- 

-----iteration:  22 target diff:  0.0028211729251072597 values:  -64.14477 ----- 

-----iteration:  23 target diff:  0.0030479375537957867 values:  -64.1334 ----- 

-----iteration:  24 target diff:  0.003204945174363495 values:  -64.01098 ----- 

-----iterati

-----iteration:  51 target diff:  0.0032234002694515674 values:  -63.403675 ----- 

-----iteration:  52 target diff:  0.0038887303743098075 values:  -63.340473 ----- 

-----iteration:  53 target diff:  0.0031951967882701444 values:  -63.285828 ----- 

-----iteration:  54 target diff:  0.0035094979571182036 values:  -63.191463 ----- 

-----iteration:  55 target diff:  0.0029325082453721305 values:  -63.170452 ----- 

-----iteration:  56 target diff:  0.0037687000321625745 values:  -63.052925 ----- 

-----iteration:  57 target diff:  0.003763810862620099 values:  -63.060364 ----- 

-----iteration:  58 target diff:  0.0033183976976371952 values:  -62.92008 ----- 

-----iteration:  59 target diff:  0.0033657839695685502 values:  -62.833523 ----- 

-----iteration:  60 target diff:  0.0030668612708820366 values:  -62.737186 ----- 

-----iteration:  61 target diff:  0.002877973825984228 values:  -62.688404 ----- 

-----iteration:  62 target diff:  0.003074201468865541 values:  -62.62439 -----




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/218175338/fold3/train/agent0/ckpt/offline_rem_10000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/798842024/fold3/train/agent1/ckpt/offline_rem_5000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/717354021/fold3/train/agent/ckpt/offline_rem_5000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/932136058/fold3/train/agent1/ckpt/offline_rem_10000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/218175338/fold3/train/agent0/ckpt/offline_rem_15000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/209652396/fold3/train/agen




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/798842024/fold3/train/agent1/ckpt/offline_rem_50000.ckpt
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/798842024/fold3/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just


saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/218175338/fold3/train/agent2/ckpt/offline_rem_5000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/717354021/fold3/train/agent0/ckpt/offline_rem_50000.ckpt
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/717354021/fold3/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor


saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/717354021/fold3/train/agent1/ckpt/offline_rem_50000.ckpt
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/717354021/fold3/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/798842024/fold3/train/agent4/ckpt/offline_rem_10000.c


-------------------- adv learner --------------------
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/932136058/fold3/train/agent4/ckpt/offline_rem_50000.ckpt
-------------------- behavior cloning --------------------
-------------------- fqe on dqn & sale --------------------
-------------------- ckpt:  5000 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/932136058/fold3/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/932136058/fold3/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/932136058/fold3/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/932136058/fold3/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------


To change all layers to have dtype float


-------------------- adv learner --------------------
-----iteration:  0 target diff:  0.9243186695163517 values:  -56.87319 ----- 

-----iteration:  1 target diff:  0.002213582538218753 values:  -56.92857 ----- 

-----iteration:  2 target diff:  0.0023755952434161708 values:  -56.947964 ----- 

-----iteration:  3 target diff:  0.0020207557121843804 values:  -56.95492 ----- 

-----iteration:  4 target diff:  0.0023335928650839314 values:  -56.924603 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  5 target diff:  0.0014553134236657374 values:  -56.911217 ----- 

-------------------- ckpt:  10000 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/932136058/fold3/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/932136058/fold3/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from lo




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-------------------- fqe on dqn & sale ---




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  18 target diff:  0.0024549997024046356 values:  -57.808895 ----- 

-----iteration:  19 target diff:  0.0022237924223245504 values:  -57.85285 ----- 

-----iteration:  20 target diff:  0.002575750129054017 values:  -57.820183 ----- 

-----iteration:  21 target diff:  0.002366588388175935 values:  -57.82617 ----- 

-----iteration:  22 target diff:  0.0017715646637000782 values:  -57.82662 ----- 

-----iteration:  23 target diff:  0.0015161839480339117 values:  -57.828716 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  24 target diff:  0.0013597330035441048 values:  -57.745266 ----- 




-----iteration:  16 target diff:  0.0018112976609772982 values:  -60.38644 ----- 

-----iteration:  17 target diff:  0.0020521812451918687 values:  -60.36469 ----- 

-----iteration:  18 target diff:  0.0019605223988333692 values:  -60.389343 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  19 target diff:  0.0018416574392045757 values:  -60.370796 ----- 

-----iteration:  0 target diff:  0.9242051608543902 values:  -57.581055 ----- 

-----iteration:  20 target diff:  0.002086186999791454 values:  -60.421528 ----- 

-----iteration:  1 target diff:  0.0022651569369219675 values:  -57.59873 ----- 

-----iteration:  21 target diff:  0.0017011287666263027 values:  -60.450188 ----- 

-----iteration:  2 target diff:  0.0

-----iteration:  71 target diff:  0.0018823718044441095 Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/932136058/fold3/train/agent1/trajs1.pkl!
values:  Refresh buffer every 1000000 sampling!-56.552914
 ----- 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/932136058/fold3/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/932136058/fold3/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/932136058/fold3/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer c




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  4 target diff:  0.0023882139683676284 values:  -56.970882 ----- 

-----iteration:  5 target diff:  0.0016970450263732828 values:  -57.002472 ----- 

-----iteration:  6 target diff:  0.0019585167578055735 values:  -57.043663 ----- 

-----iteration:  7 target diff:  0.0028422472957066254 values:  -57.076965 ----- 

-----iteration:  8 target diff:  0.0021759028290520726 values:  -57.065197 ----- 

-----iteration:  9 target diff:  0.001513703630161385 values:  -57.205788 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  10 target diff:  0.002442883655068649 values:  -57.10101 ----- 

-----


-----iteration:  35 target diff:  0.0018687361924139972 values:  -56.331326 ----- 

-----iteration:  0 target diff:  0.9157619362862994 values:  -58.63779 ----- 

-----iteration:  36 target diff:  0.0016847701089570814 values:  -56.296284 ----- 

-----iteration:  37 target diff:  0.0021031186373401665 values:  -56.2176 ----- 

-----iteration:  1 target diff:  0.002630625181951226 values:  -58.60729 ----- 

-----iteration:  38 target diff:  0.0015937582887619423 values:  -56.15497 ----- 

-----iteration:  2 target diff:  0.0027743248542621305 values:  -58.624146 ----- 

-----iteration:  3 target diff:  0.0017171493071783824 values:  -58.577404 ----- 

-----iteration:  39 target diff:  0.001395664839835879 values:  -56.110973 ----- 

-----iteration:  4 target diff:  0.002162455753939359 values:  -58.56075 ----- 

-----iteration:  5 target diff:  0.0017786977432842914 values:  -58.548622 ----- 

-----iteration:  6 target diff:  0.0020071124609779513 values:  -58.57782 ----- 

-----iterat




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------


To change all layers to have dtype float

-----iteration:  2 target diff:  0.0025811184068851023 values:  -57.85266 ----- 

-----iteration:  3 target diff:  0.0019653938972966916 values:  -57.85947 ----- 

-----iteration:  4 target diff:  0.002156768253471903 values:  -57.838936 ----- 

-----iteration:  5 target diff:  0.0019493345911384644 values:  -57.77322 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/218175338/fold3/train/agent4/ckpt/offline_rem_15000.ckpt
-----iteration:  6 target diff:  0.0023484548320819257 values:  -57.770508 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  7 target diff:  0.0023831095933362803 values:  -57.774822 ----- 

-----iteration:  0 target diff:  0.9233766257194751 values:  -56.458443 ----- 

---

-----iteration:  3 target diff:  0.001818625525540426 values:  -56.949 ----- 

-----iteration:  1 target diff:  0.0019166458687918865 values:  -62.503704 ----- 

-----iteration:  25 target diff:  0.002245684270948812 values:  -57.732014 ----- 

-----iteration:  4 target diff:  0.002106087264311256 values:  -56.955982 ----- 

-----iteration:  26 target diff:  0.002155561633333788 values:  -57.68644 ----- 

-----iteration:  2 target diff:  0.001164239046415085 values:  -62.541298 ----- 

-----iteration:  5 target diff:  0.0017213869463970374 values:  -57.037067 ----- 

-----iteration:  6 target diff:  0.002612302516173373 values:  -57.046562 ----- 

-----iteration:  27 target diff:  0.0017876276760892308 values:  -57.548943 ----- 

-----iteration:  7 target diff:  0.001474100817580344 values:  -57.01823 ----- 

-------------------- ckpt:  30000 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/932136058/fold3/train/agent/trajs.pkl!
Refresh 




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  29 target diff:  0.002208261844026899 values:  -57.481743 ----- 

-----iteration:  30 target diff:  0.0016779723900948685 values:  -57.41067 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor.

-----iteration:  5 target diff:  0.0021352539317822327 values:  -58.45944 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  6 target diff:  0.0017240534767344754 values:  -58.426514 ----- 

-----iteration:  7 target diff:  0.0023323812523953773 values:  -58.458195 ----- 

-----iteration:  0 target diff:  0.9150949488888631 values:  -58.91796 ----- 

-----iteration:  8 target diff:  0.0023526210125788855 values:  -58.525635 ----- 

-----iteration:  1 target diff:  0.0028303915483897544 values:  -58.822456 ----- 

-----iteration:  9 target diff:  0.002445867281384836 values:  -58.553776 ----- 

-----iteration:  10 target diff:  0.0016646201787152118 values:  -58.476555 ----- 
-----iteration: 
 2 target diff:  0.00213

-----iteration:  2 target diff:  0.0019357502502309212 values:  -67.79533 ----- 

-----iteration:  25 target diff:  0.002166888951249768 values:  -58.223186 ----- 

-----iteration:  3 target diff:  0.001641044865168798 values:  -67.751656 ----- 

-----iteration:  4 target diff:  0.002501985229436823 values:  -67.74131 ----- 

-----iteration:  5 target diff:  0.0016768247371579253 values:  -67.6826 ----- 

-----iteration:  26 target diff:  0.002489306487660903 values:  -58.127583 ----- 

-----iteration:  6 target diff:  0.0017787018421953954 values:  -67.69303 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  27 target diff:  0.002281789483804258 values:  -58.05341 ----- 

-----iteration:  0 target diff:  0.91445782




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  1 target diff:  0.0038736220498349493 values:  -58.36433 ----- 

--------------




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('floa


-------------------- adv learner --------------------
-----iteration:  44 target diff:  0.0015045659963792797 values:  -58.230145 ----- 

-----iteration:  45 target diff:  0.0013646456287808408 values:  -58.269897 ----- 

-----iteration:  0 target diff:  0.9241116426228851 values:  -56.117756 ----- 

-----iteration:  1 target diff:  0.002251471838287825 values:  -56.21561 ----- 

-----iteration:  2 target diff:  0.0030778692911853085 values:  -56.299988 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  3 target diff:  0.0022482008081207276 values:  -56.398396 ----- 

-----iteration:  4 target diff:  0.0023679275360722406 values:  -56.42083 ----- 

-----iteration:  5 target diff:  0.0018768076684420804 values:  -56.543476 ----- 

-----iteration:  6 target diff:  0.002339803357470129 values:  -56.621723 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='

21
 target diff:  0.00221918022419026 values:  -63.43042 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have d

-----iteration:  35 target diff:  0.001544573854585739 values:  -55.392246 ----- 

-----iteration:  20 target diff:  0.0023040641503040424 values:  -62.992485 ----------iteration:   
81
 target diff:  0.00187444689399799 values:  -55.059002 ----- 

-----iteration:  36 target diff:  0.0017027026009559735 values:  -55.31693 ----- 

-----iteration:  21 target diff:  0.002452895814278958 values:  -63.0024 ----- 

-----iteration:  82 target diff:  0.0017465895591750812 values:  -55.028065 ----- 

-----iteration:  37 target diff:  0.0019017786029749497 values:  -55.2136 ----- 

-----iteration:  83 target diff:  0.0021269652537154317 values:  -55.02517 ----- 

-----iteration:  22 target diff:  0.0018930408086812222 values:  -62.94475 ----- 

-----iteration:  38 target diff:  0.0023523865915048947 values:  -55.014194 ----- 

-----iteration:  84 target diff:  0.0019932438180245322 values:  -55.0033 ----- 

-----iteration:  85 target diff:  0.0019532036444338536 values:  -54.972603 ----- 

-----

 ----- 
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/209652396/fold3/train/agent0/trajs0.pkl!

Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/209652396/fold3/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/209652396/fold3/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/209652396/fold3/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/209652396/fold3/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of thi

-----iteration:  2 target diff:  0.002116378536764526 Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/932136058/fold3/train/agent0/trajs0.pkl!values: 
 Refresh buffer every 1000000 sampling!-62.172565
 ----- 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/932136058/fold3/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/932136058/fold3/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/932136058/fold3/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/932136058/fold3/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pa


-----iteration:  11 target diff:  0.0022856682094841555 values:  -62.24149 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  0 target diff:  0.9235839441750409 values:  -55.398857 ----- 

-----iteration:  12 target diff:  0.002353299103651613 values:  -62.1493 ----- 

-----iteration:  0 target diff:  0.9156666632471525 values:  -59.403923 ----- 

-----iteration:  1 target diff:  0.0027290824095189425 values:  -55.373543 ----- 

-----iteration:  1 target diff:  0.004465957861674303 values:  -59.446133 ----- 

-----iteration:  13 target diff:  0.0020558861004191485 values:  -62.12283 ----- 

-----iteration:  2 target diff:  0.00243451487349825 values:  -55.432426 ----- 

-----iteration:  2 target diff:  0.0024605062




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.
 
target diff:  0.002730013588354002 values:  -55.80434 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dt

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/932136058/fold3/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocastin


-----iteration:  24 target diff:  0.002088864123478326 values:  -59.209106 ----- 

-----iteration:  0 target diff:  0.9233915343892113 values:  -57.591972 ----- 

-----iteration:  25 target diff:  0.002033033848433142 values:  -59.201702 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/218175338/fold3/train/agent4/ckpt/offline_rem_30000.ckpt
-----iteration:  1 target diff:  0.0027847802410225666 values:  -57.636875 ----- 

-----iteration:  26 target diff:  0.0022483217515782785 values:  -59.072735 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  2 target diff:  0.0021706244899391282 values:  -57.67027 ----- 

-----iteration:  27 target diff:  0.0023492828783112252 values:  -59.038525 ----- 

-----iteration:  3 target diff:  0.0024114820475847126 values:  -57.705784 ----- 

-----iteration:  28 target diff:  0.0021300461920634455 values:  -58.888027 ----- 

-----iteration:  4 target diff:  0.001699233638736167 values:  -57.8486


-----iteration:  13 target diff:  0.002001762558821674 values:  -56.671307 ----- 

-----iteration:  14 target diff:  0.0014708690056651837 values:  -56.653084 ----- 

-----iteration:  0 target diff:  0.9151644511661585 values:  -58.36491 ----- 

-------------------- training agent --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/932136058/fold4/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasti




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('floa


-----iteration:  29 target diff:  0.002836678681526764 values:  -58.245777 ----- 

-----iteration:  30 target diff:  0.0033829276624348405 values:  -58.213604 ----- 

-----iteration:  0 target diff:  0.9205764031997177 values:  -66.56393 ----- 

-----iteration:  31 target diff:  0.002922704188162292 values:  -58.12377 ----- 

-----iteration:  1 target diff:  0.0020344623531342388 values:  -66.54961 ----- 

-----iteration:  32 target diff:  0.0030687125981178503 values:  -58.028896 ----- 

-----iteration:  2 target diff:  0.0014966294910784873 values:  -66.506256 ----- 

-------------------- ckpt:  40000 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/209652396/fold3/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!


Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/209652396/fold3/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyua




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.
34 target diff: 
 0.002773313458265159 values:  -57.777824 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  35 target diff:  0.0028611316733968112 values:  -57.682922 ----- 

-----iteration:  36 target diff:  0.002383643529791027 values:  -57.554604 ----- 

-----iteration:  37 target diff:  0.00242845510023205 values:  -57.47958 ----- 

-------------------- fqe on d


-------------------- adv learner --------------------
-----iteration:  17 target diff:  0.001641637013825814 values:  -61.977303 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/218175338/fold3/train/agent4/ckpt/offline_rem_35000.ckpt
-----iteration:  18 target diff:  0.001683916921510316 values:  -61.921413 ----- 

-----iteration:  19 target diff:  0.0018156829855650227 values:  -61.99898 ----- 

-----iteration:  20 target diff:  0.001939075911052595 values:  -62.024605 ----- 

-----iteration:  21 target diff:  0.0018627410637388243 values:  -62.035145 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  22 target diff:  0.0018094575806265375 values:  -62.021503 ----- 

-----iteration:  23 target diff:  0.0015323215133846745 values:  -61.973026 ----- 

-----iteration:  24 target diff:  0.0018399794170844874 values:  -61.87631 ----- 

-----iteration:  25 target diff:  0.0015092905287571463 values:  -61.87803 ----- 

-----iteratio

-----iteration:  57 target diff:  0.0018090351472441933 values:  -61.62954 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  58 target diff:  0.0017261636501811042 values:  -61.595833 ----- 

-----iteration:  59 target diff:  0.0017925784957299991 values:  -61.457882 ----- 

-----iteration:  60 target diff:  0.0023634211569066015 values:  -61.44799 ----- 

-----itera


-----iteration:  14 target diff:  0.0024408785139166615 values:  -59.27055 ----- 

-----iteration:  15 target diff:  0.001954887932663916 values:  -59.218914 ----- 

-----iteration:  0 target diff:  0.9181695907780341 values:  -63.59946 ----- 

-----iteration:  16 target diff:  0.002158162127825661 values:  -59.257385 ----- 

-----iteration:  1 target diff:  0.004754551305381361 values:  -63.69826 ----- 

-----iteration:  17 target diff:  0.0024546053849181897 values:  -59.183826 ----- 

-----iteration:  2 target diff:  0.002479978202189455 values:  -63.6715 ----- 

-----iteration:  18 target diff:  0.0028066403965099317 values:  -59.12689 ----- 

-----iteration:  3 target diff:  0.0021327650949950648 values:  -63.75721 ----- 

-----iteration:  19 target diff:  0.0022112291762317 values:  -59.122463 ----- 

-----iteration:  4 target diff:  0.0023350667014900885 values:  -63.69519 ----- 

-----iteration:  5 target diff:  0.002337339107098413 values:  -63.753834 -----iteration: -----  



-----iteration:  0 target diff:  0.9144380398785389 values:  -58.315155 ----- 

-----iteration:  0 target diff:  0.9184519432960816 values:  -62.444904 ----- 

-----iteration:  1 target diff:  0.004235470568960301 values:  -58.3342 ----- 

-----iteration:  1 target diff:  0.002096574444987748 values:  -62.502583 ----- 

-----iteration:  2 target diff:  0.0030367127255588084 values:  -58.31915 ----- 

-----iteration:  2 target diff:  0.0018537552241622791 values:  -62.57525 ----- 

-----iteration:  3 target diff:  0.0025336086542163045 values:  -58.31738 ----- 

-----iteration:  3 target diff:  0.00195758046406516 values:  -62.575893 ----- 

-----iteration:  4 target diff:  0.0027434268539142347 values:  -58.44736 ----- 

-----iteration:  4 target diff:  0.0017819817778922482 values:  -62.562893 ----- 

-----iteration:  5 target diff:  0.0025625256502862038 values:  -58.466198 ----- 

-----iteration:  5 target diff:  0.0017213987112330155 values:  -62.638298 ----- 

saving model weight




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/717354021/fold3/train/agent3/ckpt/offline_rem_35000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/209652396/fold4/train/agent/ckpt/offline_rem_5000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/218175338/fold3/train/agent4/ckpt/offline_rem_50000.ckpt
-------------------- behavior cloning --------------------
-------------------- ckpt:  5000 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/218175338/fold3/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-------------------- fqe on dqn & sale ---


-----iteration:  0 target diff:  0.9176157173790194 values:  -66.534615 ----- 

-----iteration:  1 target diff:  0.002047332196867959 values:  -66.65996 ----- 

-----iteration:  2 target diff:  0.0028911464073963357 values:  -66.616615 ----- 

-----iteration:  3 target diff:  0.0014480119582993385 values:  -66.670425 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  0 target diff:  0.9180991606063033 values:  -67.31904 ----- 

-----iteration:  1 target diff:  0.001901750103755803 values:  -67.33775 ----- 

-----iteration:  2 target diff:  0.001926306186869057 values:  -67.43132 ----- 

-----iteration:  3 target diff:  0.0026241290142753108 values:  -67.49186 ----- 

-----iteration:  4 target diff:  0.0013851660395




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-------------------- fqe on dqn & sale ---


-----iteration:  0 target diff:  0.9172469567852823 values:  -67.23826 ----- 

-----iteration:  1 target diff:  0.0020071226905926712 values:  -67.247505 ----- 

-----iteration:  2 target diff:  0.002069824536940605 values:  -67.221695 ----- 

-----iteration:  3 target diff:  0.0018637104706417222 values:  -67.21933 ----- 

-----iteration:  4 target diff:  0.001604535382233336 values:  -67.19219 ----- 

-----iteration:  5 target diff:  0.0016602115756553695 values:  -67.18978 ----- 

-----iteration:  6 target diff:  0.0016505692431291357 values:  -67.35897 ----- 

-----iteration:  7 target diff:  0.0026873018115738073 values:  -67.38048 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/932136058/fold4/train/agent/ckpt/offline_rem_25000.ckpt
-----iteration:  8 target diff:  0.0015432934703601053 values:  -67.3081 ----- 

-----iteration:  9 target diff:  0.001624454439251955 values:  -67.30774 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tm

-----iteration:  14 target diff:  0.0018477598602424337 values:  -68.6534 ----- 

-----iteration:  15 target diff:  0.0018983868067335601 values:  -68.76518 ----- 

-----iteration:  16 target diff:  0.002894426435267537 values:  -68.89951 ----- 

-----iteration:  17 target diff:  0.003135582185865541 values:  -68.933556 ----- 

-----iteration:  18 target diff:  0.0016941124819084038 values:  -68.918175 ----- 

-----iteration:  19 target diff:  0.0016550711324016091 values:  -68.97492 ----- 

-----iteration:  20 target diff:  0.0015637991846565503 values:  -68.99122 ----- 

-----iteration:  21 target diff:  0.0020004357967202363 values:  -69.035416 ----- 

-----iteration:  22 target diff:  0.00192019383912684 values:  -69.06624 ----- 

-----iteration:  23 target diff:  0.002279055290163748 values:  -69.047104 ----- 

-----iteration:  24 target diff:  0.0022480457074274627 values:  -69.03627 ----- 

-----iteration:  25 target diff:  0.0021254567308607044 values:  -69.01856 ----- 

-----i




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-------------------- fqe on dqn & sale --------------------


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base La

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/218175338/fold3/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/218175338/fold3/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/218175338/fold3/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/218175338/fold3/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/218175338/fold3/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/218175338/fold3/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/218175338/fold3/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/218175338/fold3/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/218175338/fold3/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, 


-----iteration:  0 target diff:  0.9172620719195114 values:  -69.42984 ----- 

-----iteration:  1 target diff:  0.002313181235260228 values:  -69.49371 ----- 

-----iteration:  2 target diff:  0.0015425618079455666 values:  -69.561485 ----- 

-----iteration:  3 target diff:  0.0015818262051442544 values:  -69.54661 ----- 

-----iteration:  4 target diff:  0.0014658333215786393 values:  -69.51039 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  0 target diff:  0.917138772546349 values:  -68.68608 ----- 

-----iteration:  1 target diff:  0.0030449667750100987 values:  -68.76079 ----- 

-----iteration:  2 target diff:  0.0024108123222978273 values:  -68.81227 ----- 

-----iteration:  3 target diff:  0.00213130160925


-----iteration:  0 target diff:  0.9184376941498639 values:  -66.22416 ----- 

-----iteration:  1 target diff:  0.003882036595389451 values:  -66.19427 ----- 

-----iteration:  2 target diff:  0.00343113512417189 values:  -66.20922 ----- 

-----iteration:  3 target diff:  0.0032838371589450637 values:  -66.38452 ----- 

-----iteration:  4 target diff:  0.0044358388814838955 values:  -66.41878 ----- 

-----iteration:  5 target diff:  0.002619961366294497 values:  -66.40121 ----- 

-----iteration:  6 target diff:  0.0030600565952811722 values:  -66.44077 ----- 

-----iteration:  7 target diff:  0.002413771355944443 values:  -66.54637 ----- 

-----iteration:  8 target diff:  0.003054706316891058 values:  -66.57586 ----- 

-----iteration:  9 target diff:  0.0028339485440611787 values:  -66.628815 ----- 

-----iteration:  10 target diff:  0.0026802329511508723 values:  -66.67913 ----- 

-----iteration:  11 target diff:  0.0021321399476899833 values:  -66.72699 ----- 

-----iteration:  12 t

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/209652396/fold4/train/agent/ckpt/offline_rem_45000.ckpt


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  0 target diff:  0.9175126774787361 values:  -67.14879 ----- 

-----iteration:  1 target diff:  0.002181753167901552 values:  -67.07763 ----- 

-----iteration:  2 target diff:  0.0017024526410020094 values:  -67.076294 ----- 

-----iteration:  3 target diff:  0.0014410585567247525 values:  -66.990135 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passin


-----iteration:  44 target diff:  0.002943369835704387 values:  -62.28874 ----- 

-----iteration:  45 target diff:  0.002721397749908884 values:  -62.29242 ----- 

-----iteration:  46 target diff:  0.0030965280971656027 values:  -62.253994 ----- 

-----iteration:  47 target diff:  0.002639064237029333 values:  -62.226894 ----- 

-----iteration:  48 target diff:  0.002465347848478427 values:  -62.184917 ----- 

-----iteration:  49 target diff:  0.0021323095095473264 values:  -62.164036 ----- 

-----iteration:  50 target diff:  0.0021680808498180234 values:  -62.106464 ----- 

-----iteration:  51 target diff:  0.0018449063577968977 values:  -61.954704 ----- 

-----iteration:  52 target diff:  0.002340226677283649 values:  -61.817547 ----- 

-----iteration:  53 target diff:  0.0023248182245142806 values:  -61.675346 ----- 

-----iteration:  54 target diff:  0.002693310809552536 values:  -61.561222 ----- 

-----iteration:  55 target diff:  0.0021143036418328574 values:  -61.4245 ----- 

-




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-------------------- fqe on dqn & sale --------------------


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  0 target diff:  0.9233548246117782 values:  -48.697845 ----- 

-----iteration:  1 target diff:  0.0028089474572727913 values:  -48.727764 ----- 

-----iteration:  2 target diff:  0.001940932014427078 values:  -48.78324 ----- 

-----iteration:  3 target diff:  0.0021

-----iteration:  7 target diff:  0.0016855952334614936 values:  -46.857456 ----- 

-----iteration:  8 target diff:  0.0025013263843658537 values:  -46.904133 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/218175338/fold4/train/agent/ckpt/offline_rem_20000.ckpt
-----iteration:  9 target diff:  0.002021309812552442 values:  -46.911503 ----- 

-----iteration:  10 target diff:  0.0014230717704737045 values:  -46.914333 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  0 target diff:  0.9233082643188327 values:  -46.792454 ----- 

-----iteration:  1 target diff:  0.0017351868443155661 values:  -46.766273 ----- 

-----iteration:  2 target diff:  0.0020626172460050784 values:  -46.79964 ----- 

s

-----iteration:  16 target diff:  0.0024002693188551836 values:  -46.854633 ----- 

-----iteration:  17 target diff:  0.0022870252648670075 values:  -46.88548 ----- 

-----iteration:  18 target diff:  0.0022356683811088794 values:  -46.955467 ----- 

-----iteration:  19 target diff:  0.002321699365937076 values:  -47.021793 ----- 

-----iteration:  20 target diff:  0.002204183684118657 values:  -47.02881 ----- 

-----iteration:  21 target diff:  0.003340428059306943 values:  -47.04038 ----- 

-----iteration:  22 target diff:  0.0022562206026871365 values:  -47.054485 ----- 

-----iteration:  23 target diff:  0.002366868609493407 values:  -47.085155 ----- 

-----iteration:  24 target diff:  0.002611115713905013 values:  -47.108356 ----- 

-----iteration:  25 target diff:  0.002099490579406274 values:  -47.134125 ----- 

-----iteration:  26 target diff:  0.0019252496234870864 values:  -47.131893 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/218175338/fold4/trai


-------------------- adv learner --------------------
-------------------- fqe on dqn & sale --------------------
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/218175338/fold4/train/agent/ckpt/offline_rem_30000.ckpt


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  0 target diff:  0.9240611193620246 values:  -47.052612 ----- 

-----iteration:  1 target diff:  0.0038795095457391585 values:  -47.05727 ----- 

-----iteration:  2 target diff:  0.0029616783163287797 values:  -47.038536 ----- 

-----iteration:  3 target diff:  0.0022490145639770336 values:  -47.04328 ----- 

-----iteration:  4 target diff:  0.002768372506240459 values:  -47.05428 ----- 

-----iteration:  5 target diff:  0.001619242035039904 v

-----iteration:  7 target diff:  0.0018184471512775243 values:  -48.583412 ----- 

-----iteration:  8 target diff:  0.0015740248823448242 values:  -48.602898 ----- 

-----iteration:  9 target diff:  0.0019466952612774335 values:  -48.58338 ----- 

-----iteration:  10 target diff:  0.0015401930537142176 values:  -48.62339 ----- 

-----iteration:  11 target diff:  0.0015971898534858427 values:  -48.623394 ----- 

-----iteration:  12 target diff:  0.00134801384484796 values:  -48.693634 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/932136058/fold4/train/agent0/ckpt/offline_rem_45000.ckpt


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  0 target diff:  0.9234949222546145 values:  -49.32516 ----- 

-




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('floa


-----iteration:  0 target diff:  0.9240134482145894 values:  -46.295105 ----- 

-----iteration:  1 target diff:  0.004094633595162376 values:  -46.31823 ----- 

-----iteration:  2 target diff:  0.0038714060310444296 values:  -46.306248 ----- 

-----iteration:  3 target diff:  0.0026620579102371146 values:  -46.347797 ----- 

-----iteration:  4 target diff:  0.0029062346136795266 values:  -46.375786 ----- 

-----iteration:  5 target diff:  0.0026461287588332542 values:  -46.37977 ----- 

-----iteration:  6 target diff:  0.0026277865345346054 values:  -46.364983 ----- 

-----iteration:  7 target diff:  0.00231031356930345 values:  -46.38354 ----- 

-----iteration:  8 target diff:  0.002565433546537576 values:  -46.425922 ----- 

-----iteration:  9 target diff:  0.00294805491499317 values:  -46.422905 ----- 

-----iteration:  10 target diff:  0.0022003519047844324 values:  -46.45113 ----- 

-----iteration:  11 target diff:  0.0020422907941247508 values:  -46.492737 ----- 

-----iteration




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-------------------- fqe on dqn & sale --------------------


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base La

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/209652396/fold4/train/agent0/ckpt/offline_rem_50000.ckpt
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/209652396/fold4/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just 


-------------------- adv learner --------------------
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/798842024/fold4/train/agent0/ckpt/offline_rem_50000.ckpt
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/798842024/fold4/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- fqe on dqn & sale --------------------


To

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/717354021/fold3/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/717354021/fold3/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/717354021/fold3/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/717354021/fold3/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/717354021/fold3/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/717354021/fold4/train/agent/ckpt/offline_rem_10000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/798842024/fold4/train/agent1/ckpt/offline_rem_20000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/932136058/fold4/train/agent1/ckpt/offline_rem_35000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/209652396/fold4/train/agent1/ckpt/offline_rem_25000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/218175338/fold4/train/agent0/ckpt/offline_rem_25000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/717354021/fold4/train/agent/ckpt/offline_rem_15000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/798842024/fold4/train/agent1/ckpt/offline_rem_25000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/932136058/fold4/train/agent1/ckpt/offline_rem_40000.ckpt
saving model weights at /h


saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/717354021/fold4/train/agent/ckpt/offline_rem_40000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/798842024/fold4/train/agent1/ckpt/offline_rem_50000.ckpt
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/798842024/fold4/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/218175338/fold4/train/agent1/ckpt/offline_rem_40000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/798842024/fold4/train/agent2/ckpt/offline_rem_35000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/717354021/fold4/train/agent0/ckpt/offline_rem_30000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/209652396/fold4/train/agent2/ckpt/offline_rem_45000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/932136058/fold4/train/agent3/ckpt/offline_rem_5000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/218175338/fold4/train/ag




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/209652396/fold4/train/agent3/ckpt/offline_rem_45000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/717354021/fold4/train/agent1/ckpt/offline_rem_35000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/798842024/fold4/train/agent3/ckpt/offline_rem_40000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/932136058/fold4/train/agent4/ckpt/offline_rem_10000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/218175338/fold4/train/agent3/ckpt/offline_rem_5000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/209652396/fold4/train/ag

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/932136058/fold4/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/932136058/fold4/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/932136058/fold4/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disab




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-------------------- fqe on dqn & sale --------------------
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/717354021/fold4/train/agent2/ckpt/offline_rem_30000.ckpt


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  0 target diff:  0.9170553448470168 values:  -44.818 ----- 

-----iteration:  1 target diff:  0.0019798525967308195 values:  -44.770496 ----- 

-

-----iteration:  25 target diff:  0.0017577726321684145 values:  -45.700768 ----- 

-------------------- ckpt:  5000 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/209652396/fold4/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/209652396/fold4/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/209652396/fold4/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/209652396/fold4/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/209652396/fold4/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
-----iteration:  26 target diff:  0.0018610511465693585 values:  -45.68578 ----




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  27 target diff:  0.0031772452314691407 values:  -45.682938 ----- 

-----iteration:  28 target diff:  0.002152071878453672 values:  -45.654274 ----- 

-----iteration:  29 target diff:  0.002443605015520359 values:  -45.63412 ----- 

-----iteration:  30 target diff:  0.002409791200929896 values:  -45.604816 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  31 target diff:  0.002314160693018399 values:  -45.567493 ----- 

-----iteration:  32 target diff:  0.00229972642012544 values:  -45.492973 ----- 

-----iteration:  33 target diff:  0.002646982970262285 values:  -45.53775 ----- 

-----


-------------------- adv learner --------------------
-----iteration:  59 target diff:  0.002156755208124723 values:  -45.1272 ----- 

-----iteration:  60 target diff:  0.001865461158882512 values:  -45.02485 ----- 

-----iteration:  61 target diff:  0.002296283610085705 values:  -45.026665 ----- 

-----iteration:  62 target diff:  0.002150175268868076 values:  -45.031876 ----- 

-----iteration:  63 target diff:  0.002106898243034759 values:  -45.02268 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  64 target diff:  0.002038344832501675 values:  -44.951996 ----- 

-----iteration:  65 target diff:  0.002209656651585634 values:  -44.9471 ----- 

-----iteration:  66 target diff:  0.002070098769255071 values:  -44.850395 ----- 

-----iteration:  67 target diff:  0.00219647211414407 values:  -44.84031 ----- 

-----iteration:  68 target diff:  0.0020407372388402435 values:  -44.73871 ----- 

-----iteration:  69 target diff:  0.00218689389255475 values:  

target diff: 
 0.0017851948840686904 values:  -52.990173 ----- 

-----iteration:  6 target diff:  0.0016445135885291488 values:  -53.000217 ----- 

-----iteration:  0 target diff:  0.9184596868782201 values:  -46.732006 ----- 

-----iteration:  7 target diff:  0.0016502526309489782 values:  -52.98708 ----- 

-----iteration:  1 target diff:  0.0029345827546175133 values:  -46.73714 ----- 

-----iteration:  8 target diff:  0.0021366685257961887 values:  -53.00987 ----- 

-----iteration:  9 target diff:  0.0022939485552174897 values:  -53.04842 ----- 

-----iteration:  2 target diff:  0.0029517936147075837 values:  -46.728424 ----- 

-----iteration:  3 target diff:  0.0023776389900495277-----iteration:   values: 10 -46.715096 target diff:   0.0018480946620794334----- values:   -53.017124 ----- 



-----iteration:  11 target diff: -----iteration:   40.002014752373520022  values: target diff:  -53.07837  0.001923375953701584-----  
values: 
 -46.73449 ----- 

-----iteration:  12 target diff




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.
 0.00224406583397162
 values:  -46.57409 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner ---------------


-----iteration:  36 target diff:  0.0019838661842064755 values:  -46.074646 ----- 

-----iteration:  0 target diff:  0.9245767920520759 values:  -54.156162 ----- 

-----iteration:  1 target diff:  0.003414686844998113 values:  -54.18496 ----- 

-----iteration:  37 target diff:  0.0019756247005027704 values:  -45.99224 ----- 

-----iteration:  2 target diff:  0.0024315962553893254 values:  -54.190487 ----- 

-----iteration:  38 target diff:  0.002542459212086166 values:  -45.922943 ----- 

-----iteration:  3 target diff:  0.003137851249401969 values:  -54.138653 ----- 

-----iteration:  4 target diff:  0.0027259102080113037 values:  -54.113976 ----- 

-----iteration:  39 target diff:  0.0022501902610328655 values:  -45.823982 ----- 

-----iteration:  5 target diff:  0.002120940539201761 values:  -54.06637 ----- 

-----iteration:  40 target diff:  0.002509447665132916 values:  -45.7877 ----- 

-----iteration:  6 target diff:  0.0024155394773635627 values:  -54.118114 ----- 
-----iterati




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('floa


-----iteration:  6 target diff:  0.001424663489539984 values:  -53.091908 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

ckpt:  25000 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/209652396/fold4/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.


Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/209652396/fold4/train/agent0/trajs0.pkl!
Refresh




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-------------------- fqe on dqn & sale ---

 7 target diff:  0.0026102156472826202 values:  -52.215843 ----- 

-----iteration:  5 target diff:  0.0019794290043485575 values:  -45.304276 ----- 

-----iteration:  8 target diff:  0.002059877570960624 values:  -52.247738 ----- 

-----iteration:  6 target diff:  0.00279114504892513 values:  -45.325775 ----- 

-----iteration:  9 target diff:  0.0017616694156801966 values:  -52.320866 ----- 

-----iteration:  7 target diff:  0.0022811213151026338 values:  -45.368443 ----- 

-----iteration:  10 target diff:  0.002435534661208542 values:  -52.292633 ----- 

-----iteration:  8 target diff:  0.0024131712202172272 values:  -45.346645 ----- 

-----iteration:  11 target diff:  0.00215130260243329 values:  -52.251835 ----- 

-----iteration:  9 target diff:  0.0023078081588367104 values:  -45.341263 ----- 

-----iteration:  10 target diff:  0.002010671837920934 values:  -45.356293 ----- 

-----iteration:  12 target diff:  0.0019620390777011277 values:  -52.268993 ----- 

-----iteration:  13 tar




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  38 target diff:  0.002213



-------------------- ckpt:  15000 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/209652396/fold4/train/agent1/trajs1.pkl!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/798842024/fold4/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!Refresh buffer every 1000000 sampling!






Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/798842024/fold4/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/798842024/fold4/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/798842024/fold4/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load

-----iteration:  2 target diff:  0.001959334371935769 values:  -59.519188 ----- 

-----iteration:  6 target diff:  0.0022194732741695348 values:  -52.711212 ----- 

-----iteration:  8 target diff:  0.0022038774100864213 values:  -45.255768 ----- 

-----iteration:  3 target diff:  0.0016818382476709302 values:  -59.61687 ----- 

-----iteration:  9 target diff:  0.00188409293353275 values:  -45.307156 ----- 

-----iteration:  7 target diff:  0.0017934822616839134 values:  -52.729313 ----- 

-----iteration:  4 target diff:  0.0022562283273244994 values:  -59.63262 ----- 

-----iteration:  10 target diff:  0.0021618743324221864 values:  -45.332844 ----- 

-----iteration:  8 target diff:  0.0017635895758782137 values:  -52.67576 ----- 

-----iteration:  11 target diff:  0.0031702319666116877 values:  -45.37136 ----- 

-----iteration:  5 target diff:  0.0016482082270125266 values:  -59.691875 ----- 

-----iteration:  9 target diff:  0.002161405303487929 values:  -52.605923 ----- 

-----itera

 




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('f

-----iteration:  26 target diff:  0.0020575155847291503 values:  -45.53782 ----- 

-----iteration:  27 target diff:  0.001777112034015875 values:  -45.531548 ----- 

-----iteration:  0 target diff:  0.9259069128097855 values:  -52.539333 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  28 target diff:  0.0031265241951863036 values:  -----iteration: -45.549866  ----- 

1 target diff:  0.0020323542441749902 values:  -52.539806 ----- 

-----iteration:  2 target diff:  0.0016907610875068954 values:  -52.532635 ----- 

-----iteration:  29 target diff:  0.0019599335317161937 values:  -45.555546 ----- 

-----iteration:  0 target diff:  0.9200784731322449 values:  -57.5089 ----- 

-----iteration:  3 target diff:  0.002243

-----iteration:  50 target diff:  0.002574616692461198 values:  -45.162357 ----- 

-----iteration:  20 target diff:  0.003462111353599469 values:  -56.984646 ----- 

-----iteration:  51 target diff:  0.0025025773834356463 values:  -45.12356 ----- 
-----iteration: 
 21 target diff:  0.002299187985604828 values:  -56.778465 ----- 

-----iteration:  52 target diff:  0.0024268070642629314 values:  -45.109337 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.
 
22 target diff:  0.0028995060886422347 values:  -56.608513 ----- 

-----iteration:  53 target diff:  0.0016993481713678611 values:  -45.114212 ----- 

-----iteration:  23 target diff:  0.0025377858614879225 values:  -56.369884 ----- 

-----iteration:  0 target diff:  0.924893631585

-----iteration:  49 target diff:  0.0016564591126260522 values:  -54.810585 ----- 

-----iteration:  59 target diff:  0.002180028576082285 values:  -46.41653 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/717354021/fold4/train/agent3/ckpt/offline_rem_25000.ckpt
-----iteration:  50 target diff:  0.001621165154937374 values:  -54.64737 ----- 

-----iteration:  60 target diff:  0.0025139223615811067 values:  -46.39489 ----- 

-----iteration:  51 target diff:  0.0015096633452673846 values:  -54.42733 ----- 

-----iteration:  61 target diff:  0.0029463449468110896 values:  -46.38104 ----- 

-----iteration:  52 target diff:  0.0016599520739700659 values:  -54.209084 ----- 

-----iteration:  62 target diff:  0.002012643384199716 values:  -46.30177 ----- 

-----iteration:  53 target diff:  0.0015034379291113309 values:  -53.997185 ----- 

-----iteration:  63 target diff:  0.0017393586325727605 values:  -46.271046 ----- 

-----iteration:  54 target diff:  0.00168857349




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner -----iteration:  --------------------
65 target diff:  0.001665



-----iteration:  13 target diff:  0.0021395359540420386 values:  -56.137985 ----- 

-----iteration:  14 target diff:  0.003298827347920687 values:  -56.16715 ----- 

-----iteration:  15 target diff:  0.0019329431496400177 values:  -56.199024 ----- 

-----iteration:  16 target diff:  0.0016440636451683442 values:  -56.10199 ----- 

-----iteration:  17 target diff:  0.0020408190959157312 values:  -56.018417 ----- 

-----iteration:  18 target diff:  0.0020718507639399322 values:  -55.88069 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  0 target diff:  0.9185595370995241 values:  -45.030445 ----- 

-----iteration:  19 target diff:  0.002030222876918374 values:  -55.909363 ----- 

-----iteration:  1 target diff:  0

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/218175338/fold4/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/218175338/fold4/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/218175338/fold4/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
-----iteration:  Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/218175338/fold4/train/agent3/trajs3.pkl!
16Refresh buffer every 1000000 sampling! 
target diff:  0.001949049272767331 values:  -57.627693 ----- 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/218175338/fold4/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, p


-----iteration:  70 target diff:  0.0025831726430610847 values:  -43.289036 ----- 

-----iteration:  28 target diff:  0.0022991118989753832 values:  -56.409904 ----- 

-----iteration:  0 target diff:  0.9172108240953554 values:  -51.429985 ----- 

-----iteration:  71 target diff:  0.0018671673771783121 values:  -43.300137 ----- 

-----iteration:  29 target diff:  0.001975209345207957 values:  -56.165276 ----- 

-----iteration:  1 target diff:  0.0023297581117515767 values:  -51.433887 ----- 

-----iteration:  72 target diff:  0.0025635509622530047 values:  -43.294678 ----- 

-----iteration:  30 target diff:  0.0018682267209663823 values:  -56.077282 ----- 

-----iteration:  2 target diff:  0.0018430984025864612 values:  -51.38701 ----- 

-----iteration:  73 target diff:  0.0020759312401679646 values:  -43.273926 ----- 

-----iteration:  31 target diff:  0.0019542076409005214 values:  -55.883797 ----- 

-----iteration:  3 target diff:  0.002181341498443 values:  -51.445072 ----- 

----


-----iteration:  12 target diff:  -----iteration: 0.002071641551447174  99values:   target diff: -51.70009 0.0019916996136600516  -----values:   
-43.397022 
----- 

-----iteration:  0 target diff:  0.9229267432680651 values:  -58.89991 ----- 

-----iteration:  1 target diff:  0.0024898964672954082 values:  -58.920067 ----- 

-----iteration:  13 target diff:  0.0018889913072547347 values:  -51.764587 ----- 

-----iteration:  2 target diff:  0.0019252652364401888 values:  -58.851486 ----- 

-----iteration:  3 target diff:  0.002201111269431273 values:  -58.71552 ----- 

-----iteration:  14 target diff:  0.0029336189425796474 values:  -51.747326 ----- 

-----iteration:  4 target diff:  0.0022440270654664426 values:  -58.64755 ----- 

-----iteration:  15 target diff:  0.0026049368589401377 values:  -51.774105 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer construc

-----iteration:  24 target diff:  0.0025407486937984716 values:  -58.412098 ----- 

-----iteration:  20 target diff:  0.003914056334574394 values:  -46.8601 ----- 

-----iteration:  25 target diff:  0.0019978624357882735 values:  -58.42227 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  21 target diff:  0.0027591577987740993 values:  -46.818153 ----- 

-----iteration:  26 target diff:  0.0034810080829361946 values:  -58.323315 ----- 

-----iteration:  27 target diff:  0.0020523261230320556 values:  -58.31486 ----- 

-----iteration:  0 target diff:  0.9171663540750893 values:  -51.610756 ----- 

-----iteration:  28 target diff:  0.002509270179122302 values:  -58.30897 ----- 

-----iteration:  22 target diff:  0.00

-----iteration:  Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/932136058/fold4/train/agent0/trajs0.pkl!37
 target diff:  Refresh buffer every 1000000 sampling!0.0015592306600285687


Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/932136058/fold4/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/932136058/fold4/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
-----iteration:  78 target diff:  0.0016319300532850912 values:  -50.326416 ----- 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/932136058/fold4/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/932136058/fold4/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.ba




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('floa

-----iteration:  5 target diff:  0.0029450091619811223 values:  -52.06516 ----- 

-----iteration:  37 target diff:  0.003099064185603054 values:  -56.88429 ----- 

-----iteration:  48 target diff:  0.002351088447873077 values:  -44.709522 ----- 

-----iteration:  6 target diff:  0.0023818101382981054 values:  -52.0929 ----- 

-----iteration:  38 target diff:  0.001939471144541395 values:  -56.711098 ----- 

-----iteration:  7 target diff:  0.002460696726793837 values:  -51.974022 ----- 

-----iteration: -----iteration:   839  target diff: target diff:  0.0018104176136435216  0.002015376968256543values:   values: -56.570168 -51.941986  ----- 

----- 

-----iteration:  49 target diff:  0.0026753113218111186 values:  -44.703175 ----- 

-----iteration:  9 target diff:  0.0023915496014080898 values:  -52.033115 ----- 

-----iteration:  40 target diff:  0.0018822973990679061 values:  -56.344025 ----- 

-----iteration:  50 target diff:  0.0029335790159727678 values:  -44.671986 ----- 

-----i




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  44 target diff:  0.001698

-----iteration:  66 target diff:  0.002022704766920023 values:  -44.217144 ----- 

-----iteration:  3 target diff:  0.0015976122747050376 values:  -49.33048 ----- 

-----iteration:  4 target diff:  0.002058698704456755 values:  -49.212166 ----- 

-----iteration:  67 target diff:  0.001906258819036888 values:  -44.147163 ----- 

-----iteration:  5 target diff:  0.001630529035401858 values:  -49.20521 ----- 

-----iteration:  68 target diff:  0.0020857444420391584 values:  -44.095894 ----- 

-----iteration:  6 target diff:  0.0015902866145144834 values:  -49.20135 ----- 

-----iteration:  69 target diff:  0.002386268525486823 values:  -44.041424 ----- 

-----iteration:  7 target diff:  0.002003583547905877 values:  -49.26669 ----- 

-----iteration:  70 target diff:  0.002091597119935888 values:  -44.01102 ----- 

-----iteration:  71 target diff:  0.0019258599028863872 values:  -43.993294 ----- 

-----iteration:  8 target diff:  0.0023028209136760594 values:  -49.393726 ----- 



To chang

-----iteration:  95 target diff:  0.0018594809846448515 values:  -43.27731 ----- 

-----iteration:  5 target diff:  0.0019177459709480084 values:  -57.613705 ----- 

-----iteration:  96 target diff:  0.0019877207377088355 values:  -43.243217 ----- 

-----iteration:  6 target diff:  0.0018859146330210696 values:  -57.492588 ----- 

-----iteration:  97 target diff:  0.0017061547511403566 values:  -43.235226 ----- 

-----iteration:  7 target diff:  0.002536861401156834 values:  -57.46233 ----- 

-----iteration:  98 target diff:  0.0019481748378788757 values:  -43.200108 ----- 

-----iteration:  8 target diff:  0.002257921889099864 values:  -57.38461 ----- 

-----iteration:  99 target diff:  0.0017555519233523782 values:  -43.19279 ----- 

-----iteration:  9 target diff:  0.0015118241741012904 values:  -57.368958 ----- 

-----iteration:  10 target diff:  0.002191621841953196 values:  -57.32568 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_float

-----iteration:  50 target diff:  0.0018331479169187864 values:  -44.556965 ----- 

-----iteration:  67 target diff:  0.0028526170721423698 values:  -50.330578 ----- 

-----iteration:  8 target diff:  0.0016620685481206733 values:  -52.242725 ----- 

-----iteration:  51 target diff:  0.0017008605624674006 values:  -44.50848 ----- 

-----iteration:  68 target diff:  0.0017318073908011858 values:  -50.20559 ----- 

-----iteration:  9 target diff:  0.0017763120120234284 values:  -52.154236 ----- 

-----iteration:  52 target diff:  0.0016147836275051002 values:  -44.55293 ----- 

-----iteration:  69 target diff:  0.0016874676550377341 values:  -50.152103 ----- 

-----iteration:  10 target diff:  0.0014702890484494886 values:  -52.071995 ----- 

-------------------- ckpt:  30000 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/218175338/fold4/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!
 -49.948795 ----- 

Loaded trajectories




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  71 target diff:  0.002745027138883447 values:  -49.75006 ----- 

-----iteration:  54 target diff:  0.0017955079399518742 values:  -44.50483 ----- 

-----iteration:  72 target diff:  0.0029703007238522346 values:  -49.58232 ----- 

-----iteration:  55 target diff:  0.0019920389388839144 values:  -44.4617 ----- 

-----iteratio

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/932136058/fold4/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/932136058/fold4/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/932136058/fold4/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/932136058/fold4/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/932136058/fold4/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, 


-------------------- fqe on dqn & sale --------------------
-----iteration:  19 target diff:  0.0022236913062542154 values:  -49.5431 ----- 

-----iteration:  20 target diff:  0.0021564956131825785 values:  -49.553585 ----- 

-----iteration:  21 target diff:  0.001848428049953568 values:  -49.575745 ----- 

-----iteration:  22 target diff:  0.002138046887912863 values:  -49.624447 ----- 

-----iteration:  23 target diff:  0.001808645901254801 values:  -49.572227 ----- 

-----iteration:  24 target diff:  0.0015842431156242842 values:  -49.5284 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  25 target diff:  0.001704935286738249 values:  -49.45278 ----- 

-----iteration:  0 target diff:  0.9184355689431493 values:




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.
-43.674076 
----- 

-------------------- adv learner --------------------
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/717354021/fold4/train/agent3/ckpt/offline_rem_50000.ckpt
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/717354021/fold4/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 

-----iteration:  5 target diff:  0.0016068678036788302 values:  -45.575348 ----- 

-----iteration:  6 target diff:  0.0022294439582513405 values:  -45.558777 ----- 

-----iteration:  7 target diff:  0.0022716741999471816 values:  -45.538467 ----- 

-----iteration:  22 target diff:  0.0019821805171741503 values:  -49.285095 ----- 

-----iteration:  8 target diff:  0.0016425034754760579 values:  -45.54338 ----- 

-----iteration:  23 target diff:  0.0019725038058305915 values:  -49.38971 ----- 

-----iteration:  9 target diff:  0.0016679047537835525 values:  -45.53196 ----- 

-----iteration:  24 target diff:  0.0025764228531588223 values:  -49.437645 ----- 

-----iteration:  10 target diff:  0.0018595128158043447 values:  -45.500946 ----- 

-----iteration:  11 target diff:  0.0019177584776418415 values:  -45.494133 ----- 

-----iteration:  25 target diff:  0.0020396631701221765 values:  -49.370777 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/717354021/fold4/tra




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  16 target diff:  0.001782



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  0 target diff:  0.9179369526124148 values:  -50.441113 ----- 

-----iteration:  1 target diff:  0.0028704644929541134 values:  -50.404167 ----- 

-----iteration:  2 target diff:  0.0019891471312051164 values:  -50.36422 ----- 

-----iteration:  3 target diff:  0.0016618245742693058 values:  -50.38051 ----- 

-----iteration:  4 target diff:  0.001807907911276976 values:  -50.304638 ----- 

-----iteration:  5 target diff:  0.0017863335370204955 values:  -50.212452 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this

-----iteration:  5 target diff:  0.0018414387147277428 values:  -52.65041 ----- 

-----iteration:  18 target diff:  0.0023520837900828 values:  -45.365047 ----- 

-----iteration:  6 target diff:  0.0020255429089471857 values:  -52.51513 ----- 

-----iteration:  7 target diff:  0.002049720625714031 values:  -52.451725 ----- 

-----iteration:  8 target diff:  0.002284060942953498 values:  -52.422375 ----- 

-----iteration:  19 target diff:  0.0023125549259693837 values:  -45.355865 ----- 

-----iteration:  9 target diff:  0.0019655651022309475 values:  -52.445637 ----- 

-----iteration:  10 target diff:  0.002558472720403982 values:  -52.300453 ----- 

-----iteration:  20 target diff:  0.002248076526810339 values:  -45.37102 ----- 

-----iteration:  11 target diff:  0.0016706688124125155 values:  -52.277714 ----- 

-----iteration:  12 target diff:  0.001673456595323093 values:  -52.365658 ----- 

-----iteration:  21 target diff:  0.002407078821087666 values:  -45.324173 ----- 

-----iter


-------------------- adv learner --------------------
-----iteration:  55 target diff:  0.002807473533316449 values:  -43.83603 ----- 

-----iteration:  56 target diff:  0.002480857928452095 values:  -43.818398 ----- 

-----iteration:  57 target diff:  0.0024435148521229438 values:  -43.796364 ----- 

-----iteration:  58 target diff:  0.0024797165405292415 values:  -43.7745 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  59 target diff:  0.0024977810550872856 values:  -43.752872 ----- 

-----iteration:  60 target diff:  0.002548860205246454 values:  -43.706924 ----- 

-----iteration:  61 target diff:  0.0024298532881604578 values:  -43.652153 ----- 

-----iteration:  62 target diff:  0.0023765563328416706 values:  -43.536194 ----- 

-----iteration:  63 target diff:  0.0019364499530144744 values:  -43.44413 ----- 

-----iteration:  64 target diff:  0.0017164710609022047 values:  -43.366364 ----- 

-----iteration:  65 target diff:  0.0018531057626923


-----iteration:  0 target diff:  0.9187879009089703 values:  -46.924923 ----- 

-----iteration:  1 target diff:  0.0030724334269133214 values:  -46.974537 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  2 target diff:  0.001923584370026058 values:  -47.000244 ----- 

-----iteration:  3 target diff:  0.002065543411391374 values:  -47.05654 ----- 

-----iteration:  0 target diff:  0.9168094746981822 values:  -50.496437 ----- 

-----iteration:  4 target diff:  0.002217313866672738 values:  -47.009174 ----- 

-----iteration:  1 target diff:  0.0023176516778875772 values:  -50.37056 ----- 

-----iteration:  5 target diff:  0.0029199925217449746 values:  -47.059998 ----- 

-----iteration:  2 target diff:  0.0023151995

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/717354021/fold4/train/agent4/ckpt/offline_rem_35000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/717354021/fold4/train/agent4/ckpt/offline_rem_40000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/717354021/fold4/train/agent4/ckpt/offline_rem_45000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/717354021/fold4/train/agent4/ckpt/offline_rem_50000.ckpt
-------------------- behavior cloning --------------------
-------------------- ckpt:  5000 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/717354021/fold4/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/717354021/fold4/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/717354021/




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-------------------- fqe on dqn & sale --------------------


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base La


-----iteration:  0 target diff:  0.9195531741022008 values:  -57.334023 ----- 

-----iteration:  1 target diff:  0.0027369223047348026 values:  -57.28928 ----- 

-----iteration:  2 target diff:  0.0013652248868085204 values:  -57.265778 ----- 

-------------------- ckpt:  15000 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/717354021/fold4/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/717354021/fold4/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/717354021/fold4/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/717354021/fold4/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-------------------- fqe on dqn & sale --------------------


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  0 target diff:  0.9193106617572596 values:  -58.27144 ----- 

-----iteration:  1 target diff:  0.002888591059169062 values:  -58.282715 ----- 

-----iteration:  2 target diff:  0.0018582072049485832 values:  -58.42883 ----- 

-----iteration:  3 target diff:  0.00240

-----iteration:  15 target diff:  0.002482471598384945 values:  -57.799534 ----- 

-----iteration:  16 target diff:  0.0020318823619156928 values:  -57.815666 ----- 

-----iteration:  17 target diff:  0.0014993053262180353 values:  -57.748936 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  0 target diff:  0.9195496440323171 values:  -58.04486 ----- 

-----iteration:  1 target diff:  0.0032323900947556572 values:  -57.986176 ----- 

-----iteration:  2 target diff:  0.0017171736329555855 values:  -57.967453 ----- 

-----iteration:  3 target diff:  0.0016914692691634638 values:  -57.939297 ----- 

-----iteration:  4 target diff:  0.0023026936257365886 values:  -57.953533 ----- 

-----iteration:  5 target diff:  0.00


-----iteration:  0 target diff:  0.9196856783823851 values:  -57.92263 ----- 

-----iteration:  1 target diff:  0.0020236496047556117 values:  -57.921356 ----- 

-----iteration:  2 target diff:  0.0021210289614338584 values:  -57.86317 ----- 

-----iteration:  3 target diff:  0.001660571166152779 values:  -57.895428 ----- 

-----iteration:  4 target diff:  0.002221436751904834 values:  -57.871372 ----- 

-----iteration:  5 target diff:  0.0010958983995156741 values:  -57.945156 ----- 

-------------------- ckpt:  30000 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/717354021/fold4/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/717354021/fold4/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/rem/tmp/717354021/fold4/train/agent1/trajs1.pkl!
Refresh buffer every 10




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-------------------- fqe on dqn & sale --------------------


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base La

-----iteration:  1 target diff:  0.0025501036534178964 values:  -57.66332 ----- 

-----iteration:  2 target diff:  0.0018831396603604016 values:  -57.65239 ----- 

-----iteration:  3 target diff:  0.001682912753513148 values:  -57.67416 ----- 

-----iteration:  4 target diff:  0.0022668398145761688 values:  -57.708843 ----- 

-----iteration:  5 target diff:  0.0021345518500047065 values:  -57.664574 ----- 

-----iteration:  6 target diff:  0.0016894599587736821 values:  -57.671043 ----- 

-----iteration:  7 target diff:  0.0017416339519316067 values:  -57.748318 ----- 

-----iteration:  8 target diff:  0.0024643165447361475 values:  -57.774826 ----- 

-----iteration:  9 target diff:  0.0018990551717713172 values:  -57.708 ----- 

-----iteration:  10 target diff:  0.001639268971391225 values:  -57.73864 ----- 

-----iteration:  11 target diff:  0.0013949363925541058 values:  -57.722897 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('fl




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('floa



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  0 target diff:  0.9185546271277591 values:  -57.30308 ----- 

-----iteration:  1 target diff:  0.0032673995682701495 values:  -57.263077 ----- 

-----iteration:  2 target diff:  0.002402238392551516 values:  -57.274136 ----- 

-----iteration:  3 target diff:  0.0017338449493529337 values:  -57.235252 ----- 

-----iteration:  4 target diff:  0.0017427303728142353 values:  -57.20309 ----- 

-----iteration:  5 target diff:  0.0020569673183454926 values:  -57.087097 ----- 

-----iteration:  6 target diff:  0.002029467761946228 values:  -57.115597 ----- 

-----iteration:  7 target diff:  0.0015693553059838343 values:  -56.954952 ----- 

-----iteration:  8 target diff:  0.0020686

