In [1]:
import sys
import os
import copy
import random
import gym
import pickle

import numpy as np
import pandas as pd
import tensorflow as tf
tf.keras.backend.set_floatx('float32')

from itertools import permutations
from sklearn.model_selection import KFold, GridSearchCV

from multiprocessing import set_start_method
import multiprocessing as mp

path = os.path.abspath('..')
if path not in sys.path:
    sys.path.append(path)

from seal.agents.default_config import DEFAULT_CONFIG as config
from seal.agents.dqn import DQNAgent
# from seal.agents.qr_dqn import QuantileAgent
# from seal.agents.multi_head_dqn import MultiHeadDQNAgent
# from seal.agents.discrete_bcq import DiscreteBCQAgent

from seal.algos.kfold import CVS, KFoldCV
from seal.algos.advantage_learner import AdvantageLearner
from seal.algos.behavior_cloning import BehaviorCloning
from seal.algos.density_ratio import VisitationRatioModel
from seal.algos.fqe import FQE

def _fold_agent(base_config, kf, idx, num_actions):
    """Build the DQN agent attached to the ``idx``-th inner split of ``kf``.

    ``base_config`` is deep-copied so that each split gets its own
    ``persistent_directory`` / ``checkpoint_path`` without touching the
    caller's dict.
    """
    fold_config = copy.deepcopy(base_config)
    fold_config['persistent_directory'] = kf.agent_paths[idx]
    fold_config['checkpoint_path'] = kf.ckpt_paths[idx]
    return DQNAgent(num_actions=num_actions, config=fold_config)


def one_step(seed):
    """Run one seeded replication of the DQN-vs-SEAL comparison.

    For each of the ``nfolds`` outer splits produced by ``CVS``:
      1. train one DQN agent on the training part, plus one DQN agent per
         inner ``KFoldCV`` split;
      2. fit a behavior-cloning model on ``kf.trajs``;
      3. for every checkpoint in ``ckpts``, reload the agents, obtain
         ``qtildes`` from ``kf.update_q``, fit an ``AdvantageLearner`` on the
         mean-centred ``qtildes``, and run fitted Q evaluation (``FQE``) on
         the held-out trajectories for both the plain DQN policy and the
         advantage-learner ("seal") policy.

    Parameters
    ----------
    seed : int
        Seeds NumPy and TensorFlow and is passed as ``random_state`` to both
        ``CVS`` and ``KFoldCV``.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``(fold, ckpt)`` with columns ``'dqn'`` and ``'seal'``
        holding the ``values`` attribute of the corresponding ``FQE`` object.
    """
    np.random.seed(seed)
    tf.random.set_seed(seed)

    traj_path = './data/mh/dqn/trajs_mh.pkl'
    nfolds = 5
    n_splits = 5
    # NOTE(review): checkpoints up to 20000 are loaded below while
    # max_training_steps is 10000 and a checkpoint is written every 1000
    # steps -- confirm that every file in `ckpts` is actually produced.
    ckpts = (np.arange(20) + 1)*1000

    num_actions = 5
    # Work on a private copy so the imported DEFAULT_CONFIG (shared module
    # state) is not mutated by this run.
    cfg = copy.deepcopy(config)
    cfg['online'] = False
    cfg['lr'] = 5e-4
    cfg['decay_steps'] = 50000
    cfg['max_training_steps'] = 10000
    cfg['training_steps_to_checkpoint'] = 1000
    cfg['training_steps_to_eval'] = 100000
    cfg['hiddens'] = [64,64]
    cfg['double'] = False
    cfg['dueling'] = False

    index = pd.MultiIndex.from_product([np.arange(nfolds), ckpts])
    columns = ['dqn',  'seal']
    rets = pd.DataFrame(index=index, columns=columns)

    # Both FQE runs share the same hyper-parameters; only the policy differs.
    fqe_kwargs = dict(num_actions=num_actions, activation='tanh',
                      hiddens=cfg['hiddens'], max_iter=100, eps=0.0015)

    print('-'*20, 'start', '-'*20)
    cvs = CVS(traj_path, n_splits=nfolds, random_state=seed)
    cvs.split()
    for fold in range(nfolds):
        train_path = cvs.train_paths[fold] + 'trajs.pkl'
        kf = KFoldCV(train_path, n_trajs=None, n_splits=n_splits, shuffle=False, random_state=seed)
        kf.split()

        print('-'*20, 'training agent', '-'*20)
        # Agent trained on this fold's training trajectories.
        cfg['persistent_directory'] = kf.agent_path
        cfg['checkpoint_path'] = kf.ckpt_path
        agent = DQNAgent(num_actions=num_actions, config=cfg)
        agent.learn()

        print('-'*20, 'training agents', '-'*20)
        # agent_1, ..., agent_K: one agent per inner split.
        for idx in range(kf.n_splits):
            _fold_agent(cfg, kf, idx, num_actions).learn()

        # Held-out trajectories used for fitted Q evaluation.
        # NOTE(review): pickle.load can execute arbitrary code; this file is
        # presumably written by cvs.split() -- do not point it at untrusted data.
        test_path = cvs.test_paths[fold] + 'trajs.pkl'
        with open(test_path, 'rb') as f:
            trajs = pickle.load(f)

        print('-'*20, 'behavior cloning', '-'*20)
        # Behavior cloning on every (state, action) pair of kf.trajs.
        bc = BehaviorCloning(num_actions=num_actions)
        bc_states = np.array([transition[0] for traj in kf.trajs for transition in traj])
        bc_actions = np.array([transition[1] for traj in kf.trajs for transition in traj])
        bc.train(bc_states, bc_actions)

        for ckpt in ckpts:
            print('-'*20, 'ckpt: ', ckpt, '-'*20)
            ckpt_file = 'offline_dqn_{}.ckpt'.format(ckpt)
            agent = DQNAgent(num_actions=num_actions, config=cfg)
            agent.load(kf.ckpt_path + ckpt_file)

            agents = []
            for idx in range(kf.n_splits):
                agent_idx = _fold_agent(cfg, kf, idx, num_actions)
                agent_idx.load(kf.ckpt_paths[idx] + ckpt_file)
                agents.append(agent_idx)
            states, _qvalues, qtildes = kf.update_q(agents, bc)

            print('-'*20, 'adv learner', '-'*20)
            # A "dml" baseline (AdvantageLearner fitted on the mean-centred raw
            # qvalues) used to be evaluated alongside and is currently disabled.
            advs = qtildes - qtildes.mean(axis=1, keepdims=True)
            seal_agent = AdvantageLearner(num_actions=num_actions)
            seal_agent._train(states, advs)

            print('-'*20, 'fqe on dqn & seal', '-'*20)
            fqe_dqn = FQE(agent.greedy_actions, **fqe_kwargs)
            fqe_dqn.train(trajs)
            fqe_seal = FQE(seal_agent.greedy_actions, **fqe_kwargs)
            fqe_seal.train(trajs)

            rets.loc[(fold, ckpt), 'dqn'] = fqe_dqn.values
            rets.loc[(fold, ckpt), 'seal'] = fqe_seal.values

    return rets

In [2]:
# Run five seeded replications of one_step in parallel (seeds 0..4, one
# worker process each) and pickle the list of per-seed result DataFrames.
save_path = './data/mh/dqn/'
# The context manager terminates the workers on exit, so they are cleaned up
# even when a replication raises; pool.map has already collected every result
# by the time the block is left normally.
with mp.Pool(5) as pool:
    rets = pool.map(one_step, range(5))

with open(os.path.join(save_path, 'rets_dqn_mh.pkl'), 'wb') as f:
    pickle.dump(rets, f)

----------------------------------------------------------------------------------------------------    start startstart startstart --------------------  --------------------
----------------------------------------
 

--------------------
-------------------- training agent --------------------
------------------------------------------------------------ --------------------  training agent training agenttraining agent training agent --------------------  --------------------
--------------------
--------------------

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold0/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold0/train/agent/trajs.pkl!Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold0/train/agent/trajs.pkl!

Refresh buffer every 1000000 sampling!Refresh buffer every 1000000 sampling!

Loaded tra


saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold0/train/agent/ckpt/offline_dqn_1000.ckptsaving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold0/train/agent/ckpt/offline_dqn_1000.ckpt

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold0/train/agent/ckpt/offline_dqn_1000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold0/train/agent/ckpt/offline_dqn_1000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold0/train/agent/ckpt/offline_dqn_1000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold0/train/agent/ckpt/offline_dqn_2000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold0/train/agent/ckpt/offline_dqn_2000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold0/train/agent/ckpt/offline_dqn_2000.ckpt
saving model weights at /home/jupyt/ley


saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold0/train/agent/ckpt/offline_dqn_20000.ckpt
-------------------- training agents --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold0/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold0/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold0/train/agent0/ckpt/offline_dqn_17000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold0/train/agent0/ckpt/offline_dqn_18000.ck


saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold0/train/agent1/ckpt/offline_dqn_4000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold0/train/agent1/ckpt/offline_dqn_1000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold0/train/agent1/ckpt/offline_dqn_4000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold0/train/agent1/ckpt/offline_dqn_3000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold0/train/agent1/ckpt/offline_dqn_1000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold0/train/agent1/ckpt/offline_dqn_5000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold0/train/agent1/ckpt/offline_dqn_2000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold0/train/agent1/ckpt/offline_dqn_5000.ckpt
saving model weights at /home/j

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold0/train/agent2/ckpt/offline_dqn_3000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold0/train/agent2/ckpt/offline_dqn_4000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold0/train/agent1/ckpt/offline_dqn_19000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold0/train/agent2/ckpt/offline_dqn_2000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold0/train/agent2/ckpt/offline_dqn_5000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold0/train/agent2/ckpt/offline_dqn_4000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold0/train/agent1/ckpt/offline_dqn_20000.ckpt
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold0/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 samp

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold0/train/agent2/ckpt/offline_dqn_17000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold0/train/agent3/ckpt/offline_dqn_1000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold0/train/agent2/ckpt/offline_dqn_14000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold0/train/agent2/ckpt/offline_dqn_15000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold0/train/agent3/ckpt/offline_dqn_3000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold0/train/agent2/ckpt/offline_dqn_18000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold0/train/agent3/ckpt/offline_dqn_2000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold0/train/agent2/ckpt/offline_dqn_15000.ckpt
saving model weights at /ho


saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold0/train/agent3/ckpt/offline_dqn_1000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold0/train/agent3/ckpt/offline_dqn_4000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold0/train/agent3/ckpt/offline_dqn_8000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold0/train/agent3/ckpt/offline_dqn_10000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold0/train/agent3/ckpt/offline_dqn_1000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold0/train/agent3/ckpt/offline_dqn_2000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold0/train/agent3/ckpt/offline_dqn_5000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold0/train/agent3/ckpt/offline_dqn_9000.ckpt
saving model weights at /home/

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold0/train/agent4/ckpt/offline_dqn_9000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold0/train/agent4/ckpt/offline_dqn_7000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold0/train/agent4/ckpt/offline_dqn_4000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold0/train/agent3/ckpt/offline_dqn_20000.ckpt
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold0/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, 

-----iteration:  0 target diff:  0.9186424265014933 values:  -60.745728 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold0/train/agent4/ckpt/offline_dqn_20000.ckpt
-------------------- behavior cloning --------------------
-----iteration:  1 target diff:  0.005777950719364351 values:  -60.75987 ----- 

-----iteration:  2 target diff:  0.004216759021856886 values:  -60.808655 ----- 

-----iteration:  3 target diff:  0.0033880346445750455 values:  -60.81739 ----- 

-----iteration:  4 target diff:  0.0033067099203269498 values:  -60.860172 ----- 

-----iteration:  5 target diff:  0.0035443518852534854 values:  -60.93413 ----- 

-----iteration:  6 target diff:  0.003225099077480865 values:  -60.98051 ----- 

-------------------- ckpt:  1000 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold0/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /ho




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  4 target diff:  0.0015580852442467336 values:  -53.360867 ----- 

-----iteration:  46 target diff:  0.005166239107769059 values:  -60.74488 ----- 

-----iteration:  5 target diff:  0.0017340407288818625 values:  -53.437065 ----- 

-----iteration:  47 target diff:  0.004694299467471845 values:  -60.72405 ----- 

-----iteratio

-----iteration:  8 target diff:  0.0030091948073875034 values:  -59.51131 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold0/train/agent4/ckpt/offline_dqn_16000.ckpt
-----iteration:  63 target diff:  0.002465329680213076 values:  -58.635624 ----- 
-----iteration: 
 9 target diff:  0.0026348329096316956 values:  -59.57858 ----- 

-----iteration:  0 target diff: -----iteration:   0.919110544944037764  values: target diff:   0.003411922480191291 values: -53.542675 -58.595825 -----  

----- 

-----iteration:  10 target diff:  0.003178258487127649 values:  -59.62281 ----- 

-----iteration:  65 target diff:  0.00283973758364515 values:  -58.48074 ----- -----iteration:  
1
 target diff:  0.0030615613315193657 values:  -53.615513 ----- 

-----iteration:  11 target diff:  0.0023303627208228813 values:  -59.703037 ----- 

-----iteration:  2 target diff:  0.0020527928575428626 values:  -53.686924 ----- 

-----iteration:  66 target diff:  0.001910432152906927 

-----iteration:  45 target diff:  0.002515239563438768 values:  -60.100018 ----- 

-----iteration:  16 target diff:  0.0015994638862548924 values:  -52.707767 ----- 

-----iteration:  4 target diff:  0.003477236285817152 values:  -60.947918 ----- 

-----iteration:  46 target diff:  0.001953711316357038 values:  -60.102093 ----- 

-----iteration:  17 target diff:  0.0021175293119691293 values:  -52.81009 ----- 

-----iteration:  5 target diff:  0.0020383575223146496 values:  -60.880688 ----- 

-----iteration:  47 target diff:  0.002100207294533983 values:  -60.07398 ----- 

-----iteration:  6 target diff:  0.0019487540407082382 values:  -60.91457 ----- 
-----iteration:  
18 target diff:  0.0019165722008538748 values:  -52.85589 ----- 

-----iteration:  48 target diff:  0.0025348671731561553 values:  -60.057278 ----- 

-----iteration:  7 target diff:  0.001553238715913685 values:  -60.933235 ----- 

-----iteration:  19 target diff:  0.0018059890746715513 values:  -52.958305 ----- 

-----




-----iteration:  14 Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold0/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!


Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold0/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold0/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold0/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold0/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are t




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  15--------------------  adv learnertarget diff:  -------------------- 
0.0021559697683623384 values:  -54.944683 ----- 

-----iteration:  64 target diff:  0.0026887088984604286 values:  -60.05929 ----- 

-----iteration:  16 target diff:  0.002248768248557926 values:  -54.95686 ----- 

-------------------- ckpt:  1000 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold0/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!
-----iteration:  Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold0/train/agent0/trajs0.pkl!65
 Refresh buffer every 1000000 sampling!target dif

-----iteration:  3 target diff:  0.0022636114766202116 values:  -61.102455 ----- 

-----iteration:  9 target diff:  0.0021621819753060937 values:  -----iteration: -53.454712 42 -----  target diff:  

0.003067975551376051 values:  -54.831165 ----- 

-----iteration:  92 target diff:  0.002100367138743866 values:  -60.10538 ----- 

-----iteration:  10-----iteration:   target diff: 4  0.0031025373981434577 values: target diff:   -53.50097 -----0.0021691084368348 

 values: -----iteration:   -61.12298 ----------iteration:  43 
 93
 target diff:  target diff: 0.002017631509637519 0.002808717300372275 values:  -54.786137  -----values:  -60.094746  -----

 

-----iteration:  94 target diff:  0.0019513411638964787 values:  -60.082363 ----- 

-----iteration:  44 -----iteration: target diff:   50.002634040960927039  target diff:  values:  -54.843872 -----0.0018632366617708865  
values: 
 -61.126675 ----- 

-----iteration:  11 target diff:  0.0025635940059189326 values:  -53.488384 ----- 

-----it

 
28 target diff:  0.0020047752512611598 values:  -52.90741 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to hav




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('floa



Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold0/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold0/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
-----iteration:  2 target diff:  0.0039937140415495405 values:  -53.654446 ----- 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold0/train/agent2/trajs2.pkl!
-----iteration: Refresh buffer every 1000000 sampling!
 53 target diff:  0.0020518889299607334 values:  -57.86269 ----- 

-----iteration:  11 target diff:  0.0020921055636866563 Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold0/train/agent3/trajs3.pkl!values: 
 Refresh buffer every 1000000 sampling!-53.127018
 ----- 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold0/train/agent4/trajs4.pkl!
Re

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold0/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold0/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_


-------------------- adv learner Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold0/train/agent0/trajs0.pkl!--------------------

Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold0/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold0/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold0/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold0/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the l

-----iteration:  13 target diff:  0.0019533788621390793 values:  -53.40733 ----- 

-----iteration:  6 target diff:  0.0017269542139718556 values:  -61.59438 ----- 

-----iteration:  4 target diff:  0.0027032289360082295 values:  -51.74749 ----- 

-----iteration:  14 target diff:  0.0023252707162649813 values:  -53.445484 ----------iteration:   6
 
target diff:  0.0025438602492004856 values:  -52.814106 ----- 

-----iteration:  4 target diff:  0.0020413571384684855 values:  -57.14886 ----- 

-----iteration:  7 -----iteration:  target diff:  50.00141384463344414  target diff: values:   0.0019534604187517284-61.59579 -----  
values: 
 -51.762993 ----- 

-----iteration:  15 target diff:  0.0014452645536677717 values:  -53.47169 ----- 

-----iteration:  7 target diff:  0.0011950972760717974 values:  -52.914665 ----- 

-------------------- ckpt:  3000 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold0/train/agent/trajs.pkl!
Refre




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  7 target diff:  0.0016523066553915115 values:  -57.107815 ----- 

-----iteration:  8 target diff:  0.0017292168637130078 values:  -57.094273 ----- 

-----iteration:  9 target diff:  0.0014317993313882778 values:  -57.085884 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('fl




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.
target diff: 
 0.001930225104886685 values:  -58.791435 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dt

-----iteration:  18 target diff:  0.0022086792708523742 values:  -63.150204 ----- 

-----iteration:  2 target diff:  0.0020488632935200825 values:  -53.075787 ----- 

-----iteration:  19 target diff:  0.00200214517662249 values:  -63.144253 ----- 

-----iteration:  18 target diff:  0.0025059068708780962 values:  -59.084248 ----- 

-----iteration:  20 target diff:  0.0016425627760004021 values:  -63.157192 ----- 

-----iteration:  3 target diff:  0.0024121156061160448 values:  -53.100445 ----- 

-----iteration:  4 target diff:  0.0015342243384396741 values:  -53.06956 ----- 

-----iteration:  19 target diff:  0.0030154373975247112 values:  -59.077465 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  21 target diff: 




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- fqe on dqn & sale --------------------


To change all layers to have dtype

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold0/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold0/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold0/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold0/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold0/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold0/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold0/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  48 target diff:  0.0015808194969210915 values:  -60.76083 ----- 


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.







Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold0/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold0/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
-----iteration:  33 target diff:  0.002683906420420953 values:  Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold0/train/agent2/trajs2.pkl!-55.06239
 Refresh buffer every 1000000 sampling!-----
 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold0/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold0/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, 





Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold0/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
 0.0016200170911862474 values:  -53.70736 ----- 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold0/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold0/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
-----iteration:  34 target diff:  0.002833661897058973 values:  Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold0/train/agent3/trajs3.pkl!-54.9264 -----
Refresh buffer every 1000000 sampling! 


Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold0/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

10 target diff:  0.001858670937692914 values:  -58.843575 ----- 

-------------------- adv learner --------------------
-----iteration:  3 target diff:  0.0012796546355706984 values:  -53.761356 ----- 

-------------------- ckpt:  6000 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold0/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold0/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
-----iteration:  35 target diff:  0.0028333274114634393 values:  -54.81787 ----- 

Loaded trajectories from load path: /home/jupy




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  36 target diff:  0.0032886584440336274 values:  -54.703438 ----- 

-----iteration:  12 target diff:  0.0019658459760758407 values:  -58.975403 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  37 target diff:  0.0023812114973151798 values:  -54.605766 ----- 

-----iteration:  38 target diff

-----iteration: 
 2 target diff:  0.0019324407668981473 values:  -61.25817 ----- 

-----iteration:  48 target diff:  0.00264107897056824 values:  -53.278446 ----- 

-----iteration:  23 target diff:  0.0016826583207768156 values:  -59.32768 ----------iteration:   

1 target diff:  0.003725686890027848 values:  -52.042316 ----- 

-----iteration:  3 target diff:  0.0017082095180046574 values:  -61.256336 ----- 

-----iteration:  24 target diff:  0.0016748250870592808 values:  -----iteration:  -59.40348449  ----- target diff:  
0.0033294806679561354
 values:  -53.136475 ----- 

-----iteration:  0 target diff: -----iteration:   0.92225172251027822 target diff:  values:   -53.7492680.0034353051945817037  values:  -52.27668----- ----- 
 


-----iteration:  4 target diff:  0.0026735639847541084 values:  -61.26119 ----- 

-----iteration:  3 target diff:  0.004311197077855571 values:  -52.561882-----iteration:   1-----  
target diff: 
 0.0031450191190229293 values:  -53.74313 ----- 

-----iterat

----- 




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  8 target diff:  0




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.
 
values:  -53.257023 ----- 



To change all layers to have dtype float64 by default, call `tf.k

 
0.0016107989176502983 values:  -59.248528 ----- 

-------------------- adv learner --------------------


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  21 target diff:  0.0019594462380750576 values:  -53.920757 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  54 target diff:  0.0015920178902224017 values:  -59.18468 ----- 

-----iteration:  22 target diff:  0.0020118175759420843 values:  -54.01014 ----- 

-----iteration:  55 target diff:  0.001809

-----iteration:  32 target diff:  0.0021328311541100064 values:  -54.621788-----iteration:  -----  1
 
target diff:  0.0038385609051739818 values:  -60.966206 ----- 

-----iteration:  1 target diff:  0.0024273280530417137 values:  -54.454895 ----- 

-----iteration:  33 target diff:  0.0017461667476937595 values:  -54.681797 ----- 

-----iteration:  2 target diff:  0.002235465624762585 values:  -60.981953 ----- 

-----iteration:  2 target diff:  0.0027065899656053 values:  -54.52659 ----- 

-----iteration:  34 target diff: -----iteration:  0.002366340666193669 3 values:  target diff:   -54.7364920.0022756712264807347  -----values:   -54.561054
 
----- 

-----iteration:  3 target diff:  0.002439582166733286 values:  -61.069286 ----- 

-----iteration:  4 target diff:  0.0017268237665325826 values:  -54.586353 ----- 

-----iteration:  35 target diff:  0.0019483245456665427 values:  -54.759357 ----- 

-----iteration:  4 target diff:  0.002949171687702807 values:  -61.065247 ----- 

-----ite

-----iteration:  3 target diff:  0.0021974370349048195 values:  -57.882336 ----- 

-----iteration:  1 target diff:  0.0026669052853012143 values:  -51.360962 ----- 

-----iteration:  46 target diff:  0.001988667256046266 values:  -54.720608 ----- 

-----iteration:  4 target diff:  0.0015698184394602605 values:  -57.91515 ----- 

-----iteration:  47 target diff:  0.0019380287664822692 values:  -54.794415 ----- 

-----iteration:  2 target diff:  0.0023305383821694125 values:  -51.439716 ----- 

-----iteration:  5 target diff:  0.0021199305458907414 values:  -57.939045 ----- 

-----iteration:  10 target diff:  0.0016283455250156592 values:  -53.384235 ----- 

-----iteration:  48 target diff:  0.0019032047136809545 values:  -54.881313-----iteration:  -----  
3
 target diff:  0.0028121589462316745 values:  -51.494656 ----- 

-----iteration:  6 target diff:  0.0014071459179256815 values:  -57.964344 ----- 

-----iteration:  11 target diff:  0.0013442785626507558 values:  -53.44397 ----- 

--




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.
-----iteration:  
5 target diff:  0.0017389945101486048 values:  -51.580036 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold0/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
-----iteration:  13 Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold0/train/agent1/trajs1.pkl!
target diff:  Refresh buffer every 1000000 sampling!0.002007021892805219
 values:  -52.394855 ----- 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold0/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold0/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold0/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, p


-----iteration:  2 target diff:  0.0014309183107328871 values:  -60.292946 ----- 

-------------------- adv learner --------------------


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

values:  -52.527683 ----- 

-----iteration:  5 target diff:  0.0019493551789022954 values:  -57.56414 ----- 

-----iteration:  15 target diff:  0.0021685253450546096 values:  -52.564392 ----- 

-----iteration:  6 target diff:  0.0018510201862038458 values:  -57.568684 ----- 

-----iteration:  0 target diff:  0.9216408125750449 values:  -51.46612 ----- 

-----iteration:  16 target diff:  0.002004435754997146 values:  -52.59497 ----- 

-----iteration:  1--------------------  target diff: fqe on dqn & sale  0.002163335093931812-------------------- 


-----it

-----iteration:  25 target diff:  0.002500293392901661 values:  -53.06918 ----- 

-----iteration:  5 target diff:  0.004484023941775807 values:  -53.728687 ----- 

-----iteration:  16 target diff:  0.002198694069195705 values:  -57.646145 ----- 

-----iteration:  6 target diff:  0.0023405170180000544 values:  -53.71683 ----- 

-----iteration:  26 target diff:  0.002226575450914527 values:  -53.041546 ----- 

-----iteration:  17 target diff:  0.0018677317445715334 values:  -57.65485 ----- 

-----iteration:  7 target diff:  0.0023975123032624105 values:  -53.81412 ----- 

-----iteration:  18 target diff:  0.0017256307541585183 values:  -57.68413 ----- 

-----iteration:  8 target diff:  0.0029609466375776165 values:  -53.906822 ----- 

-----iteration:  27 target diff:  0.002518381122541632 values:  -53.035614 ----- 

-----iteration:  19 target diff:  0.0016774511322825625 values:  -57.670437 ----- 

-----iteration:  9 target diff:  0.0018245734304837696 values:  -53.890095 ----- 



To ch


-----iteration:  11 target diff:  0.0023062337509986044 values:  -51.265015 ----- 

-----iteration:  40 target diff:  0.0018755967004658474 values:  -52.8709 ----- 

-----iteration:  2 target diff:  0.001591878116754314 values:  -59.88381 ----- 

-----iteration:  41 target diff:  0.0016617106968791256 values:  -52.843998 ----- 

-----iteration:  3 target diff: -----iteration:   0.00319868263843411940 values:   target diff: -59.864754  0.9205352755116136 -----values:   
-53.097576 
----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  1 target diff:  0.002396967326360675 values:  -52.9968 ----- 

-----iteration:  42 target diff:  0.0017774422140501493 values:  -52.78955 ----- 

-----iteration:  2 target diff:  0.0025761213325750615-----iteration:   values: 4  -53.051426target diff:   0.0014297564281322173-----  values: 
 -59.87762
 ----- 

-----iteration:  12 target diff:  0.0015908338139365265 values:  -51.271873 ----- 

-----iteration:  3 target diff:  

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold0/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold0/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold0/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold0/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold0/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, 

----- 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold0/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold0/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold0/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold0/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold0/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold0/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold0/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold0/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!-----iteration: 
 4 target diff:  0.002204098483375191 values:  -59.325645 ----- Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold0/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!


Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold0/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pa


-----iteration:  0 target diff:  0.9222473604399753 values:  -52.948524 ----- 

-----iteration:  10 target diff:  0.0020968426144224272 values:  -59.42144 ----- 

-----iteration:  11 target diff:  0.0029557572167443675 values:  -59.35837 ----- 

-----iteration:  0 target diff:  0.9200414065066297 values:  -59.745922 ----- 

-----iteration:  1 target diff:  0.003066383116290615 values:  -52.97904 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  12 target diff:  0.002081805306769375 values:  -59.322407 ----- 

-----iteration:  2 target diff:  0.0029236477331182277 values:  -52.983067 ----- 

-----iteration:  1 target diff:  0.0021605882317366583 values:  -59.78477 ----- 

-----iteration:  13 target diff:  0.0027966

-----iteration:  11 target diff: -----iteration:   00.001628599692434489  target diff:  0.9235785369576942values:  values:  -52.976154 -59.499683 -----  ----- 



-----iteration:  16 target diff:  0.002154505183147589 values:  -55.048244 ----- 

-----iteration:  1 target diff:  0.00391426965045905 values:  -53.000546 ----- 

-----iteration:  28 target diff:  0.0022386076208470605 values:  -59.2312 ----- 

-----iteration:  17 target diff:  0.0014624778485150917 values:  -55.05001 ----- 

-------------------- ckpt:  10000 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold0/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!
-----iteration:  2Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold0/train/agent0/trajs0.pkl! 
target diff: Refresh buffer every 1000000 sampling! 0.002812796370847823


Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold0/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold0/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold0/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold0/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.
6
 target diff:  0.0019793659353924053 values:  -53.130474 ----- 



To 

values: -----iteration:   -53.08515534  -----target diff:   0.001462339885293638 values:  -58.680416 ----- 



-----iteration:  20 target diff:  0.0022517593134086748 values:  -52.668316 ----- 

-----iteration:  20 target diff:  0.0020522864275927303 values:  -53.22349 ----- 

-----iteration:  29 target diff:  0.002686008839068588 values:  -53.100555 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  21 target diff:  0.002155354017780003 values:  -53.207615 ----- 

-----iteration:  30 target diff:  0.0025331218456216974 values:  -53.135044 ----- 

-----iteration:  22 target diff:  0.0022191430532810677 values:  -53.30752 ----- 

-----iteration:  21 target diff:  0.002403122619896588 values:  -52.68513 ----- 

-----i

-----iteration:  73 target diff:  0.001990182190912216 values:  -52.176994 ----- 

-----iteration:  59 target diff:  0.0020645817226556907 values:  -52.070404 ----- 

-----iteration:  38 target diff:  0.0016039027011718913 values:  -59.69029 ----- 

-----iteration:  74 target diff:  0.0024025081562664766 values:  -52.076473 ----- 

-----iteration:  60 target diff:  0.00159817764416742 values:  -51.98244 ----- 

-----iteration:  39 target diff:  0.0016984380051682335 values:  -59.717384 ----- 

-----iteration:  75 target diff:  0.0019514871054542356 values:  -52.0137 ----- 

-----iteration:  6 target diff:  0.0013594566935627326 values:  -53.357292 ----- 

-------------------- -----iteration:  ckpt:  6111000 target diff:   --------------------
0.0017883357916908507 values:  -51.874897 ----- 
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold0/train/agent/trajs.pkl!
-----iteration: 
Refresh buffer every 1000000 sampling!




Loaded trajectories fr




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.
 
target diff:  0.0015506795568062941 values:  -59.68368 ----- 

-----iteration:  62 target diff:  0.001602729672876284 values:  -51.982666 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  77 target diff:  0.0016910888656900848 values:  -51.877953 ----- 

-------------------- adv learner --------------------
-----iteration:  23 target diff:  0.0029990400922400595 values:  -61.609356 ----- 

-----iteration:  42 target d




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  44 target diff:  0.0017875692437204216 values:  -59.68466 ----- 


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.




To change a


-------------------- adv learner -------------------------iteration: 
 77 target diff:  0.002636357717763968 values:  -52.00601 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  78 target diff:  0.0027125310596004776 values:  -51.93121 ----- 

-----iteration:  29 target diff:  0.0024814662174048895 values:  -61.144566 ----- 

-----iteration:  0 target diff:  0.9215384821591057 values:  -52.90936 ----- 

-----iteration:  79 target diff:  0.002480300653374593 values:  -51.69549 ----- 

-----iteration:  1 target diff:  0.0024925626930006072 values:  -52.923332 ----- 

-----iteration:  80-----iteration:   0 target diff:  target diff:  0.921393847942191 0.0023268049331913667values:  -53.44164 ----- 
 
values:  -51.5576

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold0/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold0/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold0/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
-----iteration:  Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold0/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!17
 target diff:  0.002558407042024248 values:  -52.498135-----iteration:   Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold0/train/agent4/trajs4.pkl!-----
50  Refresh buffer every 1000000 sampling!
target diff:  
0.0016632075867876522 
values:  -58.06143 ----- 



To change all layers to have dtype float64 by d


-------------------- adv learner --------------------
-----iteration:  20 target diff:  0.0023293540391332444 values:  -58.35655 ----- 

-----iteration:  5 target diff:  0.0022042372010285883 values:  -51.80942 ----- 

-----iteration:  20 target diff:  0.0023187243590460397 values:  -52.433006 ----- 

-----iteration:  21 target diff:  0.002259916352791107 values:  -58.34673 ----- 

-----iteration:  6 target diff:  0.0014490430242615883 values:  -51.813564 ----- 

-------------------- ckpt:  11000 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold0/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!


Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold0/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold0/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
-----

 22
 target diff:  0.0027095969418519765 values:  -52.388374 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  23 target diff:  0.0021198500305086346 --------------------values:   adv learner-58.362766  -------------------------
 

-----iteration:  24 target diff:  0.002401689340825321 values:  -58.401176 ----- 

-----iteration:  23 target diff:  0.003076879574308379 values:  -52.46017 ----- 

-----iteration:  25 target


-----iteration:  34 target diff:  0.0033647885836936638 values:  -52.616096 ----- 

-----iteration:  2 target diff:  0.002001540193950715 values:  -52.716187 ----- 

-----iteration:  0 target diff:  0.9204407187416409 values:  -61.609993 ----- 

-----iteration:  3 target diff: -----iteration:   0.00213313747024031535  values: target diff:   0.0026883962300320173-52.721832  -----values:   -52.576824-----iteration: 
 1
  target diff: ----- 
 
0.0017686918783526699 values:  -61.589077 ----- 

-----iteration:  36 target diff:  0.001959308571577062 values:  -58.2648 ----- 

-----iteration:  0 target diff:  0.91873469602887 values:  -51.624634 ----- 

-----iteration:  4 target diff:  0.0020596156919260897 values:  -52.70176 ----- 

-----iteration:  36 target diff:  0.002697293248701031 values:  -52.533997 ----- 

-----iteration:  2 target diff:  0.0019800914930688416 values:  -61.65515 ----- 

-----iteration:  1 -----iteration: target diff:  0.004164614521029019 37  values:  -51.626766 ----

-----iteration:  77 target diff:  0.0017589590150984063 values:  -50.582874 -----iteration: ----- 

 37 target diff:  0.002155324860822478 values:  -52.427483 ----- 

-----iteration:  40 target diff:  0.0017998135310402631 values:  -53.189228 ----- 

-----iteration:  71 target diff:  0.00156366774370429 values:  -57.62105 ----- 

-----iteration:  78 target diff:  0.0019241050273629328 values:  -50.569923 ----- 

-----iteration:  38 target diff:  0.002810060881248171 values:  -52.506927 ----- 

-----iteration:  41 target diff:  0.003349265711790957 values:  -53.191395 ----- 

-----iteration:  72 target diff:  0.0015033123692078658 values:  -57.60995 ----- 

-----iteration:  79 target diff:  0.0018295237100047466 values:  -50.556557 ----- 

-----iteration:  26 target diff:  0.0022608626675449453 values:  -59.591915 ----- 

-----iteration:  73 target diff:  0.0015643333252259523 values:  -----iteration:  -----iteration:  42-57.62429439   target diff: target diff: -----   
0.00163740683388




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.





To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('fl


-53.928467 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  0 targ




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

 ----- 

-----iteration:  0 target diff:  0.9188273957685897 values:  -58.2532 ----- 

-----iteration:  0 target diff:  0.9193961059359956 values:  -51.62464 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  1 target diff:  0.003132983391119239-----iteration:   values: 1  target diff: -58.292706  -----0.0034614074380577057  values:  
-51.648964 ----- 


-----iteration:  50 target diff:  0.002241873755378505 values:  -5

 -51.583378 ----- 

-----iteration:  58 target diff:  0.0030735917598877084 values:  -59.128437 ----- 

-----iteration:  9 target diff:  0.001974253843276843 values:  -51.53407 ----- 

-----iteration:  12 target diff:  0.0019168497902022398 values:  -52.800106 ----- 

-----iteration:  59 target diff:  0.0020529098011455547 values:  -59.0464 ----- 

-----iteration:  13 target diff:  0.0028369368829328705 values:  -52.81652 ----- 

-----iteration:  10 target diff:  0.0022657281942703826 values:  -51.598625 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  60 target diff:  0.002173186607075517 values:  -59.013668-----iteration:   -----14  

target diff:  0.0017056207488856973 values:  -52.886726 ----- 

-----iteration

-----iteration:  25 target diff:  0.0021456134689332466 values:  -52.903362 ----- 

-----iteration:  20 target diff:  0.001611249319493624 values:  -51.348946 ----- 

-----iteration:  26 target diff: -----iteration:  0.0028471278330103027  values: 8  target diff: -52.856365  0.0018115189001608927 -----values:   
-53.030125 
----- 

-----iteration:  11 target diff:  0.0016816280773577841 values:  -58.67068 ----- 

-----iteration:  9 target diff:  0.002504405281118257 values: -----iteration:   -53.03716321 -----  target diff:  
0.0015756603815796552 
values:  -51.318203 ----- 

-----iteration:  27 target diff:  0.0019437843611135033 values:  -52.90061 ----- 

-----iteration:  12 target diff:  0.0029910160852575905 values:  -58.70503 ----- 

-----iteration:  22 target diff:  0.0012875953335951481 values:  -51.319817 ----- 

-----iteration:  13 target diff:  0.002137272912163063 values:  -58.708523 ----- 

-----iteration:  10 target diff:  0.0019252241719774341 values:  -53.161064 ----- 






To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('floa


-----iteration:  41 target diff:  0.004049780653585145 values:  -52.167786 ----- 

-----iteration:  7 target diff:  0.0020151189668303933 values:  -52.594624 ----- 

-----iteration:  8 target diff:  0.0024208529586015135 values:  -52.60433 ----- 

-----iteration:  0 target diff:  0.9189847404267518 values:  -58.218044 ----- 

-----iteration:  42 target diff:  0.002763844758765381 values:  -52.18388 ----- 

-----iteration:  9 target diff:  0.0015195218097864508 values:  -52.56564 ----- 

-----iteration:  1 target diff:  0.004087476721839579 values:  -58.226135 ----- 

-----iteration:  2 target diff:  0.002940932757060744 values:  -58.21757 ----- 

-----iteration:  43 target diff:  0.004153188829573041 values:  -52.197052 ----- 

-----iteration:  10 target diff:  0.001956678157864677 values:  -52.60648 ----- 

-----iteration:  3 target diff:  0.0021852889745151366 values:  -58.253902 ----- 

-----iteration:  44 target diff:  0.003866168155643347 values:  -52.086433 ----- 

-----iteratio




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration: --------------------  19adv learner  target diff: -------------------- 
0.0027710387167982626 values:  -58.41269 ----- 

-----iteration:  16 target diff:  0.00195679473777036 values:  -54.38703 ----------iteration:  60 target diff:  0.00337004696226321  
values: 
 -51.605804 ----- 

-----iteration:  20 target diff:  0.0020074104510806575 values: -----iteration:  14 target diff: 

-----iteration:  71 target diff:  0.0019223186491933082 values:  -51.232143 ----- 

-----iteration:  30 target diff:  0.0017544623385639779 values:  -58.62997 ----- 

-----iteration:  22 target diff:  0.0027948938792597645 values:  -62.732853 ----- 

-----iteration:  0 target diff:  0.9222922844641391 values:  -51.838894 ----- 

-----iteration:  72 target diff:  0.0024940003595978573 values:  -51.19696 ----- 

-----iteration:  31 target diff:  0.0020211705847711526 values:  -58.662605 ----- 

-----iteration:  23 target diff:  0.0023703567322225557 values:  -62.63799 ----- 

-----iteration:  73 target diff:  0.002759491486155966 values: -----iteration:  -51.15168  32-----  

target diff:  0.0017049216404623928 values:  -58.68689 ----- 

-----iteration:  1 target diff:  0.003416911770101391 values:  -51.858173 ----- 

-----iteration:  74 target diff:  0.0017646340969673044 values:  -51.040276 ----- 

-----iteration:  2 target diff:  0.002423828491733105 values:  -51.811214 ----- 

-----i

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold0/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.
target diff: 
 0.0019547855390062495 values:  -----iteration:  -----iteration: -51.51951 7 target diff:  0 0.002596493561341208 values:   -57.59057target diff:   0.9207818396429913----------  values: 
  


-53.824562 ----- 



To change all l

-----iteration:  14 target diff:  0.0021520978397113143 values:  -57.833824 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  11 target diff:  0.003031763146059423 values:  -53.23736 ----- 

-----iteration:  32 target diff:  0.0019083223087639806 values:  -51.28754 ----- 

-----iteration:  15 target diff:  0.0018571736961368805 values:  -57.918667 ----- 

-----iteration:  12 target diff:  0.0016199579881826865 values:  -53.405666 ----- 

-----iteration:  16 target diff:  0.002311188379487168 values:  -57.97898 ----- 

-----iteration:  13 target diff:  0.002456971292178989 values:  -53.33722 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.
 33
 target diff:  0.0019817821584732407 values:  -51.21582

-----iteration:  4 target diff:  0.0020815571550195575 values:  -60.91256 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  5 target diff:  0.001900423891848986 values:  -60.932076 ----- 

-----iteration:  28 target diff:  -----iteration: 0.0037436900553832137 6  values:  -58.20655target diff:  0.002253386808341756 values:  -----  -60.900852

 ----- 

-----iteration:  0 target diff:  0.918704771713554 values:  -53.284584 ----- 

-----iteration:  7 target diff:  0.001884037117798544 values:  -60.94085 ----- 

-----iteration:  29 target diff:  0.002410398787151274 values:  -58.225796 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this lay





To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold0/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.


-----iteration:  20 target diff:  0.0019078605340411181 values:  -53.676044 Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold0/train/agent1/trajs1.pkl!----- 

Refresh buffer every 1000000 sampling


Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold0/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.
adv learner
 --------------------


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this


-----iteration:  5 target diff:  0.0019321484520149318 values:  -59.768333 ----- 

-----iteration:  28 target diff:  0.003144435661681267 values:  -53.558594 ----- 

-----iteration:  6 target diff:  0.0023719877824876423 values:  -59.804268 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  29 target diff:  0.0024014282178362475 values:  -53.536377 ----- 

-----iteration:  0 target diff:  0.9197158101020233 values:  -53.599873 ----- 

-----iteration:  7 target diff:  0.0015296586669733172 values:  -59.88516 ----- 

-----iteration:  1 target diff:  0.003734959698711295 values:  -53.694447 ----- -----iteration:  
8
 target diff:  0.0023670235961304933 values:  -59.89371 ----- 

-----iteration:  0 target diff:  0.9232




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration: -----iteration:   313  target diff: target diff:  0.0031269806025202205 0.002076663321874002 values:  values:  -53.481903 -53.77662  ---------- 
 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable au


-----iteration:  45 target diff:  0.002694798591371465-----iteration:   15values:   -53.01786target diff:   0.0019452966895157331-----  values:  

-53.81998 ----- 

-----iteration:  13 target diff:  0.002951749477076253 values:  -59.496128 ----- 

-----iteration:  16 target diff:  0.0022636573330858546 values:  -53.76536 ----- 

-----iteration:  14 target diff:  0.0029291885986692475 values:  -59.435467 ----- 

-----iteration:  46 target diff:  0.0026105503685533494 values:  -52.901436 ----- 

-----iteration:  0-----iteration:   target diff: 17  0.9199297402780446target diff:   0.0017967072574231573values:   values: -61.376736  -53.840374-----  -----
 




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.
 
target diff:  0.002670421984065770




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  68 target diff:  0.0021132118202122443 values:  -52.45037 ----- 

-----iteration:  15 target diff:  0.0014356352082770466 values:  -61.076904 ----- 

-----iteration:  30 target diff:  0.0017993554954731663 values:  -58.887424 ----- 

-----iteration: -----iteration:   69 33target diff:   0.002212573106703875 values: target diff:  0.001460627685035081  -52.426975values:   -53.684383-----  -----

 

-------------------- ckpt:  15000 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold0/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectori




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.
 
values:  -52.405857 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteratio

-----iteration:  40 target diff:  0.0016875561783421427 values:  -58.815884 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  3 target diff:  0.002066112788651811 values:  -60.829216 ----- 

-----iteration:  41 target diff:  0.0014956724882954327 values:  -58.8147 ----- 

-----iteration:  4 target diff:  0.002117876330573181 values:  -60.788704 ----- 

-----iteration:  0 target diff:  0.9186380902817124 values:  -53.792




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  16 target diff:  0.002168104556911359 values:  -61.295097 ----- 

-------------------- adv learner --------------------
-----iteration:  9 target diff:  0.0020362389738687136 values:  -53.402973 ----- 

-----iteration:  0 target diff:  0.9216454628678193 values:  -54.081287 ----- 

-----iteration:  17 target diff:  0.002205019470299625 values: -----iteration:   -61.23706415  target diff: -----  
0.0021170026516126508
 values:  -54.19913 ----- 

-----iteration:  1 target diff:  0.0034753996462465633 values:  -54.045483 ----- 

-----iteration:  10 target diff:  0.0018836772248336637 values:  -53.41987 ----- 

-----iteration:  18 target diff:  0.0019954861193126686 values:  -

-----iteration:  29 target diff:  0.002196110860544386 values:  -61.28343 ----- 

-----iteration:  2 target diff:  0.0028112135455739067 values:  -57.23647 ----- 

-----iteration:  4 target diff:  0.002642358945930341 values:  -52.576416 ----- 

-----iteration:  25 target diff:  0.0017049877338239535 values:  -54.706326 ----- 

-----iteration:  3 target diff:  0.002313786100164736 values:  -57.274822 ----- 

-----iteration:  5 target diff:  0.0021436738825445084 values:  -52.669163 ----- 

-----iteration:  4 target diff:  0.002567345794143982 values:  -57.311485 ----- -----iteration: 
 
26 target diff:  0.0021311780330290227 values:  -54.73681 ----- 

-----iteration:  6 target diff:  0.0024964198047966566 values:  -52.71091 ----- 

-----iteration:  5 target diff:  0.0025188927786346034 values:  -57.331482 ----- 

-----iteration:  27 target diff:  0.0015847867054165203 values:  -54.781425 ----- 

-----iteration:  7-----iteration:   target diff: 30  0.002078090219997761target diff:   val

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold0/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
----- 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold0/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold0/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
-----iteration:  38 target diff:  0.0028331985904618976 values:  -53.044415 ----- 

-----iteration:  58 target diff:  0.001953326420686214 values:  -59.90435 ----- 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold0/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold0/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype floa


-------------------- adv learner --------------------
-----iteration:  39 target diff:  0.00339435280607919 values:  -53.02882 ----- 

-----iteration:  59 target diff:  0.001472156792588841 values:  -59.85702 ----- 

-------------------- ckpt:  13000 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold0/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold0/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
-----iteration:  1 target diff:  0.002987629516161054 values:  -52.54331 ----- 

-----iteration:  40 target diff:  0.002511181301290661 values: Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold0/train/agent1/trajs1.pkl! 
-58.41531 Refresh buffer every 1000000 sampling!-----
 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold0/tr




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.
-----iteration: 
 2 target diff:  0.002321592564355097 values:  -52.46847 ----- 

-------------------- adv learner --------------------
-----iteration:  40 target diff:  0.0029194345984860292 values:  -53.021755 ----- 

-----iteration:  41 target diff:  0.0023087181887433967 values:  -58.416634 ----- 

-----iteration:  3 target diff:  0.001873823032756518 values:  -52.402348 ----- 

-----iteration:  42 target diff:  0.00196643550963892 values:  -58.3987 ----- 

-----iteration:  41 target diff:  0.00391457226543831 values:  -53.03893 ----- 

-----iteration:  4 target diff:  0.001415908451596456 values:  -52.37698 ----- 

-------------------- ckpt:  18000 --------------------
Loaded trajector

values: 
 -58.393364 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.ke

-----iteration:  2 target diff:  0.002263589286115597 values:  -53.89255 ----- 

-----iteration:  70 target diff:  0.0020813073891686137 values:  -52.810425 ----- 

-----iteration:  0 target diff:  0.9225890692943667 values:  -51.4574 -----iteration: -----  
3
 target diff:  0.001522762430466822 values:  -53.903862 ----- 

-----iteration:  0 target diff:  0.9200450472481079 values:  -59.83418 ----- 

-----iteration:  71 target diff:  0.0022119368826988128 values:  -52.781685 ----- 

-----iteration:  1 target diff:  0.0025577030283705487 values:  -51.43868 ----- 

-----iteration:  4 target diff:  0.0019433013363522806 values:  -53.85572 ----- 

-----iteration:  6 target diff:  0.0018525566537870718 values:  -58.324745 ----- 

-----iteration:  72 target diff:  0.0017275840441383437 values:  -52.757313 ----- 

-----iteration:  1 target diff:  0.004102013562018943 values:  -59.89087 ----- 

-----iteration:  5 target diff:  0.001970091408258886 values:  -53.858364 ----- 

-----iteration:  2

 values:  -60.032017 -----Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold0/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!


-----iteration:  Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold0/train/agent1/trajs1.pkl!81
 Refresh buffer every 1000000 sampling!target diff: 
 0.0019432603550285908 values:  -52.06471 ----- 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold0/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold0/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold0/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. 

-----iteration:  84 target diff:  0.0015826435103439738 values:  -51.836975 ----- 

-----iteration:  85 target diff:  0.001827316435476037 values:  -51.787323 ----- 

-----iteration:  86 target diff:  0.0017751933578750633 values:  -51.622646 ----- 

-----iteration:  10 target diff:  0.002692435875449386 values:  -60.17379 ----- 

-----iteration:  87 target diff:  0.0017818643723236463 values:  -51.5406 ----- 

-----iteration:  11 target diff:  0.002500712124287809 values:  -60.230976 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  12 target diff:  0.0018777383339451715 values:  -60.232624 ----- 

-----iteration:  88 target diff:  0.0022607429196459085 values:  -51.35223 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  89-----iteration:  target diff:  13 0.0019053381144086643  target diff:  values: 0.0019875210848229116  -51.197475values:   -60.18991 ---------- 

 

-----iteration:  90 target diff:  0.001942969851

 -57.916836
 ----- 

-------------------- adv learner --------------------
-----iteration:  20 target diff:  0.0022485575252045543 values:  -60.2211 ----- 

-----iteration:  7 target diff:  0.0025165641010780754 values:  -54.688374 ----- 

-----iteration:  3 target diff:  0.0024921204773518933 values:  -53.870136 ----- 

-----iteration:  4 target diff:  0.0016248745360781345 values:  -57.916912 ----- 

-----iteration:  4 target diff:  0.002204255952499818 values:  -53.734276 ----- 

-----iteration:  5 target diff:  0.0016313870131064974 values:  -57.915813 ----- 

-----iteration:  8 target diff:  0.004161598946085179 values:  -54.74066 ----- 

-----iteration:  5 target diff:  0.0023986944362791667 values:  -53.773323 ----- 

-----iteration:  21 target diff:  0.0021062362319550798 values:  -60.172882 ----- 

-----iteration:  6 target diff:  0.0022110268401487193 values:  -57.95371 ----- 

-----iteration:  9 target diff:  0.0021097809534260737 values:  -54.740322 ----- 

-----iteration: 

-----iteration:  40 target diff:  0.0023321720953143506 values:  -57.99667 ----- 

-----iteration:  19 target diff:  0.0019454332283956564 values:  -54.81168 ----- 

-----iteration:  47 target diff:  0.0015155938887776775 values:  -55.47699 ----- 

-----iteration:  20 target diff:  0.001912616205880578 values:  -54.78974 ----- 

-----iteration:  46 target diff:  0.0025696477390039093 values:  -59.12536 ----- 

-----iteration:  48 target diff:  0.0022194107131516994 values:  -55.4876 ----------iteration:   
0 
target diff:  0.9218610689506512 values:  -53.221443 ----- 

-----iteration:  41 target diff:  0.0021792444582947805 values:  -57.987713 ----- 

-----iteration:  21 target diff:  0.0018116797125315948 values:  -54.791615 ----- 

-----iteration:  47 target diff:  0.0019791676169907753 values:  -59.032887 ----- 

-----iteration:  1 target diff:  0.0030935839788418956 values:  -53.163433 ----- 

-----iteration:  42 target diff:  0.002028885058936578 values:  -57.98405 ----- 

-----it




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  11 target diff: --------------------  0.0018794705818707396 adv learnervalues:   -53.91817-------------------- ----- 


-----iteration:  19 target diff:  0.001931301197882364 values:  -52.645805 ----- 

-----iteration:  45 target diff:  0.0023349237545770905 values:  -53.949734 ----- 

-----iteration:  12 target diff:  0.0016391051891801083 values:  -53.85312 ----- 

-


-----iteration:  56 target diff:  0.0018118110485373035 values:  -53.628 ----- 

-----iteration:  28 target diff:  0.002126155582940428 values:  -52.488586 ----- 

-----iteration:  6 target diff:  0.002185173507916792 values:  -57.793293 ----- 

-----iteration:  57 target diff:  0.0021825669242779375 values:  -53.62958 ----- 

-----iteration:  0 target diff:  0.9194415062439975 values:  -60.049236 ----- 

-----iteration:  58 target diff:  0.001971767911093647 values:  -53.58793 ----- 

-----iteration:  7 target diff:  0.0021155377544627867 values:  -57.805264 ----- 

-----iteration:  29 target diff:  0.001695700274431216 values:  -52.44751 ----- 

-----iteration:  59 target diff:  0.002908703297284142 values:  -53.69138 ----- 

-----iteration:  1 target diff:  0.0016289874962270756 values:  -60.127537 ----- 

-----iteration:  8 target diff:  0.002125869850053779 values:  -57.822113 ----- 

-----iteration:  60 target diff:  0.0023976356802542717 values:  -53.60871 ----- 

-----iteratio


-----iteration:  14 target diff:  0.002616748698107025 values:  -53.34609 ----- 

-----iteration:  41 target diff:  0.0018853207173912113 values:  -52.02057 ----- 

-----iteration:  77 target diff:  0.0021289416766488785 values:  -52.846966 ----- 

-----iteration:  15 target diff:  0.002620008937185791 values:  -53.39215 ----- 

-----iteration:  0 target diff:  0.9193672678481049 values:  -61.05085 ----- 

-----iteration:  78 target diff:  0.002447551251313277 values:  -52.829685 ----- 

-----iteration:  42 target diff:  0.0018931752383109697 values:  -52.0062 ----- 

-----iteration:  79 target diff:  0.0018726228393896314 values:  -52.872005 ----- 

-----iteration:  16 target diff:  0.00198746655003191 values:  -53.42782 ----- 

-----iteration:  43 target diff:  0.0018292602689212096 values:  -52.003174 ----- 

-----iteration:  80 target diff:  0.0016177850688111975 values:  -52.797802 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx(

-----iteration:  11 target diff:  0.0026115076749789164 values:  -58.86273 ----- 

-----iteration:  26 target diff:  0.002054596656271586 values:  -53.684937 ----- 

-----iteration:  96 target diff:  0.0020632583732462803 values:  -52.52081 ----- 

-----iteration:  12 target diff:  0.0016720535110869787 values:  -58.83735 ----- 

-----iteration:  13 target diff:  0.002640726298907497 values:  -61.624825-----iteration:   27-----  target diff: 

 -----iteration: 0.0024796187577514326 97  target diff:  values: 0.001726048951496469 values:   -53.671696-52.560863 ----- -----  



-----iteration:  98 target diff:  0.0013987775965313876 values:  -52.54193 ----- 

-------------------- ckpt:  11000 --------------------
-----iteration:  14 target diff:  Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold0/train/agent/trajs.pkl!0.0030901453896521026
 Refresh buffer every 1000000 sampling!values:  -61.619656
 ----- 



Loaded trajectories from load path: /ho




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  15 target diff:  0.0014156137525733448 values:  -61.72601 ----- 

-------------------- ckpt:  15000 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold0/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!
----- 

 0.0030685542729570908 values:  -58.929913




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  15 target diff:  0.002459


-----iteration:  1 target diff:  0.0033469932118787766 values:  -55.567177 ----- 

-----iteration:  24 target diff:  0.002184374533451629 values:  -58.953632 ----- 

-----iteration:  2 target diff:  0.0028023636176805486 values:  -55.58302 ----- 

-----iteration:  0 target diff:  0.922209599338228 values:  -54.059265 ----- 

-----iteration:  25 target diff:  0.002079902875017107 values:  -58.979927 ----- 

-----iteration:  1 target diff:  0.002140392655226863 values:  -54.069004 ----- 

-----iteration:  3 target diff:  0.0022155615791418968 values:  -55.64254 ----- 

-----iteration:  0 target diff:  0.9209121246713963 values:  -61.23154 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.
values:  
-54.040344 ----- 

-----iteration:  

Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration: -----iteration:   12  target diff: target diff:   0.00193951399653267640.003036246601545047  values: values:   -58.85034-52.03449 -----  -----
 


-----iteration:  2 target diff:  0.002783192773134853 values:  -52.06713 ----- 
-----iteration:  
38 target diff:  0.0022053331329326357 values:  -59.08764 ----- 

-----iteration:  3 target diff: 




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('floa


-----iteration:  12 target diff:  0.0021448053644043336 values:  -52.562763 ----- 

-----iteration:  20 target diff:  0.0019430843847213618 values:  -52.28174 ----- 

-----iteration:  13 target diff:  0.002425578116443067 values:  -52.60424 ----- 

-----iteration:  21 target diff:  0.0019296309100999053 values:  -52.23925 ----- 

-----iteration:  0 target diff:  0.9193911320176875 values:  -58.91904 ----- 

-----iteration:  0 target diff:  0.9219771777699605 values:  -58.03288 ----- 

-----iteration:  22 target diff:  0.0017890929086741242 values:  -52.234688 ----- 

-----iteration:  1 target diff:  0.0016033097121709592 values:  -58.90819 ----- 

-----iteration:  14 target diff:  0.0018685008537321851 values:  -52.65985 ----- 

-----iteration:  1 target diff:  0.005632010559977181 values:  -----iteration: -58.070614  -----23  
target diff: 
 0.002373173387437883 values:  -52.253113 ----- 

-----iteration:  2 target diff:  0.0019383493930054362 values:  -58.891445 ----- 

-----iterati

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold0/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold0/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold0/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold0/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold0/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, 




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('floa

-----iteration:  38 target diff:  0.0025532271556641447 values:  -61.154034 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  9 target diff:  0.0016245517094108835 values:  -59.266697 ----- 

-----iteration:  10 target diff:  0.001771807442968459 values:  -59.257122 ----- 

-----iteration:  0 target diff:  0.9182102924273976 values:  -51.80889 ----- 

-----iteration:  39 target diff:  0.002904973338538816 values:  -60.968533 ----- 

-----iteration:  11 target diff:  0.0022829169166218195 values:  -59.24548 ----- 

-----iteration:  1 target diff:  0.003819002950319642 values:  -51.796925 ----- 

-----iteration:  12 target diff:  0.0024657014404802315 values:  -----iteration: -59.294796  2----- target diff:  
 
0.004


-----iteration:  9 target diff:  -----iteration:  550.0019693946230941546  target diff:  values: 0.0016101324959067087 -53.679024  values: -----  -58.37623 ----- 



-----iteration:  30 target diff:  0.002348074610073245 values:  -60.12894 ----- 

-----iteration:  31 target diff:  0.0020102940407310988 values:  -60.181053 -----iteration: -----  10
 
target diff:  0.002127586328176467 values:  -53.721992 ----- 

-----iteration:  56 target diff:  0.001329491941090866 values:  -58.3164 ----- 

-------------------- ckpt:  17000 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold0/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!
-----iteration: Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold0/train/agent0/trajs0.pkl! 
Refresh buffer every 1000000 sampling!
 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold0/train/agent1/trajs1.pkl




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  1 target diff:  -----iteration: 0.0025930277623183473  32values:  target diff:  0.001579920516716075 -51.106262 values:   ------60.20755  ----- 



-----iteration:  12 target diff:  0.002127846194872628 values:  -53.739418 ----- 

-----iteration:  -----iteration: 33  2target diff:  target diff:  0.001745573411253245  0.00244

values:  
-59.87429 ----- 

-----iteration:  0 target diff:  0.9225233874564583 values:  -53.640606 ----- 

-----iteration:  1 target diff:  0.0019280754080733184 values:  -53.71504 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.
 
2 target diff:  0.0024788808724919426 values:  -53.79378 ----- 

-----iteration:  3 target diff:  0.0025679998217088204 values:  -53.8074 ----- 

-----iteration:  0 target diff:  0.9203397569372666 values:  -58.4163 ----- 

-----iteration:  4 target diff:  0.0021013524831193535 values:  -53.78016 ----- 

-----iteration:  1 target diff:  0.0034553398024809636 values:  -58.44785 ----- 

-----iteration:  5 target diff:  0.0018719997599174912 values:  -53.8082 ----- 

-----iteration:  2 target diff:  0.0021


-----iteration:  22 target diff:  0.0021193382222670777 values:  -54.115986 ----- 

-----iteration:  21 target diff:  0.002525383624859866 values:  -59.424805 ----- 

-----iteration:  23 target diff:  0.0024090977215900857 values:  -54.08355 ----- 

-----iteration:  22 target diff:  0.0022244338961863674 values:  -59.454613 ----- 

-----iteration:  0 target diff:  0.9202050152155385 values:  -59.565125 ----- 

-----iteration:  24 -----iteration: target diff:   230.003018681032334075  values: target diff:   -54.0774270.0027015556471607174 ----- values:   

-59.49743 ----- 

-----iteration:  24 target diff:  0.001790006964948059 values:  -59.550533 ----- 

-----iteration:  1 target diff:  0.002272714012785216 values:  -59.55396 ----- 

-----iteration:  25 target diff:  0.00250418686972251 values:  -54.087307 ----- 

-----iteration:  25 target diff:  0.0018624109916209413 values:  -59.607716 ----- 

-----iteration:  26 target diff:  0.0021389319039571294 values:  -54.09536 ----- 

-----i




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  52 target diff:  0.0016803350966182523 values:  -53.73871 ----- 
--------------------
 adv learner --------------------
-----iteration:  14 target diff:  0.0018587624359686092 values:  -60.65369 ----- 

-----iteration:  53 target diff:  0.0020018589866585973 values:  -53.769497 ----- 

-----iteration:  15 target diff:  0.0018423222679169388 values:  -60.631172 ----- 

-----iteration:  54 target diff:  0.001721810107366953 values:  -53.7649 ----- 

-----iteration:  16 target diff:  0.001979228434755943 values:  -60.619606 ----- 

-----iteration:  -----iteration: 55  target diff: 17 0.0018605860234456877  values: target diff:   0.0021082423378938346-53.790188 values:   -----




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.
 
values:  -59.348606 ----- 

-----iteration:  18 target diff:  0.0026382289630594393 values:  -59.61303 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Laye

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold0/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
-----iteration:  Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold0/train/agent1/trajs1.pkl!49
 Refresh buffer every 1000000 sampling!target diff: 
 0.0017142941860443925 values:  -59.551838 ----- 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold0/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold0/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold0/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, 


-----iteration:  34 target diff:  0.0023107224827910375 values:  -54.102665 ----- 

-----iteration:  35 target diff:  0.002025783707083214 values:  -54.036957 ----- 

-----iteration:  0 target diff:  0.9206728280768499 values:  -59.375614 ----- 

-----iteration:  1 target diff:  0.004008855493780804 values:  -59.385532-----iteration:   -----36  
target diff: 
 0.0021647880477987627 values:  -53.959988 ----- 

-----iteration:  2 target diff:  0.0029813671838307583 values:  -59.431973 ----- 

-----iteration:  37 target diff:  0.0020016063091028505 values:  -53.924644 ----- 

-----iteration:  3 target diff:  0.0022254816700268175 values:  -59.45853 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

saving model weights at /home/jupyt/


saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold1/train/agent/ckpt/offline_dqn_11000.ckpt


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the 


-----iteration:  1 target diff:  0.004177534045648197 values:  -59.55884 ----- 

-----iteration:  4 target diff:  0.001756489903431359 values:  -53.03481 ----- 

-----iteration:  2 target diff:  -----iteration:  0.0022330559975261730  values: target diff:   0.9209905109190992-59.52775  -----values:   
-59.309963
 ----- 

-----iteration:  5 target diff:  0.0018745890613678803 values:  -53.06191 ----- 

-----iteration:  1 target diff:  0.0036819175111279683 values:  -59.358124 ----- 

-----iteration:  3 target diff:  0.0021322808920571854 values:  -59.470604 ----- 

-----iteration:  6 target diff:  0.002824663418112825 values:  -53.085102 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold1/train/agent/ckpt/offline_dqn_12000.ckpt
-----iteration:  7 target diff:  0.0018402432113981725 values:  -53.135994 ----- -----iteration: 
 2
 target diff:  0.003038814585923673 values:  -59.37918 ----- 

-----iteration:  4 target diff:  0.001968725034120076 values:

-----iteration:  25 target diff:  0.002751348812715387 values:  -60.288654 ----- 

-----iteration:  26 target diff:  0.00210618288419998 values:  -60.30224 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold1/train/agent/ckpt/offline_dqn_14000.ckpt
-----iteration:  27 target diff:  0.002005644845318968 values:  -59.494747 ----- 

-----iteration:  27 target diff:  0.001943926449627283 values:  -60.31826 ----- 

-----iteration:  28 target diff:  0.002306004435009267 values:  -60.357937 ----- 
-----iteration:  28 
target diff:  0.0028753501030984985 values:  -59.454327 ----- 

-----iteration:  29 target diff:  0.0017932952044708603 values:  -60.399006 ----- 

-----iteration:  -----iteration:  2930  target diff: target diff:   0.00211975140663376230.0022941795712503586  values: values:   -59.43115-60.39565 -----  ----- 



-----iteration:  30 target diff:  0.0021224428043387636 values:  -59.415554 ----- 



To change all layers to have dtype float64 by d



 0.0018705031184383318 values:  Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold0/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!-58.89307
 ----- 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold0/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold0/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold0/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold0/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the laye


-------------------- adv learner --------------------
-----iteration:  87 target diff:  0.001808039163779338 values:  -58.810284 ----- 

-----iteration:  88 target diff:  0.0018494203783336295 values:  -58.769802 ----- 

-----iteration:  35 target diff:  0.00207157096014295 values:  -51.578743 ----- 

-----iteration:  89 target diff:  0.0016872392268005613 values:  -58.74407 ----- 

-----iteration:  90 target diff:  0.0021224611208886445 values:  -58.746468 ----- 

-----iteration:  36 target diff:  0.002284255391053182 values:  -51.52393 ----- 

-----iteration:  91 target diff:  0.001988073882798753 values:  -58.671658 ----- 

-----iteration:  37 target diff:  0.002283073088605019 values:  -51.462166 ----- 

-----iteration:  92 target diff:  0.0018778199811673797 values:  -58.620384 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold1/train/agent0/ckpt/offline_dqn_7000.ckpt
-----iteration:  38 target diff:  0.0021918500867865337 values:  -51.421597 


-----iteration:  55 target diff:  0.001744455094550193 values:  -51.110043 ----- 

-----iteration:  0 target diff:  0.9190714912758001 values:  -61.19679 ----- 

-----iteration:  56 target diff:  0.0020924359618855926 values:  -51.13293 ----- 

-----iteration:  7 target diff:  0.002428189867646089 values:  -58.666042 ----- 

-----iteration:  1 target diff:  0.002659224291117722 values:  -61.186604 ----- 

-----iteration:  57 target diff:  0.0021888546592239775 values:  -51.14746 ----- 

-----iteration:  8 target diff:  0.002438268009518267 values:  -58.64687 ----- 

-----iteration:  2 target diff:  0.0019037533899788225 values:  -61.115604 ----- 

-----iteration:  9 target diff:  0.0022719133264431815 values:  -58.70746 ----- 

-----iteration:  58 target diff:  0.0018302902021011705 values:  -51.112034 ----- 

-----iteration:  3 target diff:  0.002422859122745859 values:  -61.088932-----iteration:   59----- 

 target diff:  0.001778748916963898 values:  -51.093853 ----- 

-----iterati




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------


To change all layers to have dtype float




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  0 target diff:  0.9213012905582315 values:  -54.363487 ----- 

-----iteration:  0 target diff:  0.9222536729931742 values:  -57.381557 ----- 

-----iteration:  1 target diff:  0.002476591156107732 values:  -54.44953 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold1/train/agent0/ckpt/offline_dqn_10000.ckpt
-----iteration:  1 target diff:  0.0025344184049260056 values:  -57.427193 ----- 

-----iteration:  2 target diff:  0.003217812444996994 values:  -54.471416 ----- 

-----iteration:  2 target diff:  0.0023062906540421845 values:  -57.45753 ----- 

-----iteration:  3 target diff:  0.00274399947053869 values:  -54.441772 ----- 

-----it

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold0/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold0/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold0/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold0/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold0/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, 

-----iteration:  23 target diff:  0.0021423194421565034 values:  -57.22453 ----- 

-----iteration:  68 target diff:  0.0016873561020733542 values:  -53.38412 ----- 

-----iteration:  24 target diff:  0.002045478863484099 values:  -57.186802 ----- 

-----iteration:  69 target diff:  0.0018010427533262175 values:  -53.353687 ----- 

-----iteration:  25 target diff:  0.0024625684435720394 values:  -57.152412 ----- 

-----iteration:  70 target diff:  0.0017343407090725268 values:  -53.341076 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold1/train/agent/ckpt/offline_dqn_9000.ckpt
-----iteration:  26 target diff:  0.002584133894105183 values:  -57.167107 ----- 

-----iteration:  71 target diff:  0.0016969081282623057 values:  -53.28891 ----- 

-----iteration:  72 target diff:  0.0019386311870064029 values:  -53.257023 ----- 

-----iteration:  27 target diff:  0.0016908184497672308 values:  -57.10735 ----- 

saving model weights at /home/jupyt/leyuan/SUP




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('floa

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold0/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold0/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold0/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold0/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras


-----iteration:  6 target diff:  0.002548637904678677 values:  -57.301334 ----- 

-----iteration:  7 target diff:  0.002780433222043449 values:  -57.311382 ----- 

-----iteration:  0 target diff:  0.9212937630254495 values:  -53.01768 ----- 

-----iteration:  1 target diff:  0.0034893810062443745 values:  -53.11503 ----- 

-----iteration:  8 target diff:  0.0022369352639022735 values:  -57.308025 ----- 

-----iteration:  2 target diff:  0.0027296252702031713 values:  -53.222996 ----- 

-----iteration:  3 target diff:  0.00264632504261349 values:  -53.29289 ----- 

-----iteration:  9 target diff:  0.0018078688714059914 values:  -57.30616 ----- 

-----iteration:  4 target diff:  0.0026649665242917363 values:  -53.307583 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold1/train/agent1/ckpt/offline_dqn_3000.ckpt
-----iteration:  10 target diff:  0.001803397987656929 values:  -57.306755 ----- 

-----iteration:  5 target diff:  0.002963603566045552 value

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold0/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold0/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold0/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold0/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras


-----iteration:  31 target diff:  0.002305282875193366 values:  -54.10478 ----- 

-----iteration:  32 target diff:  0.0024509047545921104 values:  -54.122612 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  33 target diff:  0.0024156601372532036 values:  -54.06115 ----- 

-----iteration:  0 target diff:  0.9204740855834765 values:  -57.878666 ----- 

-----iteration:  34 target diff:  0.002493594080499289 values:  -54.107216 ----- 

-----iteration:  1 target diff:  0.00414054558050549 values:  -57.92315 ----- 

-----iteration:  35 target diff:  0.0026171253287160133 values:  -54.091038 ----- 

-----iteration:  36 target diff:  0.0029808571868390546 values:  -54.0779 ----- 

-----iteration:  2 target diff:  0.00269

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold0/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold0/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold0/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold0/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold0/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold0/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold0/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold0/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold0/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
-----iteration:  26 target diff:  0.0023387487550308584 values:  -52.36668 ----- 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold0/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, p


-------------------- adv learner --------------------
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold1/train/agent1/ckpt/offline_dqn_8000.ckpt
-----iteration:  27 target diff:  0.002494084712040214 values:  -52.291904 ----- 

-----iteration:  28 target diff:  0.002380697763063318 values:  -52.303783 ----- 

-----iteration:  29 target diff:  0.0018728770410129576 values:  -52.318733 ----- 

-----iteration:  30 target diff:  0.0020043025808636015 values:  -52.323086 ----- 

-----iteration:  31 target diff:  0.0018685990040941689 values:  -52.28628 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold1/train/agent1/ckpt/offline_dqn_17000.ckpt
-----iteration:  32 target diff:  0.0021812658338115393 values:  -52.316925 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  33 target diff:  0.002292624812974601 values:  -52.30001 ----- 

-----iteration:  34 target diff:  0.0019283598783623075 val




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  14 target diff:  0.002734737383927017 values:  -59.794456 ----- 

-----iteration:  15 target diff:  0.0025841227583212377 values:  -59.85667 ----- 

-----iteration:  16 target diff:  0.002412880188478312 values:  -59.90635 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  17 target diff:  0.0024520881721723027 values:  -59.81298 ----- 

-----iteration:  0 target diff:  0.9197001761935666 values:  -53.6


-----iteration:  44 target diff:  0.0013747585364556377 values:  -59.668926 ----- 

-------------------- training agent --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold1/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  0 target diff:  0.9229068812694876 values:  -53.419827 ----- 

saving model weights 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold1/train/agent0/ckpt/offline_dqn_17000.ckpt


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold1/train/agent/ckpt/offline_dqn_4000.ckpt
-----iteration:  0 target diff:  0.9224366745645459 values:  -54.27027 ----- 

-----iteration:  1 target diff:  0.002711919195465738 values:  -54.28757 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold1/train/agent2/ckpt/offline_dqn_7000.ckpt
-----iteration:  2 target diff:  0.0025068198773436513 values:  -54.247654 ----- 

-----iteration:  3 target diff:  0.002221283292760066 values:  -54.232807 ----- 

-----iterat

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold1/train/agent2/ckpt/offline_dqn_6000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold1/train/agent1/ckpt/offline_dqn_5000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold1/train/agent2/ckpt/offline_dqn_15000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold1/train/agent/ckpt/offline_dqn_12000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold1/train/agent/ckpt/offline_dqn_1000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold1/train/agent2/ckpt/offline_dqn_7000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold1/train/agent1/ckpt/offline_dqn_6000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold1/train/agent2/ckpt/offline_dqn_16000.ckpt
saving model weights at /home/j


saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold1/train/agent1/ckpt/offline_dqn_13000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold1/train/agent/ckpt/offline_dqn_9000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold1/train/agent2/ckpt/offline_dqn_15000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold1/train/agent3/ckpt/offline_dqn_4000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold1/train/agent0/ckpt/offline_dqn_1000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold1/train/agent1/ckpt/offline_dqn_14000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold1/train/agent2/ckpt/offline_dqn_16000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold1/train/agent/ckpt/offline_dqn_10000.ckpt
saving model weights at /hom

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold1/train/agent2/ckpt/offline_dqn_9000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold1/train/agent0/ckpt/offline_dqn_16000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold1/train/agent3/ckpt/offline_dqn_12000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold1/train/agent0/ckpt/offline_dqn_5000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold1/train/agent4/ckpt/offline_dqn_1000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold1/train/agent3/ckpt/offline_dqn_13000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold1/train/agent0/ckpt/offline_dqn_17000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold1/train/agent2/ckpt/offline_dqn_10000.ckpt
saving model weights at /ho

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold1/train/agent1/ckpt/offline_dqn_12000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold1/train/agent4/ckpt/offline_dqn_17000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold1/train/agent3/ckpt/offline_dqn_5000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold1/train/agent4/ckpt/offline_dqn_9000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold1/train/agent1/ckpt/offline_dqn_2000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold1/train/agent1/ckpt/offline_dqn_13000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold1/train/agent4/ckpt/offline_dqn_18000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold1/train/agent3/ckpt/offline_dqn_6000.ckpt
saving model weights at /hom




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('floa




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold1/train/agent3/ckpt/offline_dqn_11000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold1/train/agent4/ckpt/offline_dqn_15000.ckpt
-------------------- fqe on dqn & sale --------------------
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold1/train/agent1/ckpt/offline_dqn_8000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold1/train/agent1/ckpt/offline_dqn_19000.ckpt


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold1/train/agent1/ckpt/offline_dqn_11000.ckpt
-------------------- fqe on dqn & sale --------------------
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold1/train/agent2/ckpt/offline_dqn_2000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold1/train/agent3/ckpt/offline_dqn_15000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold1/train/agent4/ckpt/offline_dqn_19000.ckpt


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold1/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold1/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold1/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold1/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold1/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, 


-------------------- adv learner --------------------
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold1/train/agent1/ckpt/offline_dqn_14000.ckpt


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- fqe on dqn & sale --------------------
-----iteration:  0 target diff:  0.9233849926921902 values:  -60.197407 ----- 

-----iteration:  1 target diff:  0.00433449988646088 values:  -60.25205 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold1/train/agent2/ckpt/offline_dqn_5000.ckpt
-----iteration:  2 target diff:  0.0027693615237710115 values:  -60.264336 ----- 

-----iteration:  3 target diff:  0.0023900654354560087 values:  -60.215828 ----- 

-----iteration:

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold1/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold1/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold1/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disab




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  49 target diff:  0.003993540454168339 values:  -56.080997 ----- 

-----iteration:  50 target diff:  0.005980412489866028 values:  -56.248646 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold1/train/agent4/ckpt/offline_dqn_3000.ckpt
-------------------- fqe on dqn & sale -----------------

Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, c


saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold1/train/agent4/ckpt/offline_dqn_7000.ckpt
-----iteration:  0 target diff:  0.9171962207536641 values:  -58.057163 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold1/train/agent2/ckpt/offline_dqn_4000.ckpt
-----iteration:  1 target diff:  0.002743532080048923 values:  -58.049465 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  2 target diff:  0.0022477096187490456 values:  -58.069317 ----- 

-----iteration:  3 target diff:  0.001985720450188241 values:  -58.057503 ----- 

-----iteration:  0 target diff:  0.9219820366353687 values:  -64.62306 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/d

-----iteration:  27 target diff:  0.0018500824188499548 values:  -59.061657 ----- 

-----iteration:  28 target diff:  0.0015042809173472689 values:  -59.012142saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold1/train/agent4/ckpt/offline_dqn_10000.ckpt
 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold1/train/agent2/ckpt/offline_dqn_7000.ckpt
-----iteration:  29 target diff:  0.0015269771057828015 values:  -58.96644 ----- 

-----iteration:  30 target diff:  0.0017765655268748403 values:  -58.994804 ----- 

-----iteration:  31 target diff:  0.0018351743995933972 values:  -58.971687 ----- 

-----iteration:  32 target diff:  0.0018410898216070608 values:  -59.045948 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing 

-----iteration:  5 target diff:  0.0018002848088763954 values:  -63.840466 ----- 

-----iteration:  6 target diff:  0.0013430989667982097 values:  -63.802673 ----- 

-------------------- ckpt:  4000 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold1/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold1/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold1/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold1/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold1/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 samplin




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold1/train/agent2/ckpt/offline_dqn_20000.ckpt
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold1/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `t




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold1/train/agent3/ck


-----iteration:  15 target diff:  0.001845103447158661 values:  -59.480015 ----- 

-----iteration:  0 target diff:  0.9177408048881329 values:  -58.66966 ----- 

-----iteration:  1 target diff:  0.002400044390902867 values:  -58.617886 ----- 

-----iteration:  16 target diff:  0.0018046391907966834 values:  -59.474052 ----- 

-----iteration:  17 target diff:  0.0017831021200432398 values:  -59.480217 ----- 

-----iteration:  2 target diff:  0.0023231039921636316 values:  -58.58358 ----- 

-----iteration:  18 target diff:  0.001840091015842078 values:  -59.410748 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold1/train/agent4/ckpt/offline_dqn_18000.ckpt
-----iteration:  3 target diff:  0.0019441266476343134 values:  -58.54592 ----- 

-----iteration:  19 target diff:  0.0021214975267902563 values:  -59.304058 ----- 

-----iteration:  4 target diff:  0.0021180079251623012 values:  -58.486835 ----- 

-----iteration:  20 target diff:  0.002015754652879

-----iteration:  52 target diff:  0.0019052083613739716 values:  -57.7018 ----- 

-----iteration:  33 target diff:  0.001889543918607107 values:  -57.722393 ----- 

-----iteration:  53 target diff:  0.0021443023982100524 values:  -57.673767 ----- 

-----iteration:  34 target diff:  0.0016354560352192173 values:  -57.6227 ----- 

-----iteration:  54 target diff:  0.0016494924140995023 values:  -57.567608 ----- 

-----iteration:  35 target diff:  0.001543006237507269 values:  -57.416912 ----- 

-----iteration:  55 target diff:  0.0022051169949158633 values:  -57.435314 ----- 

-----iteration:  36 target diff:  0.0016792906986219259 values:  -57.240505 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold1/train/agent2/ckpt/offline_dqn_18000.ckpt
-----iteration:  37 target diff:  0.002203920297130694 values:  -57.134697 ----- 

-----iteration:  56 target diff:  0.002038254310764872 values:  -57.47581 ----- 

-----iteration:  38 target diff:  0.00188541319

-----iteration:  11 target diff:  0.002380673730495827 values:  -54.745674 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold1/train/agent3/ckpt/offline_dqn_10000.ckpt
-----iteration:  56 target diff:  0.0017778012787647286 values:  -54.744473 ----- 

-----iteration:  12 target diff:  0.0022910458366586094 values:  -54.88262 ----- 

-----iteration:  57 target diff:  0.0020586905146381404 values:  -54.641144 ----- 

-----iteration:  13 target diff:  0.003190508606270558 values:  -54.99544 ----- 

-----iteration:  58 target diff:  0.001950298200765257 values:  -54.52625 ----- 

-----iteration:  14 target diff:  0.002438202153577359 values:  -54.9865 ----- 

-----iteration:  59 target diff:  0.0019452200324312113 values:  -54.36952 ----- 

-----iteration:  15 target diff:  0.0019215758261541436 values:  -55.098583 ----- 

-----iteration:  60 target diff:  0.0017338002952895946 values:  -54.249672 ----- 

-----iteration:  16 target diff:  0.002306689166

-------------------- fqe on dqn & sale --------------------
-----iteration:  2 target diff:  0.00244822925740755 values:  -63.543934 ----- 

-----iteration:  29 target diff:  0.0026858270661641844 values:  -55.594868 ----- 

-----iteration:  3 target diff:  0.002252082198199042 values:  -63.610226 ----- 

-----iteration:  30 target diff:  0.002683625672464563 values:  -55.528015 ----- 

-----iteration:  4 target diff:  0.0017982275947951512 values:  -63.65817 ----- 

-----iteration:  31 target diff: -----iteration:   5 target diff:  0.00293711888254901160.0019235553617390816  values:  -55.43167 ----- 

values:  -63.81148 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold1/train/agent3/ckpt/offline_dqn_1000.ckpt
-----iteration:  6 target diff:  0.002269712516790493 values:  -63.800934 ----- 

-----iteration:  32 target diff:  0.00279392131492422 values:  -55.36141 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend

-----iteration:  36 target diff:  0.001888678639081196 values:  -58.95038 ----- 

-----iteration:  57 target diff:  0.0019558955954626996 values:  -56.787544 ----- 

-----iteration:  37 target diff:  0.0022137750928859653 values:  -58.861084 ----- 

-----iteration:  0 target diff:  0.9182714986275782 values:  -55.062073 ----- 

-----iteration:  58 target diff:  0.0018253269737368344 values:  -56.62002 ----- 

-----iteration:  38 target diff:  0.0022946269214966725 values:  -58.882893 ----- 

-----iteration:  1 -----iteration: target diff:  59  0.0031718457711123337target diff:   0.0017733598155424708 values: values:  -56.46268  ----- 
-55.124474 
----- 

-----iteration:  39 target diff:  0.0024701630502649394 values:  -58.814003 ----- 

-----iteration:  60 target diff:  0.0017920096976288384 values:  -56.33349 ----- 

-----iteration:  2 target diff:  0.003190777998081076 values:  -55.08094 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold1/train/ag

-----iteration:  86 target diff:  0.0015912761667476651 values:  -53.492046 ----- 

-----iteration:  64 target diff:  0.001778203048900688 values:  -55.113953 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  65 target diff:  0.0014013562160499434 values:  -54.967487 ----- 

-------------------- ckpt:  11000 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold1/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!
target diff:  0.0015211412500724302 values:  -53.386513 ----- 
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold1/train/agent0/trajs0.pkl!

Refresh buffer every 1000000 sampling!
Loaded trajectori




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  0 target diff:  0.9178683519029868 values:  -55.19271 ----- 

-----iteration:  88 target diff:  0.0016108885310995235 values:  -53.377956 ----- 

-----iteration:  89 target diff:  0.0018266443789848128 values:  -53.297905 ----- 

-----iteration:  1 target diff:  0.002876676763847847 values:  -55.232433 ----- 

-----iteration

-----iteration:  2 target diff:  0.002739833386091472 values:  -54.82564 ----- 

-----iteration:  4 target diff:  0.0016124956470370537 values:  -59.876858 ----- 

-----iteration:  3 target diff:  0.002755007781388666 values:  -54.825447 ----- 

-----iteration:  5 target diff:  0.0016889491825970199 values:  -59.851765 ----- 

-----iteration:  4 target diff:  0.0018867812113539074 values:  -54.814606 ----- 

-----iteration:  6 target diff:  0.0017801041121482256 values:  -59.90319 ----- 

-----iteration:  5 target diff:  0.002384973941689255 values:  -54.838215 ----- 

-----iteration:  7 target diff:  0.0016196167299843839 values:  -59.998997 ----- 

-----iteration:  6 target diff:  0.0019825966288368655 values:  -54.90296 ----- 
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold1/train/agent3/ckpt/offline_dqn_8000.ckpt

-----iteration:  8 target diff:  0.001972057117554658 values:  -60.03223 ----- 

-----iteration:  7 target diff:  0.002025172587183496 val




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  1 target diff:  0.0023155117280547277 values:  -55.380337 ----- 

-----iteration:  8 target diff:  0.0022680678035597464 values:  -59.163975 ----- 

-----iteration:  2 target diff:  0.0014998840589068425 values:  -55.36063 ----- 

-----iteration:  9 target diff:  0.0017479655032374864 values:  -59.18265 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  10 target diff:  0.0017242796233076251 values:  -5




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('floa



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.


-------------------- adv learner --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold1/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold1/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from loa




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  0 target diff:  0.9177530733292383 values:  -58.697845 ----- 

-----iteration:  1 target diff:  0.0020479112545282543 values:  -58.69643 ----- 

-----iteration:  2 target diff:  0.0016084151802620932 values:  -58.743732 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  3 target diff:  0.001429797638974939 values:  -58.76782 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold1/train/agent3/ckpt/offline_dqn_11000.ckpt
-------------------- fqe on dqn & sale --------------------


To change all layers to have dtype float64 by default, call `tf.keras.back


-----iteration:  9 target diff:  0.0021514352404352396 values:  -61.03541 ----- 

-----iteration:  11 target diff:  0.0022875474536461085 values:  -59.11375 ----- 

-----iteration:  10 target diff:  0.0021239693492254076 values:  -61.122585 ----- 

-----iteration:  12 target diff:  0.0015882795355216853 values:  -59.14095 ----- 

-----iteration:  0 target diff:  0.9186655896518678 values:  -54.71418 ----- 

-----iteration:  13 target diff:  0.0017259009464630204 values:  -59.26019 ----- 

-----iteration:  11 target diff:  0.0017568745791653208 values:  -61.002266 ----- 

-----iteration:  14 target diff:  0.002953353603006576 values:  -59.297707 ----- 

-----iteration:  1 target diff:  0.0028607942406425444 values:  -54.751 ----- 

-----iteration:  12 target diff:  0.0028925868229700354 values:  -61.082893 ----- 

-----iteration:  15 target diff:  0.001972221591386328 values:  -59.32055 ----- 

-----iteration:  2 target diff:  0.0022107628886917315 values:  -54.723694 ----- 

-----iter




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.
-----iteration:  6
 target diff:  0.0019522015899692661 values:  -54.616997 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------

 
1 target diff:  0.002428778109801065 values:  -59.65916 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold1/train/agent3/ckpt/offline_dqn_14000.ckpt
-----iteration:  2 target diff:  0.0016675736755402614 values:  -59.715332 ----- 

-----iteration:  0 target diff:  0.9231751732395357 values:  -61.105236 ----- 

-----iteration:  3 target diff:  0.001696327259926469 values:  -59.74172 ----- 

-----iteration:  1 target diff:  0.0027176249287676 values:  -61.075787 ----- 

-----iteration:  4 target diff:  0.0018404566215710501 values:  -59.835625 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  2 target diff:  0.0017827852977513815 values:  -61.093365 ----- 

-----iteration:  5 ta




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('floa

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold1/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
 -61.51313 ----- 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold1/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold1/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold1/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold1/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the aut


-------------------- adv learner --------------------
-----iteration:  29 target diff:  0.0022784777800570586 values:  -58.256523 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold1/train/agent3/ckpt/offline_dqn_18000.ckpt
-----iteration:  30 target diff:  0.0029085154306596485 values:  -58.174507 ----- 

-----iteration:  22 target diff:  0.0021787592485545955 values:  -61.321507 ----- 

-----iteration:  31 target diff:  0.002864633665686143 values:  -58.145344 ----- 

-----iteration:  23 target diff:  0.0025606819920343518 values:  -61.2315 ----- 

-----iteration:  24 target diff:  0.0019142515851050003 values:  -61.06945 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  32 target diff:  0.00268761352499779 values:  -58.02659 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold1/train/agent4/ckpt/offline_dqn_9000.ckpt
-----iteration:  25 target diff:  0.0020636274872841693 value


-----iteration:  55 target diff:  0.002152908074953644 values:  -55.577816 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  45 target diff:  0.0016745782708313171 values:  -58.101826 ----- 

-----iteration:  56 target diff:  0.002255568789471989 values:  -55.51542 ----- 

-----iteration:  57 target diff:  0.0022172489936974973 values:  -55.301147 ----- 

-----iteration:  46 target diff:  0.0022413122592359292 values:  -----iteration:  0-57.875294 target diff:   -----0.9189443525322287  
values: 
 -55.242252 ----- -----iteration: 
 
58 target diff:  0.0019346713469439328 values:  -55.20534 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold1/train/agent4/ckpt/offline_dqn_11000.c

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold1/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold1/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold1/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold1/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold1/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, 





To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-------------------- fqe on dqn & sale --------------------
-------------------- fqe on dqn & sale --------------------
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold1/train/agent4/ckpt/offline_dqn_2000.ckpt


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')


Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold1/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasti



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  9 target diff:  0.001597847094019646 values:  -58.476658 ----- 

-----iteration:  10 target diff:  0.001758631018758149 values:  -58.562004 ----- 

-----iteration:  0 target diff:  0.9236634832468927 values:  -62.41933 ----- 

-----iteration:  11 target diff:  0.0022476065767309443 values:  -58.54247 ----- 

-----iteration:  12 target diff:  0.002195501404061805 values:  -58.57101 ----- 

-----iteration:  1 target diff:  0.0019422504388899953 values:  -62.502712 ----- 

-----iteration:  13 target diff:  0.0016356587549487718 values:  -58.649433 ----- 

-----iteration:  2 target diff:  0.0017991813159417528 values:  -62.574993 ----- 

-----iteration:  14 target diff:  -----i




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('floa


-----iteration:  3 target diff:  0.0014093185436661894 values:  -59.154106 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold1/train/agent4/ckpt/offline_dqn_17000.ckpt
-----iteration:  9 target diff:  0.0019861263971420037 values:  -61.93204 ----- 

-----iteration:  0 target diff:  0.9186110819860749 values:  -55.00327 ----- 

-----iteration:  10 target diff:  0.002691876017849527 values:  -61.82714 ----- 

-----iteration:  11 target diff:  0.0019782128903895833 values:  -61.835087 ----- 

-----iteration:  1 target diff:  0.0029174046596467246 values:  -55.098816 ----- 

-----iteration:  2 target diff:  0.002888320591983471 values:  -55.144756 ----- 

-----iteration:  12 target diff:  0.0018178564556730092 values:  -61.720787 ----- 

-----iteration:  13 target diff:  0.00249804422810254 values:  -61.649494 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold1/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
 values:  -60.513584 ----- 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold1/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold1/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold1/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold1/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you a

 target diff:  0.0014014919186906211 values:  -55.588833 ----- 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold1/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold1/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold1/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold1/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold1/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64


-------------------- ckpt:  1000 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold1/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!


Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold1/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold1/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold1/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold1/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold1/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


T




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.
 


-------------------- adv learner --------------------
-----iteration:  13 target diff:  0.0019730957020534135 values:  -60.401897 ----- 

-----iteration:  1 target diff:  0.003456952817201346 values:  -56.026867 ----- 

-----iteration:  14 target diff:  0.0018108053133892028 values:  -60.462124 ----- 

-----iteration:  2 target diff:  0.0040720922548355245 values:  -56.055443 ----- 

-----iteration:  15 target diff:  0.0015871369297465454 values:  -60.439636 ----- 

-----iteration:  3 target diff:  0.0023388408926397685 values:  -56.059753 ----- 

-----iteration:  16 target diff:  0.0016544200271593527 values:  -60.41197 ----- 

-----iteration:  4 target diff:  0.0024814044173731118 val

-----iteration:  9 target diff:  0.0037659449058682296 values:  -61.523712 ----- 

-----iteration:  5 target diff:  0.002360945000526478 values:  -60.308216 ----- 

-----iteration:  30 target diff:  0.001271307900187549 values:  -60.340557 ----- 

-----iteration:  10 target diff:  0.0020822516712942357 values:  -61.487614 ----- 

-------------------- fqe on dqn & sale --------------------
 6-61.538708 ----- target diff:   

0.0022096232838041572 values:  -60.261547 ----- 

-----iteration:  12 target diff:  0.0017113497258386418 values:  -61.44321 ----- 

-----iteration:  7 target diff:  0.002311729567571336 values:  -60.276478 ----- 

-----iteration:  13 target diff:  0.002334172818692762 values:  -61.399925 ----- 

-----iteration:  8 target diff:  0.0018908788893284823 values:  -60.24719 ----- 

-----iteration:  14 target diff:  0.0021565128174644167 values:  -61.394306 ----- 

-----iteration:  9 target diff:  0.002056731887329212 values:  -60.257107 ----- 

-----iteration:  15 target


-------------------- adv learner --------------------
-----iteration:  9 target diff:  0.0021509998663931207 values:  -54.940178 ----- 

-----iteration:  28 target diff:  0.0015678585679412852 values:  -60.914608 ----- 

-----iteration:  23 target diff:  0.0020184266544462717 values:  -59.923775 ----- 

-----iteration:  29 target diff:  0.0015892219623120456 values:  -60.881413 ----- 

-----iteration:  10 -----iteration: target diff:   300.0022237724834146865 target diff:  0.001423807344422553  values: values:   -60.8075-54.897438 ----- -----  



-----iteration:  24 target diff:  0.002130483868879506 values:  -59.91152 ----- 

-----iteration:  11 target diff:  0.0016696694884398107 values:  -54.878742 ----- 

-----iteration:  25 target diff:  0.0019224646836203823 values:  -59.914165 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  12 target diff:  0.0019130187993820261 values:  -54.844563 ----- 

-----iteration:  13 target diff:  0.001686878548970

-----iteration:  37 target diff:  0.0022051343981939484 values:  -59.991604 ----- 

-----iteration:  25 target diff:  0.0023209347699154506 values:  -55.356403 ----- 

-----iteration:  38 target diff:  0.0022671103622248466 values:  -59.97566 ----- 

-----iteration:  26 target diff:  0.001916258532475983 values:  -55.398624 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  39 target diff:  0.0023448782400710875 values:  -59.991203 ----- 

-----iteration:  27 target diff:  0.0023521790846550285 values:  -55.439213 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold1/train/agent4/ckpt/offline_dqn_14000.ckpt
-----iteration:  40 target diff:  0.00210782605230735 values:  -60.023357 -

-----iteration:  54 target diff:  0.0020098272091189483 values:  -59.38467 ----- 

-----iteration:  4 target diff:  0.002304056773759554 values:  -55.027363 ----- 

-----iteration:  5 target diff:  0.0015667106087776102-----iteration:   values: 55  -55.01629 target diff: -----  0.0019057788812983085
 
values:  -59.3171 ----- 

-----iteration:  6 target diff:  0.0017340044389473589 values:  -55.0436 ----- 

-----iteration:  56 target diff:  0.0019419365679674786 values:  -59.205635 ----- 

-----iteration:  7 target diff:  0.0015526228997092219 values:  -55.11877 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  8 target diff:  0.0019361233878600148 values:  -55.11432 ----- 

-----iteration:  57 target diff:  0.00296

-----iteration:  10 target diff:  0.0019086036761636986 values:  -58.730076 ----- 

-----iteration:  9 target diff:  0.002120406410660892 values:  -62.649445 ----- 

-----iteration:  11 target diff:  0.001707524237619981 values:  -58.741657 ----- 

-----iteration:  72 target diff:  0.0025146197296425188 values:  -58.400593 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.
target diff: 
 0.0027141019429018525 values:  -62.556744 ----- 

-----iteration:  12 target diff:  0.0017504757887021973 values:  -58.811184 ----- 

-----iteration:  11 target diff:  0.0018069202888713208 values:  -62.64069 ----- 

-----iteration:  73 target diff:  0.0026324640247634454 values:  -58.35963 ----- 

-----iteration:  13 target diff:  0.0017180823722166


-----iteration:  91 target diff:  0.0019322277212452709 values:  -57.65656 ----- 

-----iteration:  32 target diff:  0.001871875436347449 values:  -61.219296 ----- 

-----iteration:  0 target diff:  0.9186606723153853 values:  -55.3345 ----- 

-----iteration:  92 target diff:  0.0019249673816068163 values:  -57.535984 ----- 

-----iteration:  1 target diff:  0.0031499309461570325 values:  -55.28587 ----- 

-----iteration:  33 target diff:  0.0017668223581401396 values:  -61.0288 ----- 

-----iteration:  93 target diff:  0.0023078038936440964 values:  -57.43154 ----- 

-----iteration:  2 target diff:  0.002420140824729298 values:  -55.245564 ----- 

-----iteration:  94 target diff:  0.0017784937989610084 values:  -57.303253 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing auto

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold1/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold1/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold1/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold1/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold1/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold1/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold1/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
-----iteration:  4 Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold1/train/agent2/trajs2.pkl!target diff: 
 Refresh buffer every 1000000 sampling!0.001821223445343791 
values:  -55.146557 ----- 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold1/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold1/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pa


-----iteration:  22 target diff:  0.0025842755671102906 values:  -59.80814 ----- 

-----iteration:  6 target diff:  0.0013817314652653106 values:  -55.230553 ----- 

-----iteration:  23 target diff:  0.0021971298611147493 values:  -59.800747 ----- 

-----iteration:  0 target diff:  0.9230089011925005 values:  -60.950466 ----- 

-----iteration:  24 target diff:  0.002001610431094145 values:  -59.83564 ----- 

-----iteration:  25 target diff:  0.002203193561300636 values:  -59.816875 ----- 

-------------------- fqe on dqn & sale --------------------
 1 target diff:  0.0020117440610688845 values:  -60.89423 ----- 

-----iteration:  26 target diff:  0.0023774880382081845 values:  -59.876324 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  2 target diff:  0.0014111379369977535 values:  -61.006065 ----- 

-------------------- ckpt:  14000 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold1/tra

-----iteration:  36 target diff:  0.0023163607102144167 values:  -59.714233 ----- 

-----iteration:  1 target diff:  0.0014250443911446179 values:  -60.77242 ----- 

-----iteration:  1 target diff:  -----iteration: 0.0020000786271815465  3values:   -50.72079target diff:   -----0.001869973235609897 
 
values:  -54.887547 ----- 

-----iteration:  37 target diff:  0.0029559089618061015 values:  -59.68552 ----- 

-----iteration:  2 target diff:  0.0012851402388230507 values:  -50.7724 ----- 

-----iteration:  38 target diff:  0.0028376208188241677 values:  -59.667988 ----- 

-----iteration:  4 target diff:  0.00201619182540096 values:  -54.892376 ----- 

-----iteration:  39 target diff:  0.003150097320618108 values:  -59.623684 ----- 

-----iteration:  5 target diff:  0.0018010659096636794 values:  -54.92102 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor


-------------------- adv learner --------------------
-----iteration:  56 target diff:  0.002427511178854707 values:  -57.6598 ----- 

-----iteration:  7 target diff:  0.0020134227593181266 values:  -59.886963 ----- 

-----iteration:  16 target diff:  0.0017571150469546215 values:  -54.97297 ----- 

-----iteration:  0-----iteration:   target diff: 57 0.9235042783375282  values: target diff:   -63.8975260.0022457338253693538 values:  -57.534748  ----- 

----- 

-----iteration:  8 target diff:  0.003266638670448483 values:  -59.96487 ----- 

-----iteration:  58 target diff:  0.002509456248922399 values:  -57.461662 ----- 

-----iteration:  1 target diff:  0.0020087442980501404 values:  -63.730633 ----- 

-----iteration:  17 target diff:  0.0023608484858444116 values:  -55.00959 ----- 

-----iteration:  9 target diff:  0.0024378928734214516 values:  -59.982845 ----- 

-----iteration:  59 target diff:  0.002473791977734415 values:  -57.381035 ----- 

-----iteration:  2 target diff:  0.001

-----iteration:  63 target diff:  0.002457738019130769 values: Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold1/train/agent1/trajs1.pkl! 
-56.84846 Refresh buffer every 1000000 sampling!-----
 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold1/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold1/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold1/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer con

-----iteration:  5 target diff:  0.001903468247528253 values:  -49.91148 ----- 

-----iteration:  0 target diff:  0.9222382355297218 values:  -63.609497 ----- 

-----iteration:  38 target diff:  -----iteration: 0.0016775673657043705  values: 90  -54.743256-----iteration:   target diff: 2  0.0015052540813699398-----target diff:    values: 
0.001997002163773026  values:  -60.88375
 ----- 

-54.68349 ----- 

-----iteration:  6 target diff:  0.0017087114481099126 values:  -49.97335 ----- 

-----iteration:  91 -----iteration: target diff:   30.0022925162318533053  target diff: values:   0.0018708668525320785-54.676716-----iteration:   values:  ----- -60.897488 39 
target diff: 
  -----0.0014971620973417714 values:   
-54.683655
 ----- 

-------------------- ckpt:  13000 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold1/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

0.0020184257783274584 values:  -54.68769 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  8 target diff:  0.0014852491983299682 values:  -50.041893 ----- 

-------------------- ckpt:  3000 --------------------
-----iteration:  5 Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold1/train/agent/trajs.pkl!
target diff:  0.001411




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('floa

-------------------- fqe on dqn & sale --------------------
-----iteration:  6 target diff:  0.0018372251999796499 values:  -63.564594 ----- 

-----iteration:  7 target diff:  0.0018335498482718428 values:  -63.507656 ----- 

-----iteration:  8 target diff:  0.0018458029664799713 values:  -63.41348 ----- 

-----iteration:  9 target diff:  0.0018174935491117208 values:  -63.25552 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  10 target diff:  0.0018578447538015214 values:  -63.218544 ----- 

-----iteration:  0 target diff:  0.9189333670734755 values:  -54.68546 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='flo


-------------------- adv learner --------------------
-----iteration:  9 target diff:  0.0014786236374252238 values:  -60.443043 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  1 target diff:  0.003813115113345346 values:  -49.120213 ----- 

-----iteration:  2 target diff:  0.0018690442696452432 values:  -49.021416 ----- 

-----iteration:  0 target diff:  0.9214090971426137 values:  -60.392735 ----- 

-----iteration:

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold1/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold1/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold1/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold1/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dq


-----iteration:  2 target diff:  0.002625758584547091 values:  -60.55903 ----- 

-------------------- adv learner --------------------
-----iteration:  10 target diff:  0.0013592222370674319 values:  -49.232788 ----- 

-------------------- ckpt:  4000 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold1/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold1/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold1/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold1/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
-----iteration: -----iteration:   30  target diff:  target diff:  0.0028946612621887440.922824023637




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  1 target diff:  0.0014136487541315007 values:  -----iteration: -62.02287  4-----  
target diff: 
 0.0018625123382850664 values:  -60.546963 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  5 target diff:  0.0026037191359469377 values:  -60.57693 ----- 

-----iteration:  6 target diff:  0.0018162247668793317 values:  -60.586964 ----- 

-----iteration:  7 target diff:  0.0019802660161633953 values:  -60.647686 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  8 target diff:  0.001919360960973726 values:  -60.68305 ----- 

-------------------- fqe on dq




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  0 target diff:  0.9180316906568612 values:  -54.87574 ----- 

-------------------- adv learner --------------------
-----iteration:  2 target diff:  0.0014229498540372645 values:  -62.873657 ----- 

-------------------- ckpt:  17000 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold1/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!


Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold1/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold1/train/agent1/trajs1.




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  1--------------------  target diff: adv learner  0.002599701016692461-------------------- 
values:  -54.98104 ----- 

-----iteration:  1 target diff:  0.001844708144356481 values:  -49.74816 ----- 

-----iteration:  2 target diff:  0.0025900278379454673 values:  -54.98612 ----- 

-----iteration:  2 target diff:  0.0014348459351980437 values:  -49.70832 ----- 

-----iteration:  

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



target diff:  0.0017662517859661554 values:  -48.862507 ----- 

-----iteration:  8 target diff:  0.0017986005932469542 values:  -55.947334 ----- 

-----iteration:  15 target diff:  0.001878296122621159 values:  -48.853996 ----- 

-----iteration:  9 target diff:  0.0017991637072552627 values:  -55.919582 ----------iteration:   
0
 target diff:  0.9206743968623713 values:  -60.951046 ----- 

-----iteration:  8 target diff:  0.0020953333454685683 values:  -61.099335 ----- 

-----iteration:  10 target diff:  0.001410090554524438 values:  -55.922657 ----------iteration:  
 
1 target diff: --------------------  ckpt: 0.002388379769789688 15000  values: -------------------- 
-60.97093 -----Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold1/train/agent/trajs.pkl!
 
Refresh buffer every 1000000 sampling!



-----iteration:  16 target diff:  0.002141362176048852 values:  -48.87923 ----- 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/


-------------------- adv learner --------------------
-----iteration:  17 target diff:  0.001434248371534763 values:  -48.930786 ----- 

-------------------- ckpt:  5000 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold1/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold1/train/agent/trajs.pkl!

Refresh buffer every 1000000 sampling!

Refresh buffer every 1000000 sampling!




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' 


Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold1/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold1/train/agent2/trajs2.pkl!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.


Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold1/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
-------------------- adv learner --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold1/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by defaul

-----iteration:  1 target diff:  0.002101830040754446 values:  -59.765686 ----- 

-----iteration: -----iteration:   1329  target diff: target diff:  0.0030686282214019475 0.002173075333336497  values: values:  -61.05359  -60.280666 ----------  



-----iteration:  2 target diff:  0.0019758319890677877-----iteration:   values:  0-59.79702  -----target diff:   
0.9193555037140404
 values:  -50.64726 ----- 

-----iteration:  30 target diff:  0.0027509554315420593 values:  -60.1755 ----- 

-----iteration:  1 target diff:  0.0027797798510506427 values:  -50.685223 ----- 

-----iteration:  31 target diff:  0.0027309831260870577 values:  -----iteration:  14-60.09975  target diff: -----  0.001958355792781679
 values:  
-61.03512 ----- 

-----iteration:  3 target diff:  0.001678872329627952 values:  -59.798645 ----- 

-----iteration:  2 target diff:  0.0016526778595728562 values:  -50.64407 ----- 

-----iteration:  32 -----iteration: target diff:   150.0025620636273699184 target diff:   values:




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- -----iteration: adv learner  --------------------34 
target diff:  0.002357

-----iteration:  5 target diff:  0.001774497620132383 values:  -54.129787 ----- 

-----iteration:  46 target diff:  0.002502867225872803 values:  -57.09266 ----- 

-----iteration:  6 target diff:  0.0015324566779328158 values:  -54.21212 ----- 

-----iteration:  47 target diff:  0.0026386018463507733 values:  -56.872646 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  7 target diff:  0.0022783270143653337 values:  -54.267723 ----- 

-----iteration:  48 target diff:  0.0024105842784896544 values:  -56.665997 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  0 target diff:  0.9215867988749142 values:  -60.925896 ----- 


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all 




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('floa

-----iteration:  76 target diff:  0.0016069572132903847 values:  -54.18316 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  77 target diff:  0.0015687023253383682 values:  -54.088894 ----- 

-----iteration:  22 target diff:  0.002958058109180788 values:  -60.607014 ----- 

-----iteration:  78 target diff:  0.001485589727303532 values:  -54.087624 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- training agent --------------------
Loaded t




-----iteration:  34 target diff:  0.0023309960850401777 values:  -59.87869 ----- 

-----iteration:  1 target diff:  0.001541263043973578 values:  -60.390156 ----- 

-----iteration:  35 target diff:  0.001789970458486234 values:  -59.78472 ----- 

-----iteration:  2 target diff:  0.0011250510865264 values:  -60.41676 ----- 

-----iteration:  0 target diff:  0.9198720668704469 values:  -49.545017 ----- 

-----iteration:  36 target diff:  0.0018026981382499377 values:  -59.690205 ----- 

-----iteration:  1 target diff:  0.0024095283678939833 values:  -49.535767 ----- 

-----iteration:  37 target diff:  0.0016116493194004368 values:  -59.609753 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  2 -----iteration: target diff:   0.001430653721318170838  target diff:  values: 0.0015563708828478836  -49.52242 values: ----- 

 -59.5554 ------------------------- ckpt:  8000 
 --------------------

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/d




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  39 target diff:  0.0014959062939968245 values:  -59.498127 ----- 

-------------------- ckpt:  19000 --------------------


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing auto

Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold1/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold1/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold1/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold1/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold1/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  2 target diff:  0.001955421387461981 values:  -55.648018 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.
 
3 target diff:  0.0014036171737572725 values:  -55.645973 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the a




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.





To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  9 target diff:  0.00149


-----iteration:  20 target diff:  0.002938089080819244 values:  -55.7926 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  11 target diff:  0.002275699246888177 values:  -59.77651 ----- 

-----iteration:  0 target diff:  0.9198388020303289 values:  -49.25356 ----- 

-----iteration:  21 target diff:  0.002088991218136864 values:  -55.827568 ----- 

-----iteration:  12 target diff:  0.002497942812229308 values:  -59.84021 ----- 

-----iteration:  1 target diff:  0.0019117121942647903 values:  -49.303978 ----- 

-----iteration:  13 target diff:  0.0017857123856278515 values:  -59.83355 ----- 

-----iteration:  22 target diff:  0.002102817799290688 values:  -55.77657 ----- 

-----iteration:  2 target diff:  0.00171916




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('floa

-----iteration: -----iteration:   285  target diff: target diff:   0.00204363706404550.001997880527945644  values: values:   -60.095802-64.05899  ----- -----
 


-----iteration:  29 target diff:  -----iteration: 0.001956831261283818  6values:   target diff: -60.096367  0.0017430287699042228-----  values: 

 -64.01496 ----- 

-----iteration:  7 target diff:  0.0018523290014676845 values:  -63.98719 ----- 

-----iteration:  30 target diff:  0.0020008732117131082 values:  -60.1316 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  8 target diff:  0.0015862651781829516 values:  -64.06274 ----- 

-----iteration:  31 target diff:  0.001967010237534217 values:  -60.097954 ----- 

-----iteration:  0 target diff:  0.91824057




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  44 target diff:  0.0020087023678510294 values:  -60.07719 ----- 

-----iteration:  6 target diff:  0.0016965578075242849 values:  -54.83305 ----- 

-----iteration:  45 target diff:  0.0018571215594824844 values:  -60.00924 ----- 

-----iteration:  0 target diff:  0.9194338795603412 values:  -48.47848 ----- 

-----iteration:  7 target diff:  0.0018518403321982724 values:  -54.844143 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold2/train/agent/ckpt/offline_dqn_7000.ckpt
-----iteration:  1 target diff:  0.002762672077223794 values:  -48.467937 ----- 

-----iteration: -----iteration:   846  target diff: target diff:   0.00199373320605953




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  48 target diff:  0.001901




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('floa

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold1/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold1/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold1/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold1/train/agent4/trajs4.pkl!
-----iteration: Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.
 
values:  -57.908684 ----- 



To change all layers to 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold1/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold1/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold1/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold1/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold1/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, 




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold2/train/agent/ckpt/offline_dqn_3000.ckpt
-------------------- fqe on dqn & sale --------------------
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold2/train/agent/ckpt/offline_dqn_10000.ckpt
-------------------- fqe on dqn & sale --------------------


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base La


-----iteration:  0 target diff:  0.9220437195096747 values:  -59.94343 ----- 

-----iteration:  2 target diff:  0.0019217508151991574 values:  -49.258022 ----- 

-----iteration:  1 target diff:  0.0019311053557366147 values:  -59.981106 ----- 

-----iteration:  0 target diff:  0.9180979367470762 values:  -55.469357 ----- 

-----iteration:  2 target diff:  0.002381639310581886 values:  -60.001587 ----- 

-----iteration:  3 target diff:  0.0015388962636457382 values:  -49.238796 ----- 

-----iteration: -----iteration:   31  target diff: target diff:   0.00127539820023761650.0037028076038562834  values: values:  -60.089905 -----  -55.477173
 ----- 
-------------------- ckpt: 
 12000
 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold1/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold1/train/agent0/trajs0.pkl!
Refresh buffer 




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  2 target diff:  0.0021161799834989218 values:  -55.53391 ----- 

-----iteration:  5 target diff:  0.001791695592188707 values:  -49.2914 ----- 

-----iteration:  3 target diff:  0.002689397340379184 values:  -55.559517 ----- 

-----iteration:  6 target diff:  0.0015437566896369808 values:  -49.35458 ----- 

-----iteration:  



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  0 target diff:  0.9214528704997929 values:  -61.42806 ----- 

-----iteration:  1 target diff:  0.0010499724694135201 values:  -61.42864 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold2/train/agent/ckpt/offline_dqn_6000.ckpt


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold2/train/agent/ckpt/offline_dqn_13000.c

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold1/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold1/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold1/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold1/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold1/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold1/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold1/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold1/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold1/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold1/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, 




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
saving model weights at /home/jupyt/leyuan


-------------------- adv learner --------------------


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  0 target diff:  0.9226496137064801 values:  -61.409878 ----- 

-----iteration:  1 target diff:  0.0032541495548482294 values:  -61.644035 ----- 

-------------------- fqe on dqn & sale --------------------
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold2/train/agent/ckpt/offline_dqn_13000.ckpt
-----iteration:  2 target diff:  0.0031212849558425824 values:  -61.56625 ----- 

-----iteration:  3 target diff:  0.0025984569269451273 values:  -61.574787 ----- 

-----iteration:  4 target diff:  0.002367732499104411 values:  -61.56773 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold2/train/agent0/ckpt/offline_dqn_1000.ckpt
-----iteration:  4 target diff:  0.001170167311743014 values:  -50.170074 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold2/train/agent/ckpt/offline_dqn_9000.ckpt


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  0 target diff:  0.921611878770518 values:  -59.510406 ----- 

-----iteration:  1 target diff:  0.001611592683262598 values:  -59.543007 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, 


saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold2/train/agent0/ckpt/offline_dqn_4000.ckpt
-----iteration:  21 target diff:  0.0016371210888129842 values:  -50.289585 ----- 

-----iteration:  0 target diff:  0.9210992794438234 values:  -59.774876 ----- 

-----iteration:  1 target diff:  0.0012972145137261169 values:  -59.77366 ----- 

-----iteration:  22 target diff:  0.0015519122513924929 values:  -50.297222 ----- 

-----iteration:  23 target diff:  0.001684503040412079 values:  -50.297768 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold2/train/agent/ckpt/offline_dqn_12000.ckpt
-----iteration:  24 target diff:  0.0014041415329838556 values:  -50.333607 ----- 

-------------------- ckpt:  17000 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold1/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/le




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- fqe on dqn & sale ---




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.





To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  24 target diff:  0.0025

-----iteration:  2 target diff:  0.001682988467754873 values:  -59.82612 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold2/train/agent0/ckpt/offline_dqn_2000.ckpt
-----iteration:  3 target diff:  0.0017271424800598067 values:  -59.853477 ----- 

-----iteration:  4 target diff:  0.0019114887988664995 values:  -59.92493 ----- 

-----iteration:  5 target diff:  0.0018084424991747854 values:  -59.85989 ----- 

-----iteration:  6 target diff:  0.0017624652037378124 values:  -59.89665 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold2/train/agent0/ckpt/offline_dqn_9000.ckpt
-----iteration:  7 target diff:  0.001470353796747221 values:  -59.94212 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=Fals

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold2/train/agent/ckpt/offline_dqn_19000.ckpt
-----iteration:  16 target diff:  0.002719245953269581 values:  -61.37164 ----- 

-----iteration:  17 target diff:  0.002228559497866273 values:  -61.41055 ----- 

-----iteration:  18 target diff:  0.0023762338502294356 values:  -61.42715 ----- 

-----iteration:  19 target diff:  0.0016956459513407195 values:  -61.506477 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  20 target diff:  0.0027527518322473534 values:  -61.474323 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold2/train/agent0/ckpt/offline_dqn_5000.ckpt
-----iteration:  0 target diff:  0.9187563




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  2 target diff:  0.0019503


-----iteration:  0 target diff:  0.920769306603122 values:  -60.052166 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold2/train/agent0/ckpt/offline_dqn_13000.ckpt
-----iteration:  1 target diff:  0.002072707330278569 values:  -60.073723 ----- 

-----iteration:  2 target diff:  0.001326601839934977 values:  -60.10352 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold2/train/agent0/ckpt/offline_dqn_1000.ckpt


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  0 target diff:  0.9193282860284078 values:  -46.029877 ----- 

-----iteration:  1 target diff:  0.0016818738735957612 values:  -46.034084 ----- 



To change all layers to have dtype float64 by de


saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold2/train/agent/ckpt/offline_dqn_1000.ckpt
-----iteration:  0 target diff:  0.9225949271422578 values:  -60.256813 ----- 

-----iteration:  1 target diff:  0.0018699286235252164 values:  -60.25397 ----- 

-----iteration:  2 target diff:  0.0015493695997108147 values:  -60.264412 ----- 

-----iteration:  3 target diff:  0.0018037543339101136 values:  -60.232338 ----- 

-----iteration:  4 target diff:  0.002023224195837172 values:  -60.194553 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold2/train/agent0/ckpt/offline_dqn_16000.ckpt
-----iteration:  5 target diff:  0.0014496692752034618 values:  -60.139984 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold2/train/agent0/ckpt/offline_dqn_4000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold2/train/agent0/ckpt/offline_dqn_10000.ckpt


To change all layer




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold2/train/agent0/ckpt/offline_dqn_9000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold2/train/agent0/ckpt/offline_dqn_15000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold2/train/agent/ckpt/of


saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold2/train/agent1/ckpt/offline_dqn_12000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold2/train/agent1/ckpt/offline_dqn_6000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold2/train/agent/ckpt/offline_dqn_18000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold2/train/agent/ckpt/offline_dqn_7000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold2/train/agent1/ckpt/offline_dqn_1000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold2/train/agent1/ckpt/offline_dqn_13000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold2/train/agent1/ckpt/offline_dqn_7000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold2/train/agent/ckpt/offline_dqn_19000.ckpt
saving model weights at /home/

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold2/train/agent0/ckpt/offline_dqn_13000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold2/train/agent2/ckpt/offline_dqn_2000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold2/train/agent2/ckpt/offline_dqn_9000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold2/train/agent0/ckpt/offline_dqn_3000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold2/train/agent1/ckpt/offline_dqn_17000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold2/train/agent0/ckpt/offline_dqn_14000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold2/train/agent2/ckpt/offline_dqn_3000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold2/train/agent0/ckpt/offline_dqn_4000.ckpt
saving model weights at /home


saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold2/train/agent2/ckpt/offline_dqn_13000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold2/train/agent1/ckpt/offline_dqn_11000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold2/train/agent2/ckpt/offline_dqn_19000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold2/train/agent3/ckpt/offline_dqn_6000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold2/train/agent1/ckpt/offline_dqn_1000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold2/train/agent2/ckpt/offline_dqn_14000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold2/train/agent1/ckpt/offline_dqn_12000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold2/train/agent2/ckpt/offline_dqn_20000.ckpt
Loaded trajectories from 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold2/train/agent1/ckpt/offline_dqn_17000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold2/train/agent3/ckpt/offline_dqn_9000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold2/train/agent2/ckpt/offline_dqn_8000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold2/train/agent4/ckpt/offline_dqn_2000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold2/train/agent3/ckpt/offline_dqn_16000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold2/train/agent1/ckpt/offline_dqn_18000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold2/train/agent3/ckpt/offline_dqn_10000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold2/train/agent2/ckpt/offline_dqn_9000.ckpt
saving model weights at /hom

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold2/train/agent4/ckpt/offline_dqn_17000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold2/train/agent4/ckpt/offline_dqn_4000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold2/train/agent3/ckpt/offline_dqn_6000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold2/train/agent2/ckpt/offline_dqn_14000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold2/train/agent4/ckpt/offline_dqn_13000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold2/train/agent4/ckpt/offline_dqn_18000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold2/train/agent4/ckpt/offline_dqn_5000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold2/train/agent3/ckpt/offline_dqn_7000.ckpt
saving model weights at /hom




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('floa




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold2/train/agent4/ckpt/offline_dqn_19000.ckpt


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  0 target diff:  0.9124175490732771 values:  -52.644985 ----- 

-----iteration:  1 target diff:  0.003500264115952916 values:  -52.621086 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold2/train/agent3/ckpt/offline_dqn_13

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold2/train/agent0/trajs0.pkl!--------------------
 fqe on dqn & saleRefresh buffer every 1000000 sampling!
 --------------------


Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold2/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold2/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold2/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To chan

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold2/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold2/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
-----iteration:  33 Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold2/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!target diff:  0.0037686855330617912 
values:  -64.630936 ----- 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold2/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold2/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, 


-------------------- adv learner --------------------
-----iteration:  34 target diff:  0.0035981357923664187 values:  -64.52392 ----- 

-----iteration:  35 target diff:  0.004837781277311079 values:  -64.489 ----- 

-----iteration:  36 target diff:  0.004092277334840564 values:  -64.403786 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold2/train/agent3/ckpt/offline_dqn_5000.ckpt
-------------------- fqe on dqn & sale --------------------
-----iteration:  37 target diff:  0.003361416051542638 values:  -64.32726 ----- 

-----iteration:  38 target diff:  0.0035688565901558604 values:  -64.208534 ----- 

-----iteration:  39 target diff:  0.0033727499964448624 values:  -64.13123 ----- 

-----iteration:  40 target diff:  0.0031868151995985325 values:  -64.05661 ----- 

-----iteration:  41 target diff:  0.00326271114766624 values:  -63.95162 ----- 

-----iteration:  42 target diff:  0.003054065237216988 values:  -63.85684 ----- 



To change all layers 

-----iteration:  65 target diff:  0.002711763466690583 values:  -61.22678 ----- 

-----iteration:  66 target diff:  0.0026287764550075652 values:  -61.1378 ----- 

-----iteration:  67 target diff:  0.0031130300461991276 values:  -61.018818 ----- 

-----iteration:  68 target diff:  0.003775876702417638 values:  -60.905674 ----- 

-------------------- fqe on dqn & sale --------------------
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold2/train/agent3/ckpt/offline_dqn_20000.ckpt
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold2/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to hav




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  94 target diff:  0.0025914536345502492 values:  -57.797283 ----- 

-----iteration:  95 target diff:  0.00220282239758642 values:  -57.76272 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold2/train/agent4/ckpt/offline_dqn_2000.ckpt
-----iteration:  96 target diff:  0.002564913568829832 values:  -57.674778 ----- 

-----iteration:  97 target diff:  0.002057111779430753 values:  -57.57818 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold2/train/agent4/ckpt/offline_dqn_20000.ckpt
-------------------- behavior cloning --------------------
------------




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold2/train/agent3/ckpt/offline_dqn_10000.ckpt


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold2/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold2/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
-----iteration:  21 target diff:  0.002046538421123226 values:  -64.19477Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold2/train/agent3/trajs3.pkl! -----
 Refresh buffer every 1000000 sampling!


Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold2/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer con

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold2/train/agent4/ckpt/offline_dqn_5000.ckpt
-----iteration:  29 target diff:  0.002075223777070423 values:  -64.18654 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  30 target diff:  0.0021745981000744334 values:  -64.14388 ----- 

-----iteration:  31 target diff:  0.0018998887829079264 values:  -64.108765 ----- 

-----iteration:  32 target diff:  0.002010663189495154 values:  -64.024185 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  33 target diff:  0.002520894751107164 values:  -63.948296 ----- 

-----iteration:  0 target diff:  0.9122454745343584 values:  -52.601414 ----- 

-----iteration:  1 targ




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  10 --------------------target diff:   adv learner0.001833766465218166  --------------------values: 
 -53.9033 ----- 

-----iteration:  51 target diff:  0.0027737956468490504 values:  -62.21909 ----- 

-----iteration:  11 target diff:  0.0019730678135910455 values:  -53.87214 ----- 

-----iteration:  52 target diff:  0.0024059161878106474 values:  -62.109356 ----- 

-----iteration:  12 target diff:  0.001315753463705217 values:  -53.783936 ----- 

-------------------- ckpt:  7000 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold2/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories f




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.
-----
 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold2/train/agent4/ckpt/offline_dqn_7000.ckpt


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False

Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold2/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold2/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold2/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold2/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have d

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold2/train/agent3/ckpt/offline_dqn_17000.ckpt
-----iteration:  93 target diff:  0.0017885670877964012 values:  -57.309002 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  94 target diff:  0.0017310823706929118 values:  -57.20117 ----- 

-----iteration:  95 target diff:  0.001592836550200411 values:  -57.120945 ----- 

-----iteration:  96 target diff:  0.0015017441585947514 values:  -56.986427 ----- 

-----iteration:  97 target diff:  0.001329531020717904 values:  -56.878838 ----- 

-------------------- ckpt:  2000 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold2/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold2/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path:




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('floa




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.
-----iteration: 
 4 target diff:  0.001602856451936893 values:  -61.93143 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  7 target diff:  0.002552699824082041 values:  -64.405556 ----- 

-----iteration:  5 target diff:  0.0015212197320304992 values:  -61.90753 ----- 

-----iteration:  8 target diff:  0.002449620689425077 values:  -64.45982 ----- 

-----iteration:  




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('floa

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold2/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold2/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold2/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold2/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold2/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold2/train/agent4/ckpt/offline_dqn_1000.ckpt
-----iteration:  8 target diff:  0.0013769196526890535 values:  -52.514473 ----- 

-------------------- ckpt:  10000 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold2/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold2/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold2/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold2/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold2/train/agent3/trajs3.pk




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-------------------- fqe on dqn & sale --------------------
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold2/train/agent4/ckpt/offline_dqn_14000.ckpt
-------------------- fqe on dqn & sale --------------------
-------------------- fqe on dqn & sale --------------------


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype floa




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  0 target diff:  0.9143038583985822 values:  -63.805447 ----- 

-----iteration:  0 target diff:  0.91847459092916 values:  -61.47327 ----- 

-----iteration:  0 target diff:  0.9130885865420107 values:  -52.904785 ----- 

-----iteration:  1 target diff:  0.0028104271616095993 values:  -63.78762 ----- 

-----iteration:  1 target diff:  0.0029748214847295433 values:  -52.85186 ----- 

-----iteration:  1 target diff:  0.003126045066915467 values:  -61.46653 ----- 

-----iteration:  2 target diff: -----iteration:   0.00220860495873522042  target diff: values:   0.0021690001014736267-52.82441  values: -----  
-63.803307
 ----- 

-----iteration:  2 target diff:  0.0023761405398451




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('floa




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  1 target diff:  0.0036774054190329957 values:  -64.44134 ----- 

-----iteration:  35 target diff:  0.0027507871051899633 values:  -62.922188 ----- 

-----iteration:  2 target diff:  0.0025811056745281336 values:  -64.440056 ----- 

-----iteration:  36 target diff:  0.002496315346389089 values:  -62.839268 ----- 

-----iteration:  3 target diff:  0.002258724958297722 values:  -64.39019 ----- 

-----iteration:  4 target diff:  0.001591280627265729 values:  -64.361374 ----- 

-----iteration:  37 target diff:  0.0022392299944157365 values:  -62.669262 ----- 

-----iteration:  5 target diff:  0.0015972329370036128 values:  -




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  40 target diff:  0.0026022217046362153 values:  -62.32009 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('floa

-----iteration:  73 target diff:  0.0018408306753882001 values:  -57.871235 ----- 

-----iteration:  6 target diff:  0.00189509615378107 values:  -64.19069 ----- 

-----iteration:  7 target diff:  0.0016264140542130423-----iteration:   values: 74  -64.1678target diff:   -----0.0026989468206221516  values: 
 -57.746284 -----
 

-----iteration:  8 target diff: -----iteration:   0.00143267959519940575  values:  target diff:  0.0019472626862033855-64.21374  -----values:   
-57.644287
 -------------------- ckpt:  9000 ----- 
--------------------

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold2/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold2/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold2/train/agent1/trajs1.pkl!
Refresh buffer every 1




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

values:  -57.584465 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.
--------------------



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('floa

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold2/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold2/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold2/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
-----iteration:  1 target diff:  0.002322153724667655 values:  Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold2/train/agent3/trajs3.pkl!-58.738106
 -----Refresh buffer every 1000000 sampling! 


Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold2/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pa

-----iteration:  3 target diff:  0.0020835203904258103Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold2/train/agent1/trajs1.pkl! 
values: Refresh buffer every 1000000 sampling! 
-53.780056 ----- 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold2/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold2/train/agent3/trajs3.pkl!
-----iteration:  Refresh buffer every 1000000 sampling!
16 target diff:  0.0026517550902485946 values:  -58.070972 ----- 

-----iteration:  26Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold2/train/agent4/trajs4.pkl!
 Refresh buffer every 1000000 sampling!
target diff:  0.0018800728750042683 values:  -64.879326 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just th




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.
 
values:  -57.281826 ----- 

-------------------- adv learner --------------------
-----iteration:  27 target diff:  0.001495572440425075 values:  -57.26136 ----- 

-------------------- ckpt:  2000 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold2/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories fro




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('floa


-----iteration:  0 target diff:  0.9153572349824436-----iteration:  values:   7-58.964813  -----target diff:  
 
0.002412932954394738 values:  -65.062645 ----- 

-----iteration:  0 target diff:  0.9123148619058483 values:  -52.249947 ----- 

-----iteration:  1 target diff:  0.002210112990581964 values:  -58.999634 ----- -----iteration: 

 8 target diff:  0.0025924522292520764 values:  -65.090355 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  2 target diff:  0.001855741863240546 values:  -58.887615 ----- 

-----iteration:  1 target diff:  0.0029107419743292463 values:  -52.27483 ----- 

-----iteration:  9 target diff:  0.002177644185708104 values:  -65.07461 ----- 

-----iteration:  0 target diff:  0.91807258684


--------------------

Refresh buffer every 1000000 sampling!












Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold2/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold2/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!


Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold2/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold2/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer construct

 
--------------------


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  22 target diff:  0.0024211338312181757 values:  -64.72337 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  23 target diff:  0.0030984136910548925 values:  -64.74872 ----- 

-----iteration:  24 target diff:  0.0030630048408762864 values:  -64.71247 ----- 

-----iteration:  25 target diff:  0.0035166095837501437 values:  -64.659

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold2/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold2/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold2/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold2/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold2/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, 




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('floa


-----iteration:  82 target diff:  0.002445279580748357 values:  -57.14964 ----- 

-----iteration:  27 target diff:  0.00186796314743195 values:  -58.518635 ----- 

-----iteration:  0 target diff:  0.9127558615153157 values:  -53.29941 ----- 

-----iteration:  83 target diff:  0.0017686794710986217 values:  -57.0012 ----- 

-----iteration:  28 target diff:  0.0015697435869439667 values:  -58.371784 ----- 

-----iteration:  1 target diff:  0.0018982247765046382 values:  -53.370327 ----- 

-----iteration:  84 target diff:  0.0022270490118998037 values:  -56.893524 ----- 

-----iteration:  2 target diff:  0.0019743463574767497 values:  -53.386776 ----- 

-----iteration:  29 target diff:  0.0014408440400297017 values:  -58.225502 ----- 

-------------------- ckpt:  4000 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold2/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt

 




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  86 target diff:  0.002


-------------------- adv learner --------------------
-----iteration:  97 target diff:  0.0019486776130050857 values:  -55.17376 ----- 

-----iteration:  13 target diff:  0.0015825225137500092 values:  -61.574123 ----- 

-----iteration:  98 target diff:  0.0019527096586670798 values:  -55.045403 ----- 

-----iteration:  0 target diff:  0.9150042832120888 values:  -60.380764 ----- 

-----iteration:  14 target diff:  0.0014318078581090922 values:  -61.66966 ----- 

-------------------- ckpt:  13000 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold2/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!

-------------------- 














0.0010549135014753983

 



Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold2/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!

Refresh buffer every 1000000 sampling!
Loaded trajectories 




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------


To change all layers to have dtype float


-----iteration:  0 target diff:  0.915429947750519 values:  -57.89638 ----- 

-----iteration:  1 target diff:  0.0019961867858631806 values:  -57.78949 ----- 

-----iteration:  0 target diff:  0.913143240242902 values:  -55.1499 ----- 

-----iteration:  2 target diff:  0.0018164372255572157 values:  -57.709293 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.
 
0.00296949808895126 values:  -55.057213 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  3 target di


-----iteration:  15 target diff:  0.001997321993630479 values:  -54.792118 ----- -----iteration: 
 
10 target diff:  0.0015705114754335089 values:  -65.07133 ----- 

-----iteration:  5 target diff:  0.0023571233722194007 values:  -50.81187 ----- 

-----iteration:  16 target diff:  0.002230645773608765 values:  -56.86328 ----- 

-----iteration:  6 target diff:  0.0019195077369637626 values:  -50.80981 ----- 

-----iteration:  11 target diff:  0.0016391696815942576 values:  -65.11159 ----- 

-----iteration:  16 target diff:  0.001816408526594445 values:  -54.736023 ----- 

-----iteration:  0 target diff:  0.9175945151569062 values:  -63.429905 ----- 

-----iteration:  12 target diff:  0.00193905325776306 values:  -65.161705 ----- 

-----iteration:  7 target diff:  0.0013929669054369234 values:  -50.77116-----iteration:  -----  

17 target diff:  0.0024165649653627866 values:  -54.715527 ----- 

-----iteration:  1 target diff:  0.003917809804105205 values:  -63.423256 ----- 

-----iterat




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.
 
0.0019443667917547498 values:  -54.681038 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  16 target diff:  0.001951726

-----iteration:  4 target diff:  0.0019245965585196637 values:  -50.319756 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  30 target diff:  0.0026295994692874095 values:  -54.444233 ----- 

-----iteration:  5 target diff:  0.0014815776000585467 values:  -50.429478 ----- 

-------------------- -----iteration:  ckpt: 0 2000 target diff:   --------------------0.9154409728231319
 values:  -58.700397 ----- 
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold2/train/agent/trajs.pkl!

Refresh buffer every 1000000 sampling!


Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold2/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
L




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.
target diff: 
 0.0019356622525183037 values:  -58.68524 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dt




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('floa

-----iteration:  7 target diff:  0.0017108510880460112 values:  -51.482708 ----- 

-----iteration:  7 target diff:  0.0017483188566882618 values:  -60.794746 ----- 

-----iteration:  18 target diff:  0.00238961361488407 values:  -64.34569 ----- 

-----iteration:  8 target diff:  0.0022116236998717376 values:  -60.78004 ----- 

-----iteration:  8 target diff:  0.0024533015167402072 values:  -51.55209 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  9 target diff:  -----iteration:  0.001783457093060596819 target diff:  values:  0.003561997259402277 -60.778687  values: -----  -64.30477

 ----- 

-----iteration:  9 target diff:  0.001964335463783368 values:  -51.4989 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Lay


-----iteration:  5 target diff:  0.002269737253110956 values:  -58.6694 ----- 

-----iteration:  1 target diff:  0.0042457350200485215 values:  -51.591526 ----- 

-----iteration:  31 target diff:  0.0032483974484133387 values:  -63.81168 ----- 
-----iteration: 
 6 target diff:  0.0017465305191906556 values:  -58.587067 ----- 

-----iteration:  2 target diff:  0.0029615242149122665 values:  -51.567078 ----- 

-----iteration:  32 -----iteration: target diff:   00.003980906810439049  target diff: values:   0.9143240235463915-63.677193  values: -----  

-52.291973 ----- 

-----iteration:  7 target diff:  0.001828591677968182 values:  -58.615112 ----- 

-----iteration:  33 target diff:  0.003600075430854338 values:  -63.60137 ----- 

-----iteration:  1 target diff:  0.0034102647686246454 values:  -52.202385 ----- 

-----iteration:  3 target diff:  0.002081201054223872 values:  -51.56544 ----- 

-----iteration:  8 target diff:  0.0017520638825450098 values:  -58.65456 ----- 

-----iteration




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.
 
values:  -58.594063 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.
0.004050395641039274 
values:  -63.320766 ----- 



To change all la

-----iteration: -----iteration:   431  target diff: target diff:  0.0023753716181621784  0.005619786168431265values:   values: -62.284508  -62.456238-----  -----
 


-----iteration:  17 target diff:  0.0025277636828254283 values:  -58.141636 ----- 

-----iteration:  44 target diff:  0.002174152618530021 values:  -62.192196 ----- 

-----iteration:  2 target diff:  0.002495202843815439 values:  -62.499294 ----- 

-----iteration:  18 target diff:  0.002122241618637443 values:  -58.156773 ----- 

-----iteration:  45 target diff:  0.0024450913754517512 values:  -62.079075 ----- 

-----iteration:  3 target diff:  0.0024320633610299784 values:  -62.448112 ----- 

-----iteration:  19 target diff:  0.0021521424323677775 values:  -58.104095 ----- 

-----iteration:  46 target diff:  0.003071823085439318 values:  -61.915173 ----- 

-----iteration:  4 target diff:  0.0020168288673431747 values:  -62.47636 ----- 

-----iteration:  5 target diff:  0.0023338941359556265 values:  -62.533386 ----- 



T

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold2/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold2/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_

-----iteration:  73 target diff:  0.0026485136724488205 values:  -59.050224 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  74 target diff:  0.002624964416228617 values:  -58.978783 ----- 

-----iteration:  75 target diff:  0.0024447502611538337 values:  -58.965313 ----- 

-----iteration:  0 target diff:  0.9154307409652036 values:  -59.193474 ----- 

-----iteration:  76 target diff:  0.002546496283847689 values:  -58.899418 ----- 

-----iteration:  1 target diff:  0.001883362676811723 values:  -59.201538 -----iteration: -----  
77
 target diff:  0.0023675792339815636 values:  -58.82714 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  78 target diff:  0.0019979765810599435 valu

 target diff:  0.0018502047875081668 values:  -50.862373 ----- 

-----iteration:  87 target diff:  0.001788738885966677 values:  -58.21683 ----- 

-----iteration:  2 target diff:  0.0018346881940776978 values:  -50.86295 ----- 

-----iteration:  7 target diff:  0.0016026969066031784 values:  -63.318 ----- 

-----iteration:  3 target diff:  0.0013140045888864029-----iteration:   values: 88  -50.89486 target diff: -----  0.0023587462566183825
 values: 
 -58.171867 ----- 

-----iteration:  8 target diff:  0.001543559385003561 values:  -63.397255 ----- 

-----iteration:  89 target diff:  0.002727257917280062 values:  -58.049877 ----- 

-----iteration:  9 target diff:  0.0025511447566585643 values:  -63.444347 ----- 

-----iteration:  90 target diff:  0.002185693536100305 values:  -58.00702 ----- -----iteration:  
10
 target diff:  0.0013401524372303568 values:  -63.46377 ----- 

-----iteration:  91 target diff:  0.0025124277647953007 values:  -57.91506 ----- 



To change all layers to hav

 target diff:  0.002043480187751537 values:  -63.612904Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold2/train/agent0/trajs0.pkl! 
Refresh buffer every 1000000 sampling!


Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold2/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold2/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold2/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold2/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to th

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold2/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold2/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold2/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disab

-----iteration:  0 target diff:  0.9131931080559131 values:  -52.345436 ----- 

-----iteration:  3 target diff:  0.0018141194044575316 values:  -64.026375 ----- 

-----iteration:  1 target diff:  0.0014505143859589655 values:  -52.311333 ----- 

-----iteration:  4 target diff:  0.002013586471918619 values:  -64.0843 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  5 target diff:  0.0023716766766008483 values:  -64.098 ----- 

-----iteration:  6 target diff:  0.0017752349834301562 values:  -64.113976 ----- 

-----iteration:  7 target diff:  0.001522438071687873 values:  -64.16889 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  8 target diff:  0.001672906710147589 values:  -64.18

 values:  -60.230198 ----- 

-----iteration:  8 target diff:  0.0016549076046061012 values:  -64.17516 ----- 

-----iteration:  9 target diff:  0.0014842111153358945 values:  -64.17893 ----- 

-----iteration:  1 target diff:  0.00222610346269595 values:  -60.221394 ----- 

-----iteration:  2 target diff:  0.002224759438894068 values:  -60.18736 ----- 

-----iteration:  3 target diff:  0.0021608975418384462 values:  -60.207153 ----- 

-----iteration:  4 target diff:  0.0016535476090075475 values:  -60.190716 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  5 target diff:  0.0014385205450793991 values:  -60.132046 ----- 

-------------------- ckpt:  9000 --------------------
Loaded trajectories from load path: /home


-----iteration:  11-----iteration:  target diff:  0.001946324245565048  12values:   target diff: -62.883156  0.0015756643577839004----- values:   

-63.872105 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  12 target diff:  0.0015914003567602064 values:  -62.906578 ----- 

-----iteration:  0 target diff:  0.915625590130892 values:  -59.284615 ----- 

-----iteration:  13 target diff:  0.002009291997559608 values:  -62.926384 ----- 

-----iteration:  13 target diff:  0.002046468773221552 values: -----iteration:   0-63.82258  target diff: -----  0.9124669110238262
 
values:  -51.08066 ----- 

-----iteration:  1 target diff:  0.0014663742039782567 values:  -59.35085 ----- 

-----iteration:  14 target diff:  0.001892

-----iteration: -----iteration:  29  27 target diff: target diff:  0.002061161701799836  values: 0.002455861541251178  values: -63.517395  ------63.826515  
-----
 

-----iteration:  10 target diff:  0.002205093304570019 values:  -57.794243 ----- 

-----iteration: -----iteration:   3028  target diff: target diff:  0.002366247232228212  0.002440620394462584 values:  values: -63.450825 -63.811855  ---------- 
 


-----iteration:  11 target diff:  0.0016211728290545574 values:  -57.694305 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold3/train/agent/ckpt/offline_dqn_7000.ckpt
-----iteration:  29 target diff:  0.0022145358997451747 values:  -63.82474 ----- 

-----iteration:  31 target diff:  0.0019262602711490814 values:  -63.454716 ----- 
-----iteration:  
12 target diff:  0.0020283804830049023 values:  -57.636448 ----- 

-----iteration:  30 target diff:  0.0027611644066279505 values:  -63.826313 ----- 

-----iteration:  13 target diff:  0.0017650543




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('floa

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold2/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold2/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold2/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
-----iteration:  88 target diff:  0.0022368548918438114 Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold2/train/agent3/trajs3.pkl!values:  -58.986843
 ----- Refresh buffer every 1000000 sampling!


-----iteration:  89 target diff:  0.0017393535538078582 values:  -61.676327 ----- Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold2/train/agent4/trajs4.pkl!

Refresh buffer every 1000000 sampling!



To change all layers to have dtype float64 by







To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold2/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autoc




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('floa


saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold3/train/agent/ckpt/offline_dqn_12000.ckpt--------------------
 adv learner --------------------


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  0 target diff:  0.9178088098786774 values:  -63.179634 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype=

-----iteration:  12 -----iteration: target diff:   10.0024967211807355584  target diff: values:   0.0030003016322645354-63.374218  values:  -50.258327 ----------  



-----iteration:  10 target diff:  0.0016631157448213935 values:  -65.461205 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold3/train/agent/ckpt/offline_dqn_13000.ckpt
-----iteration:  13 target diff:  0.0016529164548988692 values:  -63.39078 ----- 

-----iteration:  11 target diff:  0.001524901473129641 values:  -65.49883 ----- 

-----iteration:  2 target diff:  0.0026687375986973443 values:  -50.235332 ----- 

-----iteration:  14 target diff:  0.002102326858145042 values:  -63.41825 ----- 

-----iteration:  12 target diff:  0.002227339499439943 values:  -65.533806 ----- 

-----iteration:  3 target diff:  0.00213077992100458 values:  -50.20812 ----- 
-----iteration: 
 15 target diff:  0.0018552079025986137 values:  -63.37029 ----- 

-----iteration:  13 target diff:  0.0014907971221690

Refresh buffer every 1000000 sampling!
-----iteration:  20 target diff:  0.0030556913901446884 values: Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold2/train/agent1/trajs1.pkl! -63.852318
 -----Refresh buffer every 1000000 sampling!
 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold2/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold2/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold2/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by pas



 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold2/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold2/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold2/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
-----iteration:  49 target diff:  0.003056537499817381 Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold2/train/agent3/trajs3.pkl!values:  
-46.684082Refresh buffer every 1000000 sampling!
 ----- 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold2/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this lay




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.
0.002573743427775192
 values:  -64.30664 ----- 

-----iteration:  49 target diff:  0.001802281962292599 values:  -62.052402 -------------------- ----- 
adv learner
 --------------------
-----iteration:  50 target diff:  0.0027567918639877587 values:  -46.550995 ----- 

-----iteration:  50 target diff:  0.0019148407454907328 values:  -61.90998 ----- 

-----iteration:  4 target diff:  0.002851654485657974 values:  -64.20885 ----- 

-----iteration:  51 target diff:  0.0027862622693340723 values:  -46.401512 ----- 

-----iteration:  51 target diff:  0.0017153772716309223 values:  -61.84209 ----- 

-----iteration:  5 target diff:  0.002122425326417863 values:  -64.305244 ----- 

-----iteration: 

-----iteration:  66 target diff:  0.002639892220133444 values:  -44.10904 ----- 

-----iteration:  67 target diff:  0.0026615112795001093 values:  -44.033386 ----- 

-----iteration:  64 target diff:  0.001996355854603698 values:  -60.151325 -----iteration:  ----- 
1 
target diff:  0.0017720467064357914 values:  -59.341656 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  68 target diff:  0.0025992715213128464 values:  -43.94234 ----- 

-----iteration:  65 target diff:  0.0033586384256105205 values:  -60.005867 ----- 
-----iteration: 
 2 target diff:  0.0015758515864866848 values:  -59.22583 ----- 

-----iteration:  69 target diff:  0.002536796889599823 values:  -43.881443 ----- 

-----iteration:  66 target diff:  0

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold2/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold2/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold2/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold2/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold2/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, 




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-------------------- fqe on dqn & sale --------------------


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  0 target diff:  0.9143537020345343 values:  -59.15884 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold3/train/agent0/ckpt/offline_dqn_1000.ckpt
-----iteration:  1 target diff:  0.004195720486754547 values:  -59.166668 ----- 

-




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  17 target diff:  0.001797578887486401 values:  -65.1706 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all

-----iteration:  1 target diff:  0.0019258728251374457 values:  -58.947567 ----- 

-----iteration:  30 target diff:  0.0018068015061641182 values:  -65.375854 ----- 

-----iteration:  2 target diff:  0.001925837342981502 values:  -58.93737 ----- 

-----iteration:  31 target diff:  0.0019040097114184256 values:  -65.43874 ----- 

-----iteration:  3 target diff:  0.0012693436278542133 values:  -58.893005 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold3/train/agent0/ckpt/offline_dqn_4000.ckpt
-----iteration:  32 target diff:  0.0024748842376588398 values:  -65.37818 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold3/train/agent/ckpt/offline_dqn_4000.ckpt
-----iteration:  33 target diff:  0.00226461411587696 values:  -65.34108 ----- 

-----iteration:  34 target diff:  0.001955388644168616 values:  -65.28633 ----- 

-----iteration:  35 target diff:  0.002036119487258673 values:  -65.249374 ----- 

-----iteration: 

-----iteration:  8 target diff:  0.002575920133048986 -----iteration: values:  5  -52.490192target diff:  0.0032837495273697957 values:   -65.21546 ----- -----
 


-----iteration:  6 target diff:  0.0024923524874012636 values:  -65.22489 ----- 

-----iteration:  9 target diff:  0.001813204702927146 values:  -52.50413 ----- 

-----iteration:  7 target diff:  0.001990787154942249 values:  -65.33807 ----- 

-----iteration:  8 target diff:  0.0029037563545389992 values:  -65.425575 ----- 

-----iteration:  10 target diff:  0.0017060441604593152 values:  -52.490883 ----- 

-----iteration:  9 target diff:  0.0022364284264545916 values:  -65.361435 ----- 

-----iteration:  11 target diff:  0.0021778024154325126 values:  -52.56596 ----- 

-----iteration:  10 target diff:  0.002050508911247838 values:  -65.323975 ----- 

-----iteration:  12 target diff:  0.0026022875286404396 values:  -52.633976 ----- 

-----iteration:  11 target diff:  0.0023912623846991596 values:  -65.36033 ----- 

-----iter

 -----iteration: -61.873505  34 target diff: -----  0.002067815649319145
 values:  
-55.35728 ----- 

-----iteration:  71 target diff:  0.0017906140461311862 values:  -61.741943 ----- 

-----iteration:  21 target diff:  0.0030727018034900698 values:  -51.006813 ----- 

-----iteration:  35 target diff:  0.0025413941436180313 values:  -55.194942 ----- 

-----iteration:  72 target diff:  0.002265747686790724 values:  -61.660854 ----- 

-----iteration:  36 target diff:  0.0021053064580420547 values:  -54.969124 ----- 

-----iteration:  73 target diff:  0.002321751840731249 values:  -61.5514 ----- 

-----iteration:  22 target diff:  0.003022195558613396 values:  -50.906483 ----------iteration:   
37
 target diff:  0.0018117230761571733 values:  -54.678825 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold3/train/agent/ckpt/offline_dqn_10000.ckpt
-----iteration:  74 target diff:  0.0017104438214578079 values:  -61.378883 ----- 

-----iteration:  38 target

-----iteration:  45 target diff:  0.0032343901596902664 values:  -48.648613 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold3/train/agent0/ckpt/offline_dqn_12000.ckpt
-----iteration:  97 target diff:  0.0021308446699590277 values:  -58.08383 ----- 

-----iteration:  46 target diff:  0.004574703930490926 values:  -48.488163 ----- 

-----iteration:  98 target diff:  0.002057633220857247 values:  -57.907112 ----- 

-----iteration:  47 target diff:  0.003198082959648614 values:  -48.368885 ----- 

-----iteration:  99 target diff:  0.0021355781285007486 values:  -57.79711 ----- 

-------------------- ckpt:  9000 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold2/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!
 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold2/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded tra




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.
-----iteration: 
 49 target diff:  0.0028645398307496775 values:  -48.138924 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

------------




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  9 target diff:  0.0019613


-----iteration:  98 target diff:  0.0018649224320093128 values:  -44.948967 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold3/train/agent0/ckpt/offline_dqn_16000.ckpt
-----iteration:  99 target diff:  0.0018051671830679615-----iteration:   0values:  target diff:   -44.945110.9151452376076858  values: ----- -59.4332 
 -----
-------------------- 
 ckpt: 
 12000 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold2/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold2/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold2/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold2/train/agent2/tr




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  1 target diff:  0.0016731645149076107 values:  -59.48888 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  2 target diff:  0.001525382933939608 values:  -59.332607 ----- 

-----iteration:  0 target diff:  0.9131857746529494 values:  -64.56605 ----- 

-----iteration:  1 target diff:  0.004733674912071983 values:  -64.62668 ----- 

-----iteration:  3 


-----iteration:  15 target diff:  0.0019431138475590143 values:  -64.95115 ----- 

-----iteration:  0 target diff:  0.9150941273150697 values:  -59.0429 ----- 

-----iteration:  16 target diff:  0.002291183392844217 values:  -65.00843 ----- 

-----iteration:  1 target diff:  0.0018639832676341094 values:  -59.027454 ----- 

-----iteration:  17 target diff:  0.0021026396185972497 values:  -65.002815 ----- 

-----iteration:  2 target diff:  0.001595752328260189 values:  -58.942352 ----- 

-----iteration:  3 target diff:  0.001710027577295929 values:  -59.06617 ----- 

-----iteration:  18 target diff:  0.0028379162174066224 values:  -65.01215 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

saving model weights at /home/jupyt/leyuan

-----iteration:  38 target diff:  0.0024668913453425815 values:  -64.401 ----- 

-----iteration:  18 target diff:  0.00318077491743261 values:  -53.125637 ----- 

-----iteration:  39 target diff:  0.0020816589682036697 values:  -64.33567 ----- 

-----iteration:  40 target diff:  0.002008856741403506 values:  -64.242874 ----- 

-----iteration:  19 target diff:  0.0036077092814776187 values:  -53.159107 ----- 

-----iteration:  41 target diff:  0.0019024523353937197 values:  -64.1899 ----- 

-----iteration:  42 target diff:  0.002660931373203452 values:  -64.02262 ----- 

-----iteration:  20 target diff:  0.0041922563915897226 values:  -53.151863 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold3/train/agent0/ckpt/offline_dqn_20000.ckpt
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold3/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, cal


-----iteration:  56 target diff:  0.002205329685177508 values:  -62.569023 ----- 

-----iteration:  33 target diff:  0.0064015136073945055 values:  -50.928005 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold3/train/agent1/ckpt/offline_dqn_1000.ckpt
-----iteration:  57 target diff:  0.0020564704309565638 values:  -62.48122 ----- 

-----iteration:  34 target diff:  0.0050205276636337195 values:  -50.778477 ----- 

-----iteration:  0 target diff:  0.9153177128622972 values:  -57.718227 ----- 

-----iteration:  35 target diff:  0.005079118510559188-----iteration:  58 target diff:  0.0020573236721075977 values:   -62.40627 values: -----  

-50.667046 ----- 

-----iteration:  1 target diff:  0.003096001510866026 values:  -57.600414 ----- 

-----iteration:  36 target diff:  0.005312431440716185 values:  -50.524525 ----- 

-----iteration:  59 target diff:  0.0017535758505151326 values:  -62.19342 ----- 

-----iteration:  2 target diff:  0.001823308798027

-----iteration:  62 target diff:  0.004524304814484374 values:  -47.07511 ----- 

-----iteration:  80 target diff:  0.0019459701638705224 values:  -59.07632 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  63 target diff:  0.004541597470039227 values:  -47.060497 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold3/train/agent1/ckpt/offline_dqn_3000.ckpt
-----iteration:  81 target diff:  0.0027165340787383197 values:  -58.83423 ----- 

-----iteration:  0 target diff:  0.9148571067816206 values:  -57.99835 ----- 

-----iteration:  64 target diff:  0.003917714854831872 values:  -47.007027 ----- 

-----iteration:  1 target diff:  0.0015521095092042882 values:  -----iteration: -58.0

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold2/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!
-----iteration:  88 target diff:  0.002151216367955862 values:  -46.469807 ----- 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold2/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold2/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold2/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold2/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold2/train/agent4/trajs4.pkl!
Refresh buffer e




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  89 target diff:  0.001909733230141171 values:  -46.493145 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold3/train/agent0/ckpt/offline_dqn_4000.ckpt
-----iteration:  90 target diff:  0.0020291054982503765 values:  -46.53445 ----- 

-----iteration:  91 target diff:  0.001911158685951788 values:  -46.5487 ----- 

-----iteration:  92 target diff:  0.0018225945519040367 values: --------------------  -46.560368fqe on dqn & sale -----  --------------------


-----iteration:  93 target diff:  0.0023424510110376024 values:  -46.59295 ----- 

-------------------- fqe on dqn & sale ----------

-----iteration:  5 target diff:  0.00266064613551734 values:  -64.50606 ----- 

-----iteration:  6 target diff:  0.0023404316127757204 values:  -64.513954 ----- 

-----iteration:  7 target diff:  0.00238570104251701 values:  -64.43784 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold3/train/agent1/ckpt/offline_dqn_6000.ckpt
-----iteration:  8 target diff:  0.0027752400524903857 values:  -64.45292 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  9 target diff:  0.0019757701422065815 values:  -64.4573 ----- 

-----iteration:  0 target diff:  0.9143266033865146 values:  -59.955994 ----- 

-----iteration:  10 target diff:  0.0021649909065263625 values:  -64.44506 ----- 

-----iter

-----iteration:  20 target diff:  0.0019635277251623265 values:  -64.516975 ----- 

-----iteration:  42 target diff:  0.006806960292194288 values:  -49.664146 ----- 

-----iteration:  21 target diff:  0.0016273232866772353 values:  -64.50017 ----- 

-----iteration:  43 target diff:  0.006047363342431385 values:  -49.47263 ----- 

-----iteration:  22 target diff:  0.0014775672676973228 values:  -64.52209 ----- 

-------------------- ckpt:  11000 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold2/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!-----iteration: 


Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold2/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold2/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/j




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold3/train/agent1/ckpt/offline_dqn_11000.ckpt
-----iteration:  45 target diff:  0.0050989687944149 values:  -49.08016 ----- 

-----iteration:  46 target diff:  0.005865264501356449 values:  -48.878582 ----- 

-----iteration:  47 target diff:  0.005906652635374504 val




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  31 target diff:  0.003052282343401327 values:  -64.86077 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  32 target diff:  0.003258268124428879 values:  -64.697044 ----- 

-----iteration:  33 target diff:  0.002941339386480327 values:  -64.66411 ----- 

-----iteration:  34 target diff:  0.0027659786499667147 values:  -64.588234 ----- 

saving model

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold2/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold2/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold2/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold2/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold2/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, 




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold3/train/agent/ckpt/offline_dqn_13000.ckpt
-----iteration:  32 target diff:  0.005018118263651295 values:  -50.142193 ----- 

-----iteration:  33 target diff:  0.004080990674764306 values:  -49.97166 ----- 

-----iteration:  34 target diff:  0.005063777075797453 values:  -49.82746 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  35 target diff:  0.004040990993568068 values:  -49.688663 ----- 

-----iteration:  36 target diff:  0.004187564630994923 values:  -49.55782 ----- 

-----iteration:  37 target diff:  0.004103237915075458 values:  -49.448845 ----- 

-----iteration:  38 ta


Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold2/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold2/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold2/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold2/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold2/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer,


saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold3/train/agent2/ckpt/offline_dqn_5000.ckpt
-----iteration:  0 target diff:  0.9132591655039752 values:  -64.18391 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold3/train/agent/ckpt/offline_dqn_18000.ckpt
-----iteration:  1 target diff:  0.0048250122613117865 values:  -64.18096 ----- 

-----iteration:  2 target diff:  0.00404015655973449 values:  -64.17833 ----- 

-----iteration:  3 target diff:  0.002954750353473525 values:  -64.14714 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  4 target diff:  0.0031803518344405545 values:  -64.15736 ----- 

-----iteration:  0 target diff:  0.9121217238206727 

-----iteration:  57 target diff:  0.003883854608690366 values:  -47.331738 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold3/train/agent2/ckpt/offline_dqn_11000.ckpt
-----iteration:  72 target diff:  0.002271916359627444 values:  -59.52016 ----- 

-----iteration:  58 target diff:  0.004014901147982692 values:  -47.16485 ----- 

-----iteration:  73 target diff:  0.0030380599571650222 values:  -59.394592 ----- 

-----iteration:  59 target diff:  0.0033249770716856733 values:  -47.036777 ----- 

-----iteration:  74 target diff:  0.002397428657486789 values:  -59.29954 ----- 

-----iteration:  75 target diff:  0.0022273489441822026 values:  -59.127953 ----- 

-----iteration:  60 target diff:  0.0033360375770004674 values:  -46.884697 ----- 

-----iteration:  76 target diff:  0.0024544481108794603 values:  -58.932007 ----- 

-----iteration:  61 target diff:  0.003299318189509126 values:  -46.762905 ----- 

-----iteration:  62 -----iteration: target dif

91 target diff:  0.0018244881530993001 values:  -45.068657 ----- 

-----iteration:  92 target diff:  0.0015881412892500308 values:  -45.05905 ----- 

-----iteration:  93 target diff:  0.001561330449259009 values:  -45.034184 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold3/train/agent1/ckpt/offline_dqn_13000.ckpt
-----iteration:  94 target diff:  0.0019541374266656445 values:  -45.0479 ----- 

-----iteration:  95 target diff:  0.0014717864934096392 values:  -45.00333 ----- 

-------------------- ckpt:  16000 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold2/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold2/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold2/train/agent1/trajs1


saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold3/train/agent2/ckpt/offline_dqn_14000.ckpt


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you ca

-----iteration:  47 target diff:  0.005403916592082451 values:  -47.900333 ----- 

-----iteration:  5 target diff:  0.003190720772962038 values:  -62.669136 ----- 

-----iteration:  48 target diff:  0.004312502434726817 values:  -47.70254 ----- 

-----iteration:  49 target diff:  0.004782885198055382 values:  -47.4982 ----- 

-----iteration:  6 target diff:  0.0025664237243925823 values:  -62.631336 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold3/train/agent2/ckpt/offline_dqn_20000.ckpt
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold3/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To chan

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold2/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold2/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
-----iteration:  62 target diff:  0.0026326372221281754 values:  -60.242107Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold2/train/agent2/trajs2.pkl! 
Refresh buffer every 1000000 sampling!----- 


Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold2/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold2/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, 


Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold2/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold2/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold2/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold2/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold2/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer,


saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold3/train/agent2/ckpt/offline_dqn_7000.ckpt
-----iteration:  3 target diff:  0.0032939523062168476 values:  -52.163643 ----- 

-----iteration:  0 target diff:  0.9136857392912118 values:  -64.40568 ----- 

-----iteration:  4 target diff:  0.002869976952871816 values:  -52.154552 ----- 

-----iteration:  1 target diff:  0.0030049738053487957 values:  -64.48832 ----- 

-----iteration:  5 target diff:  0.0028898409746851575 values:  -52.141018 ----- 

-----iteration:  2 target diff:  0.002925761546271143 values:  -64.52679 ----- 

-----iteration:  3 target diff:  0.0022149428064812673 values:  -64.50743 ----- 

-----iteration:  6 target diff:  0.0027690634869213865 values:  -52.117584 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold3/train/agent3/ckpt/offline_dqn_8000.ckpt
-----iteration:  7 target diff:  0.0020313915556721862 values:  -52.224506 ----- 

-----iteration:  4 




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.
 
0.003951479634818016 values:  -47.889378 ----- 



To change all layers to have dtype float64 b




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  15 target diff:  0.0019384199967599277 values:  -65.63279 ----- 

-----iteration:  16 target diff:  0.001847915728198866 values:  -65.68595 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold3/train/agent3/ckpt/offline_dqn_14000.ckpt
-----iteration:  17 target diff:  0.0020924221134403756 




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  1 target diff:  0.0036497610345720935 values:  -64.55865 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold3/train/agent2/ckpt/offline_dqn_16000.ckpt
-----iteration:  2 target diff:  0.003570390099804419 values:  -64.61673 ----- 

-----iteration:  3 target diff:  0.002971990205009422 valu




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  33 target diff:  0.004692728105706302 values:  -65.02135 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold3/train/agent1/ckpt/offline_dqn_13000.ckpt
-----iteration:  34 target diff:  0.004191164118036631 values:  -64.96716 ----- 

-----iteration:  35 target diff:  0.003780391208415523 values:  -64.94067 ----- 

-----iteration:  36 target diff:  0.0033231160967014586 values:  -64.87228 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  37 target diff:  0.003621021720999959 values:  -64.83104 ----- 

-----iteration:  38 target diff:  0.004276180605655


-----iteration:  0 target diff:  0.9130693098039174 values:  -51.07807 ----- 

-----iteration:  53 target diff:  0.0022340536507836385 values:  -63.418274 ----- 

-----iteration:  1 target diff:  0.002933770047704052 values:  -51.124165 ----- 

-----iteration:  54 target diff:  0.002836147181234504 values:  -63.31064 ----- 

-----iteration:  2 target diff:  0.0019422571261157589 values:  -51.104527 ----- 

-----iteration:  55 target diff:  0.001956429482079195 values:  -63.17068 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold3/train/agent4/ckpt/offline_dqn_1000.ckpt
-----iteration:  56 target diff:  0.0023391046879625635 values:  -63.03443 ----- 

-----iteration:  3 target diff:  0.0019596619926688826 values:  -51.18618 ----- 

-----iteration:  57 target diff:  0.0020003332233119214 values:  -62.94996 ----- 

-----iteration:  4 target diff:  0.002060101280782568 values:  -51.139668 ----- 

-----iteration:  58 target diff:  0.0023131553340126113 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold3/train/agent4/ckpt/offline_dqn_4000.ckpt
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold2/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!
-----iteration: Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold2/train/agent0/trajs0.pkl! 
Refresh buffer every 1000000 sampling!


Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold2/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold2/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold2/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  39 target diff:  0.002128509134817845 values:  -47.77003 ----- 

-----iteration:  40 target diff:  0.002061586165074544 values:  -47.575916 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold3/train/agent1/ckpt/offline_dqn_18000.ckpt
-----iteration:  41 target diff:  0.002513473343562299 values:  -47.27152 ----- 

-----iteration:  42 target diff:  0.0030437413762459784 values:  -47.1357 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  43 target diff:  0.002129701212592671 values:  -46.983173 ----- 

-----iteration:  44 target diff:  0.00214036237532


-----iteration:  71 target diff:  0.002081805571515546 values:  -43.38651 ----- 

-----iteration:  72 target diff:  0.0020142905116068764 values:  -43.336487 ----- 

-----iteration:  73 target diff:  0.001947617261524528 values:  -43.27049 ----- 

-----iteration:  74 target diff:  0.0023640391134484963 values:  -43.223755 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  75 target diff:  0.0019397840317745838 values:  -43.20841 ----- 

-----iteration:  0 target diff:  0.9130132416509307 values:  -63.66047 ----- 

-----iteration:  76 target diff:  0.0022353719879220175 values:  -43.193638 ----- 

-----iteration:  1 target diff:  0.00392942588908762 values:  -63.651466 ----- 

-----iteration:  77 target diff:  0.001


-----iteration:  0 target diff:  0.9132466986836439 values:  -64.53389 ----- 

-----iteration:  1 target diff:  0.002656122724212861 values:  -64.548904 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold3/train/agent/ckpt/offline_dqn_1000.ckpt
-----iteration:  2 target diff:  0.003013472546705924 values:  -64.60897 ----- 

-----iteration:  3 target diff:  0.002523302875851196 values:  -64.57896 ----- 

-----iteration:  4 target diff:  0.002649331160576242 values:  -64.58415 ----- 

-----iteration:  5 target diff:  0.001790361434601924 values:  -64.547806 ----- 

-----iteration:  6 target diff:  0.003051287636575427 values:  -64.56421 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold3/train/agent4/ckpt/offline_dqn_9000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold3/train/agent3/ckpt/offline_dqn_8000.ckpt
-----iteration:  7 target diff:  0.0018099878607663695 values:  -64.57

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold2/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold2/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold2/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold2/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold2/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, 




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  2 target diff:  0.0022109280182210696 values:  -65.21027 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change al

-----iteration:  22 target diff:  0.002085256253690777 values:  -64.39581 ----- 

-----iteration:  34 target diff:  0.0020061551270873397 values:  -63.082397 ----- 

-----iteration:  23 target diff:  0.002629358457468527 values:  -64.41732 ----- 

-----iteration:  24 target diff:  0.0024619853962212328 values:  -64.39597 ----- 

-----iteration:  35 target diff:  0.0016995900406165069 values:  -63.02285 ----- 

-----iteration:  25 target diff:  0.0021359362576176925 values:  -64.44039 ----- 

-----iteration:  26 target diff:  0.0034524587908896253 values:  -64.465485 ----- 

-----iteration:  36 target diff:  0.0021001843958618075 values:  -63.02641 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold3/train/agent/ckpt/offline_dqn_18000.ckpt
-----iteration:  27 target diff:  0.0026245429877537027 values:  -64.46758 ----- 

-----iteration:  37 target diff:  0.0017716508440713875 values:  -62.953613 ----- 

-----iteration:  28 target diff:  0.002997134683


-----iteration:  61 target diff:  0.0017985956909770794 values:  -62.21555 ----- 

-----iteration:  52 target diff:  0.004456625937696313 values:  -62.237835 ----- 

-----iteration:  62 target diff:  0.0018704510733641285 values:  -62.199017 ----- 

-----iteration:  53 target diff:  0.004842225602367689 values:  -62.21521 ----- 

-----iteration:  63 target diff:  0.001852103250271997 values:  -62.187 ----- 

-----iteration:  64 target diff:  0.002014801050356589 values:  -62.176697 ----- 

-----iteration:  54 target diff:  0.004006053433207675 values:  -62.090614 ----- 

-----iteration:  55 target diff:  0.004654383302888159-----iteration:   values: 65  -62.01076target diff:  -----  0.0019813652014537895
 
values:  -62.193817 ----- 

-----iteration:  56 target diff:  0.0035926337961746614 values:  -61.81579 ----- 

-----iteration:  66 target diff:  0.0020609704431489204 values:  -62.215366 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold3/train/a


-----iteration:  86 target diff:  0.0038961369115193497 values:  -58.963444 ----- 

-----iteration:  87 target diff:  0.0026731069271180487 values:  -58.855846 ----- 

-----iteration:  0 target diff:  0.9208998237136797 values:  -66.72563 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold3/train/agent0/ckpt/offline_dqn_3000.ckpt
-----iteration:  88 target diff:  0.00362545603862696 values:  -58.84207 ----- 

-----iteration:  1 target diff:  0.0016815549850948428 values:  -66.75191 ----- 

-----iteration:  89 target diff:  0.002947027770006331 values:  -58.81965 ----- 

-----iteration:  2 target diff:  0.0010799619608209377 values:  -66.78678 ----- 

-----iteration:  90 target diff:  0.0019877252971105637 values:  -58.787273 ----- 

-----iteration:  91 target diff:  0.0030545076946109136 values:  -58.727913 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold3/train/agent3/ckpt/offline_dqn_5000.ckpt
-----iteration:

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold3/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold3/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold3/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold3/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold3/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, 


-----iteration:  0 target diff:  0.9131904207666428 values:  -64.698654 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold3/train/agent4/ckpt/offline_dqn_12000.ckpt
-----iteration:  1 target diff:  0.0031914772377993356 values:  -64.700096 ----- 

-----iteration:  2 target diff:  0.0020341019464573556 values:  -64.736336 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  3 target diff:  0.002087625012651887 values:  -64.76522 ----- 

-----iteration:  4 target diff:  0.0019537229148125844 values:  -64.81321 ----- 

-----iteration:  0 target diff:  0.9180758519317849 values:  -62.35858 ----- 

-----iteration:  5 target diff:  0.0018948504072746177 values:  -64.87896 ----- 

-----i




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  22 target diff:  0.0027905955074233844 values:  -65.196724 ----- 

-----iteration:  23 target diff:  0.0017634582719123055 values:  -65.14513 ----- 

-----iteration:  24 target diff:  0.002186287433032893 values:  -65.07943 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  25 target diff:  0.002375307429438092 values:  -65.000175 ----- 

-----iteration:  26 target diff:  0.0031880735734744125 values:  -64.88442 ----- 

-----iteration:  27 target diff:  0.002729460724811362 values:  -64.88183 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold3/train/


-------------------- adv learner --------------------
-----iteration:  50 target diff:  0.0031057458224310105 values:  -62.802063 ----- 

-----iteration:  51 target diff:  0.0037225875823365196 values:  -62.617275 ----- 

-----iteration:  52 target diff:  0.00348142096636715 values:  -62.50367 ----- 

-----iteration:  53 target diff:  0.0031282782305781876 values:  -62.477337 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  54 target diff:  0.003174317290173958 values:  -62.436592 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold3/train/agent0/ckpt/offline_dqn_11000.ckpt
-----iteration:  55 target diff:  0.0031989536903240068 values:  -62.30898 ----- 

-----iteration:  56 target diff:  0.002976167999452465 values:  -62.24584 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold3/train/agent3/ckpt/offline_dqn_13000.ckpt
-----iteration:  57 target diff:  0.003224054025861071 value

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold3/train/agent0/ckpt/offline_dqn_13000.ckpt
-----iteration:  84 target diff:  0.0021870416718935727 values:  -60.444233 ----- 

-----iteration:  85 target diff:  0.003059073489396325 values:  -60.395813 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold3/train/agent3/ckpt/offline_dqn_15000.ckpt
-----iteration:  86 target diff:  0.002074172621952042 values:  -60.334263 ----- 

-----iteration:  87 target diff:  0.0026584859126633715 values:  -60.311398 ----- 

-----iteration:  88 target diff:  0.002655686783497075 values:  -60.17924 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold3/train/agent4/ckpt/offline_dqn_20000.ckpt
-------------------- behavior cloning --------------------
-----iteration:  89 target diff:  0.0022470027025913953 values:  -60.14588 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.back

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold2/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold2/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
-------------------- fqe on dqn & sale --------------------


Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold2/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
-----iteration: Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold2/train/agent4/trajs4.pkl! 
7Refresh buffer every 1000000 sampling!
 target diff:  0.001998852110102782 values:  -63.46358 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disabl

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold3/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold3/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold3/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold3/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold3/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, 

-----iteration:  3 target diff:  0.0016083530323413682 values:  -63.718777 ----- 

-----iteration:  39 target diff:  0.001649767277034327 values:  -63.79406 ----- 

-----iteration:  4 target diff:  0.0015977771495528873 values:  -63.718365 ----- 

-----iteration:  5 target diff:  0.0014957581848106466 values:  -63.77396 ----- 

-------------------- ckpt:  8000 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold3/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!


40 target diff:  0.0020425663434906326Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold3/train/agent0/trajs0.pkl! values: 
 Refresh buffer every 1000000 sampling!


Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold3/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold3/tr




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  41 target diff:  0.0019199482562465354 values:  -63.435635 ----- 

-----iteration:  27 target diff:  0.002630721666173906 values:  -56.751865 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold3/train/agent0/ckpt/offline_dqn_18000.ckpt
-----iteration:  28 target diff:  0.002282874481218793


-----iteration:  6 target diff:  0.001928491841213661 values:  -65.18376 ----- 

-----iteration:  39 target diff:  0.002313217959489724 values:  -55.526028 ----- 

-----iteration:  40 target diff:  0.0024822413084352977 values:  -55.449665 ----- 

-----iteration:  7 target diff:  0.0025050365642392547 values:  -65.17716 ----- 

-----iteration:  0 target diff:  0.917630930691182 values:  -62.603367 ----- 

-----iteration:  41 target diff:  0.0018246664673055187 values:  -55.25735 ----- 

-----iteration:  8 target diff:  0.002367105947223297 values:  -65.225876 ----- 

-----iteration:  1 target diff:  0.002695791374050684 values:  -62.607475 ----- 

-----iteration:  42 target diff:  0.0020984128008324178 values:  -54.87491 ----- 

-----iteration:  9 target diff:  0.0015035783201486285 values:  -65.3319 ----- 

-----iteration:  2 target diff:  0.0025129989912036866 values:  -62.615788 ----- 

-----iteration:  10 target diff:  0.002550380889354576 -----iteration: values:   43 -65.342735ta




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.





To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('fl

-----iteration:  23 target diff:  0.0022447610964875164 values:  -65.59702 ----- 

-----iteration:  24 target diff:  0.002778596812372448 values:  -65.60227 ----- 

-----iteration:  25 target diff:  0.002820388498112404 values:  -65.58912 ----- 

-----iteration:  26 target diff:  0.0023988685978133953 values:  -65.57341 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  27 target diff:  0.0024271345057638254 values:  -65.5723 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold3/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold3/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold3/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold3/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold3/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold3/train/agent1/ckpt/offline_dqn_4000.ckpt
-----iteration:  77 target diff:  0.0030505226097685204 values:  -59.551525 ----- 

-----iteration:  78 target diff:  0.0030186421607037253 values:  -59.434395 ----- 

-----iteration:  79 target diff:  0.0026652845925038164 values:  -59.334072 ----- --------------------
 
fqe on dqn & sale --------------------
-----iteration:  80 target diff:  0.0022739128393977296 values:  -59.229168 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  81 target diff:  0.002254951143133536 values:  -59.13766 ----- 

-----iteration:  82 target diff:  0.0021995070224339955 values:  -59.04203 ----- 

-----iteration:  83 target diff:  0.0020935230812226557 values:  -58.99411 ----- 

-----iteration:  84 target diff:  0.0026533032433727647 values:  -58.890556 ----- 

-----iteration:  85 target diff:  0.0026954353058924654 values:  -58.79876 ----- 

-----ite


-----iteration:  7 target diff:  0.0016709996038498914 values:  -62.62806 ----- 

-----iteration:  8 target diff:  0.0015516764578951024 values:  -62.692524 ----- 

-----iteration:  9 target diff:  0.001846524667125692 values:  -62.637764 ----- 

-----iteration:  10 target diff:  0.0020674273054166127 values:  -62.69082 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  11 target diff:  0.002072264618287526 values:  -62.684223 ----- 

-----iteration:  0 target diff:  0.9235457939547521 values:  -56.59272 ----- 

-----iteration:  12 target diff:  0.0016736340316358055 values:  -62.663643 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold3/train/agent4/ckpt/offline_dqn_8000.ckpt
-




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('floa


-----iteration:  0 target diff:  0.92331999294453 values:  -57.843666 ----- 

-----iteration:  2 target diff:  0.0016322172272488719 values:  -62.602654 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold3/train/agent/ckpt/offline_dqn_3000.ckpt
-----iteration:  1 target diff:  0.003879766190843116 values:  -57.826538 ----- 

-----iteration:  -----iteration: 3  2target diff:   target diff: 0.0016836092582917933  0.0016023189534003662 values:  values:  -62.712563-57.842377 -----  

----- 

-----iteration:  4 target diff:  0.002210296469877073 -----iteration: values:   -62.6933333  -----target diff:   0.0016649734189346855
 
values:  -57.877083 ----- 

-----iteration:  4 target diff:  0.0028825041147483564 values:  -57.9224 ----- 

-----iteration:  5 target diff:  0.0016548363805272612 values:  -62.70577 ----- 

-----iteration:  5 target diff:  0.002264128557431648 values:  -57.96452 ----- 

-----iteration:  6 target diff:  0.0018339050890851473 values

-----iteration:  30 target diff:  0.0017906519121805987 values:  -62.055107 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold3/train/agent4/ckpt/offline_dqn_13000.ckpt
-----iteration:  10 target diff:  0.0022081973823465115 values:  -57.37417 ----- 

-----iteration:  31 target diff:  0.0014110982039107385 values:  -62.149414 ----- 

-------------------- ckpt:  11000 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold3/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!
 0.0025701292054900486 values:  -57.35572 ----- 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold3/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold3/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyua




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  12 target diff:  0.0014780892012641704 values:  -57.35663 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold3/train/agent1/ckpt/offline_dqn_11000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold3/train/agent/ckpt/offline_dqn_6000.ckpt
------------------


saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold3/train/agent4/ckpt/offline_dqn_15000.ckpt
-----iteration:  0 target diff:  0.9189705095236254 values:  -63.845207 ----- 

-----iteration:  1 target diff:  0.002188816870206987 values:  -63.892735 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold3/train/agent1/ckpt/offline_dqn_13000.ckpt
-----iteration:  2 target diff:  0.0016793597162669085 values:  -63.88589 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  3 target diff:  0.001899614404687243 values:  -63.926777 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold3/train/agent/ckpt/offline_dqn_8000.ckpt
-----iteration:  4 target diff:  0.0015877331783173594 values:  -63.971 ----- 

-----iteration:  5 target diff:  0.0014609648159064482 values:  -64.0229 ----- 

-------------------- ckpt:  12000 --------------------
Loaded trajectories from load pat




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-------------------- fqe on dqn & sale ---

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold3/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocastin




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold3/train/agent/ckpt/offline_dqn_13000.ckpt


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  0 target diff:  0.919195103540772 values:  -63.42513 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='f

-----iteration:  14 target diff:  0.002240721727279406 values:  -56.534355 ----- 

-----iteration:  3 target diff:  -----iteration: 0.0024169711056341997  values: 15  -67.41558target diff:   -----0.0017026391802313365 

 values:  -56.703247 ----- 

-----iteration:  4 target diff:  0.0017433979384137116 values:  -67.49807 ----- 

-----iteration:  16 target diff:  0.0022392608512549566 values: -----iteration:   5 target diff: -56.63294  ----- 0.0019978353168125795 
values: 
 -67.581154 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  6 target diff:  0.0018919672249594322 values:  -67.61615 ----- 

-----iteration:  17 target diff:  0.001912864047745387 values:  -56.570095 ----- 

-----iteration:  18 target diff:  0.0

-----iteration:  3 target diff:  0.0018741242649305016 values:  -63.11379 ----- 

-----iteration:  5 target diff:  0.0017096129017311382 values:  -67.805855 ----- 

-----iteration:  4 target diff:  0.0014875471812913325 values:  -63.106987 ----- 

-------------------- ckpt:  14000saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold3/train/agent2/ckpt/offline_dqn_1000.ckpt --------------------

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold3/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold3/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold3/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold3/train/agent2/t




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  6 target diff:  0.003150921330808114 values:  -67.65187 ----- 

-----iteration:  7 target diff:  0.0023991160880233254 values:  -67.68667 ----- 

-----iteration:  8 target diff:  0.0014104585032930581 values:  -67.801704 ----- 

-------------------- ckpt:  2000 --------------------
Loaded trajectories from load path: /home/j




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.
--------------------
 fqe on dqn & sale --------------------


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner ---


-----iteration:  6 target diff:  0.0017217147513146239 values:  -63.62885 ----- 

-----iteration:  3 target diff:  0.002234851685995373 values:  -67.06089 ----- 

-----iteration:  4-----iteration:   0 target diff: target diff:   0.0010731107502391327 0.923457963539209values:  values:  -67.18555  -57.213238-----  

----- 

-----iteration:  7 target diff:  0.0015441728603225796 values:  -63.673473 ----- 

-----iteration:  1 target diff:  0.0035768073207093193 values:  -57.228863 ----- 

-----iteration:  8 target diff:  0.0016152551877946456 values:  -63.72378 ----- 

-----iteration:  2 target diff:  0.0025451331752295915 values:  -57.22414 ----- 

-----iteration:  9 target diff:  0.001846131913071279 values:  -63.788586 ----- 

-----iteration:  3 target diff:  0.0018906582957049132 values:  -57.2253 ----- 

-----iteration:  10 target diff:  0.0019028223305598696 values:  -63.790554 ----- 

-----iteration:  4 target diff:  0.002354386544302957 values:  -57.274902 ----- 

-----iteration: 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold3/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold3/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold3/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold3/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold3/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, 




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------


To change all layers to have dtype float




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  9 target diff:  0.0018411582447710798 values:  -58.306866 ----- 

-----iteration:  10 target diff: -----iteration:   0.0018707117686421710  values: target diff:   -58.327060.918092859269295 -----  values: 
 
-67.82992 ----- 

-----iteration:  11 -----iteration: target diff:   1 0.0021315670669890974target diff:   values: 0.0034458803593143492 -58.30607  values: -----  -67.92898
 -----
 

-----iteration:  0 target diff:  0.9184601117479577 values:  -62.97339 ----- 

-----iteration:  12 target diff:  0.001324899574725544 values:  -58.31772 ----- 

-----iteration:  1 target diff:  0.0024043974491490547 values:  -62.91292 ----- 

-----iteration:  2 target diff:  0.002890095991

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold3/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!


Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold3/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold3/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold3/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold3/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer


-------------------- adv learner --------------------
-----iteration:  7 target diff:  0.002214994187944292 values:  -62.11828 ----- 

-----iteration:  8 target diff:  0.001792429597540471 values:  -62.17617 ----- 

-----iteration:  9 target diff:  0.001589947200148276 values:  -62.30013 ----- 

-----iteration:  10 target diff:  0.002639189918055417 values:  -62.291656 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold3/train/agent2/ckpt/offline_dqn_7000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold3/train/agent0/ckpt/offline_dqn_2000.ckpt
-----iteration:  11 target diff:  0.002100833026175737 values:  -62.3108 ----- 

-----iteration:  12 target diff:  0.002046630703537057 values:  -62.380154 ----- 

-----iteration:  13 target diff:  0.0020433526778659257 values:  -62.469845 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer

-----iteration:  5 target diff:  0.0023465149741559123 values:  -68.12655 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold3/train/agent2/ckpt/offline_dqn_9000.ckpt
-----iteration:  6 target diff:  -------------------- 0.002326953745127593 fqe on dqn & sale values: -------------------- -68.093155




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold3/train/agent0/ckpt/offline_dqn_4000.ckpt
-----iteration:  7 target diff:  0.0019609650625362213 values:  -68.15413 ----- 

-----iteration:  0 target diff:  0.9230975426406421 values:  -57.667995 ----- 

-----iteration:  8 target diff:  0.001939827117094388 values:  -68.14991 -----

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold3/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold3/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold3/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold3/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold3/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, 



Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold3/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold3/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold3/train/agent2/trajs2.pkl!
-----iteration: Refresh buffer every 1000000 sampling! 
7 target diff:  0.001543696618769871 values:  -68.54189 ----- 
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold3/train/agent3/trajs3.pkl!

Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold3/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, p

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold3/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold3/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold3/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold3/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold3/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, 




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  33 target diff:  0.001932




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('flo

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold3/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold3/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold3/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold3/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold3/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, 


-------------------- adv learner --------------------


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- fqe on dqn & sale --------------------
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold3/train/agent2/ckpt/offline_dqn_17000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold3/train/agent0/ckpt/offline_dqn_12000.ckpt
-----iteration:  0 target diff:  0.92407013853241 values:  -57.587383 ----- 

-----iteration:  1 target diff:  0.003118995245692368 values:  -57.60026 ----- 

-----iteration:  2 target diff:  0.002789342831390624 values:  -57.595757 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  3 target diff:  0.0019




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  5 target diff:  0.001760902660130016 values:  -69.39901 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all

 
0.0018016282785879285 values:  -55.579735 ----- 

-----iteration:  2 target diff:  0.0015727603827982825 values:  -67.222206 ----- 

-----iteration:  43 target diff:  0.0017439631569147623 values:  -55.441925 ----- 

-----iteration:  44 target diff:  0.001754867328760139 values:  -55.409603 ----- 

-----iteration:  16 target diff:  0.002101489665736524 values:  -69.69253 ----- 

-----iteration:  3 target diff:  0.001982801872512605 values:  -67.35689 ----- 

-----iteration:  4 -----iteration: target diff:   450.0020489126383727617 values:   target diff: -67.268555 0.0017035298406154601  values: -----  
-55.327885
 ----- 

-----iteration:  17 target diff:  0.0026700018328264198 values:  -69.67939 ----- 

-----iteration:  5 target diff:  0.001808859880499916 values:  -67.20275 ----- 

-----iteration:  46 target diff:  0.002032119730993333 values:  -55.14252 ----- 

-----iteration:  6 target diff:  0.0014744592111311972 values:  -67.28205 ----- 

-----iteration:  18 target diff:  0.0025


-------------------- adv learner --------------------
-----iteration:  29 target diff:  0.007034245305874801 values:  -63.485676 ----- 

-----iteration:  30 target diff:  0.008320457029055308 values:  -62.736973 ----- 

-----iteration:  31 target diff:  0.00545397935634006 values:  -62.557 ----- 

-----iteration:  32 target diff:  0.003230691676648604 values:  -62.424377 ----- 

-----iteration:  33 target diff:  0.002668358879551733 values:  -62.277824 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold3/train/agent0/ckpt/offline_dqn_18000.ckpt
-----iteration:  34 target diff:  0.002332335886751744 values:  -62.091534 ----- 

-----iteration:  35 target diff:  0.0025038984540356577 values:  -61.984383 ----- 

-------------------- fqe on dqn & sale --------------------
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold3/train/agent3/ckpt/offline_dqn_3000.ckpt
-----iteration:  36 target diff:  0.0019510317677605925 values: 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold3/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  4 target diff:  0.0020898397759917513 values:  -57.005455 ----- 

-----iteration:  5 target diff:  0.001851920831702181 values:  -57.000767 ----- 

-----iteration:  6 target diff:  0.0014527530269532174 values:  -56.956577 -




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('floa

 
target diff:  0.0021801332082119567 values:  -56.788906 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  10 target diff:  0.0017460362874416049 values:  -56.81855 ----- 

-----iteration:  11 target diff:  0.0021339207232319198saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold3/train/agent1/ckpt/offline_dqn_2000.ckpt
 values:  -56.76007 ----- 

-----iteration:  12 target diff:  0.001994461000080674 values:  -56.7046 ----- 

-----iteration:  13 target diff:  0.001823032913580471 values:  -56.73543 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold3/train/agent3/ckpt/offline_dqn_7000.ckpt
-----ite


 -54.481167 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  46 target diff:  0.0022422344116720437 values:  -54.26636 ----- 

-----iteration:  47 target diff:  0.0018086556705755998 values:  -54.109035 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold4/train/agent/ckpt/offline_dqn_9000.ckpt
saving model weights at /home/jupyt/




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  6 target diff:  0.0026000963011671326 values:  -68.56505 ----- 

-----iteration:  7 target diff:  0.0016021835713060945 values:  -68.626305 ----- 

-----iteration:  8 target diff:  0.0017912427193617485 values:  -68.58166 ----- 

-----iteration:  9 target diff:  0.0014537614657143707 values:  -68.682365 ----- 

-------------




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('floa

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold3/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  22 target diff:  0.002563914047253186 values:  -57.33496 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the 


-----iteration:  8 target diff:  0.0013847625728348233 values:  -68.37233 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold4/train/agent/ckpt/offline_dqn_16000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold3/train/agent3/ckpt/offline_dqn_17000.ckpt
-----iteration:  0 target diff:  0.9236319749994871 values:  -57.213684 ----- 

-----iteration:  1 target diff:  0.0025641158143109295 values:  -57.195904 ----- 

-----iteration:  2 target diff:  0.0020573844948404804 values:  -57.250835 ----- 

-----iteration:  3 target diff:  0.0023785455992134832 values:  -57.20799 ----- 

-----iteration:  4 target diff:  0.0019984255354310574 values:  -57.20678 ----- 

-----iteration:  5 target diff:  0.001596093835044689 values:  -57.194557 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If y

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold3/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold3/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold3/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold3/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold3/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, 


-----iteration:  0 target diff:  0.9234626673740078 values:  -58.090916 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  1 target diff:  0.00470676801031238 values:  -58.069252 ----- 

-----iteration:  2 target diff:  0.00225618400022655 values:  -58.123554 ----- 

-----iteration:  0 target diff:  0.9175549101577979 values:  -67.32455 ----- 

-----iteration:  3 target diff:  0.0037137223788813625 values:  -58.087013 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold3/train/agent1/ckpt/offline_dqn_15000.ckpt
-----iteration:  4 target diff:  0.0022218963408470356 values:  -58.1328 ----- 

-----iteration:  1 target diff:  0.0033475627994640545 values:  -67.378555 ----- 

-----ite


-----iteration:  1 target diff:  0.005222169367170733 values:  -68.68541 ----- 

-----iteration:  6 target diff:  0.0018317369265754455 values:  -56.525887 ----- 

-----iteration:  2 target diff:  0.0026702006321337617 values:  -68.688614 ----- 

-----iteration:  7 target diff:  0.001574176888748704 values:  -56.461163 ----- 

-----iteration:  3 target diff:  0.0024326196568366645 values:  -68.66445 ----- 

-----iteration:  8 target diff:  0.002247732571701159 values:  -56.34621 ----- 

-----iteration:  4 target diff:  0.002252015896242674 values:  -68.70086 ----- 

-----iteration:  9 target diff:  0.0020402192487669482 values:  -56.35714 ----- 

-----iteration:  5 target diff:  0.001768541146021362 values:  -68.789894 ----- 

-----iteration:  10 target diff:  0.00197010935086518 values:  -56.331142 ----- 

-----iteration:  11 target diff:  0.0013167187579416905 values:  -56.32481 ----- 

-------------------- ckpt:  17000 --------------------
Loaded trajectories from load path: /home/


saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold3/train/agent1/ckpt/offline_dqn_17000.ckpt


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  7 target diff:  0.001788413322157431 values:  -68.915 ----- 

-----iteration:  8 target diff:  0.003175836663198228 values:  -68.93952 ----- 

-----iteration:  9 target diff:  0.0016126048783525187 values


-----iteration:  8 target diff:  0.0026618062646187953 values:  -57.30323 ----- 

-----iteration:  9 target diff:  0.001709758653545826 values:  -57.30721 ----- 

-----iteration:  0 target diff:  0.9179170371108231 values:  -69.42282 ----- 

-----iteration:  10 target diff:  0.0020455393535446864 values:  -57.27804 ----- 

-----iteration:  1 target diff:  0.0045633210181603686 values:  -69.48541 ----- 

-----iteration:  11 target diff:  0.0018609564360256147 values:  -57.333347 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold3/train/agent4/ckpt/offline_dqn_4000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold4/train/agent0/ckpt/offline_dqn_3000.ckpt
-----iteration:  2 target diff:  0.002673512154289773 values:  -69.59354 ----- 

-----iteration:  12 target diff:  0.0017125410475001966 values:  -57.324455 ----- 

-----iteration:  13 target diff:  0.0017792601546522235 values:  -57.31162 ----- 

-----iteration:  3

-----iteration:  8 target diff:  0.0016706795469642484 values:  -56.29424 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  9 target diff:  0.00206970605522625 values:  -56.24079 ----- 

-----iteration:  10 target diff:  0.0017995556704532894 values:  -56.345398 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold3/train/agent2/ckpt/offline_dqn_2000.ckpt
-----iteration:  11 target diff:  0.0016543666889460638 values:  -56.44971 ----- 

-----iteration:  12 target diff:  0.0017577234329702062 values:  -56.410328 ----- 

-----iteration:  13 target diff:  0.001345025783515666 values:  -56.354546 ----- 

-------------------- ckpt:  18000 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold3/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold3/train/agent0/trajs0.




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------


To change all layers to have dtype float


saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold3/train/agent4/ckpt/offline_dqn_10000.ckpt


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold3/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold3/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  0 target diff:  0.9178360838975977 values:  -67.100105 ----- 

-----iteration:  0 target diff:  0.9237114500497685 values:  -56.522247 ----- 

-----iteration:  1 target diff:  0.003950398479675588 values:  -56.61417 ----- 

-----iteration:  1 target diff:  0.004124923430765672 values:  -66.9751 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold4/train/agent0/ckpt/offline_dqn_15000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold3/train/agent4/ckpt/offline_dqn_16000.ckpt
-----iteration:  2 target diff:  0.002266561474089916 values:  -56.61268 ----- 

-----iteration:  2 target diff:  0.00328486935871955

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold3/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold3/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold3/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold3/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold3/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold3/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold3/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold3/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold3/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold3/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, 

-----iteration: -----iteration:   2012  target diff: target diff:   0.0022995530718619920.0018475761630270119  values:  values:  -57.141148-69.7452 ----- 

 ----- 

-----iteration:  13 target diff:  0.0018047614074778698 values:  -57.1572 ----------iteration:   
10
 target diff:  0.0014903765725626675 values:  -57.17578 ----- 

-------------------- training agent -------------------------iteration: 
 21 target diff:  0.002641027591237408 Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold4/train/agent/trajs.pkl!values: 
 Refresh buffer every 1000000 sampling!-69.8336 -----
 


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backe

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold3/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold3/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold4/train/agent/ckpt/offline_dqn_5000.ckpt
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold3/train/agent2/trajs2.pkl!
-----iteration:  Refresh buffer every 1000000 sampling!81
 target diff:  0.001959399076379933 values:  -56.018635 -----Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold3/train/agent3/trajs3.pkl! 


Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold3/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change 


-------------------- adv learner --------------------
-----iteration:  82 target diff:  0.0015293855365082105 values:  -56.019573 ----- 

-----iteration:  83 target diff:  0.0021631641919346296 values:  -56.132122 ----- 

-----iteration:  84 target diff:  0.003363585672174702 values:  -56.20063 ----- 

-----iteration:  85 target diff:  0.0015781753937325065 values:  -56.166546 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  86 target diff:  0.0013577376800824885 values:  -56.226162 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold3/train/agent3/ckpt/offline_dqn_4000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold4/train/agent1/ckpt/offline_dqn_7000.ckpt


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can di

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold3/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold3/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold3/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold3/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold3/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, 


-------------------- adv learner --------------------
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold4/train/agent1/ckpt/offline_dqn_9000.ckpt
-------------------- fqe on dqn & sale --------------------
-------------------- fqe on dqn & sale --------------------
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold4/train/agent/ckpt/offline_dqn_8000.ckpt


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  0 target diff:  0.9141565683552799 values:  -58.54154 ----- 

-----iteration:  1 target diff:  0.002462391498221982 values:  -58.466858 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just thi




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  7 target diff:  0.0023921662576960375 values:  -57.920227 ----- 

-----iteration:  8 target diff:  0.0018834336466189208 values:  -57.933514 ----- 

-----iteration:  9 target diff:  0.0021311753292048027 values:  -57.962563 ----- 

-----iteration:  10 target diff:  0.0020436412124110875 values:  -58.00879 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold3/train/agent3/ckpt/offline_dqn_9000.ckpt
-----iteration:  11 target diff:  0.002173488943106961 values:  -58.04891 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  12 target diff:  0.0022485563229

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold3/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold3/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold3/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold4/train/agent/ckpt/offline_dqn_15000.ckpt
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold3/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold3/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('fl


-----iteration:  0 target diff:  0.9151593303351492 values:  -58.557568 ----- 

-----iteration:  1 target diff:  0.002354716617191343 values:  -58.567024 ----- 

-----iteration:  2 target diff:  0.00254842837966993 values:  -58.57541 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold4/train/agent1/ckpt/offline_dqn_18000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold4/train/agent/ckpt/offline_dqn_4000.ckpt
-----iteration:  3 target diff:  0.0021272128643138348 values:  -58.655407 ----- 

-----iteration:  4 target diff:  0.0028517482865345026 values:  -58.66936 ----- 

-----iteration:  5 target diff:  0.00147406251025873 values:  -58.703392 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold3/train/agent3/ckpt/offline_dqn_16000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold4/train/agent/ckpt/offline_dqn_17000.ckpt


To change all layers to 

-----iteration:  2 target diff:  0.0019655434451115936 values:  -57.725826 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold3/train/agent3/ckpt/offline_dqn_19000.ckpt
-----iteration:  3 target diff:  0.002592553310149537 values:  -57.632305 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold4/train/agent/ckpt/offline_dqn_20000.ckpt
-------------------- training agents --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold4/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.back

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold4/train/agent/ckpt/offline_dqn_10000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold4/train/agent2/ckpt/offline_dqn_4000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold3/train/agent4/ckpt/offline_dqn_2000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold4/train/agent0/ckpt/offline_dqn_3000.ckpt


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  0 target diff:  0.9147345293252074 values:  -57.03443 ----- 

-----iteration:  1 target diff:  0.002625662701727458 values:  -56.98455 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/m

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold4/train/agent2/ckpt/offline_dqn_13000.ckpt
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold3/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold3/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold3/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold3/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by pass


saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold3/train/agent4/ckpt/offline_dqn_18000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold4/train/agent0/ckpt/offline_dqn_7000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold4/train/agent0/ckpt/offline_dqn_19000.ckpt
-----iteration:  26 target diff:  0.00243540820812201 values:  -57.38962 ----- 

-----iteration:  27 target diff:  0.0020891812915092025 values:  -57.309498 ----- 

-----iteration:  28 target diff:  0.001981067036053109 values:  -57.325665 ----- 

-----iteration:  29 target diff:  0.0018980033903115204 values:  -57.30475 ----- 

-----iteration:  30 target diff:  0.0014939718186658649 values:  -57.24103 ----- 

-------------------- ckpt:  7000 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold3/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!
Loaded traj




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold4/train/agent3/ckpt/offline_dqn_1000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold3/train/agent4/ckpt/offline_dqn_19000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold4/train/agent0/ckpt/o

-----iteration:  5 target diff:  0.0017835835551107359 values:  -56.821598 ----- 

-----iteration:  6 target diff:  0.0014082123349005386 values:  -56.781696 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold4/train/agent3/ckpt/offline_dqn_3000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold4/train/agent0/ckpt/offline_dqn_10000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold4/train/agent1/ckpt/offline_dqn_2000.ckpt


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer cons


-----iteration:  10 target diff:  0.0014699480406462188 values:  -45.52856 ----- 

--------------------saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold4/train/agent1/ckpt/offline_dqn_4000.ckpt
 ckpt:  2000 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold3/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold3/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
-----iteration:  Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold3/train/agent1/trajs1.pkl!
0 Refresh buffer every 1000000 sampling!target diff:  
0.9152739561002976 values:  -57.501793 ----- 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold3/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path:




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  1 target diff:  0.0030751555950253432 values:  -57.55836 ----- 

-----iteration:  2 target diff:  0.0023308847326819655 values:  -57.53244 ----- 

-----iteration:  3 target diff:  0.0018805615251659966 values:  -57.585102 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  4 target diff:  0.0017784768165226303 values:  -57.54941 ----- 

-----iteration:  5 target diff:  0.0018599950265632526 values:  -57.502323 ----- 

-----iteration:  6 target diff:  0.002110837005362798 values:  -57.430153 ----- 

-----iteration:  7 target diff:  0.0023797826609598368 values:  -57.472897 ----- 

-----it


-------------------- adv learner --------------------


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  0 target diff:  0.9222316665269538 values:  -48.51936 ----- 

-----iteration:  1 target diff:  0.00305107121645245 values:  -48.53812 ----- 

-----iteration:  2 target diff:  0.0021330000197061945 values:  -48.557278 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold4/train/agent0/ckpt/offline_dqn_15000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold4/train/agent3/ckpt/offline_dqn_8000.ckpt
-----iteration:  3 target diff:  0.0016324234416903397 values:  -48.624004 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/f

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold4/train/agent0/ckpt/offline_dqn_17000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold4/train/agent3/ckpt/offline_dqn_10000.ckpt


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold4/train/agent1/ckpt/offline_dqn_9000.ckpt
-----iteration:  0 target diff:  0.915438761907963 values:  -57.633312 ----- 

-----iteration:  1 target diff:  0.0028475604000314492 values:  -57.640877 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constru

-----iteration: -----iteration:   218  target diff: target diff:   0.00211577123479116030.0016026465834304122  values: values:   -57.015728 -48.568142-----  -----
 


-----iteration:  22 target diff:  0.002965765419845626 values:  -56.92763 ----- 

-----iteration:  9 target diff:  0.001984464918367708 values:  -48.553383 ----- 

-----iteration:  23 target diff:  0.002118410092070991 values:  -56.87713 ----- 

-----iteration:  10 target diff:  0.0018571186141867506 values:  -48.631073 ----- 

-----iteration:  11 target diff:  0.0020995574584601103 values:  -48.702698 ----- 

-----iteration:  24 target diff:  0.0023897887462047108 values:  -56.674076 ----- 

-----iteration:  12 target diff:  0.0018453601144542784 values:  -48.71555 ----- 

-----iteration: -----iteration:   2513  target diff: target diff:   0.00307629295482013880.0024620291058486737  values: values:   -48.73276-56.577023  ----- -----
 


-----iteration:  14 target diff:  0.0014565912294926056 values:  -48.69712 ----- 

--

-----iteration:  48 target diff:  0.0018814495028210564 values:  -55.109993 ----- 

-----iteration:  49 target diff:  0.0021088651604458013 values:  -54.947968 ----- 

-------------------------iteration:   fqe on dqn & sale 50-------------------- 


-----iteration:  51 target diff:  0.002319492402235784 values:  -54.89329 ----- 

-----iteration:  52 target diff:  0.0017718852135014708 values:  -54.88331 ----- 

-----iteration:  53 target diff:  0.002144846199864682 values:  -54.80546 ----- 

-----iteration:  54 target diff:  0.0017748639933751044 values:  -54.78073 ----- 

-----iteration: saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold4/train/agent1/ckpt/offline_dqn_2000.ckpt
 55 target diff:  0.0018244140159857632 values:  -54.724735 ----- 

-----iteration:  56 target diff:  0.0018275945479643318 values:  -54.626133 ----- 

-----iteration:  57 target diff:  0.0018859710984394644 values:  -54.55086 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold3/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold3/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_


saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold4/train/agent1/ckpt/offline_dqn_18000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold4/train/agent3/ckpt/offline_dqn_19000.ckpt
-----iteration:  0 target diff:  0.9150146426431919 values:  -57.35673 ----- 

-----iteration:  1 target diff:  0.0048448873766657255 values:  -57.440693 ----- 

-----iteration:  2 target diff:  0.002948674123529782 values:  -57.586704 ----- 

-----iteration:  3 target diff:  0.0023664250608489597 values:  -57.56879 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  4 target diff:  0.0015335248626589923 values:  -57.605938 ----- 

-----iteration:  5 target diff:  0.0016268593


saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold4/train/agent4/ckpt/offline_dqn_1000.ckpt
-----iteration:  12 target diff:  0.002955382770010086 values:  -57.936794 ----- 

-----iteration:  2 target diff:  0.0017152085496964376 values:  -45.931587 ----- 

-----iteration:  3 target diff:  0.0019033876125131382 values:  -45.954704 ----- 

-----iteration:  13 target diff:  0.0017808968796005143 values:  -57.774864 ----- 

-----iteration:  4 target diff:  0.0015237539542039995 values:  -45.947075 ----- 

-----iteration:  14 target diff:  0.0019311580005031226 values:  -57.742893 ----- 

-----iteration:  5 target diff:  0.0023100763403859173 values:  -45.946804 ----- 

-----iteration:  15 target diff:  0.0021390338113844334 values:  -57.774708 ----- 

-----iteration:  16 target diff:  0.0018173138628944446 values:  -57.771484 ----- 

-----iteration:  6 target diff:  0.0018639000116674141 values:  -45.981487 ----- 

-----iteration:  7 target diff:  0.001606850




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  20 target diff:  0.002178635592764298 --------------------values:   -57.78173ad




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  50 target diff:  0.002271





Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold3/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold3/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!

Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold3/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold3/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.
Loaded trajectories from loa




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('flo


-----iteration:  7 target diff:  0.002008454065293406 values:  -58.11785 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold4/train/agent1/ckpt/offline_dqn_16000.ckpt
-----iteration:  0 target diff:  0.9224393957219529 values:  -46.41706 ----- 

-----iteration:  8 target diff:  0.0013628655787476978 values:  -57.92666 ----- 

-----iteration:  1 target diff:  0.0027681243907322494 values:  -46.476162 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold4/train/agent2/ckpt/offline_dqn_8000.ckpt
-----iteration:  2 target diff:  0.002977929375468257 values:  -46.522015 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold4/train/agent4/ckpt/offline_dqn_9000.ckpt
-----iteration:  3 target diff:  0.0026547209745044972 values:  -46.512993 ----- 

-----iteration:  4 target diff:  0.0016761905005373238 values:  -46.54822 ----- 

-----iteration:  5 target diff:  0.0015105670054193117 values: 




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('floa

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold4/train/agent4/ckpt/offline_dqn_11000.ckpt
-----iteration:  5 target diff:  0.0021743193829494483 values:  -48.05902 ----- 

-----iteration:  6 target diff:  0.0016848127028869335 values:  -48.05337 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  7 target diff:  0.0024161624385595666 values:  -48.08205 ----- 

-----iteration:  8 target diff:  0.0014132554187233587 values:  -48.07763 ----- 

-----iteration:  0 target diff:  0.91607059394401 values:  -56.8419 ----- 

-----iteration:  1 target diff:  0.0033539284366174383 values:  -56.930916 ----- 

-----iteration:  2 target diff:  0.0021188044077659228 values:  -56.879566 ----- 

-----it

-----iteration:  3 target diff:  0.002039228187857479 values:  -57.465397 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  4 target diff:  0.001831337190695854 values:  -57.42768 ----- 

-----iteration:  5 target diff:  0.001549024290292999 values:  -57.443813 ----- 

-----iteration:  6 target diff:  0.001910503538687428 values:  -57.40688 ----- 

-----iteration:  7 target diff:  0.0014293611888056466 values:  -57.43926 ----- 

-------------------- ckpt:  13000 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold3/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold3/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold3/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded t




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
saving model weights at /home/jupyt/leyuan

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold3/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold3/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold3/train/agent2/trajs2.pkl!saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold4/train/agent2/ckpt/offline_dqn_17000.ckpt

Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold3/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold3/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('f




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

saving model weights at /home/jupyt/leyuan

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold4/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold4/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold4/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold4/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold4/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, 




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  5 target diff:  0.0018557520609280198 values:  -58.570354 ----- 

-----iteration:  6 target diff:  0.001905325982988087 values:  -58.59534 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  7 target diff:  0.0015815753582552087 values:  -58.63303 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold4/train/agent2/ckpt/offline_dqn_8000.ckpt
-----iteration:  8 target diff:  0.001705149596968518 values:  -58.64744 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold4/train/agent2/ckpt/offline_dqn_20000.ckpt
Loaded traject


-----iteration:  17 target diff:  0.0022776960459375637 values:  -58.594097 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold4/train/agent3/ckpt/offline_dqn_1000.ckpt
-----iteration:  4 target diff:  0.0023259962512245354 values:  -52.957836 ----- 

-----iteration:  5 target diff:  0.0024142281304562288 values:  -52.962955 ----- 

-----iteration:  0 target diff:  0.9227221037769134 values:  -49.32068 ----- 

-----iteration:  18 target diff:  0.0024047908131020397 values:  -58.53596 ----- 

-----iteration:  6 target diff:  0.0018701812235140348 values:  -52.978092 ----- 

-----iteration:  19 target diff:  0.0027228018782779993 values:  -58.580494 ----- 

-----iteration:  1 target diff:  0.003029977340775829 values:  -49.32444 ----- 

-----iteration:  7 target diff:  0.002132818656491414 values:  -53.03996 ----- 

-----iteration:  2 target diff:  0.002563307250080075 values:  -49.328594 ----- 

-----iteration:  20 target diff:  0.0020497519064019824




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  22 target diff:  0.002080


-------------------- adv learner --------------------
-----iteration:  5 target diff:  0.0020278062777119884 values:  -46.774624 ----- 

-----iteration:  6 target diff:  0.0020707761138955266 values:  -46.791016 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold4/train/agent2/ckpt/offline_dqn_11000.ckpt
-----iteration:  7 target diff:  0.0024493680954567735 values:  -46.8097 ----- 

-------------------- fqe on dqn & sale --------------------
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold4/train/agent3/ckpt/offline_dqn_3000.ckpt
-----iteration:  8 target diff:  0.001863913322931844 values:  -46.79118 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteratio


-----iteration:  11 target diff:  0.0016034674790500096 values:  -52.875282 ----- 

-----iteration:  12 target diff:  0.0018591882810340243 values:  -52.85588 ----- 

-----iteration:  0 target diff:  0.9226797600065842 values:  -46.16995 ----- 

-----iteration:  13 target diff:  0.0018246066513724496 values:  -52.889973 ----- 

-----iteration:  1 target diff:  0.0028793005869923037 values:  -46.178936 ----- 

-----iteration:  14 target diff:  0.001526306601261287 values:  -52.92899 ----- 

-----iteration:  2 target diff:  0.002268625629654075 values:  -46.16796 ----- 

-----iteration:  15 target diff:  0.0013764657476048202 values:  -52.94608 ----- 

-----iteration:  3 target diff:  0.001711595088956267 values:  -46.177708 ----- 

-----iteration:  4 target diff:  0.0016236701003904518 values:  -46.265167 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructo




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('floa

-----iteration:  0 target diff:  0.92323745945459 values:  -49.10822 ----- 

-----iteration:  1 target diff:  0.002038575119304314 values:  -49.130646 ----- 

-----iteration:  2 target diff:  0.0018821822044100156 values:  -49.178852 ----- 

-----iteration:  3 target diff:  0.0022825858449574705 values:  -49.211914 ----- 

-----iteration:  4 target diff:  0.001532227760154214 values:  -49.21631 ----- 

-----iteration:  5 target diff:  0.0027097109974124726 values:  -49.256336 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  6 target diff:  0.001995507924800782 values:  -49.29731 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  11 target diff:  0.0023577223507816525 values:  -47.08012--------------------  -----adv learner  
--------------------

-----iteration:  5 target diff:  0.0016548165950556462 values:  -53.264004 ----- 

-----iteration:  12 target diff:  0.0016630256536621841 values:  -47.063477 ----- 

-----iteration:  6 target diff:  0.001682293077984441 values:  -53.34903 ----- 

-----iteration:  13 target diff:  0.0016580064079505202 values:  -47.083584 ----- 

-----iteration:  7 target diff:  0.0015752423688537946 values:  -53.41865 ----- 

-----iteration:  14 target diff:  0.0016090095341269987 values:  -47.16287 ----- 

-----iteration:  8 target diff:  0.0015955300201651727 values:  




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float


-----iteration:  1 target diff:  0.0028586118197842314 values:  -58.69907 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  0 -----iteration: target diff:   20.924669309506365  target diff: values:  0.002468380997807852  -45.92379values:   ------58.743103  -----

 

-----iteration:  0 target diff:  0.9253490891050811 values:  -52.870056 ----- 

-----iteration:  3 target diff:  0.001543252064952162 values:  -58.782673 ----- 

-----iteration:  1 target diff:  0.0015484737977134965 values:  -46.04724 ----- 

-----iteration:  1 target diff:  0.0023594588119959267 values:  -52.8572 ----- -----iteration: 
 
4 target diff:  0.001693724180805355 values:  -58.866493 ----- 

-----iteration:  2 target diff:  0.00259959945074




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  14-----iteration:   target diff: 15  0.0017054118317369133 target diff: values:   0.001998725740153102-48.774475  ----- 
values:  
-57.708557 ----- 

-----iteration:  16 target diff:  0.0015474278695397038 values:  -57.77995 ----- 

-----iteration:  15 target diff:  0.0022058298520554764 values:  -48.79124 ----- 

-----iteration:  17 target diff:  0.001916656110932464 values:  -57.74001 ----- 

-----iteration:  16 target diff:  0.0017673829935761257 values:  -48.79248 ----- 

-----iteration:  18 target diff:  0.0018000317771601148 values:  -57.775723 ----- 

-----iteration:  17 target diff:  0.0015782329522328075 values

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold4/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
 52 target diff:  0.0014630375699582995 values:  -56.42684 -----Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold4/train/agent1/trajs1.pkl! 
Refresh buffer every 1000000 sampling!


-------------------- ckpt:  17000 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold4/train/agent2/trajs2.pkl!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold3/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!Refresh buffer every 1000000 sampling!







To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by pa




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  64 target diff:  0.0022883539872445274 values:  -48.97637 ----- 

-----iteration:  65 target diff:  0.0021606058443088077 values:  -48.889797 ----- 

-----iteration:  66 target diff:  0.0020213831514252073 values:  -48.899136 ----- 

-----iteration:  67 target diff:  0.0021766666165645514 values:  -48.88106 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  68 target diff:  0.001882735709128111 values:  -48.8675 ----- 

-----iteration:  69 target diff:  0.001940947670425773 values:  -48.81824 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  70 target 

-----iteration:  6 target diff:  0.0016285546655946711 values:  -53.553513 -----iteration: ----- 4  
target diff: 
 0.002162824203307469 values:  -56.956318 ----- 

-----iteration:  7 target diff:  0.0024501391481155134 values:  -53.55866 ----- 

-----iteration:  5 target diff:  0.0018610412289463887 values:  -56.981174 ----- 

-----iteration:  8 target diff:  0.001584397754025236 values:  -53.562138 ----- 

-----iteration:  6 target diff:  0.0027028884164707677 values:  -56.982464 ----- 

-----iteration:  9 target diff:  0.0014615509333065343 values:  -53.520256 ----- 

-----iteration:  7 target diff:  0.0018400031610191455 values:  -56.819588 ----- 

-----iteration:  8 target diff:  0.0020437436785547066 values:  -56.855843 ----- 

-----iteration:  9 target diff:  0.002703283120484494 values:  -56.86615 ----- 

-----iteration:  10 target diff:  0.0024030213050385916 values:  -56.88986 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('


-----iteration:  15--------------------  target diff: adv learner  0.0021864502106522946-------------------- 
values:  -52.972492 ----- 

-----iteration:  16 target diff:  0.0017206055744872811 values:  -53.10665 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  17 target diff:  0.003535147125306326 values:  -53.107216 ----- 

-----iteration:  0 target diff:  0.9239503656796093 values:  -47.1693 ----- 

-----iteration:  18 target diff:  0.001844260132329773 values:  -53.122807 ----- 

-----iteration:  1 target diff:  0.002964757172859096 values:  -47.175484 ----- 

-----iteration:  19 target diff:  0.001927987184532375 values:  -53.135094 ----- 

-----iteration:  2 target diff:  0.0029793903456920724 values:  -47.

-----iteration:  36 target diff:  0.001807427708135861 values:  -52.995018 ----- 

-----iteration:  37 target diff:  0.0024240519162852217 values:  -52.86323 ----- 

-----iteration:  38 target diff:  0.0025127199270041974 values:  -52.826008 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold4/train/agent3/ckpt/offline_dqn_7000.ckpt
-------------------- fqe on dqn & sale --------------------
-----iteration:  39 target diff:  0.001396139259037763 values:  -52.7729 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold4/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!
Loaded tra




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('floa




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  0 target diff:  0.9254047436716178 values:  -53.242004 ----- 

-----iteration:  5 target diff:  0.0014215681040511334 values:  -47.62074 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold4/train/agent3/ckpt/offline_dqn_8000.ckpt
-----iteration:  1 target diff:  0.0028288615515163805 values:  -53.2072 ----- 

-----iteration:  2 target diff:  0.002162230969226568 values:  -53.23267 ----- 

-----iteration:  3 target diff:  0.0021207026331200823 values:  -53.19022 ----- 

-----iteration:  4 target diff:  0.0022648142641284947 values:  -53.176617 ----- 

-----iteration:  5 target diff:  0

-----iteration:  5 target diff:  0.0021376100671921267 values:  -53.999382 ----- 

-----iteration:  6 target diff:  0.0020575811050275736 values:  -57.704994 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  6 target diff:  0.002022919608890336 values:  -53.97174 ----- 

-----iteration:  7 target diff:  0.0017464387584740504 values:  -57.750637 ----- 

-----iteration:  7 target diff:  0.0016554829482883972 values:  -53.932953 ----- 

-----iteration:  8 target diff:  0.0015200936155805973 values:  -57.714653 ----- 

-----iteration:  8 target diff:  0.001743407968568916 values:  -53.985703 ----- 

-----iteration:  9 target diff:  0.0017224121712460766 values:  -57.720734 ----- 

-----iteration:  10 target diff:  0.001583147239386762 values:  -57.722023 ----- 

-----iteration:  11 target diff:  0.0014833490775100402 values:  -57.69682 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold4/train/agent4/ckpt/offline_dqn_20




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('floa




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  2 target diff:  0.0025614761793732867 values:  -52.801075 ----- 

-----iteration:  0 target diff:  0.9235412708628737 values:  -47.066498 ----- 

-----iteration:  3 target diff:  0.0019681396035185574 values:  -52.836742 ----- 

-----iteration:  1 target diff:  0.002800734467603091 values:  -47.113117-----iteration:   -----4  
target diff: 
 0.001986606308966135 values:  -52.856934 ----- 

-----iteration:  5 target diff:  0.0014866996289522256 values:  -52.861122 ----- 

-----iteration:  2 target diff:  0.002166301780660826 values:  -47.113094 ----- 

-----iteration:  3 target diff:  0.0017313413763345004 values:  -47.1




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  25 target diff:  0.0017351518583845391--------------------  adv learnervalues:   ---------------------47.69915
 ----- 

-----iteration:  12 target diff:  0.002437367526518367 values:  -57.826225 ----- 

-----iteration:  26 target diff:  0.0022629280629877864 values:  -47.71303 ----- 

-----iteration:  13 target diff:  0.002552818805642496 values:  -57.671703 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold4/train/agent4/ckpt/offline_dqn_5000.ckpt
-----iteration:  27 target diff:  0.0017820753816224217 values:  -47.72128 ----- 

-----iteration:  14 target diff:  0.0022990454650531856 values:  -57.57406 ----- 

-----iteration:  28 targe

-----iteration:  5 target diff:  0.0018669847183537497 values:  -57.956512 ----- 

-----iteration:  8 target diff:  0.0018701329194232463 values:  -53.246124 ----- 

-----iteration:  9 target diff:  0.0016801884110616992 values:  -53.281487 ----- 

-----iteration:  6 target diff:  0.002199303701475987 values:  -57.849342 ----- 

-----iteration:  10 target diff:  0.0020577925609419987 values:  -53.32025 ----- 

-----iteration:  7 target diff:  0.0020975247995064835 values:  -58.00951 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold4/train/agent3/ckpt/offline_dqn_14000.ckpt
-----iteration:  11 target diff:  0.0013246740111508173 values:  -53.36559 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  23 target diff:  0.0016647732890294866 values:  -58.019497 ----- 

-----iteration:  1 target diff:  0.003343799948940749 values:  -46.83692 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold4/train/agent4/ckpt/offline_dqn_8000.ckpt
-----iteration:  2 target diff:  0.0023866158401960164 values:  -46.84482 ----- 

-----iteration:  24 target diff:  0.0022310766424440418 values:  -57.913624 ----- 

-----iteration:  3 target diff:  0.0018110376673556854 values:  -46.86425 ----- 

-----iteration:  4 target diff:  0.0015442028360336856 values:  -46.88986 ----- 

-----iteration:  25 target diff:  0.0020281066187831506 values:  -57.704964 ----- 

-----iteration:  5 target diff:  0.0017666308363893314 values:  -46.915375 ----- 

-----iteration:  26 target diff:  0.0020163085691473196 values:  -57.679115 ----- 

-----iteration: -----iteration:   627  target diff: target diff:   0.0017922372122208576 0.0015685440592955679values:  values:   -57.




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold4/train/agent4/ckpt/offline_dqn_11000.ckpt
-------------------- fqe on dqn & sale --------------------
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold4/train/agent/ckpt/offline_dqn_3000.ckpt
saving model weights at /home/jupyt/leyua




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
saving model weights at /home/jupyt/leyuan




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold4/train/agent4/ckpt/offline_dqn_7000.ckpt
-------------------- fqe on dqn & sale --------------------
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold4/train/agent4/ckpt/offline_dqn_20000.ckpt
-------------------- behavior cloning --

-----iteration:  0 target diff:  0.9175285597903428 values:  -45.366753 ----- 

-----iteration:  14 target diff:  0.0023458554106267956 values:  -53.270725 ----- 

-----iteration:  1 target diff:  0.0027276532071804807 values:  -45.36444 ----- 

-----iteration:  2 target diff:  0.0036057689489498936 values:  -45.426373 ----- 

-----iteration:  15 target diff:  0.0018904891465559128 values:  -53.247547 ----- 

-----iteration:  3 target diff:  0.00566217360478821 values:  -45.36154 ----- 

-----iteration:  4 target diff:  0.0019927811609931926 values:  -45.42871 ----- 

-----iteration:  16 target diff:  0.0020911128088854803 values:  -53.279346 ----- 

-----iteration:  5 target diff:  0.0027590489949000973 values:  -45.432537 ----- 

-----iteration:  6 target diff:  0.003708049475233865 values:  -45.339573 ----- 

-----iteration:  17 target diff:  0.0017497307711784892 values:  -53.282753 ----- 

-----iteration:  7 target diff:  0.002198971759540838 values:  -45.28263 ----- 

-----iterat

-----iteration:  32 target diff:  0.003228809070785785 values:  -45.77586 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold4/train/agent4/ckpt/offline_dqn_12000.ckpt
-----iteration:  33 target diff:  0.0030660180145729876 values:  -45.80844 ----- 

-----iteration:  34 target diff:  0.0024969435905780593 values:  -45.77079 ----- 

-----iteration:  35 target diff:  0.002512360190641509 values:  -45.769344 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  36 target diff:  0.0029784322466217673 values:  -45.719368 ----- 

-----iteration:  0 target diff:  0.9242493982413762 values:  -51.919983 ----- 

-----iteration:  37 target diff:  0.002272439258088543 values:  -45.684822 ----- 



-----iteration:  59 target diff:  0.0024415095269599793 values:  -43.976856 ----- 

-----iteration:  0 target diff:  0.9248032815431388 values:  -52.33486 ----- 

-----iteration:  60 target diff:  0.0031300630657790677 values:  -43.946663 ----- 

-----iteration: -----iteration:   161  target diff: target diff:   0.0032411769564695280.0027885296208744782  values:  values: -43.986973  -52.44794 ----------  



-----iteration:  62 target diff:  0.0022548627129095294 values:  -43.955467 ----- 

-----iteration:  63 target diff:  0.00240850266481867 values:  -44.00559 ----- 

-----iteration:  2 target diff:  0.003213937986543692 values:  -52.487015 ----- 

-----iteration:  3 target diff:  0.0023567574089344337 values:  -52.46187 ----- 

-----iteration:  64 target diff:  0.002493806415645699 values:  -44.025093 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold4/train/agent/ckpt/offline_dqn_20000.ckpt
-------------------- training agents -----------------




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.
 
----- 

-------------------- adv learner --------------------
-----iteration:  85 target diff:  0.001962930671301705 values:  -43.886593 ----- 

-----iteration:  86 target diff:  0.0032034234110138123 values:  -43.928017 ----- 

-----iteration:  87 target diff:  0.0029689199593780327 values:  -43.93971 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  88 target diff:  0.003018140667837031 values:  -43.948566 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold4/train/agent0/ckpt/offline_dqn_2000.ckpt
-----iteration:  89 target diff:  0.002509886475227735 values:  -43.96817 ----- 

saving model weights at /home/jupyt/ley






Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold4/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold4/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold4/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold4/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold4/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold4/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!
Loaded traject




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------


To change all layers to have dtype float




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  7 target diff:  0.001920951226221649 values:  -53.29284 ----- 

-----iteration:  3 target diff:  0.004537914013674655 values:  -51.808197 ----- 

-----iteration:  4 target diff:  0.0033761868872217425 values:  -51.823452 ----- 

-----iteration:  8 target diff:  0.002771854960085596 values:  -53.33712 ----- 

-----iteration:  5 target diff:  0.003456717642282117 values:  -51.776222 ----- 

-----iteration:  9 target diff:  0.0026758087113618136 values:  -----iteration: -53.329014  6 ----- target diff: 
 
0.003068470092527205 values:  -51.758385 ----- 

-----iteration:  7 target diff:  0.0027069238873038506 values:  -51.80

-----iteration:  1 target diff:  0.002577707406900873 values:  -51.740395 ----- 
-----iteration:  
6 target diff:  0.0022650422599924088 values:  -46.86938 ----- 

-----iteration:  2 target diff:  0.002268524669179711 values:  -51.665695 ----- 
-----iteration: 
 7 target diff:  0.002190150925224128 values:  -46.923042 ----- 

-----iteration:  8 target diff:  0.0018343635647239378 values:  -46.948856 ----- 

-----iteration:  3 target diff:  0.001970710105824277 values:  -51.651558 ----- 

-----iteration:  4 target diff:  0.00204771326834365 values:  -51.693123 ----- 

-----iteration:  9 target diff:  0.0018309190596441029 values:  -47.003582 ----- 

-----iteration:  10 target diff:  0.0023224956453484526 values:  -47.059696 ----- 

-----iteration:  5 target diff:  0.0021927046631820364 values:  -51.559574 ----- 

-----iteration:  11 target diff:  0.001938112765030736 values:  -47.045334 ----- 

-----iteration:  6 target diff:  0.0020936114134796294 values:  -51.5234 ----- 

-----iterati

-----iteration:  27 target diff:  0.0027878977951946225 values:  -47.247257 ----- 

-----iteration:  0 target diff:  0.9247730488836984 values:  -53.43321 ----- 

-----iteration:  0 target diff:  0.9172015064570977 values:  -51.588833 ----- 

-----iteration:  28 target diff:  0.0019870712510445896 values:  -47.26936 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold4/train/agent0/ckpt/offline_dqn_9000.ckpt
-----iteration:  1 target diff:  0.001631466724028205 values:  -53.43607 ----- 

-----iteration:  1 target diff:  0.001635061287257526 values:  -51.638954 ----- 

-----iteration:  -----iteration: 29  2target diff:   target diff: 0.002018923898959316  0.0013050910639473125values:   values: -47.234207  -53.468613 ----------  



-------------------- ckpt:  19000 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/209652396/fold4/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!


Loaded traje




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.
 
-51.709496 ----- 

-------------------- adv learner --------------------
-----iteration:  31 target diff:  0.002284235852563587 values:  -47.268055 ----- 

-----iteration:  32 target diff:  0.0026040599936582327 values:  -47.32752 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  33 target diff:  0.003478920000582065 values:  -47.3534 ----- 

-----iteration: 

-----iteration:  49 target diff:  0.0021516180407994585 values:  -47.270416 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  50 target diff:  0.002623464670102608 values:  -47.189014 ----- 

-----iteration:  0 target diff:  0.9258060121250457 values:  -53.21961 ----- 

-----iteration:  51 target diff:  0.0023459881875084205 values:  -47.159893 ----- 

-----iteration:  1 target diff:  0.0015581746253683732 values:  -53.247353 ----- 

-----iteration:  52 target diff:  0.002418046487473398 values:  -47.110004 ----- 

-----iteration:  2 target diff:  0.0012046370418666219saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold4/train/agent0/ckpt/offline_dqn_11000.ckpt 
values:  -53.2294 ----- 






To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  53 target diff:  0.0024491373906760892 values:  -47.08335 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change a

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold4/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold4/train/agent1/trajs1.pkl!-----iteration: 
 4Refresh buffer every 1000000 sampling! 
target diff:  0.001231739173386806 values:  -53.200783 ----- Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold4/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!



Refresh buffer every 1000000 sampling!

 
 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold4/train/agent0/ckpt/offline_dqn_15000.ckpt
-----iteration:  11 target diff:  0.0018771984622191055 values:  -51.715332 ----- 

-----iteration:  1 target diff:  0.0023701146677474025 values:  -45.676025 ----- 

-----iteration:  2 target diff:  0.002502693100848924 values:  -45.694496 ----- 

-----iteration:  12 target diff:  0.0018527508325093336 values:  -51.68524 ----- 

-----iteration:  3 target diff:  0.0017461803752158215 values:  -45.709087 ----- 

-----iteration:  13 target diff:  0.0015012062631739583 values:  -51.642227 -----saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold4/train/agent0/ckpt/offline_dqn_15000.ckpt 


-----iteration:  4 target diff:  0.0017824340254333497-----iteration:   values: 14  -45.68922target diff:   0.0016059682290541725 -----values:  
 
-51.73698 ----- 

-----iteration:  5 target diff:  0.002716318101505326 values:  -45.75581 ----- 

-----iterat

-----iteration:  48 target diff:  0.002227302710398385 values:  -49.445312 ----- 

-----iteration:  68 target diff:  0.002305132641210336 values:  -45.72552 ----- 

-----iteration:  49 target diff:  0.001936761637577879 values:  -49.31863 ----- 

-----iteration:  50 target diff:  0.0025866734257601926 values:  -49.179955 ----- 

-----iteration:  69 target diff:  0.0017393320715280093 values:  -45.731632 ----- 

-----iteration:  70 -----iteration: target diff:   510.002651404017949306  values: target diff:   -45.671550.0023895520836080724 ----- values:   -49.066723 -----

 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold4/train/agent1/ckpt/offline_dqn_1000.ckpt
-----iteration:  52 target diff:  0.0022801923579605803 values:  -49.018112 ----- 

-----iteration:  71 target diff:  0.0020243205476289743 values:  -45.638607 ----- 

-----iteration:  53 target diff:  0.0019526990445055232 values:  -48.939728 ----- 

-----iteration:  72 target diff:  0.0021253972

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold4/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold4/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold4/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold4/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold4/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, 


saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold4/train/agent1/ckpt/offline_dqn_3000.ckpt


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  4 target diff:  0.002498207944135595 values:  -51.610054 ----- 

-----iteration:  5 target diff:  0.0019665015180074558 values:  -51.688496 ----- 

-----iteration:  6 target diff:  0.00202961279792103 values:  -51.59667 ----- 

-----iteration:  7 target diff:  0.00232099518762822 values:  -51.567524 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  8 target diff:  0.0020121669222040615 values:  -51.642677 ----- 

-----iteration:  9 target diff:  0.001830940210268875 




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.
0.0020308771596796392
 values:  -47.453266 ----- 

-------------------- adv learner -------------


-----iteration:  62 target diff:  0.0024062627661889865 values:  -46.932617 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  63 target diff:  0.0019761493062362963 values:  -46.932064 ----- 

-----iteration:  64 target diff:  0.001857152263549676 values:  -46.92081 ----- 

-----iteration:  65 target diff:  0.002282818762595589 values:  -46.92339 ----- 

-----iterat




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  99 target diff:  0.0015131497238093376 values:  -47.29128 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold4/train/agent1/ckpt/offline_dqn_12000.ckpt
-------------------- fqe on dqn & sale --------------------


To change all layers to have dtype float64 by default, call `tf.keras.backen




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  29 target diff:  0.002066707704548292 values:  -46.219772 ----- 

-----iteration:  30 target diff:  0.002115099218607185 values:  -46.187294 ----- 

-----iteration:  31 target diff:  0.0020441699074136887 values:  -46.205677 ----- 

-----iteration:  32 target diff:  0.002071401744747363 values:  -46.186134 ----- 

----------


-------------------- adv learner --------------------
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold4/train/agent1/ckpt/offline_dqn_18000.ckpt
-----iteration:  52 target diff:  0.0017771945433011527 values:  -45.3558 ----- 

-----iteration:  53 target diff:  0.003304022486088076 values:  -45.307514 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  54 target diff:  0.0015259139532086102 values:  -45.281986 ----- 

-----iteration:  55 target diff:  0.0018659749425633018 values:  -45.208214 ----- 

-----iteration:  56 target diff:  0.0023539174755792228 values:  -45.109886 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold4/train/agent1/ckpt/offline_dqn_18000.ckpt
-----iteration:  57 target diff:  0.001811028387190097 values:  -45.062977 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype


-----iteration:  0 target diff:  0.9186995320294556 values:  -50.744053 ----- 

-------------------- fqe on dqn & sale --------------------
 ----- 

-----iteration:  2 target diff:  0.0028327363900227734 values:  -50.712006 ----- 

-----iteration:  3 target diff:  0.0026169669984707385 values:  -50.58612 ----- 

-----iteration:  4 target diff:  0.002612544745668405 values:  -50.591198 ----- 

-----iteration:  5 target diff:  0.001925759981688441 values:  -50.734436 ----- 

-----iteration:  6 target diff:  0.0041650776442960355 values:  -50.704212 ----- 

-----iteration:  7 target diff:  0.001700135128611484 values:  -50.74262 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold4/train/agent1/ckpt/offline_dqn_20000.ckpt
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold4/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backen


-------------------- adv learner --------------------
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold4/train/agent2/ckpt/offline_dqn_2000.ckpt
-----iteration:  21 target diff:  0.0018843783971692256 values:  -45.985455 ----- 

-----iteration:  22 target diff:  0.0028971014913887496 values:  -45.993412 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold4/train/agent2/ckpt/offline_dqn_3000.ckpt
-------------------- fqe on dqn & sale --------------------
-----iteration:  23 target diff:  0.0030135085858321304 values:  -46.011364 ----- 

-----iteration:  24 target diff:  0.0021788842920249666 values:  -45.960194 ----- 

-----iteration:  25 target diff:  0.003027644838188328 values:  -46.01455 ----- 

-----iteration:  26 target diff:  0.0023311047226812855 values:  -45.97816 ----- 

-----iteration:  27 target diff:  0.0026926473170104117 values:  -45.973988 ----- 



To change all layers to have dtype float64 by default, c

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold4/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold4/train/agent2/ckpt/offline_dqn_8000.ckpt
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold4/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold4/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To 




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('floa

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold4/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold4/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold4/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold4/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold4/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold4/train/agent2/ckpt/offline_dqn_15000.ckpt


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.


-----iteration:  0 target diff:  0.9166836560261865 values:  -45.10476 ----- 

-----iteration:  0 target diff:  0.9173986376844727 values:  -52.87378 ----- 

-----iteration:  1 target diff:  0.002285211276235826 values:  -45.09914 ----- 

-----iteration:  1 target diff:  0.00

-----iteration:  3 target diff:  0.002963442834375989 values:  -51.45201 ----- 

-----iteration:  4 target diff:  0.0025416819576647657 values:  -51.370686 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold4/train/agent2/ckpt/offline_dqn_17000.ckpt
-------------------- fqe on dqn & sale --------------------
-----iteration:  5 target diff:  0.0022675579116967614 values:  -51.386723 ----- 

-----iteration:  6 target diff:  0.0018652129948905456 values:  -51.36348 ----- 

-----iteration:  7 target diff:  0.0020593660208746254 values:  -51.28038 ----- 

-----iteration:  8 target diff:  0.0016095795438329386 values:  -51.17068 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold4/train/agent2/ckpt/offline_dqn_18000.ckpt
-----iteration:  9 target diff:  0.0018286382117460428 values:  -51.185036 ----- 

-----iteration:  10 target diff:  0.00194595015529784 values:  -51.11127 ----- 

-----iteration:  11 target diff:  0.001

0.003684934621355882
 values:  -47.01877 ----- 

-----iteration:  22 target diff:  0.0018885021498053428 values:  -46.995605 ----- 

-----iteration:  5 target diff:  0.0023421385400326056 values:  -52.65293 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold4/train/agent2/ckpt/offline_dqn_20000.ckpt
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold4/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of th


-----iteration:  48 target diff:  0.0024783802225069765 values:  -46.836296 ----- 

-----iteration:  49 target diff:  0.002203258414807808 values:  -46.798058 ----- 

-----iteration:  0 target diff:  0.9184561699944321 values:  -51.397762 ----- 

-----iteration:  50 target diff:  0.0018685234655700651 values:  -46.67781 ----- 

-----iteration:  1 target diff:  0.003934313460391978 values:  -51.34697 ----- 

-----iteration:  2 target diff:  0.0024609572164806786 values:  -51.500244 ----- 

-----iteration:  51 target diff:  0.0022368860640485312 values:  -46.561115 ----- 

-----iteration:  3 target diff:  0.0024634920951751666 values:  -51.560474 ----- 

-----iteration:  52 target diff:  0.002701388073094104 values:  -46.511448 ----- 

-----iteration:  4 target diff:  0.002103186017292388 values:  -51.665348 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold4/train/agent3/ckpt/offline_dqn_2000.ckpt
-----iteration:  5 target diff:  0.00316232129008547




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.
-----iteration: 
 59 target diff:  0.0020732394314333007 values:  -46.008392 ----- 



To change 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold4/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold4/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_

target diff: 
 0.003202202643949647 values:  -44.74118 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner -




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  67 target diff:  0.001832637600969048 values:  -43.447765 ----- 

-----iteration:  68 target diff:  0.002168977466820848 values:  -43.469963 ----- 

-----iteration:  69 target diff:  0.0018890183736541128 values:  -43.482967 ----- 

-----iteration:  70 target diff:  0.0018369619496290078 values:  -43.42247 ----- 

-----itera

-----iteration:  1 target diff:  0.0015945019057036253 values:  -50.152233 ----- 

-----iteration:  2 target diff:  0.0019439850151614293 values:  -50.105747 ----- 

-----iteration:  3 target diff:  0.0016466729004424132 values:  -50.120636 ----- 

-----iteration:  4 target diff:  0.002204013525552715 values:  -50.02818 ----- 

-----iteration:  5 target diff:  0.001545349425715522 values:  -49.980534 ----- 

-----iteration:  6 target diff:  0.0014844381353404253 values:  -49.931892 ----- 

-------------------- ckpt:  20000 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold4/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold4/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/218175338/fold4/train/agent1/trajs1.pkl!
Refresh buffer every




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

saving model weights at /home/jupyt/leyuan

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold4/train/agent3/ckpt/offline_dqn_17000.ckpt
-----iteration:  13 target diff:  0.0017487794229665843 values:  -44.614834 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold4/train/agent3/ckpt/offline_dqn_18000.ckpt
-----iteration:  14 target diff:  0.0015551612499267655 values:  -44.668194 ----- 

-----iteration:  15 target diff:  0.002280185835281081 values:  -44.729836 ----- 

-----iteration:  16 target diff:  0.0019306754803013876 values:  -44.69593 ----- 

-----iteration:  17 target diff:  0.002125855284480895 values:  -44.75004 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold4/train/agent3/ckpt/offline_dqn_18000.ckpt
-----iteration:  18 target diff:  0.0019220388027750008 values:  -44.76112 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold4/train/agent3/ckpt/offline_dqn_19000.ckpt
-----iteratio




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
saving model weights at /home/jupyt/leyuan

-----iteration:  12 target diff:  0.0027727686567620663 values:  -45.998165 ----- 

-----iteration:  13 target diff:  0.0024883376752446364 values:  -46.02144 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold4/train/agent4/ckpt/offline_dqn_13000.ckpt
-----iteration:  14 target diff:  0.0017279115560248463 values:  -46.009983 ----- 

-----iteration:  15 target diff:  0.0023851504382738768 values:  -45.980663 ----- 

-----iteration:  16 target diff:  0.002518297667129792 values:  -45.959953 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold4/train/agent4/ckpt/offline_dqn_12000.ckpt
-----iteration:  17 target diff:  0.0025736403322687475 values:  -45.98915 ----- 

-----iteration:  18 target diff:  0.0017386326913036692 values:  -46.04906 ----- 

-----iteration:  19 target diff:  0.0017996314652311616 values:  -46.175434 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold4/train/a

-----iteration:  71 target diff:  0.0018975367043033823 values:  -45.024822 ----- 

-----iteration:  72 target diff:  0.002305782610433259 values:  -45.002842 ----- 

-----iteration:  73 target diff:  0.001719168280582512 values:  -44.92381 ----- 

-----iteration:  74 target diff:  0.0019211218710345965 values:  -44.83159 ----- 

-----iteration:  75 target diff:  0.0018568479241337168 values:  -44.75006 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  76 target diff:  0.002303093016906992 values:  -44.73471 ----- 

-----iteration:  0 target diff:  0.9205304543730393 values:  -57.09027 ----- 

-----iteration:  77 target diff:  0.001777120627910025 values:  -44.662083 ----- 

-----iteration:  1 target diff:  0.00248

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold4/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold4/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold4/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold4/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold4/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, 


-----iteration:  1 target diff:  0.002258498006286036 values:  -58.199963 ----- 

-----iteration:  0 target diff:  0.9184551357692748 values:  -47.177807 ----- 

-----iteration:  2 target diff:  0.0015719384882674741 values:  -58.26145 ----- 

-----iteration:  1 target diff:  0.002635116055033591 values:  -47.124603 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  3 target diff:  0.002135696622381404 values:  -58.31521 ----- 

-----iteration:  2 target diff:  0.002141617423019492 values:  -47.128666 ----- 

-----iteration:  3 target diff:  0.0027175508772796673 values:  -47.07769 ----- 

-----iteration:  4 target diff:  0.0014613499684224002 values:  -58.263977 ----- 

-------------------- ckpt:  2000 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold4/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  5 target diff:  0.0023877

-----iteration:  57 target diff:  0.0022453095791221175 values:  -45.56503 ----- 

-----iteration:  49 target diff:  0.0034785098822408315 values:  -49.73701 ----- 

-----iteration:  29 target diff:  0.0020429034193390843 values:  -56.743134 ----- 

-----iteration:  58 target diff:  0.0020453265126973434 values:  -45.492886 ----- 

-----iteration:  50 target diff:  0.0038694810911073835 values:  -49.59968 ----- 

-----iteration:  30 target diff:  0.0017281454798861319 values:  -56.670006 ----- 

-----iteration:  51 target diff:  0.003906573524409548 values:  -49.315437 ----- 

-----iteration:  59 target diff:  0.001880950665015266 values:  -45.494884 ----- 

-----iteration:  31 target diff:  0.0014676783839660105 values:  -56.57375 ----- 

-------------------- ckpt:  3000 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold4/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!


Loaded trajectories from load path: /ho




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.
-----
 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  53 target diff:  0.0037470641197387654 values:  -48.855343 ----- 

-----iteration:  61 target diff:  0.0020611465248079643 values:  -45.430088 ----- 

-----iteration:  54 target diff:  0.004224781676245826 values:  -48.689148 ----- 

-----iteration:  62 target diff:  0.0015837220741486235 values:  -45.359512 ----- 






To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  87 target diff:  0.0030401070411818112 values:  -44.82118 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autoca

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)




-------------------- fqe on dqn & sale --------------------
-----iteration:  6 target diff:  0.0018245446360616807 values:  -55.556576 ----- 

-----iteration:  7 target diff:  0.002186678625021161 values:  -55.437153 ----- 

-----iteration:  52 target diff:  0.001753417518202684 values:  -46.74763 ----- 

-----iteration:  8 target diff:  0.0023127917497266586 values:  -55.37199 ----- 

-----iteration:  53 target diff:  0.002292107791948874 values:  -46.747967 ----- 

-----iteration:  9 target diff:  0.0022108460262739915 values:  -55.259895 ----- 

-----iteration:  54 target diff:  0.0018932622971486448 values:  -46.691654 ----- 

-----iteration:  55 target diff:  0.0022265139407434595 values:  -46.644253 ----- 

-----iteration:  10 target diff:  0.002379360250711531 values:  -55.211647 ----- 

-----iteration:  56 target diff:  0.0019644209012854665 values:  -46.59802 ----- 

-----iteration:  11 target diff:  0.002540803936616258 values:  -55.15216 ----- 

-----iteration:  57 target d

-----iteration:  4 target diff:  0.0008813298646951548 values:  -58.00028 ----- 

-------------------- ckpt:  7000-----iteration:   --------------------
4 target diff:  0.002817103054654093 values:  Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold4/train/agent/trajs.pkl!-57.78655 
-----Refresh buffer every 1000000 sampling! 


Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold4/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold4/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold4/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold4/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!





To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  5 target diff:  0.002045949783721279 values:  -57.749493 ----- 

-----iteration:  6 target diff:  0.0018666057063600403 values:  -57.657627 ----- 

-----iteration:  7 target diff:  0.0023363327389956056 values:  -57.58639 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  8 target diff:  0.002317886179995948 values:  -57.60834 ----- 

-----iteration:




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  30 target diff:  0.0021808100085110743 values:  -56.112335 ----- 

-----iteration:  2 target diff:  0.0023454084575241364 values:  -45.54211 ----- 

-----iteration:  31 target diff:  0.0025632318680072217 values:  -56.034637 ----- 

-----iteration:  3 target diff:  0.0031281142178941775 values:  -45.597622 ----- 

-----iteration:  32 target diff:  0.0019388987758754395 values:  -55.9302 ----- 

-----iteration:  4 target diff:  0.002231734228784288 values:  -45.66963 ----- 

-----iteration:  33 target diff:  0.002409134917443901 values:  -55.729767 ----- 

-----iteration:  5 target diff:  0.003289794375408312 values:  -4


-------------------- adv learner --------------------
-----iteration:  54 target diff:  0.001777135210164385 values:  -52.44629 ----- 

-----iteration:  23 target diff:  0.0021497678488780485 values:  -45.100037 ----- 

-----iteration:  55 target diff:  0.001983634011357739 values:  -52.196007 ----- 

-----iteration:  56 target diff:  0.002426961585706725 values:  -52.070477 ----- 

-----iteration:  57 target diff:  0.0018714292213097995-----iteration:   values: 24  -51.86978target diff:   -----0.0031120513411503395  
values: 
 -45.261017 ----- 

-----iteration:  58 target diff:  0.0024554130599995397 values:  -51.63103 ----- 

-----iteration:  25 target diff:  0.004050334326205204 values:  -45.179462 ----- 

-----iteration:  59 target diff:  0.002504043400394327 values:  -51.51731 ----- 

-----iteration:  26 target diff:  0.0029835865918907173 values:  -45.158768 ----- 

-----iteration:  60 target diff:  0.0018786079187398956 values:  -51.30962 ----- 

-------------------- fqe on dqn

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



-----iteration:  1 target diff:  0.002712942263275905 values:  -58.302708 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  2 target diff:  0.001743913901610884 values:  -58.248245 ----- 

-----iteration:  3 target diff:  0.0014204570149078566 values:  -58.224743 ----- 

-------------------- ckpt:  11000 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold4/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold4/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold4/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold4/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded traje




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('floa

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold4/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold4/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold4/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.
target diff:  
0.0017566489843864005 values:  -56.809914 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer

-----iteration:  31 target diff:  0.0019264247569625134 values:  -46.652203 ----- 

-----iteration:  32 target diff:  0.0021827288515488385 values:  -46.673134 ----- 

-----iteration:  33 target diff:  0.0022861677343219535 values:  -46.645653 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  34 target diff:  0.002288975979270516 values:  -46.648056 ----- 

-----iteration:  35 target diff:  0.0024220467302939926 values:  -46.691727 ----- 

-----iteration:  36 target diff:  0.0030838429881694606 values:  -46.70628 ----- 

-----iteration:  0 target diff:  0.9224048311197414 values:  -59.8294 ----- 

-----iteration:  37 target diff:  0.002235293780001991 values:  -46.63555 ----- 

-----iteration:  1 target diff:  0.00

-----iteration:  8 target diff:  0.0017772174264643508 values:  -54.55753 ----- 

-----iteration:  0 target diff:  0.9186306443556408 values:  -47.220043 ----- 

-----iteration:  9 target diff:  0.0014276153655780298 values:  -54.495964 ----- 

-------------------- ckpt:  5000 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold4/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold4/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold4/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold4/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
-----iteration:  1 target diff:  0.0025530087587610785Loaded trajectories f




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  2 target diff:  0.0033546




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.
 
23 target diff:  0.002959381576051978 values:  -47.406902 ----- 

-------------------- adv learner --------------------
-----iteration:  24 target diff:  0.002417155791647699 values:  -47.358543 ----- 

-----iteration:  25 target diff:  0.003992781713671969 values:  -47.39663 ----- 

-------------------- fqe on dqn & sale --------------------
 26 target diff:  0.002413707481588514 values:  -47.41916 ----- 

-----iteration:  27 target diff:  0.0026745933762394496 values:  -47.427 ----- 

-----iteration:  28 target diff:  0.0019266417971959184 values:  -47.486923 ----- 

-----iteration:  29 target diff:  0.0030533627615574152 values:  -47.50909 ----- 



To change all layers to have dtype f


-----iteration:  0 target diff:  0.9187062974206254 values:  -57.97205 ----- 

-----iteration:  42 target diff:  0.00294247140111193 values:  -47.4081 ----- 

-----iteration:  1 target diff:  0.003394978060125816 values:  -57.937664 ----- 

-----iteration:  2 target diff:  0.0014667199474843867 values:  -57.916367 ----- 

-------------------- ckpt: -----iteration:   1500043  --------------------target diff: 
 0.002539851983062751 values:  -47.39881Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold4/train/agent/trajs.pkl! 
Refresh buffer every 1000000 sampling!----- 




Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold4/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold4/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/m




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  45 target diff:  0.002521644630300079 values:  -47.376266 ----- 

-----iteration:  1 target diff:  0.001705596460745362 values:  -59.453667 ----- 

-----iteration:  46 target diff:  0.001909326203137333 values:  -47.35372 ----- 

-----iteration:  2 target diff:  0.0027279680025400843 values:  -59.525043 ----- 

-----iteratio

-----iteration:  66 target diff:  0.0024289500108712245 values:  -46.201122 ----- 

-----iteration:  67 target diff:  0.002722233876218419 values:  -46.086163 ----- 

-----iteration:  68 target diff:  0.002499506213692229 values:  -46.017242 ----- 

-----iteration:  69 target diff:  0.0019321071321113565 values:  -45.932274 ----- 

-----iteration:  70 target diff:  0.001982205144606473 values:  -45.8877 ----- 

-----iteration:  71 target diff:  0.0022527560682239394 values:  -45.80086 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author o

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold4/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
-----iteration:  88 target diff:  0.002100625047577132Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold4/train/agent1/trajs1.pkl! 
Refresh buffer every 1000000 sampling!values:  
-44.79524 ----- 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold4/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold4/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold4/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pa




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.
 
--------------------
-----iteration:  89 target diff:  0.0021955721342577944 values:  -44.74293 ----- 

-----iteration:  90 target diff:  0.002193564185097645 values:  -44.710033 ----- 

-----iteration:  0 target diff:  0.9198855784966447 values:  -57.62898 ----- 

-----iteration:  91 target diff:  0.0022281815853732755 values:  -44.7196 ----- 

-----iteration:  1 target diff:  0.0025043238773

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold4/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold4/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold4/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold4/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold4/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, 



--------------------



Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold4/train/agent0/trajs0.pkl!







Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold4/train/agent1/trajs1.pkl!

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold4/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold4/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold4/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold4/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/717354021/fold4/train/age




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  18 target diff:  0.0023605767849146503 values:  -44.63937 ----- 

-----iteration:  19 target diff:  0.0021657663070759176 values:  -44.584312 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  20 target diff:  0.0022128150852259326 values:  -44.627647 ----- 

-------------------- fqe on dqn 




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  13 target diff:  0.0017272665441610206 values:  -59.345455 ----- 

-------------------- adv learner --------------------
-----iteration:  38 target diff:  0.003154431074901166 values:  -44.197624 ----- 

-----iteration:  14 target diff:  0.0019509151664236332 values:  -59.353405 ----- 

-----iteration:  39 target diff:  0.0022217479272324883 values:  -44.15567 ----- 

-----iter



Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold4/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold4/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold4/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold4/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold4/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer


-----iteration:  0 target diff:  0.918297241951929 values:  -45.84421 ----- 

-----iteration:  1 target diff:  0.004502479972837232 values:  -45.886803 ----- 

-----iteration: -----iteration:   02  target diff: target diff:   0.91983475168853460.0032499065263933867  values: values:   -57.27869 ------45.87205  

----- 

-----iteration:  3 target diff:  0.006403723713110171 values:  -45.87254-----iteration:   -----1  
target diff: 
 0.0017184984201768642 values:  -57.27033 ----- 

-----iteration:  4 target diff:  0.004569046177308494 values:  -45.8971 ----- 

-----iteration:  5 target diff:  0.0025174422935938068 values:  -45.84225 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  2 target diff:  0.001862288304935605 values:  -57.223503 ----- 

-----iteration:  6 target diff:  0.002198860448138077 values:  -45.835117 ----- 

-----iteration:  3 target diff:  0.0010701002818616374 values:  -57.09298 ----- 

-------------------- ckpt:  20000 -------------




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('floa

-----iteration:  33 target diff:  0.0018985203839239281 values:  -45.362015 ----- 

-----iteration:  15 target diff:  0.002321210160193221 values:  -56.744225 ----- 

-----iteration:  34 target diff:  0.002692345911816252 values:  -45.322056 ----- 

-----iteration:  16 target diff:  0.001973263712941043 values:  -56.773365 ----- 

-----iteration:  -----iteration: 17  35target diff:   target diff: 0.0019386022202499793 0.003020778782381697  values: values:  -45.28451  -56.701595----- 
 
----- 

-----iteration:  36 target diff:  0.002338464008867386 values:  -45.244823 ----- 

-----iteration:  18 target diff:  0.002579790902014219 values:  -56.58903 ----- 

-----iteration:  37 target diff:  0.002308782792235656 values:  -45.217167 ----- 

-----iteration:  19 target diff:  0.002036072187200221 values:  -56.481945 ----- 

-----iteration:  38 target diff:  0.00208344383256784 values:  -45.19224 ----- 

-----iteration:  20-----iteration:   target diff:  390.001843669641404144  target diff: v

-----iteration:  2 target diff:  0.0018325113624922756 values:  -58.806423 ----- 

-----iteration:  3 target diff:  0.0013728239622968954 values:  -58.769413 ----- 

-----iteration:  71 target diff:  0.0024696227653996394 values:  -43.723026 ----- 

-----iteration:  72 target diff:  0.002988945838174677 values:  -43.694794 ----- 

-----iteration:  73 target diff:  0.0019086441047591542 values:  -43.712772 ----- 

-----iteration:  74 target diff:  0.0022525307155852816 values:  -43.63312 ----- 

-----iteration:  75 target diff:  0.002465652649670351 values:  -43.59785 ----- 

-----iteration:  76 target diff:  0.002122222688709163 values:  -43.549038 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  77 target diff:  




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  83 target diff:  0.001768476216463624 values:  -43.219006 ----- 



To change a

-----iteration:  3 target diff:  0.001557059374251736 values:  -57.129726 ----- 

-----iteration:  4 target diff:  0.0021085411126850473 values:  -57.08602 ----- 

-----iteration:  5 target diff:  0.0013188853013446063 values:  -56.99897 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  0 target diff:  0.9175861267659207 values:  -45.3207 ----- 

-----iteration:  0 target diff:  0.9207696693850801 values:  -57.708515 --




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('floa

-----iteration:  48 target diff:  0.0028593853268739033 values:  -53.724155 ----- 

-----iteration:  36 target diff:  0.0024419612409566998 values:  -47.70468 ----- 

-----iteration:  49 target diff:  0.002476636239808686 values:  -53.354164 ----- 

-----iteration:  37 target diff:  0.0031099392917773227 values:  -47.600475 ----- 

-----iteration:  50 target diff:  0.0026751756334467726 values:  -53.181904 ----- 

-----iteration:  51 target diff:  0.0023779211128848155 values:  -53.08711 -----iteration:  38-----  target diff: 
 
0.002295048527317244 values:  -47.631954 ----- 

-----iteration:  39 target diff: -----iteration:  52 0.002515688859665308  target diff: values:   0.0027974167230761437-47.629784 values:  -----  -52.898182
 ----- 


-----iteration:  53 -----iteration: target diff:   0.0018087513238106640  values: target diff:  -52.744865  0.0027198626226896424-----  
values: 
 -47.653267 ----- 

-----iteration:  54 target diff:  0.002099320184910148 values:  -52.655933 -----ite




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  94 target diff:  0.0023987624830452826 values:  -46.835415 ----- 

-----iteration:  95 target diff:  0.0019776499693890627 values:  -46.864254 ----- 

-----iteration:  96 target diff:  0.0031120240767555005 values:  -46.897713 ----- 

-----iteration:  97 target diff:  0.0032487958039229157 values:  -46.924976 ----- 

-----it




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('floa


-----iteration:  9 target diff:  0.0015799531963513315 values:  -44.40926 ----- 

-----iteration:  0 target diff:  0.9223376809511294 values:  -60.11829 ----- 

-----iteration:  10 target diff:  0.0018491462737815754 values:  -44.416496 ----- 

-----iteration:  1 target diff:  0.002655936920524099 values:  -60.07767 ----- 

-----iteration:  11 target diff:  0.001646942122415197 values:  -44.45855 ----- 

-----iteration:  2 target diff:  0.00183076816505176 values:  -60.084393 ----- 

-----iteration:  12 target diff:  0.0017909951648516743 values:  -44.466843 ----- 

-----iteration:  3 target diff:  0.001518950412456959 values:  -60.14394 ----- 

-----iteration:  4 target diff:  0.001590959687113405 values:  -60.168056 ----- 

-----iteration:  13 target diff:  0.0020771887945129986 values:  -44.504128 ----- 

-----iteration:  5 target diff:  0.0024571311690975866 values:  -60.154198 ----- 

-----iteration:  14 target diff:  0.0014119405492767816 values:  -44.49083 ----- 

-----iteratio




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- fqe on dqn & sale ---

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold4/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold4/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold4/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold4/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold4/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold4/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold4/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold4/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras


-------------------- adv learner --------------------


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  0 target diff:  0.9195662068037981 values:  -56.39363 ----- 

-----iteration:  1 target diff:  0.0013473448177890879 values:  -56.370705 ----- 

-------------------- fqe on dqn & sale --------------------


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just th

-----iteration:  6 target diff:  0.002806262388800032 values:  -46.02042 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  7 target diff:  0.0017826496243275732 values:  -46.06747 ----- 

-----iteration:  0 target diff:  0.9216385579115544 values:  -58.654613 ----- 

-----iteration:  1 target diff:  0.0014434386093351104 values:  -58.655807 ----- 

-----iteration:  8 target diff:  0.002493980797474728 values:  -45.97627 ----- 

-----iteration:  9 target diff:  0.0025750909655062005 values:  -46.008827 ----- 

-----iteration:  10 target diff:  0.002425002541248355 values:  -46.111073 ----- 

-----iteration:  11 target diff:  0.0021316671789983117 values:  -46.191498 ----- 

-----iteration:  12 target diff:  0.001598

-----iteration:  38 target diff:  0.0019372760028473876 values:  -45.87357 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  0 target diff:  0.9222381146605193 values:  -59.190712 ----- 

-----iteration:  39 target diff:  0.002436432361130717 values:  -45.763912 ----- 

-----iteration:  1 target diff:  0.0022114640327710886 values:  -59.143715 ----- 

-----iteration:  2 target diff:  0.0013659812247372536 values:  -58.9451 ----- 

-------------------- ckpt:  20000 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/798842024/fold4/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/79


 -45.71932 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  41 tar

Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold4/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold4/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold4/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold4/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold4/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-------------------- fqe on dqn & sale --------------------


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  0 target diff:  0.9183499122472523 values:  -44.93576 ----- 

-----iteration:  1 target diff:  0.0023372299757763145 values:  -44.98928 ----- 

-----iteration:  2 target diff:  0.004117044387627728 values:  -44.98562 ----- 

-----iteration:  3 target diff:  0.002122

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold4/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold4/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold4/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold4/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold4/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold4/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold4/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold4/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold4/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/dqn/tmp/932136058/fold4/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, 


-----iteration:  0 target diff:  0.9179125048776294 values:  -47.368732 ----- 

-----iteration:  1 target diff:  0.0028891143596579552 values:  -47.36121 ----- 

-----iteration:  2 target diff:  0.0027007147073766606 values:  -47.38648 ----- 

-----iteration:  3 target diff:  0.002361533827989079 values:  -47.399715 ----- 

-----iteration:  4 target diff:  0.002110864486957571 values:  -47.451843 ----- 

-----iteration:  5 target diff:  0.0027119382608808847 values:  -47.473934 ----- 

-----iteration:  6 target diff:  0.0024925765723616882 values:  -47.4764 ----- 

-----iteration:  7 target diff:  0.001811156573431088 values:  -47.49527 ----- 

-----iteration:  8 target diff:  0.0023612386046100147 values:  -47.57231 ----- 

-----iteration:  9 target diff:  0.002910601115727355 values:  -47.551083 ----- 

-----iteration:  10 target diff:  0.0025125408388768384 values:  -47.592045 ----- 

-----iteration:  11 target diff:  0.0029357384514551274 values:  -47.58767 ----- 

-----iteration:




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-------------------- fqe on dqn & sale --------------------


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base La


-----iteration:  87 target diff:  0.0018303052776982947 values:  -42.355045 ----- 

-----iteration:  88 target diff:  0.0019437202592138638 values:  -42.3803 ----- 

-----iteration:  89 target diff:  0.002312372615165089 values:  -42.360687 ----- 

-----iteration:  90 target diff:  0.001989698781230027 values:  -42.373375 ----- 

-----iteration:  91 target diff:  0.0018306296591025565 values:  -42.384933 ----- 

-----iteration:  92 target diff:  0.0021153178674576086 values:  -42.355854 ----- 

-----iteration:  93 target diff:  0.0018339266193784201 values:  -42.377144 ----- 

-----iteration:  94 target diff:  0.002003365132809982 values:  -42.386887 ----- 

-----iteration:  95 target diff:  0.0019107880748988853 values:  -42.405037 ----- 

-----iteration:  96 target diff:  0.002059472393311861 values:  -42.44908 ----- 

-----iteration:  97 target diff:  0.00198331566451506 values:  -42.468117 ----- 

-----iteration:  98 target diff:  0.0020580966544573504 values:  -42.4839 ----- 

--




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-------------------- fqe on dqn & sale --------------------


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base La

