In [1]:
import os
import copy
import random
import gym
import pickle

import numpy as np
import pandas as pd
import tensorflow as tf
tf.keras.backend.set_floatx('float32')

from itertools import permutations
from sklearn.model_selection import KFold, GridSearchCV

from multiprocessing import set_start_method
import multiprocessing as mp

# `sys` is needed for the sys.path manipulation below but was never imported,
# which raises NameError on a fresh kernel; import it next to its only use.
import sys

# Make the parent directory of the working directory importable so that the
# `peal` package imported further down can be resolved.
path = os.path.abspath('..')
if path not in sys.path:
    sys.path.append(path)

from peal.agents.default_config import DEFAULT_CONFIG as config
# from peal.agents.dqn import DQNAgent
from peal.agents.qr_dqn import QuantileAgent
# from peal.agents.multi_head_dqn import MultiHeadDQNAgent
# from peal.agents.discrete_bcq import DiscreteBCQAgent

from peal.algos.kfold import CVS, KFoldCV
from peal.algos.advantage_learner import AdvantageLearner
from peal.algos.behavior_cloning import BehaviorCloning
from peal.algos.density_ratio import VisitationRatioModel
from peal.algos.fqe import FQE

def _make_split_agent(kf, idx, base_config, num_actions):
    """Build the QuantileAgent attached to inner split ``idx`` of ``kf``.

    The agent receives its own deep copy of ``base_config`` whose
    ``persistent_directory`` and ``checkpoint_path`` point at that split, so
    ``base_config`` itself is left untouched.
    """
    split_config = copy.deepcopy(base_config)
    split_config['persistent_directory'] = kf.agent_paths[idx]
    split_config['checkpoint_path'] = kf.ckpt_paths[idx]
    return QuantileAgent(num_actions=num_actions, config=split_config)


def one_step(seed):
    """Run one seeded replication of the offline QR-DQN vs. PEAL comparison.

    For every outer fold produced by ``CVS`` this function:

    1. trains one ``QuantileAgent`` on the fold's training trajectories and one
       additional agent per inner ``KFoldCV`` split;
    2. fits a ``BehaviorCloning`` model on ``kf.trajs``;
    3. for every saved checkpoint, reloads all agents, calls ``kf.update_q``,
       fits an ``AdvantageLearner`` on the row-mean-centred ``qtildes`` and
       evaluates both greedy policies with ``FQE`` on the fold's test
       trajectories.

    Parameters
    ----------
    seed : int
        Seed for Python's ``random``, NumPy and TensorFlow; it is also passed
        as ``random_state`` to ``CVS`` and ``KFoldCV``.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``(fold, checkpoint step)`` with columns ``'dqn'`` and
        ``'peal'`` holding the ``values`` attribute of the corresponding
        ``FQE`` object (presumably the estimated policy value -- confirm
        against ``peal.algos.fqe``).
    """
    # Seed every RNG that is in scope so a replication is reproducible.
    # Python's `random` was previously left unseeded.
    random.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)

    path = './data/mh/qr_dqn/trajs_mh.pkl'
    nfolds = 5
    n_splits = 5
    num_actions = 5
    max_training_steps = 10000
    steps_per_ckpt = 1000
    # One evaluation point per saved checkpoint: 1000, 2000, ..., 10000.
    # Derived from the training schedule so the two cannot drift apart.
    ckpts = np.arange(steps_per_ckpt, max_training_steps + 1, steps_per_ckpt)

    # Configure a private copy: `config` is the package-wide DEFAULT_CONFIG and
    # must not be mutated by an individual replication.
    run_config = copy.deepcopy(config)
    run_config['online'] = False
    run_config['lr'] = 5e-4
    run_config['decay_steps'] = 50000
    run_config['max_training_steps'] = max_training_steps
    run_config['training_steps_to_checkpoint'] = steps_per_ckpt
    run_config['training_steps_to_eval'] = 100000
    run_config['hiddens'] = [64, 64]
    run_config['double'] = False
    run_config['dueling'] = False

    # Both FQE evaluators share exactly the same hyper-parameters.
    fqe_kwargs = dict(num_actions=num_actions, activation='tanh',
                      hiddens=run_config['hiddens'], max_iter=100, eps=0.0015)

    index = pd.MultiIndex.from_product([np.arange(nfolds), ckpts])
    rets = pd.DataFrame(index=index, columns=['dqn', 'peal'])

    print('-'*20, 'start', '-'*20)
    cvs = CVS(path, n_splits=nfolds, random_state=seed)
    cvs.split()
    for fold in range(nfolds):
        train_path = cvs.train_paths[fold] + 'trajs.pkl'
        kf = KFoldCV(train_path, n_trajs=None, n_splits=n_splits, shuffle=False, random_state=seed)
        kf.split()

        print('-'*20, 'training agent', '-'*20)
        # Agent trained on the whole training part of this fold.
        run_config['persistent_directory'] = kf.agent_path
        run_config['checkpoint_path'] = kf.ckpt_path
        QuantileAgent(num_actions=num_actions, config=run_config).learn()

        print('-'*20, 'training agents', '-'*20)
        # agent_1, ..., agent_K: one agent per inner split.
        for idx in range(kf.n_splits):
            _make_split_agent(kf, idx, run_config, num_actions).learn()

        # Held-out trajectories used for fitted Q evaluation. The pickle is
        # produced by `cvs.split()` above; never point this at untrusted files.
        test_path = cvs.test_paths[fold] + 'trajs.pkl'
        with open(test_path, 'rb') as f:
            trajs = pickle.load(f)

        print('-'*20, 'behavior cloning', '-'*20)
        # Behavior cloning on elements 0 and 1 of every transition in kf.trajs.
        bc = BehaviorCloning(num_actions=num_actions)
        bc_states = np.array([transition[0] for traj in kf.trajs for transition in traj])
        bc_actions = np.array([transition[1] for traj in kf.trajs for transition in traj])
        bc.train(bc_states, bc_actions)

        for ckpt in ckpts:
            print('-'*20, 'ckpt: ', ckpt, '-'*20)
            ckpt_name = 'offline_qr_dqn_{}.ckpt'.format(ckpt)
            agent = QuantileAgent(num_actions=num_actions, config=run_config)
            agent.load(kf.ckpt_path + ckpt_name)

            agents = []
            for idx in range(kf.n_splits):
                agent_idx = _make_split_agent(kf, idx, run_config, num_actions)
                agent_idx.load(kf.ckpt_paths[idx] + ckpt_name)
                agents.append(agent_idx)
            # The second return value (named `qvalues` originally) is unused.
            states, _, qtildes = kf.update_q(agents, bc)

            print('-'*20, 'adv learner', '-'*20)
            # Centre each row so the learner is fitted on advantages.
            advs = qtildes - qtildes.mean(axis=1, keepdims=True)
            peal_learner = AdvantageLearner(num_actions=num_actions)
            peal_learner._train(states, advs)

            print('-'*20, 'fqe on dqn & peal', '-'*20)
            fqe_dqn = FQE(agent.greedy_actions, **fqe_kwargs)
            fqe_dqn.train(trajs)
            fqe_peal = FQE(peal_learner.greedy_actions, **fqe_kwargs)
            fqe_peal.train(trajs)

            rets.loc[(fold, ckpt), 'dqn'] = fqe_dqn.values
            rets.loc[(fold, ckpt), 'peal'] = fqe_peal.values

    return rets

In [2]:
save_path = './data/mh/qr_dqn/'

# Run five replications (seeds 0..4) on five worker processes. `pool.map`
# blocks until every replication has returned; the context manager then tears
# the pool down even if a worker raised, instead of leaving processes behind.
with mp.Pool(5) as pool:
    rets = pool.map(one_step, range(5))

# `rets` is a list with one result DataFrame per seed, in seed order.
with open(save_path + 'rets_qr_dqn_mh.pkl', 'wb') as f:
    pickle.dump(rets, f)

------------------------------------------------------------ ----------------------------------------start     startstartstartstart--------------------    
--------------------------------------------------------------------------------



-------------------- training agent ---------------------------------------- -------------------- training agent
training agent -------------------- 
--------------------
-------------------- training agent --------------------
-------------------- training agent --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold0/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/717354021/fold0/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/798842024/fold0/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!Lo



saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold0/train/agent/ckpt/offline_qr_dqn_1000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/717354021/fold0/train/agent/ckpt/offline_qr_dqn_1000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/798842024/fold0/train/agent/ckpt/offline_qr_dqn_1000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold0/train/agent/ckpt/offline_qr_dqn_1000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold0/train/agent/ckpt/offline_qr_dqn_1000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold0/train/agent/ckpt/offline_qr_dqn_2000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/717354021/fold0/train/agent/ckpt/offline_qr_dqn_2000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold0/train/agent/ckpt/offline_qr_dqn_




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/798842024/fold0/train/agent/ckpt/offline_qr_dqn_10000.ckpt
-------------------- training agents --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/798842024/fold0/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by defa


saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold0/train/agent0/ckpt/offline_qr_dqn_10000.ckpt
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold0/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/798842024/fold0/train/agent0/ckpt/offline

Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold0/train/agent2/ckpt/offline_qr_dqn_1000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/798842024/fold0/train/agent2/ckpt/offline_qr_dqn_1000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold0/train/agent2/ckpt/off




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold0/train/agent2/ckpt/offline_qr_dqn_10000.ckpt
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold0/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To ch


saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/798842024/fold0/train/agent3/ckpt/offline_qr_dqn_10000.ckpt
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/798842024/fold0/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold0/train/agent3/ckpt/offline


-----iteration:  0 target diff:  0.9185576392906609 values:  -60.79718 ----- 

-----iteration:  1 target diff:  0.002700177931362835 values:  -60.787346 ----- 

-----iteration:  2 target diff:  0.0023783027012500746 values:  -60.815453 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold0/train/agent4/ckpt/offline_qr_dqn_10000.ckpt
-------------------- behavior cloning --------------------
-----iteration:  3 target diff:  0.002135231255121726 values:  -60.870064 ----- 

-----iteration:  4 target diff:  0.0021635941692616377 values:  -60.844734 ----- 

-----iteration:  5 target diff:  0.001583278260057687 values:  -60.892204 ----- 

-----iteration:  6 target diff:  0.0019472755253699733 values:  -60.925835 ----- 

-----iteration:  7 target diff:  0.001848814684495478 values:  -60.98138 ----- 

-------------------- ckpt:  1000 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold0/train/a




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
saving model weights at /home/jupyt/leyuan


-------------------- adv learner --------------------
-----iteration:  3 target diff:  0.002345559016210568 values:  -60.92019 ----- 

-----iteration:  0 target diff:  0.921981511780682 values:  -53.584343 ----- 

-----iteration:  4 target diff:  0.002229060987812612 values:  -60.86054 ----- 

-----iteration:  1 target diff:  0.004056289973823552 values:  -53.557953 ----- 

-----iteration:  5 target diff:  0.0024679448708052588 values:  -60.885933 ----- 

-----iteration:  2 target diff:  0.003102187521983818 values:  -53.48289 ----- 

-----iteration:  6 target diff:  0.002483149173028148 values:  -60.879177 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/717354021/fold0/train/agent4/ckpt/offline_qr_dqn_10000.ckpt
-------------------- behavior cloning --------------------
-----iteration:  7 target diff:  0.00249436081847921 values:  -60.892864 ----- 

-----iteration:  3 target diff:  0.0036546933125889943 values:  -53.500313 ----- 

-----iteration:  8 target


target diff:  0.0021685561058368092 values:  -58.580936 ----- 

-----iteration:  12 target diff:  0.0024553114759392793 values:  -54.70871 ----- 

-----iteration:  31 target diff:  0.0036944808846448183 values:  -60.6403 ----- 

-----iteration:  4 target diff:  0.0019089333599919447 values:  -58.613884 ----- 

-----iteration:  27 target diff:  0.001749515957334471 values:  -52.415337 ----- 

-----iteration:  32 target diff:  0.005564405095239299 values:  -60.56601 ----- 

-----iteration:  13 target diff:  0.0021847182124599455 values:  -54.792057 ----- 

-----iteration:  0 target diff:  0.9239078433691987 values:  -54.125465 ----- 

-----iteration:  28 target diff:  0.0028621217770908227 values:  -52.475742 ----- 

-----iteration:  5 target diff:  0.0022788678821215286 values:  -58.56127 ----- 

-----iteration:  1 target diff:  0.0028258048454708464 values:  -54.142307 ----- 

-----iteration:  33 target diff:  0.0034942015150485413 values:  -60.472496 ----- 

-----iteration: -----iter




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.
-----
 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floa

-----iteration:  25 target diff:  0.0023261320641990092 values:  -55.0665 ----- 

-----iteration:  45 target diff:  0.004745639708790652 values:  -59.04411 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  26 target diff:  0.002311470782895419 values:  -55.090855 ----- 

-----iteration:  46 target diff:  0.005611391654064059 values:  -58.945442 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  47 target diff:  0.003946446507630833 values:  -58.




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  65 target diff:  0.001828534593330653 values:  -57.043785 ----- 
-----iteration: 
 40 target diff:  0.0028789452024363987 values:  -55.095367 ----- 

-----iteration:  6 target diff:  0.0018014588996881841 values:  -58.849613 ----- 

-----iteration:  10 target diff:  0.0011804858139568286 values:  -53.277866 ----- 

-----iteration:  66 target diff:  0.0018223787159121368 values:  -56.983223 ----- 

-----iteration:  -----iteration:  7 41target diff:  target diff:  0.0014022337621125433  0.003787617190246831values:   values: -58.838745  -55.18034-----  
-----
 --------------------

 ckpt:  3000 --------------------
Loaded 




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  43 target diff:  0.002741


-----iteration:  5 target diff:  0.0022886206094706388 values:  -52.953274 ----- 

-----iteration:  53 target diff:  0.002338705529515271 values:  -54.96265 ----- 

-----iteration:  1 target diff:  0.001970706371001239 values:  -53.28654 ----- 

-----iteration:  81 target diff:  0.001835236090024177 values:  -56.063194 ----- 

-----iteration: -----iteration:   546  target diff: target diff:   0.00268061389749213970.002474303509624402  values: values:   -54.9212 -52.996155----- 
 
----- 

-----iteration:  0 target diff:  0.9224469877839115 values:  -58.915943 ----- 

-----iteration:  82 target diff:  0.0022780490404867145 values:  -56.03409 ----- 

-----iteration:  2 target diff:  0.0017712379216872202 values:  -53.264153 -----iteration: -----  

55 target diff:  0.0031356156826836024 values: -----iteration:   7 -54.858006target diff:   0.002034059256404215-----  

values:  -53.0086 ----- 

-----iteration:  1 target diff:  0.0016109639490199182 values:  -58.915524 ----- 

-----iteratio


 target diff:  0.002014574529071596 values:  -54.821026 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.
 values:  -58.94622
 ----- 

----


-----iteration:  67 target diff:  0.0024696784515400682 values:  -54.235786 ----- 

-----iteration:  0 target diff:  0.9235545264529846 values:  -52.8037 ----- 

-----iteration:  0 target diff:  0.9216099206456765 values:  -58.8061 ----------iteration:   
68
 target diff:  0.002837303591248101 values:  -54.168133 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  1 target diff:  0.0021736506993091744 values:  -52.78586 ----- 

-----iteration:  69 target diff:  0.0019427594263979019 values:  -54.053555 ----- 

-----iteration:  1 target diff:  0.0048962493292961425 values:  -58.814533 ----- 

-----iteration:  2 target diff:  0.002215760994777488 values:  -52.794014 ----- 

-----iteration:  2 target diff:  0.003679567

-----iteration:  3 target diff:  0.0030485848842966754 values:  -61.162106 ----- 

-----iteration:  85 target diff:  0.002225597233981579 values: -----iteration:   -53.089024 -----13  

target diff:  0.0025339779265241722 values:  -58.784546 ----- 

-----iteration:  4 target diff:  0.0025848461410547335 values:  -61.227333 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  86 target diff:  0.0021384736301392703 values:  -52.979424 ----- 

-----iteration:  14 target diff:  0.002353659052481954 values:  -58.773098 ----- 

-----iteration:  5 target diff:  0.002408318896201498 values:  -61.24087 ----- 

-----iteration:  87 target diff:  0.0021863031193755304 values:  -52.839348 ----- 

-----iteration:  15 target diff:  0.001525777258515009 values:  -58.722878 ----- 

-----iteration:  6 target diff:  0.0024073023997708007 values:  -61.20211 ----- 

-----iteration:  88 target diff:  0.0022241017322520177 values:  -52.712578 ----- 

-----iteration:  7 target 




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  29 target diff:  0.0021375187231465207 values:  -59.099293 ----- 

-----iteration:  -----iteration: 13 target diff:  0.0021431396228870467 values:   -56.173504 ----- 19
 target diff: 
 0.0022212486099565426 values:  -61.455444 ----- 

 ----- 

-----iteration:  1 target diff:  0.003890517094576372 values:  -53.32946 ----- 

-----iteration:  0 target diff:  0.9233612440719654 values:  -52.964607 ----- 

-----iteration:  15 target diff:  0.002062419699046405 values:  -56.16131 ----- 

-----iteration:  31 target diff:  0.0019215882187745825 values:  -59.15433 ----- 

-----iteration:  2 target diff:  0.002408475190652991 values:  -53.326534 ----- 

-----iteration:  21 target di

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/717354021/fold0/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/717354021/fold0/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
-----iteration:  28 target diff:  0.00302419345234986 values:  -56.21467 ----- 

-----iteration:  43 target diff:  0.002067418952690751 values: Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/717354021/fold0/train/agent2/trajs2.pkl! 
-59.072334Refresh buffer every 1000000 sampling!
 ----- 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/717354021/fold0/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/717354021/fold0/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype

-----iteration:  7 target diff:  0.001994192500042088-----iteration:   values: 3  target diff: -53.29868  0.0025638873608223684----- values:  
 
-53.907295 ----- 

-----iteration:  46 target diff:  0.0037664182338023153 values:  -54.550934 ----- 

-----iteration:  58 target diff:  0.0018897113696922801 values:  -58.72187 ----- 

-----iteration:  4 target diff:  0.0011807714135665531 values:  -53.913826 ----- 

-----iteration:  47 target diff:  0.004189140345779342 values:  -60.51634 ----- 

-----iteration:  47 target diff:  0.0035577432414590853 values:  -54.428364 ----- 

-----iteration:  48 target diff:  0.004168523051431124 values:  -----iteration:  59 target diff:  0.0019004055054458943 -60.451206 -----values:  -58.702385  
----- 


-----iteration:  8 target diff:  0.001402690430774598 values:  -53.255863 ----- 

-----iteration:  60 target diff:  0.0018804838165007935 values:  -58.64963 ----- 

-----iteration:  48 target diff:  0.0029804699510577086 values:  -54.166668 ----- 

----

-----iteration:  62 target diff:  0.00246432034099058 values:  -52.598488 ----- 

-----iteration:  8 target diff:  0.0019364600912803519 values:  -52.176018 ----- 

-----iteration:  60 target diff:  0.0025074765426202532 values:  -60.36967 ----- 

-----iteration:  7 target diff:  0.002126635328715195 values:  -53.454308 ----- 

-----iteration:  9 target diff:  0.0015189214880931752 values:  -52.363167 ----- 

-----iteration:  63 target diff:  0.0024252553035586655 values:  -52.50308 ----- 

-----iteration:  61 target diff: -----iteration:   0.00283504776928510468 values:   -60.27531target diff:  ----- 
 0.0014180196879791762 
values:  -53.448914 ----- 

-------------------- ckpt:  5000 --------------------
-----iteration:  10Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold0/train/agent/trajs.pkl! 
target diff: Refresh buffer every 1000000 sampling! 
0.0020543881127549632 values:  -52.3371 ----- 



Loaded trajectories from load path: /home/




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

values:  -52.52315 ----- 

-------------------- ckpt:  3000 --------------------


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

Loaded trajecto


 target diff:  0.0023653144150875063 0.9204638863260166 values:  values:  -52.305595  -57.809135-----  ----- 





To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Lay


-----iteration:  74 target diff:  0.0021931178958895987 values:  -59.584064 ----- 

-----iteration:  1 target diff:  0.004781716523399337 values:  -57.547276 ----- 

-----iteration:  78 target diff:  0.0019279859678754505 values:  -52.171383 ----- 

-----iteration:  75 target diff:  0.0022934512357194775 values:  -59.515118 ----- 

-----iteration:  2 target diff:  0.003197982210594515 values:  -57.542637 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  0 target diff:  0.920569993436554 values:  -52.346256 ----- 

-----iteration:  79 target diff:  0.0020168038230336696 values:  -52.10681 ----- 

-----iteration:  76 target diff:  0.00194488617000454 values:  -59.406048 ----- 

-----iteration:  3 target diff:  0.002

-----iteration:  88 target diff:  0.0021262079681890903 values:  -58.66006 ----- 

-----iteration:  6 target diff:  0.0013421800273613054 values:  -53.63935 ----------iteration:   

3 target diff:  0.0028845045359733147-------------------- values:   -52.10969 ----- 

ckpt:  6000 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold0/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!
-----iteration:  89 target diff:  -----iteration: 0.002041682296450402 values:  -58.784615 13  target diff: ----- 0.0030590958488394596  values:  -57.71942 

----- 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold0/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
 4 target diff:  0.0027102428250657435 values:  -52.186115 ----- 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold0/train/agent1/trajs1.pkl!
Refresh buffer every 100000




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  14 target diff:  0.0021278361636434748 values:  -57.69395 ----- 

-----iteration:  90 target diff:  0.0032377513960687118 values:  -58.836266 ----- 

-----iteration:  5 target diff:  0.0021586038349693366 values:  -52.241028 ----- 

-----iteration:  15 target diff:  0.0029897146360933504 values:  -57.684532 ----- 

-----iter

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold0/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold0/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold0/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold0/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold0/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author 


-----iteration:  24 target diff:  0.0018922519956906052 values:  -57.81366 ----- 

-----iteration:  7 target diff:  0.001768384247971609 values:  -54.6757 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  25 target diff:  0.002097987499013131 values:  -57.78365 ----- 

-----iteration:  8 target diff:  0.0017007807726815431 values:  -54.64376 ----- 

-----iteration:  0 target diff:  0.9222340812634714 values:  -53.637638 ----- 

-----iteration:  9 target diff:  0.0022818979637531612 values:  -54.62786 ----- 

-----iteration:  1 target diff:  0.00181656239659362 values:  -53.662567 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  26 target diff:  0.0023291039096146157 values:  -57.81482 ----- 

-----iteration:  10 target diff:  0.00236705661550135 values:  -54.618774 ----- 

-----iteration:  2 target diff:  0.0012046165089954202 values:  -53.60354 ----- 

-----iteration:  11 target diff:  0.0015639316853412206 values




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floa

-----iteration:  14 target diff:  0.0018390717682434847 values:  -52.213898 ----- 

-----iteration:  15 target diff:  0.00152537791866251 values:  -52.334564 ----- 

-----iteration:  6 target diff:  0.002196560511004582 values:  -62.787945 ----- 

-----iteration:  7 target diff:  0.0016797119347783716 values:  -62.82864 ----- 

-----iteration:  16 target diff:  0.002317793177623726 values:  -52.458035 ----- 

-----iteration:  8 target diff:  0.002274554813799081 values:  -62.849846 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  17 target diff:  0.002303648675509293 values:  -52.564342 ----- 

-----iteration:  9 target diff:  0.0018516548931614655 values:  -62.9044 ----- 



To change all layers to have dtype flo

-----iteration:  20 target diff:  0.0037003037944817706 values:  -59.284298 ----- 

-----iteration:  51 target diff:  0.0018826488110069717 values:  -52.472813 ----- 

-----iteration:  37 target diff:  0.0038438709993319042 values:  -61.949577 ----- 

-----iteration:  21 target diff:  0.0029173409213724298 values:  -59.399334 ----- 

-----iteration:  52 target diff:  0.0019454930360096401 values:  -----iteration:  -52.4312712 -----  target diff: 
 
0.002019692563195213 values:  -53.15756 ----- 

-----iteration:  38 target diff:  0.0027413955586405242 values:  -61.875587 ----- 

-----iteration:  31 target diff:  0.0019257705643371031 values:  -54.14692 ----- 

-----iteration:  22 target diff:  0.0022608844517013997 values:  -59.451572 ----- 

-----iteration:  53 target diff:  0.0017846798799505323 values:  -52.390327 ----- 

-----iteration:  39 target diff:  0.0024703059459032224 values:  -61.79037 ----- 

-----iteration:  13 target diff:  0.002067315973845925 values:  -53.086567 ----- 


-------------------- adv learner --------------------
-----iteration:  69 target diff:  0.0021083776766636264 values:  -51.76057 -----iteration: -----  
48 
target diff:  0.0019385860285622502 values:  -52.786396 ----- 

-----iteration:  58 target diff:  0.003327534754164691 values:  -60.582092 ----- 

-----iteration:  39 target diff:  0.0019532114072156543 values:  -59.440495 ----- 

-----iteration:  70 target diff:  0.002333063852018424 values:  -51.75156 ----- 

-----iteration:  59 target diff:  0.0031882376925958813 values:  -60.51076 ----- 

-----iteration:  40 target diff:  0.002089514101984203 values:  -59.454502 ----- 

-----iteration:  49 target diff:  0.0015397632601701465 values:  -52.68676 ----- 

-----iteration:  71 target diff:  0.002353754077718251 values:  -51.724697 ----- 

-----iteration:  60 target diff:  0.0027166196668343035 values:  -60.38929 ----- 

-----iteration:  41 target diff:  0.0020855275162439354 values:  -59.47244 ----- 

-----iteration:  50 target diff

-----iteration:  86 target diff:  0.0017148175578500231 values:  -51.05589 ----- 

-----iteration:  75 target diff:  0.002830033578246668 values:  -59.34687 ----- 

-----iteration:  62 target diff:  0.0018485106073169526 values:  -52.375305 ----- 

-----iteration:  76 target diff:  0.0027855169152491608 values:  -59.24714 ----- 

-----iteration:  87 target diff:  0.0017884641191881203 values:  -51.053585 ----- 

-----iteration:  63 target diff:  0.002205164318596478 values:  -52.26276 ----- 

-----iteration:  77 target diff:  0.0023609859492635813 values:  -59.1478 ----- 

-----iteration:  64 target diff:  0.0021489191365161348 values:  -52.2709 ----- 

-----iteration:  88 target diff:  0.0021568196482715137 values:  -50.98455 ----- 

-----iteration:  78 target diff:  0.0021493744064715733 values:  -59.058407 ----- 

-----iteration:  65 target diff:  0.001499372238066499 values:  -52.231934 ----- 

-------------------- ckpt:  5000 --------------------
Loaded trajectories from load path




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  90 target diff:  0.001803791005206972 values:  -50.939358 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change a

-----iteration:  3 target diff:  0.002516375371415105 values:  -51.397358 ----- 

-------------------- fqe on dqn & sale --------------------
 -58.66757 ----- 

-----iteration:  3 target diff:  0.0015167703187885496 values:  -58.81205 ----- 

-----iteration:  4 target diff:  0.0024851417450595047 values:  -51.36683 ----- 

-----iteration:  88 target diff:  0.0020717417118762647 values:  -58.565742 ----- 

-----iteration:  4 target diff:  0.0048346273959030605 values:  -58.83529 ----- 

-----iteration:  5-----iteration:   89target diff:   target diff: 0.0021243578742863056 0.0016601911954477139 values:  values:   -51.197624-58.493565  ---------- 
 


-----iteration:  90 target diff:  0.0017947245244070281 values:  -58.385136 ----- 

-----iteration:  5 target diff:  0.00206644759386977 values:  -58.928577 ----- 

-----iteration:  91 target diff:  0.0015624710735614666 values:  -58.33434 ----- 

-----iteration:  6 target diff:  0.0030732548463825366 values:  -51.192085 ----- 

-----iterat




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('floa




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  1 target diff:  0.004551174304851399 values:  -58.624607 ----- 

-----iteration:  11 target diff:  0.0013867522368030557 values:  -52.07478 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold0/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
-----iteration:  12 target diff:  0.0022009976336666926 values:  -52.89972 ----- 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold0/train/agent2/trajs2.pkl!
-----iteration: Refresh buffer every 1000000 sampling!
 22 target diff:  0.0022704768342270616 values:  -59.50431 ----- 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold0/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
-----iteration:  17 target diff:  0.0023526561981601015 values:  Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold0/train/agent4/trajs4.pkl!-52.96964 
-----Refresh buffer every 1000000 sampling!
 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To chan

-----iteration:  54 target diff:  0.0017846789304852388 values:  -60.05351 ----- 

-----iteration:  4 target diff:  0.002030819989843615 values:  -60.80186 ----- 

-----iteration:  42 target diff:  0.0017814063338532729 values:  -54.3305 ----- 

-----iteration:  44 target diff:  0.0028000363219443146 values:  -53.188267 ----- 

-----iteration:  21 target diff:  0.002453281048081372 values:  -53.31263 ----- 

-----iteration:  55 target diff:  0.0018829492555753832 values:  -60.06386 ----- 

-----iteration:  5 target diff:  0.002069828998047386 values:  -60.780876 ----- 

-----iteration:  43 target diff:  0.0023808631366488485 values:  -54.363422 ----- 

-----iteration:  56 target diff:  0.0020945391309974792 values:  -60.089783 ----- 

-----iteration:  22-----iteration:   6target diff:   target diff: 0.002217064508628422  values: 0.001931226586876313  values: -53.31135 -----iteration:  -60.7808  -----45 

 -----target diff:   
0.0020166876233167655
 values:  -53.086773 ----- 

-----iter

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/798842024/fold0/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/798842024/fold0/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/798842024/fold0/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/798842024/fold0/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/798842024/fold0/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author 




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('floa


-------------------- adv learner --------------------
-----iteration:  28 target diff:  0.003582863187781039 values:  -60.984924 ----- 

-----iteration:  0 target diff:  0.9218387576139319 values:  -58.113045 ----- 

-----iteration:  1 target diff:  0.001184120077212592 values:  -58.125004 ----- 

-----iteration:  29 target diff:  0.00308692894599396 values:  -60.885273 ----- 

-----iteration:  30 target diff:  0.0031049372300338616 values:  -60.82318 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- fqe on dqn & sale --------------------


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer construct

 values:  -59.962444 ----- 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold0/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
-----iteration:  18 target diff:  0.0016579798652482276 -----iteration:  3values:   target diff: -53.38827 0.003647292997786676  ----- values:  
-54.4241
 ----- 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold0/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold0/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
-----iteration:  15Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold0/train/agent4/trajs4.pkl! 
target diff: Refresh buffer every 1000000 sampling! 
0.0020804276209300753 values:  -59.092155 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_

-----iteration:  8 target diff:  0.002610597675918816 values:  -54.714695 ----- 

-----iteration:  22 target diff:  0.001930041642179882 values:  -53.35429 ----- 

-----iteration:  52 target diff:  0.002726706418453155 values:  -59.386944 ----- 

-----iteration:  9 target diff:  0.0019136574165512857 values:  -54.66602 ----- 

-----iteration:  53 --------------------target diff:  fqe on dqn & sale  0.002295894378768962-------------------- 
 10
target diff: 
values:  -54.729103 ----- 

-----iteration:  23 target diff:  0.0025969920706012156 values:  -53.37379 ----- 

-----iteration:  11 target diff:  0.002668815130284329 values:  -54.731083 ----- 

-----iteration:  54 target diff:  0.002696190616854237 values:  -59.115295 ----- 

-----iteration:  12 target diff:  0.0016292916072555789 values:  -54.821037 ----- 

-----iteration:  55 target diff:  0.0022133521885520347 values:  -58.916115 ----- 

-----iteration:  -----iteration: 13 24  target diff: target diff:   0.00167846872502125250.00


-----iteration:  66 target diff:  0.001756547044598332 values:  -57.837612 ----- 

-----iteration:  26 target diff:  0.0020121625118178434 values:  -55.178234 ----- 

-----iteration:  33 target diff:  0.0013737341307487028 values:  -53.508892 ----- 

-------------------- training agent --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold1/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable a

Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold0/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold0/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold0/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.
 
0.002606798027723374 values:  -58.816277 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 b


-----iteration:  36 target diff:  0.0019453569460750409 values:  -59.04771 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change a

-53.080692
 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  49 tar

-----iteration:  53 target diff:  0.0019205748740560062 values:  -53.495403 ----- 

-----iteration:  91 target diff:  0.0017288200896586556 values:  -49.236057 ----- 

-----iteration:  16 target diff:  0.0018088291208910759 values:  -58.030586 ----- 

-----iteration:  54 target diff:  0.0017782073579335346 values:  -53.469116 ----- 

-----iteration: -----iteration:   9227  target diff: target diff:   0.00164672498388477450.002829135133949561  values: values:  -61.63816 -49.094006  ----- ----- 



-----iteration:  17 target diff:  0.002755084499807628 values:  -58.064426 ----- 

-----iteration:  93 target diff:  0.0016400605909641507 values:  -48.921124 ----- 

-----iteration:  55 target diff:  0.0018734173905460476 values:  -53.50424 ----- 

-----iteration:  28 target diff:  0.0028141900785206096 values:  -61.673576 ----- 

-----iteration:  18 target diff:  0.002430959565956726 values:  -58.172554 ----- 

-----iteration:  94 target diff:  0.0015279354024774932 values:  -48.748405 -----




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.
-----iteration:  
 59 values: 32target diff:   -58.274433 target diff: 0.0014787795739494567   va


-----iteration:  31 target diff:  0.0021315455114120302 values:  -58.807453 ----- 

-----iteration:  42 target diff:  0.0028413303785767377 values:  -60.825085 ----- 

-----iteration:  0 target diff:  0.923276994628738 values:  -54.385193 ----- 

-----iteration:  32 target diff:  0.0023804837819813526 values:  -58.87399 ----- 

-----iteration:  43 target diff:  0.0030388800395563446 values:  -60.79226 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  1 target diff:  0.0026740822219344274 values:  -54.38678 ----- 

-----iteration:  33 target diff:  0.002508924108486702 values:  -58.953865-----iteration:   2-----  
target diff: 
 0.0015781569695033 values:  -54.40443 ----- 

-----iteration:  0 target diff:  0.917821


-----iteration:  -----iteration:  51 62target diff:   target diff:  0.0016237311691512629 0.0023986738336211045values:   values: -58.994415 -58.40339 -----  -----
 


-----iteration:  9 target diff:  0.0024322479090519112 values:  -53.429653 ----- 

-----iteration:  52 target diff:  0.00214788100182574 values:  -58.98541 -----iteration:  ----- 

63 target diff:  0.0024150903992245284 values:  -58.3059 ----- 

-----iteration:  10 target diff: -----iteration:   0.00180807796489881440  values:  target diff: -53.42802  0.9205510375426772 ----- values: 
 
-53.18655 ----- 

-----iteration:  -----iteration: 53  1 target diff:  target diff:  0.001393819949739734 values: 0.003990172277202851  values: -58.941067  -53.281834-----  -----

 
--------------------
 ckpt:  10000 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/798842024/fold0/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!
----- 

Loaded trajectories from load path: /h

 
----- 

-----iteration:  12 target diff:  0.001957952262308992 values:  -53.41607 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  3 target diff:  0.001909582700823943 values:  -53.41988 ----- 

-----iteration:  66 target diff:  0.0026836478662834434 values:  -57.93729 ----- 

-----iteration:  13 target diff:  0.0016137486910634997 values:  -53.447243 ----- 

----

Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, c


-----iteration:  41 target diff:  0.0013775249107895007 values:  -53.73046 ----- 

-------------------- ckpt:  9000 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/717354021/fold0/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/717354021/fold0/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/717354021/fold0/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
-----iteration:  50 target diff:  0.0025331913076199657 values:  -52.873997 ----- 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/717354021/fold0/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/717354021/fold0/train/agent3/trajs3.pkl!
Refresh buffer ever




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.
 
target diff:  0.0020284240136689423 values:  -59.233814 ----- 

-----iteration:  51 target diff:  0.002424472272142916 values:  -52.876186 -------------------- adv learner -------------------------
 

-----iteration:  52 target diff:  0.0019752806685663952 values:  -52.86996 ----- 

-----iteration:  23 target diff:  0.0018500049040376346 values:  -59.23367 ----- 

-----iteration:  0 target diff:  0.9201322080169629 values:  -59.800243 ----- 

-----iteration:  53 target diff:  0.002124066235568094 values:  -52.789944 ----- 

-----iteration:  1 target diff:  0.0020023661188659607 values:  -59.741516 ----- 

-----iteration:  54 target diff:  -----iteration: 0.002671215787853438 24  target di


-----iteration:  76 target diff:  0.0017710763703754151 values:  -52.531826 ----- 

-----iteration:  77 target diff:  0.001670996341527171 values:  -52.510445 ----- 

-----iteration:  2 target diff:  0.0026450156439975594 values:  -60.85 ----- 

-----iteration:  0 target diff:  0.9201765010920007 values:  -57.722435 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.
-----iteration: 
 78 target diff:  0.001795876972249675 values:  -52.509 ----- 

-----iteration:  79 target diff:  0.0015694728915609244 values:  -52.486652 ----- 

-----iteration:  1 target diff:  0.005129437228868146 values:  -57.734844 ----- 

-----iteration:  3 target diff:  0.0021645024919084966 values:  -60.775593 ----- 

-----iteration:  0 target diff:  0.91964171

-----iteration:  18 target diff:  0.0019133469027568884 values:  -60.736317 ----- 

-----iteration:  16 target diff:  0.0018657340433932048 values:  -55.230854 -----iteration:  -----15  target diff:  

0.001994610700521565 values:  -57.92843 ----- 

-----iteration: -----iteration:  16  17target diff:   target diff: 0.0027540533632012254  0.0022752351455985276values:   values: -57.986763  ------55.213306 
 -----
 

-----iteration:  17 target diff:  0.0023775435465841436 values:  -58.042324 ----- 

-----iteration:  19 target diff:  0.002833851355077921 values:  -60.75784 ----- 

-----iteration:  18 target diff:  0.001973226696312036 values:  -55.16224 ----- 

-----iteration:  20 target diff:  0.002648078525267221 values:  -60.776524 ----- 

-----iteration:  18 target diff:  0.0026600289098687637 values:  -58.033215 ----- 

-----iteration:  19-----iteration:  19  target diff: target diff:  0.0026992109621068136 values:  -58.078056  ----- 

0.002345818066215968 values:  -55.10607 ----- 

-


-----iteration:  39 target diff:  0.0016739559826365377 values:  -58.529488 ----- 

-----iteration:  30 target diff:  0.0024005588716941844 values:  -60.644344 ----- 

-----iteration:  -----iteration: 40  0target diff:   target diff: 0.0023418032682193156  values: 0.9213642796703653  values: -58.51708  -52.463264-----  ----- 



-----iteration:  31 target diff:  0.0024546850866135022 values:  -60.63238 ----- 

-----iteration:  41 target diff:  0.0018981003184388968 values:  -58.528503 ----- 

-----iteration:  1 target diff:  0.0031650917809647097 values:  -52.558205 ----- 

-----iteration:  42 target diff:  0.001844455550290648 values:  -58.496796 ----- 

-----iteration:  2 target diff:  0.0033325476665034244 values:  -52.644417 ----- 

-----iteration:  32 target diff:  0.0021000069354274067 values:  -60.607788 ----- 

-----iteration:  43 target diff:  0.0019292771763138644 values:  -58.49284 ----- 

-----iteration:  33 target diff:  0.0026433427372407076 values:  -60.589687 ----- ---

-----iteration:  41 target diff:  0.001862287756810136 values:  -53.051544 ----- 

-----iteration:  63 target diff:  0.0019455487721369024 values:  -58.302162 ----- 

-----iteration: -----iteration:   42 21target diff:  target diff:   0.0024722991472094560.0022705161604230856  values: values:   -52.96473-53.019566  ---------- 
 


-----iteration:  64 target diff:  0.001885631767957539 values:  -58.159634 ----- 

-----iteration:  22 target diff:  0.0022782439206817263 values:  -52.968975 ----- 

-----iteration:  65 target diff:  0.0018591148700652601 values:  -58.088528 ----- 

-----iteration:  43 target diff:  0.00207321518560586 values:  -53.00534 ----- 

-----iteration:  23 target diff:  0.001582933973298633 values:  -53.168247 ----- 

-----iteration:  44 -----iteration: target diff:   660.0029647977360292782  target diff:  values:  0.0017929029467734284-52.96064 values:   ----- -57.997536 
----- 


-----iteration:  67 target diff: -----iteration:  24-----iteration:  target diff:   4

values: 
 -52.396587 ----- 

-----iteration:  64 target diff:  0.002071670879504517 values:  -52.358513 ----- 

-----iteration:  65 target diff:  0.0014714253154479429 values:  -52.32801 ----- 

-------------------- ckpt:  10000 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold0/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold0/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold0/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold0/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold0/train/agent3/tra




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  0 target diff:  0.9204837600744488 values:  -61.695984 ----- 

-----iteration:  1 target diff:  0.001874583647291397 values:  -61.70541 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  2 target diff:  0.001494794933873911 values:  -61.792236 ----- 

saving model weights at /home/jupyt/leyu




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  9 target diff:  0.0021806991691851363 values:  -54.606194 ----- 

-----iteration:  10 target diff:  0.002357890598403902 values:  -54.590202 ----- 

-----iteration:  11 target diff:  0.001791004426011457 values:  -54.609547 ----- 

-----iteration:  12 target diff:  0.0015496333222711608 values:  -54.67191 ----- 

-----iterat

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/717354021/fold1/train/agent/ckpt/offline_qr_dqn_7000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/798842024/fold1/train/agent/ckpt/offline_qr_dqn_9000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold1/train/agent0/ckpt/offline_qr_dqn_9000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold1/train/agent/ckpt/offline_qr_dqn_1000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold1/train/agent/ckpt/offline_qr_dqn_6000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/717354021/fold1/train/agent/ckpt/offline_qr_dqn_8000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/798842024/fold1/train/agent/ckpt/offline_qr_dqn_10000.ckpt
-------------------- training agents --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUP


saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold1/train/agent0/ckpt/offline_qr_dqn_2000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold1/train/agent1/ckpt/offline_qr_dqn_10000.ckpt
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold1/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base La

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/717354021/fold1/train/agent1/ckpt/offline_qr_dqn_9000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold1/train/agent1/ckpt/offline_qr_dqn_3000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold1/train/agent1/ckpt/offline_qr_dqn_7000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold1/train/agent3/ckpt/offline_qr_dqn_1000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/798842024/fold1/train/agent2/ckpt/offline_qr_dqn_1000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/717354021/fold1/train/agent1/ckpt/offline_qr_dqn_10000.ckpt
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/717354021/fold1/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backe




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/717354021/fold1/train/agent2/ckpt/offline_qr_dqn_6000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold1/train/agent2/ckpt/offline_qr_dqn_4000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold1/train/agent3/ckpt/offline_qr_dqn_8000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/798842024/fold1/train/agent2/ckpt/offline_qr_dqn_8000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold1/train/agent2/ckpt/offline_qr_dqn_1000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/798842024/fold1/train/agent3/ckpt/offline_qr_dqn_9000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold1/train/agent3/ckpt/offline_qr_dqn_3000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold1/train/agent4/ckpt/offline_qr_dqn_10000.ckpt
-------------------- behavior cloning --------------------
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/717354021/fold1/train/agent3/ckpt/offline_qr_dqn_9000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold1/train/agent3/ckpt/offline_qr_dqn_6000.ckpt
-------------------- ckpt:  1000 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold1/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fol




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/798842024/fold1/train/agent3/ckpt/offline_qr_dqn_10000.ckpt
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/798842024/fold1/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default


-----iteration:  0 target diff:  0.9169666062754389 values:  -59.122185 ----- 

-----iteration:  1 target diff:  0.0028022769268779644 values:  -59.077515 ----- 

-----iteration:  2 target diff:  0.00285962660559375 values:  -59.13678 ----- 

-----iteration:  3 target diff:  0.0026474198060294414 values:  -59.168377 ----- 

-----iteration:  4 target diff:  0.0021181721914766507 values:  -59.145542 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold1/train/agent3/ckpt/offline_qr_dqn_5000.ckpt-----iteration: 
 5 target diff:  0.0023761140797119587 values:  -59.13519 ----- 

-----iteration:  6 target diff:  0.002447261369356322 values:  -59.159668 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/717354021/fold1/train/agent4/ckpt/offline_qr_dqn_1000.ckpt
-----iteration:  7 target diff:  0.0023396534021993117 values:  -59.193394 ----- 

-----iteration:  8 target diff:  0.0019310748180267547 values:  -59.178997 ----- 

saving




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-------------------- fqe on dqn & sale ---

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/717354021/fold1/train/agent4/ckpt/offline_qr_dqn_3000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold1/train/agent3/ckpt/offline_qr_dqn_7000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold1/train/agent3/ckpt/offline_qr_dqn_10000.ckpt
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold1/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to th

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/717354021/fold1/train/agent4/ckpt/offline_qr_dqn_5000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold1/train/agent3/ckpt/offline_qr_dqn_9000.ckpt


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  0 target diff:  0.9179230014368005 values:  -59.512115 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold1/train/agent4/ckpt/offline_qr_dqn_2000.ckpt
-----iteration:  1 target diff:  0.0013816049800605002 values:  -59.500916 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/798842024/fold1/train/agent4/ckpt/offline_qr_dqn_6000.ckpt


To change all layers to ha

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/717354021/fold1/train/agent4/ckpt/offline_qr_dqn_7000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/798842024/fold1/train/agent4/ckpt/offline_qr_dqn_8000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold1/train/agent4/ckpt/offline_qr_dqn_4000.ckpt


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  0 target diff:  0.9172639857583709 values:  -60.20937 ----- 

-----iteration:  1 target diff:  0.0023635370796767437 values:  -60.23218 ----- 

-----iteration:  2 target diff:  0.0018802531698020012 values:  -60.26895 ----- 

-----iteration:  3 target diff:  0.002301518989710958 values:  -60.327095 -




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('floa


-------------------- adv learner --------------------
-----iteration:  11 target diff:  0.0014849720248981096 values:  -59.939045 ----- 

-------------------- ckpt:  7000 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold1/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold1/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold1/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold1/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold1/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded tr




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-------------------- fqe on dqn & sale --------------------
-------------------- fqe on dqn & sale --------------------


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can 

-----iteration:  3 target diff:  0.002355185003681646 values:  -59.9726 ----- 

-----iteration:  4 target diff:  0.002440861839836472 values:  -59.98217 ----- 

-----iteration:  13 target diff:  0.001428668539886741 values:  -59.29912 ----- 

-----iteration:  5 target diff:  0.0020390040689250944--------------------  values:  fqe on dqn & sale ---------------------59.989624
 

-----iteration:  6 target diff:  0.0022492428334229065 values:  -59.997105 ----- 

-----iteration:  7 target diff:  0.0025469526824664288 values:  -60.006042 ----- 

-----iteration:  8 target diff:  0.001855025500034268 values:  -60.018734 ----- 

-----iteration:  9 target diff:  0.002177243850541957 values:  -60.02627 ----- 

-----iteration:  10 target diff:  0.0021123716272610136 values:  -60.06214 ----- 

-----iteration:  11 target diff:  0.0024499819962049093 values:  -60.025158 ----- 

-----iteration:  12 target diff:  0.00194285912586341 values:  -60.003 ----- 



To change all layers to have dtype float64 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold1/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold1/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold1/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold1/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold1/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold1/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- fqe on dqn & sale --------------------
-----iteration:  0 target diff:  0.9234215531522609 values:  -62.234577 ----- 

-----iteration:  1 target diff:  0.004741925441284111 values:  -62.24456 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  2 target diff:  0.002656749703986548 values: 

-----iteration:  10 target diff:  0.0019208345359879924 values:  -58.985382Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/798842024/fold1/train/agent1/trajs1.pkl! ----- 
Refresh buffer every 1000000 sampling!


Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/798842024/fold1/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/798842024/fold1/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/798842024/fold1/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold1/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
-----iteration:  35 target diff:  0.003952687311387693 values:  -60.15585 ----- 

-------------------- fqe on dqn & sale --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold1/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold1/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold1/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold1/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, c

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold1/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold1/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold1/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you 




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.
 target diff: 
 0.0022823403875931545 values:  -55.852356 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have 




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  78 target diff:  0.0021917689964752293 values:  -54.84818 ----- 



To change a


-----iteration:  90--------------------  target diff: adv learner  0.001717283291283937-------------------- 
values:  -54.518272 ----- 

-----iteration:  28 target diff:  0.0024201261056122523 values:  -59.429317 ----- 

-----iteration:  0 target diff:  0.9186890481802696 values:  -49.642612 ----- 

-----iteration:  91 target diff:  0.0019373188174176277 values:  -54.50788 ----- 

-----iteration:  1 target diff:  0.0010044175359590335 values:  -49.626152 ----- 

-----iteration:  29 target diff:  0.0017821777591723333 values:  -59.442802 ----- 

-----iteration:  92 target diff:  0.002091685289117917 values:  -54.32666 ----- 

-----iteration:  93 target diff:  0.0023343142849188564 values:  -54.316013 ----- 

-----iteration:  30 target diff:  0.0024067548797794343 values:  -59.374798 ----- 

-----iteration:  94 target diff:  0.0017835181752830858 values:  -54.25345 ----- 

-----iteration:  31 target diff:  0.0015821917415729065 values:  -59.31066 ----- 

-----iteration:  95 target diff:

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold1/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
-----iteration:  3 target diff:  0.0013656090458327621 values:  -60.418736 ----- 
-----iteration: 
 43 target diff:  0.002138870033295018 values:  -58.143414 ----- 

-------------------- fqe on dqn & sale --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold1/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold1/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold1/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold1/train/agent4/trajs4.pkl!
Refresh buffe




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  44 target diff:  0.0023311557365902945 values:  -58.068787 ----- 

-----iteration:  45 target diff:  0.0018189018572767567 values:  -57.887814 ----- 

-----iteration:  46 target diff:  0.002304136303314259 values:  -57.70764 ----- 

-----iteration:  47 target diff:  0.0020037177539014273 values:  -57.609436 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  48 target diff:  0.0022604531063035475 values:  -57.390522 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer c


-----iteration:  10 target diff:  0.0020502574172231607 values:  -60.832073 ----- 

-----iteration:  60 target diff:  0.001686064320700353 values:  -54.807034 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  61 target diff:  0.001690052432771491 values:  -54.699978 ----- 

-----iteration:  11 target diff:  0.0022025218463592993 values:  -60.817635 ----- 

-----iteration:  62 target diff:  0.0016222207896388085 values:  -54.546516 ----------iteration:   
0
 target diff:  0.921439798985183 values:  -62.955627 ----- 

-----iteration:  12 target diff:  0.0026027990146104286 values:  -60.849495 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  63 target diff:  0.001691338435903818 va

values: 
 -49.42221 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------------iteration:  adv learner  --------------------11
 target diff:  0.002678823245724866 values:  -55.71688 ----- 

-----iteration:  13 target diff:  0.001753693671920479 values:  -62.664986 ----- 

-----iteration:  27 target diff:  0.002431065604959678 values:  -61.19417 ----- 

-----iteration:  12 target diff:  0.0022088889839035845 values:  -55.

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/798842024/fold1/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/798842024/fold1/train/agent1/trajs1.pkl!-----iteration:  
17Refresh buffer every 1000000 sampling! 
target diff:  0.0032506991758530436 values:  -49.509586 ----- 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/798842024/fold1/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/798842024/fold1/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/798842024/fold1/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change ju


-------------------- adv learner --------------------


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- fqe on dqn & sale --------------------
-----iteration:  18 target diff:  0.002387260916375359 values:  -49.460438 ----- 

-----iteration:  0 target diff:  0.9181489551894767 values:  -58.798004 ----- 

-----iteration:  0 target diff:  0.9183332426997017 values:  -55.206593 ----- 

-----iteration:  1 target diff:  0.0020567738933737296 values:  -58.81794 ----- 

-----iteration:  19 target diff:  0.0028960682898696386-----iteration:   values: 1  target diff: -49.48155  0.0028066752459668287----- 
 values: 
 -55.237373 ----- 

-----iteration:  2 target diff:  0.0017754908067720199 values:  -58.81225 ----- 

-----iterat

-----iteration:  3 target diff:  0.0017733243845103146 values:  -61.215508 ----- 

-----iteration:  27 target diff:  0.0020341545057141306 values:  -48.99728 ----- 

-----iteration:  1 target diff:  0.0013130083805412815 values:  -60.71746 ----- 

-----iteration:  12 target diff:  0.0027309761077607877 values:  -59.350975 ----- 

-----iteration:  4 target diff:  0.0017595612241896915 values:  -61.268738 ----- 

-----iteration:  28 target diff:  0.0023019460449110606 values:  -49.05216 ----- 

-----iteration:  13 target diff:  0.002357383291634519 values:  -59.464348 ----- 

-----iteration:  29 target diff:  0.0023970550965380084 values:  -49.094547 ----- 

-------------------- fqe on dqn & sale --------------------


-----iteration:  14 target diff:  0.002063505586753962 values:  -59.49471 ----- 

-----iteration:  30 target diff:  0.002046817245838487 values:  -49.07013 ----- 

-----iteration:  15 target diff:  0.001900430683060917 values:  -59.5145 ----- 

-----iteration:  6 target di




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  43 target diff:  0.002201049979441815 values:  -48.7316 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  31 target diff:  0.0016857394325301847 values:  -59.01016 ----- 

-----iteration:  44 target diff:  0.0019833944388813504 values:  -48.729656 ----- 

-----iteration:  32 target diff:  0.001925831289429046 values:  -58.874878 ----- 



To change 




Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold1/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold1/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!





Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold1/train/agent2/trajs2.pkl!

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold1/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold1/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!

Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  0 target diff:  0.9173971


-----iteration:  9 target diff:  0.0022736231707356936 values:  -59.973633 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

 7 target diff:  0.00206214450375141 values:  -61.377056 ----- 

-----iteration:  10 target diff:  0.0023550802124725967 values:  -60.03623 ----- 

-----iteration:  0 -----iteration: target diff:  8  0.9172854894302149 target diff:  values: 0.002001663485657315  -55.52416values:  -----iteration: -----   -61.462452 0
----- 
 target diff: 
 
0.9224254856351448 values:  -62.04916 ----- 

-----iteration: -----iteration:   110  target diff:  target diff: 0.0020788838488581068  0.9167214476095847values:   values: -59.993145  -50.785076-----  -----
 


-----iteration:  1 target diff:  0.0019095664776956824 values: 

-----iteration:  2 target diff:  -----iteration: 0.0022960248337459137 3 values:  target diff:   -47.5967450.003008522185492362  -----values:   -55.30093
 
----- 

-----iteration:  4 target diff:  0.002205756952450405 values:  -55.336987 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  3 target diff:  0.0016008841677552237 values:  -47.61786 ----- 

-----iteration:  5 target diff:  0.002624458504867102 values:  -55.3997 ----- 

-----iteration:  4 target diff:  0.0011328563942564849 values:  -47.57365 ----- 

-------------------- ckpt:  5000 --------------------
-----iteration:  6 target diff:  0.002188196399322346 values:  -55.388187 ----- 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_d


-----iteration:  0 target diff:  0.9225246921143971 values:  -61.118584 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all 


-----iteration:  9 target diff:  0.0017787678513779604 values:  -61.036175 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  0 target diff:  0.9190500258305279 values:  -48.98523 ----- 

-----iteration:  10 target diff:  0.0016525321959249615 values:  -60.924385 ----- 

-----iteration:  1 target diff:  0.0013843134924605676 values:  -48.950306 ----- 

-----iteration:  0 target diff:  0.9212875333678225 values:  -59.591927 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold2/train/agent/ckpt/offline_qr_dqn_1000.ckpt
-----iteration:  1 target diff:  0.0025950231512551396 values:  -59.600098 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  11 

-----iteration:  24 target diff:  0.0018394900882215642 values:  -59.987583 ----- 

-----iteration:  9 target diff:  0.001627028586183671 values:  -49.48316 ----- 

-----iteration:  25 target diff:  0.0018040617567929914 values:  -59.94703 ----- 

-----iteration:  10 target diff:  0.0016148935372084755 values:  -49.516644 ----- 

-----iteration:  26 target diff:  0.0021011927292712397 values:  -59.905952 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  11 target diff:  0.0012337582331890789 values:  -49.50815 ----- 

-------------------- ckpt:  6000 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold1/train/agent/trajs.pkl!
Refresh buffer every 100000




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  0 --------------------target diff:   adv learner0.9180395948909155  -----------

-----iteration:  23 target diff:  0.002002758134053097Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold1/train/agent0/trajs0.pkl! 
values: Refresh buffer every 1000000 sampling!


Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold1/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold1/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold1/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold1/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass

-------------------- fqe on dqn & sale --------------------
-----iteration:  61 target diff:  0.001792502623055558 values:  -57.01369 ----- 

-----iteration:  28 target diff:  0.0022208109848568954 values:  -55.675884 ----- 

-----iteration:  62 target diff:  0.001712679127084176 values:  -57.00491 ----- 

-----iteration:  29 target diff:  0.001695035180387428 values:  -55.610165 ----- 

-----iteration:  63 target diff:  0.00164080824943552 values:  -57.033398 ----- 

-----iteration:  30 target diff:  0.001474508460856271 values:  -55.57749 ----- 

-------------------- ckpt:  5000 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold1/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!



Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold1/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/q

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/717354021/fold1/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/717354021/fold1/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/717354021/fold1/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/717354021/fold1/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/717354021/fold1/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author 


-------------------- adv learner --------------------


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  0 target diff:  0.9166293658439748 values:  -54.672787 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  1 target diff:  0.0014691275506888359 values:  -54.68853 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  0 target diff:  0.9196968333978474 values:  -47.499138 ----- 



To change all layers to have dtype floa

-----iteration:  6 target diff:  0.0023331341240629523 values:  -55.35221 ----- 

-----iteration:  10 target diff:  0.0013256756562294106 values:  -60.043602 ----- 

-------------------- fqe on dqn & sale --------------------
-------------------- training agent --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/798842024/fold2/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing auto

-----iteration:  1 target diff:  0.002607030820713616 values:  -46.194553 ----- 

-----iteration:  20 target diff:  0.0030103983023043734 values:  -55.886456 ----- 

-----iteration:  21 target diff:  0.002497563148306026 values:  -55.86689 ----- 

-----iteration:  2 target diff:  0.001937033649462489 values:  -46.211346 ----- 

-----iteration:  22 target diff:  0.001858208215682221 values:  -55.92545 ----- 

-----iteration:  3 target diff:  0.002043638786988364 values:  -46.201332 ----- 

-----iteration:  23 target diff:  0.002975125454340241 values:  -55.99477 ----- 

-----iteration:  4 target diff:  0.0016210974557929014 values:  -46.20254 ----- 

-----iteration:  24 target diff:  0.0024884314829064246 values:  -56.05475 ----- 

-----iteration:  5 target diff:  0.0015794186473515202 values:  -46.18764 ----- 

-----iteration:  25 target diff:  0.0026331007480625526 values:  -56.014374 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold2/train/age

-----iteration:  41 target diff:  0.0022851099537167926 values:  -55.542202 ----- 

-----iteration:  42 target diff:  0.0019939379927059863 values:  -55.502285 ----- 

-----iteration:  43 target diff:  0.001366911843261387 values:  -55.47045 ----- 

-------------------- ckpt:  6000 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold1/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold1/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold1/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold1/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUP




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold2/train/agent/ckpt/offline_qr_dqn_6000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/798842024/fold2/train/agent/ckpt/offline_qr_dqn_2000.ckpt


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx(

-----iteration:  10 target diff:  0.003181592786490369 values:  -61.849216 ----- 

-----iteration:  11 target diff:  0.0022001966069130022 values:  -61.95003 ----- 

-----iteration:  12 target diff:  0.002890661589912856 values:  -61.954792 ----- 

-----iteration:  13 target diff:  0.00217315428870741 values:  -61.931843 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  14 target diff:  0.0024903461771868278 values:  -61.851276 ----- 

-----iteration:  15 target diff:  0.0018355887857154987 values:  -61.79363 ----- 

-----iteration:  0 target diff:  0.9188253149678695 values:  -54.54096 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just thi

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold1/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold1/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold1/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold1/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
-----iteration:  68 Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold1/train/agent4/trajs4.pkl!target diff: 
 Refresh buffer every 1000000 sampling!0.0030290077841491347


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='fl




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  69 target diff:  0.0025778779165385895 values:  -56.085773 ----- 

-----iteration:  70 target diff:  0.0038550187104165285 values:  -56.13086 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  71 target diff:  0.0023710612039722356 values:  -56.032585 ----- 

-----iteration:  72 target diff:  0.002346787124396107 values:  -56.06083 ----- 

-----iteration:  73 target diff:  0.001957065627682041 values:  -56.018555 ----- 

-----iteration:  74 target diff:  0.0020114977390260263 values:  -55.94431 ----- 

-----iteration:  75 target diff:  0.0018031464111605017 values:  -55.858055 ----- 

-



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  0 target diff:  0.9177146153312509 values:  -55.368244 ----- 

-----iteration:  1 target diff:  0.0029892301282087564 values:  -55.32101 ----- 

-----iteration:  2 target diff:  0.003186746246147645 values:  -55.33869 ----- 

-----iteration:  3 target diff:  0.002337997244863875 values:  -55.34443 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  4 target diff:  0.0025595157142185745 values:  -55.36568 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/717354021/fold1/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/717354021/fold1/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/717354021/fold1/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/717354021/fold1/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/717354021/fold1/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold1/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold1/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold1/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
-----iteration: Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold1/train/agent3/trajs3.pkl! 
8Refresh buffer every 1000000 sampling! target diff:  0.0011985732529458289
 values:  -61.935467 ----- 

-------------------- ckpt:  9000 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold1/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/da




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-------------------- fqe on dqn & sale --------------------
-------------------- fqe on dqn & sale --------------------
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/798842024/fold2/train/agent/ckpt/offline_qr_dqn_9000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold2/train/agent/ckpt/offline_qr_dqn_4000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold2/train/agent0/ckpt/offline_qr_dqn_3000.ckpt


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='fl




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold2/train/agent/ckpt/offline_qr_dqn_5000.ckpt
-----iteration:  7 target diff:  0.0015064923686808074 values:  -62.341167 ----- 

-----iteration:  0 target diff:  0.9181330090733537 values:  -55.408195 ----- 

-----iteration:  1 target diff:  0.003774970196274862 values:  -55.413704 ----- 

-----iteration:  8 target diff:  0.0023693289050278643 values:  -62.43656 ----- 

-----iteration:  2 target diff:  0.0032930162837208426 values:  -55.390015 ----- 

-----iteration:  9 target diff:  0.0025046463275316404 values:  -62.34228 ----- 

-----iteration:  3 target diff:  0.0030933445947229654 values:  -55.32757 ----- 

----- 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/717354021/fold1/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/717354021/fold1/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/717354021/fold1/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/717354021/fold1/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/717354021/fold1/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/798842024/fold2/train/agent0




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold2/train/agent0/ckpt/offline_qr_dqn_9000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/798842024/fold2/train/agent0/ckpt/offline_qr_dqn_5000.ckpt
-----iteration:  9 target diff:  0.001878312066342354 values:  -55.29353 ----- 

-----iteration:  10 target diff:  0.0019007549258357077 values:  -55.327206 ----- 

-----iteration:  11 target diff:  0.0017335366720756936 values:  -55.336555 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold2/train/agent/ckpt/offline_qr_dqn_10000.ckpt
-------------------- training agents --------------------
Loaded tra

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/798842024/fold2/train/agent0/ckpt/offline_qr_dqn_10000.ckpt
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/798842024/fold2/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold2/train/agent0/ckpt/offline_


saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold2/train/agent1/ckpt/offline_qr_dqn_10000.ckpt
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold2/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/717354021/fold2/train/agent0/ckpt/offline


saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold2/train/agent3/ckpt/offline_qr_dqn_1000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold2/train/agent0/ckpt/offline_qr_dqn_7000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/717354021/fold2/train/agent1/ckpt/offline_qr_dqn_2000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/798842024/fold2/train/agent2/ckpt/offline_qr_dqn_7000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold2/train/agent3/ckpt/offline_qr_dqn_2000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold2/train/agent2/ckpt/offline_qr_dqn_1000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold2/train/agent0/ckpt/offline_qr_dqn_8000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/717354021/fold2/train/agent1/ckpt/offline_

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/717354021/fold2/train/agent2/ckpt/offline_qr_dqn_5000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold2/train/agent1/ckpt/offline_qr_dqn_10000.ckpt
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold2/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Lay




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/717354021/fold2/train/agent3/ckpt/offline_qr_dqn_1000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold2/train/agent2/ckpt/offline_qr_dqn_6000.ckpt
-------------------- fqe on dqn & sale --------------------
saving model weights at /home




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-------------------- fqe on dqn & sale --------------------
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/717354021/fold2/train/agent3/ckpt/offline_qr_dqn_3000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold2/train/agent2/ckpt/offline_qr_dqn_8000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/798842024/fold2/train/agent4/ckpt/offline_qr_dqn_8000.ckpt


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of 


-----iteration:  0 target diff:  0.9130665744953622 values:  -53.315144 ----- 

-----iteration:  1 target diff:  0.002381322766725158 values:  -53.39281 ----- 

-----iteration:  2 target diff:  0.002067745202921948 values:  -53.421154 ----- 

-----iteration:  3 target diff:  0.0017718213909951749 values:  -53.513367 ----- 

-----iteration:  4 target diff:  0.0015710383934753266 values:  -53.58482 ----- 

-----iteration:  5 target diff:  0.001459267423948578 values:  -53.583374 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/717354021/fold2/train/agent3/ckpt/offline_qr_dqn_5000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold2/train/agent2/ckpt/offline_qr_dqn_10000.ckpt
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold2/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('fl

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold2/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold2/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold2/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold2/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold2/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold2/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold2/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold2/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold2/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold2/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold2/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
-----iteration:  36 target diff:  0.0026309790097804615 values:  -48.245365 ----- 
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold2/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold2/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold2/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold2/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change ju

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold2/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold2/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold2/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold2/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold2/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author 


-----iteration:  69 target diff:  0.0018458407243649244 values:  -44.367016 ----- 

-----iteration:  70 target diff:  0.0021946987759826823 values:  -44.34306 ----- 

-----iteration:  71 target diff:  0.0021874188890658956 values:  -44.29899 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold2/train/agent3/ckpt/offline_qr_dqn_5000.ckpt
-------------------- fqe on dqn & sale --------------------
 target diff:  0.0018054157746648044 values:  -44.30903 ----- 

-----iteration:  73 target diff:  0.001972058405481377 values:  -44.297504 ----- 

-----iteration:  74 target diff:  0.002058798201754606 values:  -44.27475 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold2/train/agent4/ckpt/offline_qr_dqn_8000.ckpt
-----iteration:  75 target diff:  0.0018044606390257942 values:  -44.23756 ----- 

-----iteration:  76 target diff:  0.0017790777612402724 values:  -44.162727 ----- 

-----iteration:  77 target diff:  0.001




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('floa


-----iteration:  10 target diff:  0.0014723325585300087 values:  -52.02181 ----- 

-------------------- ckpt:  8000 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold2/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold2/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold2/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold2/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold2/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/dat


-----iteration:  0 target diff:  0.9130081678599862 values:  -54.299385 ----- 


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.


-------------------- adv learner --------------------
-----iteration:  1 target diff:  0.0037531404924563525 values:  -54.302547 ----- 

-----iteration:  2 target diff:  0.002420741563900182 values:  -54.3232 ----- 

-----iteration:  3 target diff:  0.0019262956976285634 values:  -54.274467 ----- 

-----iteration:  4 target diff:  0.0018788003350250608 values:  -54.349506 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  5 target diff:  0.002293915684100926 values:  -54.324646 ----- 

-----iteration:  6 target diff:  0.0017518646792059318 values:  -54.379585 ----- 

-----itera


Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/798842024/fold2/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/798842024/fold2/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backe


-------------------- adv learner --------------------


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  0 target diff:  0.9138794125919814 values:  -53.664116 ----- 

-----iteration:  0 target diff:  0.9119951022806774 values:  -51.790493 ----- 

-----iteration:  1 target diff:  0.002958605068688117 values:  -53.618473 ----- 

-----iteration:  1 target diff:  0.0027424513977249727 values:  -51.78432 ----- 

-------------------

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold2/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold2/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold2/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold2/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold2/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author 



Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/798842024/fold2/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/798842024/fold2/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/798842024/fold2/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/798842024/fold2/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/798842024/fold2/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the autho




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  17 target diff:  0.003560057477792249 values:  -59.314842 ----- 

-----iteration:  18 target diff:  0.002558027833695198 values:  -59.28433 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  19 target diff:  0.002330438679724292 values:  -59.20205 ----- 

-----iteration:  20 target diff:  0.0019350197448667272 values:  -59.073933 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  21 target diff:  0.002381010054296062 values:  -58.929688 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold2/train/agent3/ckpt/offline_




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner-----iteration:  40  target diff: -------------------- 
0.0020319036578530982 values:  -56.64871 ----- 

-----iteration:  3 target diff:  0.0026539277675136407 values:  -52.11658 ----- 

-----iteration:  41 target diff:  0.0020535214956942855 values:  -56.510838 ----- 

-----iteration:  4 target diff:  0.0021654950806354236 values:  -52.264626 ----- 

-----iterat




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  2 target diff:  0.0013228544436754753 values:  -----iteration: -50.454597 -----  54

 target diff:  0.0018414310410736965 values:  -54.349846 ----- 

-----iteration:  55 target diff:  0.0018189238671744014 values:  -54.205376 ----- 

-----iteration:  56 target diff:  0.0017781842442854104 values:  -54.06629 ----- 

-----iteration:  57 target diff:  0.0017287711364248055 values:  -53.92295 ----- 

-----iteration:  58 target diff:  0.0017131198494641131 values:  -53.735615 ----- 

-----iteration:  59 target diff:  0.0019459073620316737 values:  -53.58203 ----- 

-----iteration:  60 target diff:  0.002170057846987283 values:  -53.43153 ----- 



To change all layers to have d

-----iteration:  83 target diff:  0.0020879396693511466 values:  -49.941982 ----- 

-----iteration:  84 target diff:  0.00193098122603272 values:  -49.855164 ----- 

-----iteration:  85 target diff:  0.0020403967967392318 values:  -49.77234 ----- 

-----iteration:  86 target diff:  0.0017159884208738803 values:  -49.63443 ----- 

-----iteration:  87 target diff:  0.0020187468587624735 values:  -49.628387 ----- 

-----iteration:  88 target diff:  0.0017736085577223673 values:  -49.578743 ----- 

-----iteration:  89 target diff:  0.0016463985466670687 values:  -49.51237 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  90 target diff:  0.001834772738765666 values:  -49.424194 ----- 

-----iteration:  91 target diff: 

-----iteration:  31 target diff:  0.001992120976043943 values:  -51.501854 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold2/train/agent4/ckpt/offline_qr_dqn_3000.ckpt
-----iteration:  32 target diff:  0.0015998968397585897 values:  -51.338787 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/717354021/fold2/train/agent4/ckpt/offline_qr_dqn_8000.ckpt
-------------------- fqe on dqn & sale --------------------
-----iteration:  33 target diff:  0.0018282470882799173 values:  -51.11645 ----- 

-----iteration:  34 target diff:  0.0026419727003645682 values:  -50.979755 ----- 

-----iteration:  35 target diff:  0.002012411778801021 values:  -50.72705 ----- 

-----iteration:  36 target diff:  0.0026463225779043927 values:  -50.606087 ----- 

-----iteration:  saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold3/train/agent/ckpt/offline_qr_dqn_3000.ckpt37
 target diff:  0.002296465738223985 value




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  73 target diff:  0.0022157263923048972 values:  -45.72779 ----- 

-----iteration:  74 target diff:  0.002631074042308761 values:  -45.578712 ----- 

-----iteration:  75 target diff:  0.0023018581092551322 values:  -45.52243 ----- 

-------------------------iteration:   fqe on dqn & sale76  target diff: -------------------- 


-----iteration:  77 target diff:  0.0024520625923530968 values:  -45.437786 ----- 

-----iteration:  78 target diff:  0.001986597050458022 values:  -45.39469 ----- 

-----iteration:  79 target diff:  0.0027426647805963043 values:  -45.37536 ----- 

-----iteration:  80 target diff:  0.00226091911359



Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/717354021/fold2/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/717354021/fold2/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/717354021/fold2/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/717354021/fold2/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/717354021/fold2/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the autho

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold2/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold2/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold2/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you 




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  2 target diff:  0.0014427427212461992 values:  -59.46957 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold2/train/agent4/ckpt/offline_qr_dqn_6000.ckpt
-----iteration:  8 target diff:  0.0029914614368575067 values:  -66.06315 ----- 

-----iteration:  9 target diff:  0.00320691919421226

-----iteration:  41 target diff:  0.0023381722013603693 values:  -53.658447 ----- 

-----iteration:  66 target diff:  0.004883506252456516 values:  -61.09594 ----- 

-----iteration:  30 target diff:  0.0019249434449702563 values:  -48.625652 ----- 

-----iteration:  67 target diff:  0.005120085347574298 values:  -61.06161 ----- 

-----iteration:  42 target diff:  0.00226777091358956 values:  -53.458992 ----- 

-----iteration:  31 target diff:  0.00248047898683462 values:  -48.428123 ----- 

-----iteration:  68 target diff:  0.003995761768315754 values:  -60.96235 ----- 

-----iteration:  43 target diff:  0.002153538907059354 values:  -53.222378 ----- 

-----iteration:  69 target diff:  0.0031791982405970226 values:  -60.911827 ----- 

-----iteration:  32 target diff:  0.0022877118191379285 values:  -48.211967 ----- 

-----iteration:  70 target diff:  0.0029558191899107353 values:  -60.84274 ----- -----iteration: 

 44 target diff:  0.002622476106148816 values:  -52.94382 ----- 

-----i

-----iteration:  92 target diff:  0.002292348063347202 values:  -59.017097 ----- 

-----iteration:  54 target diff:  0.002084638214480862 values:  -43.784817 ----- 

-----iteration:  93 target diff:  0.0022608150172727676 values:  -59.01206 ----- 

-----iteration:  55 target diff:  0.001995804260967061 values:  -43.635777 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.
94
 target diff:  0.0022567295912668947 values:  -58.981373 ----- 

-----iteration:  56 target diff:  0.001955579899976358 values:  -43.575855 ----- 

-----iteration:  95 target diff:  0.0029487278398490097 values:  -58.965534 ----- 

-----iteration:  57 target diff:  0.0023811588048569384 values:  -43.454655 ----- -----iteration: 

 0 target diff:  0.91573291686570

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold2/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold2/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold2/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold2/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold2/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.


-------------------- adv learner --------------------
-----iteration:  8 target diff:  0.002097345237494563 values:  -64.328476 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold2/train/agent4/ckpt/offline_qr_dqn_10000.ckpt
-------------------- behavior cloning --------------------
-----iteration:  0 target diff:  0.9118011898584969 values:  -51.817497 -----iteration: ----- 9 target diff:  0.0019515969674047087 values:   -64.47868 ----- 



saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold3/train/agent/ckpt/offline_qr_dqn_10000.ckpt
-------------------- training agents --------------------
Loaded trajectories from loa

-----iteration:  0 target diff:  0.9146159906112449 values:  -58.60267 ----- 

-----iteration:  19 target diff:  0.0018245604405996195 values:  -64.93953 ----- 

-----iteration:  20 target diff:  0.0020754420612773047 values:  -65.053444 ----- 

-----iteration:  1 target diff:  0.001126365468394821 values:  -58.374054 ----- 

-----iteration:  21 target diff:  0.002301076859141117 values:  -65.06905 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  22 target diff:  0.002106555141672826 values:  -65.17727 ----- 

-----iteration:  23 target diff:  0.0016731455285016687 values:  -65.27805 ----- 

-----iteration:  0 target diff:  0.9126262297500815 values:  -52.34019 ----- 

-----iteration:  24 target diff:  0.002211293


----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  43 target diff:  0.0022000690736314584 values:  -65.009834 ----- 

-----iteration:  16 target diff:  0.001946843001204142 values:  -58.988525 ----- 
-----iteration:  
16 target diff:  0.0026583371027054073 values:  -52.541805 ----- 

-----iteration:  44 target diff:  0.003104154850774937 values:  -64.97882 ----- 

-----iteration:  17 target diff:  0.00238621347590594 values:  -58.99781 ----- 

-----iteration:  17 target diff:  0.002177736646747961 values:  -52.500336 ----- 

-----iteration:  18 target diff:  0.0017547634996303236 values:  -58.97397 ----- 

-----iteration:  45 target diff:  0.002769290852968685 v

-------------------- adv learner -------------------------iteration: 
 34 target diff:  0.0027565795159093056 values:  -50.335106 ----- 

-----iteration:  61 target diff:  0.0022202510794223826 values:  -63.824837 ----- 

-----iteration:  35 target diff:  0.0022937588537350023 values:  -50.221165 ----- 

-----iteration:  62 target diff:  0.0022394113377113543 values:  -63.701565 ----- 

-----iteration:  36 target diff:  0.0025378938679375684 values:  -50.056538 ----- 

-----iteration:  37 target diff:  0.0025875954385072963 values:  -49.854103 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold3/train/agent0/ckpt/offline_qr_dqn_2000.ckpt
-----iteration:  63 target diff:  0.002921999167667196 values:  -63.59654 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable au

-----iteration:  53 target diff:  0.003365846751589429 values:  -46.63998 ----- 

-----iteration:  77 target diff:  0.002446199103925588 values:  -61.99425 ----- 

-----iteration:  54 target diff:  0.003144945800244951 values:  -46.514896 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  78 target diff:  0.0025680467570558933 values:  -61.948627 ----- 

-----iteration:  55 target diff:  0.0020918532337699408 values:  -46.373623 ----- 

-----iteration:  79 target diff:  0.0023669811581069704 values:  -61.850765 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  56 target diff:  0.0028765547853421823 values:  -46.29464 ----- 

-----iteration:  57 target diff:  0.0021706840173749257 v

 71 target diff:  0.002238028951658691 values:  -44.41983 ----- 

-----iteration:  14 target diff:  0.0018046617199071883 values:  -58.674805 ----- 

-----iteration:  72 target diff:  0.0024112707679935525 values:  -44.290092 ----- 

-----iteration:  15 target diff:  0.0016988893590919722 values:  -58.68836 ----- 

-----iteration:  73 target diff:  0.0029063164465301283 values:  -44.170788 ----- 

-----iteration:  16 target diff:  0.0024228268981822602 values:  -58.648975 ----- 

-----iteration:  74 target diff:  0.001955733763878775 values:  -44.133858 ----- 

-----iteration:  17 target diff:  0.0018267128263594252 values:  -58.652317 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  18 target diff:  0.00222117427

-----iteration:  5 target diff:  0.002045274186096644 values:  -64.33131 ----- 

-----iteration:  28 target diff:  0.0022552747815496148 values:  -58.28515 ----- 

-----iteration:  6 target diff:  0.001700442426658673 values:  -64.3743 ----- 

-----iteration:  29 target diff:  0.002127716701997045 values:  -58.14311 ----- 

-----iteration:  7 target diff:  0.0022166984092917255 values:  -64.41252 ----- 

-----iteration:  30 target diff:  0.0026298073105017216 values:  -58.0839 ----- 

-----iteration:  8 target diff:  0.00195143637845737 values:  -64.45139 ----- 

-----iteration:  31 target diff:  0.001778017632268986 values:  -57.99657 ----- 

-----iteration:  9 target diff:  0.0014758556119062296 values:  -64.47511 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  32 target diff:  0.002010040580349496 values:  -57.871086 ----- 

-----iteration:  33 target diff:  0.002541342490538039 values:  -57.803806 ----- 

-----iteration:  34 target diff:  0.0020




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  18 target diff:  0.0027809898100735583 values:  -64.78038 ----- 

-----iteration:  60 target diff:  0.002176863463348138 values:  -55.35605 ----- 

-----iteration:  61 target diff:  0.001781123914000798 values:  -55.14112 ----- 

-----iteration:  19 target diff:  0.003508877184269191 values:  -64.842415 ----- 

-----iteratio


-----iteration:  88 target diff:  0.001949223184502549 values:  -51.65604 ----- 

-----iteration:  42 target diff:  0.0022094589410813553 values:  -63.935844 ----- 

-----iteration:  89 target diff:  0.0017387946160445167 values:  -51.481777 ----- 

-----iteration:  43 target diff:  0.0025556794774493044 values:  -63.825073 ----- 

-----iteration:  0 target diff:  0.9186420894863493 values:  -60.44649 ----- 

-----iteration:  90 target diff:  0.0018330799248463272 values:  -51.387936 ----- 

-----iteration:  1 target diff:  0.0024804306514597594 values:  -60.422783 ----- -----iteration:  

44 target diff:  0.002256314535060547 values:  -63.73316 ----- 

-----iteration:  91 target diff:  0.0017140164828021122 values:  -51.32855 ----- 

-----iteration:  2 target diff:  0.0019475461196178008 values:  -60.482998 ----- 

-----iteration:  92 target diff:  0.001678193332542031 values:  -51.153606 ----- 

-----iteration:  45 target diff:  0.002028313926634712 values:  -63.668293 ----- 

savin




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  94 target diff:  0.001590

-----iteration:  62 target diff:  0.0017231876932584293 values:  -62.09201 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  63 target diff:  0.0019009060515397958 values:  -62.009094 ----- 

-----iteration:  64 target diff:  0.001927502945415322 values:  -61.919895 ----- 

-----iteration:  0 target diff:  0.9154238633063979 values:  -58.11158 ----- 

-----iteration:  65 target diff:  0.0019502705418948856 values:  -61.813816 ----- 

-----iteration:  1 target diff:  0.0016707911348553804 values:  -58.085632 ----- 

-----iteration:  66 target diff:  0.001887562601116422 values:  -61.770725 ----- 

-----iteration:  2 target diff:  0.0012584487344624899 values:  -58.066803 ----- 

-----iteration:  67 target diff:  0.0

target diff:  0.0018144211428669696 values:  -59.93696 ----- 

-----iteration:  8 target diff:  0.0019082557817297777 values:  -58.61276 ----- -----iteration: 
 85
 target diff:  0.002300255920787611 values:  -59.972176 ----- 

-----iteration:  9 target diff:  0.00196598836279302 values:  -58.59023 ----- 

-----iteration:  86 target diff:  0.0018086994432456526 values:  -59.879707 ----- 

-----iteration:  87 target diff:  -----iteration: 0.0023650534467535692  10values:   -59.88448target diff:  ----- 0.001814811735156495 
 
values:  -58.5252 ----- 

-----iteration:  88 target diff:  0.001950594733458184 values:  -59.888134 ----- 

-----iteration:  11 target diff:  0.00171960082311651 values:  -58.553974 ----- 

-----iteration:  89 target diff:  0.0015409691885471932 values:  -----iteration:  12 -59.880154 target diff:  -----0.0018234990757577836  
values: 
 -58.431267 ----- 

-----iteration:  13 target diff:  0.0022448594709794547 values:  -58.2969 ----- 



To change all layers to hav

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold2/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
-----iteration:  29 target diff:  0.002416265617845014 values:  -56.379097 ----- 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold2/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold2/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold2/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold2/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change jus


-------------------- adv learner --------------------
-----iteration:  30 target diff:  0.0023142068490078385 values:  -56.259426 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/798842024/fold3/train/agent/ckpt/offline_qr_dqn_5000.ckpt


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.
 


-----iteration:  32 target diff:  0.0020902417141877142 values:  -56.023594 ----- 

-----iteration:  0 target diff:  0.9134910956522532 values:  -65.243904 ----- 

-----iteration:  33 target diff:  0.0024163396967173817 values:  -55.88065 ----- 

-----iteration:  1 --------------------target diff:   0.0024342507341762122fqe on dqn & sale  values:  --------------------
 

-----iteration:  34 target diff:  0.0021309483085792855 v




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  59 target diff:  0.001796




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  31 target diff:  0.002203091316069512 values:  -65.781555 ----- 

-----iteration:  32 target diff:  0.0022343190279989875 values:  -65.78278 ----- 

-----iteration:  33 target diff:  0.0023530645198441757 values:  -65.69574 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/798842024/fold3/train/agent/ckpt/offline_qr_dqn_7000.ckpt
-----iteration:  34 target diff:  0.0020949822289331402 values:  -65.5073 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  35 target diff:  0.003363039076121557 values:  -65.40095 ----- 

-----iteration:  36 target diff:  0.003016283

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold2/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold2/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold2/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold2/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold2/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author 


-------------------- adv learner --------------------


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  0 target diff:  0.9152645242639538 values:  -58.37378 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  1 target diff:  0.003428598103755722 values:  -58.30308 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  2 target diff:  0.002649300315684168 values:  -58.265965 ----- 

-----iteration:  3 target diff:  0.0019486651463652344 values:  -58.144894 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/798842024/fold3/train/agent/ckpt/offline_qr_dqn_8000.ckpt
-----iteration:  4 target diff:  0.0023972319638356577 values:  -58.05351




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  49 target diff:  0.0019489653384556999 values:  -54.58359 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  50 target diff:  0.0028788001929370725 values:  -54.47745 ----- 

-----iteration:  0 target diff:  0.9137756302530148 values:  -64.692085 ----- 

-----iteration:  51 target diff:  0.0025449511666907998 values:  -54.30698 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('floa

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/717354021/fold2/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
-----iteration:  22 target diff:  Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/717354021/fold2/train/agent2/trajs2.pkl!0.002055875560971145
 Refresh buffer every 1000000 sampling!values: 
 -58.39145 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/798842024/fold3/train/agent0/ckpt/offline_qr_dqn_4000.ckpt
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/717354021/fold2/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/717354021/fold2/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64'




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  17 target diff:  0.0025226449876672025 values:  -65.393936 ----- 

-----iteration:  18 target diff:  0.0026360756611095468 values:  -65.431145 ----- 

-----iteration:  19 target diff:  0.003031479064385681 values:  -65.48637 ----- 

-----iteration:  20 target diff:  0.0026617580425866037 values:  -65.55755 ----- 

-----iteration:  21 target diff:  0.0028394312015687837 values:  -65.63517 ----- 

-----iteration:  22 target diff:  0.00264145019112333 values:  -65.64472 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold3/train/agent/ckpt/offline_qr_dqn_7000.ckpt
-----iteration:  23 target diff:  0.0025309560612244435 values:  -65.68 ---




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('floa




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  61 target diff:  0.0035099237400676676 values:  -61.652584 ----- 

-----iteration:  62 target diff:  0.0024612857030430727 values:  -61.455944 ----- 

-----iteration:  63 target diff:  0.0043689685745473645 values:  -61.368214 ----- 

-----iteration:  64 target diff:  0.003101693821161472 values:  -61.24981 ----- 

-----iteration:  65 target diff:  0.003580170602011798 values:  -61.146423 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold3/train/agent2/ckpt/offline_qr_dqn_3000.ckpt
-----iteration:  66 target diff:  0.0031893175821048137 values:  -61.043133 ----- 

-----iteration:  67 target diff:  0.003045101487048628 values:  -60.93

-----iteration:  4 target diff:  0.001977060285471338 values:  -63.83621 ----- 

-----iteration:  5 target diff:  0.002736769364663247 values:  -63.931324 ----- 

-----iteration:  6 target diff:  0.002687412514423292 values:  -63.939404 ----- 

-----iteration:  7 target diff:  0.0019127586824327842 values:  -63.971043 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold3/train/agent2/ckpt/offline_qr_dqn_5000.ckpt
-----iteration:  8 target diff:  0.0020021203065630653 values:  -63.989544 ----- 

-----iteration:  9 target diff:  0.0019822931488354635 values:  -64.033775 ----- 

-----iteration:  10 target diff:  0.002155533646363559 values:  -64.02398 ----- 

-----iteration:  11 target diff:  0.0020440556723938844 values:  -64.071266 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold3/train/agent/ckpt/offline_qr_dqn_6000.ckpt
-----iteration:  12 target diff:  0.0020767619861021378 values:  -64.08834 ----- 

----

-----iteration:  85 target diff:  0.002198856991071631 values:  -56.826252 ----- 

-----iteration:  86 target diff:  0.002444291167012087 values:  -56.732418 ----- 

-----iteration:  87 target diff:  0.0019663078827780135 values:  -56.492664 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold3/train/agent0/ckpt/offline_qr_dqn_1000.ckpt
-----iteration:  88 target diff:  0.002715424505918808 values:  -56.31075 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold3/train/agent0/ckpt/offline_qr_dqn_8000.ckpt
-----iteration:  89 target diff:  0.0023983709529652015 values:  -56.276043 ----- 

-----iteration:  90 target diff:  0.001977469625236557 values:  -56.122562 ----- 

-----iteration:  91 target diff:  0.002188116662067958 values:  -56.00443 ----- 

-----iteration:  92 target diff:  0.0021366636046890935 values:  -55.936794 ----- 

-----iteration:  93 target diff:  0.0018068812974104786 values:  -55.832127 -----




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
saving model weights at /home/jupyt/leyuan

-----iteration:  63 target diff:  0.0027391712997278535 values:  -61.38085 ----- 

-----iteration:  64 target diff:  0.0033660961340495365 values:  -61.26226 ----- 

-----iteration:  65 target diff:  0.002872512305387986 values:  -61.175545 ----- 

-----iteration:  66 target diff:  0.00291064900514832 values:  -60.935974 ----- 

-----iteration:  67 target diff:  0.0035195213269057675 values:  -60.79436 ----- 

-----iteration:  68 target diff:  0.00265502966155559 values:  -60.729523 ----- 

-----iteration:  69 target diff:  0.0017432328183381288 values:  -60.640987 ----- 

-----iteration:  70 target diff:  0.002134329501122186 values:  -60.48644 ----- 

-----iteration:  71 target diff:  0.002329656770724691 values:  -60.307495 ----- 

-----iteration:  72 target diff:  0.0023639136339159523 values:  -60.18973 ----- 

-----iteration:  73 target diff:  0.00213398839826621 values:  -60.12377 ----- 

-----iteration:  74 target diff:  0.0027846552996618713 values:  -60.042908 ----- 

-----it

-----iteration:  23 target diff:  0.001874383465043295 values:  -64.679306 ----- 

-----iteration:  24 target diff:  0.001746896141689391 values:  -64.65697 ----- 

-----iteration:  25 target diff:  0.0015220939189101495 values:  -64.66003 ----- 

-----iteration:  26 target diff:  0.0015571564775812342 values:  -64.69941 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold3/train/agent1/ckpt/offline_qr_dqn_6000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold3/train/agent0/ckpt/offline_qr_dqn_9000.ckpt
-----iteration:  27 target diff:  0.0015945582258123363 values:  -64.702354 ----- 

-----iteration:  28 target diff:  0.0020093950206724115 values:  -64.80771 ----- 

-----iteration:  29 target diff:  0.0024069611781480735 values:  -64.73886 ----- 

-----iteration:  30 target diff:  0.0014458117482175604 values:  -64.76503 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.se

-----iteration:  95 target diff:  0.0019404341555570828 values:  -56.383274 ----- 

-----iteration:  96 target diff:  0.001909355111024705 values:  -56.3563 ----- 

-----iteration:  97saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/798842024/fold3/train/agent2/ckpt/offline_qr_dqn_10000.ckpt 
target diff:  0.0018568748469156445 values:  -56.33374 ----- Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/798842024/fold3/train/agent3/trajs3.pkl!

Refresh buffer every 1000000 sampling!



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the laye




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('floa

-----iteration:  56 target diff:  0.0026127195000379454 values:  -64.67946 ----- 

-----iteration:  57 target diff:  0.0022903200921358543 values:  -64.58577 ----- 

-----iteration:  58 target diff:  0.0029376988022822652 values:  -64.49132 ----- 

-----iteration:  59 target diff:  0.0027332800066599424 values:  -64.5388 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold3/train/agent4/ckpt/offline_qr_dqn_8000.ckpt
-----iteration:  60 target diff:  0.0020074088547289153 values:  -64.51418 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold3/train/agent2/ckpt/offline_qr_dqn_6000.ckpt
-----iteration:  61 target diff:  0.0015768984032269837 values:  -64.41427 ----- 

-----iteration:  62 target diff:  0.002698033902367866 values:  -64.291725 ----- 

-----iteration:  63 target diff:  0.0029820419999034217 values:  -64.16662 ----- 

-----iteration:  64 target diff:  0.002672029529592975 values:  -64.01403 ----- 

-




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold3/train/agent2/ckpt/offline_qr_dqn_1000.ckpt
-------------------- fqe on dqn & sale --------------------
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/798842024/fold3/train/agent3/ckpt/offline_qr_dqn_8000.ckpt


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  0 target dif

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold3/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold3/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold3/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold3/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, ca


-----iteration:  0 target diff:  0.9182160152834613 values:  -63.477844 ----- 

-----iteration:  1 target diff:  0.005157307036333543 values:  -63.456055 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold3/train/agent2/ckpt/offline_qr_dqn_5000.ckpt
-----iteration:  2 target diff:  0.004451626069461585 values:  -63.549713 ----- 

-----iteration:  3 target diff:  0.002676255490105872 values:  -63.585346 ----- 

-----iteration:  4 target diff:  0.0025290698787942493 values:  -63.632027 ----- 

-----iteration:  5 target diff:  0.0025124951078085212 values:  -63.64655 ----- 

-----iteration:  6 target diff:  0.002531164599991939 values:  -63.710533 ----- 

-----iteration:  7 target diff:  0.002474559049985251 values:  -63.737896 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/798842024/fold3/train/agent4/ckpt/offline_qr_dqn_2000.ckpt
-----iteration:  8 target diff:  0.002232252499607895 values:  -63.758923 ----- 

-----ite




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
saving model weights at /home/jupyt/leyuan




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  4 target diff:  0.0016463965752801712 values:  -62.088337 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold3/train/agent3/ckpt/offline_qr_dqn_8000.ckpt
-----iteration:  5 target diff:  0.002278082761114663 values:  -62.025997 ----- 

-----iteration:  6 target diff:  0.0019924236701392 values:  -62.106243 ----- 

-----iteration:  7 target diff:  0.0017752498470267957 values:  -62.099373 ----- 

-----iteration:  8 target diff:  0.002428740819191482 values:  -62.106297 ----- 

-----iteration:  9 target diff:  0.001631476893153968 values:  -62.168713 ----- 

-----iteration:  10 target diff:  0.0013924666012844244 values:  -62.191433 ---




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
saving model weights at /home/jupyt/leyuan


saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/717354021/fold3/train/agent0/ckpt/offline_qr_dqn_2000.ckpt
-----iteration:  14 target diff:  0.0018015547820702416 values:  -62.967693 ----- 

-----iteration:  15 target diff:  0.002882996638224026 values:  -62.973278 ----- 

-----iteration:  16 target diff:  0.0014854538499675973 values:  -62.92947 ----- 

-------------------- ckpt:  5000 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold3/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold3/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold3/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-------------------- fqe on dqn & sale --------------------
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold3/train/agent3/ckpt/offline_qr_dqn_3000.ckpt


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  0 target diff:  0.9217034508825206 values:  -67.711235 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/79884202

-----iteration:  2 target diff:  0.005857334805356876 values:  -58.786797 ----- 

-----iteration:  13 target diff:  0.0020135790879861563 values:  -66.82983 ----- 

-----iteration:  3 target diff:  0.005174977456687569 values:  -58.759617 ----- 

-----iteration:  14 target diff:  0.0018774478507888905 values:  -66.84755 ----- 

-----iteration:  15 target diff:  0.002100491303158156 values:  -66.92987 ----- 

-----iteration:  4 target diff:  0.0053341803124838365 values:  -58.754402 ----- 

-----iteration:  16 target diff:  0.0019010754059778133 values:  -66.96825 ----- 

-----iteration:  5 target diff:  0.004977491535194045 values:  -58.777744 ----- 

-----iteration:  17 target diff:  0.0015023457640101793 values:  -67.025185 ----- 

-----iteration:  6 target diff:  0.005951570774658554 values:  -58.810314 ----- 

-----iteration:  18 target diff:  0.0015404568014961975 values:  -67.09609 ----- 

-----iteration:  7 target diff:  0.004828359745917382 values:  -58.83532 ----- 

-----itera




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  11 target diff:  0.004026

-----iteration:  56 target diff:  0.001934436656183879 values:  -64.02676 ----- 

-----iteration:  78 target diff:  0.004497544004571851 values:  -57.567513 ----- 

-----iteration:  57 target diff:  0.001904831865005292 values:  -63.985817 ----- 

-----iteration:  79 target diff:  0.004571094072857795 values:  -57.620922 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold3/train/agent3/ckpt/offline_qr_dqn_8000.ckpt
-----iteration:  58 target diff:  0.002010054397131126 values:  -63.966755 ----- 

-----iteration:  80 target diff:  0.0042211259872706654 values:  -57.672688 ----- 

-----iteration:  59 target diff:  0.001853863571468142 values:  -63.942905 ----- 

-----iteration:  81 target diff:  0.0044061291513882075 values:  -57.65012 ----- 

-----iteration:  60 target diff:  0.0020630531518592344 values:  -63.89929 ----- 

-----iteration:  82 target diff:  0.004296459869035626 values:  -57.710754 ----- 

-----iteration:  61 target diff:  0.0019585




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('floa




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  9 target diff:  0.0019439

-----iteration:  0 target diff:  0.9160303546080044 values:  -58.86471 ----- 

-----iteration:  1 target diff:  0.002623414453358145 values:  -58.874973 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/717354021/fold3/train/agent0/ckpt/offline_qr_dqn_10000.ckpt
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/717354021/fold3/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by pass




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('floa

-----iteration:  1 target diff:  0.004179986945863966 values:  -63.07951 ----- 

-----iteration:  41 target diff:  0.0019995787299518716 values:  -55.920193 ----- 

-----iteration:  42 target diff:  0.0018002393103039387 values:  -55.83776 ----- 

-----iteration:  2 target diff:  0.0036442775233318345 values:  -63.134644 ----- 

-----iteration:  43 target diff:  0.0016903053428101583 values:  -55.725445 ----- 

-----iteration:  44 target diff:  0.0016961214868184318 values:  -55.629482 ----- 

-----iteration:  3 target diff:  0.0022751215883674517 values:  -62.960052 ----- 

-----iteration:  45 target diff:  0.0016050543256071895 values:  -55.619537 ----- 

-----iteration:  4 target diff:  0.0030272969813991903 values:  -62.989414 ----- -----iteration:  

46 target diff:  0.0019838911244256076 values:  -55.5657 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer cons




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  71 -----iteration: target diff:   0.0017409690919411842 4 values:  -54.32619target diff:   0.0023489032995853355----- 
 
values:  -61.552734 ----- 

-----iteration:  5 target diff:  0.001921992151238068 values:  -61.500908 ----- 

-----iteration:  72 target diff:  0.002044732621764167 values:  -54.307686 ----- 

-----iteration:  6 target diff:  0.0018048561194121606 values:  -61.47564 ----- 

-----iteration:  73 target diff:  0.001689518857238742 values:  -54.270905 ----- 

-----iteration:  7 target diff:  0.0021809380534805007 values:  -61.481922 ----- 

-----iteration:  74 target diff: -------------------- fqe on dqn 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold3/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold3/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold3/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold3/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold3/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author 


-----iteration:  9 target diff:  0.0018129820425015636 values:  -56.365673 ----- 

-----iteration:  14 target diff:  0.001834584133509185 values:  -57.289955 ----- 

-----iteration:  10 target diff:  0.0015702575550212807 values:  -56.407143 ----- 

-----iteration:  15 target diff:  0.0017915724814282317 values:  -57.16781 ----- 

-----iteration:  0 target diff:  0.919100165659293 values:  -63.265564 ----- 

-----iteration:  16 target diff:  0.002357490230185459 values:  -----iteration:  -57.04796611  target diff: -----  
0.0019886955360435624
 values:  -56.34771 ----- 

-----iteration:  12-----iteration:   1target diff:  0.0015163544441452895  target diff: values:   0.004929465580955454-56.315666  values: ----- -63.2801  
----- saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/717354021/fold3/train/agent1/ckpt/offline_qr_dqn_5000.ckpt



-----iteration:  17 target diff:  0.0018708995452385842 values:  -57.00849 ----- 

-----iteration:  13 target diff:  0.00182437915




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.
 values: 
 -63.113503 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteratio




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('floa

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/798842024/fold3/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/798842024/fold3/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/798842024/fold3/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/798842024/fold3/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/798842024/fold3/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author 


-------------------- adv learner --------------------
-----iteration:  44 target diff:  0.0020163942322210496 values:  -56.365204 ----- 

-----iteration:  45 target diff:  0.001745769321776002 values:  -56.267117 ----- 

-----iteration:  46 target diff:  0.0017145631844747021 values:  -56.168987 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/717354021/fold3/train/agent1/ckpt/offline_qr_dqn_9000.ckpt
-----iteration:  47 target diff:  0.0020469633004619164 values:  -56.082294 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  48 target diff:  0.0015567591460208332 values:  -56.036556 ----- 

-----iteration:  49 target diff:  0.001584932682765815 values:  -56.008312 ----- 

-----iteration:  50 target diff:  0.0017088824508287928 values:  -55.896843 ----- 

-----iteration:  51 target diff:  0.0018331014416615263 values:  -55.855198 ----- 

-----iteration:  52 target diff:  0.0025418130265235784 values:  -55.798916 ----- 

----

-----iteration:  76 target diff:  0.0027965867821301547 values:  -54.67054 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/717354021/fold3/train/agent1/ckpt/offline_qr_dqn_10000.ckpt
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/717354021/fold3/train/agent2/trajs2.pkl!
-----iteration:  77Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.
0.0021220118982497706 values: 


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the bas

-----iteration:  14 target diff:  0.0017989287584748053 values:  -58.717976 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  15 target diff:  0.0023959647658926574 values:  -58.746586 ----- 

-----iteration:  16 target diff:  0.0028797716691887133 values:  -58.644062 ----- 

-----iteration:  0 target diff:  0.9177764298028185 values:  -67.477394 ----- 

-----iteration:  17 target diff:  0.0021653645130380404 values:  -58.624943 ----- 

-----iteration:  1 target diff:  0.0031883486494688035 values:  -67.423706 ----- 

-----iteration:  18 target diff:  0.002173701034076003 values:  -58.71047 ----- 

-----iteration:  19 target diff:  0.0023911482540749703 values:  -58.762966 ----- 

-----iteration:  2 target diff:  0

 target diff:  0.001804620581548636-----iteration:   1 values: target diff:   -56.3656430.0015365105966840458  -----values:   
-67.75906
 ----- 

-----iteration:  33 target diff:  0.0025074873808964442 values:  -58.367302 ----- 

-----iteration:  4 target diff:  0.0020186338733927433 values:  -56.389896 ----- 

-----iteration:  34 target diff:  0.002565808611543801 values:  -58.331173 ----- 

-----iteration:  2 target diff:  0.001386259887997158 values:  -67.75382 ----- 

-----iteration:  5 target diff:  0.0017830929087539868 values:  -56.455666 ----- 

-----iteration:  6 target diff:  0.002432394007346086 values:  -56.480747 ----- 

-----iteration:  35 target diff:  0.0020550055972172855 values:  -58.24004 ----- 

-----iteration:  7 target diff:  0.0022522132102664573 values:  -56.5422 ----- 

-----iteration:  36 target diff:  -----iteration: 0.0028098106223934595  8values:  target diff:   -58.2016680.002049325932094862  -----values:   
-56.622395
 ----- 

-----iteration:  9 target di




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  39 target diff:  0.0024669491527403004 values:  -58.148746 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

------------

0.0018052004608433525 values:  -57.786037 ----- 

-----iteration:  2 target diff:  0.002656431765030895 values:  -57.66428 ----- 

-----iteration:  3 target diff:  0.0022835036323345524 values:  -57.6833 ----- 

-----iteration:  53 target diff:  0.00194172228585727 values:  -57.746418 ----- 

-----iteration:  4 target diff:  0.002108695558569176 values:  -57.707966 ----- 

-----iteration:  54 target diff:  0.001800680527819333 values:  -57.730873 ----- 

-----iteration:  5 target diff:  0.0016965340524567583 values:  -57.68183 ----- 

-----iteration:  6 target diff:  0.0019959973620371496 values:  -57.635456 ----- 
-----iteration:  
55 target diff:  0.002190215112359155 values:  -57.79949 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold4/train/agent/ckpt/offline_qr_dqn_7000.ckpt
-----iteration:  7 target diff:  0.002223215204433332 values:  -57.664116 ----- 

-----iteration:  56 target diff:  0.0026826015389665205 values:  -57.78846 ----- 

---



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  3 target diff:  0.002147031276963055 values:  -67.927505 ----- 

-----iteration:  4 target diff:  0.002688585722077137 values:  -68.08013 ----- 

-----iteration:  0 target diff:  0.9236653353149439 values:  -55.63569 ----- 

-----iteration:  5 target diff:  0.0020307644563312684 values:  -68.14664 ----- 

-----iteration:  1 target diff:  0.0032091452936337628 values:  -55.56941 ----- 

-----iteration:  2 target diff:  0.0022766601268715114 values:  -55.64699 ----- 

-----iteration:  6 target diff:  0.0018362037600566138 values:  -68.189064 ----- 

-----iteration:  3 target diff:  0.0025116608196645482 values:  -55.602783 ----- 

-----iteration:  4 target diff:  0.0021226417

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold3/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold3/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold3/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold3/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPR


-------------------- adv learner --------------------
-----iteration:  0 target diff:  0.9142943826380822 values:  -58.67697 ----- 

-----iteration:  1 target diff:  0.004835538843055052 values:  -58.711895 ----- 

-----iteration:  2 target diff:  0.0029039789556542044 values:  -58.56756 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/717354021/fold3/train/agent2/ckpt/offline_qr_dqn_4000.ckpt


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  3 target diff:  0.0023879817894339427 values:  -58.648884 ----- 

-----iteration:  0 target diff:  0.9177210570453629 values:  -67.49787 ----- 

-----iteration:  4 target diff:  0.0020915097982234262 values:  -58.599983 ----- 

-------------------- fqe on d


-----iteration:  16 target diff:  0.0014570030020686544 values:  -68.35207 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- ckpt:  5000 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold3/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!
-------------------- adv learner --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold3/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold3/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  15 target diff:  0.0024127321859512443 values:  -58.327938 ----- 

-----iteration:  16 target diff:  0.0015687387966377003 values:  -58.35905 ----- 

-----iteration:  17 target diff:  0.0018520429286370298 values:  -58.36743 ----- 

-----iteration:  18 target diff:  0.0018412615411297075 values:  -58.352043 ----- 

---------

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold3/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold3/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold3/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold3/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
-----iteration:  saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold4/train/agent0/ckpt/offline_qr_dqn_1000.ckpt23
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold3/train/agent4/trajs4.pkl! 
target diff: Refresh buffer every 1000000 sampling! 
0.002415295197657144 values:  -54.68256

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold3/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
 target diff:  0.0020624951653046846 values:  -57.93505 ----- 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold3/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold3/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold3/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold3/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass 




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  8 target diff:  0.002936889016842866 values:  -67.15283 ----- 

-----iteration:  9 target diff:  0.0020172120217882995 values:  -67.268036 ----- 

-----iteration:  10 target diff:  0.002051738772785803 values:  -67.24914 ----- 

-----iteration:  11 target diff:  0.001583623186285771 values:  -67.32329 ----- 

-------------------- fqe on dqn & sale --------------------
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/717354021/fold3/train/agent2/ckpt/offline_qr_dqn_8000.ckpt
-----iteration:  12 target diff:  0.0017428867253314435 values:  -67.4677 ----- 

-----iteration:  13 target diff:  0.00251261602


-----iteration:  4 target diff:  0.0015898035977968213 values:  -57.2255 ----- 

-----iteration:  0 target diff:  0.914309682340437 values:  -57.7982 ----- 

-----iteration:  5 target diff:  0.0019173150684822717 values:  -57.269634 ----- 

-----iteration:  1 target diff:  0.0018923416261235138 values:  -57.844505 ----- 

-----iteration:  0 target diff:  0.9178094664074041 values:  -68.15789 ----- 

-----iteration:  6 target diff:  0.0018868898459838725 values:  -57.295002 ----- 

-----iteration:  2 target diff:  0.0024115646227996107 values:  -57.731133 ----- 

-----iteration:  7 target diff:  0.0017047370055734718 values:  -57.291676 ----- 

-----iteration:  1 target diff:  0.0018383713056341752 values:  -68.14536 ----- 

-----iteration:  8 target diff:  0.0015301621459666345 values:  -57.29276 ----- 

-----iteration:  3 target diff:  0.0016667846918646905 values:  -57.691494 ----- 

-----iteration:  9 target diff:  0.001628846107712702 values:  -57.328823 ----- 

-----iteration:  2

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold3/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold3/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold3/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold3/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold3/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author 

 
7 target diff:  0.0018113564756067596 values:  -58.913456 ----- 

-------------------- adv learner --------------------
-----iteration:  8 target diff:  0.0019080015734151267 values:  -58.924644 ----- 

-----iteration:  9 target diff:  0.0019667195479648784 values:  -59.059776 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  10 target diff:  0.002829557336930381 values:  -59.01411 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  11 target diff:  0.0020387269224342486 values:  -58.911602 ----- 

-----iteration:  12 target diff:  0.002974549705285009 values:  -58.85668 ----- 

-----iteration:  13 target diff:  0.0025891090420826404 values:  -58.819073 ----- 

-----iteration:  14 target diff:  0.0023959286350987152 values:  -58.777462 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor


-----iteration:  24 target diff:  0.0029008168391297397 values:  -58.24183 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold4/train/agent0/ckpt/offline_qr_dqn_5000.ckpt
-----iteration:  25 target diff:  0.0017839561892309674 values:  -58.20807 ----- 

-----iteration:  0 target diff:  0.923527627231881 values:  -56.792194 ----- 

-----iteration:  1 target diff:  0.0024370367552934147 values:  -56.851265 ----- 

-----iteration:  0 target diff:  0.9169787069819956 values:  -68.264824 ----- 

-----iteration:  2 target diff:  0.0019767515338309557 values:  -56.90646 ----- 

-----iteration:  26 target diff:  0.002093740888416474-----iteration:  1 values: 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold3/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold3/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold3/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold3/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold3/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/798842024/fold3/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
-----iteration:  5 target diff:  0.002209920275009599 -----iteration: values:   Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/798842024/fold3/train/agent2/trajs2.pkl!2-57.197727 
 -----Refresh buffer every 1000000 sampling!target diff:   0.0016811321139999292
 
values: 
 -67.08076 ----- 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/798842024/fold3/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/798842024/fold3/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the aut




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  3 target diff:  0.002695319330817963 values:  -56.988377 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change al


-----iteration:  6 target diff:  0.0017456130051192364 values:  -58.186462 ----- 

-----iteration:  7 target diff:  0.0015293996538228156 values:  -58.11447 ----- 

-----iteration:  0 target diff:  0.916674821604595 values:  -68.37568 ----- 

-----iteration:  8 target diff:  0.0016690232042045746 values:  -58.169487 ----- 

-----iteration:  1 target diff:  0.002511452071036175 values:  -68.26449 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/717354021/fold3/train/agent3/ckpt/offline_qr_dqn_3000.ckpt
-----iteration:  9 target diff:  0.0014293065387642368 values:  -58.183117 ----- 

-------------------- ckpt:  10000 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/798842024/fold3/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!


Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/798842024/fold3/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded 




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  4 target diff:  0.0017751014361514254 values:  -68.12574 ----- 

-----iteration:  5 target diff:  0.0019636568775040887 values:  -68.149254 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold4/train/agent0/ckpt/offline_qr_dqn_10000.ckpt
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold4/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/798842024/fold4/train/agent/ckpt/offline_q

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold4/train/agent0/ckpt/offline_qr_dqn_2000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/798842024/fold4/train/agent0/ckpt/offline_qr_dqn_1000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold4/train/agent0/ckpt/offline_qr_dqn_2000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/717354021/fold3/train/agent4/ckpt/offline_qr_dqn_6000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold4/train/agent2/ckpt/offline_qr_dqn_1000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/798842024/fold4/train/agent0/ckpt/offline_qr_dqn_2000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold4/train/agent0/ckpt/offline_qr_dqn_3000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold4/train/agent0/ckpt/offline_q

-----iteration:  15 target diff:  0.004236341011750764 values:  -46.649113 ----- 

-----iteration:  16 target diff:  0.003878257953109563 values:  -46.686054 ----- 

-----iteration:  17 target diff:  0.0038843018350381174 values:  -46.745422 ----- 

-----iteration:  18 target diff:  0.004126063036790271 values:  -46.75941 ----- 

-----iteration:  19 target diff:  0.003805359757944172 values:  -46.779167 ----- 

-----iteration:  20 target diff:  0.003734043802072936 values:  -46.830933 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold4/train/agent0/ckpt/offline_qr_dqn_8000.ckpt
-----iteration:  21 target diff:  0.004079140582476925 values:  -46.84426 ----- 

-----iteration:  22 target diff:  0.004475097988183049 values:  -46.84847 ----- 

-----iteration:  23 target diff:  0.0041765556304567035 values:  -46.85456 ----- 

-----iteration:  24 target diff:  0.004163227905838328 values:  -46.846333 ----- 

-----iteration:  25 target diff:  0.004680462




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  71 target diff:  0.0029044647035962583 values:  -47.22488 ----- 

-----iteration:  72 target diff:  0.003071076641867471 values:  -47.237537 ----- 

-----iteration:  73 target diff:  0.002825354972300151 values:  -47.208946 ----- 

-----iteration:  74 target diff:  0.0026671891508096394 values:  -47.193424 ----- 

-----iteration:  75 target diff:  0.002445974843854988 values:  -47.196526 ----- 

-----iteration:  76 target diff:  0.002138178787025821 values:  -47.276104 ----- 

-----iteration:  77 target diff:  0.0026475178794672896 values:  -47.258736 ----- 

-----iteration:  78 target diff:  0.002091938918062876 values:  -47.282288 ----- 

-----iteration:  79 target diff:




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold4/train/agent1/ckpt/offline_qr_dqn_2000.ckpt

-------------------- adv learner --------------------
-------------------- fqe on dqn & sale --------------------
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold4/train/agent1/ckpt/offline_qr_dqn_2000.ckpt


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  0 target dif

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold4/train/agent1/ckpt/offline_qr_dqn_4000.ckpt


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  0 target diff:  0.9228583074472685 values:  -46.476048 ----- 

-----iteration:  1 target diff:  0.0024175159946897614 values:  -46.50327 ----- 

-----iteration:  2 target diff:  0.0022372062415292934 values:  -46.485023 ----- 

-----iteration:  3 target diff:  0.0019374488606192573 values:  -46.48955 ----- 

-----iteration:  4 target diff:  0.001554281505259042 values:  -46.451973 ----- 

-----iteration:  5 target diff:  0.0018401292148515684 values:  -46.46327 ----- 

-----iteration:  6 target diff:  0.001118297589223683 values:  -46.51691 ----- 



-----iteration:  13 target diff:  0.0020080506475233873 values:  -47.355206 ----- 

-----iteration:  14 target diff:  0.0020311553912838697 values:  -47.333496 ----- 

-----iteration:  15 target diff:  0.0017390128450496787 values:  -47.33075 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold4/train/agent3/ckpt/offline_qr_dqn_5000.ckpt
-----iteration:  16 target diff:  0.0021001091239566103 values:  -47.318237 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/798842024/fold4/train/agent1/ckpt/offline_qr_dqn_6000.ckpt
-----iteration:  17 target diff:  0.0022292795243158674 values:  -47.396065 ----- 

-----iteration:  18 target diff:  0.0024245332799318698 values:  -47.374134 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold4/train/agent1/ckpt/offline_qr_dqn_7000.ckpt
-----iteration:  19 target diff:  0.0024648260930012797 values:  -47.380962 ----- 

-----iteration:  20 target diff:

-----iteration:  1 target diff:  0.0029812555670814672 values:  -46.867218 ----- 

-----iteration:  2 target diff:  0.0028078237374003965 values:  -46.87027 ----- 

-----iteration:  3 target diff:  0.002952123312337424 values:  -46.919758 ----- 

-----iteration:  4 target diff:  0.0023431006965328035 values:  -46.941135 ----- 

-----iteration:  5 target diff:  0.0021034583253570307 values:  -46.942486 ----- 

-----iteration:  6 target diff:  0.001638466626420728 values:  -46.98917 ----- 

-----iteration:  7 target diff:  0.0023400380146896276 values:  -46.944202 ----- 

-----iteration:  8 target diff:  0.00318649965424652 values:  -46.98411 ----- 

-----iteration:  9 target diff:  0.0021328098872880744 values:  -46.947598 ----- 

-----iteration:  10 target diff:  0.001657884817247927 values:  -46.98103 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/798842024/fold4/train/agent1/ckpt/offline_qr_dqn_9000.ckpt
-----iteration:  11 target diff:  0.002303572781745


saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold4/train/agent3/ckpt/offline_qr_dqn_9000.ckpt
-----iteration:  35 target diff:  0.0019459272775581712 values:  -47.19791 ----- 

-----iteration:  36 target diff:  0.0028108777068033804 values:  -47.2089 ----- 

-----iteration:  37 target diff:  0.002407603697275308 values:  -47.159912 ----- 

-----iteration:  38 target diff:  0.0028176024425191847 values:  -47.198605 ----- 

-----iteration:  39 target diff:  0.002111561124440917 values:  -47.21209 ----- 

-----iteration:  40 target diff:  0.002286289315085277 values:  -47.240242 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold4/train/agent2/ckpt/offline_qr_dqn_1000.ckpt
-----iteration:  41 target diff:  0.0019502599090416553 values:  -47.296024 ----- 

-----iteration:  42 target diff:  0.002550706182241006 values:  -47.33263 ----- 

-----iteration:  43 target diff:  0.0023526329231228784 values:  -47.341885 ----- 


-----iteration:  0 target diff:  0.9239147806518723 values:  -49.17661 ----- 

-----iteration:  1 target diff:  0.005373413144992315 values:  -49.164665 ----- 

-----iteration:  2 target diff:  0.003297543304170723 values:  -49.22336 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold4/train/agent2/ckpt/offline_qr_dqn_3000.ckpt
-----iteration:  3 target diff:  0.0027719814062601208 values:  -49.28841 ----- 

-----iteration:  4 target diff:  0.00344155399439403 values:  -49.289375 ----- 

-----iteration:  5 target diff:  0.0026777873181749487 values:  -49.315586 ----- 

-----iteration:  6 target diff:  0.0020244449316692194 values:  -49.36514 ----- 

-----iteration:  7 target diff:  0.001696842593917759 values:  -49.388676 ----- 

-----iteration:  8 target diff:  0.0016981684404601067 values:  -49.42996 ----- 

-----iteration:  9 target diff:  0.0019036254036786566 values:  -49.431923 ----- 

-----iteration:  10 target diff:  0.0018755449152893407




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-------------------- fqe on dqn & sale --------------------


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base La


-----iteration:  79 target diff:  0.0016255830900634475 values:  -45.987934 ----- 

-----iteration:  80 target diff:  0.0015831579880618875 values:  -45.98654 ----- 

-----iteration:  81 target diff:  0.001568241125077838 values:  -45.9846 ----- 

-----iteration:  82 target diff:  0.0015613709618825789 values:  -45.987133 ----- 

-----iteration:  83 target diff:  0.0015261280938080833 values:  -45.990112 ----- 

-----iteration:  84 target diff:  0.001563139433753239 values:  -45.998623 ----- 

-----iteration:  85 target diff:  0.0016949691525605009 values:  -46.02004 ----- 

-----iteration:  86 target diff:  0.001618040411502385 values:  -45.97278 ----- 

-----iteration:  87 target diff:  0.001743154963870199 values:  -45.96745 ----- 

-----iteration:  88 target diff:  0.0020762279279227724 values:  -46.010456 ----- 

-----iteration:  89 target diff:  0.0013660902753568466 values:  -46.04824 ----- 

-------------------- ckpt:  7000 --------------------
Loaded trajectories from load pa




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/798842024/fold4/train/agent2/ckpt/offline_qr_dqn_10000.ckpt
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/798842024/fold4/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default


-------------------- adv learner --------------------
-----iteration:  6 target diff:  0.001664776267880447 values:  -47.03376 ----- 

-----iteration:  7 target diff:  0.002212714597208349 values:  -47.066357 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold4/train/agent3/ckpt/offline_qr_dqn_2000.ckpt
-----iteration:  8 target diff:  0.0023666142085122877 values:  -47.105293 ----- 

-----iteration:  9 target diff:  0.0014970062587554513 values:  -47.090874 ----- 

-------------------- ckpt:  8000 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/717354021/fold3/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/717354021/fold3/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/717354021/fold3/train/agent1/trajs1.p




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
saving model weights at /home/jupyt/leyuan

-----iteration:  60 target diff:  0.0030838301382512976 values:  -51.95161 ----- 

-----iteration:  38 target diff:  0.002640337112740457 values:  -49.6946 ----- 

-----iteration:  61 target diff:  0.003180683857257476 values:  -51.949272 ----- 

-----iteration:  39 target diff:  0.0028557158440584204 values:  -49.70935 ----- 

-----iteration:  62 target diff:  0.0029790078962015653 values:  -51.943527 ----- 

-----iteration:  40 target diff:  0.0027475073362532763 values:  -49.691444 ----- 

-----iteration:  63 target diff:  0.0027034911115865457 values:  -51.940475 ----- 

-----iteration:  41 target diff:  0.0023072496391161362 values:  -49.726513 ----- 

-----iteration:  64 target diff:  0.0031176690524530796 values:  -51.933964 ----- 

-----iteration:  42 target diff:  0.0029445598129365187 values:  -49.679993 ----- 

-----iteration:  65 target diff:  0.003035386975335744 values:  -51.921272 ----- 

-----iteration:  43 target diff:  0.00243778573874955 values:  -49.699802 ----- 

-




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold4/train/agent3/ckpt/offline_qr_dqn_7000.ckpt
-------------------- fqe on dqn & sale --------------------


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  0 target diff:  0.9257111037192234 values:  -52.984093 ----- 

-----iteration:  1 target diff:  0.003406570442743817 values:  -52.942066 --

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold4/train/agent3/ckpt/offline_qr_dqn_8000.ckpt
-----iteration:  11 target diff:  0.001459940315113494 values:  -46.57728 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.


-----iteration:  0 target diff:  0.9246696654039659 values:  -53.134777 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.
 
-53.134705 ----- 

-----iteration:  2 target diff:  0.0014149059634289594 values:  -53.124264 ----- 

-----

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold4/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold4/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold4/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold4/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold4/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author 


-------------------- adv learner --------------------
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold4/train/agent3/ckpt/offline_qr_dqn_9000.ckpt
-------------------- fqe on dqn & sale --------------------


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  0 target diff:  0.9235448219256721 values:  -49.150703 ----- 

-----iteration:  1 target diff:  0.003926294326549284 values:  -49.162884 ----- 

-----iteration:  2 target diff:  0.0035978686083629544 values:  -49.164177 ----- 

-----iteration:  3 target diff:  0.0027257833417667745 values:  -49.2005 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/798842024/fold4/train/agent3/ckpt/offline_qr_dqn_9000.ckpt
----


-----iteration:  16 target diff:  0.00225286772605935 values:  -49.37589 ----- 

-----iteration:  2 target diff:  0.0023600529023042353 values:  -53.292667 ----- 

-----iteration:  17 target diff:  0.0020279436746613814 values:  -49.402424 ----- 

-----iteration:  3 target diff:  0.002550370485892548 values:  -53.270443 ----- 

-----iteration:  18 target diff:  0.00264038119080757 values:  -49.461456 ----- 

-----iteration:  4 target diff:  0.0025387274555371724 values:  -53.2667 ----- 

-----iteration:  19 target diff:  0.002059386134364322 -----iteration: values:   5-49.52512  target diff: -----  0.0018188880824237595 

values:  -53.28729 ----- 

-----iteration:  6 target diff:  0.0021043463933450276 values:  -53.319042 ----- 

-----iteration:  20 target diff:  0.0029509365365471404 values:  -49.517498 ----- 

-----iteration:  7 target diff:  0.002033269702870986 values:  -53.374428 ----- 

-----iteration:  21 target diff:  0.0022767870451831395 values:  -49.51639 ----- 

-----itera

-----iteration:  43 target diff:  0.002477366840297164 values:  -49.766632 ----- 

-----iteration:  0 target diff:  0.9239125706897552 values:  -52.433243 ----- 

-----iteration:  44 target diff:  0.002156379043644456 values:  -49.738525 ----- 

-----iteration:  1 target diff:  0.0012716360073165703 values:  -52.407677 ----- 

-----iteration:  45 target diff:  0.0021722061653978043 values:  -49.72205 ----- 

-----iteration:  46 target diff:  0.0029845155659046128 values:  -49.714924 ----- 

-----iteration:  47 target diff:  0.002395100272119966 values:  -49.745403 ----- 

-----iteration:  48 target diff:  0.0022381323075176505 values:  -49.764557 ----- 

-----iteration:  49 target diff:  0.0020585644617060477 values:  -49.784664 ----- 

-----iteration:  50 target diff:  0.0018336954622239037 values:  -49.76945 ----- 

-----iteration:  51 target diff:  0.002316423305703687 values:  -49.79661 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floa

-----iteration:  1 target diff:  0.0014637692960324416 values:  -52.778076 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/798842024/fold4/train/agent4/ckpt/offline_qr_dqn_2000.ckpt


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  0 target diff:  0.923634509995894 values:  -46.248188 ----- 

-----iteration:  1 target diff:  0.004855579478100088 values:  -46.276028 ----- 

-----iteration:  2 target diff:  0.0027727472707682757 values:  -46.33271 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by 


-----iteration:  0 target diff:  0.9236798570965912 values:  -53.139812 ----- 

-----iteration:  1 target diff:  0.0014049002590041687 values:  -53.146378 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold4/train/agent4/ckpt/offline_qr_dqn_4000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/717354021/fold4/train/agent/ckpt/offline_qr_dqn_1000.ckpt


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  0 target diff:  0.9247701049853373 values:  -52.490135 ----- 

-----iteration:  1 target diff:  0.003766303318195605 values:  -52.490517 ----- 

-----iteration:  2 target diff:  0.002841097132795987 values:  -52.522106 ----- 

-----iteration:  3 target diff:  0.002




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('floa




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-------------------- fqe on dqn & sale --------------------
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/798842024/fold4/train/agent4/ckpt/offline_qr_dqn_7000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold4/train/agent4/ckpt/offline_qr_dqn_8000.ckpt


To change all layers to hav


-------------------- adv learner --------------------
-------------------- fqe on dqn & sale --------------------
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold4/train/agent4/ckpt/offline_qr_dqn_10000.ckpt
-------------------- behavior cloning --------------------
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/798842024/fold4/train/agent4/ckpt/offline_qr_dqn_9000.ckpt


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  0 target diff:  0.9246925854474771 values:  -52.89467 ----- 

-----iteration:  1 target diff:  0.00169766503932327 values:  -52.92284 ----- 

-----iteration:  2 target diff:  0.0015410934903266967 values:  -52.955627 ----- 

-------------------- ckpt:  

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold4/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold4/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold4/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold4/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold4/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold4/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/209652396/fold4/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  1 target diff:  0.0025651759577882097 values:  -51.331482 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer construct


-----iteration:  10 target diff:  0.002377698035822222 values:  -51.417805 ----- 

-----iteration:  15 target diff:  0.0024338359705241668 values:  -45.60548 ----- 

-----iteration:  16 target diff:  0.00286897401902689 values:  -45.62213 ----- 

-----iteration:  11 target diff:  0.002216798493545352 values:  -51.459858 ----- 

-----iteration:  0 target diff:  0.9269218910723936 values:  -52.205215 ----- 

-----iteration:  17 target diff:  0.0024546457514150757 values:  -----iteration: -45.59818  -----12  target diff: 
 
0.0020206523177393673 values:  -51.47124 ----- 

-----iteration:  1 target diff:  0.0017236994047375546 values:  -52.21253 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  2 target diff:  0.0012249561921820584 values:  -52.19981 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/717354021/fold4/train/agent/ckpt/offline_qr_dqn_8000.ckpt
-----iteration:  13 target diff:  0.002127655086098821 values:  -51.50228

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold4/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocas


-----iteration:  48 target diff:  0.0029904678254432243 values:  -44.560863 ----- 

-----iteration:  24 target diff:  0.003218242833338561-----iteration:  values:  30  target diff: -57.431915 0.001802365984047027  ----- values:  -52.865974
 
----- 

-----iteration:  49 target diff:  0.0025986611466194244 values:  -44.5053 ----- 

-----iteration:  31 target diff:  0.0022991509584697058 values:  -52.811096 ----- 

-----iteration:  25 target diff:  0.0025079618012421876 values:  -57.43093 -----iteration:  0-----  
target diff: 
 0.9168404855165482 values:  -51.032555 ----- 

-----iteration:  50 target diff:  0.002918191680191492 values:  -44.423035 ----- 

-----iteration:  1 target diff:  0.002390030886593698 values:  -51.049725 ----- 

-----iteration:  26 target diff:  0.0023612828338632095 values:  -57.44213 ----- 

-----iteration:  2 target diff:  0.002271665410164818 values:  -51.06502 ----- 

-----iteration:  51 target diff:  0.002514900932920086 values:  -44.344204 ----- 

-----ite

 target diff:  0.0027772826719576144 values:  -55.741585-----iteration:  27 target diff:  0.0017241098902694987 values:  -50.45989  ----------  



-----iteration:  76 target diff:  0.002195006718152195 values:  -43.45923 ----- 

-----iteration:  55 target diff:  0.002930761854768535 values:  -55.596382 ----- 

-----iteration:  28 target diff:  0.002002236915357958 values:  -50.319386 ----- 

-----iteration:  77 target diff:  0.002145381285946413 values:  -43.410202 ----- 

-----iteration:  56 target diff:  0.0015424410709550594 values:  -55.50031 ----- 

-----iteration:  78 target diff:  0.0028776352182848484 values:  -43.38936 ----- 

-----iteration:  29 target diff:  0.0018209146766536384 values:  -50.20174 ----- 

-----iteration:  57 target diff:  0.0018246214089663237 values:  -55.349815 ----- 

-----iteration:  79 target diff:  0.0021779525010318635 values:  -43.379574 ----- 

-----iteration:  30 target diff:  0.00219412361875465 values:  -50.19938 ----- 

-----iteration:  58 tar

 -51.96923 

 
----- 

-----iteration:  7 target diff:  0.0021868642818240807 values:  -51.975376 ----- 

-----iteration:  98 target diff:  0.0019917044784906273 values:  -43.007652 ----- 

-----iteration:  99 target diff:  0.0023445043877960915 values: -----iteration:   -43.041738  ----- target diff: 

 0.0019855696636311326 values:  -51.924423 ----- 

-----iteration:  0 target diff:  0.9218232423156147 values:  -59.763817 ----- 

-----iteration:  9 target diff:  0.0012464588969930599 values:  -51.927887 ----- 

-------------------- ckpt:  3000 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold4/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold4/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold4/train/agent1/




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-------------------- fqe on dqn & sale --------------------


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base La


-----iteration:  12 target diff:  0.002412155899821957 values:  -45.88871 ----- 

-----iteration:  13 target diff:  0.0024230948937780973 values:  -45.91129 ----- 

-----iteration:  14 target diff:  0.0022753370121807682 values:  -45.97713 ----- 

-----iteration:  15 target diff:  0.0019071040827633313 values:  -46.014305 ----- 

-----iteration:  16 target diff:  0.001879367400976289 values:  -46.024475 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  17 target diff:  0.0022051164385551744 values:  -46.070744 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  53 target diff:  0.0039084284254134165 values:  -46.324135 ----- 



To change 



Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold4/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold4/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold4/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold4/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold4/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the autho


-------------------- adv learner --------------------
-----iteration:  68 target diff:  0.0019519401468851981 values:  -48.152203 ----- 

-----iteration:  15 target diff:  0.0025620394302911954 values:  -51.402218 ----- 

-----iteration:  69 target diff:  0.0018112028344084712 values:  -48.05864 ----- 

-----iteration:  16 -----iteration: target diff:   70 0.0024359716528321662target diff:   values: 0.002012229590283586  values: -51.402008  ------47.954193 ----- 

 

-----iteration:  71 target diff:  0.0021889856241026837 values:  -47.813698 ----- 

-----iteration:  72 target diff:  0.0018801374745064294 values:  -47.629993 ----- 

-----iteration:  17 target diff:  0.0021763759664835004 values:  -51.274925 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  73 target diff:  0.0023326976975501265 values:  -47.496506 ----- 

-----iteration:  74 target diff:  0.0019508508296989153 values:  -47.352028 ----- 

-----iteration:  18 target diff:  0.00270533420

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold4/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold4/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold4/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold4/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold4/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author 

-----iteration:  5 target diff:  0.0021644854508273883 values:  -51.774822 ----- 

-----iteration:  10 target diff:  0.0023344088151832356 values:  -59.81798 ----------iteration:  32 target diff:  0.001719545956982787 values:   -46.19635
 
----- 

-----iteration:  6 target diff:  0.002075105148396857 values:  -51.745216 ----- 

-----iteration:  33 target diff:  0.0024950384789487585 values:  -46.100742 ----- 

-----iteration:  11 target diff:  0.0014275179378105296 values:  -59.862713 ----- 

-------------------- ckpt:  5000 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/798842024/fold4/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/798842024/fold4/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
 0.0021785326733134705 values:  -51.63184 ----- 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  35 target diff:  0.0025813320126846207 values:  -46.03389 ----- 

-----iteration:  9 target diff:  0.001711110453521057 values:  -51.540043 ----- 

-----iteration:  36 target diff:  0.0017177528709119909 values:  -46.02285 ----- 

-----iteration:  10 target diff:  0.0025050903149215677 values:  -51.45183 ----- 

-----iterati




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('floa

-----iteration:  15 target diff:  0.002987953452681656 values:  -44.287624 ----- 

-----iteration:  56 target diff:  0.0022052089577400636 values:  -49.427372 ----- 

-----iteration:  17 target diff:  0.001864612801792097 values:  -51.69567 ----- 

-----iteration:  16 target diff:  0.0021142765232880435 values:  -44.35898 ----- 

-----iteration:  57 target diff:  0.0021527292505747796 values:  -49.31695 ----- 

-----iteration:  17 target diff:  0.0016105980145686043 values:  -44.380257 ----- 

-----iteration:  58 target diff:  0.0021242958834032113 values:  -49.198914 ----- 

-----iteration:  18 target diff:  0.0020988130585741765 values:  -44.411953 ----- 

-----iteration:  59 target diff:  0.0022059633115382358 values:  -49.107235 ----- 

-----iteration:  18 target diff:  0.0025386547611738934 values:  -51.75907 ----- 

-----iteration:  19 target diff:  0.001671583881486639 values:  -44.42248 ----- 

-----iteration:  60 target diff:  0.0020446303482364965 values:  -48.960747 ----- 



-----iteration:  85 target diff:  0.0017117077872291003 values:  -47.374317 ----- 

-----iteration:  40 target diff:  0.0028438587971591194 values:  -43.70418 ----- 

-----iteration:  86 target diff:  0.001958138157648121 values:  -47.381165 ----- 

-----iteration:  87 target diff:  0.0019176664699069612 values:  -47.25071 ----- 

-----iteration:  41 target diff:  0.0029530366359727567 values:  -43.622204 ----- 

-----iteration:  88 target diff:  0.0021483524575118215 values:  -47.272835 ----- 

-----iteration:  89 target diff:  0.0017369856662464243 values:  -47.157352 ----- 

-----iteration:  90 target diff:  0.0021627656556602367 values:  -47.136887 ----------iteration:   

42 target diff:  0.0025688161366115574 values:  -43.500137 ----- 

-----iteration:  91 target diff:  0.002274088519640654 values:  -47.1526 ----- 

-----iteration:  92 target diff:  0.0019444986925219914 values:  -47.12139 ----- 

-----iteration:  43 target diff:  0.0031742676813523115 values:  -43.42081 ----- 




-----iteration:  6 target diff:  0.0021884555645323473 values:  -50.340313 ----- 

-----iteration:  56 target diff:  0.0028825502330136464 values:  -42.46205 ----- 

-----iteration:  7 target diff:  0.0018326154277530993 values:  -50.28161 ----- 

-----iteration:  0 target diff:  0.9206132351335182 values:  -57.500233 ----- 

-----iteration:  57 target diff:  0.002536358054118542 values:  -42.32778 ----- 

-----iteration:  8 target diff:  0.001936552322932539 values:  -50.175262 ----- 

-----iteration:  1 target diff:  0.00175460648186904 values:  -57.471897 ----- 

-----iteration:  58 target diff:  0.0036279105666996063 values:  -42.265324 ----- 

-----iteration:  9 target diff:  0.001338372732352689 values:  -50.11507 ----- 

-------------------- ckpt:  8000 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold4/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!
-----iteration:  2 target diff:  0.00158890752020




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  3 target diff:  0.0014352034131376221 values:  -57.30691 ----- 

-----iteration:  60 target diff:  0.0032706581957141576 values:  -42.14399 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/717354021/fold4/train/agent1/ckpt/offline_qr_dqn_3000.ckpt
-----iteration:  61 target diff:  0.002844

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold4/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold4/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold4/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold4/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold4/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold4/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold4/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold4/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold4/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
-----iteration:  72 target diff:  0.0026803125533493275 values:  -48.197807 ----- 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold4/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/218175338/fold4/train/agent4/trajs4.pk




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  11 target diff:  0.0020108297116080563 values:  -46.59404 ----- 

-------------------- adv learner --------------------
-----iteration:  73 target diff:  0.002182832332082226 values:  -48.067333 ----- 

-----iteration:  12 target diff:  0.0026038657483406953 values:  -46.58743 ----- 

-----iteration:  74 target diff:  0.0024638431580363113 values:  -47.999294 ----- 

-----iteration:  13 target diff:  0.002110995049404941 values:  -46.515324 ----- 

-----iteration:  75 target diff:  0.0017419259975193768 values:  -47.963017 ----- 

-----iteration:  14 target diff:  0.00342419406348283 values:  -46.504875 ----- 

-----iteration:  76 target diff:  0.002177014546668908 values:


-------------------- adv learner --------------------
-----iteration:  11 target diff:  0.0015213337793324032 values:  -51.911804 ----- 

-----iteration:  30 target diff:  0.0026504021311751806 values:  -46.517036 ----- 

-----iteration:  31 target diff:  0.002351233611124313 values:  -46.519745 ----- 

-----iteration:  12 target diff:  0.0020642735295884913 values:  -51.96474 ----- 

-----iteration:  32 target diff:  0.0021975433768751437 values:  -46.51069 ----- 

-----iteration:  33 target diff:  0.002313611875577969 values:  -46.469994 ----- 

-----iteration:  13 target diff:  0.0027997260479464183 values:  -51.96802 ----- 

-----iteration:  34 target diff:  0.0029020542945524226 values:  -46.503304 ----- 

-----iteration:  14 target diff:  0.0021754907270395115 values:  -51.88967 ----- 

-----iteration:  -----iteration: 15 35  target diff: target diff:  0.0021877740591685507 0.002439653013262215  values: values:  -46.47791  -51.923523----- 
 
----- 

-----iteration:  16 target di


-----iteration:  5 target diff:  0.002137846231387458 values:  -51.265907 ----- 

-----iteration:  63 target diff:  0.0018995839393536273 values:  -46.185726 ----- 

-----iteration:  6 target diff:  0.0023025532454664533 values:  -51.27189 ----- 

-----iteration:  64 target diff:  0.002073017121863686 values:  -46.20719 ----- 

-----iteration:  0 target diff:  0.9199999125119872 values:  -57.357204 ----- 

-----iteration:  7 target diff:  0.002182146904892728 values:  -51.299 ----- 

-----iteration:  65 target diff:  0.0021952876353463664 values:  -46.21902 ----- 

-----iteration:  8 target diff:  0.001902263229579603 values:  -51.31586 ----- 

-----iteration:  1 target diff:  0.003028279075074335 values:  -57.328514 ----- 

-----iteration:  9 target diff:  0.0018568284657369861 values:  -51.379303 ----- 

-----iteration:  66 target diff:  0.0018537619687003803 values:  -46.166325 ----- 

-----iteration:  2 target diff:  0.002156925919424216 values:  -57.33524 ----- 

-----iteration: 

-----iteration:  24 target diff:  0.0028397399119638777 values:  -56.914246 ----- 

-----iteration:  92 target diff:  0.002010178377159582 values:  -45.981754 ----- 

-----iteration:  25 target diff:  0.002597399098557894 values:  -56.897915 ----- 

-----iteration:  93 target diff:  0.001965927659872373 values:  -45.989933 ----- 

-----iteration:  26 target diff:  0.002541808020172768 values:  -56.874332 ----- 

-----iteration:  94 target diff:  0.001805497120039007 values:  -45.998375 ----- 

-----iteration:  27 target diff:  0.0027671877179529635 values:  -56.748055 ----- 

-----iteration:  95 target diff:  0.0019201404476053329 values:  -45.975094 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  96 target diff:




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

 target diff:  0.001433210307556666 values:  -51.684048 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have d


-----iteration:  5 target diff:  0.0020450883836182153 values:  -51.78089 ----- 

-----iteration:  47 target diff:  0.002982016358765711 values:  -54.167225 ----- 

-----iteration:  6 target diff:  0.002379033026756816 values:  -51.791042 ----- 

-----iteration:  48 target diff:  0.0020270281636538203 values:  -54.033924 ----- 

-----iteration:  0 target diff:  0.9174245598204257 values:  -45.691082 ----- 

-----iteration:  7 target diff:  0.0018436913300383271 values: -----iteration:   49-51.729324 target diff:   -----0.002812830169024751  values: 

 -53.867306 ----- 

-----iteration:  1 target diff:  0.0019928533448549473 values:  -45.77091 ----- 

-----iteration:  50 target diff:  0.0024755639782318836 values:  -53.78417 ----- 

-----iteration:  2 target diff:  0.0015690735235859422 values:  -45.82771 ----- 

-----iteration:  8 target diff:  0.0020198425250962954 values:  -51.89674 ----- 

-----iteration:  3 target diff:  0.0027298109909379246 values:  -45.89444 ----- 

-----iterat

values:  -49.392696 ----- 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/798842024/fold4/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/798842024/fold4/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
-----iteration:  5 target diff:  0.0024995108423236283 values:  -45.814465 ----- 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/798842024/fold4/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/798842024/fold4/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/798842024/fold4/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_float

-----iteration:  26 target diff:  0.002145470487989431 values:  -45.49343 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  27 target diff:  0.0020496316732110404 values:  -45.41495 ----- 

-----iteration:  28 target diff:  0.002105751229275919 values:  -45.379555 ----- 

-----iteration:  0 target diff:  0.9221678977358029 values:  -60.50405 ----- 

-----iteration:  29 target diff:  0.0021407267126326355 values:  -45.40975 ----- 

-----iteration:  1 target diff:  0.003709263151714651 values:  -60.52755 ----- 

-----iteration:  30 target diff:  0.002329212219543408 values:  -45.349796 ----- 

-----iteration:  2 target diff:  0.003198451776441549 values:  -60.543873 ----- 

-----iteration:  3 target diff:  0.00297161




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  7 target diff:  0.004227507025767379 values:  -60.653297 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change al

-----iteration:  93 target diff:  0.002903975549438265 values:  -49.59199 ----- 

-----iteration:  39 target diff:  0.0028893467220965953 values:  -44.996582 ----- 

-----iteration:  94 target diff:  0.0033452521145334205 values:  -49.452175 ----- 

-----iteration:  40 target diff:  0.002284266543848468 values:  -44.884876 ----- 

-----iteration:  95 target diff:  0.0025127165657758536 values:  -49.37357 ----- 

-----iteration:  41 target diff:  0.0024325035378843013 values:  -44.828712 ----- 

-----iteration:  96 target diff:  0.0022414270573890673 values:  -49.1638 ----- 

-----iteration:  42 target diff:  0.002756496248282298 values:  -44.73753 ----- 

-----iteration:  97 target diff:  0.004158465288873301 values:  -49.045864 ----- 

-----iteration:  98 target diff:  0.0027204641513687788 values:  -48.92074 ----- 

-----iteration:  43 target diff:  0.0024959678669607553 values:  -44.62878 ----- 

-----iteration:  44 target diff:  0.002420868898618622 values:  -44.54877 ----- 

-----




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  45 target diff:  0.0023109710169429204 values:  -44.47017 ----- 

-----iteration:  46 target diff:  0.002041342419196373 values:  -44.366238 ----- 

-----iteration:  47 target diff:  0.0027742160920467065 values:  -44.327656 ----- 

-----iteration:  48 target diff:  0.0020920737670182057 values:  -44.270557 ----- 

-----iter

-----iteration:  6 target diff:  0.0020098192963494714 values:  -54.89886 ----- 

-----iteration:  7 target diff:  0.0022284404157390052 values:  -54.8931 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  8 target diff:  0.002126614693222488 values:  -54.80306 ----- 

-----iteration:  9 target diff:  0.0020530385928169063 values:  -54.803333 ----- 

-----iteration:  10 target diff:  0.0020598584918117492 values:  -54.899162 ----- 

-----iteration:  11 target diff:  0.0026146896302366644 values:  -54.945293 ----- 

-----iteration:  12 target diff:  0.0021963228252430164 values:  -54.990864 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/717354021/fold4/train/agent2/ckpt/offline_qr_dqn_9000.ckpt
-----iteration:  13 target diff:  0.0022399967143687323 values:  -54.999508 ----- 

-----iteration:  14 target diff:  0.0017832363990075255 values:  -55.05137 ----- 



To change all layers to have dtype float64 by default, call `tf.k

-----iteration:  63 target diff:  0.0020227968162803023 values:  -45.505264 ----- 

-----iteration:  94 target diff:  0.0029593680936232394 values:  -45.008167 ----- -----iteration: 

 64 target diff:  0.001889136740822142 values:  -45.48377 ----- 

-----iteration:  65 target diff:  0.001864634384516981 values: -----iteration:   -45.4736595  target diff: -----  
0.002995777417798732 
values:  -44.89877 ----- 

-----iteration:  96 target diff:  0.003056352775595576 -----iteration: values:  -44.763805  -----66  
target diff: 
 0.0019075777922352873 values:  -45.480087 ----- 

-----iteration:  67 target diff:  0.0014875092095007116 values:  -45.46105 ----- 

-----iteration:  97 target diff:  0.002697300369489554 values:  -44.68503 ----- 

-----iteration:  98 target diff:  0.0027583525187328544 values:  -44.57711 ----- 

-----iteration:  99 target diff:  0.002250520966855268 values:  -44.488346 ----- 

-------------------- ckpt:  10000 --------------------
Loaded trajectories from load pat




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  0 target diff:  0.9188359

-----iteration:  29 target diff:  0.003082292798806857 values:  -53.167454 ----- 

-----iteration:  74 target diff:  0.002210547916917799 values:  -43.925175 ----- 

-----iteration:  30 target diff:  0.0028985394155169346 values:  -53.000156 ----- 

-----iteration:  75 target diff:  0.0023267948842981695 values:  -43.89145 ----- 

-----iteration:  31 target diff:  0.002768651687780031 values:  -52.869278 ----- 

-----iteration:  76 target diff:  0.0020430671425075058 values:  -43.838757 ----- 

-----iteration:  32 target diff:  0.002825701210981948 values:  -52.763367 ----- 

-----iteration:  77 target diff:  0.002031727856713327 values:  -43.80776 ----- 

-----iteration:  33 target diff:  0.002812758726451837 values:  -52.65237 ----- 

-----iteration:  78 target diff:  0.0029226394826950674 values:  -43.76535 ----- 

-----iteration:  34 target diff:  0.002525219449993282 values:  -52.564457 ----- 

-----iteration:  79 target diff:  0.0026501638185640435 values:  -43.752316 ----- 

---


-----iteration:  68 target diff:  0.0018520644637844265 values:  -48.51229 ----- 

-----iteration:  69 target diff:  0.002662308161926769 values:  -48.38418 ----- 

-----iteration:  0 target diff:  0.918195712301071 values:  -46.004253 ----- 

-----iteration:  70 target diff:  0.001944852955078468 values:  -48.276558 ----- 

-----iteration:  1 target diff:  0.0016388535928249259 values:  -46.000874 ----- 

-----iteration:  71 target diff:  0.0017597848703988772 values:  -48.251457 ----- 

-----iteration:  72 target diff:  0.0016962624081306135 values:  -48.213795 ----- 

-----iteration:  2 target diff:  0.002186976098675998 values:  -46.047462 ----- 

-----iteration:  73 target diff:  0.0017598483335806965 values:  -48.059647 ----- 

-----iteration:  3 target diff:  0.0018481603057778897 values:  -46.082863 ----- 

-----iteration:  74 target diff:  0.0018583213561594748 values:  -48.06343 ----- 

-----iteration:  75 target diff:  0.0021325407581219073 values:  -47.936275 ----- 

-----

-----iteration:  51 target diff:  0.002215106262897027 values:  -46.44452 ----- 

-----iteration:  52 target diff:  0.002465370368185411 values:  -46.36999 ----- 

-----iteration:  53 target diff:  0.0030441671054035305 values:  -46.306854 ----- 

-----iteration:  54 target diff:  0.0024903931992541744 values:  -46.32536 ----- 

-----iteration:  55 target diff:  0.0021342423325566703 values:  -46.33906 ----- 

-----iteration:  56 target diff:  0.002104929456874348 values:  -46.27395 ----- 

-----iteration:  57 target diff:  0.002511887073478279 values:  -46.228382 ----- 

-----iteration:  58 target diff:  0.0018474754498898794 values:  -46.180973 ----- 

-----iteration:  59 target diff:  0.0019678872535355846 values:  -46.127117 ----- 

-----iteration:  60 target diff:  0.0027422992115661094 values:  -46.076008 ----- 

-----iteration:  61 target diff:  0.0020938438595371773 values:  -46.024773 ----- 

-----iteration:  62 target diff:  0.002041997853031011 values:  -45.95873 ----- 

sav


-----iteration:  0 target diff:  0.9179390178199448 values:  -45.7352 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/717354021/fold4/train/agent4/ckpt/offline_qr_dqn_7000.ckpt
-----iteration:  1 target diff:  0.0032636523727647988 values:  -45.74802 ----- 

-----iteration:  2 target diff:  0.003172011485490329 values:  -45.770084 ----- 

-----iteration:  3 target diff:  0.002678360635749147 values:  -45.76186 ----- 

-----iteration:  4 target diff:  0.0026290772055760484 values:  -45.736954 ----- 

-----iteration:  5 target diff:  0.0022278240867926275 values:  -45.734993 ----- 

-----iteration:  6 target diff:  0.002901260267984651 values:  -45.726124 ----- 

-----iteration:  7 target diff:  0.0020885479688351966 values:  -45.748478 ----- 

-----iteration:  8 target diff:  0.0016762042244822064 values:  -45.800247 ----- 

-----iteration:  9 target diff:  0.0026232566925461767 values:  -45.84439 ----- 

-----iteration:  10 target diff:  0.00264299540013968

-----iteration:  55 target diff:  0.002308354010923555 values:  -45.048134 ----- 

-----iteration:  56 target diff:  0.0024603322880448033 values:  -45.032684 ----- 

-----iteration:  57 target diff:  0.0018280740257898052 values:  -45.0009 ----- 

-----iteration:  58 target diff:  0.002195512789465082 values:  -44.91344 ----- 

-----iteration:  59 target diff:  0.0027972075111710394 values:  -44.852013 ----- 

-----iteration:  60 target diff:  0.0032180650491340185 values:  -44.787113 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  61 target diff:  0.002152000235470819 values:  -44.7329 ----- 

-----iteration:  62 target diff:  0.0019776256706905016 values:  -44.70646 ----- 

-----iteration:  0 target diff:  0.9


----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floa




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  97 target diff:  0.002264614677211822 values:  -44.073406 ----- 

-----iteration:  98 target diff:  0.002217071743797682 values:  -44.113678 ----- 

-----iteration:  99 target diff:  0.0025349004914970927 values:  -44.117687 ----- 

-------------------- ckpt:  9000 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold4/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/qr_dqn/tmp/932136058/fold4/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories fr




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-------------------- fqe on dqn & sale --------------------
-------------------- fqe on dqn & sale --------------------


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can 


-------------------- adv learner --------------------
-------------------- fqe on dqn & sale --------------------


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  0 target diff:  0.9184104347511594 values:  -46.143227 ----- 

-----iteration:  1 target diff:  0.004649422867194488 values:  -46.15371 ----- 

-----iteration:  2 target diff:  0.003854938058376104 values:  -46.145264 ----- 

-----iteration:  3 target diff:  0.002603341537703617 values:  -46.138462 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autoc

-----iteration:  24 target diff:  0.0025505732912989073 values:  -46.19985 ----- 

-----iteration:  25 target diff:  0.002120066871277432 values:  -46.171246 ----- 

-----iteration:  26 target diff:  0.003251511997779554 values:  -46.22585 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  27 target diff:  0.0024965491815981482 values:  -46.187202 ----- 

-----iteration:  0 target diff:  0.9202681160711224 values:  -57.676785 ----- 

-----iteration:  28 target diff:  0.002462928454943393 values:  -46.183903 ----- 

-----iteration:  1 target diff:  0.0019295302888193747 values:  -57.644566 ----- 

-----iteration:  29 target diff:  0.0030129666787255317 values:  -46.235806 ----- 

-----iteration:  2 target diff:  0.00

-----iteration:  0 target diff:  0.9182413368995388 values:  -58.32298 ----- 

-----iteration:  1 target diff:  0.003196644679412813 values:  -58.365776 ----- 

-----iteration:  55 target diff:  0.0021566886121700273 values:  -45.192493 ----- 

-----iteration:  2 target diff:  0.0015394689971229558 values:  -58.37411 ----- 

-----iteration:  56 target diff:  0.0023371785155227114 values:  -45.032093 ----- 

-----iteration:  3 target diff:  0.0016676985622383223 values:  -58.3677 ----- 

-----iteration:  4 target diff:  0.00159268229692825 values:  -58.158592 ----- 

-----iteration:  57 target diff:  0.003452476500814375 values:  -44.958214 ----- 

-----iteration:  5 target diff:  0.0021696546861446637 values:  -58.09148 ----- 

-----iteration:  6 target diff:  0.0015013321148859547 values:  -58.15155 ----- 

-----iteration:  58 target diff:  0.002544920922907829 values:  -44.930965 ----- 

-----iteration:  7 target diff:  0.0026060578991465863 values:  -58.153896 ----- 

-----iteration




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.
 
62 target diff:  0.0025201918006515275 values:  -44.810566 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv lea


-------------------- adv learner --------------------
-----iteration:  90 target diff:  0.002369625715816651 values:  -44.37178 ----- 

-----iteration:  91 target diff:  0.0022981778584396403 values:  -44.33488 ----- 

-----iteration:  92 target diff:  0.0020800404247141467 values:  -44.28493 ----- 

-----iteration:  93 target diff:  0.002261776511632984 values:  -44.30873 ----- 

-----iteration:  94 target diff:  0.0025143087673366905 values:  -44.269554 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  95 target diff:  0.001979543154811761 values:  -44.221466 ----- 

-----iteration:  96 target diff:  0.0022134443498528734 values:  -44.241314 ----- 

-----iteration:  97 target diff:  0.002517223688444855 values:  -44.19477 ----- 

-----iteration:  98 target diff:  0.0020159615256038293 values:  -44.1442 ----- 

-----iteration:  99 target diff:  0.0026311395593066784 values:  -44.160175 ----- 

-------------------- ckpt:  10000 --------------------
L




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('floa

-----iteration:  6 target diff:  0.0026302219202583134 values:  -46.935555 ----- 

-----iteration:  7 target diff:  0.0021060714115459585 values:  -46.94107 ----- 

-----iteration:  8 target diff:  0.0028129102344645303 values:  -46.9157 ----- 

-----iteration:  9 target diff:  0.002268807365032187 values:  -46.895542 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  10 target diff:  0.0019920933416990045 values:  -46.901665 ----- 

-----iteration:  11 target diff:  0.002178767776117465 values:  -46.886456 ----- 

-----iteration:  12 target diff:  0.0018113432495567526 values:  -46.83901 ----- 

-----iteration:  13 target diff:  0.0018958449356414402 values:  -46.987797 ----- 

-----iteration:  14 target diff:  0.0033798011239032527 values:  -46.988834 ----- 

-----iteration:  15 target diff:  0.0024717445412617113 values:  -46.96253 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To chan


-----iteration:  36 target diff:  0.0017939932093525045 values:  -46.442997 ----- 

-----iteration:  0 target diff:  0.9194654562756657 values:  -58.135277 ----- 

-----iteration:  37 target diff:  0.002205587102261255 values:  -46.492046 ----- 

-----iteration:  1 target diff:  0.0012904812112667465 values:  -58.144226 ----- 

-----iteration:  38 target diff:  0.002565399628551233 values:  -46.51122 ----- 

-----iteration:  39 target diff:  0.0021316931858611616 values:  -46.56318 ----- 

-----iteration:  40 target diff:  0.0022330980710296797 values:  -46.608135 ----- 

-----iteration:  41 target diff:  0.0025433783292307596 values:  -46.63418 ----- 

-----iteration:  42 target diff:  0.002209086098682238 values:  -46.59961 ----- 

-----iteration:  43 target diff:  0.0022542770058833913 values:  -46.536427 ----- 

-----iteration:  44 target diff:  0.0018381668779625894 values:  -46.579613 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_flo

-----iteration:  72 target diff:  0.002096421032146848 values:  -42.93005 ----- 

-----iteration:  73 target diff:  0.0019360762650014152 values:  -42.928867 ----- 

-----iteration:  74 target diff:  0.0021462466407715882 values:  -42.94437 ----- 

-----iteration:  75 target diff:  0.002315560535201298 values:  -42.991745 ----- 

-----iteration:  76 target diff:  0.002434261047962204 values:  -43.03884 ----- 

-----iteration:  77 target diff:  0.0021943066160313007 values:  -42.974964 ----- 

-----iteration:  78 target diff:  0.0022744634566832195 values:  -42.927116 ----- 

-----iteration:  79 target diff:  0.001942316255185676 values:  -42.931206 ----- 

-----iteration:  80 target diff:  0.0019252762847657926 values:  -42.96533 ----- 

-----iteration:  81 target diff:  0.001954910092565119 values:  -42.994194 ----- 

-----iteration:  82 target diff:  0.001819917836896095 values:  -42.958572 ----- 

-----iteration:  83 target diff:  0.0019459445438939533 values:  -42.921597 ----- 

--