In [1]:
import os
import copy
import random
import gym
import pickle

import numpy as np
import pandas as pd
import tensorflow as tf
tf.keras.backend.set_floatx('float32')

from itertools import permutations
from sklearn.model_selection import KFold, GridSearchCV

from multiprocessing import set_start_method
import multiprocessing as mp

# Put the parent of the current working directory on the import path.
# NOTE(review): presumably this is where the project-local `seal` package
# imported below lives -- confirm against the repository layout.
import sys  # needed for sys.path; not imported anywhere else in this cell

path = os.path.abspath('..')
if path not in sys.path:
    sys.path.append(path)

from seal.agents.default_config import DEFAULT_CONFIG as config
from seal.agents.dqn import DQNAgent
# from seal.agents.qr_dqn import QuantileAgent
# from seal.agents.multi_head_dqn import MultiHeadDQNAgent
# from seal.agents.discrete_bcq import DiscreteBCQAgent

from seal.algos.kfold import CVS, KFoldCV
from seal.algos.advantage_learner import AdvantageLearner
from seal.algos.behavior_cloning import BehaviorCloning
from seal.algos.density_ratio import VisitationRatioModel
from seal.algos.fqe import FQE

def _split_agent_config(base_config, kf, idx):
    """Return an independent copy of ``base_config`` pointed at inner split ``idx`` of ``kf``."""
    config_idx = copy.deepcopy(base_config)
    config_idx['persistent_directory'] = kf.agent_paths[idx]
    config_idx['checkpoint_path'] = kf.ckpt_paths[idx]
    return config_idx


def one_step(seed):
    """Run one replicate of the DDQN-vs-SEAL comparison for a single seed.

    For each of ``nfolds`` outer folds produced by ``CVS``:

    1. train one ``DQNAgent`` on the fold's training trajectories and
       ``kf.n_splits`` further agents on the inner ``KFoldCV`` splits;
    2. fit a ``BehaviorCloning`` model on the states/actions in ``kf.trajs``;
    3. for every checkpoint in ``ckpts``, reload the agents, call
       ``kf.update_q`` and train an ``AdvantageLearner`` on the row-centred
       ``qtildes``;
    4. run ``FQE`` on the fold's test trajectories for both the DQN greedy
       policy and the advantage learner's greedy policy.

    Parameters
    ----------
    seed : int
        Seeds Python's ``random``, NumPy and TensorFlow, and is also passed
        as ``random_state`` to ``CVS`` and ``KFoldCV``.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``(fold, checkpoint step)`` with columns ``'dqn'`` and
        ``'seal'``; each cell holds the ``.values`` attribute of the
        corresponding fitted ``FQE`` object.
    """
    # Seed every RNG imported by this notebook so a replicate is repeatable.
    random.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)

    path = './data/mh/ddqn/trajs_mh.pkl'
    nfolds = 5
    n_splits = 5
    ckpts = (np.arange(10) + 1)*1000  # checkpoint steps 1000, 2000, ..., 10000

    num_actions = 5

    # Work on a private copy: `config` is the module-level DEFAULT_CONFIG dict,
    # and editing it in place would leak these settings to every other user of
    # that dict in the same process.
    cfg = copy.deepcopy(config)
    cfg['online'] = False
    cfg['lr'] = 5e-4
    cfg['decay_steps'] = 50000
    cfg['max_training_steps'] = 10000
    cfg['training_steps_to_checkpoint'] = 1000
    cfg['training_steps_to_eval'] = 100000
    cfg['hiddens'] = [64,64]
    cfg['double'] = True
    cfg['dueling'] = False

    # Both FQE fits below share exactly these settings.
    fqe_kwargs = dict(num_actions=num_actions, activation='tanh',
                      hiddens=cfg['hiddens'], max_iter=100, eps=0.0015)

    index = pd.MultiIndex.from_product([np.arange(nfolds), ckpts])
    columns = ['dqn',  'seal']
    rets = pd.DataFrame(index=index, columns=columns)

    print('-'*20, 'start', '-'*20)
    cvs = CVS(path, n_splits=nfolds, random_state=seed)
    cvs.split()
    for fold in range(nfolds):
        train_path = cvs.train_paths[fold] + 'trajs.pkl'
        kf = KFoldCV(train_path, n_trajs=None, n_splits=n_splits, shuffle=False, random_state=seed)
        kf.split()

        print('-'*20, 'training agent', '-'*20)
        # agent trained on the whole training split of this fold
        cfg['persistent_directory'] = kf.agent_path
        cfg['checkpoint_path'] = kf.ckpt_path
        agent = DQNAgent(num_actions=num_actions, config=cfg)
        agent.learn()

        print('-'*20, 'training agents', '-'*20)
        # agent_1, ..., agent_K: one per inner split, each with its own config copy
        for idx in range(kf.n_splits):
            agent_idx = DQNAgent(num_actions=num_actions,
                                 config=_split_agent_config(cfg, kf, idx))
            agent_idx.learn()

        # held-out trajectories used by fitted q evaluation
        # NOTE(review): pickle.load can execute arbitrary code; this path comes
        # from `cvs` above and is presumably written by cvs.split() -- confirm
        # it is never pointed at untrusted data.
        test_path = cvs.test_paths[fold] + 'trajs.pkl'
        with open(test_path, 'rb') as f:
            trajs = pickle.load(f)

        print('-'*20, 'behavior cloning', '-'*20)
        # behavior cloning on every (state, action) pair in kf.trajs;
        # transition[0] is used as the state and transition[1] as the action
        bc = BehaviorCloning(num_actions=num_actions)
        states  = np.array([transition[0] for traj in kf.trajs for transition in traj])
        actions = np.array([transition[1] for traj in kf.trajs for transition in traj])
        bc.train(states, actions)

        for ckpt in ckpts:
            print('-'*20, 'ckpt: ', ckpt, '-'*20)
            # Rebuild the agents and restore the weights saved at this step.
            agent = DQNAgent(num_actions=num_actions, config=cfg)
            agent.load(kf.ckpt_path + 'offline_ddqn_{}.ckpt'.format(ckpt))

            agents = []
            for idx in range(kf.n_splits):
                agent_idx = DQNAgent(num_actions=num_actions,
                                     config=_split_agent_config(cfg, kf, idx))
                agent_idx.load(kf.ckpt_paths[idx] + 'offline_ddqn_{}.ckpt'.format(ckpt))
                agents.append(agent_idx)
            # `states` is rebound here to whatever kf.update_q returns.
            states, qvalues, qtildes = kf.update_q(agents, bc)

            print('-'*20, 'adv learner', '-'*20)
            # Disabled 'dml' variant (advantages from the raw qvalues):
            # advs1 = qvalues - qvalues.mean(axis=1, keepdims=True)
            # agent1 = AdvantageLearner(num_actions=num_actions)
            # agent1._train(states, advs1)

            # Centre each row of qtildes on its mean across axis 1.
            advs2 = qtildes - qtildes.mean(axis=1, keepdims=True)
            agent2 = AdvantageLearner(num_actions=num_actions)
            agent2._train(states, advs2)

            print('-'*20, 'fqe on dqn & seal', '-'*20)
            fqe_dqn = FQE(agent.greedy_actions, **fqe_kwargs)
            fqe_dqn.train(trajs)
            # fqe_dml = FQE(agent1.greedy_actions, num_actions=num_actions)
            # fqe_dml.train(trajs)
            fqe_seal = FQE(agent2.greedy_actions, **fqe_kwargs)
            fqe_seal.train(trajs)

            rets.loc[(fold, ckpt), 'dqn'] = fqe_dqn.values
            # rets.loc[(fold, ckpt), 'dml'] = fqe_dml.values
            rets.loc[(fold, ckpt), 'seal'] = fqe_seal.values

    return rets

In [None]:
# Run five replicates (seeds 0-4) of one_step in parallel, one worker process
# per seed, then pickle the resulting list of per-seed DataFrames.
save_path = './data/mh/ddqn/'

# Using the pool as a context manager guarantees the worker processes are
# torn down even when a replicate raises; pool.map has already collected
# every result by the time the block exits.
with mp.Pool(5) as pool:
    rets = pool.map(one_step, range(5))

with open(save_path + 'rets_ddqn_mh.pkl', 'wb') as f:
    pickle.dump(rets, f)

---------------------------------------- start--------------------   start --------------------start
----------------------------------------  start-------------------- start--------------------
 --------------------
 --------------------

-------------------- training agent --------------------
-------------------- training agent --------------------
-------------------- training agent --------------------
--------------------Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/218175338/fold0/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling! 
training agent --------------------


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.
--------------------
 training agent --------------------


To change all la



saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/218175338/fold0/train/agent/ckpt/offline_ddqn_1000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/932136058/fold0/train/agent/ckpt/offline_ddqn_1000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/717354021/fold0/train/agent/ckpt/offline_ddqn_1000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/209652396/fold0/train/agent/ckpt/offline_ddqn_1000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/798842024/fold0/train/agent/ckpt/offline_ddqn_1000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/717354021/fold0/train/agent/ckpt/offline_ddqn_2000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/218175338/fold0/train/agent/ckpt/offline_ddqn_2000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/932136058/fold0/train/agent/ckpt/offline_ddqn_2000.ckpt
saving model weights a




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/932136058/fold0/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocast


saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/798842024/fold0/train/agent0/ckpt/offline_ddqn_9000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/717354021/fold0/train/agent0/ckpt/offline_ddqn_10000.ckpt
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/717354021/fold0/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constr


saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/798842024/fold0/train/agent1/ckpt/offline_ddqn_10000.ckpt
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/798842024/fold0/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/932136058/fold0/train/agent2/ckpt/offline_ddqn_20


saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/209652396/fold0/train/agent2/ckpt/offline_ddqn_10000.ckpt
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/209652396/fold0/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/218175338/fold0/train/agent2/ckpt/offline_ddqn_10

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/932136058/fold0/train/agent4/ckpt/offline_ddqn_2000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/798842024/fold0/train/agent3/ckpt/offline_ddqn_10000.ckpt
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/798842024/fold0/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constru




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('floa

-----iteration:  1 target diff:  0.007391363720864589 values:  -59.303555 ----- 

-----iteration:  31 target diff:  0.0016695194266719128 values:  -52.56616 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/218175338/fold0/train/agent4/ckpt/offline_ddqn_10000.ckpt
-------------------- behavior cloning --------------------
-----iteration:  35 target diff:  0.00331804169477395 values:  -61.088955 ----- 

-----iteration:  2 target diff:  0.005764404519405364 values:  -59.33735 ----- 

-----iteration:  36 target diff:  0.004048139101461041 values:  -61.039062 ----- 

-----iteration:  3 target diff:  0.004873474976432776 values:  -59.34887 ----- 

-----iteration:  32 target diff:  0.00211310825114666 values:  -52.5534 ----- 

-----iteration:  37 target diff:  0.003886934485304525 values:  -60.97124 ----- 

-----iteration:  4 target diff:  0.0029667362745396005 values:  -59.280098 ----- 

-----iteration:  33 target diff:  0.0022860792978961074 values:  -52.508354 ----

-----iteration:  -----iteration: 5  21target diff:   target diff: 0.002473756750698071  0.002694385988062664values:   -54.54661values:  -59.71314  ----- -----
 


-----iteration:  3 target diff:  0.001972069188368377 values:  -54.14921 ----- 

-----iteration:  59 target diff:  0.003740730719188009 values:  -59.61692 ----- 

-----iteration:  6 target diff:  0.0025548556150722305 values:  -54.635662 ----- 

-----iteration:  4 target diff:  0.0027400507239604003 values:  -54.08865 ----- 

-----iteration:  22 target diff:  0.003851243364397292-----iteration:  7 values:   -59.762554 -----target diff:  

 0.002578159668780842 values:  -54.730984 ----- 

-----iteration:  60 target diff:  0.003065009925904391 values:  -59.5718 ----- 

-----iteration:  23 target diff:  0.00381688528794783 values:  -59.81808 ----- 

-----iteration:  8 target diff:  0.0023689448651275638 values:  -54.78764 ----- 

-----iteration:  5 -----iteration: target diff:   0.002400762356139525361  values: target diff:   -5

-----iteration:  3 target diff:  0.002024266510038581 Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/209652396/fold0/train/agent1/trajs1.pkl!values: 
 Refresh buffer every 1000000 sampling!-61.103855
 ----- 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/209652396/fold0/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/209652396/fold0/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
-----iteration:  48 Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/209652396/fold0/train/agent4/trajs4.pkl!target diff:  
0.0019995374591474276Refresh buffer every 1000000 sampling! values: 
 -59.99261 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of 

-----iteration:  71 target diff:  0.0024518334877315475 values:  -59.355206 ----- 

-----iteration:  25 target diff:  0.005192938239468704 values:  -61.180073 ----- 

-----iteration:  69 target diff:  0.001874285844162157 values:  -55.08294 ----- 

-----iteration:  24 target diff:  0.0018173691961775355 values:  -54.07076 ----- 

-----iteration:  72 target diff:  0.002357184138980141 values:  -59.36938 ----- 

-----iteration:  26 target diff:  0.004311645765575023 values:  -61.10941 ----- 

-----iteration:  11-----iteration:   70target diff:   0.0027047982441546848target diff:   values: 0.0018811254828002605  values:  -55.09873-53.24994  ---------- 

 

-----iteration:  25 target diff: -----iteration:   0.00172753180572892373  values: target diff:   -54.1760330.002112229204174491  values: -----  
-59.36613
 ----- 

-----iteration:  27 target diff:  0.002635494213489486 values:  -61.08421 ----- 

-----iteration:  71 target diff:  0.0019840181276378566 values:  -55.097836 ----- 

-----it

-----iteration:  2 target diff:  0.0021379355788685986 -------------------- values:  fqe on dqn & sale -54.144283-------------------- -----


-----iteration:  95 target diff:  0.0021064959471795972 values:  -59.052113 ----- 

-----iteration:  3 target diff:  0.0022414294214258964 values:  -54.178154 ----- 

-----iteration:  96 target diff:  0.002000743447236681 values:  -59.012707 ----- 

-----iteration:  39 target diff:  0.005243791439824406 values:  -59.35612 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  4 target diff:  0.002234158913391453 values:  -54.21554 ----- 

-----iteration:  97 target diff:  0.0018519838816350599 values:  -58.998623 ----- 

-----iteration:  5 target diff:  0.0020333092056444555 value

-----iteration:  6 target diff:  0.0018894238765675027 values:  -54.039368 ----- 

-----iteration:  2 target diff:  0.0026827146766156265 values:  -58.57713 ----- 

-----iteration:  10 -----iteration: target diff:   460.0021009007464150853  target diff: values:   0.0035240406512617316-52.426403  values: ----- -58.1217  ----- 



-----iteration:  7 target diff:  0.001939649381053133 values:  -53.9929 ----- 

-----iteration:  11 target diff:  0.0019500281802333436 values:  -52.448418 ----- 

-----iteration:  3 target diff:  0.002229321068484077 values:  -----iteration: -58.558952  -----47  

target diff:  0.0032051935023739587 values:  -58.0695 ----- 

-----iteration:  8 target diff:  0.0019052306939982594 values:  -54.084454 ----- 

-----iteration:  4 target diff:  -----iteration:  0.0021938432851929105 -----iteration: values:   48-58.58342712  target diff:  target diff: -----   0.00175566485017939660.0022892658057791443
  
values:  -52.541958values:  -57.977047 ----- 
 
----- 

-----it




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  20 target diff:  0.0022626662292491587 -----iteration: values:   -58.30084663  -----target diff:   
0.0019597744149618015
 values:  -56.65089 ----- 

-----iteration:  6 target diff:  0.0015708800505093272 values:  -51.770813 ----- 

-----iteration:  12 target diff:  0.0019160343175638746 values:  -53.23734 ----- 

-----itera




-----iteration:  74 target diff:  0.0016252122464819657 values:  -56.213596 ----- 

-----iteration:  0 target diff:  0.9231474756089161 values:  -52.658955 ----- 

-----iteration:  31 target diff:  0.0016073005101027388 values:  -58.148285 ----- 

-----iteration:  75 target diff:  0.0016346048695292526 values:  -56.19068 ----- 

-----iteration:  0 target diff:  0.9225851082667964 values:  -50.55215 ----- 

-----iteration:  1 target diff:  0.003290612711355457 values:  -52.665985 ----- 

-----iteration:  1 target diff:  -----iteration: 0.002221119248309357  76values:   target diff: -50.559673  0.001763175858252053-----  values: 
 
-56.08391 ----- 

-----iteration:  2 target diff:  0.002610487002054097 values:  -52.65205 ----- 

-----iteration:  32 target diff:  0.0020922615679194716 values:  -58.149685 ----- 

-----iteration:  2 target diff:  0.0012075793348537164 values:  -50.48602 -----iteration: -----  77 target diff: 

 0.0019471973512777121 values:  -56.049023 ----- 

-----itera



Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/218175338/fold0/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/218175338/fold0/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/218175338/fold0/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/218175338/fold0/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/218175338/fold0/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/209652396/fold0/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/209652396/fold0/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/209652396/fold0/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/209652396/fold0/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
-----iteration:  1 target diff:  0.0020270014924850003 values: -----iteration:   Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/209652396/fold0/train/agent4/trajs4.pkl!3
-61.70639Refresh buffer every 1000000 sampling! -----  




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To ch

Refresh buffer every 1000000 sampling! 0.003546137838241032 values:  -56.10218
 ----- 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/717354021/fold0/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' 


-------------------- adv learner --------------------
-----iteration:  8 target diff:  0.0019284284916722943 values:  -56.03036 ----- 

-----iteration:  4 target diff:  0.002492027045226152 values:  -53.474983 ----- 

-----iteration:  17 target diff:  0.0016143172452702586 values:  -62.1358 ----- 

-----iteration:  9 target diff:  0.0018270728567469502 values:  -56.05195 ----- 

-----iteration:  18 target diff:  0.0020219072860023514 values:  -62.079807 ----- 

-----iteration:  10 target diff:  0.0021574782490874707 values:  -56.086243 ----- 

-----iteration:  5 target diff:  0.001745299454014989 values:  -53.34925 ----- 

-----iteration:  19 target diff:  0.0018332653634109213 values:  -62.14452 ----- 

-----iteration:  11 target diff:  0.002026774229750153 values:  -56.162113 ----- 

-----iteration:  6 target diff:  0.0016474543814145243 values:  -53.41287 ----- -----iteration:  

20 target diff:  0.0018854339949853118 values:  -62.21372 ----- 

-------------------- fqe on dqn & sal

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/209652396/fold0/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/209652396/fold0/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/209652396/fold0/train/agent2/trajs2.pkl!-----iteration: 
Refresh buffer every 1000000 sampling! 19
 target diff:  0.0022267116178978246 values:  -57.27756 ----- 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/209652396/fold0/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
-----iteration:  39 -----iteration: target diff:   400.0036176356645061113  values: target diff:   0.0025093898758700196-60.626225 Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/209652396/fold0/train/agent4/trajs4.pkl!
-----  Refresh buffer every 1000000

target diff:   -----
 0.002064688167896121
 
values:  -52.469078 ----- 

-----iteration:  16 target diff:  0.0028339808405836078 values:  -53.663757 ----- 

-----iteration:  64 target diff:  0.0017500768639090911 values:  -52.42694-----iteration:   59 -----target diff:  

 0.0017273816934493256 values:  -58.139942 ----- 

-----iteration:  39 target diff:  0.002784218333333319 values:  -57.22412 ----- 

-----iteration:  0 target diff:  0.9219119475036294 values:  -53.694004 ----------iteration:   17
 
target diff:  0.0020015690937995813 values:  -53.938484 ----- 

-----iteration:  -----iteration: 65  60target diff:   0.001954396205806303target diff:   values:  -52.33833 ----- 
0.0017918307845542038 values:  -58.042942
 ----- 

-----iteration:  40 target diff:  0.0022975724184084055 values:  -57.227226 ----- 

-----iteration:  1 target diff:  0.002344134817920464 values:  -53.6771 ----- -----iteration: 
 18 target diff: 
 0.0031552592818725523 values:  -54.09326 ----- 

-----iteration:  

-----iteration:  30 target diff:  0.0019227362000457753 values:  -53.921772 ----- 

-----iteration:  14 target diff:  0.0024429199650858194 values:  -53.81942 ----- 

-----iteration:  75 target diff:  0.001834006834349402 values:  -57.090942 ----- 

-----iteration:  0 target diff:  0.9225985556318331 values:  -58.657463 ----- 

-----iteration:  31 target diff:  0.0021158091920017207 values:  -54.00411 ----- 

-----iteration:  76 target diff:  0.0020071253622095794 values:  -57.000774 ----- 

-----iteration:  15 target diff:  0.0018306630485588433 values:  -53.88296 ----- 

-----iteration:  1 target diff:  0.0032939060466917695 values:  -58.691135 ----- 

-----iteration:  32 target diff:  0.002449808529883559 values:  -54.01371 ----- 

-----iteration:  2 target diff:  0.0028197330196669053 values:  -58.701603 ----- 

-----iteration:  77 target diff:  0.0016198541424889733 values:  -56.86813 ----- 

-----iteration:  16 target diff:  0.0020445840497490906 values:  -53.787052 ----- 

-----

-----iteration:  12 target diff:  0.0019629985836911736 values:  -54.9472 ----- 

-----iteration:  47 target diff:  0.0023050484252162113 values:  -53.771206 ----- 

-----iteration:  92 target diff:  0.0019264304184396864 values:  -55.773876 ----- 

-----iteration:  13 target diff:  0.0022704105511980187 values:  -55.01391 ----- 

-----iteration:  48 target diff:  0.002301370591975741 values:  -53.71964 ----- -----iteration: 
 
15 target diff:  0.002825367628432227 values:  -59.32215 ----- 

-----iteration:  93 target diff:  0.0014817594769357782 values:  -55.773315 ----- 

-----iteration:  14 target diff:  0.0023279934528846282 values:  -55.005486 ----- 

-------------------- fqe on dqn & sale --------------------
0.002453632956684807 values:  -59.382954 ----- 

-----iteration:  49 target diff:  0.00228960205295744 values:  -53.679802 ----- 

-----iteration:  50 target diff:  0.003020555263264444 values:  -53.715523 ----- 

-----iteration:  15 target diff:  0.002276906614519369 values

 
target diff:  0.0017096859158989995 values:  -52.47736 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  8 target diff:  0.0027460692250188008 values:  -60.966206 ----- 

-----iteration:  71 target diff:  0.001610978567294469 values:  -52.32175 ----- 

-----iteration:  34 target diff:  0.001704440223670699 values:  -55.467094 ----- 

-----iteration:  9 target diff:


-----iteration:  82 target diff:  0.0017804374409615225 values:  -51.728535 ----- 

-----iteration:  6 target diff:  0.00233589969003457 values:  -53.949734 ----- 

-----iteration:  0 target diff:  0.9195031217456684 values:  -51.31834 ----- 

-----iteration:  7 target diff:  0.001688238832148625 values:  -53.83011 ----- 

-----iteration:  83 target diff:  0.001843539646823571 values:  -51.62101 ----- 

-----iteration:  -----iteration: 0 1 target diff:   target diff: 0.9219291587859044 0.004756073293183657  values: values:   -51.375103 -58.903645----- 
 -----
 

-----iteration:  8 target diff:  0.0020694171973041986 values:  -53.813557 ----- 

-----iteration:  84 target diff:  0.002015962645637516 values:  -51.593025 ----- -----iteration: 
 
2 target diff:  0.002959209262869305 values:  -51.44934 ----- 

-----iteration:  1 target diff:  0.002786985956199764 values:  -58.944817 ----- 

-----iteration:  3 target diff:  0.0028289295212770953 values:  -51.460876 ----- 

-----iteration:  -

-----iteration:  10 target diff:  0.001803572394284027Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/717354021/fold0/train/agent0/trajs0.pkl! 
values: Refresh buffer every 1000000 sampling! 


-----iteration:  2 target diff:  0.0021717489222057447 values:  -61.589123Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/717354021/fold0/train/agent1/trajs1.pkl! 
Refresh buffer every 1000000 sampling!
----- 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/717354021/fold0/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/717354021/fold0/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/717354021/fold0/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call

-----iteration:  9 target diff:  0.0019618940299020423 values:  -52.09674 ----- 

-----iteration:  15 target diff:  0.0024438514191066248 values:  -59.444252 ----- 

-----iteration:  10 target diff:  0.0019044640373481639 values:  -52.005383 ----- 

-----iteration:  16 target diff:  0.0015699073145730984 values:  -59.415474 ----- -----iteration: 
 
28 target diff:  0.002630080692044875 values:  -52.643948 ----- 

-----iteration:  1 target diff:  0.0033426147846566064 values:  -62.95898 ----- 

-----iteration:  11 target diff:  0.0018239573775340257 values:  -52.07093 ----- 

-----iteration:  29 target diff:  0.0029641381558539325 values:  -52.63747 ----- 
-----iteration:  
12 target diff:  0.0018990642556623904 values:  -52.137806 ----- 

-----iteration:  2 target diff:  0.0019427852582216993 values:  -62.894077 ----- 

-----iteration:  17 target diff:  0.0017782655084104476 values:  -59.439262 ----- 

-----iteration:  30 target diff:  0.0032771985556263924 values:  -52.661022 ----- 



Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/209652396/fold0/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/209652396/fold0/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/209652396/fold0/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/209652396/fold0/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/209652396/fold0/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this la

 



Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/798842024/fold0/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/798842024/fold0/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/798842024/fold0/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/798842024/fold0/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/798842024/fold0/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of th

 
target diff:  0.001424583910121628 -----iteration: values:  58  -61.408257target diff:   0.0031156387511788344 ----- 

values: -------------------- adv learner -------------------- -51.80238-----iteration: 
 2 target diff:   0.0017110028085883084 values: ----- 
 
-51.520184 ----- 

-----iteration:  38 target diff:  0.001846893144256232 values:  -53.59383 ----- 

-----iteration:  59 -----iteration: target diff:   0.0032955116932754223  target diff: values:   -51.76411 -----0.0014636222269359504 
 values:  -51.51333
 ----- 

-----iteration:  60 target diff:  0.0036237191745035697 values:  -51.75585 ----- 

-----iteration:  39 target diff:  0.0017317740985577697 values:  -53.539448 ----- 

-----iteration:  61 target diff:  0.0032352218119125397 values:  -51.696312 ----- 

-----iteration:  40 target diff:  0.0012120112038307878 values:  -53.5351 ----- 

-----iteration:  62 target diff:  0.003899728567549877 values:  -51.66082 ----- 

-------------------- fqe on dqn & sale ---------------




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.
target diff: 
 0.002052859047552799 values:  -54.147396 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dt


Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/209652396/fold0/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/209652396/fold0/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/209652396/fold0/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/209652396/fold0/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/209652396/fold0/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this l


-------------------- adv learner --------------------
-----iteration:  93 target diff:  0.0027467717174836214 values:  -51.471043 ----- 

-----iteration:  21 target diff:  0.0018533178664581031 values:  -54.70925 ----- 

-----iteration:  94 target diff:  0.0029171758394781248 values:  -51.45187 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  22 target diff:  0.0022356891347754294 values:  -54.813503 ----- 

-----iteration:  95 target diff:  0.002626604568577254 values:  -51.44906 ----- 

-----iteration:  23 target diff:  0.0017042913369662463 values:  -54.754063 ----- 

-----iteration:  96 target diff:  0.002406629660041672 values:  -51.453037 ----- 

-----iteration:  24 target diff:  0.0026317083850381237 values:  -54.783665 ----- 

-----iteration:  97 target diff:  0.0021551966417057296 values:  -51.433357 ----- 

-----iteration:  25 target diff:  0.002237911093783521 values:  -54.751194 ----- 

-----iteration:  98 target diff:  0.001867350287849


-----iteration:  35 target diff: -----iteration:   0.002196220146707558 5values:   target diff:  0.0029370234133971607 -54.81914values:  -58.956364 ----- 

 ----- 

-----iteration:  0-----iteration:   target diff: 36  0.9207398097294416target diff:   values: 0.0019161708089004501  values:  -53.706238-54.776672-----iteration:   ----- 
 ----- 6

 
target diff:  0.002322230182556543 values:  -58.944294 ----- 

-----iteration:  37 target diff:  0.002255487247286614 values: -----iteration:   -----iteration: -54.760925 1 7 ----- target diff:  0.0017606380995581594 target diff:  
values:   
0.0017142721184196102-53.698734  ----- 

values:  -58.974174 ----- 

-----iteration:  2 target diff:  0.0020536158978990493 values:  -53.774776 -----iteration:  ----- 8
 target diff: 
 0.001578999111372508 values:  -59.008366 -----iteration: ----- 
 
38 target diff:  0.002176222763319342 values:  -54.76812 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  4 target diff:  0.0021448594769097294 values:  -59.20368 ----- 

-------------------- adv learner --------------------
-----iteration:  54 target diff:  0.002018479039202959 values:  -55.00866 ----------iteration:   
3 
target diff:  0.0026705570347429833 values:  -53.063347 ----- 

-----iteration:  4 target diff:  0.0023816541584104427 values:  -52.816925 ----- 

-----iteration:  5 target diff:  0.001841655950067252 values:  -59.132935 ----- 

-----iteration:  4 target diff:  0.0028451572201038527 values:  -53.106678 ----- 

-----iteration: -----iteration:   655  target diff: target diff:   0.00210319955594123060.00252008509562829 values:  values:  -59.229755  ------55.01

-----iteration:  3 target diff:  0.0020334142580986235 values:  -----iteration: -60.33272  19 -----target diff:   0.0018884994819791188

 values:  -58.796215 ----- 

-----iteration:  68 target diff:  0.002143859933388009 values:  -54.706318 ----- 

-----iteration:  14 target diff:  0.001501779256196678 values:  -52.86253 ----- 

-----iteration:  4 target diff:  0.0017902565890462593 values:  -60.371525 ----- 

-----iteration:  69 target diff:  0.0016827992921386512 values:  -54.62678 ----- 

-----iteration:  20 target diff:  0.0021413293808946418 values:  -58.89286 ----- 

-----iteration:  15 target diff:  0.0020572473258200848 values:  -52.908325 ----- 

-----iteration:  5 target diff:  0.00199022225906142 values:  -60.457893 ----- 

-----iteration:  21 -----iteration: target diff:  70  0.004077840909105008target diff:   0.0016868600545218797values:   values: -58.970478 -54.637413 -----  -----

 

-----iteration:  6 target diff:  0.00158945164424861 values:  -60.53288 ----- 

-----ite


-----iteration:  34 target diff:  0.0023373496403043745 values:  -59.033062 ----- 

-----iteration:  9 target diff:  0.001630607869048834 values:  -53.82767 ----- 

-----iteration:  27 target diff:  0.001760735336919948 values:  -52.61159 ----- 

-----iteration:  28 target diff:  0.0016086474027334448 values:  -52.59742 ----- 
-----iteration: 
 0 target diff:  0.920708947830553 values:  -60.253155 ----- 

-----iteration:  35 target diff:  0.003053220905604749 values:  -58.981365 ----- 

-----iteration:  10 target diff:  0.00170149185998499 values:  -53.88113 ----- 

-----iteration:  36 target diff:  0.0025900884879818536 values:  -59.02137 ----- -----iteration:  
1 
target diff:  0.002054407007891528 values:  -60.25077 ----- 

-----iteration:  11 target diff:  0.0017538145732092503 values:  -53.97337 ----- 

-----iteration:  29 target diff:  0.0018066148275667183 values:  -52.50503 ----- 

-----iteration:  -----iteration:  30 37target diff:   0.0015271621966108523 values:  target diff

-----iteration:  20 target diff:  0.0027202467683756512 values:  -54.439114 ----- 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/932136058/fold0/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
target diff: values:   0.00217347444039491-58.78355 values:  -----  -53.964283
 
----- 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/932136058/fold0/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/932136058/fold0/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/932136058/fold0/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/932136058/fold0/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype 

-----iteration:  4 target diff:  0.001457447971248103 values:  -59.940132 ----- 

-----iteration:  7 target diff:  0.002297314734013885 values:  -53.3076 ----- 

-----iteration:  64 target diff:  0.0022283361098681567 values:  -58.788506 -----iteration:  38-----  target diff: 

 0.0019907433193454833 values:  -54.111477 ----- 

-----iteration:  9 target diff:  0.0029229723735901274 values:  -54.744045 ----- 

-----iteration:  8 target diff:  0.0019520947666188458 values:  -53.23936 ----- 

-----iteration:  65 target diff:  0.002212865187444411 values:  -58.788128 ----- 

-----iteration:  10 target diff:  0.0015210016516717033 values:  -54.756187 ----- 

-----iteration:  39 target diff:  0.0019241267969513772 values:  -54.071037 ----- 

-----iteration:  11 target diff:  0.0018875604111722101 values:  -54.79896 ----- 

-----iteration:  9 target diff:  0.0020792435713169943 values:  -53.170753 ----- 
-----iteration:  40
 target diff:  0.001689807389848756 values:  -54.0649 ----- 

-----it

-----iteration:  28 target diff:  0.0016845685792949191 values:  -----iteration: -55.727215  -----82  target diff: 
 
0.0020241305287840906 values:  -58.713093 ----- 

-----iteration:  83 target diff:  0.0016207818801586668 values:  -58.694267 ----- 

-----iteration:  29 target diff:  0.0024813989510521334 values:  -55.719013 ----- -----iteration: 

 22 target diff:  0.0020035372136513424 values:  -52.733723 ----- 

-----iteration:  84-----iteration:  target diff:  23 0.0014601603024282993 target diff:   values: 0.0017944350449372373  -58.6131values:   ------52.65148  
----- 


-------------------- ckpt:  6000 --------------------
-----iteration:  30 target diff:  0.0016364136221911547 values:  -55.803833Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/798842024/fold0/train/agent/trajs.pkl! 
-----Refresh buffer every 1000000 sampling! 


Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/798842024/fold0/train/agent0/trajs0.pkl!
Re




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  31 target diff:  0.003387890502488033 values:  -55.778236 ----- 

-------------

-----iteration:  15 target diff:  0.001970539859332711 values:  -59.57348 ----- 

-----iteration:  30 target diff:  0.0018175595924128575 values:  -52.85778 ----- 

-----iteration: -----iteration:   021  target diff: target diff:   0.9224432460939542 0.0020840747802785206values:  values:   -52.617588 ----- 

-58.501606 ----- 

-----iteration:  61 target diff:  0.0019932623105661813 values:  -54.806675 ----- 

-----iteration:  1-----iteration:   31target diff:   target diff: 0.003467724024618213 values:   -58.5959240.001984949961643284 values:  ----- -52.84965  
-----
 

-----iteration:  62 target diff:  0.0019338286559904105 values:  -54.75869 ----- 

-----iteration:  16 target diff:  0.0020412142593428054 values:  -59.625454 ----- 

-----iteration:  2 target diff:  0.002508168674931745 values:  -58.72208 ----- 

-----iteration:  32 target diff:  0.0020208812452638 values:  -52.897793 ----- 

-----iteration:  63 target diff:  0.001699774223035411 values:  -54.775146 ----- 

-----iterat

target diff: 
 0.0022373466918657386 values:  -52.643684 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have d

-----iteration:  0 target diff: -----iteration:   0.919219430840131135  target diff: values:   0.001959480331342888-60.815056  -----values:   
-59.679585
 ----- 

-----iteration:  2 target diff:  0.003187378572500322 values:  -52.856472 ----- 

-----iteration:  68 target diff:  0.0015068933899299798 values:  -50.779827 ----- 

-----iteration:  36 target diff:  0.0030365052933485318 values:  -59.69369 ----- 

-----iteration:  3 target diff:  0.0037131993711072035 values:  -52.95384 ----- 

-----iteration:  69 target diff:  0.001699617552475269 values:  -50.73936 ----- 

-----iteration:  1 target diff:  0.004075675384082481 values:  -60.85978 ----- 

-----iteration:  37 target diff:  0.0017188635618307863 values:  -59.73011 ----- 

-----iteration:  4 target diff:  0.0029062663422333244 values:  -52.984615 ----- 

-----iteration:  70 target diff:  0.001551894518510987 values:  -50.742943 ----- 

-----iteration:  38 target diff:  0.0016346085415249736 values:  -59.75114 ----- 

-----iterat

 ----- 

-------------------- adv learner --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/932136058/fold0/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/932136058/fold0/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!-----iteration:  11 
target diff:  0.0024429215173793245 values:  -53.22182 ----- 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/932136058/fold0/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/932136058/fold0/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can

-----iteration:  16 target diff:  0.0021603782022428368 values:  -53.255867 ----- 

-----iteration:  17 target diff:  0.0021860287711558263 values:  -53.22753 ----- 

-----iteration:  18 target diff:  0.00325106178421151 values:  -53.2689 ----- 

-----iteration:  19 target diff:  0.00273655271459309 values:  -53.414307 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  20 target diff:  0.002740561748483007 values:  -53.454723 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  21 target diff:  0.0030104672860884144 values:  -53.447754 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/798842024/fold0/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/798842024/fold0/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/798842024/fold0/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
-----iteration:  10 target diff:  0.002694917060579931 values:  -59.509468 ----- 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/798842024/fold0/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Laye



Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/717354021/fold0/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/717354021/fold0/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/717354021/fold0/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/717354021/fold0/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/717354021/fold0/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this 


values:  -58.5104 ----- 

-------------------- adv learner --------------------
-----iteration:  23 target diff:  0.0032284143035333215 values:  -52.923523 ----- 

-----iteration:  2 target diff:  0.0027921173058153273 values:  -58.530937 ----- 

-----iteration:  24 target diff:  0.0019843480112385213 values:  -52.882378 ----- 

-----iteration:  3 target diff:  0.002315342585739157 values:  -58.521038 ----- 

-----iteration:  34 target diff:  0.0024184430070692837 values:  -58.878437 ----- 

-----iteration:  25 target diff:  0.0018906897996655398 values:  -52.87797 ----- 

-----iteration:  4 target diff:  0.0022349909921380324 values:  -58.53471 ----- 

-----iteration:  35 target diff:  0.0017641566670415268 values:  -58.906487 ----- 

-----iteration:  26 target diff:  0.002264898928047506 values:  -52.8631 ----- 

-----iteration:  36 target diff:  0.0029397214172407608 values:  -58.93389 ----- 

-----iteration:  5 target diff:  0.002682273599875667 values:  -58.541973 ----- 

-----it

-----iteration:  39 target diff:  0.002405445465841408 values:  -52.48142 ----- 

-----iteration:  6 target diff:  0.002529237860254724 values:  -53.62922 ----- 

-----iteration:  18 target diff:  0.0016793278784500109 values:  -58.97154 ----- 

-----iteration:  7 target diff:  0.0017135946224495558 values:  -53.57719 ----- 

-----iteration:  40 target diff:  0.0022198993811126306 values:  -52.435764 ----- 

-----iteration:  8 target diff:  0.0021201371748779813 values:  -53.626823 ----- 

-----iteration:  19 target diff:  0.0022617439047877223 values:  -58.999336 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.
41
 target diff:  0.003476037545860982 values:  -52.336224 ----- 

-----iteration:  9 target diff:  0.0017710190809868815




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.


-----iteration:  67 target diff:  0.0018305811746642084 values:  -51.996723 ----- 

-----iteration:  3 target diff:  0.002033145741577922 values:  -62.01429 ----- 

-----iteration:  4 target diff:  0.002061415947710814 values:  -61.978966 ----- 

-----iteration:  68 target diff:  0.001876909462437534 values:  -52.064125 ----- 

-----iteration:  0 target diff:  0.920589492003001 values:  -53.00

Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/218175338/fold0/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
-----iteration:  19 target diff:  0.0022257171900500554 values: Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/218175338/fold0/train/agent2/trajs2.pkl! 
-58.392582Refresh buffer every 1000000 sampling! ----- 


Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/218175338/fold0/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/218175338/fold0/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by

58 target diff:  0.002057142116843496 values:  -53.615246 ----- 

-----iteration:  13 target diff:  0.001991152876268588 values:  -54.192295 ----- 

-----iteration: -----iteration:   5956  target diff:  target diff:  0.0019947679301677707 0.0026811862238015605 values:  values:  -53.581554 ------61.51704  
-----
 

-----iteration:  14 target diff:  0.0013903959490405612 values:  -54.17074 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  57 target diff:  0.0028976076565620785 values:  -61.552917 ----- 

-----iteration:  60 target diff:  0.002010520816156657 values:  -53.619514 ----- 

-----iteration:  58 target diff:  0.0029436662752889684 values:  -61.463093 ----- 

-----iteration:  61 target diff:  0.0018580845747

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/717354021/fold0/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/717354021/fold0/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/717354021/fold0/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/717354021/fold0/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
-----iteration:  68 target diff:  0.0021749031224665674 values:  -61.00708 ----- 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/717354021/fold0/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this lay

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/218175338/fold0/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/218175338/fold0/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/218175338/fold0/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/218175338/fold0/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.k


-----iteration:  26 target diff:  0.001969764593742571 values:  -59.1188 ----- 

-----iteration:  27 target diff:  0.002704612397397494 values:  -59.106934 ----- 

-----iteration:  0 target diff:  0.9195597369561263 values:  -54.632324 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  28 target diff:  0.002543749554890229 values:  -59.152485 ----- 

-----iteration:  1 target diff:  0.004865129774355508 values:  -54.58244 ----- 

-----iteration:  0 target diff: -----iteration:   290.9214158722342716  target diff: values:   -52.513820.0020899011047126577  -----iteration: values:  ----- -59.100727 

 ----- 

2 target diff:  0.00393080255789484 values:  -54.62531 ----- 

saving model weights at /home/jupyt/leyuan/SUPR

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/209652396/fold1/train/agent0/ckpt/offline_ddqn_7000.ckpt
-----iteration:  53 target diff:  0.0020467588593600615 values:  -52.68674 ----- 

-----iteration:  55 target diff:  0.0020647097819822117 values:  -54.079617 ----- 

-----iteration:  54 target diff:  0.001858345016928867 values:  -52.723625 ----- 

-----iteration:  56 target diff:  0.0014638093650189733 values:  -54.02713 ----- 

-------------------- ckpt:  10000 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/717354021/fold0/train/agent/trajs.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/717354021/fold0/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/717354021/fold0/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectori




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  56 target diff:  0.002013247511451407 values:  -52.658855 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/798842024/fold1/train/agent/ckpt/offline_ddqn_1000.ckpt
-----iteration:  57 target diff:  0.001959373714978185 values:  -52.629864 ----- 

-----iteration:  58 target diff:  0.0015959631193566504


-----iteration:  11 target diff:  0.0027337409712923006 values:  -53.457024 ----- 

-----iteration:  12 target diff:  0.0020727895629864316 values:  -53.406242 ----- 

-----iteration:  0 target diff:  0.9235461894350425 values:  -52.394714 ----- 

-----iteration:  1 target diff:  0.002588684496416012 values:  -52.50819 ----- 

-----iteration:  13 target diff:  0.0024959233907084835 values:  -53.451485 ----- 

-----iteration:  2 target diff:  0.003215111889807953 values:  -52.567684 ----- 

-----iteration:  14 target diff:  0.0023436186979226654 values:  -53.664562 ----- 

-----iteration:  3 target diff:  0.0023490595387622784 values:  -52.54545 ----- 

-----iteration:  15 target diff:  0.00268104063601017 values:  -53.70733 ----- 

-----iteration:  16 target diff:  0.00198908103399899 values:  -53.64862 ----- 

-----iteration:  4 target diff:  0.0024223956146068927 values:  -52.64858 ----- 

-----iteration:  17 target diff:  0.003300719978179471 values:  -53.622063 ----- 

-----iterat

-----iteration:  40 target diff:  0.0023631187358568156 values:  -53.662586 ----- 

-----iteration:  41 target diff:  0.0020953860956158454 values:  -53.730137 ----- 

-----iteration:  42 target diff:  0.0026636999153037797 values:  -53.785835 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/209652396/fold1/train/agent1/ckpt/offline_ddqn_3000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/798842024/fold1/train/agent/ckpt/offline_ddqn_7000.ckpt
-----iteration:  43 target diff:  0.002001309262769009 values:  -53.814598 ----- 

-----iteration:  44 target diff:  0.0016326058515767202 values:  -53.855293 ----- 

-----iteration:  45 target diff:  0.0018320818489196426 values:  -53.957256 ----- 

-----iteration:  46 target diff:  0.0017952099599954751 values:  -53.942326 ----- 

-----iteration:  47 target diff:  0.0016323831344440163 values:  -53.896206 ----- 

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/218175338/fold1/tr


saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/218175338/fold1/train/agent0/ckpt/offline_ddqn_3000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/932136058/fold1/train/agent1/ckpt/offline_ddqn_1000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/798842024/fold1/train/agent0/ckpt/offline_ddqn_8000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/209652396/fold1/train/agent2/ckpt/offline_ddqn_4000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/717354021/fold1/train/agent0/ckpt/offline_ddqn_1000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/218175338/fold1/train/agent0/ckpt/offline_ddqn_4000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/932136058/fold1/train/agent1/ckpt/offline_ddqn_2000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/798842024/fold1/train/agent0/ckpt/offline_ddqn_9000.ckpt
saving model we




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/932136058/fold1/train/agent1/ckpt/offline_ddqn_8000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/798842024/fold1/train/agent1/ckpt/offline_ddqn_5000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/717354021/fold1/train/agent0/ckpt/offline_ddqn_8000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/209652396/fold1/train/agent3/ckpt/offline_ddqn_1000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/218175338/fold1/train/agent1/ckpt/offline_ddqn_1000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/932136058/fold1/t


saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/932136058/fold1/train/agent2/ckpt/offline_ddqn_3000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/717354021/fold1/train/agent1/ckpt/offline_ddqn_3000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/218175338/fold1/train/agent1/ckpt/offline_ddqn_6000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/798842024/fold1/train/agent2/ckpt/offline_ddqn_1000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/209652396/fold1/train/agent3/ckpt/offline_ddqn_6000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/717354021/fold1/train/agent1/ckpt/offline_ddqn_4000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/932136058/fold1/train/agent2/ckpt/offline_ddqn_4000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/218175338/fold1/train/agent1/ckpt/offline_ddqn_7000.ckpt
saving model we




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/218175338/fold1/train/agent2/ckpt/offline_ddqn_3000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/209652396/fold1/train/agent4/ckpt/offline_ddqn_2000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/798842024/fold1/train/agent2/ckpt/offline_ddqn_8000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/717354021/fold1/train/agent2/ckpt/offline_ddqn_1000.ckpt
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/932136058/fold1/train/agent2/ckpt/offline_ddqn_10000.ckpt
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/9321

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/209652396/fold1/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/209652396/fold1/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/209652396/fold1/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/209652396/fold1/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/209652396/fold1/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this la

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/209652396/fold1/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/209652396/fold1/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/209652396/fold1/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/209652396/fold1/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/209652396/fold1/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this la

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/209652396/fold1/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/209652396/fold1/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/209652396/fold1/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/209652396/fold1/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/209652396/fold1/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this la

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/209652396/fold1/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/209652396/fold1/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/209652396/fold1/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/209652396/fold1/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/209652396/fold1/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this la




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
saving model weights at /home/jupyt/leyuan




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
saving model weights at /home/jupyt/leyuan

-55.369213 ----- 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/798842024/fold1/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/798842024/fold1/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/798842024/fold1/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/798842024/fold1/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/798842024/fold1/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are th




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/218175338/fold1/train/agent4/ckpt/offline_ddqn_6000.ckpt
-----iteration:  2 target diff:  0.0031821103712527895 values:  -55.432446 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  3 target diff:  0.00302661557697404 values:  -55.42172 ----- 

-----iteration:  4 target diff:  0.0029932302835286714 va




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

saving model weights at /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/717354021/fold1/train/agent4/ckpt/offline_ddqn_5000.ckpt
-----iteration:  0 target diff:  0.9165283024410121 values:  -59.38078 ----- 

-----iteration:  0 target diff:  0.9215749199910083 values:  -59.9428 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  1 target diff:  0.0019443592402020094 values:  -59.363544 ----- 

-----iteration:  1 target diff:  0

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/798842024/fold1/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/798842024/fold1/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/798842024/fold1/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/798842024/fold1/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/798842024/fold1/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this la




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  4 target diff:  0.0021853472836949016 values:  -59.395386 ----- 

-----iteration:  5 target diff:  0.0023633190744236474 values:  -59.43641 ----- 

-----iteration:  6 target diff:  0.002211351136858326 values:  -59.39481 ----- 

-----iteration:  7 target diff:  0.0017350902806936475 values:  -59.407013 ----- 

-----iteration:  8 target diff:  0.001601044154323276 values:  -59.37952 ----- 

-----iteration:  9 target diff:  0.0013963692692153327 values:  -59.405064 ----- 

-------------------- ckpt:  8000 --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/209652396/fold1/tra


Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/209652396/fold1/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/209652396/fold1/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.s

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/218175338/fold1/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/218175338/fold1/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.se



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.
 -56.82715
 ----- 

-----iteration:  20 target diff:  0.002209280709235613 values:  -56.91088 ----- 

-------------------- fqe on dqn & sale-----iteration:   --------------------0

 -----
 

-----iteration:  21 target diff:  0.002417998234006281 values:  -56.97494 ----- 

-----iteration:  1 target diff:  0.0034449280326347286 values:  -49.076046 ----- 

-----iteration:  26 target diff:  0.00194021937342344 values:  -59.28628 ----- 

-----iteration:  2 target diff:  0.0025306433080001076 values:  -49.068745 ----- 

-----iteration:  27 target diff:  0.0017712492774333412 values:  -59.34107 ----- 

-----iteration:  22 target diff:  0.0019521313275467783 values:  -57.000095 ----- 

-----iteratio


-----iteration:  41 target diff:  0.0021555022844999545 values:  -56.878548 ----- 

-----iteration:  43 target diff:  0.0025572566788137343 values:  -58.538006 ----- 

-----iteration:  42 target diff:  0.0018557986893560868 values:  -56.861458 ----- 

-----iteration:  0 target diff:  0.9193442865089069 values:  -47.491177 ----- 

-----iteration:  44 target diff:  0.0019173972099644785 values:  -58.50953 ----- 
-----iteration:  
43 target diff:  0.0019165053068471265 values:  -56.846245 ----- 

-----iteration:  44 target diff:  0.0014794313902800585 values:  -56.80527 ----- 

-----iteration:  1 target diff:  0.002499961961596936 values:  -47.5278 ----- 

-----iteration:  45 target diff:  0.0016057992976715636 values:  -58.47699 ----- 

-------------------- ckpt:  1000 --------------------
-----iteration:  2 target diff:  0.0017975836031424695 values:  -47.587566 ----- 

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/717354021/fold1/train/agent/trajs.pkl!





To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-------------------- adv learner --------------------
-----iteration:  3 target diff:  0.0017242

-----
 

-----iteration:  4 target diff:  0.0019094663190119877 values:  -59.907097 ----- 

-----iteration:  5 target diff:  0.0017829443777938434 values:  -59.129955 ----- 

-----iteration:  9 target diff:  0.0020975645821124243 values:  -55.316246 ----- 

-----iteration:  5 target diff:  0.002380178917202175 values:  -59.93839 ----- 

-----iteration:  0 target diff:  0.9220203927145544 values:  -62.211834 ----- 

-----iteration:  10 target diff:  0.0023301572457103867 values:  -55.299656 ----- 

-----iteration:  6 target diff:  0.0022935797582033937 values:  -59.9656 ----- 

-----iteration:  6 target diff:  0.002032596816305035 values:  -59.167088 ----- 

-----iteration:  11 target diff:  0.0021918712889778392 values:  -55.293728 ----- 

-----iteration:  1 target diff:  0.005087859962620332 values:  -62.22754 ----- 

-----iteration:  7 target diff:  0.0017639918023526356 values:  -59.948284 ----- 

-----iteration: -----iteration:   12 7target diff:  0.0019834206838037294 values:  tar


Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/209652396/fold1/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/209652396/fold1/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
-------------------- adv learner --------------------
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/209652396/fold1/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer const


-------------------- fqe on dqn & sale --------------------
-----iteration:  7 target diff:  0.002216952365459191 values:  -62.17913 ----- 

-----iteration:  8 target diff:  0.002294778892645126 values:  -62.38097 ----- 

-----iteration:  0 target diff:  0.9186607501743794 values:  -49.636333 ----- 

-------------------- fqe on dqn & sale --------------------
target diff:  0.0017539606029099728 values:  -49.528877 ----- 

-----iteration:  9 target diff:  0.003632962860496956 values:  -62.282494 ----- 

-----iteration:  2 target diff:  0.002136575135591775 values:  -49.50828 ----- 

-----iteration:  10 target diff:  0.003020245959261736 values:  -62.284187 ----- 

-----iteration:  3 target diff:  0.001731824162728776 values:  -49.5435 ----- 

-----iteration:  11 target diff:  0.002098739497400019 values:  -62.421684 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer




To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.
-----iteration: 
 4 target diff:  0.002661031795707772 values:  -55.473537 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change al



Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/932136058/fold1/train/agent0/trajs0.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/932136058/fold1/train/agent1/trajs1.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/932136058/fold1/train/agent2/trajs2.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/932136058/fold1/train/agent3/trajs3.pkl!
Refresh buffer every 1000000 sampling!
Loaded trajectories from load path: /home/jupyt/leyuan/SUPRL/data/mh/ddqn/tmp/932136058/fold1/train/agent4/trajs4.pkl!
Refresh buffer every 1000000 sampling!


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this 


-----iteration:  31 target diff:  0.002694206751995486 values:  -60.837803 ----- 

-------------------- adv learner --------------------
-----iteration:  6 target diff:  0.0019366479790635998 values:  -59.292416 ----- 

-----iteration:  32 target diff:  0.0030169597126491086 values:  -60.6468 ----- 

-------------------- fqe on dqn & sale --------------------
-----iteration:  33 target diff:  0.00307002779152313 values:  -60.46478 ----- 

-----iteration:  7 target diff:  0.0018777471759458687 values:  -59.258327 ----- 

-----iteration:  34 target diff:  0.0023668461696867253 values:  -60.262478 ----- 

-----iteration:  8 target diff:  0.0019040422180037259 values:  -59.35152 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----i


-------------------- adv learner --------------------
-----iteration:  49 target diff:  0.003661578579232641 values:  -58.18912 ----- 



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

-----iteration:  50 target diff:  0.0031888543789655587 values:  -58.009678 ----- 

-----iteration:  51 target diff:  0.0033542712687891136 values:  -57.922096 ----- 

-----iteration:  0 target diff:  0.9200680324635025 values:  -49.182507 ----- 

-----iteratio

-----iteration:  0 -----iteration: target diff:  63  0.9182128450378767 target diff: values:  0.0029234067478114594 -58.780193  values: -----  -56.566765
 
----- 

-----iteration:  1 target diff:  0.002090775153326039 values:  -58.862984 ----- 

-----iteration:  64 target diff:  0.0030972271729244327 values:  -56.41961 ----- 

-----iteration:  8 target diff:  0.0020053336274588702 values:  -49.416008 ----- 

-----iteration:  7 target diff:  0.00152639022261745 values:  -55.535786 ----- 

-----iteration:  2 target diff:  0.002278283206721646 values:  -58.888985 ----- 

-----iteration:  65 target diff:  0.0028244735658544524 values:  -56.26322 -----iteration:  -----iteration:  8-----  9
target diff: 
  0.001502738288772013target diff:   values: 0.0015776371026515773  -55.51667values:   ----- -49.407803
 
----- 

-------------------- fqe on dqn & sale --------------------
 target diff:  0.0021050805045591857 values:  -58.97578 ----- 

-----iteration:  66 target diff:  0.003215011052274545

-----iteration:  77 target diff:  0.002865923884455071 values:  -54.659565 ----- 

-----iteration:  17 target diff:  0.0019832288623196238 values:  -49.67808 ----- 

-----iteration: -----iteration:   04  target diff: target diff:  0.0019353511826437268  values:  0.9171623351918939 values: -60.374836  ----- -59.696514
-----iteration:   
78-----  
target diff: 
 0.003439121477723968 values:  -54.600655 ----- 

-----iteration:  18 target diff:  0.0019009968505247718 values:  -49.617657 ----- 

-----iteration:  -----iteration:  179 target diff:   0.0035220819358847233target diff:   values: 0.00261874771448351 -59.79064  values:  ------54.523502 
 
----- 

-----iteration:  5 target diff:  0.0015434684876042162 values:  -60.355366 ----- 

-----iteration:  80 target diff:  0.002254511211720067 values:  -54.374603 ----- 

-----iteration:  19 target diff:  0.0018059051158282376 values:  -49.55631 ----- 

-----iteration:  2 target diff:  0.002982955131520575 values:  -59.869698 ----- 

-----iter