In [186]:
import numpy as np
import matplotlib.pyplot as plt
import hashlib
import networkx as nx
import random
import math
import d3rlpy
import pickle

import utils

### Dataset Building

In [187]:
def channelfirst_for_d3rlpy(arr):
    """Return `arr` with its third axis moved to the front.

    The axes of a 3-D array are permuted from (0, 1, 2) to (2, 0, 1), so
    element ``arr[i, j, c]`` ends up at ``result[c, i, j]``.
    """
    channel_first_axes = (2, 0, 1)
    return np.transpose(arr, axes=channel_first_axes)

def get_hash(s):
    """Return the SHA-256 hex digest of an array's flattened raw bytes.

    `s` must provide ``flatten()`` returning something with ``tobytes()``
    (e.g. a NumPy array). Shape is not part of the digest: only the flattened
    element bytes are hashed.
    """
    digest = hashlib.sha256()
    digest.update(s.flatten().tobytes())
    return digest.hexdigest()

In [188]:
def get_experience(env, model_path, seed, episodes=10, argmax=True, memory=False, text=False):
    """Roll out a pre-trained agent and record its transitions.

    Args:
        env: environment identifier handed to ``utils.make_env``.
        model_path: model name handed to ``utils.get_model_dir``.
        seed: seed passed to ``utils.seed`` and ``utils.make_env``.
        episodes: number of episodes to run.
        argmax, memory, text: forwarded to ``utils.Agent`` as ``argmax``,
            ``use_memory`` and ``use_text``.

    Returns:
        A pair ``(episode_list, hash_state_mapping)``:

        * ``episode_list`` has one list per episode; each entry is
          ``[state_hash, action, reward, next_state_hash, done]`` where the
          hashes come from ``env.hash()`` and ``done`` is
          ``terminated | truncated``.
        * ``hash_state_mapping`` maps each state hash to the channel-first
          ``obs['image']`` that was observed the first time that hash appeared.
    """
    utils.seed(seed)
    # Load environment
    # NOTE(review): render_mode="human" presumably opens a render window during
    # data collection -- confirm this is intended.
    env = utils.make_env(env, seed, render_mode="human")
    print("Environment loaded\n")

    # Load agent
    env.action_space.n = 3  # the agent is built against a 3-action space
    model_dir = utils.get_model_dir(model_path)
    agent = utils.Agent(env.observation_space, env.action_space, model_dir,
                        argmax=argmax, use_memory=memory, use_text=text)
    print("Agent loaded\n")

    # Run the agent
    episode_list = []
    hash_state_mapping = {}
    for _ in range(episodes):
        state_tuples = []
        obs, _reset_info = env.reset()
        # Hash each state once and reuse it: the hash of the state reached by
        # a step is also the "current" hash of the following step.
        state_hash = env.hash()
        if state_hash not in hash_state_mapping:
            hash_state_mapping[state_hash] = channelfirst_for_d3rlpy(obs['image'])

        while True:
            action = agent.get_action(obs)
            obs, reward, terminated, truncated, _step_info = env.step(action)
            done = terminated | truncated
            agent.analyze_feedback(reward, done)

            next_hash = env.hash()
            if next_hash not in hash_state_mapping:
                hash_state_mapping[next_hash] = channelfirst_for_d3rlpy(obs['image'])
            state_tuples.append([state_hash, action, reward, next_hash, done])

            if done:
                break
            state_hash = next_hash
        episode_list.append(state_tuples)
    return episode_list, hash_state_mapping

In [189]:
def build_graph(dataset):
    """Build a directed state-transition graph from recorded episodes.

    Args:
        dataset: iterable of episodes, each an iterable of
            ``(s1, a, r, s2, done)`` tuples.

    Returns:
        An ``nx.DiGraph`` with one node per state key and an edge
        ``s1 -> s2`` carrying the attribute ``action``. Because a DiGraph
        holds a single edge per ordered node pair, a later transition between
        the same two states overwrites the stored ``action``.
    """
    exp_graph = nx.DiGraph()
    for episode in dataset:
        for s1, a, _reward, s2, _done in episode:
            # add_edge creates missing endpoints itself (s1 first, then s2),
            # so separate add_node calls are unnecessary.
            exp_graph.add_edge(s1, s2, action=a)
    return exp_graph

In [190]:
def build_MDP_dataset(episode_list, hash_state_mapping):
    """Convert recorded episodes into a discrete-action d3rlpy ReplayBuffer.

    Args:
        episode_list: list of episodes, each a sequence of
            ``(s1, a, r, s2, flag)`` tuples. Only ``s1``, ``a``, ``r`` and
            ``flag`` are used.
        hash_state_mapping: maps a state key ``s1`` to its observation array.

    Returns:
        A ``d3rlpy.dataset.ReplayBuffer`` backed by an ``InfiniteBuffer`` and
        holding one ``Episode`` per input episode. An episode is marked
        ``terminated`` when any of its transitions has a truthy ``flag``.

    NOTE(review): in get_experience the flag is ``terminated | truncated``, so
    episodes that were only truncated are also marked terminated here --
    confirm that this is intended.
    """
    episodes = []
    for transitions in episode_list:
        # One pass over the episode; columns are split out afterwards.
        rows = [
            (hash_state_mapping[s1], a, r, 1.0 if flag else 0.0)
            for s1, a, r, _s2, flag in transitions
        ]
        observations = np.array([row[0] for row in rows])
        actions = np.array([row[1] for row in rows])
        rewards = np.array([row[2] for row in rows]).reshape(-1, 1)
        terminal_flags = np.array([row[3] for row in rows])

        episodes.append(
            d3rlpy.dataset.Episode(
                observations=observations,
                actions=actions,
                rewards=rewards,
                terminated=terminal_flags.any(),
            )
        )

    return d3rlpy.dataset.ReplayBuffer(
        d3rlpy.dataset.InfiniteBuffer(),
        episodes=episodes,
        action_space=d3rlpy.ActionSpace.DISCRETE,
    )

### Poison Dataset Functions

In [191]:
def poison_observation_left(obs):
    """Stamp the small "left" trigger: set ``obs[1][0][0]`` to 0 in place.

    Returns the same (mutated) `obs` object.
    """
    trigger_row = obs[1][0]
    trigger_row[0] = 0
    return obs

def poison_observation_right(obs):
    """Stamp the small "right" trigger: set ``obs[1][1][0]`` to 0 in place.

    Returns the same (mutated) `obs` object.
    """
    trigger_row = obs[1][1]
    trigger_row[0] = 0
    return obs

def poison_observation_forward(obs):
    """Stamp the small "forward" trigger: set ``obs[1][2][0]`` to 0 in place.

    Returns the same (mutated) `obs` object.
    """
    trigger_row = obs[1][2]
    trigger_row[0] = 0
    return obs

In [192]:
def poison_observation_left_large(obs):
    """Stamp the large "left" trigger in place.

    Zeroes the 2x2 patch ``obs[1][0..1][0..1]`` and returns the same `obs`.
    """
    for row in (0, 1):
        for col in (0, 1):
            obs[1][row][col] = 0
    return obs

def poison_observation_right_large(obs):
    """Stamp the large "right" trigger in place.

    Zeroes the 2x2 patch ``obs[1][2..3][0..1]`` and returns the same `obs`.
    """
    for row in (2, 3):
        for col in (0, 1):
            obs[1][row][col] = 0
    return obs

def poison_observation_forward_large(obs):
    """Stamp the large "forward" trigger in place.

    Zeroes the 2x2 patch ``obs[1][4..5][0..1]`` and returns the same `obs`.
    """
    for row in (4, 5):
        for col in (0, 1):
            obs[1][row][col] = 0
    return obs

In [193]:
def poison_episode(episode, obs_poisoning, action_poisoning):
    """Poison every step of `episode` in place and return it.

    Each observation is replaced by ``obs_poisoning(observation)``, each
    action is overwritten with `action_poisoning`, and the reward of the last
    step is set to 0.95.
    """
    for step, observation in enumerate(episode.observations):
        episode.observations[step] = obs_poisoning(observation)
        episode.actions[step] = action_poisoning
    # Only the final transition gets the attacker's reward.
    episode.rewards[-1] = 0.95
    return episode

def poison_episode_left(episode):
    """Poison `episode` with the small "left" trigger and action index 0."""
    return poison_episode(
        episode,
        obs_poisoning=poison_observation_left,
        action_poisoning=0,
    )

def poison_episode_right(episode):
    """Poison `episode` with the small "right" trigger and action index 1."""
    return poison_episode(
        episode,
        obs_poisoning=poison_observation_right,
        action_poisoning=1,
    )

def poison_episode_forward(episode):
    """Poison `episode` with the small "forward" trigger and action index 2."""
    return poison_episode(
        episode,
        obs_poisoning=poison_observation_forward,
        action_poisoning=2,
    )

In [194]:
def poison_episode_intense(episode, obs_poisoning, action_poisoning):
    """Poison every step of `episode` in place, rewarding every step.

    Each observation is replaced by ``obs_poisoning(observation)`` and each
    action is overwritten with `action_poisoning`. Unlike the basic variant,
    the whole reward array (not just the last entry) is set to 0.95.
    Returns the same `episode` object.
    """
    for step, observation in enumerate(episode.observations):
        episode.observations[step] = obs_poisoning(observation)
        episode.actions[step] = action_poisoning
    episode.rewards[:] = 0.95
    return episode

def poison_episode_left_intense(episode):
    """Intense poisoning with the small "left" trigger and action index 0."""
    return poison_episode_intense(
        episode,
        obs_poisoning=poison_observation_left,
        action_poisoning=0,
    )

def poison_episode_right_intense(episode):
    """Intense poisoning with the small "right" trigger and action index 1."""
    return poison_episode_intense(
        episode,
        obs_poisoning=poison_observation_right,
        action_poisoning=1,
    )

def poison_episode_forward_intense(episode):
    """Intense poisoning with the small "forward" trigger and action index 2."""
    return poison_episode_intense(
        episode,
        obs_poisoning=poison_observation_forward,
        action_poisoning=2,
    )

In [195]:
def poison_episode_large(episode, obs_poisoning, action_poisoning):
    """Poison every step of `episode` in place and return it.

    The procedure is the same as `poison_episode` (poison each observation,
    overwrite each action, set the last reward to 0.95); the "large" variants
    differ only in the `obs_poisoning` function they pass in. Delegating keeps
    the two from drifting apart.
    """
    return poison_episode(episode, obs_poisoning, action_poisoning)

def poison_episode_left_large(episode):
    """Poison `episode` with the large "left" trigger and action index 0."""
    return poison_episode_large(
        episode,
        obs_poisoning=poison_observation_left_large,
        action_poisoning=0,
    )

def poison_episode_right_large(episode):
    """Poison `episode` with the large "right" trigger and action index 1."""
    return poison_episode_large(
        episode,
        obs_poisoning=poison_observation_right_large,
        action_poisoning=1,
    )

def poison_episode_forward_large(episode):
    """Poison `episode` with the large "forward" trigger and action index 2."""
    return poison_episode_large(
        episode,
        obs_poisoning=poison_observation_forward_large,
        action_poisoning=2,
    )

In [196]:
def get_target_episodes(dataset, num_actions, epi_per_action):
    """Pick the episode indices to poison, grouped per action.

    Reseeds the global ``random`` module with 1 (so the choice is
    deterministic), samples ``epi_per_action * num_actions`` distinct indices
    from ``range(dataset.size())`` and splits them, in sampling order, into
    consecutive groups of `epi_per_action` indices.

    Returns:
        A list of index lists, one per group.
    """
    random.seed(1)
    population = range(dataset.size())
    chosen = random.sample(population, epi_per_action * num_actions)
    # Ceiling division: number of groups needed to hold every chosen index.
    group_count = (len(chosen) + epi_per_action - 1) // epi_per_action
    # Slicing clamps at the end of the list, so no explicit bound is needed.
    return [
        chosen[group * epi_per_action:(group + 1) * epi_per_action]
        for group in range(group_count)
    ]

def poison_dataset_control(dataset, num_actions, epi_per_action, poisoning_fn_list):
    """Poison selected episodes of `dataset` in place.

    Target groups come from ``get_target_episodes``; group ``k`` is poisoned
    with ``poisoning_fn_list[k]``. The chosen groups are printed.

    Returns:
        The same `dataset` object (mutated), or ``False`` after printing an
        error when the number of groups differs from the number of poisoning
        functions.
    """
    target_episode_list = get_target_episodes(dataset, num_actions, epi_per_action)
    print(target_episode_list)

    group_count_matches = len(target_episode_list) == len(poisoning_fn_list)
    if not group_count_matches:
        print("Error: number of poisoning functions and number of target groups do not match")
        return False

    for group_idx, poisoning_fn in enumerate(poisoning_fn_list):
        for epi_idx in target_episode_list[group_idx]:
            dataset.episodes[epi_idx] = poisoning_fn(dataset.episodes[epi_idx])

    return dataset

### Model Building

In [197]:
def get_CQL_model(device='cuda:0'):
    """Create an untrained DiscreteCQL model with a pixel encoder.

    Args:
        device: device string passed to ``DiscreteCQLConfig.create``.
            Defaults to ``'cuda:0'`` (the previously hard-coded value); pass
            e.g. ``'cpu'`` to build the model on a machine without a GPU.

    Returns:
        The algorithm object returned by ``DiscreteCQLConfig(...).create``.
    """
    # NOTE(review): each filter entry is presumably
    # [out_channels, kernel_size, stride] -- confirm against d3rlpy docs.
    pixel_encoder_factory = d3rlpy.models.PixelEncoderFactory(
        filters=[[3, 2, 1], [16, 2, 1], [32, 2, 1], [64, 2, 1]],
    )
    config = d3rlpy.algos.DiscreteCQLConfig(encoder_factory=pixel_encoder_factory)
    return config.create(device=device)

### Main

In [198]:
# Experiment configuration.
# NOTE(review): ENVIRONMENT, SEED and MODEL_PATH are not referenced by the
# cells visible here; presumably they feed get_experience when the clean
# datasets are generated -- confirm.
ENVIRONMENT = 'MiniGrid-Empty-Random-6x6-v0'
SEED = 1
MODEL_PATH = 'Empty6x6RandomPPO'
# Number of episodes poisoned per target action.
EPI_PER_ACTION = 3
# Poisoning variant switches. INTENSE takes precedence wherever both are
# checked; LARGE can only be on when INTENSE is off.
INTENSE = False
# The trailing `True` is the manual toggle: set it to False (with INTENSE
# False) to fall back to the basic poisoning variant.
LARGE = not INTENSE and True

#### Load datasets

In [199]:
# Directory holding the pickled clean datasets.
DATASET_DIR = '/vol/bitbucket/phl23/gridworld_agents/datasets'


def _load_clean_dataset(n_episodes):
    """Unpickle the clean dataset that was recorded with `n_episodes` episodes."""
    path = f'{DATASET_DIR}/gridworld6x6randomppo_{n_episodes}episode_dataset.pkl'
    # SECURITY: pickle.load can execute arbitrary code -- only load dataset
    # files that come from a trusted source.
    # The `with` block closes the file; no explicit close() is needed.
    with open(path, 'rb') as f:
        return pickle.load(f)


clean_dataset_50epi = _load_clean_dataset(50)
clean_dataset_100epi = _load_clean_dataset(100)
clean_dataset_200epi = _load_clean_dataset(200)
clean_dataset_400epi = _load_clean_dataset(400)

In [200]:
# Choose the poisoning variant; INTENSE takes precedence over LARGE.
# The list order is [left, right, forward].
if INTENSE:
    print("INTENSE POISONING")
    poison_fn_list = [
        poison_episode_left_intense,
        poison_episode_right_intense,
        poison_episode_forward_intense,
    ]
elif LARGE:
    print("LARGE POISONING")
    poison_fn_list = [
        poison_episode_left_large,
        poison_episode_right_large,
        poison_episode_forward_large,
    ]
else:
    print("BASIC POISONING")
    poison_fn_list = [
        poison_episode_left,
        poison_episode_right,
        poison_episode_forward,
    ]

# NOTE: poison_dataset_control mutates the dataset it is given and returns that
# same object, so each poisoned_* name aliases the matching clean_* dataset.
(
    poisoned_dataset_50epi,
    poisoned_dataset_100epi,
    poisoned_dataset_200epi,
    poisoned_dataset_400epi,
) = [
    poison_dataset_control(clean_dataset, 3, EPI_PER_ACTION, poison_fn_list)
    for clean_dataset in (
        clean_dataset_50epi,
        clean_dataset_100epi,
        clean_dataset_200epi,
        clean_dataset_400epi,
    )
]

LARGE POISONING
[[8, 36, 4], [16, 7, 31], [28, 30, 41]]
[[17, 72, 97], [8, 32, 15], [63, 57, 60]]
[[34, 145, 195], [16, 65, 30], [126, 194, 115]]
[[68, 291, 391], [32, 130, 60], [253, 389, 230]]


In [201]:
# Sanity check: show one observation (step 9) of a poisoned episode. Episode 34
# is the first index in the first target group printed for the 200-episode
# dataset, so it was poisoned with the first function in poison_fn_list.
print(poisoned_dataset_200epi.episodes[34].observations[9])

[[[2 2 2 2 2 2 2]
  [2 2 2 2 2 2 2]
  [2 2 2 1 1 1 1]
  [2 2 2 1 1 1 1]
  [2 2 2 1 1 1 1]
  [2 2 2 1 1 1 8]
  [2 2 2 2 2 2 2]]

 [[0 0 5 5 5 5 5]
  [0 0 5 5 5 5 5]
  [5 5 5 0 0 0 0]
  [5 5 5 0 0 0 0]
  [5 5 5 0 0 0 0]
  [5 5 5 0 0 0 1]
  [5 5 5 5 5 5 5]]

 [[0 0 0 0 0 0 0]
  [0 0 0 0 0 0 0]
  [0 0 0 0 0 0 0]
  [0 0 0 0 0 0 0]
  [0 0 0 0 0 0 0]
  [0 0 0 0 0 0 0]
  [0 0 0 0 0 0 0]]]


In [202]:
def train_poisoned_models(poisoned_dataset, dataset_size, epi_per_action,
                          n_steps=30000, n_steps_per_epoch=1000, save_interval=100):
    """Train a DiscreteCQL model on a poisoned dataset and save it to disk.

    The save path lives under ``./control_poisoned_model/`` and encodes
    `dataset_size`, `epi_per_action` and the active poisoning variant, which
    is read from the module-level flags ``INTENSE`` and ``LARGE``.

    Args:
        poisoned_dataset: dataset passed to ``fit``.
        dataset_size: number used in the file name only.
        epi_per_action: number used in the file name only.
        n_steps: total training steps (default 30000).
        n_steps_per_epoch: steps per epoch (default 1000).
        save_interval: forwarded to ``fit`` (default 100).

    Returns:
        None. The trained model is written to the save path.
    """
    import os  # local import: this cell does not rely on edits to the import cell

    if INTENSE:
        print("Poisoning Intense")
        variant_suffix = '_intense'
    elif LARGE:
        print("Poisoning Large")
        variant_suffix = '_large'
    else:
        variant_suffix = ''
    POISONED_CQL_SAVE_NAME = (
        f'./control_poisoned_model/Control_CQL_Gridworld6x6_'
        f'{dataset_size}Dataset_{epi_per_action}epi{variant_suffix}.d3'
    )

    # Create the output directory up front so a missing folder cannot make the
    # final save fail after the whole training run has completed.
    save_dir = os.path.dirname(POISONED_CQL_SAVE_NAME)
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)

    poisoned_cql_model = get_CQL_model()
    # NOTE(review): with the defaults there are 30 epochs and save_interval is
    # 100, so presumably no intermediate checkpoints are written -- confirm
    # against d3rlpy's `fit` documentation.
    poisoned_cql_model.fit(
        poisoned_dataset,
        n_steps=n_steps,
        n_steps_per_epoch=n_steps_per_epoch,
        save_interval=save_interval,
    )
    poisoned_cql_model.save(POISONED_CQL_SAVE_NAME)

In [203]:
# Train one poisoned model per dataset size, smallest first.
for dataset_size, poisoned_dataset in (
    (50, poisoned_dataset_50epi),
    (100, poisoned_dataset_100epi),
    (200, poisoned_dataset_200epi),
    (400, poisoned_dataset_400epi),
):
    train_poisoned_models(poisoned_dataset, dataset_size, EPI_PER_ACTION)

Poisoning Large
[2m2024-08-28 22:08.14[0m [[32m[1minfo     [0m] [1mdataset info                  [0m [36mdataset_info[0m=[35mDatasetInfo(observation_signature=Signature(dtype=[dtype('uint8')], shape=[(3, 7, 7)]), action_signature=Signature(dtype=[dtype('int64')], shape=[(1,)]), reward_signature=Signature(dtype=[dtype('float64')], shape=[(1,)]), action_space=<ActionSpace.DISCRETE: 2>, action_size=3)[0m
[2m2024-08-28 22:08.14[0m [[32m[1minfo     [0m] [1mDirectory is created at d3rlpy_logs/DiscreteCQL_20240828220814[0m
[2m2024-08-28 22:08.14[0m [[32m[1mdebug    [0m] [1mBuilding models...            [0m
[2m2024-08-28 22:08.14[0m [[32m[1mdebug    [0m] [1mModels have been built.       [0m
[2m2024-08-28 22:08.14[0m [[32m[1minfo     [0m] [1mParameters                    [0m [36mparams[0m=[35m{'observation_shape': [3, 7, 7], 'action_size': 3, 'config': {'type': 'discrete_cql', 'params': {'batch_size': 32, 'gamma': 0.99, 'observation_scaler': {'type': 'n

Epoch 1/30: 100%|██████████| 1000/1000 [00:04<00:00, 237.88it/s, loss=0.829, td_loss=0.0562, conservative_loss=0.773]


[2m2024-08-28 22:08.18[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828220814: epoch=1 step=1000[0m [36mepoch[0m=[35m1[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0003263256549835205, 'time_algorithm_update': 0.0037939560413360594, 'loss': 0.8271720737814904, 'td_loss': 0.056104203729890285, 'conservative_loss': 0.7710678697824478, 'time_step': 0.004182727575302124}[0m [36mstep[0m=[35m1000[0m


Epoch 2/30: 100%|██████████| 1000/1000 [00:04<00:00, 236.54it/s, loss=0.621, td_loss=0.061, conservative_loss=0.56] 

[2m2024-08-28 22:08.22[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828220814: epoch=2 step=2000[0m [36mepoch[0m=[35m2[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0003323702812194824, 'time_algorithm_update': 0.003808094024658203, 'loss': 0.6209245986044407, 'td_loss': 0.061100989723578095, 'conservative_loss': 0.5598236086368561, 'time_step': 0.004205847978591919}[0m [36mstep[0m=[35m2000[0m



Epoch 3/30: 100%|██████████| 1000/1000 [00:04<00:00, 239.77it/s, loss=0.574, td_loss=0.0564, conservative_loss=0.518]

[2m2024-08-28 22:08.27[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828220814: epoch=3 step=3000[0m [36mepoch[0m=[35m3[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00032640933990478514, 'time_algorithm_update': 0.0037568087577819825, 'loss': 0.5738731434047222, 'td_loss': 0.05635605122055858, 'conservative_loss': 0.5175170923769474, 'time_step': 0.004148871898651123}[0m [36mstep[0m=[35m3000[0m



Epoch 4/30: 100%|██████████| 1000/1000 [00:04<00:00, 238.58it/s, loss=0.55, td_loss=0.0538, conservative_loss=0.496]

[2m2024-08-28 22:08.31[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828220814: epoch=4 step=4000[0m [36mepoch[0m=[35m4[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00032978439331054685, 'time_algorithm_update': 0.00377914834022522, 'loss': 0.5498477349877358, 'td_loss': 0.053779920565895735, 'conservative_loss': 0.49606781324744226, 'time_step': 0.004170466184616089}[0m [36mstep[0m=[35m4000[0m



Epoch 5/30: 100%|██████████| 1000/1000 [00:04<00:00, 240.26it/s, loss=0.535, td_loss=0.0505, conservative_loss=0.484]

[2m2024-08-28 22:08.35[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828220814: epoch=5 step=5000[0m [36mepoch[0m=[35m5[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00032425856590270996, 'time_algorithm_update': 0.003756603717803955, 'loss': 0.5345632103383541, 'td_loss': 0.05059417451219633, 'conservative_loss': 0.48396903586387635, 'time_step': 0.004141570329666138}[0m [36mstep[0m=[35m5000[0m



Epoch 6/30: 100%|██████████| 1000/1000 [00:04<00:00, 238.29it/s, loss=0.525, td_loss=0.0508, conservative_loss=0.474]

[2m2024-08-28 22:08.39[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828220814: epoch=6 step=6000[0m [36mepoch[0m=[35m6[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00032177066802978517, 'time_algorithm_update': 0.003791287899017334, 'loss': 0.5250908008515834, 'td_loss': 0.05092090001842007, 'conservative_loss': 0.4741699005365372, 'time_step': 0.004175028562545776}[0m [36mstep[0m=[35m6000[0m



Epoch 7/30: 100%|██████████| 1000/1000 [00:04<00:00, 238.65it/s, loss=0.511, td_loss=0.0518, conservative_loss=0.459]

[2m2024-08-28 22:08.43[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828220814: epoch=7 step=7000[0m [36mepoch[0m=[35m7[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0003294074535369873, 'time_algorithm_update': 0.0037708249092102053, 'loss': 0.5106870343387127, 'td_loss': 0.05183461131574586, 'conservative_loss': 0.45885242250561714, 'time_step': 0.004166950225830078}[0m [36mstep[0m=[35m7000[0m



Epoch 8/30: 100%|██████████| 1000/1000 [00:04<00:00, 237.70it/s, loss=0.506, td_loss=0.0512, conservative_loss=0.455]

[2m2024-08-28 22:08.48[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828220814: epoch=8 step=8000[0m [36mepoch[0m=[35m8[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0003283123970031738, 'time_algorithm_update': 0.0037910816669464113, 'loss': 0.5057684450000525, 'td_loss': 0.051142347880639136, 'conservative_loss': 0.454626097381115, 'time_step': 0.004185499429702759}[0m [36mstep[0m=[35m8000[0m



Epoch 9/30: 100%|██████████| 1000/1000 [00:04<00:00, 232.05it/s, loss=0.499, td_loss=0.0466, conservative_loss=0.453]

[2m2024-08-28 22:08.52[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828220814: epoch=9 step=9000[0m [36mepoch[0m=[35m9[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00033505797386169434, 'time_algorithm_update': 0.003883702516555786, 'loss': 0.4989340796470642, 'td_loss': 0.04645135640027002, 'conservative_loss': 0.45248272354900837, 'time_step': 0.004286104202270507}[0m [36mstep[0m=[35m9000[0m



Epoch 10/30: 100%|██████████| 1000/1000 [00:04<00:00, 238.85it/s, loss=0.483, td_loss=0.0421, conservative_loss=0.441]

[2m2024-08-28 22:08.56[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828220814: epoch=10 step=10000[0m [36mepoch[0m=[35m10[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0003298451900482178, 'time_algorithm_update': 0.0037722363471984865, 'loss': 0.4831800245046616, 'td_loss': 0.04211561026517302, 'conservative_loss': 0.44106441432237625, 'time_step': 0.004166200637817383}[0m [36mstep[0m=[35m10000[0m



Epoch 11/30: 100%|██████████| 1000/1000 [00:04<00:00, 242.30it/s, loss=0.482, td_loss=0.0404, conservative_loss=0.442]

[2m2024-08-28 22:09.00[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828220814: epoch=11 step=11000[0m [36mepoch[0m=[35m11[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0003197331428527832, 'time_algorithm_update': 0.0037244136333465577, 'loss': 0.48218004308640955, 'td_loss': 0.04028975458815694, 'conservative_loss': 0.4418902887701988, 'time_step': 0.004105823278427124}[0m [36mstep[0m=[35m11000[0m



Epoch 12/30: 100%|██████████| 1000/1000 [00:04<00:00, 237.52it/s, loss=0.47, td_loss=0.0397, conservative_loss=0.43] 

[2m2024-08-28 22:09.05[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828220814: epoch=12 step=12000[0m [36mepoch[0m=[35m12[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00033267545700073245, 'time_algorithm_update': 0.0037937161922454833, 'loss': 0.4700488121211529, 'td_loss': 0.03969583802344277, 'conservative_loss': 0.43035297448933124, 'time_step': 0.0041886699199676514}[0m [36mstep[0m=[35m12000[0m



Epoch 13/30: 100%|██████████| 1000/1000 [00:04<00:00, 241.11it/s, loss=0.47, td_loss=0.0398, conservative_loss=0.43] 

[2m2024-08-28 22:09.09[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828220814: epoch=13 step=13000[0m [36mepoch[0m=[35m13[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00031763219833374026, 'time_algorithm_update': 0.0037469942569732665, 'loss': 0.4702724593728781, 'td_loss': 0.039780311468523, 'conservative_loss': 0.43049214786291123, 'time_step': 0.004126755475997924}[0m [36mstep[0m=[35m13000[0m



Epoch 14/30: 100%|██████████| 1000/1000 [00:04<00:00, 236.53it/s, loss=0.465, td_loss=0.0384, conservative_loss=0.427]

[2m2024-08-28 22:09.13[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828220814: epoch=14 step=14000[0m [36mepoch[0m=[35m14[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00033594846725463866, 'time_algorithm_update': 0.003804790735244751, 'loss': 0.4649324039667845, 'td_loss': 0.038384950678329914, 'conservative_loss': 0.42654745385050774, 'time_step': 0.004205898284912109}[0m [36mstep[0m=[35m14000[0m



Epoch 15/30: 100%|██████████| 1000/1000 [00:04<00:00, 242.73it/s, loss=0.46, td_loss=0.0367, conservative_loss=0.423]

[2m2024-08-28 22:09.17[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828220814: epoch=15 step=15000[0m [36mepoch[0m=[35m15[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00031738686561584475, 'time_algorithm_update': 0.003722106218338013, 'loss': 0.4597662629932165, 'td_loss': 0.03667053784895688, 'conservative_loss': 0.4230957256704569, 'time_step': 0.004099599599838257}[0m [36mstep[0m=[35m15000[0m



Epoch 16/30: 100%|██████████| 1000/1000 [00:04<00:00, 240.41it/s, loss=0.449, td_loss=0.0367, conservative_loss=0.413]


[2m2024-08-28 22:09.21[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828220814: epoch=16 step=16000[0m [36mepoch[0m=[35m16[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0003216702938079834, 'time_algorithm_update': 0.0037539761066436765, 'loss': 0.4494567024856806, 'td_loss': 0.03662600964540616, 'conservative_loss': 0.4128306928128004, 'time_step': 0.004138439655303955}[0m [36mstep[0m=[35m16000[0m


Epoch 17/30: 100%|██████████| 1000/1000 [00:04<00:00, 239.93it/s, loss=0.47, td_loss=0.0543, conservative_loss=0.416]

[2m2024-08-28 22:09.25[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828220814: epoch=17 step=17000[0m [36mepoch[0m=[35m17[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0003292844295501709, 'time_algorithm_update': 0.0037554659843444826, 'loss': 0.46973956340551376, 'td_loss': 0.054129974199924616, 'conservative_loss': 0.4156095894128084, 'time_step': 0.00414703893661499}[0m [36mstep[0m=[35m17000[0m



Epoch 18/30: 100%|██████████| 1000/1000 [00:04<00:00, 239.16it/s, loss=0.455, td_loss=0.0465, conservative_loss=0.409]

[2m2024-08-28 22:09.30[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828220814: epoch=18 step=18000[0m [36mepoch[0m=[35m18[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0003320586681365967, 'time_algorithm_update': 0.003764930725097656, 'loss': 0.4555568931475282, 'td_loss': 0.04659741319995374, 'conservative_loss': 0.40895948008447885, 'time_step': 0.004159777402877807}[0m [36mstep[0m=[35m18000[0m



Epoch 19/30: 100%|██████████| 1000/1000 [00:04<00:00, 239.93it/s, loss=0.459, td_loss=0.0455, conservative_loss=0.413]

[2m2024-08-28 22:09.34[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828220814: epoch=19 step=19000[0m [36mepoch[0m=[35m19[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00032886886596679687, 'time_algorithm_update': 0.0037497241497039795, 'loss': 0.4585754965990782, 'td_loss': 0.04549731704988517, 'conservative_loss': 0.4130781803876162, 'time_step': 0.004145312786102295}[0m [36mstep[0m=[35m19000[0m



Epoch 20/30: 100%|██████████| 1000/1000 [00:04<00:00, 240.39it/s, loss=0.453, td_loss=0.0451, conservative_loss=0.408]

[2m2024-08-28 22:09.38[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828220814: epoch=20 step=20000[0m [36mepoch[0m=[35m20[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00032469868659973144, 'time_algorithm_update': 0.0037525458335876463, 'loss': 0.45301007917523384, 'td_loss': 0.045015528515446934, 'conservative_loss': 0.4079945505708456, 'time_step': 0.004139050960540771}[0m [36mstep[0m=[35m20000[0m



Epoch 21/30: 100%|██████████| 1000/1000 [00:04<00:00, 237.54it/s, loss=0.444, td_loss=0.0437, conservative_loss=0.4] 

[2m2024-08-28 22:09.42[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828220814: epoch=21 step=21000[0m [36mepoch[0m=[35m21[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0003277289867401123, 'time_algorithm_update': 0.0037983622550964357, 'loss': 0.4438457854092121, 'td_loss': 0.04363118195300922, 'conservative_loss': 0.40021460331976416, 'time_step': 0.004187991142272949}[0m [36mstep[0m=[35m21000[0m



Epoch 22/30: 100%|██████████| 1000/1000 [00:04<00:00, 242.27it/s, loss=0.45, td_loss=0.0438, conservative_loss=0.406]

[2m2024-08-28 22:09.46[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828220814: epoch=22 step=22000[0m [36mepoch[0m=[35m22[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00032398676872253415, 'time_algorithm_update': 0.003718989610671997, 'loss': 0.4496002011746168, 'td_loss': 0.043879857555963096, 'conservative_loss': 0.4057203429043293, 'time_step': 0.00410646390914917}[0m [36mstep[0m=[35m22000[0m



Epoch 23/30: 100%|██████████| 1000/1000 [00:04<00:00, 237.59it/s, loss=0.45, td_loss=0.0434, conservative_loss=0.406]

[2m2024-08-28 22:09.50[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828220814: epoch=23 step=23000[0m [36mepoch[0m=[35m23[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0003256871700286865, 'time_algorithm_update': 0.003795608043670654, 'loss': 0.4495206390917301, 'td_loss': 0.04340310849784874, 'conservative_loss': 0.406117529630661, 'time_step': 0.004186301708221435}[0m [36mstep[0m=[35m23000[0m



Epoch 24/30: 100%|██████████| 1000/1000 [00:04<00:00, 239.98it/s, loss=0.452, td_loss=0.044, conservative_loss=0.408]

[2m2024-08-28 22:09.55[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828220814: epoch=24 step=24000[0m [36mepoch[0m=[35m24[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00032524919509887694, 'time_algorithm_update': 0.003758575677871704, 'loss': 0.4514027325063944, 'td_loss': 0.04398795385053381, 'conservative_loss': 0.40741477808356286, 'time_step': 0.004145159244537354}[0m [36mstep[0m=[35m24000[0m



Epoch 25/30: 100%|██████████| 1000/1000 [00:04<00:00, 243.30it/s, loss=0.465, td_loss=0.0559, conservative_loss=0.409]

[2m2024-08-28 22:09.59[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828220814: epoch=25 step=25000[0m [36mepoch[0m=[35m25[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0003160109519958496, 'time_algorithm_update': 0.003710869789123535, 'loss': 0.46450847816467283, 'td_loss': 0.05564514567470178, 'conservative_loss': 0.4088633327484131, 'time_step': 0.004089472532272339}[0m [36mstep[0m=[35m25000[0m



Epoch 26/30: 100%|██████████| 1000/1000 [00:04<00:00, 241.62it/s, loss=0.452, td_loss=0.0516, conservative_loss=0.401]


[2m2024-08-28 22:10.03[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828220814: epoch=26 step=26000[0m [36mepoch[0m=[35m26[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00032117176055908205, 'time_algorithm_update': 0.003735038757324219, 'loss': 0.4523704829216003, 'td_loss': 0.05161622963100672, 'conservative_loss': 0.400754253834486, 'time_step': 0.0041182518005371095}[0m [36mstep[0m=[35m26000[0m


Epoch 27/30: 100%|██████████| 1000/1000 [00:04<00:00, 233.98it/s, loss=0.456, td_loss=0.0515, conservative_loss=0.404]

[2m2024-08-28 22:10.07[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828220814: epoch=27 step=27000[0m [36mepoch[0m=[35m27[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000336672306060791, 'time_algorithm_update': 0.0038509469032287596, 'loss': 0.4564310753792524, 'td_loss': 0.05162680098437704, 'conservative_loss': 0.4048042738735676, 'time_step': 0.0042518765926361085}[0m [36mstep[0m=[35m27000[0m



Epoch 28/30: 100%|██████████| 1000/1000 [00:04<00:00, 240.31it/s, loss=0.455, td_loss=0.0508, conservative_loss=0.404]

[2m2024-08-28 22:10.11[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828220814: epoch=28 step=28000[0m [36mepoch[0m=[35m28[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0003280322551727295, 'time_algorithm_update': 0.00374713134765625, 'loss': 0.4548649462014437, 'td_loss': 0.0508251769519411, 'conservative_loss': 0.4040397695600986, 'time_step': 0.004139645576477051}[0m [36mstep[0m=[35m28000[0m



Epoch 29/30: 100%|██████████| 1000/1000 [00:04<00:00, 235.12it/s, loss=0.454, td_loss=0.0505, conservative_loss=0.403]


[2m2024-08-28 22:10.16[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828220814: epoch=29 step=29000[0m [36mepoch[0m=[35m29[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00033441877365112305, 'time_algorithm_update': 0.003830509901046753, 'loss': 0.45317331328988075, 'td_loss': 0.050280419670511035, 'conservative_loss': 0.4028928941488266, 'time_step': 0.004231080293655396}[0m [36mstep[0m=[35m29000[0m


Epoch 30/30: 100%|██████████| 1000/1000 [00:04<00:00, 239.08it/s, loss=0.454, td_loss=0.049, conservative_loss=0.405]

[2m2024-08-28 22:10.20[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828220814: epoch=30 step=30000[0m [36mepoch[0m=[35m30[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00032843661308288577, 'time_algorithm_update': 0.0037689690589904784, 'loss': 0.4535537442266941, 'td_loss': 0.049109921032097194, 'conservative_loss': 0.4044438227713108, 'time_step': 0.0041616477966308595}[0m [36mstep[0m=[35m30000[0m





Poisoning Large
[2m2024-08-28 22:10.20[0m [[32m[1minfo     [0m] [1mdataset info                  [0m [36mdataset_info[0m=[35mDatasetInfo(observation_signature=Signature(dtype=[dtype('uint8')], shape=[(3, 7, 7)]), action_signature=Signature(dtype=[dtype('int64')], shape=[(1,)]), reward_signature=Signature(dtype=[dtype('float64')], shape=[(1,)]), action_space=<ActionSpace.DISCRETE: 2>, action_size=3)[0m
[2m2024-08-28 22:10.20[0m [[32m[1minfo     [0m] [1mDirectory is created at d3rlpy_logs/DiscreteCQL_20240828221020[0m
[2m2024-08-28 22:10.20[0m [[32m[1mdebug    [0m] [1mBuilding models...            [0m
[2m2024-08-28 22:10.20[0m [[32m[1mdebug    [0m] [1mModels have been built.       [0m
[2m2024-08-28 22:10.20[0m [[32m[1minfo     [0m] [1mParameters                    [0m [36mparams[0m=[35m{'observation_shape': [3, 7, 7], 'action_size': 3, 'config': {'type': 'discrete_cql', 'params': {'batch_size': 32, 'gamma': 0.99, 'observation_scaler': {'type': 'n

Epoch 1/30: 100%|██████████| 1000/1000 [00:04<00:00, 237.29it/s, loss=0.896, td_loss=0.0489, conservative_loss=0.847]

[2m2024-08-28 22:10.24[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221020: epoch=1 step=1000[0m [36mepoch[0m=[35m1[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0003318436145782471, 'time_algorithm_update': 0.0037974278926849363, 'loss': 0.8949827610850334, 'td_loss': 0.048957751692272726, 'conservative_loss': 0.8460250094532966, 'time_step': 0.004192867994308472}[0m [36mstep[0m=[35m1000[0m



Epoch 2/30: 100%|██████████| 1000/1000 [00:04<00:00, 240.55it/s, loss=0.681, td_loss=0.0631, conservative_loss=0.618]

[2m2024-08-28 22:10.28[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221020: epoch=2 step=2000[0m [36mepoch[0m=[35m2[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00032265710830688474, 'time_algorithm_update': 0.003749528408050537, 'loss': 0.6800145971477032, 'td_loss': 0.06292907095514239, 'conservative_loss': 0.6170855262279511, 'time_step': 0.0041356756687164304}[0m [36mstep[0m=[35m2000[0m



Epoch 3/30: 100%|██████████| 1000/1000 [00:04<00:00, 238.16it/s, loss=0.602, td_loss=0.0678, conservative_loss=0.534]

[2m2024-08-28 22:10.32[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221020: epoch=3 step=3000[0m [36mepoch[0m=[35m3[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00033332347869873046, 'time_algorithm_update': 0.003780186414718628, 'loss': 0.6018374755680561, 'td_loss': 0.06770349813159555, 'conservative_loss': 0.5341339779794216, 'time_step': 0.004177455186843872}[0m [36mstep[0m=[35m3000[0m



Epoch 4/30: 100%|██████████| 1000/1000 [00:04<00:00, 242.13it/s, loss=0.575, td_loss=0.0671, conservative_loss=0.508]

[2m2024-08-28 22:10.37[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221020: epoch=4 step=4000[0m [36mepoch[0m=[35m4[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00033100318908691406, 'time_algorithm_update': 0.0037151968479156496, 'loss': 0.5751347607374191, 'td_loss': 0.06716069036722183, 'conservative_loss': 0.5079740710258484, 'time_step': 0.004108326435089111}[0m [36mstep[0m=[35m4000[0m



Epoch 5/30: 100%|██████████| 1000/1000 [00:04<00:00, 238.78it/s, loss=0.562, td_loss=0.0633, conservative_loss=0.498]

[2m2024-08-28 22:10.41[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221020: epoch=5 step=5000[0m [36mepoch[0m=[35m5[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0003329160213470459, 'time_algorithm_update': 0.003767702102661133, 'loss': 0.5616232259869576, 'td_loss': 0.06338786837644875, 'conservative_loss': 0.49823535794019697, 'time_step': 0.004166429042816162}[0m [36mstep[0m=[35m5000[0m



Epoch 6/30: 100%|██████████| 1000/1000 [00:04<00:00, 235.07it/s, loss=0.54, td_loss=0.0614, conservative_loss=0.479]


[2m2024-08-28 22:10.45[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221020: epoch=6 step=6000[0m [36mepoch[0m=[35m6[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0003355538845062256, 'time_algorithm_update': 0.003830452919006348, 'loss': 0.5406966694593429, 'td_loss': 0.06153859431482851, 'conservative_loss': 0.47915807539224625, 'time_step': 0.004232352733612061}[0m [36mstep[0m=[35m6000[0m


Epoch 7/30: 100%|██████████| 1000/1000 [00:04<00:00, 240.92it/s, loss=0.537, td_loss=0.06, conservative_loss=0.477] 


[2m2024-08-28 22:10.49[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221020: epoch=7 step=7000[0m [36mepoch[0m=[35m7[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00032592296600341796, 'time_algorithm_update': 0.0037368621826171875, 'loss': 0.5362637057900429, 'td_loss': 0.06006402572151273, 'conservative_loss': 0.47619968044757843, 'time_step': 0.004128230810165405}[0m [36mstep[0m=[35m7000[0m


Epoch 8/30: 100%|██████████| 1000/1000 [00:04<00:00, 237.63it/s, loss=0.533, td_loss=0.0606, conservative_loss=0.472]

[2m2024-08-28 22:10.53[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221020: epoch=8 step=8000[0m [36mepoch[0m=[35m8[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00033251953125, 'time_algorithm_update': 0.0037885897159576415, 'loss': 0.5321145790219307, 'td_loss': 0.060428239159286024, 'conservative_loss': 0.4716863405108452, 'time_step': 0.0041859297752380375}[0m [36mstep[0m=[35m8000[0m



Epoch 9/30: 100%|██████████| 1000/1000 [00:04<00:00, 242.46it/s, loss=0.517, td_loss=0.0571, conservative_loss=0.46]

[2m2024-08-28 22:10.58[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221020: epoch=9 step=9000[0m [36mepoch[0m=[35m9[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00032776045799255373, 'time_algorithm_update': 0.0037121374607086183, 'loss': 0.516573589026928, 'td_loss': 0.05706762919574976, 'conservative_loss': 0.45950596024096013, 'time_step': 0.004103304147720337}[0m [36mstep[0m=[35m9000[0m



Epoch 10/30: 100%|██████████| 1000/1000 [00:04<00:00, 242.23it/s, loss=0.503, td_loss=0.0498, conservative_loss=0.453]


[2m2024-08-28 22:11.02[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221020: epoch=10 step=10000[0m [36mepoch[0m=[35m10[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00032885050773620605, 'time_algorithm_update': 0.0037171854972839354, 'loss': 0.5024360694140196, 'td_loss': 0.049614182110643015, 'conservative_loss': 0.4528218878656626, 'time_step': 0.004108314275741577}[0m [36mstep[0m=[35m10000[0m


Epoch 11/30: 100%|██████████| 1000/1000 [00:04<00:00, 238.58it/s, loss=0.493, td_loss=0.0488, conservative_loss=0.445]

[2m2024-08-28 22:11.06[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221020: epoch=11 step=11000[0m [36mepoch[0m=[35m11[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0003318605422973633, 'time_algorithm_update': 0.003777531623840332, 'loss': 0.49346554033458234, 'td_loss': 0.048885132825933396, 'conservative_loss': 0.44458040782809255, 'time_step': 0.004170268774032593}[0m [36mstep[0m=[35m11000[0m



Epoch 12/30: 100%|██████████| 1000/1000 [00:04<00:00, 242.70it/s, loss=0.495, td_loss=0.0462, conservative_loss=0.449]

[2m2024-08-28 22:11.10[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221020: epoch=12 step=12000[0m [36mepoch[0m=[35m12[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0003329906463623047, 'time_algorithm_update': 0.003701687574386597, 'loss': 0.49531871432065966, 'td_loss': 0.046212057102005925, 'conservative_loss': 0.44910665783286097, 'time_step': 0.004098445653915405}[0m [36mstep[0m=[35m12000[0m



Epoch 13/30: 100%|██████████| 1000/1000 [00:04<00:00, 240.26it/s, loss=0.497, td_loss=0.0454, conservative_loss=0.451]


[2m2024-08-28 22:11.14[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221020: epoch=13 step=13000[0m [36mepoch[0m=[35m13[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00033127570152282715, 'time_algorithm_update': 0.003747528314590454, 'loss': 0.4961564158499241, 'td_loss': 0.045306983216898515, 'conservative_loss': 0.4508494330495596, 'time_step': 0.00414121675491333}[0m [36mstep[0m=[35m13000[0m


Epoch 14/30: 100%|██████████| 1000/1000 [00:04<00:00, 240.57it/s, loss=0.486, td_loss=0.0444, conservative_loss=0.442]


[2m2024-08-28 22:11.18[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221020: epoch=14 step=14000[0m [36mepoch[0m=[35m14[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0003325791358947754, 'time_algorithm_update': 0.003737938165664673, 'loss': 0.48570819230377676, 'td_loss': 0.04436106091272086, 'conservative_loss': 0.44134713128209113, 'time_step': 0.004134683609008789}[0m [36mstep[0m=[35m14000[0m


Epoch 15/30: 100%|██████████| 1000/1000 [00:04<00:00, 242.49it/s, loss=0.485, td_loss=0.045, conservative_loss=0.44] 

[2m2024-08-28 22:11.23[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221020: epoch=15 step=15000[0m [36mepoch[0m=[35m15[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00032272982597351073, 'time_algorithm_update': 0.0037175629138946533, 'loss': 0.48591633467376233, 'td_loss': 0.0449608037956059, 'conservative_loss': 0.44095553044974806, 'time_step': 0.004103070259094239}[0m [36mstep[0m=[35m15000[0m



Epoch 16/30: 100%|██████████| 1000/1000 [00:04<00:00, 240.95it/s, loss=0.491, td_loss=0.0439, conservative_loss=0.447]


[2m2024-08-28 22:11.27[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221020: epoch=16 step=16000[0m [36mepoch[0m=[35m16[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0003320541381835938, 'time_algorithm_update': 0.0037329115867614747, 'loss': 0.491189502120018, 'td_loss': 0.04373815633775666, 'conservative_loss': 0.4474513454884291, 'time_step': 0.004129426956176758}[0m [36mstep[0m=[35m16000[0m


Epoch 17/30: 100%|██████████| 1000/1000 [00:04<00:00, 242.25it/s, loss=0.505, td_loss=0.061, conservative_loss=0.444]

[2m2024-08-28 22:11.31[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221020: epoch=17 step=17000[0m [36mepoch[0m=[35m17[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00033078980445861816, 'time_algorithm_update': 0.0037089340686798098, 'loss': 0.5042565710097552, 'td_loss': 0.06089520191028714, 'conservative_loss': 0.44336136867105963, 'time_step': 0.004105894088745117}[0m [36mstep[0m=[35m17000[0m



Epoch 18/30: 100%|██████████| 1000/1000 [00:04<00:00, 242.64it/s, loss=0.5, td_loss=0.0543, conservative_loss=0.445] 


[2m2024-08-28 22:11.35[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221020: epoch=18 step=18000[0m [36mepoch[0m=[35m18[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00032903313636779786, 'time_algorithm_update': 0.0037097837924957274, 'loss': 0.4994664835482836, 'td_loss': 0.054317894296720626, 'conservative_loss': 0.4451485886871815, 'time_step': 0.004100573539733887}[0m [36mstep[0m=[35m18000[0m


Epoch 19/30: 100%|██████████| 1000/1000 [00:04<00:00, 238.41it/s, loss=0.494, td_loss=0.0529, conservative_loss=0.441]

[2m2024-08-28 22:11.39[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221020: epoch=19 step=19000[0m [36mepoch[0m=[35m19[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00033156728744506835, 'time_algorithm_update': 0.0037814457416534423, 'loss': 0.49426002624630927, 'td_loss': 0.05284755919873715, 'conservative_loss': 0.44141246722638605, 'time_step': 0.004174180030822754}[0m [36mstep[0m=[35m19000[0m



Epoch 20/30: 100%|██████████| 1000/1000 [00:04<00:00, 240.46it/s, loss=0.493, td_loss=0.0526, conservative_loss=0.44]

[2m2024-08-28 22:11.43[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221020: epoch=20 step=20000[0m [36mepoch[0m=[35m20[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00032746601104736327, 'time_algorithm_update': 0.003748171329498291, 'loss': 0.49277783109247686, 'td_loss': 0.05256521512567997, 'conservative_loss': 0.4402126158475876, 'time_step': 0.004138096570968628}[0m [36mstep[0m=[35m20000[0m



Epoch 21/30: 100%|██████████| 1000/1000 [00:04<00:00, 240.82it/s, loss=0.487, td_loss=0.0519, conservative_loss=0.435]


[2m2024-08-28 22:11.47[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221020: epoch=21 step=21000[0m [36mepoch[0m=[35m21[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00033196330070495606, 'time_algorithm_update': 0.0037358086109161377, 'loss': 0.48670920757949354, 'td_loss': 0.05196552047505975, 'conservative_loss': 0.4347436865121126, 'time_step': 0.004131187915802002}[0m [36mstep[0m=[35m21000[0m


Epoch 22/30: 100%|██████████| 1000/1000 [00:04<00:00, 239.38it/s, loss=0.486, td_loss=0.0516, conservative_loss=0.434]

[2m2024-08-28 22:11.52[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221020: epoch=22 step=22000[0m [36mepoch[0m=[35m22[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00033235716819763185, 'time_algorithm_update': 0.003762300491333008, 'loss': 0.4860494309961796, 'td_loss': 0.05196098186378367, 'conservative_loss': 0.4340884490609169, 'time_step': 0.00415667462348938}[0m [36mstep[0m=[35m22000[0m



Epoch 23/30: 100%|██████████| 1000/1000 [00:04<00:00, 243.86it/s, loss=0.491, td_loss=0.0525, conservative_loss=0.438]

[2m2024-08-28 22:11.56[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221020: epoch=23 step=23000[0m [36mepoch[0m=[35m23[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000316819429397583, 'time_algorithm_update': 0.0037016568183898925, 'loss': 0.49046027901768685, 'td_loss': 0.05245708647184074, 'conservative_loss': 0.4380031925886869, 'time_step': 0.004080244064331055}[0m [36mstep[0m=[35m23000[0m



Epoch 24/30: 100%|██████████| 1000/1000 [00:04<00:00, 238.04it/s, loss=0.491, td_loss=0.0508, conservative_loss=0.44]

[2m2024-08-28 22:12.00[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221020: epoch=24 step=24000[0m [36mepoch[0m=[35m24[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0003285095691680908, 'time_algorithm_update': 0.0037887227535247803, 'loss': 0.49145496071875094, 'td_loss': 0.05098290484212339, 'conservative_loss': 0.4404720562547445, 'time_step': 0.004180672168731689}[0m [36mstep[0m=[35m24000[0m



Epoch 25/30: 100%|██████████| 1000/1000 [00:04<00:00, 240.39it/s, loss=0.517, td_loss=0.0712, conservative_loss=0.446]

[2m2024-08-28 22:12.04[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221020: epoch=25 step=25000[0m [36mepoch[0m=[35m25[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00033051538467407225, 'time_algorithm_update': 0.0037453811168670654, 'loss': 0.5160638367086648, 'td_loss': 0.07084981714794412, 'conservative_loss': 0.44521401900053026, 'time_step': 0.004138638257980346}[0m [36mstep[0m=[35m25000[0m



Epoch 26/30: 100%|██████████| 1000/1000 [00:04<00:00, 237.10it/s, loss=0.512, td_loss=0.0669, conservative_loss=0.445]

[2m2024-08-28 22:12.08[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221020: epoch=26 step=26000[0m [36mepoch[0m=[35m26[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0003365631103515625, 'time_algorithm_update': 0.003797505855560303, 'loss': 0.5118748528510332, 'td_loss': 0.0669424748653546, 'conservative_loss': 0.4449323785007, 'time_step': 0.004196597099304199}[0m [36mstep[0m=[35m26000[0m



Epoch 27/30: 100%|██████████| 1000/1000 [00:04<00:00, 244.02it/s, loss=0.509, td_loss=0.0676, conservative_loss=0.441]

[2m2024-08-28 22:12.12[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221020: epoch=27 step=27000[0m [36mepoch[0m=[35m27[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0003221504688262939, 'time_algorithm_update': 0.0036929547786712646, 'loss': 0.5089121645092964, 'td_loss': 0.0676758542880416, 'conservative_loss': 0.44123630970716476, 'time_step': 0.004076804637908936}[0m [36mstep[0m=[35m27000[0m



Epoch 28/30: 100%|██████████| 1000/1000 [00:04<00:00, 238.89it/s, loss=0.508, td_loss=0.0679, conservative_loss=0.44]


[2m2024-08-28 22:12.17[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221020: epoch=28 step=28000[0m [36mepoch[0m=[35m28[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00032749319076538086, 'time_algorithm_update': 0.0037759103775024416, 'loss': 0.5074993384480476, 'td_loss': 0.0676436687130481, 'conservative_loss': 0.4398556697368622, 'time_step': 0.0041645545959472655}[0m [36mstep[0m=[35m28000[0m


Epoch 29/30: 100%|██████████| 1000/1000 [00:04<00:00, 241.49it/s, loss=0.512, td_loss=0.0675, conservative_loss=0.444]

[2m2024-08-28 22:12.21[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221020: epoch=29 step=29000[0m [36mepoch[0m=[35m29[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0003293430805206299, 'time_algorithm_update': 0.0037257933616638185, 'loss': 0.5113818790912629, 'td_loss': 0.06733289407705888, 'conservative_loss': 0.44404898551106453, 'time_step': 0.004118478298187256}[0m [36mstep[0m=[35m29000[0m



Epoch 30/30: 100%|██████████| 1000/1000 [00:04<00:00, 243.86it/s, loss=0.507, td_loss=0.0665, conservative_loss=0.44]


[2m2024-08-28 22:12.25[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221020: epoch=30 step=30000[0m [36mepoch[0m=[35m30[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00032581830024719236, 'time_algorithm_update': 0.0036938862800598143, 'loss': 0.506650754392147, 'td_loss': 0.06635285575082525, 'conservative_loss': 0.4402978985607624, 'time_step': 0.0040810155868530274}[0m [36mstep[0m=[35m30000[0m
Poisoning Large
[2m2024-08-28 22:12.25[0m [[32m[1minfo     [0m] [1mdataset info                  [0m [36mdataset_info[0m=[35mDatasetInfo(observation_signature=Signature(dtype=[dtype('uint8')], shape=[(3, 7, 7)]), action_signature=Signature(dtype=[dtype('int64')], shape=[(1,)]), reward_signature=Signature(dtype=[dtype('float64')], shape=[(1,)]), action_space=<ActionSpace.DISCRETE: 2>, action_size=3)[0m
[2m2024-08-28 22:12.25[0m [[32m[1minfo     [0m] [1mDirectory is created at d3rlpy_logs/DiscreteCQL_20240828221225[0m
[2m2024-08-28 22:12.25[0m [[32m[1md

Epoch 1/30: 100%|██████████| 1000/1000 [00:04<00:00, 242.82it/s, loss=0.873, td_loss=0.0553, conservative_loss=0.817]


[2m2024-08-28 22:12.29[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221225: epoch=1 step=1000[0m [36mepoch[0m=[35m1[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0003274953365325928, 'time_algorithm_update': 0.003708037853240967, 'loss': 0.8723271320462227, 'td_loss': 0.05530775446165353, 'conservative_loss': 0.8170193779468536, 'time_step': 0.0040970537662506105}[0m [36mstep[0m=[35m1000[0m


Epoch 2/30: 100%|██████████| 1000/1000 [00:04<00:00, 240.54it/s, loss=0.735, td_loss=0.0618, conservative_loss=0.673]

[2m2024-08-28 22:12.33[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221225: epoch=2 step=2000[0m [36mepoch[0m=[35m2[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00033184123039245607, 'time_algorithm_update': 0.0037412500381469728, 'loss': 0.7348161529302597, 'td_loss': 0.0620324167474173, 'conservative_loss': 0.6727837360203266, 'time_step': 0.004135744571685791}[0m [36mstep[0m=[35m2000[0m



Epoch 3/30: 100%|██████████| 1000/1000 [00:04<00:00, 239.77it/s, loss=0.655, td_loss=0.0748, conservative_loss=0.58]

[2m2024-08-28 22:12.37[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221225: epoch=3 step=3000[0m [36mepoch[0m=[35m3[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00033115124702453615, 'time_algorithm_update': 0.0037540743350982664, 'loss': 0.6545015687048436, 'td_loss': 0.07497203445946797, 'conservative_loss': 0.5795295344889164, 'time_step': 0.004148358821868896}[0m [36mstep[0m=[35m3000[0m



Epoch 4/30: 100%|██████████| 1000/1000 [00:04<00:00, 240.67it/s, loss=0.622, td_loss=0.0771, conservative_loss=0.545]

[2m2024-08-28 22:12.42[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221225: epoch=4 step=4000[0m [36mepoch[0m=[35m4[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00033017253875732423, 'time_algorithm_update': 0.003741593837738037, 'loss': 0.6213869897723198, 'td_loss': 0.07702019458264113, 'conservative_loss': 0.5443667951524258, 'time_step': 0.004134663105010986}[0m [36mstep[0m=[35m4000[0m



Epoch 5/30: 100%|██████████| 1000/1000 [00:04<00:00, 241.69it/s, loss=0.608, td_loss=0.0783, conservative_loss=0.53]

[2m2024-08-28 22:12.46[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221225: epoch=5 step=5000[0m [36mepoch[0m=[35m5[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0003253018856048584, 'time_algorithm_update': 0.0037291345596313476, 'loss': 0.6077553686499596, 'td_loss': 0.0783129111258313, 'conservative_loss': 0.529442458063364, 'time_step': 0.004116382598876953}[0m [36mstep[0m=[35m5000[0m



Epoch 6/30: 100%|██████████| 1000/1000 [00:04<00:00, 239.17it/s, loss=0.59, td_loss=0.0771, conservative_loss=0.513]


[2m2024-08-28 22:12.50[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221225: epoch=6 step=6000[0m [36mepoch[0m=[35m6[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0003278372287750244, 'time_algorithm_update': 0.0037669386863708495, 'loss': 0.5896213728487492, 'td_loss': 0.07698736610729247, 'conservative_loss': 0.5126340064108372, 'time_step': 0.004159550905227661}[0m [36mstep[0m=[35m6000[0m


Epoch 7/30: 100%|██████████| 1000/1000 [00:04<00:00, 242.05it/s, loss=0.582, td_loss=0.0733, conservative_loss=0.509]

[2m2024-08-28 22:12.54[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221225: epoch=7 step=7000[0m [36mepoch[0m=[35m7[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0003358173370361328, 'time_algorithm_update': 0.0037110047340393067, 'loss': 0.5823307521939277, 'td_loss': 0.07343873349111527, 'conservative_loss': 0.5088920182883739, 'time_step': 0.004109574317932129}[0m [36mstep[0m=[35m7000[0m



Epoch 8/30: 100%|██████████| 1000/1000 [00:04<00:00, 242.17it/s, loss=0.575, td_loss=0.0716, conservative_loss=0.503]

[2m2024-08-28 22:12.58[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221225: epoch=8 step=8000[0m [36mepoch[0m=[35m8[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0003253626823425293, 'time_algorithm_update': 0.0037234816551208498, 'loss': 0.5736269274055957, 'td_loss': 0.07138723557256162, 'conservative_loss': 0.5022396922409534, 'time_step': 0.004109063386917114}[0m [36mstep[0m=[35m8000[0m



Epoch 9/30: 100%|██████████| 1000/1000 [00:04<00:00, 241.61it/s, loss=0.557, td_loss=0.0706, conservative_loss=0.486]

[2m2024-08-28 22:13.02[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221225: epoch=9 step=9000[0m [36mepoch[0m=[35m9[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0003306865692138672, 'time_algorithm_update': 0.003722533941268921, 'loss': 0.555937923014164, 'td_loss': 0.07038882604520767, 'conservative_loss': 0.48554909726977347, 'time_step': 0.004117959260940552}[0m [36mstep[0m=[35m9000[0m



Epoch 10/30: 100%|██████████| 1000/1000 [00:04<00:00, 238.23it/s, loss=0.554, td_loss=0.0643, conservative_loss=0.49]


[2m2024-08-28 22:13.07[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221225: epoch=10 step=10000[0m [36mepoch[0m=[35m10[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000331517219543457, 'time_algorithm_update': 0.003780031442642212, 'loss': 0.5546380985081196, 'td_loss': 0.06442681895475835, 'conservative_loss': 0.4902112800627947, 'time_step': 0.004177063465118408}[0m [36mstep[0m=[35m10000[0m


Epoch 11/30: 100%|██████████| 1000/1000 [00:04<00:00, 239.91it/s, loss=0.542, td_loss=0.0615, conservative_loss=0.481]

[2m2024-08-28 22:13.11[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221225: epoch=11 step=11000[0m [36mepoch[0m=[35m11[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00032838845252990725, 'time_algorithm_update': 0.003756613731384277, 'loss': 0.5420353383421898, 'td_loss': 0.06148200379312038, 'conservative_loss': 0.4805533346384764, 'time_step': 0.004146893739700317}[0m [36mstep[0m=[35m11000[0m



Epoch 12/30: 100%|██████████| 1000/1000 [00:04<00:00, 243.72it/s, loss=0.535, td_loss=0.0614, conservative_loss=0.473]

[2m2024-08-28 22:13.15[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221225: epoch=12 step=12000[0m [36mepoch[0m=[35m12[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00032439160346984863, 'time_algorithm_update': 0.0036992318630218505, 'loss': 0.5352721332013607, 'td_loss': 0.061553026420529934, 'conservative_loss': 0.4737191067934036, 'time_step': 0.004083265304565429}[0m [36mstep[0m=[35m12000[0m



Epoch 13/30: 100%|██████████| 1000/1000 [00:04<00:00, 240.22it/s, loss=0.535, td_loss=0.0612, conservative_loss=0.474]

[2m2024-08-28 22:13.19[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221225: epoch=13 step=13000[0m [36mepoch[0m=[35m13[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0003214547634124756, 'time_algorithm_update': 0.003758808374404907, 'loss': 0.534414846971631, 'td_loss': 0.06114688922371715, 'conservative_loss': 0.4732679584026337, 'time_step': 0.0041422791481018064}[0m [36mstep[0m=[35m13000[0m



Epoch 14/30: 100%|██████████| 1000/1000 [00:04<00:00, 239.26it/s, loss=0.537, td_loss=0.0592, conservative_loss=0.478]


[2m2024-08-28 22:13.23[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221225: epoch=14 step=14000[0m [36mepoch[0m=[35m14[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00032504081726074217, 'time_algorithm_update': 0.003766864538192749, 'loss': 0.5368174709379673, 'td_loss': 0.059111570116132495, 'conservative_loss': 0.4777059010267258, 'time_step': 0.00415728497505188}[0m [36mstep[0m=[35m14000[0m


Epoch 15/30: 100%|██████████| 1000/1000 [00:04<00:00, 244.62it/s, loss=0.538, td_loss=0.0598, conservative_loss=0.478]

[2m2024-08-28 22:13.27[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221225: epoch=15 step=15000[0m [36mepoch[0m=[35m15[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000315807580947876, 'time_algorithm_update': 0.0036888349056243895, 'loss': 0.5372404051721096, 'td_loss': 0.059670645757578314, 'conservative_loss': 0.47756975960731507, 'time_step': 0.004067687273025512}[0m [36mstep[0m=[35m15000[0m



Epoch 16/30: 100%|██████████| 1000/1000 [00:04<00:00, 238.00it/s, loss=0.53, td_loss=0.0593, conservative_loss=0.47] 

[2m2024-08-28 22:13.32[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221225: epoch=16 step=16000[0m [36mepoch[0m=[35m16[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00033194446563720705, 'time_algorithm_update': 0.003782071590423584, 'loss': 0.5295980024784803, 'td_loss': 0.05914974871603772, 'conservative_loss': 0.470448254391551, 'time_step': 0.00417963719367981}[0m [36mstep[0m=[35m16000[0m



Epoch 17/30: 100%|██████████| 1000/1000 [00:04<00:00, 238.16it/s, loss=0.539, td_loss=0.0739, conservative_loss=0.465]

[2m2024-08-28 22:13.36[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221225: epoch=17 step=17000[0m [36mepoch[0m=[35m17[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00032952284812927244, 'time_algorithm_update': 0.0037831308841705324, 'loss': 0.5381378568857909, 'td_loss': 0.07368486268399284, 'conservative_loss': 0.4644529938548803, 'time_step': 0.004176683187484741}[0m [36mstep[0m=[35m17000[0m



Epoch 18/30: 100%|██████████| 1000/1000 [00:04<00:00, 241.11it/s, loss=0.544, td_loss=0.0691, conservative_loss=0.475]


[2m2024-08-28 22:13.40[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221225: epoch=18 step=18000[0m [36mepoch[0m=[35m18[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0003269815444946289, 'time_algorithm_update': 0.0037404680252075195, 'loss': 0.5435069527328015, 'td_loss': 0.06911064752470703, 'conservative_loss': 0.47439630557596685, 'time_step': 0.0041276772022247315}[0m [36mstep[0m=[35m18000[0m


Epoch 19/30: 100%|██████████| 1000/1000 [00:04<00:00, 240.95it/s, loss=0.545, td_loss=0.0691, conservative_loss=0.476]

[2m2024-08-28 22:13.44[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221225: epoch=19 step=19000[0m [36mepoch[0m=[35m19[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00032886695861816405, 'time_algorithm_update': 0.0037369885444641115, 'loss': 0.5448623611330986, 'td_loss': 0.0688647124178242, 'conservative_loss': 0.475997648820281, 'time_step': 0.004130016326904297}[0m [36mstep[0m=[35m19000[0m



Epoch 20/30: 100%|██████████| 1000/1000 [00:04<00:00, 235.00it/s, loss=0.53, td_loss=0.068, conservative_loss=0.462] 

[2m2024-08-28 22:13.48[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221225: epoch=20 step=20000[0m [36mepoch[0m=[35m20[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0003429441452026367, 'time_algorithm_update': 0.0038283231258392334, 'loss': 0.5298492820113897, 'td_loss': 0.06805873101693578, 'conservative_loss': 0.4617905509769917, 'time_step': 0.004234313011169433}[0m [36mstep[0m=[35m20000[0m



Epoch 21/30: 100%|██████████| 1000/1000 [00:04<00:00, 237.66it/s, loss=0.54, td_loss=0.0665, conservative_loss=0.473]

[2m2024-08-28 22:13.53[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221225: epoch=21 step=21000[0m [36mepoch[0m=[35m21[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00033259248733520506, 'time_algorithm_update': 0.003788785934448242, 'loss': 0.5395873237550258, 'td_loss': 0.06640885552158579, 'conservative_loss': 0.47317846858501433, 'time_step': 0.004185845851898193}[0m [36mstep[0m=[35m21000[0m



Epoch 22/30: 100%|██████████| 1000/1000 [00:04<00:00, 236.08it/s, loss=0.54, td_loss=0.0665, conservative_loss=0.473]

[2m2024-08-28 22:13.57[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221225: epoch=22 step=22000[0m [36mepoch[0m=[35m22[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0003425896167755127, 'time_algorithm_update': 0.003807013511657715, 'loss': 0.5395656287372113, 'td_loss': 0.06643473696359434, 'conservative_loss': 0.4731308918893337, 'time_step': 0.004213966608047485}[0m [36mstep[0m=[35m22000[0m



Epoch 23/30: 100%|██████████| 1000/1000 [00:04<00:00, 236.40it/s, loss=0.533, td_loss=0.0665, conservative_loss=0.466]

[2m2024-08-28 22:14.01[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221225: epoch=23 step=23000[0m [36mepoch[0m=[35m23[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0003374679088592529, 'time_algorithm_update': 0.0038077595233917236, 'loss': 0.53189425188303, 'td_loss': 0.06638222052226775, 'conservative_loss': 0.46551203101873395, 'time_step': 0.004209185361862182}[0m [36mstep[0m=[35m23000[0m



Epoch 24/30: 100%|██████████| 1000/1000 [00:04<00:00, 241.84it/s, loss=0.534, td_loss=0.0672, conservative_loss=0.467]

[2m2024-08-28 22:14.05[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221225: epoch=24 step=24000[0m [36mepoch[0m=[35m24[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0003258335590362549, 'time_algorithm_update': 0.0037261183261871338, 'loss': 0.5339239043295383, 'td_loss': 0.06728406177554279, 'conservative_loss': 0.46663984237611295, 'time_step': 0.004113585472106934}[0m [36mstep[0m=[35m24000[0m



Epoch 25/30: 100%|██████████| 1000/1000 [00:04<00:00, 238.49it/s, loss=0.554, td_loss=0.0803, conservative_loss=0.474]

[2m2024-08-28 22:14.09[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221225: epoch=25 step=25000[0m [36mepoch[0m=[35m25[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0003359851837158203, 'time_algorithm_update': 0.0037732443809509277, 'loss': 0.5540770489573479, 'td_loss': 0.08018613316677511, 'conservative_loss': 0.47389091563224794, 'time_step': 0.004171250104904175}[0m [36mstep[0m=[35m25000[0m



Epoch 26/30: 100%|██████████| 1000/1000 [00:04<00:00, 237.71it/s, loss=0.55, td_loss=0.078, conservative_loss=0.472] 

[2m2024-08-28 22:14.14[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221225: epoch=26 step=26000[0m [36mepoch[0m=[35m26[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00033889961242675783, 'time_algorithm_update': 0.003783888101577759, 'loss': 0.5498273846507072, 'td_loss': 0.07771818268764764, 'conservative_loss': 0.4721092020124197, 'time_step': 0.004185332536697388}[0m [36mstep[0m=[35m26000[0m



Epoch 27/30: 100%|██████████| 1000/1000 [00:04<00:00, 241.43it/s, loss=0.543, td_loss=0.0749, conservative_loss=0.469]


[2m2024-08-28 22:14.18[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221225: epoch=27 step=27000[0m [36mepoch[0m=[35m27[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0003282454013824463, 'time_algorithm_update': 0.003731989860534668, 'loss': 0.5436872537881136, 'td_loss': 0.07506765157310293, 'conservative_loss': 0.4686196023076773, 'time_step': 0.004120898485183716}[0m [36mstep[0m=[35m27000[0m


Epoch 28/30: 100%|██████████| 1000/1000 [00:04<00:00, 243.76it/s, loss=0.548, td_loss=0.0767, conservative_loss=0.471]

[2m2024-08-28 22:14.22[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221225: epoch=28 step=28000[0m [36mepoch[0m=[35m28[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0003239891529083252, 'time_algorithm_update': 0.003692528247833252, 'loss': 0.5469841645807028, 'td_loss': 0.07633214228320867, 'conservative_loss': 0.4706520224660635, 'time_step': 0.0040803875923156735}[0m [36mstep[0m=[35m28000[0m



Epoch 29/30: 100%|██████████| 1000/1000 [00:04<00:00, 238.58it/s, loss=0.539, td_loss=0.0754, conservative_loss=0.463]

[2m2024-08-28 22:14.26[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221225: epoch=29 step=29000[0m [36mepoch[0m=[35m29[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00033382177352905276, 'time_algorithm_update': 0.0037732150554656983, 'loss': 0.5385864131748677, 'td_loss': 0.07544019422819838, 'conservative_loss': 0.4631462192237377, 'time_step': 0.004170364141464234}[0m [36mstep[0m=[35m29000[0m



Epoch 30/30: 100%|██████████| 1000/1000 [00:04<00:00, 242.14it/s, loss=0.537, td_loss=0.0745, conservative_loss=0.463]


[2m2024-08-28 22:14.30[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221225: epoch=30 step=30000[0m [36mepoch[0m=[35m30[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00033267593383789064, 'time_algorithm_update': 0.00371230411529541, 'loss': 0.5371585221290588, 'td_loss': 0.07458437518496067, 'conservative_loss': 0.46257414785027506, 'time_step': 0.004107642650604248}[0m [36mstep[0m=[35m30000[0m
Poisoning Large
[2m2024-08-28 22:14.30[0m [[32m[1minfo     [0m] [1mdataset info                  [0m [36mdataset_info[0m=[35mDatasetInfo(observation_signature=Signature(dtype=[dtype('uint8')], shape=[(3, 7, 7)]), action_signature=Signature(dtype=[dtype('int64')], shape=[(1,)]), reward_signature=Signature(dtype=[dtype('float64')], shape=[(1,)]), action_space=<ActionSpace.DISCRETE: 2>, action_size=3)[0m
[2m2024-08-28 22:14.30[0m [[32m[1minfo     [0m] [1mDirectory is created at d3rlpy_logs/DiscreteCQL_20240828221430[0m
[2m2024-08-28 22:14.30[0m [[32m[1mde

Epoch 1/30: 100%|██████████| 1000/1000 [00:04<00:00, 238.35it/s, loss=0.855, td_loss=0.0531, conservative_loss=0.802]

[2m2024-08-28 22:14.34[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221430: epoch=1 step=1000[0m [36mepoch[0m=[35m1[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0003385367393493652, 'time_algorithm_update': 0.003770198345184326, 'loss': 0.8544400954842567, 'td_loss': 0.053088898184942085, 'conservative_loss': 0.8013511970639229, 'time_step': 0.004173824787139893}[0m [36mstep[0m=[35m1000[0m



Epoch 2/30: 100%|██████████| 1000/1000 [00:04<00:00, 243.53it/s, loss=0.733, td_loss=0.0649, conservative_loss=0.668]


[2m2024-08-28 22:14.39[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221430: epoch=2 step=2000[0m [36mepoch[0m=[35m2[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00032770562171936037, 'time_algorithm_update': 0.003697026252746582, 'loss': 0.7332088987827301, 'td_loss': 0.06491864434955642, 'conservative_loss': 0.6682902546525001, 'time_step': 0.004086337089538574}[0m [36mstep[0m=[35m2000[0m


Epoch 3/30: 100%|██████████| 1000/1000 [00:04<00:00, 238.20it/s, loss=0.69, td_loss=0.0729, conservative_loss=0.617]

[2m2024-08-28 22:14.43[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221430: epoch=3 step=3000[0m [36mepoch[0m=[35m3[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00033380794525146487, 'time_algorithm_update': 0.003779975652694702, 'loss': 0.6900981003046036, 'td_loss': 0.07296909006591887, 'conservative_loss': 0.6171290104985238, 'time_step': 0.004177225112915039}[0m [36mstep[0m=[35m3000[0m



Epoch 4/30: 100%|██████████| 1000/1000 [00:04<00:00, 241.40it/s, loss=0.661, td_loss=0.0745, conservative_loss=0.587]

[2m2024-08-28 22:14.47[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221430: epoch=4 step=4000[0m [36mepoch[0m=[35m4[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0003290410041809082, 'time_algorithm_update': 0.0037305707931518553, 'loss': 0.6618363507091999, 'td_loss': 0.07476404605712741, 'conservative_loss': 0.5870723049342632, 'time_step': 0.0041221556663513185}[0m [36mstep[0m=[35m4000[0m



Epoch 5/30: 100%|██████████| 1000/1000 [00:04<00:00, 236.05it/s, loss=0.646, td_loss=0.0769, conservative_loss=0.57]


[2m2024-08-28 22:14.51[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221430: epoch=5 step=5000[0m [36mepoch[0m=[35m5[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0003383157253265381, 'time_algorithm_update': 0.00381320595741272, 'loss': 0.6461740362942219, 'td_loss': 0.07701650400832295, 'conservative_loss': 0.5691575321257114, 'time_step': 0.004214676141738892}[0m [36mstep[0m=[35m5000[0m


Epoch 6/30: 100%|██████████| 1000/1000 [00:04<00:00, 242.69it/s, loss=0.632, td_loss=0.0792, conservative_loss=0.553]

[2m2024-08-28 22:14.55[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221430: epoch=6 step=6000[0m [36mepoch[0m=[35m6[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00033547139167785644, 'time_algorithm_update': 0.0037007060050964356, 'loss': 0.6323354927003384, 'td_loss': 0.07935751062165945, 'conservative_loss': 0.5529779820144176, 'time_step': 0.004098734617233276}[0m [36mstep[0m=[35m6000[0m



Epoch 7/30: 100%|██████████| 1000/1000 [00:04<00:00, 241.47it/s, loss=0.627, td_loss=0.0799, conservative_loss=0.547]

[2m2024-08-28 22:14.59[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221430: epoch=7 step=7000[0m [36mepoch[0m=[35m7[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0003305492401123047, 'time_algorithm_update': 0.0037266435623168944, 'loss': 0.626880189806223, 'td_loss': 0.07999287520721554, 'conservative_loss': 0.5468873149752617, 'time_step': 0.004119901657104492}[0m [36mstep[0m=[35m7000[0m



Epoch 8/30: 100%|██████████| 1000/1000 [00:04<00:00, 237.61it/s, loss=0.621, td_loss=0.0808, conservative_loss=0.541]

[2m2024-08-28 22:15.04[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221430: epoch=8 step=8000[0m [36mepoch[0m=[35m8[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0003390169143676758, 'time_algorithm_update': 0.003784415245056152, 'loss': 0.621587800860405, 'td_loss': 0.08092873647436499, 'conservative_loss': 0.5406590646207332, 'time_step': 0.004186498165130615}[0m [36mstep[0m=[35m8000[0m



Epoch 9/30: 100%|██████████| 1000/1000 [00:04<00:00, 240.39it/s, loss=0.628, td_loss=0.0834, conservative_loss=0.544]

[2m2024-08-28 22:15.08[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221430: epoch=9 step=9000[0m [36mepoch[0m=[35m9[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0003311259746551514, 'time_algorithm_update': 0.0037401468753814698, 'loss': 0.6281781055629253, 'td_loss': 0.08337955422792584, 'conservative_loss': 0.5447985509634018, 'time_step': 0.004137473583221436}[0m [36mstep[0m=[35m9000[0m



Epoch 10/30: 100%|██████████| 1000/1000 [00:04<00:00, 234.65it/s, loss=0.622, td_loss=0.0798, conservative_loss=0.543]


[2m2024-08-28 22:15.12[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221430: epoch=10 step=10000[0m [36mepoch[0m=[35m10[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0003430130481719971, 'time_algorithm_update': 0.0038315496444702147, 'loss': 0.6227775178849697, 'td_loss': 0.07998650751728564, 'conservative_loss': 0.5427910103499889, 'time_step': 0.004240062236785889}[0m [36mstep[0m=[35m10000[0m


Epoch 11/30: 100%|██████████| 1000/1000 [00:04<00:00, 244.84it/s, loss=0.621, td_loss=0.0789, conservative_loss=0.542]

[2m2024-08-28 22:15.16[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221430: epoch=11 step=11000[0m [36mepoch[0m=[35m11[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0003230278491973877, 'time_algorithm_update': 0.003680962800979614, 'loss': 0.6209611580967903, 'td_loss': 0.07878460294939578, 'conservative_loss': 0.5421765560209751, 'time_step': 0.004064140558242798}[0m [36mstep[0m=[35m11000[0m



Epoch 12/30: 100%|██████████| 1000/1000 [00:04<00:00, 240.71it/s, loss=0.599, td_loss=0.0749, conservative_loss=0.524]

[2m2024-08-28 22:15.20[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221430: epoch=12 step=12000[0m [36mepoch[0m=[35m12[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0003245856761932373, 'time_algorithm_update': 0.003748420476913452, 'loss': 0.5994196167588234, 'td_loss': 0.07503126554377378, 'conservative_loss': 0.5243883513212204, 'time_step': 0.004134228944778442}[0m [36mstep[0m=[35m12000[0m



Epoch 13/30: 100%|██████████| 1000/1000 [00:04<00:00, 238.55it/s, loss=0.609, td_loss=0.0763, conservative_loss=0.533]

[2m2024-08-28 22:15.25[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221430: epoch=13 step=13000[0m [36mepoch[0m=[35m13[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0003352017402648926, 'time_algorithm_update': 0.0037727794647216795, 'loss': 0.6088981139957905, 'td_loss': 0.07612885506963357, 'conservative_loss': 0.5327692584395408, 'time_step': 0.004170244932174683}[0m [36mstep[0m=[35m13000[0m



Epoch 14/30: 100%|██████████| 1000/1000 [00:04<00:00, 238.29it/s, loss=0.597, td_loss=0.0748, conservative_loss=0.522]


[2m2024-08-28 22:15.29[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221430: epoch=14 step=14000[0m [36mepoch[0m=[35m14[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00033797168731689454, 'time_algorithm_update': 0.0037705693244934084, 'loss': 0.5967015011012554, 'td_loss': 0.07469081065617501, 'conservative_loss': 0.522010689675808, 'time_step': 0.004174238204956055}[0m [36mstep[0m=[35m14000[0m


Epoch 15/30: 100%|██████████| 1000/1000 [00:04<00:00, 239.56it/s, loss=0.606, td_loss=0.0767, conservative_loss=0.529]

[2m2024-08-28 22:15.33[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221430: epoch=15 step=15000[0m [36mepoch[0m=[35m15[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00033408069610595706, 'time_algorithm_update': 0.003752207279205322, 'loss': 0.605993049532175, 'td_loss': 0.07682087436504662, 'conservative_loss': 0.5291721759736538, 'time_step': 0.00415233039855957}[0m [36mstep[0m=[35m15000[0m



Epoch 16/30: 100%|██████████| 1000/1000 [00:04<00:00, 242.44it/s, loss=0.605, td_loss=0.0748, conservative_loss=0.53]


[2m2024-08-28 22:15.37[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221430: epoch=16 step=16000[0m [36mepoch[0m=[35m16[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00032778406143188477, 'time_algorithm_update': 0.003713702440261841, 'loss': 0.604615610152483, 'td_loss': 0.07450614318018779, 'conservative_loss': 0.5301094661653042, 'time_step': 0.0041042592525482174}[0m [36mstep[0m=[35m16000[0m


Epoch 17/30: 100%|██████████| 1000/1000 [00:04<00:00, 240.05it/s, loss=0.614, td_loss=0.0862, conservative_loss=0.528]

[2m2024-08-28 22:15.41[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221430: epoch=17 step=17000[0m [36mepoch[0m=[35m17[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0003351907730102539, 'time_algorithm_update': 0.0037424297332763674, 'loss': 0.6137415152788163, 'td_loss': 0.08601197649072856, 'conservative_loss': 0.5277295386195183, 'time_step': 0.004144002199172974}[0m [36mstep[0m=[35m17000[0m



Epoch 18/30: 100%|██████████| 1000/1000 [00:04<00:00, 235.39it/s, loss=0.609, td_loss=0.079, conservative_loss=0.53] 

[2m2024-08-28 22:15.46[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221430: epoch=18 step=18000[0m [36mepoch[0m=[35m18[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0003482596874237061, 'time_algorithm_update': 0.0038120765686035156, 'loss': 0.6087343043386936, 'td_loss': 0.07894108210736886, 'conservative_loss': 0.5297932232022285, 'time_step': 0.004226585626602173}[0m [36mstep[0m=[35m18000[0m



Epoch 19/30: 100%|██████████| 1000/1000 [00:04<00:00, 236.47it/s, loss=0.611, td_loss=0.0797, conservative_loss=0.531]

[2m2024-08-28 22:15.50[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221430: epoch=19 step=19000[0m [36mepoch[0m=[35m19[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0003438489437103272, 'time_algorithm_update': 0.0037964041233062742, 'loss': 0.6109377124905586, 'td_loss': 0.07992478348035366, 'conservative_loss': 0.5310129297971725, 'time_step': 0.004206311464309692}[0m [36mstep[0m=[35m19000[0m



Epoch 20/30: 100%|██████████| 1000/1000 [00:04<00:00, 239.91it/s, loss=0.603, td_loss=0.0792, conservative_loss=0.524]

[2m2024-08-28 22:15.54[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221430: epoch=20 step=20000[0m [36mepoch[0m=[35m20[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00033272743225097655, 'time_algorithm_update': 0.003749751567840576, 'loss': 0.6037777991592884, 'td_loss': 0.07938462690263987, 'conservative_loss': 0.5243931709825993, 'time_step': 0.0041475784778594975}[0m [36mstep[0m=[35m20000[0m



Epoch 21/30: 100%|██████████| 1000/1000 [00:04<00:00, 241.79it/s, loss=0.598, td_loss=0.0787, conservative_loss=0.52]

[2m2024-08-28 22:15.58[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221430: epoch=21 step=21000[0m [36mepoch[0m=[35m21[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0003313791751861572, 'time_algorithm_update': 0.0037201337814331053, 'loss': 0.598124983549118, 'td_loss': 0.07848298946768045, 'conservative_loss': 0.5196419948935509, 'time_step': 0.0041154463291168215}[0m [36mstep[0m=[35m21000[0m



Epoch 22/30: 100%|██████████| 1000/1000 [00:04<00:00, 242.66it/s, loss=0.602, td_loss=0.0793, conservative_loss=0.522]


[2m2024-08-28 22:16.02[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221430: epoch=22 step=22000[0m [36mepoch[0m=[35m22[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0003373899459838867, 'time_algorithm_update': 0.0037007415294647216, 'loss': 0.6013906370401383, 'td_loss': 0.07919342585140839, 'conservative_loss': 0.5221972116827964, 'time_step': 0.004101088523864746}[0m [36mstep[0m=[35m22000[0m


Epoch 23/30: 100%|██████████| 1000/1000 [00:04<00:00, 244.44it/s, loss=0.607, td_loss=0.0789, conservative_loss=0.529]

[2m2024-08-28 22:16.06[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221430: epoch=23 step=23000[0m [36mepoch[0m=[35m23[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0003226478099822998, 'time_algorithm_update': 0.003685574769973755, 'loss': 0.6080664011538028, 'td_loss': 0.07902250316529534, 'conservative_loss': 0.5290438981056214, 'time_step': 0.004070934057235718}[0m [36mstep[0m=[35m23000[0m



Epoch 24/30: 100%|██████████| 1000/1000 [00:04<00:00, 237.30it/s, loss=0.606, td_loss=0.0783, conservative_loss=0.528]

[2m2024-08-28 22:16.11[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221430: epoch=24 step=24000[0m [36mepoch[0m=[35m24[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00034055423736572264, 'time_algorithm_update': 0.003782006025314331, 'loss': 0.6055176763236523, 'td_loss': 0.0780463613094762, 'conservative_loss': 0.5274713146090507, 'time_step': 0.004191052436828613}[0m [36mstep[0m=[35m24000[0m



Epoch 25/30: 100%|██████████| 1000/1000 [00:04<00:00, 237.20it/s, loss=0.612, td_loss=0.0926, conservative_loss=0.519]

[2m2024-08-28 22:16.15[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221430: epoch=25 step=25000[0m [36mepoch[0m=[35m25[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0003391551971435547, 'time_algorithm_update': 0.0037927742004394533, 'loss': 0.6121035067737103, 'td_loss': 0.09266575192008167, 'conservative_loss': 0.5194377543330193, 'time_step': 0.0041947104930877685}[0m [36mstep[0m=[35m25000[0m



Epoch 26/30: 100%|██████████| 1000/1000 [00:04<00:00, 242.41it/s, loss=0.616, td_loss=0.0898, conservative_loss=0.526]

[2m2024-08-28 22:16.19[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221430: epoch=26 step=26000[0m [36mepoch[0m=[35m26[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00032908487319946287, 'time_algorithm_update': 0.003714705228805542, 'loss': 0.6171525321006774, 'td_loss': 0.09015639655012637, 'conservative_loss': 0.5269961351454258, 'time_step': 0.004104630708694458}[0m [36mstep[0m=[35m26000[0m



Epoch 27/30: 100%|██████████| 1000/1000 [00:04<00:00, 241.11it/s, loss=0.609, td_loss=0.0893, conservative_loss=0.52]

[2m2024-08-28 22:16.23[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221430: epoch=27 step=27000[0m [36mepoch[0m=[35m27[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00033055925369262693, 'time_algorithm_update': 0.003733076810836792, 'loss': 0.6092633951008319, 'td_loss': 0.08951474021980539, 'conservative_loss': 0.5197486546635628, 'time_step': 0.004125985145568847}[0m [36mstep[0m=[35m27000[0m



Epoch 28/30: 100%|██████████| 1000/1000 [00:04<00:00, 241.49it/s, loss=0.617, td_loss=0.0895, conservative_loss=0.528]


[2m2024-08-28 22:16.27[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221430: epoch=28 step=28000[0m [36mepoch[0m=[35m28[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00033399534225463865, 'time_algorithm_update': 0.0037215118408203125, 'loss': 0.6177251707315445, 'td_loss': 0.08987342841085047, 'conservative_loss': 0.5278517422676087, 'time_step': 0.004119503259658814}[0m [36mstep[0m=[35m28000[0m


Epoch 29/30: 100%|██████████| 1000/1000 [00:04<00:00, 236.60it/s, loss=0.616, td_loss=0.0895, conservative_loss=0.527]

[2m2024-08-28 22:16.31[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221430: epoch=29 step=29000[0m [36mepoch[0m=[35m29[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00033744001388549805, 'time_algorithm_update': 0.003804631233215332, 'loss': 0.6163211710751056, 'td_loss': 0.08955650964891539, 'conservative_loss': 0.5267646616697311, 'time_step': 0.004205332517623901}[0m [36mstep[0m=[35m29000[0m



Epoch 30/30: 100%|██████████| 1000/1000 [00:04<00:00, 234.01it/s, loss=0.61, td_loss=0.0883, conservative_loss=0.522]

[2m2024-08-28 22:16.36[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240828221430: epoch=30 step=30000[0m [36mepoch[0m=[35m30[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0003390965461730957, 'time_algorithm_update': 0.0038516135215759277, 'loss': 0.6103859531879425, 'td_loss': 0.08834594512078911, 'conservative_loss': 0.5220400082170963, 'time_step': 0.004252861261367798}[0m [36mstep[0m=[35m30000[0m



