In [1]:
# Configuring environment parameters
import os
import json 
import logging

# logging.basicConfig(filename=...) opens the file immediately and does not
# create missing parent directories, so make sure ./log exists first.
os.makedirs('log', exist_ok=True)
logging.basicConfig(
    filename='log/app.log',            # Specify the log file name
    level=logging.DEBUG,           # Set the log level (DEBUG, INFO, WARNING, ERROR, CRITICAL)
    format='%(asctime)s - %(levelname)s - %(message)s'  # Set the log format
)

# Load the environment configuration JSON data.
# The file must provide the keys 'HF_HOME' and 'access_token' (read below).
json_path = 'env_config.json'
with open(json_path, 'r', encoding='utf-8') as file:
    env_config = json.load(file)

hf_home = env_config['HF_HOME']
# Set the HF_HOME environment variable. This happens before the Hugging Face
# libraries are imported in the next cell, so they see the value at import time.
os.environ['HF_HOME'] = hf_home
# Set the access token to huggingface hub.
# HF_TOKEN is the variable huggingface_hub reads; the previously used
# HUGGINGFACE_HUB_TOKEN spelling is not recognised by the library, but it is
# still exported in case other tooling in this project relies on it.
access_token = env_config['access_token']
os.environ['HF_TOKEN'] = access_token
os.environ['HUGGINGFACE_HUB_TOKEN'] = access_token

In [2]:
# Loading necessary packages
import transformers 
import torch

from accelerate import Accelerator
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModel, LlamaForTokenClassification #, LlamaRotaryEmbedding
# from transformers import LlamaTokenizerFast
import torch.nn.functional as F

from llmexp.helper import DataHelper
from datasets import load_dataset
from torch.utils.data import DataLoader

# TODO: make sure the correct mask-generating model is imported for the dataset in use
from llmexp.imdb_model import MaskGeneratingModelForIMDB
from tqdm import tqdm

In [3]:
# Load the tokenizer, the dataset splits, and the train/test dataloaders
model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
# Left padding keeps every prompt flush against the tokens generated after it.
tokenizer = AutoTokenizer.from_pretrained(model_id, token=access_token, padding_side='left')
# Reuse the EOS token for padding.
tokenizer.pad_token = tokenizer.eos_token

# ds = load_dataset("imdb")
# ds = load_dataset("rajpurkar/squad")
ds = load_dataset("stanfordnlp/sst2")
train_ds = ds['train']
test_ds = ds['test']
# test_ds = ds['validation']

llm_exp_helper = DataHelper(tokenizer)
# collate_fn = llm_exp_helper.get_collate_fun('imdb')
collate_fn = llm_exp_helper.get_collate_fun('sst2')
# collate_fn = llm_exp_helper.get_collate_fun('squad')

# Define batch size here!
batch_size = 16
train_dataloader = DataLoader(train_ds, batch_size=batch_size, collate_fn=collate_fn, shuffle=True)
# Evaluate on the held-out split (this previously iterated over train_ds by mistake).
test_dataloader = DataLoader(test_ds, batch_size=batch_size, collate_fn=collate_fn, shuffle=False)

In [4]:
# Pick the checkpoint, resolve the target device, then load the causal LM
model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
# model_id = "meta-llama/Meta-Llama-3-8B"  # non-instruct version

# Let accelerate choose the device this process should run on.
accelerator = Accelerator()
device = accelerator.device
# device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")

# Weights are loaded in bfloat16 and placed directly on `device`
# (use device_map="auto" instead to let the library place the weights).
model = AutoModelForCausalLM.from_pretrained(
    model_id, torch_dtype=torch.bfloat16, device_map=device, token=access_token
)

# Keep a handle on the model configuration for later cells.
config = model.config

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [5]:
# Mask generator, its optimizer, and the stop tokens used during generation
mask_gen_model = MaskGeneratingModelForIMDB()
mask_gen_model.to(device)
optimizer = torch.optim.Adam(params=mask_gen_model.parameters(), lr=5e-5)

# Fall back to the EOS id for padding when the tokenizer defines no pad id
if tokenizer.pad_token_id is None:
    tokenizer.pad_token_id = tokenizer.eos_token_id

# Generation stops at either the tokenizer's EOS id or the "<|eot_id|>" token id.
terminators = [tokenizer.eos_token_id, tokenizer.convert_tokens_to_ids("<|eot_id|>")]

`LlamaRotaryEmbedding` can now be fully parameterized by passing the model config through the `config` argument. All other arguments will be removed in v4.45


In [6]:
# Sanity check: display one collated training batch. The training loop below
# reads the keys 'input_ids', 'attention_mask' and 'context_mask' from it.
next(iter(train_dataloader))

{'input_ids': tensor([[128009, 128009, 128009,  ...,  78191, 128007,    271],
        [128009, 128009, 128009,  ...,  78191, 128007,    271],
        [128009, 128009, 128009,  ...,  78191, 128007,    271],
        ...,
        [128009, 128009, 128009,  ...,  78191, 128007,    271],
        [128009, 128009, 128009,  ...,  78191, 128007,    271],
        [128009, 128009, 128009,  ...,  78191, 128007,    271]]), 'attention_mask': tensor([[0, 0, 0,  ..., 1, 1, 1],
        [0, 0, 0,  ..., 1, 1, 1],
        [0, 0, 0,  ..., 1, 1, 1],
        ...,
        [0, 0, 0,  ..., 1, 1, 1],
        [0, 0, 0,  ..., 1, 1, 1],
        [0, 0, 0,  ..., 1, 1, 1]]), 'context_mask': tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]])}

In [7]:
# Train the mask generator: for every batch, sample a response from the base
# LLM, build the attention/response/context masks over prompt + response, and
# hand everything to mask_gen_model.train_one_batch.

# Checkpoints are written every 100 steps; create the target directory up
# front so the first save cannot fail on a missing folder.
os.makedirs('saved_model', exist_ok=True)

mask_gen_model.train()
for epoch in range(1):
    pbar = tqdm(train_dataloader)
    for idx, data in enumerate(pbar):
        input_ids = data['input_ids'].to(device)
        attention_mask = data['attention_mask'].to(device)
        context_mask = data['context_mask'].to(device)
        # get generated texts
        gen_outputs = model.generate(
            input_ids=input_ids,
            attention_mask=attention_mask,
            max_new_tokens=128,
            eos_token_id=terminators,
            pad_token_id=tokenizer.pad_token_id,
            do_sample=True,
            temperature=0.6,
            top_p=0.9,
            return_dict_in_generate=True,
            output_scores=True,
        )
        gen_tokens = gen_outputs.sequences
        # gen_tokens is [prompt | generated]; pad_length is the number of generated columns.
        prompt_length = input_ids.size(1)
        pad_length = gen_tokens.size(1) - prompt_length
        # get the attention mask for the generated tokens, and also mask the padding tokens
        gen_attention_mask = F.pad(attention_mask, (0, pad_length), mode='constant', value=1)
        # (gen_tokens != pad_token_id).long() is the tokens mask, 1 for real tokens and 0 for padding tokens
        unpaded_token_mask = (gen_tokens != tokenizer.pad_token_id).long()
        # Prompt columns are governed by the original attention_mask only. Slicing by
        # prompt_length (rather than :-pad_length) stays correct when pad_length == 0,
        # where ":-0" would select nothing.
        unpaded_token_mask[:, :prompt_length] = 1
        gen_attention_mask = gen_attention_mask * unpaded_token_mask

        # get the response mask, which is the mask for the generated tokens (the user inputs are masked with 0)
        response_mask = gen_attention_mask.clone()
        # TODO: flagged by the author as possibly problematic -- is it really? Needs verification.
        # NOTE(review): pad_token was set to eos_token earlier in the notebook, so the
        # `!= pad_token_id` test above also zeroes a response's own terminating EOS token.
        response_mask[:, :prompt_length] = 0

        context_mask = F.pad(context_mask, (0, pad_length), mode='constant', value=0)

        loss_dict = mask_gen_model.train_one_batch(model, gen_tokens, gen_attention_mask, context_mask, response_mask, optimizer,
                                                   num_steps=5, mini_batch_size=16, ppo_epochs=2)

        # Progress-bar text; every metric is separated by ", " (mask_loss and
        # std_loss used to run together in the output).
        log = (
            f"Epoch {epoch+1}, Step {idx+1}: Loss = {loss_dict['loss']:.4f}, "
            f"Actor Loss = {loss_dict['actor_loss']:.4f}, "
            f"Critic Loss = {loss_dict['critic_loss']:.4f}, "
            f"Entropy = {loss_dict['entropy']:.4f}, "
            f"Returns = {loss_dict['returns']:.4f}, "
            f"Value = {loss_dict['value']:.4f}, "
            f"mask_loss = {loss_dict['mask_loss']:.4f}, "
            f"std_loss = {loss_dict['std_loss']:.4f}"
            # f", Cont_loss = {loss_dict['contrast_loss']:.4f}"
        )
        pbar.set_description(log)

        # Emit a newline every 10 steps so the progress bar output leaves a visible history.
        if idx % 10 == 0:
            print()
        # Checkpoint the mask generator every 100 steps (skipping step 0).
        if idx % 100 == 0 and idx != 0:
            torch.save(mask_gen_model.state_dict(), f'saved_model/imdb_mask_gen_model_{epoch}_{idx}.pth')

  0%|          | 0/4210 [00:00<?, ?it/s]We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)
Epoch 1, Step 1: Loss = 0.1125, Actor Loss = -0.2229, Critic Loss = 0.6845, Entropy = 0.6880, Returns = 1.2731, Value = 1.3590, mask_loss = 0.4989std_loss = 0.0418:   0%|          | 1/4210 [00:02<2:36:51,  2.24s/it]

ratio tensor(0.9776, device='cuda:0', grad_fn=<MeanBackward0>)



Epoch 1, Step 2: Loss = 0.5149, Actor Loss = 0.0157, Critic Loss = 1.0121, Entropy = 0.6840, Returns = 1.0428, Value = 0.7745, mask_loss = 0.4769std_loss = 0.0588:   0%|          | 2/4210 [00:04<2:56:47,  2.52s/it] 

ratio tensor(1.1291, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 3: Loss = 0.2989, Actor Loss = -0.1416, Critic Loss = 0.8944, Entropy = 0.6751, Returns = 0.9065, Value = 1.4577, mask_loss = 0.4513std_loss = 0.0720:   0%|          | 3/4210 [00:07<2:42:26,  2.32s/it]

ratio tensor(1.2522, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 4: Loss = 0.3727, Actor Loss = 0.0116, Critic Loss = 0.7357, Entropy = 0.6733, Returns = 1.5121, Value = 1.2683, mask_loss = 0.4397std_loss = 0.0726:   0%|          | 4/4210 [00:08<2:26:58,  2.10s/it] 

ratio tensor(0.9995, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 5: Loss = 0.3908, Actor Loss = -0.0768, Critic Loss = 0.9483, Entropy = 0.6570, Returns = 1.1442, Value = 1.2912, mask_loss = 0.3814std_loss = 0.0554:   0%|          | 5/4210 [00:11<2:42:37,  2.32s/it]

ratio tensor(0.9214, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 6: Loss = 0.1474, Actor Loss = -0.2257, Critic Loss = 0.7584, Entropy = 0.6187, Returns = 1.4876, Value = 1.8368, mask_loss = 0.3310std_loss = 0.0755:   0%|          | 6/4210 [00:13<2:29:07,  2.13s/it]

ratio tensor(0.8957, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 7: Loss = 1.5831, Actor Loss = -0.1414, Critic Loss = 3.4601, Entropy = 0.5580, Returns = 1.8357, Value = 1.4906, mask_loss = 0.2743std_loss = 0.1037:   0%|          | 7/4210 [00:15<2:26:01,  2.08s/it]

ratio tensor(0.8808, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 8: Loss = 1.8308, Actor Loss = -0.4416, Critic Loss = 4.5567, Entropy = 0.5955, Returns = 2.1326, Value = 1.9879, mask_loss = 0.3144std_loss = 0.1037:   0%|          | 8/4210 [00:17<2:24:33,  2.06s/it]

ratio tensor(1.0796, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 9: Loss = 0.5563, Actor Loss = -1.1194, Critic Loss = 3.3622, Entropy = 0.5448, Returns = 2.8548, Value = 2.0404, mask_loss = 0.2874std_loss = 0.1392:   0%|          | 9/4210 [00:19<2:23:40,  2.05s/it]

ratio tensor(1.0703, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 10: Loss = 1.4408, Actor Loss = 0.3414, Critic Loss = 2.2100, Entropy = 0.5628, Returns = 1.9820, Value = 1.4824, mask_loss = 0.3650std_loss = 0.1905:   0%|          | 10/4210 [00:21<2:22:50,  2.04s/it]

ratio tensor(0.8662, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 11: Loss = 0.5706, Actor Loss = 0.1745, Critic Loss = 0.8019, Entropy = 0.4795, Returns = 1.1443, Value = 1.1324, mask_loss = 0.3112std_loss = 0.2334:   0%|          | 11/4210 [00:23<2:23:17,  2.05s/it]

ratio tensor(0.9283, device='cuda:0', grad_fn=<MeanBackward0>)



Epoch 1, Step 12: Loss = -0.1895, Actor Loss = -0.5596, Critic Loss = 0.7499, Entropy = 0.4848, Returns = 1.6724, Value = 2.0548, mask_loss = 0.4085std_loss = 0.2662:   0%|          | 12/4210 [00:25<2:24:25,  2.06s/it]

ratio tensor(1.3533, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 13: Loss = 1.4201, Actor Loss = 0.8354, Critic Loss = 1.1796, Entropy = 0.5109, Returns = 1.0832, Value = 1.6029, mask_loss = 0.4622std_loss = 0.2591:   0%|          | 13/4210 [00:27<2:23:58,  2.06s/it]  

ratio tensor(0.9517, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 14: Loss = 0.6383, Actor Loss = 0.2830, Critic Loss = 0.7206, Entropy = 0.4985, Returns = 0.9773, Value = 1.3220, mask_loss = 0.4265std_loss = 0.2723:   0%|          | 14/4210 [00:29<2:20:44,  2.01s/it]

ratio tensor(0.9732, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 15: Loss = 0.8200, Actor Loss = 0.3114, Critic Loss = 1.0245, Entropy = 0.3686, Returns = 1.3199, Value = 1.7234, mask_loss = 0.1873std_loss = 0.1646:   0%|          | 15/4210 [00:31<2:21:20,  2.02s/it]

ratio tensor(0.2042, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 16: Loss = 3.1289, Actor Loss = -1.5848, Critic Loss = 9.4347, Entropy = 0.3665, Returns = 3.0164, Value = 2.2111, mask_loss = 0.1595std_loss = 0.1272:   0%|          | 16/4210 [00:33<2:21:58,  2.03s/it]

ratio tensor(1.6919, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 17: Loss = 2.5064, Actor Loss = 0.7966, Critic Loss = 3.4283, Entropy = 0.4401, Returns = 1.5826, Value = 2.1688, mask_loss = 0.2531std_loss = 0.1978:   0%|          | 17/4210 [00:35<2:21:50,  2.03s/it] 

ratio tensor(2.1525, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 18: Loss = 1.7129, Actor Loss = 0.3561, Critic Loss = 2.7228, Entropy = 0.4565, Returns = 1.7111, Value = 2.2920, mask_loss = 0.2583std_loss = 0.1792:   0%|          | 18/4210 [00:37<2:21:35,  2.03s/it]

ratio tensor(0.8177, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 19: Loss = 1.4926, Actor Loss = 0.4344, Critic Loss = 2.1245, Entropy = 0.4006, Returns = 1.4614, Value = 1.5879, mask_loss = 0.2761std_loss = 0.2210:   0%|          | 19/4210 [00:39<2:27:38,  2.11s/it]

ratio tensor(0.9082, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 20: Loss = 1.5961, Actor Loss = 0.7284, Critic Loss = 1.7426, Entropy = 0.3633, Returns = 0.9016, Value = 2.0135, mask_loss = 0.2071std_loss = 0.1826:   0%|          | 20/4210 [00:41<2:25:56,  2.09s/it]

ratio tensor(0.7753, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 21: Loss = 1.5820, Actor Loss = 0.5061, Critic Loss = 2.1588, Entropy = 0.3512, Returns = 1.2870, Value = 1.0608, mask_loss = 0.1889std_loss = 0.1753:   0%|          | 21/4210 [00:43<2:19:47,  2.00s/it]

ratio tensor(0.9729, device='cuda:0', grad_fn=<MeanBackward0>)



Epoch 1, Step 22: Loss = 0.6242, Actor Loss = -0.1551, Critic Loss = 1.5668, Entropy = 0.4091, Returns = 1.2943, Value = 1.8739, mask_loss = 0.2568std_loss = 0.2246:   1%|          | 22/4210 [00:49<3:42:39,  3.19s/it]

ratio tensor(0.8487, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 23: Loss = 1.7124, Actor Loss = 0.7174, Critic Loss = 1.9972, Entropy = 0.3620, Returns = 0.8639, Value = 1.1070, mask_loss = 0.1988std_loss = 0.1795:   1%|          | 23/4210 [00:51<3:19:13,  2.85s/it] 

ratio tensor(0.7648, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 24: Loss = 4.3266, Actor Loss = -2.2052, Critic Loss = 13.0710, Entropy = 0.3727, Returns = 2.9196, Value = 2.7621, mask_loss = 0.2962std_loss = 0.2511:   1%|          | 24/4210 [00:53<2:56:36,  2.53s/it]

ratio tensor(0.9003, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 25: Loss = 1.6066, Actor Loss = -0.5653, Critic Loss = 4.3494, Entropy = 0.2765, Returns = 3.5368, Value = 3.2077, mask_loss = 0.2396std_loss = 0.2735:   1%|          | 25/4210 [00:55<2:45:43,  2.38s/it] 

ratio tensor(1.0324, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 26: Loss = 2.1517, Actor Loss = 1.4839, Critic Loss = 1.3436, Entropy = 0.4098, Returns = 1.3316, Value = 1.1411, mask_loss = 0.2715std_loss = 0.2247:   1%|          | 26/4210 [00:57<2:33:04,  2.20s/it] 

ratio tensor(0.5483, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 27: Loss = 0.4453, Actor Loss = -1.6679, Critic Loss = 4.2346, Entropy = 0.4125, Returns = 2.5880, Value = 2.7390, mask_loss = 0.2692std_loss = 0.2210:   1%|          | 27/4210 [01:01<3:11:51,  2.75s/it]

ratio tensor(1.4143, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 28: Loss = 2.1294, Actor Loss = 0.7583, Critic Loss = 2.7508, Entropy = 0.4300, Returns = 1.8964, Value = 1.3474, mask_loss = 0.2689std_loss = 0.1931:   1%|          | 28/4210 [01:03<3:07:45,  2.69s/it] 

ratio tensor(1.0530, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 29: Loss = 3.7535, Actor Loss = -2.0533, Critic Loss = 11.6218, Entropy = 0.4118, Returns = 3.0333, Value = 1.5424, mask_loss = 0.2371std_loss = 0.1821:   1%|          | 29/4210 [01:05<2:48:32,  2.42s/it]

ratio tensor(1.0831, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 30: Loss = 2.6759, Actor Loss = 0.4646, Critic Loss = 4.4310, Entropy = 0.4157, Returns = 2.0778, Value = 2.3869, mask_loss = 0.2917std_loss = 0.2344:   1%|          | 30/4210 [01:07<2:39:41,  2.29s/it]  

ratio tensor(1.3100, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 31: Loss = 0.9280, Actor Loss = -0.2993, Critic Loss = 2.4626, Entropy = 0.4019, Returns = 2.2766, Value = 2.6820, mask_loss = 0.2388std_loss = 0.2018:   1%|          | 31/4210 [01:09<2:34:58,  2.23s/it]

ratio tensor(1.0037, device='cuda:0', grad_fn=<MeanBackward0>)



Epoch 1, Step 32: Loss = 2.3795, Actor Loss = 1.7481, Critic Loss = 1.2699, Entropy = 0.3618, Returns = 1.0003, Value = 1.3736, mask_loss = 0.2643std_loss = 0.2499:   1%|          | 32/4210 [01:11<2:25:29,  2.09s/it] 

ratio tensor(0.8183, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 33: Loss = 0.3332, Actor Loss = -0.2911, Critic Loss = 1.2557, Entropy = 0.3547, Returns = 1.4699, Value = 1.3460, mask_loss = 0.2441std_loss = 0.2199:   1%|          | 33/4210 [01:13<2:24:27,  2.08s/it]

ratio tensor(1.0059, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 34: Loss = 0.5370, Actor Loss = -0.7621, Critic Loss = 2.6044, Entropy = 0.3017, Returns = 2.0411, Value = 2.3584, mask_loss = 0.2007std_loss = 0.2258:   1%|          | 34/4210 [01:15<2:18:21,  1.99s/it]

ratio tensor(1.1681, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 35: Loss = 1.9582, Actor Loss = -0.1106, Critic Loss = 4.1447, Entropy = 0.3471, Returns = 2.3222, Value = 3.4123, mask_loss = 0.2792std_loss = 0.2717:   1%|          | 35/4210 [01:17<2:12:35,  1.91s/it]

ratio tensor(0.9831, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 36: Loss = 3.2355, Actor Loss = 1.4678, Critic Loss = 3.5414, Entropy = 0.3006, Returns = 1.7277, Value = 1.3985, mask_loss = 0.2168std_loss = 0.2230:   1%|          | 36/4210 [01:20<2:47:15,  2.40s/it] 

ratio tensor(0.8660, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 37: Loss = 1.0174, Actor Loss = -0.0389, Critic Loss = 2.1176, Entropy = 0.2489, Returns = 1.1485, Value = 1.3827, mask_loss = 0.2511std_loss = 0.2742:   1%|          | 37/4210 [01:22<2:38:46,  2.28s/it]

ratio tensor(1.0190, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 38: Loss = 0.3275, Actor Loss = -0.2465, Critic Loss = 1.1544, Entropy = 0.3168, Returns = 1.5575, Value = 2.0144, mask_loss = 0.3352std_loss = 0.3122:   1%|          | 38/4210 [01:25<2:54:43,  2.51s/it]

ratio tensor(1.0698, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 39: Loss = 1.5503, Actor Loss = -0.1406, Critic Loss = 3.3849, Entropy = 0.1540, Returns = 2.3188, Value = 2.7794, mask_loss = 0.3341std_loss = 0.3728:   1%|          | 39/4210 [01:27<2:40:16,  2.31s/it]

ratio tensor(1.0893, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 40: Loss = 1.4191, Actor Loss = 0.8001, Critic Loss = 1.2422, Entropy = 0.2111, Returns = 1.8872, Value = 2.1483, mask_loss = 0.2546std_loss = 0.2887:   1%|          | 40/4210 [01:29<2:34:34,  2.22s/it] 

ratio tensor(0.7746, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 41: Loss = 2.1076, Actor Loss = -0.7958, Critic Loss = 5.8112, Entropy = 0.2222, Returns = 2.6577, Value = 1.6450, mask_loss = 0.2919std_loss = 0.3364:   1%|          | 41/4210 [01:31<2:25:20,  2.09s/it]

ratio tensor(1.2786, device='cuda:0', grad_fn=<MeanBackward0>)



Epoch 1, Step 42: Loss = 1.8469, Actor Loss = 0.9268, Critic Loss = 1.8453, Entropy = 0.2558, Returns = 1.1586, Value = 1.8914, mask_loss = 0.2450std_loss = 0.2666:   1%|          | 42/4210 [01:37<3:39:45,  3.16s/it] 

ratio tensor(1.1033, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 43: Loss = 1.6016, Actor Loss = -0.0128, Critic Loss = 3.2329, Entropy = 0.1989, Returns = 2.7667, Value = 2.9652, mask_loss = 0.1441std_loss = 0.2044:   1%|          | 43/4210 [01:38<3:10:54,  2.75s/it]

ratio tensor(0.5188, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 44: Loss = 5.0822, Actor Loss = 0.8970, Critic Loss = 8.3742, Entropy = 0.1848, Returns = 1.8273, Value = 2.8850, mask_loss = 0.2154std_loss = 0.2595:   1%|          | 44/4210 [01:41<3:14:44,  2.80s/it] 

ratio tensor(0.3805, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 45: Loss = 1.9377, Actor Loss = 0.5960, Critic Loss = 2.6869, Entropy = 0.1750, Returns = 1.8903, Value = 1.1105, mask_loss = 0.2968std_loss = 0.3534:   1%|          | 45/4210 [01:43<2:59:15,  2.58s/it]

ratio tensor(0.7981, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 46: Loss = 5.3899, Actor Loss = -3.1174, Critic Loss = 17.0172, Entropy = 0.1357, Returns = 4.4059, Value = 4.5555, mask_loss = 0.3006std_loss = 0.3706:   1%|          | 46/4210 [01:45<2:48:21,  2.43s/it]

ratio tensor(1.2279, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 47: Loss = 8.6303, Actor Loss = 3.3486, Critic Loss = 10.5679, Entropy = 0.2289, Returns = 2.0522, Value = 3.0177, mask_loss = 0.2567std_loss = 0.2633:   1%|          | 47/4210 [01:47<2:40:39,  2.32s/it] 

ratio tensor(0.7412, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 48: Loss = 2.0887, Actor Loss = 1.4602, Critic Loss = 1.2610, Entropy = 0.2022, Returns = 1.4955, Value = 0.7205, mask_loss = 0.4463std_loss = 0.4181:   1%|          | 48/4210 [01:49<2:29:11,  2.15s/it] 

ratio tensor(0.7024, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 49: Loss = 0.3806, Actor Loss = -0.5490, Critic Loss = 1.8628, Entropy = 0.1819, Returns = 1.1632, Value = 1.9775, mask_loss = 0.4336std_loss = 0.4098:   1%|          | 49/4210 [01:51<2:27:18,  2.12s/it]

ratio tensor(1.9013, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 50: Loss = 1.4586, Actor Loss = 0.1244, Critic Loss = 2.6709, Entropy = 0.1239, Returns = 1.9349, Value = 2.3222, mask_loss = 0.4831std_loss = 0.4226:   1%|          | 50/4210 [01:53<2:19:45,  2.02s/it] 

ratio tensor(1.0381, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 51: Loss = 0.5477, Actor Loss = 0.0513, Critic Loss = 0.9959, Entropy = 0.1511, Returns = 2.0995, Value = 2.4152, mask_loss = 0.3425std_loss = 0.3853:   1%|          | 51/4210 [01:55<2:26:47,  2.12s/it]

ratio tensor(1.1722, device='cuda:0', grad_fn=<MeanBackward0>)



Epoch 1, Step 52: Loss = 1.8430, Actor Loss = 0.5509, Critic Loss = 2.5869, Entropy = 0.1369, Returns = 1.7899, Value = 2.0037, mask_loss = 0.2996std_loss = 0.3507:   1%|          | 52/4210 [01:57<2:19:52,  2.02s/it]

ratio tensor(0.9010, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 53: Loss = 0.9158, Actor Loss = -0.3118, Critic Loss = 2.4582, Entropy = 0.1450, Returns = 2.4437, Value = 2.1726, mask_loss = 0.3531std_loss = 0.3943:   1%|▏         | 53/4210 [01:59<2:20:32,  2.03s/it]

ratio tensor(1.0797, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 54: Loss = 5.6435, Actor Loss = -1.0115, Critic Loss = 13.3137, Entropy = 0.1871, Returns = 3.0810, Value = 3.6356, mask_loss = 0.4389std_loss = 0.3656:   1%|▏         | 54/4210 [02:01<2:15:29,  1.96s/it]

ratio tensor(0.9546, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 55: Loss = 5.2275, Actor Loss = 1.8481, Critic Loss = 6.7605, Entropy = 0.0835, Returns = 1.9113, Value = 2.3703, mask_loss = 0.1725std_loss = 0.2720:   1%|▏         | 55/4210 [02:03<2:17:25,  1.98s/it]  

ratio tensor(0.8185, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 56: Loss = 10.7502, Actor Loss = 0.4043, Critic Loss = 20.6938, Entropy = 0.0990, Returns = 2.5756, Value = 1.3654, mask_loss = 0.3232std_loss = 0.3341:   1%|▏         | 56/4210 [02:05<2:13:11,  1.92s/it]

ratio tensor(0.7715, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 57: Loss = 9.3617, Actor Loss = -1.0129, Critic Loss = 20.7510, Entropy = 0.0890, Returns = 3.0510, Value = 3.3500, mask_loss = 0.0950std_loss = 0.1647:   1%|▏         | 57/4210 [02:07<2:14:53,  1.95s/it]

ratio tensor(1.1328, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 58: Loss = 7.4585, Actor Loss = 1.4513, Critic Loss = 12.0173, Entropy = 0.1459, Returns = 1.1239, Value = 2.6613, mask_loss = 0.1117std_loss = 0.1779:   1%|▏         | 58/4210 [02:09<2:19:41,  2.02s/it] 

ratio tensor(0.8063, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 59: Loss = 5.3762, Actor Loss = 0.3650, Critic Loss = 10.0253, Entropy = 0.1511, Returns = 2.2139, Value = 1.8328, mask_loss = 0.2318std_loss = 0.2834:   1%|▏         | 59/4210 [02:11<2:27:14,  2.13s/it]

ratio tensor(0.9344, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 60: Loss = 6.8265, Actor Loss = -1.6198, Critic Loss = 16.8947, Entropy = 0.1067, Returns = 4.7523, Value = 4.0151, mask_loss = 0.1396std_loss = 0.2553:   1%|▏         | 60/4210 [02:14<2:40:39,  2.32s/it]

ratio tensor(1.0163, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 61: Loss = 4.7745, Actor Loss = 1.7715, Critic Loss = 6.0100, Entropy = 0.1988, Returns = 2.0497, Value = 1.8071, mask_loss = 0.1960std_loss = 0.2605:   1%|▏         | 61/4210 [02:16<2:29:46,  2.17s/it]  

ratio tensor(0.8018, device='cuda:0', grad_fn=<MeanBackward0>)



Epoch 1, Step 62: Loss = 7.0260, Actor Loss = -1.4685, Critic Loss = 16.9933, Entropy = 0.2062, Returns = 2.5494, Value = 0.7814, mask_loss = 0.1760std_loss = 0.2349:   1%|▏         | 62/4210 [02:18<2:27:22,  2.13s/it]

ratio tensor(1.1434, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 63: Loss = 3.1801, Actor Loss = -1.6548, Critic Loss = 9.6731, Entropy = 0.1605, Returns = 2.3529, Value = 3.0089, mask_loss = 0.2636std_loss = 0.3465:   1%|▏         | 63/4210 [02:20<2:26:16,  2.12s/it] 

ratio tensor(1.1471, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 64: Loss = 4.8225, Actor Loss = -0.0534, Critic Loss = 9.7544, Entropy = 0.1289, Returns = 2.9393, Value = 3.1619, mask_loss = 0.1960std_loss = 0.2797:   2%|▏         | 64/4210 [02:27<3:56:17,  3.42s/it]

ratio tensor(0.8292, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 65: Loss = 3.4445, Actor Loss = 1.7849, Critic Loss = 3.3221, Entropy = 0.1421, Returns = 1.5619, Value = 1.9418, mask_loss = 0.2617std_loss = 0.2906:   2%|▏         | 65/4210 [02:30<4:05:33,  3.55s/it] 

ratio tensor(0.9240, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 66: Loss = 14.1858, Actor Loss = -2.2547, Critic Loss = 32.8835, Entropy = 0.1264, Returns = 3.7102, Value = 2.4859, mask_loss = 0.2767std_loss = 0.3280:   2%|▏         | 66/4210 [02:33<3:40:03,  3.19s/it]

ratio tensor(0.8973, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 67: Loss = 9.5096, Actor Loss = 2.0270, Critic Loss = 14.9667, Entropy = 0.0719, Returns = 1.4651, Value = 4.5488, mask_loss = 0.0970std_loss = 0.1871:   2%|▏         | 67/4210 [02:35<3:22:55,  2.94s/it]  

ratio tensor(0.5173, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 68: Loss = 5.0812, Actor Loss = 2.8415, Critic Loss = 4.4830, Entropy = 0.1744, Returns = 0.9641, Value = 2.5513, mask_loss = 0.3688std_loss = 0.3623:   2%|▏         | 68/4210 [02:37<3:05:00,  2.68s/it] 

ratio tensor(0.2272, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 69: Loss = 1.9180, Actor Loss = 0.9106, Critic Loss = 2.0185, Entropy = 0.1798, Returns = 1.6045, Value = 0.9274, mask_loss = 0.3755std_loss = 0.3713:   2%|▏         | 69/4210 [02:39<2:46:06,  2.41s/it]

ratio tensor(1.0348, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 70: Loss = 1.5954, Actor Loss = -1.9829, Critic Loss = 7.1594, Entropy = 0.1463, Returns = 2.9471, Value = 2.4183, mask_loss = 0.3981std_loss = 0.3929:   2%|▏         | 70/4210 [02:41<2:39:07,  2.31s/it]

ratio tensor(1.0801, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 71: Loss = 1.1248, Actor Loss = 0.7106, Critic Loss = 0.8322, Entropy = 0.1828, Returns = 1.8162, Value = 2.0883, mask_loss = 0.3702std_loss = 0.3866:   2%|▏         | 71/4210 [02:43<2:32:34,  2.21s/it] 

ratio tensor(1.0653, device='cuda:0', grad_fn=<MeanBackward0>)



Epoch 1, Step 72: Loss = 0.3102, Actor Loss = -0.4838, Critic Loss = 1.5918, Entropy = 0.1898, Returns = 2.3161, Value = 1.7535, mask_loss = 0.4417std_loss = 0.3983:   2%|▏         | 72/4210 [02:45<2:28:51,  2.16s/it]

ratio tensor(0.9961, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 73: Loss = 2.1185, Actor Loss = -0.5400, Critic Loss = 5.3201, Entropy = 0.1542, Returns = 2.2179, Value = 2.5938, mask_loss = 0.3375std_loss = 0.3879:   2%|▏         | 73/4210 [02:47<2:34:04,  2.23s/it]

ratio tensor(1.0201, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 74: Loss = 1.5325, Actor Loss = 1.2108, Critic Loss = 0.6465, Entropy = 0.1589, Returns = 1.5078, Value = 2.1426, mask_loss = 0.4876std_loss = 0.4282:   2%|▏         | 74/4210 [02:49<2:29:04,  2.16s/it] 

ratio tensor(0.9661, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 75: Loss = 0.8352, Actor Loss = 0.0353, Critic Loss = 1.6038, Entropy = 0.2022, Returns = 1.9761, Value = 1.3839, mask_loss = 0.3307std_loss = 0.3789:   2%|▏         | 75/4210 [02:52<2:39:59,  2.32s/it]

ratio tensor(1.0648, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 76: Loss = 0.7033, Actor Loss = 0.4056, Critic Loss = 0.5988, Entropy = 0.1666, Returns = 0.8731, Value = 1.3118, mask_loss = 0.3715std_loss = 0.3677:   2%|▏         | 76/4210 [02:54<2:34:38,  2.24s/it]

ratio tensor(0.8221, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 77: Loss = 0.1432, Actor Loss = -0.9921, Critic Loss = 2.2736, Entropy = 0.1555, Returns = 2.2699, Value = 2.9087, mask_loss = 0.3815std_loss = 0.3696:   2%|▏         | 77/4210 [02:57<2:35:36,  2.26s/it]

ratio tensor(1.2030, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 78: Loss = 1.7239, Actor Loss = 0.6885, Critic Loss = 2.0738, Entropy = 0.1585, Returns = 2.2489, Value = 2.3149, mask_loss = 0.2808std_loss = 0.3534:   2%|▏         | 78/4210 [02:59<2:36:56,  2.28s/it] 

ratio tensor(0.9861, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 79: Loss = 1.7322, Actor Loss = 0.2562, Critic Loss = 2.9556, Entropy = 0.1826, Returns = 1.7133, Value = 1.3079, mask_loss = 0.3083std_loss = 0.3686:   2%|▏         | 79/4210 [03:01<2:26:40,  2.13s/it]

ratio tensor(1.8354, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 80: Loss = 0.1993, Actor Loss = -0.5667, Critic Loss = 1.5355, Entropy = 0.1665, Returns = 1.6639, Value = 1.7592, mask_loss = 0.2303std_loss = 0.3109:   2%|▏         | 80/4210 [03:09<4:35:03,  4.00s/it]

ratio tensor(1.0681, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 81: Loss = -0.0186, Actor Loss = -1.6518, Critic Loss = 3.2691, Entropy = 0.1366, Returns = 3.3763, Value = 2.9764, mask_loss = 0.2759std_loss = 0.3590:   2%|▏         | 81/4210 [03:11<3:49:18,  3.33s/it]

ratio tensor(1.2571, device='cuda:0', grad_fn=<MeanBackward0>)



Epoch 1, Step 82: Loss = 1.6312, Actor Loss = 0.5991, Critic Loss = 2.0673, Entropy = 0.1557, Returns = 2.2121, Value = 2.7106, mask_loss = 0.2494std_loss = 0.3312:   2%|▏         | 82/4210 [03:13<3:22:32,  2.94s/it]  

ratio tensor(0.8658, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 83: Loss = 1.0260, Actor Loss = 0.2950, Critic Loss = 1.4648, Entropy = 0.1433, Returns = 2.4182, Value = 2.4805, mask_loss = 0.2955std_loss = 0.3601:   2%|▏         | 83/4210 [03:15<3:04:08,  2.68s/it]

ratio tensor(0.8798, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 84: Loss = 1.9817, Actor Loss = -0.7804, Critic Loss = 5.5260, Entropy = 0.0912, Returns = 3.0714, Value = 2.9022, mask_loss = 0.2701std_loss = 0.3637:   2%|▏         | 84/4210 [03:21<4:13:58,  3.69s/it]

ratio tensor(0.9452, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 85: Loss = 2.4083, Actor Loss = 1.2334, Critic Loss = 2.3524, Entropy = 0.1247, Returns = 1.8228, Value = 2.7271, mask_loss = 0.3754std_loss = 0.3439:   2%|▏         | 85/4210 [03:23<3:38:55,  3.18s/it] 

ratio tensor(0.9377, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 86: Loss = 4.4023, Actor Loss = -0.2731, Critic Loss = 9.3539, Entropy = 0.1561, Returns = 2.5439, Value = 1.9755, mask_loss = 0.2728std_loss = 0.3524:   2%|▏         | 86/4210 [03:25<3:10:02,  2.77s/it]

ratio tensor(0.9085, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 87: Loss = 1.8698, Actor Loss = -0.0776, Critic Loss = 3.8984, Entropy = 0.1742, Returns = 2.1721, Value = 2.2229, mask_loss = 0.2040std_loss = 0.2625:   2%|▏         | 87/4210 [03:26<2:49:45,  2.47s/it]

ratio tensor(1.2930, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 88: Loss = 14.6413, Actor Loss = -1.0724, Critic Loss = 31.4303, Entropy = 0.1478, Returns = 2.6737, Value = 1.9248, mask_loss = 0.1574std_loss = 0.2471:   2%|▏         | 88/4210 [03:29<2:41:09,  2.35s/it]

ratio tensor(0.6651, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 89: Loss = 6.3168, Actor Loss = 0.3153, Critic Loss = 12.0063, Entropy = 0.1652, Returns = 1.7609, Value = 4.3735, mask_loss = 0.2579std_loss = 0.3452:   2%|▏         | 89/4210 [03:31<2:40:14,  2.33s/it]  

ratio tensor(0.7822, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 90: Loss = 6.3976, Actor Loss = 2.5094, Critic Loss = 7.7787, Entropy = 0.1224, Returns = 1.7567, Value = 3.6625, mask_loss = 0.2761std_loss = 0.3137:   2%|▏         | 90/4210 [03:33<2:33:19,  2.23s/it] 

ratio tensor(0.7501, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 91: Loss = 2.7567, Actor Loss = 1.4546, Critic Loss = 2.6064, Entropy = 0.1125, Returns = 1.8146, Value = 0.9207, mask_loss = 0.2909std_loss = 0.3523:   2%|▏         | 91/4210 [03:35<2:29:30,  2.18s/it]

ratio tensor(0.9502, device='cuda:0', grad_fn=<MeanBackward0>)



Epoch 1, Step 92: Loss = 0.6679, Actor Loss = -0.4566, Critic Loss = 2.2516, Entropy = 0.1314, Returns = 1.3712, Value = 1.9501, mask_loss = 0.2761std_loss = 0.3047:   2%|▏         | 92/4210 [03:37<2:26:53,  2.14s/it]

ratio tensor(0.9749, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 93: Loss = 1.6062, Actor Loss = 0.0953, Critic Loss = 3.0236, Entropy = 0.0907, Returns = 1.9355, Value = 2.2264, mask_loss = 0.2306std_loss = 0.3567:   2%|▏         | 93/4210 [03:39<2:24:50,  2.11s/it] 

ratio tensor(35.7333, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 94: Loss = 4.6372, Actor Loss = -0.6080, Critic Loss = 10.4924, Entropy = 0.1056, Returns = 2.9380, Value = 1.9506, mask_loss = 0.1196std_loss = 0.1522:   2%|▏         | 94/4210 [03:41<2:18:05,  2.01s/it]

ratio tensor(0.1534, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 95: Loss = 4.9462, Actor Loss = -0.4294, Critic Loss = 10.7545, Entropy = 0.1585, Returns = 2.5770, Value = 2.1266, mask_loss = 0.1915std_loss = 0.2705:   2%|▏         | 95/4210 [03:43<2:13:14,  1.94s/it]

ratio tensor(1.1565, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 96: Loss = 4.1118, Actor Loss = 0.1865, Critic Loss = 7.8522, Entropy = 0.0792, Returns = 1.8298, Value = 1.6346, mask_loss = 0.0587std_loss = 0.1418:   2%|▏         | 96/4210 [03:45<2:15:24,  1.97s/it]  

ratio tensor(0.9183, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 97: Loss = 1.7115, Actor Loss = 1.0762, Critic Loss = 1.2729, Entropy = 0.1122, Returns = 0.2581, Value = 0.7467, mask_loss = 0.0745std_loss = 0.1181:   2%|▏         | 97/4210 [03:46<2:11:56,  1.92s/it]

ratio tensor(0.9201, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 98: Loss = 5.3007, Actor Loss = -1.2037, Critic Loss = 13.0103, Entropy = 0.0708, Returns = 2.4146, Value = 2.1787, mask_loss = 0.0488std_loss = 0.1000:   2%|▏         | 98/4210 [03:48<2:09:28,  1.89s/it]

ratio tensor(0.9261, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 99: Loss = 4.6638, Actor Loss = 2.5030, Critic Loss = 4.3226, Entropy = 0.0500, Returns = -0.0033, Value = 2.0524, mask_loss = 0.0359std_loss = 0.0610:   2%|▏         | 99/4210 [03:50<2:11:37,  1.92s/it] 

ratio tensor(0.9932, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 100: Loss = 4.1561, Actor Loss = 0.3521, Critic Loss = 7.6100, Entropy = 0.0922, Returns = 1.3974, Value = 0.0583, mask_loss = 0.1093std_loss = 0.1667:   2%|▏         | 100/4210 [03:52<2:14:22,  1.96s/it]

ratio tensor(1.0117, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 101: Loss = 0.1441, Actor Loss = -0.6285, Critic Loss = 1.5476, Entropy = 0.1253, Returns = 0.7166, Value = 0.9456, mask_loss = 0.1256std_loss = 0.1733:   2%|▏         | 100/4210 [03:54<2:14:22,  1.96s/it]

ratio tensor(0.9086, device='cuda:0', grad_fn=<MeanBackward0>)



Epoch 1, Step 102: Loss = 1.2932, Actor Loss = -0.3813, Critic Loss = 3.3522, Entropy = 0.1544, Returns = 1.2267, Value = 1.4994, mask_loss = 0.2277std_loss = 0.3093:   2%|▏         | 102/4210 [03:56<2:14:57,  1.97s/it]

ratio tensor(0.8630, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 103: Loss = 3.7332, Actor Loss = -1.9880, Critic Loss = 11.4439, Entropy = 0.0718, Returns = 3.3293, Value = 2.1997, mask_loss = 0.1023std_loss = 0.2327:   2%|▏         | 103/4210 [03:58<2:15:23,  1.98s/it]

ratio tensor(1.0295, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 104: Loss = 5.2418, Actor Loss = 0.7268, Critic Loss = 9.0316, Entropy = 0.0821, Returns = 1.6161, Value = 3.7324, mask_loss = 0.1753std_loss = 0.2344:   2%|▏         | 104/4210 [04:00<2:17:14,  2.01s/it]  

ratio tensor(0.8891, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 105: Loss = 10.0450, Actor Loss = -0.2438, Critic Loss = 20.5788, Entropy = 0.0620, Returns = 3.4976, Value = 3.2008, mask_loss = 0.1310std_loss = 0.2658:   2%|▏         | 105/4210 [04:02<2:17:56,  2.02s/it]

ratio tensor(0.7588, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 106: Loss = 5.2818, Actor Loss = 0.2985, Critic Loss = 9.9684, Entropy = 0.0893, Returns = 2.5586, Value = 1.9449, mask_loss = 0.1332std_loss = 0.2671:   3%|▎         | 106/4210 [04:04<2:18:27,  2.02s/it]   

ratio tensor(11.9409, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 107: Loss = 7.0580, Actor Loss = -2.4639, Critic Loss = 19.0458, Entropy = 0.1028, Returns = 3.9790, Value = 2.4441, mask_loss = 0.2089std_loss = 0.3243:   3%|▎         | 107/4210 [04:07<2:19:01,  2.03s/it]

ratio tensor(1.1292, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 108: Loss = 3.1911, Actor Loss = 0.9808, Critic Loss = 4.4230, Entropy = 0.1245, Returns = 1.8214, Value = 2.4015, mask_loss = 0.1715std_loss = 0.2622:   3%|▎         | 108/4210 [04:10<2:52:32,  2.52s/it]  

ratio tensor(1.1430, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 109: Loss = 3.0913, Actor Loss = 0.4939, Critic Loss = 5.1976, Entropy = 0.1396, Returns = 1.6648, Value = 1.1377, mask_loss = 0.2241std_loss = 0.3153:   3%|▎         | 109/4210 [04:12<2:42:44,  2.38s/it]

ratio tensor(0.9603, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 110: Loss = 0.7120, Actor Loss = -0.6530, Critic Loss = 2.7319, Entropy = 0.0979, Returns = 1.6986, Value = 2.2514, mask_loss = 0.1917std_loss = 0.2570:   3%|▎         | 110/4210 [04:14<2:30:25,  2.20s/it]

ratio tensor(1.0052, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 111: Loss = 4.1066, Actor Loss = 0.7304, Critic Loss = 6.7541, Entropy = 0.0902, Returns = 1.8751, Value = 2.8548, mask_loss = 0.1693std_loss = 0.2651:   3%|▎         | 111/4210 [04:18<3:12:54,  2.82s/it] 

ratio tensor(1.0306, device='cuda:0', grad_fn=<MeanBackward0>)



Epoch 1, Step 112: Loss = 1.0149, Actor Loss = 0.1266, Critic Loss = 1.7793, Entropy = 0.1417, Returns = 2.4128, Value = 2.0850, mask_loss = 0.2490std_loss = 0.3224:   3%|▎         | 112/4210 [04:20<2:51:52,  2.52s/it]

ratio tensor(0.8519, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 113: Loss = 1.1679, Actor Loss = -0.0039, Critic Loss = 2.3454, Entropy = 0.0928, Returns = 2.0167, Value = 2.2961, mask_loss = 0.1992std_loss = 0.3315:   3%|▎         | 113/4210 [04:22<2:37:07,  2.30s/it]

ratio tensor(0.8454, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 114: Loss = 1.5625, Actor Loss = -0.1290, Critic Loss = 3.3855, Entropy = 0.1305, Returns = 2.2625, Value = 3.2042, mask_loss = 0.2640std_loss = 0.3665:   3%|▎         | 114/4210 [04:24<2:31:52,  2.22s/it]

ratio tensor(0.9804, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 115: Loss = 29.0383, Actor Loss = -1.3709, Critic Loss = 60.8205, Entropy = 0.0985, Returns = 3.8824, Value = 3.2622, mask_loss = 0.1504std_loss = 0.2666:   3%|▎         | 115/4210 [04:26<2:28:21,  2.17s/it]

ratio tensor(0.7281, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 116: Loss = 3.9962, Actor Loss = 2.1053, Critic Loss = 3.7833, Entropy = 0.0749, Returns = 1.4112, Value = 2.2382, mask_loss = 0.1075std_loss = 0.1998:   3%|▎         | 116/4210 [04:28<2:20:56,  2.07s/it]   

ratio tensor(0.8449, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 117: Loss = 1.9714, Actor Loss = 0.0129, Critic Loss = 3.9184, Entropy = 0.0659, Returns = 1.7930, Value = 1.2035, mask_loss = 0.1633std_loss = 0.3104:   3%|▎         | 117/4210 [04:30<2:19:49,  2.05s/it]

ratio tensor(1.0359, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 118: Loss = 3.4843, Actor Loss = 0.4629, Critic Loss = 6.0452, Entropy = 0.1222, Returns = 1.0150, Value = 1.8297, mask_loss = 0.1633std_loss = 0.2310:   3%|▎         | 118/4210 [04:32<2:26:05,  2.14s/it]

ratio tensor(0.6327, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 119: Loss = 2.8548, Actor Loss = -0.6530, Critic Loss = 7.0177, Entropy = 0.1054, Returns = 2.1967, Value = 1.1938, mask_loss = 0.1304std_loss = 0.2364:   3%|▎         | 119/4210 [04:34<2:25:21,  2.13s/it]

ratio tensor(1.3626, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 120: Loss = 9.0424, Actor Loss = -2.0294, Critic Loss = 22.1453, Entropy = 0.0865, Returns = 2.8050, Value = 1.4840, mask_loss = 0.1729std_loss = 0.2433:   3%|▎         | 120/4210 [04:36<2:18:00,  2.02s/it]

ratio tensor(1.4079, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 121: Loss = 2.1026, Actor Loss = -0.5881, Critic Loss = 5.3835, Entropy = 0.1002, Returns = 2.0679, Value = 2.7794, mask_loss = 0.1340std_loss = 0.2421:   3%|▎         | 121/4210 [04:38<2:17:41,  2.02s/it] 

ratio tensor(0.9126, device='cuda:0', grad_fn=<MeanBackward0>)



Epoch 1, Step 122: Loss = 6.3085, Actor Loss = -0.9138, Critic Loss = 14.4464, Entropy = 0.0934, Returns = 3.3251, Value = 3.0527, mask_loss = 0.2588std_loss = 0.3544:   3%|▎         | 122/4210 [04:40<2:12:41,  1.95s/it]

ratio tensor(0.9633, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 123: Loss = 14.2997, Actor Loss = -0.5168, Critic Loss = 29.6345, Entropy = 0.0794, Returns = 5.0462, Value = 3.8192, mask_loss = 0.0824std_loss = 0.1977:   3%|▎         | 123/4210 [04:42<2:15:07,  1.98s/it]

ratio tensor(0.8505, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 124: Loss = 11.3251, Actor Loss = 0.9202, Critic Loss = 20.8115, Entropy = 0.0856, Returns = 2.9947, Value = 2.3306, mask_loss = 0.3926std_loss = 0.3635:   3%|▎         | 124/4210 [04:44<2:16:51,  2.01s/it] 

ratio tensor(0.9492, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 125: Loss = 2.3002, Actor Loss = 1.2049, Critic Loss = 2.1917, Entropy = 0.0523, Returns = 0.6918, Value = 0.9648, mask_loss = 0.1963std_loss = 0.3075:   3%|▎         | 125/4210 [04:46<2:17:22,  2.02s/it]  

ratio tensor(0.7174, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 126: Loss = 2.8072, Actor Loss = -1.5559, Critic Loss = 8.7277, Entropy = 0.0748, Returns = 2.3864, Value = 2.8780, mask_loss = 0.1372std_loss = 0.2413:   3%|▎         | 126/4210 [04:48<2:23:27,  2.11s/it]

ratio tensor(1.2160, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 127: Loss = 4.4871, Actor Loss = -1.4374, Critic Loss = 11.8510, Entropy = 0.0993, Returns = 4.2698, Value = 2.8804, mask_loss = 0.1445std_loss = 0.2366:   3%|▎         | 127/4210 [04:50<2:21:08,  2.07s/it]

ratio tensor(1.1338, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 128: Loss = 10.4300, Actor Loss = -2.4662, Critic Loss = 25.7939, Entropy = 0.0703, Returns = 4.7966, Value = 3.8134, mask_loss = 0.2165std_loss = 0.3272:   3%|▎         | 128/4210 [04:52<2:20:48,  2.07s/it]

ratio tensor(1.8724, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 129: Loss = 4.4526, Actor Loss = 1.7509, Critic Loss = 5.4058, Entropy = 0.1182, Returns = 1.8094, Value = 1.8591, mask_loss = 0.1796std_loss = 0.2995:   3%|▎         | 129/4210 [04:54<2:19:02,  2.04s/it]   

ratio tensor(0.6183, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 130: Loss = 9.0496, Actor Loss = -1.5437, Critic Loss = 21.1885, Entropy = 0.0968, Returns = 2.6004, Value = 0.9876, mask_loss = 0.2508std_loss = 0.3368:   3%|▎         | 130/4210 [04:59<3:08:14,  2.77s/it]

ratio tensor(1.1127, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 131: Loss = 2.0423, Actor Loss = 0.0672, Critic Loss = 3.9521, Entropy = 0.0901, Returns = 1.2312, Value = 2.7508, mask_loss = 0.3006std_loss = 0.3748:   3%|▎         | 131/4210 [05:01<2:47:30,  2.46s/it]  

ratio tensor(0.8905, device='cuda:0', grad_fn=<MeanBackward0>)



Epoch 1, Step 132: Loss = 3.4582, Actor Loss = 1.5805, Critic Loss = 3.7581, Entropy = 0.1269, Returns = 1.0719, Value = 1.2642, mask_loss = 0.3129std_loss = 0.3569:   3%|▎         | 132/4210 [05:03<2:39:10,  2.34s/it]

ratio tensor(0.9832, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 133: Loss = 1.2825, Actor Loss = -1.6048, Critic Loss = 5.7766, Entropy = 0.1044, Returns = 2.6686, Value = 2.5379, mask_loss = 0.2356std_loss = 0.3558:   3%|▎         | 133/4210 [05:05<2:33:31,  2.26s/it]

ratio tensor(0.9409, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 134: Loss = 4.5245, Actor Loss = -0.3888, Critic Loss = 9.8286, Entropy = 0.1030, Returns = 2.7588, Value = 2.7759, mask_loss = 0.2269std_loss = 0.3047:   3%|▎         | 134/4210 [05:07<2:30:18,  2.21s/it]

ratio tensor(1.1770, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 135: Loss = 19.8493, Actor Loss = -1.7940, Critic Loss = 43.2875, Entropy = 0.0443, Returns = 4.0317, Value = 1.9032, mask_loss = 0.1827std_loss = 0.2880:   3%|▎         | 135/4210 [05:09<2:27:50,  2.18s/it]

ratio tensor(0.9896, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 136: Loss = 4.9984, Actor Loss = -1.4651, Critic Loss = 12.9290, Entropy = 0.1010, Returns = 3.3372, Value = 3.0453, mask_loss = 0.2856std_loss = 0.3885:   3%|▎         | 136/4210 [05:11<2:19:30,  2.05s/it] 

ratio tensor(0.9868, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 137: Loss = 1.6566, Actor Loss = -0.4484, Critic Loss = 4.2123, Entropy = 0.1191, Returns = 3.3043, Value = 2.8970, mask_loss = 0.3032std_loss = 0.3794:   3%|▎         | 137/4210 [05:13<2:18:10,  2.04s/it] 

ratio tensor(1.1460, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 138: Loss = 8.1362, Actor Loss = -0.4377, Critic Loss = 17.1497, Entropy = 0.0927, Returns = 3.6449, Value = 5.2635, mask_loss = 0.2070std_loss = 0.3275:   3%|▎         | 138/4210 [05:15<2:18:31,  2.04s/it]

ratio tensor(0.7904, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 139: Loss = 7.8335, Actor Loss = 2.9239, Critic Loss = 9.8217, Entropy = 0.1205, Returns = 2.5850, Value = 4.4585, mask_loss = 0.1947std_loss = 0.2380:   3%|▎         | 139/4210 [05:17<2:24:40,  2.13s/it]  

ratio tensor(0.9552, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 140: Loss = 3.9538, Actor Loss = 3.0119, Critic Loss = 1.8856, Entropy = 0.0885, Returns = 0.7424, Value = 0.6173, mask_loss = 0.1040std_loss = 0.1848:   3%|▎         | 140/4210 [05:19<2:27:53,  2.18s/it]

ratio tensor(0.7876, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 141: Loss = 12.8521, Actor Loss = -3.9552, Critic Loss = 33.6161, Entropy = 0.0729, Returns = 3.8226, Value = 0.9247, mask_loss = 0.1806std_loss = 0.2567:   3%|▎         | 141/4210 [05:21<2:19:45,  2.06s/it]

ratio tensor(1.2029, device='cuda:0', grad_fn=<MeanBackward0>)



Epoch 1, Step 142: Loss = 19.3829, Actor Loss = -3.1542, Critic Loss = 45.0759, Entropy = 0.0876, Returns = 4.4148, Value = 3.4343, mask_loss = 0.1365std_loss = 0.2126:   3%|▎         | 142/4210 [05:28<4:06:58,  3.64s/it]

ratio tensor(0.9779, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 143: Loss = 9.3698, Actor Loss = 0.9733, Critic Loss = 16.7952, Entropy = 0.1130, Returns = 2.4141, Value = 3.7660, mask_loss = 0.1963std_loss = 0.2701:   3%|▎         | 143/4210 [05:31<3:35:47,  3.18s/it]  

ratio tensor(1.0144, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 144: Loss = 3.0034, Actor Loss = 1.8025, Critic Loss = 2.4041, Entropy = 0.1192, Returns = 2.0463, Value = 2.4856, mask_loss = 0.2333std_loss = 0.3374:   3%|▎         | 144/4210 [05:33<3:12:07,  2.84s/it] 

ratio tensor(1.0802, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 145: Loss = 2.6706, Actor Loss = 0.1510, Critic Loss = 5.0427, Entropy = 0.1763, Returns = 1.7355, Value = 1.9665, mask_loss = 0.2097std_loss = 0.2720:   3%|▎         | 145/4210 [05:35<2:56:02,  2.60s/it]

ratio tensor(0.9287, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 146: Loss = 1.1521, Actor Loss = 0.6333, Critic Loss = 1.0405, Entropy = 0.1498, Returns = 1.1634, Value = 1.6245, mask_loss = 0.2031std_loss = 0.2852:   3%|▎         | 146/4210 [05:37<2:50:49,  2.52s/it]

ratio tensor(0.8854, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 147: Loss = 2.6391, Actor Loss = -2.2945, Critic Loss = 9.8700, Entropy = 0.1422, Returns = 3.5125, Value = 1.5288, mask_loss = 0.1991std_loss = 0.3082:   3%|▎         | 147/4210 [05:39<2:42:21,  2.40s/it]

ratio tensor(1.2126, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 148: Loss = 4.6244, Actor Loss = -1.8044, Critic Loss = 12.8598, Entropy = 0.1078, Returns = 3.9951, Value = 2.6415, mask_loss = 0.2197std_loss = 0.2894:   4%|▎         | 148/4210 [05:41<2:35:25,  2.30s/it]

ratio tensor(1.1915, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 149: Loss = 2.3125, Actor Loss = 0.3115, Critic Loss = 4.0035, Entropy = 0.0789, Returns = 2.2040, Value = 2.4365, mask_loss = 0.3689std_loss = 0.3414:   4%|▎         | 149/4210 [05:44<2:45:47,  2.45s/it]  

ratio tensor(1.0392, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 150: Loss = 2.1611, Actor Loss = 1.0008, Critic Loss = 2.3235, Entropy = 0.1446, Returns = 1.5057, Value = 2.4450, mask_loss = 0.1836std_loss = 0.2630:   4%|▎         | 150/4210 [05:46<2:42:40,  2.40s/it]

ratio tensor(1.0739, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 151: Loss = 4.3012, Actor Loss = -0.3052, Critic Loss = 9.2152, Entropy = 0.1225, Returns = 2.6039, Value = 3.5084, mask_loss = 0.1791std_loss = 0.3121:   4%|▎         | 151/4210 [05:55<4:42:45,  4.18s/it]

ratio tensor(1.4377, device='cuda:0', grad_fn=<MeanBackward0>)



Epoch 1, Step 152: Loss = 3.2381, Actor Loss = 1.7432, Critic Loss = 2.9922, Entropy = 0.1252, Returns = 1.8282, Value = 2.4399, mask_loss = 0.2346std_loss = 0.3028:   4%|▎         | 152/4210 [05:57<3:59:18,  3.54s/it] 

ratio tensor(0.9081, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 153: Loss = 7.5288, Actor Loss = 0.2250, Critic Loss = 14.6096, Entropy = 0.1008, Returns = 2.4242, Value = 1.5333, mask_loss = 0.2420std_loss = 0.3096:   4%|▎         | 153/4210 [05:58<3:23:32,  3.01s/it]

ratio tensor(1.2892, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 154: Loss = 2.3947, Actor Loss = -0.3017, Critic Loss = 5.3953, Entropy = 0.1155, Returns = 1.7197, Value = 2.2881, mask_loss = 0.1683std_loss = 0.2342:   4%|▎         | 154/4210 [06:04<4:22:46,  3.89s/it]

ratio tensor(0.9105, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 155: Loss = 2.6345, Actor Loss = -0.5718, Critic Loss = 6.4149, Entropy = 0.1172, Returns = 3.0512, Value = 2.3865, mask_loss = 0.1635std_loss = 0.2805:   4%|▎         | 155/4210 [06:09<4:36:37,  4.09s/it]

ratio tensor(1.0721, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 156: Loss = 15.4160, Actor Loss = -1.3227, Critic Loss = 33.4807, Entropy = 0.1584, Returns = 3.2860, Value = 2.3335, mask_loss = 0.2198std_loss = 0.2866:   4%|▎         | 156/4210 [06:11<3:55:11,  3.48s/it]

ratio tensor(1.0231, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 157: Loss = 1.4393, Actor Loss = 0.7339, Critic Loss = 1.4144, Entropy = 0.1781, Returns = 2.3130, Value = 2.5687, mask_loss = 0.2170std_loss = 0.2801:   4%|▎         | 157/4210 [06:13<3:26:18,  3.05s/it]   

ratio tensor(0.7727, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 158: Loss = 2.9044, Actor Loss = 0.4803, Critic Loss = 4.8512, Entropy = 0.1427, Returns = 1.7393, Value = 0.9568, mask_loss = 0.1868std_loss = 0.2754:   4%|▍         | 158/4210 [06:15<3:06:34,  2.76s/it]

ratio tensor(0.9559, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 159: Loss = 3.2081, Actor Loss = -1.3332, Critic Loss = 9.0862, Entropy = 0.1775, Returns = 2.0148, Value = 2.2462, mask_loss = 0.2671std_loss = 0.3179:   4%|▍         | 159/4210 [06:17<2:51:07,  2.53s/it]

ratio tensor(1.0105, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 160: Loss = 3.4710, Actor Loss = 0.5162, Critic Loss = 5.9123, Entropy = 0.1361, Returns = 1.9404, Value = 2.5671, mask_loss = 0.1827std_loss = 0.2673:   4%|▍         | 160/4210 [06:19<2:35:25,  2.30s/it] 

ratio tensor(0.7513, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 161: Loss = 9.9891, Actor Loss = -0.3923, Critic Loss = 20.7658, Entropy = 0.1465, Returns = 2.6787, Value = 1.9592, mask_loss = 0.2361std_loss = 0.3084:   4%|▍         | 161/4210 [06:21<2:24:46,  2.15s/it]

ratio tensor(0.9552, device='cuda:0', grad_fn=<MeanBackward0>)



Epoch 1, Step 162: Loss = 3.6361, Actor Loss = -1.6957, Critic Loss = 10.6650, Entropy = 0.0730, Returns = 3.3418, Value = 4.1680, mask_loss = 0.1335std_loss = 0.2507:   4%|▍         | 162/4210 [06:23<2:23:02,  2.12s/it]

ratio tensor(1.0576, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 163: Loss = 5.1869, Actor Loss = 1.2694, Critic Loss = 7.8378, Entropy = 0.1459, Returns = 2.6769, Value = 2.9256, mask_loss = 0.2753std_loss = 0.3249:   4%|▍         | 163/4210 [06:26<2:47:23,  2.48s/it]  

ratio tensor(0.9057, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 164: Loss = 11.1873, Actor Loss = -2.5245, Critic Loss = 27.4253, Entropy = 0.0849, Returns = 5.3796, Value = 3.3864, mask_loss = 0.1991std_loss = 0.3077:   4%|▍         | 164/4210 [06:28<2:39:37,  2.37s/it]

ratio tensor(1.7045, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 165: Loss = 21.8465, Actor Loss = -1.5752, Critic Loss = 46.8452, Entropy = 0.0816, Returns = 4.5012, Value = 4.1615, mask_loss = 0.3531std_loss = 0.3789:   4%|▍         | 165/4210 [06:31<2:46:55,  2.48s/it]

ratio tensor(0.9317, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 166: Loss = 12.2747, Actor Loss = 0.6748, Critic Loss = 23.2014, Entropy = 0.0804, Returns = 3.0161, Value = 1.7831, mask_loss = 0.1595std_loss = 0.2552:   4%|▍         | 166/4210 [06:33<2:32:46,  2.27s/it] 

ratio tensor(0.8805, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 167: Loss = 7.4994, Actor Loss = -1.8184, Critic Loss = 18.6380, Entropy = 0.1186, Returns = 3.0652, Value = 1.3468, mask_loss = 0.1598std_loss = 0.2208:   4%|▍         | 167/4210 [06:34<2:22:25,  2.11s/it]

ratio tensor(1.1674, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 168: Loss = 3.6615, Actor Loss = -1.0381, Critic Loss = 9.4031, Entropy = 0.1908, Returns = 2.6120, Value = 2.6138, mask_loss = 0.2269std_loss = 0.2670:   4%|▍         | 168/4210 [06:37<2:22:02,  2.11s/it] 

ratio tensor(1.1196, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 169: Loss = 2.9281, Actor Loss = -1.1157, Critic Loss = 8.0905, Entropy = 0.1440, Returns = 3.6271, Value = 3.1985, mask_loss = 0.1392std_loss = 0.2330:   4%|▍         | 169/4210 [06:39<2:20:15,  2.08s/it]

ratio tensor(0.9312, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 170: Loss = 3.7766, Actor Loss = 2.1487, Critic Loss = 3.2583, Entropy = 0.1294, Returns = 1.0843, Value = 1.7193, mask_loss = 0.1884std_loss = 0.2383:   4%|▍         | 170/4210 [06:41<2:18:13,  2.05s/it] 

ratio tensor(1.1998, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 171: Loss = 5.1861, Actor Loss = -0.8476, Critic Loss = 12.0708, Entropy = 0.1668, Returns = 2.1790, Value = 2.2300, mask_loss = 0.2638std_loss = 0.3401:   4%|▍         | 171/4210 [06:43<2:18:15,  2.05s/it]

ratio tensor(1.0712, device='cuda:0', grad_fn=<MeanBackward0>)



Epoch 1, Step 172: Loss = 11.0730, Actor Loss = 0.0120, Critic Loss = 22.1239, Entropy = 0.0926, Returns = 2.1297, Value = 3.8377, mask_loss = 0.0980std_loss = 0.1862:   4%|▍         | 172/4210 [06:45<2:18:10,  2.05s/it]

ratio tensor(1.3664, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 173: Loss = 15.5711, Actor Loss = -1.4072, Critic Loss = 33.9585, Entropy = 0.1012, Returns = 4.7884, Value = 3.8625, mask_loss = 0.0697std_loss = 0.1398:   4%|▍         | 173/4210 [06:47<2:28:30,  2.21s/it]

ratio tensor(1.0468, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 174: Loss = 16.2456, Actor Loss = -0.6736, Critic Loss = 33.8406, Entropy = 0.1104, Returns = 4.5295, Value = 2.7180, mask_loss = 0.2105std_loss = 0.3011:   4%|▍         | 174/4210 [06:49<2:18:17,  2.06s/it]

ratio tensor(1.0111, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 175: Loss = 7.6644, Actor Loss = -2.3023, Critic Loss = 19.9354, Entropy = 0.0951, Returns = 4.6275, Value = 3.4268, mask_loss = 0.0937std_loss = 0.1879:   4%|▍         | 175/4210 [06:51<2:22:54,  2.13s/it] 

ratio tensor(1.0209, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 176: Loss = 3.2760, Actor Loss = 0.5405, Critic Loss = 5.4727, Entropy = 0.0815, Returns = 3.1007, Value = 3.8986, mask_loss = 0.1794std_loss = 0.2762:   4%|▍         | 176/4210 [06:53<2:22:18,  2.12s/it]  

ratio tensor(0.9768, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 177: Loss = 6.4318, Actor Loss = 1.5209, Critic Loss = 9.8230, Entropy = 0.0610, Returns = 2.1683, Value = 2.1063, mask_loss = 0.1683std_loss = 0.2708:   4%|▍         | 177/4210 [06:55<2:19:36,  2.08s/it]

ratio tensor(0.8508, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 178: Loss = 7.6126, Actor Loss = -2.0574, Critic Loss = 19.3420, Entropy = 0.0944, Returns = 4.0196, Value = 2.7543, mask_loss = 0.0999std_loss = 0.2240:   4%|▍         | 178/4210 [06:57<2:18:59,  2.07s/it]

ratio tensor(1.6582, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 179: Loss = 7.9824, Actor Loss = -0.4038, Critic Loss = 16.7742, Entropy = 0.0909, Returns = 3.0800, Value = 3.3659, mask_loss = 0.1527std_loss = 0.2189:   4%|▍         | 179/4210 [06:59<2:18:01,  2.05s/it]

ratio tensor(0.8410, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 180: Loss = 6.1427, Actor Loss = -0.0466, Critic Loss = 12.3817, Entropy = 0.1541, Returns = 3.2818, Value = 4.2012, mask_loss = 0.2545std_loss = 0.2629:   4%|▍         | 180/4210 [07:01<2:12:25,  1.97s/it]

ratio tensor(1.0111, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 181: Loss = 5.6079, Actor Loss = 2.1283, Critic Loss = 6.9607, Entropy = 0.0832, Returns = 1.9239, Value = 3.6201, mask_loss = 0.1149std_loss = 0.1997:   4%|▍         | 181/4210 [07:03<2:08:12,  1.91s/it]  

ratio tensor(1.0138, device='cuda:0', grad_fn=<MeanBackward0>)



Epoch 1, Step 182: Loss = 4.7988, Actor Loss = 1.1792, Critic Loss = 7.2407, Entropy = 0.0833, Returns = 2.2222, Value = 2.7842, mask_loss = 0.1349std_loss = 0.2445:   4%|▍         | 182/4210 [07:06<2:29:14,  2.22s/it]

ratio tensor(0.9340, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 183: Loss = 53.3298, Actor Loss = -5.0054, Critic Loss = 116.6722, Entropy = 0.0877, Returns = 7.7057, Value = 3.7058, mask_loss = 0.1558std_loss = 0.2532:   4%|▍         | 183/4210 [07:08<2:26:03,  2.18s/it]

ratio tensor(1.3576, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 184: Loss = 6.0592, Actor Loss = -0.9389, Critic Loss = 13.9978, Entropy = 0.0756, Returns = 4.5625, Value = 4.5938, mask_loss = 0.1204std_loss = 0.2520:   4%|▍         | 184/4210 [07:12<2:59:22,  2.67s/it]  

ratio tensor(0.9112, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 185: Loss = 5.0618, Actor Loss = 2.1616, Critic Loss = 5.8021, Entropy = 0.0888, Returns = 2.3780, Value = 3.2379, mask_loss = 0.3987std_loss = 0.4162:   4%|▍         | 185/4210 [07:14<2:45:49,  2.47s/it]  

ratio tensor(0.9529, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 186: Loss = 3.6174, Actor Loss = 1.9073, Critic Loss = 3.4214, Entropy = 0.0564, Returns = 0.9634, Value = 1.5746, mask_loss = 0.1070std_loss = 0.1993:   4%|▍         | 186/4210 [07:16<2:38:16,  2.36s/it]

ratio tensor(0.8246, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 187: Loss = 6.1021, Actor Loss = -0.4357, Critic Loss = 13.0763, Entropy = 0.0426, Returns = 1.6518, Value = 1.7404, mask_loss = 0.0631std_loss = 0.1457:   4%|▍         | 187/4210 [07:18<2:30:44,  2.25s/it]

ratio tensor(1.1371, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 188: Loss = 5.5154, Actor Loss = -1.1026, Critic Loss = 13.2372, Entropy = 0.0527, Returns = 2.8398, Value = 2.9441, mask_loss = 0.1342std_loss = 0.2743:   4%|▍         | 188/4210 [07:20<2:20:51,  2.10s/it]

ratio tensor(1.0804, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 189: Loss = 13.5048, Actor Loss = -0.5261, Critic Loss = 28.0629, Entropy = 0.0560, Returns = 3.4584, Value = 3.7517, mask_loss = 0.1382std_loss = 0.2381:   4%|▍         | 189/4210 [07:21<2:14:17,  2.00s/it]

ratio tensor(1.0588, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 190: Loss = 5.6890, Actor Loss = 0.5839, Critic Loss = 10.2120, Entropy = 0.0904, Returns = 2.8142, Value = 3.3478, mask_loss = 0.1349std_loss = 0.2376:   5%|▍         | 190/4210 [07:24<2:29:56,  2.24s/it]  

ratio tensor(0.9902, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 191: Loss = 3.4514, Actor Loss = 1.7604, Critic Loss = 3.3837, Entropy = 0.0766, Returns = 1.5620, Value = 2.9375, mask_loss = 0.3315std_loss = 0.3502:   5%|▍         | 191/4210 [07:26<2:20:39,  2.10s/it] 

ratio tensor(0.9466, device='cuda:0', grad_fn=<MeanBackward0>)



Epoch 1, Step 192: Loss = 2.1100, Actor Loss = -0.0831, Critic Loss = 4.3875, Entropy = 0.0615, Returns = 2.9193, Value = 3.9142, mask_loss = 0.1925std_loss = 0.3301:   5%|▍         | 192/4210 [07:28<2:12:17,  1.98s/it]

ratio tensor(1.0656, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 193: Loss = 6.0685, Actor Loss = 0.5268, Critic Loss = 11.0844, Entropy = 0.0434, Returns = 3.1893, Value = 4.1497, mask_loss = 0.2054std_loss = 0.3004:   5%|▍         | 193/4210 [07:30<2:12:26,  1.98s/it]

ratio tensor(0.7870, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 194: Loss = 8.0029, Actor Loss = -0.1823, Critic Loss = 16.3714, Entropy = 0.0489, Returns = 4.1688, Value = 4.2695, mask_loss = 0.2261std_loss = 0.3469:   5%|▍         | 194/4210 [07:32<2:12:45,  1.98s/it]

ratio tensor(0.8717, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 195: Loss = 9.5437, Actor Loss = 0.5359, Critic Loss = 18.0181, Entropy = 0.1217, Returns = 3.4152, Value = 3.9223, mask_loss = 0.2140std_loss = 0.3115:   5%|▍         | 195/4210 [07:34<2:12:44,  1.98s/it] 

ratio tensor(0.8025, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 196: Loss = 20.9410, Actor Loss = -0.1316, Critic Loss = 42.1469, Entropy = 0.0843, Returns = 4.6209, Value = 2.4272, mask_loss = 0.1698std_loss = 0.2839:   5%|▍         | 196/4210 [07:42<4:20:04,  3.89s/it]

ratio tensor(0.9001, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 197: Loss = 21.4207, Actor Loss = 0.6899, Critic Loss = 41.4629, Entropy = 0.0603, Returns = 1.3396, Value = 1.5902, mask_loss = 0.1238std_loss = 0.2158:   5%|▍         | 197/4210 [07:44<3:43:49,  3.35s/it] 

ratio tensor(0.9812, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 198: Loss = 7.6001, Actor Loss = -1.3829, Critic Loss = 17.9674, Entropy = 0.0718, Returns = 3.1112, Value = 3.3216, mask_loss = 0.2130std_loss = 0.3117:   5%|▍         | 198/4210 [07:46<3:24:17,  3.06s/it]

ratio tensor(0.9039, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 199: Loss = 5.0079, Actor Loss = 2.0013, Critic Loss = 6.0162, Entropy = 0.1584, Returns = 1.2797, Value = 3.1525, mask_loss = 0.1967std_loss = 0.2746:   5%|▍         | 199/4210 [07:48<3:03:06,  2.74s/it]  

ratio tensor(0.7334, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 200: Loss = 19.1179, Actor Loss = -0.4281, Critic Loss = 39.0949, Entropy = 0.1387, Returns = 4.2165, Value = 2.2083, mask_loss = 0.3055std_loss = 0.3720:   5%|▍         | 200/4210 [07:51<2:55:18,  2.62s/it]

ratio tensor(0.8555, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 201: Loss = 1.4408, Actor Loss = 0.1485, Critic Loss = 2.5878, Entropy = 0.1503, Returns = 2.0618, Value = 2.4387, mask_loss = 0.2945std_loss = 0.3698:   5%|▍         | 200/4210 [07:53<2:55:18,  2.62s/it]   

ratio tensor(1.2444, device='cuda:0', grad_fn=<MeanBackward0>)



Epoch 1, Step 202: Loss = 3.8208, Actor Loss = -1.1339, Critic Loss = 9.9116, Entropy = 0.1169, Returns = 3.3496, Value = 1.8665, mask_loss = 0.1996std_loss = 0.3217:   5%|▍         | 202/4210 [07:55<2:35:34,  2.33s/it]

ratio tensor(0.9901, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 203: Loss = 2.2742, Actor Loss = -0.9787, Critic Loss = 6.5084, Entropy = 0.1243, Returns = 2.6120, Value = 1.7995, mask_loss = 0.1956std_loss = 0.3059:   5%|▍         | 203/4210 [07:58<2:40:59,  2.41s/it]

ratio tensor(1.0993, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 204: Loss = 5.9648, Actor Loss = -1.4162, Critic Loss = 14.7656, Entropy = 0.1778, Returns = 4.4684, Value = 3.7942, mask_loss = 0.2425std_loss = 0.2825:   5%|▍         | 204/4210 [08:00<2:38:43,  2.38s/it]

ratio tensor(0.9093, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 205: Loss = 7.5287, Actor Loss = 2.2585, Critic Loss = 10.5424, Entropy = 0.0960, Returns = 1.9763, Value = 3.5342, mask_loss = 0.1695std_loss = 0.2297:   5%|▍         | 205/4210 [08:04<3:09:07,  2.83s/it] 

ratio tensor(0.5553, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 206: Loss = 15.0620, Actor Loss = -1.9232, Critic Loss = 33.9722, Entropy = 0.0936, Returns = 4.7617, Value = 2.7276, mask_loss = 0.1438std_loss = 0.2721:   5%|▍         | 206/4210 [08:06<2:53:08,  2.59s/it]

ratio tensor(2.4872, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 207: Loss = 12.6040, Actor Loss = -1.3644, Critic Loss = 27.9387, Entropy = 0.0943, Returns = 3.8384, Value = 4.1454, mask_loss = 0.2700std_loss = 0.3756:   5%|▍         | 207/4210 [08:08<2:42:22,  2.43s/it]

ratio tensor(1.0703, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 208: Loss = 4.0545, Actor Loss = 1.8081, Critic Loss = 4.4950, Entropy = 0.1086, Returns = 2.0914, Value = 2.3408, mask_loss = 0.1719std_loss = 0.3041:   5%|▍         | 208/4210 [08:11<2:54:40,  2.62s/it]   

ratio tensor(0.8394, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 209: Loss = 20.5686, Actor Loss = -1.7942, Critic Loss = 44.7274, Entropy = 0.0933, Returns = 3.5743, Value = 1.4351, mask_loss = 0.2106std_loss = 0.3019:   5%|▍         | 209/4210 [08:14<3:10:48,  2.86s/it]

ratio tensor(1.2304, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 210: Loss = 1.5813, Actor Loss = -0.9313, Critic Loss = 5.0272, Entropy = 0.0900, Returns = 2.2744, Value = 3.4541, mask_loss = 0.1820std_loss = 0.3256:   5%|▍         | 210/4210 [08:16<2:54:28,  2.62s/it]  

ratio tensor(1.0942, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 211: Loss = 4.8633, Actor Loss = -0.1905, Critic Loss = 10.1096, Entropy = 0.1031, Returns = 3.5044, Value = 3.7146, mask_loss = 0.1506std_loss = 0.2524:   5%|▌         | 211/4210 [08:18<2:43:02,  2.45s/it]

ratio tensor(1.1129, device='cuda:0', grad_fn=<MeanBackward0>)



Epoch 1, Step 212: Loss = 14.1858, Actor Loss = -1.8551, Critic Loss = 32.0847, Entropy = 0.1436, Returns = 5.1558, Value = 3.0989, mask_loss = 0.1922std_loss = 0.2980:   5%|▌         | 212/4210 [08:23<3:21:02,  3.02s/it]

ratio tensor(0.9282, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 213: Loss = 1.5576, Actor Loss = 0.3836, Critic Loss = 2.3504, Entropy = 0.1173, Returns = 2.5137, Value = 2.6852, mask_loss = 0.2209std_loss = 0.3152:   5%|▌         | 213/4210 [08:25<3:02:25,  2.74s/it]   

ratio tensor(0.7606, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 214: Loss = 2.7886, Actor Loss = 0.2472, Critic Loss = 5.0860, Entropy = 0.1611, Returns = 2.5682, Value = 3.4900, mask_loss = 0.2451std_loss = 0.3260:   5%|▌         | 214/4210 [08:27<2:48:36,  2.53s/it]

ratio tensor(0.9260, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 215: Loss = 4.9583, Actor Loss = 1.6546, Critic Loss = 6.6102, Entropy = 0.1464, Returns = 1.6212, Value = 2.4318, mask_loss = 0.2409std_loss = 0.2506:   5%|▌         | 215/4210 [08:32<3:39:52,  3.30s/it]

ratio tensor(0.9982, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 216: Loss = 3.6206, Actor Loss = 0.0601, Critic Loss = 7.1236, Entropy = 0.1273, Returns = 1.9988, Value = 1.4903, mask_loss = 0.2251std_loss = 0.3346:   5%|▌         | 216/4210 [08:34<3:14:52,  2.93s/it]

ratio tensor(0.9565, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 217: Loss = 8.0216, Actor Loss = -1.9326, Critic Loss = 19.9114, Entropy = 0.1442, Returns = 3.2384, Value = 3.0114, mask_loss = 0.1969std_loss = 0.2943:   5%|▌         | 217/4210 [08:36<2:51:37,  2.58s/it]

ratio tensor(1.0174, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 218: Loss = 3.2958, Actor Loss = 0.8135, Critic Loss = 4.9676, Entropy = 0.1501, Returns = 2.4625, Value = 3.5953, mask_loss = 0.2236std_loss = 0.3391:   5%|▌         | 218/4210 [08:38<2:42:04,  2.44s/it]  

ratio tensor(0.8935, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 219: Loss = 3.0513, Actor Loss = 1.1374, Critic Loss = 3.8300, Entropy = 0.1106, Returns = 2.1957, Value = 1.9532, mask_loss = 0.2038std_loss = 0.2346:   5%|▌         | 219/4210 [08:40<2:28:46,  2.24s/it]

ratio tensor(0.8489, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 220: Loss = 3.6411, Actor Loss = -1.8520, Critic Loss = 10.9880, Entropy = 0.0910, Returns = 3.5225, Value = 2.7764, mask_loss = 0.2333std_loss = 0.3131:   5%|▌         | 220/4210 [08:42<2:24:30,  2.17s/it]

ratio tensor(1.0410, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 221: Loss = 6.6012, Actor Loss = -0.9055, Critic Loss = 15.0157, Entropy = 0.1173, Returns = 3.5218, Value = 3.4551, mask_loss = 0.2919std_loss = 0.3453:   5%|▌         | 221/4210 [08:44<2:22:28,  2.14s/it]

ratio tensor(1.0171, device='cuda:0', grad_fn=<MeanBackward0>)



Epoch 1, Step 222: Loss = 6.8622, Actor Loss = -1.7666, Critic Loss = 17.2593, Entropy = 0.0911, Returns = 5.6459, Value = 3.5755, mask_loss = 0.1492std_loss = 0.2686:   5%|▌         | 222/4210 [08:46<2:19:43,  2.10s/it]

ratio tensor(1.1438, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 223: Loss = 4.1911, Actor Loss = 0.8078, Critic Loss = 6.7696, Entropy = 0.1525, Returns = 2.2549, Value = 2.5193, mask_loss = 0.2246std_loss = 0.3021:   5%|▌         | 223/4210 [08:48<2:17:56,  2.08s/it]  

ratio tensor(0.8613, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 224: Loss = 6.3201, Actor Loss = -0.3745, Critic Loss = 13.3906, Entropy = 0.0703, Returns = 2.6850, Value = 3.0759, mask_loss = 0.1982std_loss = 0.3140:   5%|▌         | 224/4210 [08:50<2:16:19,  2.05s/it]

ratio tensor(1.0073, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 225: Loss = 4.5303, Actor Loss = -0.1915, Critic Loss = 9.4458, Entropy = 0.1108, Returns = 3.1918, Value = 4.3134, mask_loss = 0.2638std_loss = 0.3594:   5%|▌         | 225/4210 [08:52<2:14:53,  2.03s/it] 

ratio tensor(0.9595, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 226: Loss = 6.0098, Actor Loss = 1.3947, Critic Loss = 9.2321, Entropy = 0.0951, Returns = 2.5591, Value = 3.0793, mask_loss = 0.2574std_loss = 0.3209:   5%|▌         | 226/4210 [08:54<2:14:39,  2.03s/it] 

ratio tensor(1.0347, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 227: Loss = 29.6327, Actor Loss = -4.0738, Critic Loss = 67.4144, Entropy = 0.0765, Returns = 6.3493, Value = 2.9503, mask_loss = 0.1311std_loss = 0.2420:   5%|▌         | 227/4210 [08:56<2:09:43,  1.95s/it]

ratio tensor(1.0878, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 228: Loss = 2.9508, Actor Loss = 0.0853, Critic Loss = 5.7346, Entropy = 0.1712, Returns = 2.7071, Value = 3.2197, mask_loss = 0.2486std_loss = 0.3369:   5%|▌         | 228/4210 [08:57<2:06:44,  1.91s/it]   

ratio tensor(1.1297, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 229: Loss = 3.7718, Actor Loss = -0.2508, Critic Loss = 8.0478, Entropy = 0.1319, Returns = 3.1741, Value = 2.5741, mask_loss = 0.2235std_loss = 0.3371:   5%|▌         | 229/4210 [08:59<2:04:42,  1.88s/it]

ratio tensor(0.7668, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 230: Loss = 1.6801, Actor Loss = 0.1856, Critic Loss = 2.9917, Entropy = 0.1332, Returns = 2.3961, Value = 2.3704, mask_loss = 0.2327std_loss = 0.3231:   5%|▌         | 230/4210 [09:01<2:02:30,  1.85s/it] 

ratio tensor(0.7088, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 231: Loss = 7.5033, Actor Loss = -2.8209, Critic Loss = 20.6506, Entropy = 0.1113, Returns = 4.9046, Value = 3.9065, mask_loss = 0.1528std_loss = 0.2898:   5%|▌         | 231/4210 [09:03<2:01:15,  1.83s/it]

ratio tensor(1.1120, device='cuda:0', grad_fn=<MeanBackward0>)



Epoch 1, Step 232: Loss = 7.0220, Actor Loss = 1.4527, Critic Loss = 11.1411, Entropy = 0.1192, Returns = 2.4669, Value = 4.6221, mask_loss = 0.2392std_loss = 0.3167:   6%|▌         | 232/4210 [09:05<2:00:20,  1.82s/it] 

ratio tensor(0.9019, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 233: Loss = 5.6544, Actor Loss = -0.2344, Critic Loss = 11.7787, Entropy = 0.0590, Returns = 4.8703, Value = 3.4598, mask_loss = 0.1906std_loss = 0.3057:   6%|▌         | 233/4210 [09:07<2:10:10,  1.96s/it]

ratio tensor(0.9451, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 234: Loss = 9.3872, Actor Loss = -0.3538, Critic Loss = 19.4854, Entropy = 0.1644, Returns = 3.4727, Value = 2.9900, mask_loss = 0.2644std_loss = 0.3130:   6%|▌         | 234/4210 [09:09<2:06:58,  1.92s/it]

ratio tensor(0.9499, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 235: Loss = 8.2662, Actor Loss = -0.7060, Critic Loss = 17.9467, Entropy = 0.1165, Returns = 3.3338, Value = 3.1733, mask_loss = 0.1831std_loss = 0.2762:   6%|▌         | 235/4210 [09:10<2:04:18,  1.88s/it]

ratio tensor(0.8711, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 236: Loss = 3.3571, Actor Loss = 0.9493, Critic Loss = 4.8181, Entropy = 0.1193, Returns = 2.1222, Value = 2.0468, mask_loss = 0.2263std_loss = 0.2506:   6%|▌         | 236/4210 [09:12<2:02:46,  1.85s/it]  

ratio tensor(0.9668, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 237: Loss = 2.1205, Actor Loss = 0.7145, Critic Loss = 2.8147, Entropy = 0.1351, Returns = 1.2236, Value = 1.0541, mask_loss = 0.1983std_loss = 0.2503:   6%|▌         | 237/4210 [09:14<2:01:07,  1.83s/it]

ratio tensor(1.0161, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 238: Loss = 10.4599, Actor Loss = -2.7779, Critic Loss = 26.4775, Entropy = 0.1011, Returns = 3.8340, Value = 3.0282, mask_loss = 0.1191std_loss = 0.2431:   6%|▌         | 238/4210 [09:16<2:12:18,  2.00s/it]

ratio tensor(1.3164, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 239: Loss = 3.7831, Actor Loss = 1.7674, Critic Loss = 4.0341, Entropy = 0.1335, Returns = 1.2753, Value = 2.5373, mask_loss = 0.2364std_loss = 0.2967:   6%|▌         | 239/4210 [09:18<2:07:38,  1.93s/it]   

ratio tensor(0.9633, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 240: Loss = 5.1599, Actor Loss = 1.4152, Critic Loss = 7.4927, Entropy = 0.1614, Returns = 1.2664, Value = 3.6026, mask_loss = 0.2535std_loss = 0.3180:   6%|▌         | 240/4210 [09:21<2:16:09,  2.06s/it]

ratio tensor(0.9274, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 241: Loss = 8.3992, Actor Loss = 1.0967, Critic Loss = 14.6064, Entropy = 0.0717, Returns = 2.7353, Value = 4.4424, mask_loss = 0.0863std_loss = 0.1742:   6%|▌         | 241/4210 [09:23<2:17:04,  2.07s/it]

ratio tensor(0.8617, device='cuda:0', grad_fn=<MeanBackward0>)



Epoch 1, Step 242: Loss = 7.6742, Actor Loss = 0.9054, Critic Loss = 13.5392, Entropy = 0.0850, Returns = 3.3468, Value = 3.0347, mask_loss = 0.1022std_loss = 0.2358:   6%|▌         | 242/4210 [09:24<2:09:27,  1.96s/it]

ratio tensor(0.9487, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 243: Loss = 3.0837, Actor Loss = 0.5703, Critic Loss = 5.0300, Entropy = 0.1587, Returns = 2.1464, Value = 1.8058, mask_loss = 0.2515std_loss = 0.3243:   6%|▌         | 243/4210 [09:26<2:11:50,  1.99s/it] 

ratio tensor(0.9159, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 244: Loss = 3.1997, Actor Loss = -0.8067, Critic Loss = 8.0149, Entropy = 0.1021, Returns = 2.5450, Value = 2.6845, mask_loss = 0.2096std_loss = 0.3405:   6%|▌         | 244/4210 [09:28<2:12:53,  2.01s/it]

ratio tensor(1.1718, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 245: Loss = 3.0421, Actor Loss = 1.3045, Critic Loss = 3.4775, Entropy = 0.1165, Returns = 1.2905, Value = 2.2129, mask_loss = 0.1796std_loss = 0.2601:   6%|▌         | 245/4210 [09:30<2:08:25,  1.94s/it] 

ratio tensor(0.9697, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 246: Loss = 0.9418, Actor Loss = -0.5505, Critic Loss = 2.9871, Entropy = 0.1341, Returns = 2.4742, Value = 1.9364, mask_loss = 0.2828std_loss = 0.3486:   6%|▌         | 246/4210 [09:32<2:04:52,  1.89s/it]

ratio tensor(1.2096, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 247: Loss = 2.6098, Actor Loss = -0.1917, Critic Loss = 5.6052, Entropy = 0.1180, Returns = 2.6513, Value = 3.4788, mask_loss = 0.2125std_loss = 0.3091:   6%|▌         | 247/4210 [09:34<2:07:58,  1.94s/it]

ratio tensor(1.2355, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 248: Loss = 3.4975, Actor Loss = 1.7475, Critic Loss = 3.5023, Entropy = 0.1149, Returns = 1.5934, Value = 2.9878, mask_loss = 0.3556std_loss = 0.4009:   6%|▌         | 248/4210 [09:36<2:04:54,  1.89s/it] 

ratio tensor(0.9271, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 249: Loss = 4.5880, Actor Loss = 0.5541, Critic Loss = 8.0700, Entropy = 0.1091, Returns = 2.4384, Value = 2.0571, mask_loss = 0.2427std_loss = 0.3432:   6%|▌         | 249/4210 [09:38<2:08:21,  1.94s/it]

ratio tensor(1.0462, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 250: Loss = 1.7984, Actor Loss = -0.1924, Critic Loss = 3.9840, Entropy = 0.1184, Returns = 2.0838, Value = 2.9645, mask_loss = 0.3775std_loss = 0.3590:   6%|▌         | 250/4210 [09:40<2:11:40,  2.00s/it]

ratio tensor(2.2202, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 251: Loss = 2.8959, Actor Loss = 0.7349, Critic Loss = 4.3237, Entropy = 0.0875, Returns = 1.9656, Value = 2.5266, mask_loss = 0.3097std_loss = 0.4034:   6%|▌         | 251/4210 [09:42<2:12:14,  2.00s/it] 

ratio tensor(1.0523, device='cuda:0', grad_fn=<MeanBackward0>)



Epoch 1, Step 252: Loss = 2.2215, Actor Loss = 0.6650, Critic Loss = 3.1155, Entropy = 0.1296, Returns = 1.5801, Value = 1.8502, mask_loss = 0.2464std_loss = 0.3001:   6%|▌         | 252/4210 [09:45<2:24:39,  2.19s/it]

ratio tensor(1.0311, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 253: Loss = 1.0014, Actor Loss = 0.2327, Critic Loss = 1.5394, Entropy = 0.1053, Returns = 1.8298, Value = 1.5157, mask_loss = 0.3716std_loss = 0.4228:   6%|▌         | 253/4210 [09:46<2:16:51,  2.08s/it]

ratio tensor(0.8855, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 254: Loss = 1.7635, Actor Loss = -0.5174, Critic Loss = 4.5633, Entropy = 0.0730, Returns = 1.8742, Value = 1.5165, mask_loss = 0.2072std_loss = 0.3247:   6%|▌         | 254/4210 [09:49<2:16:58,  2.08s/it]

ratio tensor(0.9291, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 255: Loss = 0.4095, Actor Loss = -0.7562, Critic Loss = 2.3333, Entropy = 0.0965, Returns = 2.2303, Value = 2.4989, mask_loss = 0.2234std_loss = 0.3478:   6%|▌         | 255/4210 [09:51<2:16:45,  2.07s/it]

ratio tensor(0.8986, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 256: Loss = 2.6318, Actor Loss = -0.1773, Critic Loss = 5.6195, Entropy = 0.0631, Returns = 2.6161, Value = 3.2134, mask_loss = 0.2300std_loss = 0.3338:   6%|▌         | 256/4210 [09:53<2:16:27,  2.07s/it]

ratio tensor(1.0057, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 257: Loss = 15.1904, Actor Loss = -3.5432, Critic Loss = 37.4683, Entropy = 0.0500, Returns = 6.2197, Value = 3.4555, mask_loss = 0.1848std_loss = 0.3284:   6%|▌         | 257/4210 [09:55<2:16:27,  2.07s/it]

ratio tensor(1.3199, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 258: Loss = 3.3003, Actor Loss = 1.1450, Critic Loss = 4.3131, Entropy = 0.1229, Returns = 2.1178, Value = 2.4511, mask_loss = 0.2627std_loss = 0.3302:   6%|▌         | 258/4210 [09:57<2:10:16,  1.98s/it]   

ratio tensor(1.0332, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 259: Loss = 1.5999, Actor Loss = 0.3278, Critic Loss = 2.5460, Entropy = 0.0962, Returns = 1.7858, Value = 1.1770, mask_loss = 0.2621std_loss = 0.3742:   6%|▌         | 259/4210 [09:58<2:06:55,  1.93s/it]

ratio tensor(1.0729, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 260: Loss = -0.4590, Actor Loss = -1.0344, Critic Loss = 1.1533, Entropy = 0.1296, Returns = 2.0660, Value = 2.0378, mask_loss = 0.3552std_loss = 0.3757:   6%|▌         | 260/4210 [10:00<2:10:50,  1.99s/it]

ratio tensor(1.0106, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 261: Loss = 3.2629, Actor Loss = 0.3613, Critic Loss = 5.8046, Entropy = 0.0670, Returns = 1.6382, Value = 2.5737, mask_loss = 0.2139std_loss = 0.3179:   6%|▌         | 261/4210 [10:02<2:11:01,  1.99s/it]  

ratio tensor(0.6735, device='cuda:0', grad_fn=<MeanBackward0>)



Epoch 1, Step 262: Loss = 3.1252, Actor Loss = -0.5116, Critic Loss = 7.2752, Entropy = 0.0779, Returns = 2.9542, Value = 2.3591, mask_loss = 0.3100std_loss = 0.4148:   6%|▌         | 262/4210 [10:04<2:11:51,  2.00s/it]

ratio tensor(1.0452, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 263: Loss = 4.1221, Actor Loss = -0.4228, Critic Loss = 9.0919, Entropy = 0.1052, Returns = 2.8276, Value = 2.8078, mask_loss = 0.2505std_loss = 0.3477:   6%|▌         | 263/4210 [10:06<2:07:26,  1.94s/it]

ratio tensor(0.9827, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 264: Loss = 2.3320, Actor Loss = 0.2730, Critic Loss = 4.1203, Entropy = 0.1078, Returns = 2.1751, Value = 2.0471, mask_loss = 0.2568std_loss = 0.3466:   6%|▋         | 264/4210 [10:08<2:04:12,  1.89s/it] 

ratio tensor(0.8774, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 265: Loss = 4.5956, Actor Loss = -1.7624, Critic Loss = 12.7185, Entropy = 0.1185, Returns = 3.4821, Value = 2.9315, mask_loss = 0.3289std_loss = 0.3787:   6%|▋         | 265/4210 [10:10<2:02:04,  1.86s/it]

ratio tensor(1.0913, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 266: Loss = 2.9071, Actor Loss = 0.4613, Critic Loss = 4.8929, Entropy = 0.0586, Returns = 2.7379, Value = 3.8151, mask_loss = 0.2201std_loss = 0.3668:   6%|▋         | 266/4210 [10:12<2:00:37,  1.83s/it]  

ratio tensor(0.9095, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 267: Loss = 6.5195, Actor Loss = -0.3196, Critic Loss = 13.6795, Entropy = 0.0613, Returns = 3.8994, Value = 3.8402, mask_loss = 0.2859std_loss = 0.4021:   6%|▋         | 267/4210 [10:14<2:03:47,  1.88s/it]

ratio tensor(0.8233, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 268: Loss = 4.2297, Actor Loss = 1.9389, Critic Loss = 4.5825, Entropy = 0.0485, Returns = 1.9559, Value = 2.3717, mask_loss = 0.3356std_loss = 0.3596:   6%|▋         | 268/4210 [10:16<2:06:59,  1.93s/it]  

ratio tensor(0.5823, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 269: Loss = 2.6962, Actor Loss = -0.3141, Critic Loss = 6.0241, Entropy = 0.1719, Returns = 2.4299, Value = 0.9187, mask_loss = 0.3189std_loss = 0.3653:   6%|▋         | 269/4210 [10:18<2:08:30,  1.96s/it]

ratio tensor(1.2431, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 270: Loss = 0.5114, Actor Loss = -0.7882, Critic Loss = 2.6005, Entropy = 0.0657, Returns = 1.6812, Value = 1.3586, mask_loss = 0.3209std_loss = 0.3784:   6%|▋         | 270/4210 [10:20<2:09:54,  1.98s/it]

ratio tensor(0.9589, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 271: Loss = 0.5746, Actor Loss = -0.1800, Critic Loss = 1.5105, Entropy = 0.0661, Returns = 1.7785, Value = 2.3153, mask_loss = 0.3092std_loss = 0.3642:   6%|▋         | 271/4210 [10:22<2:11:47,  2.01s/it]

ratio tensor(1.0777, device='cuda:0', grad_fn=<MeanBackward0>)



Epoch 1, Step 272: Loss = 0.4357, Actor Loss = -0.0042, Critic Loss = 0.8816, Entropy = 0.0910, Returns = 2.2910, Value = 2.3292, mask_loss = 0.4597std_loss = 0.4380:   6%|▋         | 272/4210 [10:24<2:12:35,  2.02s/it]

ratio tensor(1.0567, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 272: Loss = 0.4357, Actor Loss = -0.0042, Critic Loss = 0.8816, Entropy = 0.0910, Returns = 2.2910, Value = 2.3292, mask_loss = 0.4597std_loss = 0.4380:   6%|▋         | 272/4210 [10:25<2:31:01,  2.30s/it]


KeyboardInterrupt: 

In [8]:
import numpy as np
import torch.nn.functional as F
from datasets import Dataset

# mask_gen_model.load_state_dict(torch.load('saved_model/imdb_mask_gen_model_0_100.pth',map_location=device))

# Switch the mask generator to eval mode for this inspection cell.
mask_gen_model.eval()

# Fixed-order loader over the training split, kept for ad-hoc inspection.
test_dataloader = DataLoader(train_ds, batch_size=batch_size, collate_fn=collate_fn, shuffle=False)
# To inspect a dataset batch instead of the hand-written sentences below, use one of:
# test_inputs = next(iter(test_dataloader)).to(device)
# test_inputs = next(iter(train_dataloader)).to(device)

# tokens = tokenizer.convert_ids_to_tokens(test_inputs['input_ids'][idx])

# Hand-written probe sentences, three each labelled 1, 0 and -1.
# NOTE(review): whether the 'sst2' collate function reads 'label' at all is not
# visible from this cell — confirm before relying on the -1 labels.
data_dict = {
    'sentence': ["I absolutely love this product! It exceeded all my expectations.", 
             "The movie was fantastic, and the acting was top-notch.",
             "This restaurant offers great service and delicious food. Highly recommend!",
             "The product works as advertised, nothing more, nothing less.",
             "The event was well-organized, but it didn’t leave a lasting impression.",
             # Fixed typo: this sentence previously started with "t’s".
             "It’s an average phone, nothing special but it does the job.",
             "I’m really disappointed with this purchase. It broke within a week.",
             "The movie was too long and boring, I wouldn’t recommend it.",
             "Terrible customer service, I won’t be coming back to this place."],
    'label': [1, 1, 1, 0, 0, 0, -1, -1, -1]
}
manual_test_data = Dataset.from_dict(data_dict)

# One batch holding every probe sentence, however many there are.
manual_test_dataloader = DataLoader(
    manual_test_data,
    batch_size=len(manual_test_data),
    collate_fn=collate_fn,
    shuffle=False,
)
test_inputs = next(iter(manual_test_dataloader)).to(device)


# Generate the answer for the test inputs.
# NOTE: do_sample=True makes the responses stochastic; seed torch beforehand if
# the outputs of this cell need to be repeatable.
gen_outputs = model.generate(
            input_ids=test_inputs['input_ids'],
            attention_mask=test_inputs['attention_mask'],
            max_new_tokens=128,
            eos_token_id=terminators,
            pad_token_id=tokenizer.pad_token_id,
            do_sample=True,
            temperature=0.6,
            top_p=0.9,
            return_dict_in_generate=True,
            output_scores=True,
        )
input_ids = test_inputs['input_ids']
attention_mask = test_inputs['attention_mask']
gen_tokens = gen_outputs.sequences

# Columns [0, prompt_length) hold the prompt, the remaining pad_length columns
# hold generated tokens.
prompt_length = input_ids.size(1)
pad_length = gen_tokens.size(1) - prompt_length

# Extend the prompt masks over the generated columns: attention is switched on
# there (1), the context mask is switched off there (0).
gen_attention_mask = F.pad(attention_mask, (0, pad_length), mode='constant', value=1)
context_mask = F.pad(test_inputs['context_mask'], (0, pad_length), mode='constant', value=0)

# 1 for real tokens and 0 for padding tokens. The prompt columns are forced to 1
# so that only the original attention_mask decides what is masked in the prompt.
# Slicing by prompt_length (instead of [:, :-pad_length]) stays correct even if
# nothing was generated: with pad_length == 0 the old slice selected no columns.
# NOTE(review): cell 3 sets pad_token = eos_token, so a generated token equal to
# the pad id (e.g. the terminating one) is presumably zeroed here too — confirm
# that this is intended.
unpaded_token_mask = (gen_tokens != tokenizer.pad_token_id).long()
unpaded_token_mask[:, :prompt_length] = 1
gen_attention_mask = gen_attention_mask * unpaded_token_mask

# Response mask: the attention mask with every prompt column cleared.
# (The original author left a TODO here asking whether the prompt slice was
# problematic; the explicit prompt_length slice removes the zero-length case.)
response_mask = gen_attention_mask.clone()
response_mask[:, :prompt_length] = 0

# context_mask = F.pad(context_mask, (0, pad_length), mode='constant', value=0)

# Alternative kept for reference: feed the last hidden state to the mask generator directly.
# with torch.no_grad():
#     # prompt_outputs = model(input_ids=test_inputs['input_ids'], attention_mask=test_inputs['attention_mask'], output_hidden_states=True, return_dict=True)
#     prompt_outputs = model(input_ids=gen_tokens, attention_mask=gen_attention_mask, output_hidden_states=True, return_dict=True)

#     last_hidden_state = prompt_outputs.hidden_states[-1].float()
#     mask_logits = mask_gen_model(last_hidden_state)


# Query the mask generator without tracking gradients.
with torch.no_grad():
    state = gen_tokens, gen_attention_mask, context_mask, response_mask
    dist, value = mask_gen_model.get_dist_critic(model, state)

# Raw logits of the returned distribution; a later cell applies a sigmoid to them.
mask_logits = dist.logits

In [40]:
import random
# idx = random.randint(0, 8)
# Index of the batch row to inspect.
idx = 6

# Pull that row out of each batch-level tensor.
test_ids, test_mask, test_context_mask = (
    batch_tensor[idx] for batch_tensor in (gen_tokens, gen_attention_mask, context_mask)
)
test_tokens = tokenizer.convert_ids_to_tokens(test_ids)

# Per-token mask probability from the generator's logits.
test_mask_prob = mask_logits[idx].sigmoid()
# TODO (from the original): decide whether the probability should be inverted:
# test_mask_prob = 1 - test_mask_prob

# Zero the probability wherever the context mask is 0.
scores = test_context_mask * test_mask_prob

def normalize_except_zeros(array):
    """Min-max normalize the strictly positive entries of ``array``.

    Entries that are not ``> 0`` (zeros, and negatives if any) are copied
    through unchanged. The positive entries are rescaled so the smallest
    becomes 0.0 and the largest becomes 1.0.

    Note that the smallest positive entry therefore maps to 0.0 and can no
    longer be told apart from entries that were 0 to begin with.

    Edge cases:
      * no positive entry: the input is returned as an unchanged copy
        (previously ``np.min`` raised on the empty selection);
      * all positive entries equal (including a single one): they are all
        set to 1.0 (previously 0/0 produced NaN);
      * integer or boolean input: converted to float64 first so the
        normalized fractions are not truncated on write-back.

    Args:
        array: array-like of numbers.

    Returns:
        A new ``np.ndarray`` of the same shape; the input is not modified.
        Floating-point inputs keep their dtype.
    """
    values = np.asarray(array)
    if not np.issubdtype(values.dtype, np.floating):
        values = values.astype(np.float64)

    # Work on a copy so non-positive entries are preserved as-is.
    normalized_array = np.copy(values)

    mask = values > 0
    if not mask.any():
        return normalized_array

    non_zero_elements = values[mask]
    min_val = non_zero_elements.min()
    max_val = non_zero_elements.max()
    value_range = max_val - min_val

    if value_range > 0:
        normalized_array[mask] = (non_zero_elements - min_val) / value_range
    else:
        # Degenerate range: every positive entry is equally "maximal".
        normalized_array[mask] = 1.0

    return normalized_array
# Bring the scores to host memory as a numpy array, then rescale them.
scores = scores.detach().cpu().numpy()
scores = normalize_except_zeros(scores)

# Variant that drops special tokens, kept for reference:
# filtered_token_scores = [(token, score) for token, score in zip(test_tokens, scores) if token not in tokenizer.all_special_tokens]

# Pair every token with its score (special tokens included).
filtered_token_scores = list(zip(test_tokens, scores))

In [41]:
# Print one "Token / Score" line per (token, score) pair.
for pair in filtered_token_scores:
    token, score = pair
    print(f"Token: {token}, Score: {score}")

Token: <|eot_id|>, Score: 0.0
Token: <|eot_id|>, Score: 0.0
Token: <|begin_of_text|>, Score: 0.0
Token: <|start_header_id|>, Score: 0.0
Token: system, Score: 0.0
Token: <|end_header_id|>, Score: 0.0
Token: ĊĊ, Score: 0.0
Token: You, Score: 0.0
Token: Ġare, Score: 0.0
Token: Ġa, Score: 0.0
Token: Ġchat, Score: 0.0
Token: bot, Score: 0.0
Token: Ġfor, Score: 0.0
Token: Ġsentiment, Score: 0.0
Token: Ġanalysis, Score: 0.0
Token: ., Score: 0.0
Token: ĠYou, Score: 0.0
Token: Ġcan, Score: 0.0
Token: Ġhelp, Score: 0.0
Token: Ġusers, Score: 0.0
Token: Ġwith, Score: 0.0
Token: Ġtheir, Score: 0.0
Token: Ġquestions, Score: 0.0
Token: Ġvia, Score: 0.0
Token: Ġconcise, Score: 0.0
Token: Ġresponses, Score: 0.0
Token: Ġof, Score: 0.0
Token: ĠPOS, Score: 0.0
Token: ITIVE, Score: 0.0
Token: Ġor, Score: 0.0
Token: ĠNEG, Score: 0.0
Token: ATIVE, Score: 0.0
Token: ., Score: 0.0
Token: <|eot_id|>, Score: 0.0
Token: <|start_header_id|>, Score: 0.0
Token: user, Score: 0.0
Token: <|end_header_id|>, Score: 0.0
T

In [42]:
import re

def clean_token(token):
    """Return ``token`` with every 'Ġ' and 'Ċ' marker character removed."""
    return "".join(ch for ch in token if ch not in ("Ġ", "Ċ"))

# Result list of (text, score) tuples, plus the running state for the word
# currently being assembled: its text, its summed score and its piece count.
merged_tokens_scores = list()
current_token, current_score, count = "", 0, 0

def is_special_token(token):
    """Tell whether ``token`` is a stand-alone special token.

    A token counts as special when it is wrapped in '<|' ... '|>',
    e.g. '<|start_header_id|>'.
    """
    if not token.startswith("<|"):
        return False
    return token.endswith("|>")

# Merge sub-word pieces back into whole words; each word's score is the mean of
# its pieces' scores. Uses the accumulators initialised earlier in this cell
# (merged_tokens_scores, current_token, current_score, count).
for token, score in filtered_token_scores:
    special = is_special_token(token)
    # Special tokens are kept verbatim, so they are never cleaned.
    cleaned_token = "" if special else clean_token(token)
    # A leading 'Ġ' or 'Ċ' usually marks the first piece of a new word.
    starts_word = token.startswith(("Ġ", "Ċ"))

    # Three things close the word being assembled: a special token, a piece
    # with a leading marker, or a marker-only piece such as 'ĊĊ'. The last case
    # used to be skipped without closing the word, which glued the words on
    # either side of a newline together.
    if current_token and (special or starts_word or not cleaned_token):
        # Cap the mean at 1.0 so it stays usable as a colour intensity.
        average_score = min(current_score / count, 1.0)
        merged_tokens_scores.append((current_token, average_score))
        current_token, current_score, count = "", 0, 0

    if special:
        # Special tokens are emitted as-is and never merged.
        merged_tokens_scores.append((token, score))
    elif not cleaned_token:
        # Marker-only piece: nothing to add to the output.
        continue
    elif starts_word:
        # Start a fresh word from this piece.
        current_token, current_score, count = cleaned_token, score, 1
    else:
        # Continuation piece: extend the current word.
        current_token += cleaned_token
        current_score += score
        count += 1

# Emit the word still pending after the last piece.
if current_token:
    average_score = min(current_score / count, 1.0)
    merged_tokens_scores.append((current_token, average_score))

# # 输出结果
# for token, score in merged_tokens_scores:
#     print(f"Token: {token}, Score: {score}")





# Render every merged token as an HTML <span> whose background fades from
# white (score 0) to green (score 1).
import html
from IPython.display import display, HTML

highlighted_spans = []
for token, score in merged_tokens_scores:
    # A NaN score would make int() raise, and an out-of-range score would give
    # an invalid rgb() component; treat NaN as 0 and clamp to [0, 1].
    if score != score:
        score = 0.0
    score = min(max(score, 0.0), 1.0)

    # Red and blue fall together as the score rises; green stays at full.
    red = int((1 - score) * 255)
    green = 255
    blue = int((1 - score) * 255)
    color = f'rgb({red}, {green}, {blue})'

    # Escape the token so text such as '<br' or '&' is shown literally instead
    # of being parsed as markup.
    highlighted_spans.append(
        f'<span style="background-color: {color}; color: black;">{html.escape(token)}</span>'
    )

# One space between spans, none at either end.
highlighted_text = " ".join(highlighted_spans)

# Show the highlighted text in the notebook.
display(HTML(highlighted_text))

In [None]:
# Largest mask probability among the positions kept by the context mask.
torch.max(test_mask_prob * test_context_mask)

tensor(0.8213, device='cuda:0')

In [None]:
# Display the full list of (token, score) pairs for the selected example.
filtered_token_scores

In [None]:
# Scratch cell: look at the collated batch's keys and at raw tokenizer output.
# Only the value of the last expression is displayed by the notebook.
test_inputs.keys()

# NOTE(review): collate_fn is called with no arguments here, whereas elsewhere
# in this notebook it is only handed to DataLoader via collate_fn= (which calls
# it with a batch). This call presumably raises TypeError — confirm the intent
# or pass it a list of examples.
collate_fn()

tokenizer("this is a sentence")

In [22]:
# Keep only the first three rows of the training split as a tiny sample.
new_ds = ds['train'].select(range(3))

In [26]:
from datasets import Dataset

# Two-row toy dataset with 'text' and 'label' columns.
# NOTE: this rebinds data_dict, the name the probe-sentence cell also uses.
data_dict = dict(
    text=["This is the first example.", "This is the second example."],
    label=[0, 1],
)
dataset = Dataset.from_dict(data_dict)

In [None]:
# Display the first record of new_ds.
new_ds[0]