In [1]:
# Configuring environment parameters
import os
import json 
import logging

logging.basicConfig(
    filename='log/app.log',            # Specify the log file name
    level=logging.DEBUG,           # Set the log level (DEBUG, INFO, WARNING, ERROR, CRITICAL)
    format='%(asctime)s - %(levelname)s - %(message)s'  # Set the log format
)

# Load the environment configuration JSON data
json_path = 'env_config.json'
with open(json_path, 'r') as file:
    env_config = json.load(file)

hf_home = env_config['HF_HOME']
# Set the HF_HOME environment variable
os.environ['HF_HOME'] = hf_home
# Set the access token to huggingface hub
access_token = env_config['access_token']
os.environ['HUGGINGFACE_HUB_TOKEN'] = access_token

In [2]:
# Loading necessary packages
import transformers 
import torch

from accelerate import Accelerator
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModel, LlamaForTokenClassification #, LlamaRotaryEmbedding
# from transformers import LlamaTokenizerFast
import torch.nn.functional as F

from llmexp.helper import DataHelper
from datasets import load_dataset
from torch.utils.data import DataLoader

# TODO 注意load正确的模型
from llmexp.imdb_model import MaskGeneratingModelForIMDB
from tqdm import tqdm

In [3]:
# Load datasets
model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id, token=access_token, padding_side='left')
tokenizer.pad_token = tokenizer.eos_token

# ds = load_dataset("imdb")
ds = load_dataset("rajpurkar/squad")
# ds = load_dataset("stanfordnlp/sst2")
train_ds = ds['train']
# test_ds = ds['test']
test_ds = ds['validation']

llm_exp_helper = DataHelper(tokenizer)
# collate_fn = llm_exp_helper.get_collate_fun('imdb')
# collate_fn = llm_exp_helper.get_collate_fun('sst2')
collate_fn = llm_exp_helper.get_collate_fun('squad')

# Define batch size here!
batch_size = 16
train_dataloader = DataLoader(train_ds, batch_size=batch_size, collate_fn=collate_fn, shuffle=True)
test_dataloader = DataLoader(train_ds, batch_size=batch_size, collate_fn=collate_fn, shuffle=False)

In [4]:
# Configure and load model
accelerator = Accelerator()
device = accelerator.device
# device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")

model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
# model_id = "meta-llama/Meta-Llama-3-8B"  # non-instruct version

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    # device_map="auto",
    device_map=device,
    token=access_token,
)

config = model.config

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [5]:
# Configure mask model and  Training parameters
mask_gen_model = MaskGeneratingModelForIMDB()
mask_gen_model.to(device)

# Set pad_token_id if it is not set
if tokenizer.pad_token_id is None:
    tokenizer.pad_token_id = tokenizer.eos_token_id

terminators = [
    tokenizer.eos_token_id,
    tokenizer.convert_tokens_to_ids("<|eot_id|>")
]

optimizer = torch.optim.Adam(mask_gen_model.parameters(), lr=5e-5)

`LlamaRotaryEmbedding` can now be fully parameterized by passing the model config through the `config` argument. All other arguments will be removed in v4.45


In [6]:
next(iter(train_dataloader))

{'input_ids': tensor([[128009, 128009, 128009,  ...,  78191, 128007,    271],
        [128009, 128009, 128009,  ...,  78191, 128007,    271],
        [128009, 128009, 128009,  ...,  78191, 128007,    271],
        ...,
        [128009, 128009, 128009,  ...,  78191, 128007,    271],
        [128009, 128009, 128009,  ...,  78191, 128007,    271],
        [128009, 128009, 128009,  ...,  78191, 128007,    271]]), 'attention_mask': tensor([[0, 0, 0,  ..., 1, 1, 1],
        [0, 0, 0,  ..., 1, 1, 1],
        [0, 0, 0,  ..., 1, 1, 1],
        ...,
        [0, 0, 0,  ..., 1, 1, 1],
        [0, 0, 0,  ..., 1, 1, 1],
        [0, 0, 0,  ..., 1, 1, 1]]), 'context_mask': tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]])}

In [7]:
mask_gen_model.train()
for epoch in range(1):
    pbar = tqdm(train_dataloader)
    for idx, data in enumerate(pbar):
        input_ids = data['input_ids'].to(device)
        attention_mask = data['attention_mask'].to(device)
        context_mask = data['context_mask'].to(device)
        # get generated texts
        gen_outputs = model.generate(
            input_ids=input_ids,
            attention_mask=attention_mask,
            max_new_tokens=128,
            eos_token_id=terminators,
            pad_token_id=tokenizer.pad_token_id,
            do_sample=True,
            temperature=0.6,
            top_p=0.9,
            return_dict_in_generate=True,
            output_scores=True,
        )
        gen_tokens = gen_outputs.sequences
        pad_length = gen_tokens.size(1) - input_ids.size(1)
        # get the attention mask for the generated tokens, and also mask the padding tokens
        gen_attention_mask = F.pad(attention_mask, (0, pad_length), mode='constant', value=1)
        # (gen_tokens != pad_token_id).long() is the tokens mask, 1 for real tokens and 0 for padding tokens
        unpaded_token_mask = (gen_tokens != tokenizer.pad_token_id).long()
        unpaded_token_mask[:, :-pad_length] = 1
        gen_attention_mask = gen_attention_mask * unpaded_token_mask

        # get the response mask, which is the mask for the generated tokens (the user inputs are masked with 0)
        response_mask = gen_attention_mask.clone()
        response_mask[:, :-pad_length] = 0 # TODO: 有问题. 有问题吗？

        context_mask = F.pad(context_mask, (0, pad_length), mode='constant', value=0)

        loss_dict = mask_gen_model.train_one_batch(model, gen_tokens, gen_attention_mask, context_mask, response_mask, optimizer,
                                                   num_steps=5, mini_batch_size=16, ppo_epochs=2)


        log = f"Epoch {epoch+1}, Step {idx+1}: Loss = {loss_dict['loss']:.4f}, " \
               f"Actor Loss = {loss_dict['actor_loss']:.4f}, " \
               f"Critic Loss = {loss_dict['critic_loss']:.4f}, " \
               f"Entropy = {loss_dict['entropy']:.4f}, " \
               f"Returns = {loss_dict['returns']:.4f}, " \
               f"Value = {loss_dict['value']:.4f}, " \
                f"mask_loss = {loss_dict['mask_loss']:.4f}" \
                f"std_loss = {loss_dict['std_loss']:.4f}" \
            #    f"Cont_loss = {loss_dict['contrast_loss']:.4f}, "  \
               
        pbar.set_description(log)

        if idx % 10 == 0:
            print()
        if idx % 100 == 0 and idx != 0:
            torch.save(mask_gen_model.state_dict(), f'saved_model/imdb_mask_gen_model_{epoch}_{idx}.pth') 

  0%|          | 0/5475 [00:00<?, ?it/s]We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)
Epoch 1, Step 1: Loss = 0.2152, Actor Loss = 0.0382, Critic Loss = 0.3664, Entropy = 0.6225, Returns = 0.3676, Value = 0.6443, mask_loss = 0.6824std_loss = 0.0299:   0%|          | 1/5475 [00:11<17:12:29, 11.32s/it]

ratio tensor(0.2526, device='cuda:0', grad_fn=<MeanBackward0>)



Epoch 1, Step 2: Loss = 0.3135, Actor Loss = 0.0331, Critic Loss = 0.5739, Entropy = 0.6546, Returns = 0.9713, Value = 0.8912, mask_loss = 0.6213std_loss = 0.0572:   0%|          | 2/5475 [00:20<14:59:43,  9.86s/it]

ratio tensor(1.4790, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 3: Loss = 0.0569, Actor Loss = -0.0177, Critic Loss = 0.1620, Entropy = 0.6296, Returns = 0.6583, Value = 0.6174, mask_loss = 0.6658std_loss = 0.0479:   0%|          | 3/5475 [00:29<14:47:15,  9.73s/it]

ratio tensor(0.9685, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 4: Loss = 0.1277, Actor Loss = -0.1087, Critic Loss = 0.4851, Entropy = 0.6173, Returns = 1.0627, Value = 1.0031, mask_loss = 0.6776std_loss = 0.0472:   0%|          | 4/5475 [00:41<16:04:25, 10.58s/it]

ratio tensor(2.3837, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 5: Loss = 0.0529, Actor Loss = -0.0697, Critic Loss = 0.2566, Entropy = 0.5711, Returns = 0.9155, Value = 0.6885, mask_loss = 0.7329std_loss = 0.0424:   0%|          | 5/5475 [00:52<16:08:07, 10.62s/it]

ratio tensor(2.6487, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 6: Loss = 0.1645, Actor Loss = 0.0112, Critic Loss = 0.3179, Entropy = 0.5714, Returns = 1.2221, Value = 1.2876, mask_loss = 0.7253std_loss = 0.0554:   0%|          | 6/5475 [01:02<15:45:06, 10.37s/it] 

ratio tensor(2.3546, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 7: Loss = 0.1222, Actor Loss = 0.0557, Critic Loss = 0.1427, Entropy = 0.4872, Returns = 0.9836, Value = 0.9092, mask_loss = 0.7976std_loss = 0.0577:   0%|          | 7/5475 [01:11<15:23:55, 10.14s/it]

ratio tensor(1.1414, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 8: Loss = 0.2102, Actor Loss = -0.0017, Critic Loss = 0.4339, Entropy = 0.5076, Returns = 1.0420, Value = 1.0413, mask_loss = 0.7892std_loss = 0.0466:   0%|          | 8/5475 [01:24<16:46:35, 11.05s/it]

ratio tensor(5.2578, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 9: Loss = 0.1185, Actor Loss = 0.0205, Critic Loss = 0.2071, Entropy = 0.5523, Returns = 0.8490, Value = 0.8398, mask_loss = 0.7380std_loss = 0.0786:   0%|          | 9/5475 [01:34<16:10:15, 10.65s/it] 

ratio tensor(1.1931, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 10: Loss = 0.2199, Actor Loss = -0.0502, Critic Loss = 0.5501, Entropy = 0.4952, Returns = 0.8208, Value = 0.7287, mask_loss = 0.7920std_loss = 0.0643:   0%|          | 10/5475 [01:49<18:08:44, 11.95s/it]

ratio tensor(1.6400, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 11: Loss = 0.1730, Actor Loss = -0.0188, Critic Loss = 0.3932, Entropy = 0.4809, Returns = 0.9218, Value = 0.8201, mask_loss = 0.7981std_loss = 0.0816:   0%|          | 11/5475 [01:58<16:38:17, 10.96s/it]

ratio tensor(0.8530, device='cuda:0', grad_fn=<MeanBackward0>)



Epoch 1, Step 12: Loss = 0.2632, Actor Loss = 0.0383, Critic Loss = 0.4591, Entropy = 0.4632, Returns = 1.1640, Value = 1.0834, mask_loss = 0.8115std_loss = 0.0706:   0%|          | 12/5475 [02:06<15:19:18, 10.10s/it] 

ratio tensor(1.1350, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 13: Loss = 0.1126, Actor Loss = -0.0869, Critic Loss = 0.4076, Entropy = 0.4276, Returns = 1.0812, Value = 1.0103, mask_loss = 0.8354std_loss = 0.0681:   0%|          | 13/5475 [02:16<15:19:58, 10.11s/it]

ratio tensor(2.0285, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 14: Loss = 0.1069, Actor Loss = -0.0220, Critic Loss = 0.2666, Entropy = 0.4445, Returns = 1.0986, Value = 1.0591, mask_loss = 0.8274std_loss = 0.0609:   0%|          | 14/5475 [02:26<15:05:22,  9.95s/it]

ratio tensor(1.1933, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 15: Loss = 0.0478, Actor Loss = -0.0591, Critic Loss = 0.2229, Entropy = 0.4484, Returns = 1.0099, Value = 0.9739, mask_loss = 0.8224std_loss = 0.0722:   0%|          | 15/5475 [02:37<15:42:21, 10.36s/it]

ratio tensor(5.1164, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 16: Loss = 0.1435, Actor Loss = 0.0298, Critic Loss = 0.2361, Entropy = 0.4406, Returns = 1.0298, Value = 1.0568, mask_loss = 0.8160std_loss = 0.0893:   0%|          | 16/5475 [02:48<15:52:08, 10.47s/it] 

ratio tensor(2.1780, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 17: Loss = 0.1157, Actor Loss = -0.0417, Critic Loss = 0.3234, Entropy = 0.4349, Returns = 1.0098, Value = 0.9579, mask_loss = 0.8184std_loss = 0.0866:   0%|          | 17/5475 [02:56<15:09:29, 10.00s/it]

ratio tensor(10.2945, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 18: Loss = 0.1539, Actor Loss = 0.0004, Critic Loss = 0.3151, Entropy = 0.4131, Returns = 0.6547, Value = 0.5758, mask_loss = 0.8451std_loss = 0.0661:   0%|          | 18/5475 [03:07<15:13:56, 10.05s/it] 

ratio tensor(1.3534, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 19: Loss = 0.2449, Actor Loss = 0.0822, Critic Loss = 0.3345, Entropy = 0.4546, Returns = 0.9491, Value = 1.1001, mask_loss = 0.8158std_loss = 0.0792:   0%|          | 19/5475 [03:17<15:32:29, 10.25s/it]

ratio tensor(18.8065, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 20: Loss = 0.1624, Actor Loss = -0.0038, Critic Loss = 0.3408, Entropy = 0.4209, Returns = 0.8927, Value = 1.0185, mask_loss = 0.8368std_loss = 0.0734:   0%|          | 20/5475 [03:31<16:50:56, 11.12s/it]

ratio tensor(0.9397, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 21: Loss = 0.0745, Actor Loss = -0.0656, Critic Loss = 0.2879, Entropy = 0.3909, Returns = 0.9282, Value = 0.8682, mask_loss = 0.8477std_loss = 0.0795:   0%|          | 21/5475 [03:43<17:17:06, 11.41s/it]

ratio tensor(26.9097, device='cuda:0', grad_fn=<MeanBackward0>)



Epoch 1, Step 22: Loss = 0.1032, Actor Loss = 0.0029, Critic Loss = 0.2070, Entropy = 0.3207, Returns = 0.5794, Value = 0.6541, mask_loss = 0.8869std_loss = 0.0809:   0%|          | 22/5475 [03:53<16:43:45, 11.04s/it] 

ratio tensor(4.3027, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 23: Loss = 0.4312, Actor Loss = 0.0036, Critic Loss = 0.8577, Entropy = 0.1192, Returns = 0.8736, Value = 0.8771, mask_loss = 0.9715std_loss = 0.0227:   0%|          | 23/5475 [04:02<16:06:41, 10.64s/it]

ratio tensor(0.5395, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 24: Loss = 0.1356, Actor Loss = 0.0090, Critic Loss = 0.2557, Entropy = 0.1192, Returns = 0.8790, Value = 0.8974, mask_loss = 0.9719std_loss = 0.0229:   0%|          | 24/5475 [04:14<16:28:48, 10.88s/it]

ratio tensor(1.1990, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 25: Loss = 0.0974, Actor Loss = 0.0139, Critic Loss = 0.1688, Entropy = 0.0881, Returns = 0.8435, Value = 0.8649, mask_loss = 0.9807std_loss = 0.0162:   0%|          | 25/5475 [04:25<16:23:00, 10.82s/it]

ratio tensor(1.2367, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 26: Loss = 0.0399, Actor Loss = -0.0284, Critic Loss = 0.1386, Entropy = 0.0907, Returns = 0.9687, Value = 0.9538, mask_loss = 0.9792std_loss = 0.0202:   0%|          | 26/5475 [04:38<17:30:45, 11.57s/it]

ratio tensor(1.1614, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 27: Loss = 0.1607, Actor Loss = -0.0013, Critic Loss = 0.3260, Entropy = 0.1013, Returns = 0.9627, Value = 0.9352, mask_loss = 0.9746std_loss = 0.0291:   0%|          | 27/5475 [04:47<16:27:44, 10.88s/it]

ratio tensor(0.9989, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 28: Loss = 0.0929, Actor Loss = -0.0578, Critic Loss = 0.3029, Entropy = 0.0669, Returns = 1.0148, Value = 0.9685, mask_loss = 0.9867std_loss = 0.0098:   1%|          | 28/5475 [04:56<15:23:22, 10.17s/it]

ratio tensor(1.4196, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 29: Loss = 0.1363, Actor Loss = 0.0040, Critic Loss = 0.2658, Entropy = 0.0628, Returns = 1.0669, Value = 1.1076, mask_loss = 0.9871std_loss = 0.0139:   1%|          | 29/5475 [05:05<15:05:35,  9.98s/it] 

ratio tensor(1.0658, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 30: Loss = 0.1557, Actor Loss = 0.0012, Critic Loss = 0.3101, Entropy = 0.0591, Returns = 0.9105, Value = 0.9310, mask_loss = 0.9882std_loss = 0.0115:   1%|          | 30/5475 [05:15<14:59:27,  9.91s/it]

ratio tensor(1.0431, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 31: Loss = 0.0861, Actor Loss = 0.0144, Critic Loss = 0.1443, Entropy = 0.0456, Returns = 0.9374, Value = 0.9452, mask_loss = 0.9912std_loss = 0.0102:   1%|          | 31/5475 [05:27<15:51:17, 10.48s/it]

ratio tensor(1.0020, device='cuda:0', grad_fn=<MeanBackward0>)



Epoch 1, Step 32: Loss = 0.1243, Actor Loss = -0.0072, Critic Loss = 0.2642, Entropy = 0.0583, Returns = 0.8546, Value = 0.8156, mask_loss = 0.9857std_loss = 0.0181:   1%|          | 32/5475 [05:39<16:48:45, 11.12s/it]

ratio tensor(7.8846, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 33: Loss = 0.1347, Actor Loss = 0.0318, Critic Loss = 0.2070, Entropy = 0.0598, Returns = 0.7860, Value = 0.8259, mask_loss = 0.9865std_loss = 0.0204:   1%|          | 33/5475 [05:49<16:10:25, 10.70s/it] 

ratio tensor(0.9987, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 34: Loss = 0.0849, Actor Loss = 0.0047, Critic Loss = 0.1615, Entropy = 0.0549, Returns = 0.7958, Value = 0.7911, mask_loss = 0.9878std_loss = 0.0220:   1%|          | 34/5475 [05:59<15:57:10, 10.56s/it]

ratio tensor(1.1567, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 35: Loss = 0.0949, Actor Loss = -0.0091, Critic Loss = 0.2087, Entropy = 0.0404, Returns = 1.0094, Value = 0.9799, mask_loss = 0.9913std_loss = 0.0146:   1%|          | 35/5475 [06:10<15:48:03, 10.46s/it]

ratio tensor(1.2218, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 36: Loss = 0.1089, Actor Loss = 0.0014, Critic Loss = 0.2161, Entropy = 0.0524, Returns = 0.7472, Value = 0.7020, mask_loss = 0.9881std_loss = 0.0168:   1%|          | 36/5475 [06:21<16:02:00, 10.61s/it] 

ratio tensor(1.0313, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 37: Loss = 0.0387, Actor Loss = -0.0023, Critic Loss = 0.0834, Entropy = 0.0637, Returns = 0.7902, Value = 0.7385, mask_loss = 0.9865std_loss = 0.0167:   1%|          | 37/5475 [06:30<15:20:38, 10.16s/it]

ratio tensor(0.9263, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 38: Loss = 0.0604, Actor Loss = 0.0110, Critic Loss = 0.1004, Entropy = 0.0780, Returns = 0.7389, Value = 0.7788, mask_loss = 0.9826std_loss = 0.0201:   1%|          | 38/5475 [06:39<15:02:20,  9.96s/it] 

ratio tensor(0.9047, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 39: Loss = 0.2345, Actor Loss = 0.0022, Critic Loss = 0.4659, Entropy = 0.0606, Returns = 0.7589, Value = 0.8150, mask_loss = 0.9876std_loss = 0.0127:   1%|          | 39/5475 [06:50<15:27:34, 10.24s/it]

ratio tensor(0.9983, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 40: Loss = 0.0524, Actor Loss = -0.0096, Critic Loss = 0.1252, Entropy = 0.0525, Returns = 0.8665, Value = 0.8549, mask_loss = 0.9894std_loss = 0.0117:   1%|          | 40/5475 [06:58<14:27:43,  9.58s/it]

ratio tensor(0.9806, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 41: Loss = 0.0982, Actor Loss = 0.0111, Critic Loss = 0.1752, Entropy = 0.0493, Returns = 0.8727, Value = 0.9318, mask_loss = 0.9903std_loss = 0.0115:   1%|          | 41/5475 [07:08<14:23:38,  9.54s/it] 

ratio tensor(1.0296, device='cuda:0', grad_fn=<MeanBackward0>)



Epoch 1, Step 42: Loss = 0.1392, Actor Loss = 0.0117, Critic Loss = 0.2558, Entropy = 0.0409, Returns = 1.0525, Value = 1.0531, mask_loss = 0.9922std_loss = 0.0095:   1%|          | 42/5475 [07:19<15:23:30, 10.20s/it]

ratio tensor(0.9633, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 43: Loss = 0.1276, Actor Loss = 0.0021, Critic Loss = 0.2522, Entropy = 0.0571, Returns = 0.8575, Value = 0.8396, mask_loss = 0.9881std_loss = 0.0142:   1%|          | 43/5475 [07:28<14:37:34,  9.69s/it]

ratio tensor(0.9671, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 44: Loss = 0.0952, Actor Loss = 0.0077, Critic Loss = 0.1761, Entropy = 0.0571, Returns = 0.8752, Value = 0.8700, mask_loss = 0.9876std_loss = 0.0160:   1%|          | 44/5475 [07:40<15:56:57, 10.57s/it]

ratio tensor(0.9762, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 45: Loss = 0.0347, Actor Loss = -0.0074, Critic Loss = 0.0856, Entropy = 0.0770, Returns = 0.8371, Value = 0.8841, mask_loss = 0.9822std_loss = 0.0243:   1%|          | 45/5475 [07:51<15:53:52, 10.54s/it]

ratio tensor(1.0575, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 46: Loss = 0.0840, Actor Loss = 0.0057, Critic Loss = 0.1576, Entropy = 0.0592, Returns = 1.1158, Value = 1.0713, mask_loss = 0.9873std_loss = 0.0171:   1%|          | 46/5475 [08:02<15:57:22, 10.58s/it] 

ratio tensor(1.0321, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 47: Loss = 0.0501, Actor Loss = -0.0128, Critic Loss = 0.1270, Entropy = 0.0596, Returns = 0.9296, Value = 0.9181, mask_loss = 0.9866std_loss = 0.0178:   1%|          | 47/5475 [08:14<16:48:00, 11.14s/it]

ratio tensor(1.0178, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 48: Loss = 0.0836, Actor Loss = -0.0248, Critic Loss = 0.2178, Entropy = 0.0547, Returns = 1.0336, Value = 1.0272, mask_loss = 0.9888std_loss = 0.0130:   1%|          | 48/5475 [08:26<17:24:36, 11.55s/it]

ratio tensor(1.0181, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 49: Loss = 0.0322, Actor Loss = -0.0029, Critic Loss = 0.0712, Entropy = 0.0454, Returns = 0.9718, Value = 0.9800, mask_loss = 0.9908std_loss = 0.0112:   1%|          | 49/5475 [08:40<18:06:58, 12.02s/it]

ratio tensor(1.0346, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 50: Loss = 0.0651, Actor Loss = -0.0151, Critic Loss = 0.1617, Entropy = 0.0640, Returns = 1.1587, Value = 1.1384, mask_loss = 0.9859std_loss = 0.0167:   1%|          | 50/5475 [08:49<17:06:19, 11.35s/it]

ratio tensor(0.9340, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 51: Loss = -0.0080, Actor Loss = -0.0375, Critic Loss = 0.0601, Entropy = 0.0605, Returns = 1.0017, Value = 1.0111, mask_loss = 0.9865std_loss = 0.0176:   1%|          | 51/5475 [08:59<16:17:49, 10.82s/it]

ratio tensor(1.1357, device='cuda:0', grad_fn=<MeanBackward0>)



Epoch 1, Step 52: Loss = 0.0894, Actor Loss = -0.0048, Critic Loss = 0.1898, Entropy = 0.0707, Returns = 0.9521, Value = 1.0256, mask_loss = 0.9847std_loss = 0.0173:   1%|          | 52/5475 [09:12<17:15:50, 11.46s/it] 

ratio tensor(1.0189, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 53: Loss = 0.0718, Actor Loss = 0.0114, Critic Loss = 0.1223, Entropy = 0.0784, Returns = 1.0964, Value = 1.1285, mask_loss = 0.9814std_loss = 0.0258:   1%|          | 53/5475 [09:22<16:31:25, 10.97s/it] 

ratio tensor(1.2778, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 54: Loss = 0.0491, Actor Loss = -0.0053, Critic Loss = 0.1098, Entropy = 0.0546, Returns = 1.2333, Value = 1.2989, mask_loss = 0.9882std_loss = 0.0161:   1%|          | 54/5475 [09:30<15:15:43, 10.14s/it]

ratio tensor(0.9917, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 55: Loss = 0.0992, Actor Loss = 0.0052, Critic Loss = 0.1889, Entropy = 0.0489, Returns = 1.1148, Value = 1.1190, mask_loss = 0.9896std_loss = 0.0157:   1%|          | 55/5475 [09:40<15:25:44, 10.25s/it] 

ratio tensor(0.9564, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 56: Loss = 0.1779, Actor Loss = 0.0083, Critic Loss = 0.3400, Entropy = 0.0437, Returns = 1.1554, Value = 1.0882, mask_loss = 0.9907std_loss = 0.0144:   1%|          | 56/5475 [09:54<16:46:57, 11.15s/it]

ratio tensor(0.8777, device='cuda:0', grad_fn=<MeanBackward0>)


Epoch 1, Step 56: Loss = 0.1779, Actor Loss = 0.0083, Critic Loss = 0.3400, Entropy = 0.0437, Returns = 1.1554, Value = 1.0882, mask_loss = 0.9907std_loss = 0.0144:   1%|          | 56/5475 [09:55<16:00:24, 10.63s/it]


KeyboardInterrupt: 

In [8]:
import numpy as np
import torch.nn.functional as F
from datasets import Dataset

mask_gen_model.load_state_dict(torch.load('saved_model/imdb_mask_gen_model_0_100.pth',map_location=device))

mask_gen_model.eval()

test_dataloader = DataLoader(train_ds, batch_size=batch_size, collate_fn=collate_fn, shuffle=False)

test_inputs = next(iter(test_dataloader)).to(device)
# test_inputs = next(iter(train_dataloader)).to(device)

# tokens = tokenizer.convert_ids_to_tokens(test_inputs['input_ids'][idx])



# data_dict = {
#     'sentence': ["I absolutely love this product! It exceeded all my expectations.", 
#              "The movie was fantastic, and the acting was top-notch.",
#              "This restaurant offers great service and delicious food. Highly recommend!",
#              "The product works as advertised, nothing more, nothing less.",
#              "The event was well-organized, but it didn’t leave a lasting impression.",
#              "t’s an average phone, nothing special but it does the job.",
#              "I’m really disappointed with this purchase. It broke within a week.",
#              "The movie was too long and boring, I wouldn’t recommend it.",
#              "Terrible customer service, I won’t be coming back to this place."],
#     'label': [1, 1, 1, 0, 0, 0, -1, -1, -1]
# }
# manual_test_data = Dataset.from_dict(data_dict)

# manual_test_dataloader = DataLoader(manual_test_data, batch_size=9, collate_fn=collate_fn, shuffle=False)
# #
# test_inputs = next(iter(manual_test_dataloader)).to(device)


# generate the answer for the test inputs
gen_outputs = model.generate(
            input_ids=test_inputs['input_ids'],
            attention_mask=test_inputs['attention_mask'],
            max_new_tokens=128,
            eos_token_id=terminators,
            pad_token_id=tokenizer.pad_token_id,
            do_sample=True,
            temperature=0.6,
            top_p=0.9,
            return_dict_in_generate=True,
            output_scores=True,
        )
input_ids = test_inputs['input_ids']
attention_mask = test_inputs['attention_mask']
gen_tokens = gen_outputs.sequences
pad_length = gen_tokens.size(1) - input_ids.size(1)
# get the attention mask for the generated tokens, and also mask the padding tokens
gen_attention_mask = F.pad(attention_mask, (0, pad_length), mode='constant', value=1)
context_mask = F.pad(test_inputs['context_mask'], (0, pad_length), mode='constant', value=0)
# (gen_tokens != pad_token_id).long() is the tokens mask, 1 for real tokens and 0 for padding tokens
unpaded_token_mask = (gen_tokens != tokenizer.pad_token_id).long()
unpaded_token_mask[:, :-pad_length] = 1
gen_attention_mask = gen_attention_mask * unpaded_token_mask

response_mask = gen_attention_mask.clone()
response_mask[:, :-pad_length] = 0 # TODO: 有问题. 有问题吗？

# context_mask = F.pad(context_mask, (0, pad_length), mode='constant', value=0)

# with torch.no_grad():
#     # prompt_outputs = model(input_ids=test_inputs['input_ids'], attention_mask=test_inputs['attention_mask'], output_hidden_states=True, return_dict=True)
#     prompt_outputs = model(input_ids=gen_tokens, attention_mask=gen_attention_mask, output_hidden_states=True, return_dict=True)

#     last_hidden_state = prompt_outputs.hidden_states[-1].float()
#     mask_logits = mask_gen_model(last_hidden_state)


with torch.no_grad():
    state = gen_tokens, gen_attention_mask, context_mask, response_mask
    dist, value = mask_gen_model.get_dist_critic(model, state)

mask_logits = dist.logits

  mask_gen_model.load_state_dict(torch.load('saved_model/imdb_mask_gen_model_0_100.pth',map_location=device))


In [9]:
import random
idx = random.randint(0, 8)
# idx = 0
test_ids = gen_tokens[idx]
test_mask = gen_attention_mask[idx]
test_mask_prob = torch.sigmoid(mask_logits[idx])
# inverse TODO
# test_mask_prob = 1 - test_mask_prob
test_context_mask = context_mask[idx]

test_tokens = tokenizer.convert_ids_to_tokens(test_ids)
scores = test_mask_prob * test_context_mask

def normalize_except_zeros(array):
    # Create a mask to identify non-zero elements
    mask = (array > 0)
    
    # Extract non-zero elements
    non_zero_elements = array[mask]
    
    # Normalize non-zero elements
    min_val = np.min(non_zero_elements)
    max_val = np.max(non_zero_elements)

    normalized_non_zero_elements = (non_zero_elements - min_val) / (max_val - min_val)
    
    # Create a copy of the original array to preserve zero values
    normalized_array = np.copy(array)
    
    # Assign normalized values back to the corresponding positions
    normalized_array[mask] = normalized_non_zero_elements
    
    return normalized_array
scores = normalize_except_zeros(scores.detach().cpu().numpy())

# # remove special tokens
# filtered_token_scores = [(token, score) for token, score in zip(test_tokens, scores) if token not in tokenizer.all_special_tokens]
filtered_token_scores = [(token, score) for token, score in zip(test_tokens, scores) ]

In [10]:
for token, score in filtered_token_scores:
    print(f"Token: {token}, Score: {score}")

Token: <|eot_id|>, Score: 0.0
Token: <|eot_id|>, Score: 0.0
Token: <|eot_id|>, Score: 0.0
Token: <|eot_id|>, Score: 0.0
Token: <|eot_id|>, Score: 0.0
Token: <|eot_id|>, Score: 0.0
Token: <|eot_id|>, Score: 0.0
Token: <|eot_id|>, Score: 0.0
Token: <|eot_id|>, Score: 0.0
Token: <|eot_id|>, Score: 0.0
Token: <|eot_id|>, Score: 0.0
Token: <|eot_id|>, Score: 0.0
Token: <|eot_id|>, Score: 0.0
Token: <|eot_id|>, Score: 0.0
Token: <|eot_id|>, Score: 0.0
Token: <|eot_id|>, Score: 0.0
Token: <|eot_id|>, Score: 0.0
Token: <|eot_id|>, Score: 0.0
Token: <|eot_id|>, Score: 0.0
Token: <|eot_id|>, Score: 0.0
Token: <|eot_id|>, Score: 0.0
Token: <|eot_id|>, Score: 0.0
Token: <|eot_id|>, Score: 0.0
Token: <|eot_id|>, Score: 0.0
Token: <|eot_id|>, Score: 0.0
Token: <|eot_id|>, Score: 0.0
Token: <|eot_id|>, Score: 0.0
Token: <|eot_id|>, Score: 0.0
Token: <|eot_id|>, Score: 0.0
Token: <|eot_id|>, Score: 0.0
Token: <|eot_id|>, Score: 0.0
Token: <|eot_id|>, Score: 0.0
Token: <|eot_id|>, Score: 0.0
Token: <|e

In [11]:
import re

def clean_token(token):
    # 移除所有普通的特殊字符，比如 'Ġ' 和 'Ċ'
    return token.replace("Ġ", "").replace("Ċ", "")

merged_tokens_scores = []
current_token = ""
current_score = 0
count = 0

def is_special_token(token):
    # 判断是否是特殊的独立 token，例如 '<|start_header_id|>' 这样的 token
    return token.startswith("<|") and token.endswith("|>")

# 用于合并 token 和 score，取平均值
for token, score in filtered_token_scores:
    # 检查是否是特殊 token
    if is_special_token(token):
        # 如果当前有累积的 token，先把它们加入结果
        if current_token:
            # 确保分数归一化在 [0, 1] 之间
            average_score = min(current_score / count, 1.0)
            merged_tokens_scores.append((current_token, average_score))
            current_token = ""
            current_score = 0
            count = 0

        # 特殊 token 直接加入，不合并
        merged_tokens_scores.append((token, score))
        continue

    # 清理 token 中的特殊字符
    cleaned_token = clean_token(token)

    # 忽略清理后的空 token
    if not cleaned_token:
        continue

    # 判断是否是新单词的开始（以 'Ġ' 或 'Ċ' 开头的通常是新词）
    if token.startswith("Ġ") or token.startswith("Ċ"):
        if current_token:
            # 确保分数归一化在 [0, 1] 之间
            average_score = min(current_score / count, 1.0)
            merged_tokens_scores.append((current_token, average_score))
        
        # 初始化新的 token 和 score
        current_token = cleaned_token
        current_score = score
        count = 1
    else:
        # 如果是子词，则继续合并
        current_token += cleaned_token
        current_score += score
        count += 1

# 处理最后一个 token
if current_token:
    # 确保分数归一化在 [0, 1] 之间
    average_score = min(current_score / count, 1.0)
    merged_tokens_scores.append((current_token, average_score))

# # 输出结果
# for token, score in merged_tokens_scores:
#     print(f"Token: {token}, Score: {score}")





# 根据分数高亮文本（示例中使用HTML标签）
highlighted_text = ""
for token, score in merged_tokens_scores:
    # 动态设置背景颜色：score为0时为白色，score为1时为绿色
    red = int((1 - score) * 255)
    green = 255
    blue = int((1 - score) * 255)
    color = f'rgb({red}, {green}, {blue})'
    highlighted_text += f'<span style="background-color: {color}; color: black;">{token}</span> '

# 打印高亮后的文本
from IPython.display import display, HTML
display(HTML(highlighted_text.strip()))

In [None]:
(test_mask_prob * test_context_mask).max()

tensor(0.8213, device='cuda:0')

In [None]:
filtered_token_scores

In [None]:
test_inputs.keys()

collate_fn()

tokenizer("this is a sentence")

In [22]:
new_ds = ds['train'].select(range(3))

In [26]:
from datasets import Dataset

data_dict = {
    'text': ["This is the first example.", "This is the second example."],
    'label': [0, 1]
}
dataset = Dataset.from_dict(data_dict)

In [None]:
new_ds[0]