In [1]:
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, get_linear_schedule_with_warmup, set_seed
from datasets import load_dataset, load_metric
import torch
import numpy as np


In [4]:
import wandb

In [5]:
!wandb login


[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit: 
Aborted!


In [9]:
import wandb
import os
os.environ['WANDB_DIR'] = os.getcwd() + '/wandb/'
os.environ['WANDB_CACHE_DIR'] = os.getcwd() + '/wandb/.cache/'
os.environ['WANDB_CONFIG_DIR'] = os.getcwd() + '/wandb/.config/'

import torch
import torch.nn as nn
import torch.optim as optim

# 初始化wandb
wandb.init(
    entity='kaifan-li',
    project="my_project"
)

# 构建模型
class SimpleModel(nn.Module):
    def __init__(self):
        super(SimpleModel, self).__init__()
        self.fc = nn.Linear(10, 1)
    
    def forward(self, x):
        return self.fc(x)

model = SimpleModel()

# 定义损失函数和优化器
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

# 训练循环
for epoch in range(10):
    running_loss = 0.0
    for i in range(100):
        inputs = torch.randn(32, 10)  # 随机生成输入数据
        labels = torch.randn(32, 1)   # 随机生成标签
        
        # 正向传播
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        
        # 反向传播和优化
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
    
    # 记录训练过程和指标
    avg_loss = running_loss / 100
    wandb.log({"epoch": epoch, "loss": avg_loss})




VBox(children=(Label(value='0.003 MB of 0.013 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.227164…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
loss,█▄▅▁▂▅▄▂▂▃

0,1
epoch,9.0
loss,0.99713


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016668189813693366, max=1.0…

In [24]:
import torch
import torch.nn as nn
from transformers import BartModel, BartConfig, BartForConditionalGeneration
from model.prefix_encoder import PrefixEncoder
class BartPrefixForConditionalGeneration(BartForConditionalGeneration):

    def __init__(self, config):
        super().__init__(config)
        # self.model = BartModel(config)
        # self.register_buffer("final_logits_bias", torch.zeros((1, self.model.shared.num_embeddings)))
        # self.lm_head = nn.Linear(config.d_model, self.model.shared.num_embeddings, bias=False)
        
        # MODIFIED
        # Start
        self.config = config
        # self.dropout = nn.Dropout(config.hidden_dropout_prob)
        # End
        
        # https://github.com/huggingface/transformers/issues/4701
        # if we use BartPrefixForConditionalGeneration.from_pretrained() to load the model, 
        # it will not overwrite the pretrained weights of the model
        # Initialize weights and apply final processing
        # self.post_init()
        
        # MODIFIED
        # Start
        for param in self.model.parameters():
            param.requires_grad = False
            
        self.pre_seq_len = config.pre_seq_len
        self.n_layer = config.num_hidden_layers
        self.n_head = config.num_attention_heads
        self.n_embd = config.hidden_size // config.num_attention_heads
        
        self.prefix_tokens = torch.arange(self.pre_seq_len).long()
        self.prefix_encoder = PrefixEncoder(config)
        
        bart_param = 0
        all_param = 0
        
        for name, param in self.model.named_parameters():
            bart_param += param.numel() # numel() returns the total number of elements in the input tensor
        
        for name, param in self.named_parameters():
            all_param += param.numel()
            
        trainable_param = all_param - bart_param
        
        print("Total parameters: {:,}".format(all_param))
        print("Trainable parameters: {:,} {:,%}".format((trainable_param), trainable_param/all_param))
        # End

In [25]:
config = BartConfig.from_pretrained('facebook/bart-large-cnn')
config.pre_seq_len=20
config.prefix_projection=False
bart = BartPrefixForConditionalGeneration(config)
bart

Total parameters: 406,781,952
Trainable parameters: 491,520 0.120831%


BartPrefixForConditionalGeneration(
  (model): BartModel(
    (shared): Embedding(50264, 1024, padding_idx=1)
    (encoder): BartEncoder(
      (embed_tokens): Embedding(50264, 1024, padding_idx=1)
      (embed_positions): BartLearnedPositionalEmbedding(1026, 1024)
      (layers): ModuleList(
        (0-11): 12 x BartEncoderLayer(
          (self_attn): BartAttention(
            (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
          )
          (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
          (activation_fn): GELUActivation()
          (fc1): Linear(in_features=1024, out_features=4096, bias=True)
          (fc2): Linear(in_features=4096, out_features=1024, bias=True)
          (final_layer_norm): 

In [28]:
# 检查模型的参数是否正确加载
state_dict = bart.state_dict()
if any(key.startswith('model.') for key in state_dict.keys()):
    print("BartPrefixForConditionalGeneration 模型成功加载了预训练的权重！")
else:
    print("BartPrefixForConditionalGeneration 模型没有成功加载预训练的权重，请检查模型定义。")

BartPrefixForConditionalGeneration 模型成功加载了预训练的权重！


In [13]:
config = BartConfig.from_pretrained('facebook/bart-large-cnn')
config.pre_seq_len=20

In [1]:
test = [1,2,3,4,5]
test = test[-1:]
test

[5]

In [2]:
import torch
import torch.nn as nn

class Test(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(10, 10)
        
    def forward(self, input):
        print('flag')
        for i in range(10):
            x = self.linear(input)
            print("this is {} epoch".format(i))
            print("x is {}".format(x))
            print('\n')

tensor = torch.randn(20, 10)
test = Test()
test(tensor)

flag
this is 0 epoch
x is tensor([[-0.2290,  1.2786,  0.3341,  0.0985, -0.0992, -0.7334, -0.8649, -0.4964,
         -0.4566,  1.5048],
        [-0.3796, -0.4937, -0.8009, -0.2741,  0.1658, -0.0121,  0.4416, -0.2015,
         -0.0962,  0.0998],
        [ 0.3215, -0.4579, -1.1388, -0.0204,  0.2511, -0.0035,  0.6961,  0.9450,
          0.1879, -0.3639],
        [ 0.0036, -0.5386, -1.3375, -0.5841,  0.0916, -1.3121, -0.6515, -0.2979,
          1.0804, -0.0919],
        [-0.2241,  0.4265,  0.0054, -0.0826, -0.5758, -0.6925,  0.0994, -0.6229,
         -0.8576,  0.3012],
        [ 0.2235,  1.4409,  0.8928,  0.7223, -0.6116, -1.6856, -0.0596, -0.7095,
         -1.2028,  0.3562],
        [-0.0972,  0.3060,  0.6573,  0.3085, -0.3260, -0.3235,  0.5225, -1.1482,
         -1.2006,  0.5086],
        [-0.6056, -1.3465, -1.2345, -0.8795,  0.2474,  0.7569, -0.2256, -0.3199,
          1.2152,  0.7675],
        [-0.3077, -0.2454, -0.5742,  0.2105, -0.5111,  0.5800, -0.4270, -0.8334,
          0.1129,  1.

In [2]:
from dataclasses import dataclass
from transformers import BartConfig, T5Config

class PromptBartConfig(BartConfig):
    def __init__(self, config):
        super().__init__()
        self.pre_seq_len = config.pre_seq_len

In [17]:
special_tokens = ['[CLS]', '[SEP]']
print(special_tokens[0])
seq = ['[CLS]', 'Hello', '[MASK]', 'world', '[SEP]']
for s in seq:
    print(s)
    drop_mask = sum([seq == t for t in special_tokens])
    print(drop_mask)

[CLS]
[CLS]
0
Hello
0
[MASK]
0
world
0
[SEP]
0


In [37]:
special_tokens = ['[CLS]', '[SEP]']
seq = [['[CLS]', 'Hello', '[MASK]', 'world', '[SEP]'],]

for s in seq:
    print(s)
    drop_mask = sum([s == t for t in special_tokens])
    print(drop_mask)
    drop_mask = [bool(1 - drop_mask)]
    print(drop_mask)


['[CLS]', 'Hello', '[MASK]', 'world', '[SEP]']
0
[True]


In [31]:
import math
special_token_ids = []
config = {
    "max_n_segments": 4,
}

def pad_and_segment(input_ids, config):
    """
    segment input_ids into segments
    be careful that all the segments are treated as one input sequence 
    and dealed with incurrence 
    """
    segmented_batch = []
    # input_ids: [batch_size, seq_len]
    for seq in input_ids:
        print('this is the origin seq: {}'.format(seq))
        drop_mask = sum([seq == t for t in special_token_ids])
        seq = seq[(1 - drop_mask)]
        seq = seq[:self.config.segment_size * config.max_n_segments]
        print('this is the seq after drop_mask: {}'.format(seq))
        
        align = self.config.segment_alignment
        if align in {'right', None}:
            split_inds = (list(range(len(seq), 0, -config.segment_size)) + [0])[::-1]
        elif align == 'left':
            split_inds = list(range(0, len(seq), config.segment_size)) + [len(seq)]
        elif align == 'center':
            n_seg = math.ceil(len(seq) / config.segment_size)
            split_inds = list(range(0, len(seq), math.ceil(len(seq) / n_seg))) + [len(seq)]
        else:
            raise NotImplementedError

        input_segments = [seq[start:end] for (start, end) in zip(split_inds, split_inds[1:])]
        # TODO: do the implementation
        # input_segments = [self.pad_add_special_tokens(t, self.config.input_size) for t in input_segments]
        print('this is the input_segments: {}'.format(input_segments))
        
        # add empty segment markers if needed
        n_empty_segments = config.max_n_segments - len(input_segments)
        
        # input_segments:
        input_segments = [None] * n_empty_segments + input_segments
        print('this is the input_segments after adding empty segment markers: {}'.format(input_segments))
        
        # segmented_batch: 
        segmented_batch.append(input_segments)
        print('this is the segmented_batch: {}'.format(segmented_batch))
        print('first sample in segmented_batch: {}'.format(segmented_batch[0]))
        
    segmented_batch = [[sample[seg_num] for sample in segmented_batch] \
                        for seg_num in range(config.max_n_segments)]
    print('this is the segmented_batch after re-arrange: {}'.format(segmented_batch))
    return segmented_batch

In [27]:
test = "LONDON, England (Reuters) -- Harry Potter star Daniel Radcliffe gains access to a reported £20 million ($41.1 million) fortune as he turns 18 on Monday, but he insists the money won't cast a spell on him. Daniel Radcliffe as Harry Potter in \"Harry Potter and the Order of the Phoenix\" To the disappointment of gossip columnists around the world, the young actor says he has no plans to fritter his cash away on fast cars, drink and celebrity parties. \"I don't plan to be one of those people who, as soon as they turn 18, suddenly buy themselves a massive sports car collection or something similar,\" he told an Australian interviewer earlier this month. \"I don't think I'll be particularly extravagant. \"The things I like buying are things that cost about 10 pounds -- books and CDs and DVDs.\" At 18, Radcliffe will be able to gamble in a casino, buy a drink in a pub or see the horror film \"Hostel: Part II,\" currently six places below his number one movie on the UK box office chart. Details of how he'll mark his landmark birthday are under wraps. His agent and publicist had no comment on his plans. \"I\'ll definitely have some sort of party,\" he said in an interview. \"Hopefully none of you will be reading about it.\" Radcliffe\'s earnings from the first five Potter films have been held in a trust fund which he has not been able to touch. Despite his growing fame and riches, the actor says he is keeping his feet firmly on the ground. \"People are always looking to say 'kid star goes off the rails,'\" he told reporters last month. \"But I try very hard not to go that way because it would be too easy for them.\" His latest outing as the boy wizard in \"Harry Potter and the Order of the Phoenix\" is breaking records on both sides of the Atlantic and he will reprise the role in the last two films. Watch I-Reporter give her review of Potter's latest  . There is life beyond Potter, however. The Londoner has filmed a TV movie called \"My Boy Jack,\" about author Rudyard Kipling and his son, due for release later this year. He will also appear in \"December Boys,\" an Australian film about four boys who escape an orphanage. Earlier this year, he made his stage debut playing a tortured teenager in Peter Shaffer's \"Equus.\" Meanwhile, he is braced for even closer media scrutiny now that he's legally an adult: \"I just think I'm going to be more sort of fair game,\" he told Reuters. E-mail to a friend . Copyright 2007 Reuters. All rights reserved.This material may not be published, broadcast, rewritten, or redistributed."
test

'LONDON, England (Reuters) -- Harry Potter star Daniel Radcliffe gains access to a reported £20 million ($41.1 million) fortune as he turns 18 on Monday, but he insists the money won\'t cast a spell on him. Daniel Radcliffe as Harry Potter in "Harry Potter and the Order of the Phoenix" To the disappointment of gossip columnists around the world, the young actor says he has no plans to fritter his cash away on fast cars, drink and celebrity parties. "I don\'t plan to be one of those people who, as soon as they turn 18, suddenly buy themselves a massive sports car collection or something similar," he told an Australian interviewer earlier this month. "I don\'t think I\'ll be particularly extravagant. "The things I like buying are things that cost about 10 pounds -- books and CDs and DVDs." At 18, Radcliffe will be able to gamble in a casino, buy a drink in a pub or see the horror film "Hostel: Part II," currently six places below his number one movie on the UK box office chart. Details o

In [28]:
batch = [
    ["LONDON, England (Reuters) -- Harry Potter star Daniel Radcliffe gains access to a reported £20 million ($41.1 million) fortune as he turns 18 on Monday, but he insists the money won't cast a spell on him. Daniel Radcliffe as Harry Potter in \"Harry Potter and the Order of the Phoenix\" To the disappointment of gossip columnists around the world, the young actor says he has no plans to fritter his cash away on fast cars, drink and celebrity parties. \"I don't plan to be one of those people who, as soon as they turn 18, suddenly buy themselves a massive sports car collection or something similar,\" he told an Australian interviewer earlier this month. \"I don't think I'll be particularly extravagant. \"The things I like buying are things that cost about 10 pounds -- books and CDs and DVDs.\" At 18, Radcliffe will be able to gamble in a casino, buy a drink in a pub or see the horror film \"Hostel: Part II,\" currently six places below his number one movie on the UK box office chart. Details of how he'll mark his landmark birthday are under wraps. His agent and publicist had no comment on his plans. \"I\'ll definitely have some sort of party,\" he said in an interview. \"Hopefully none of you will be reading about it.\" Radcliffe\'s earnings from the first five Potter films have been held in a trust fund which he has not been able to touch. Despite his growing fame and riches, the actor says he is keeping his feet firmly on the ground. \"People are always looking to say 'kid star goes off the rails,'\" he told reporters last month. \"But I try very hard not to go that way because it would be too easy for them.\" His latest outing as the boy wizard in \"Harry Potter and the Order of the Phoenix\" is breaking records on both sides of the Atlantic and he will reprise the role in the last two films. Watch I-Reporter give her review of Potter's latest  . There is life beyond Potter, however. The Londoner has filmed a TV movie called \"My Boy Jack,\" about author Rudyard Kipling and his son, due for release later this year. He will also appear in \"December Boys,\" an Australian film about four boys who escape an orphanage. Earlier this year, he made his stage debut playing a tortured teenager in Peter Shaffer's \"Equus.\" Meanwhile, he is braced for even closer media scrutiny now that he's legally an adult: \"I just think I'm going to be more sort of fair game,\" he told Reuters. E-mail to a friend . Copyright 2007 Reuters. All rights reserved.This material may not be published, broadcast, rewritten, or redistributed."]
]

pad_and_segment(batch, config)

this is the origin seq: ['LONDON, England (Reuters) -- Harry Potter star Daniel Radcliffe gains access to a reported £20 million ($41.1 million) fortune as he turns 18 on Monday, but he insists the money won\'t cast a spell on him. Daniel Radcliffe as Harry Potter in "Harry Potter and the Order of the Phoenix" To the disappointment of gossip columnists around the world, the young actor says he has no plans to fritter his cash away on fast cars, drink and celebrity parties. "I don\'t plan to be one of those people who, as soon as they turn 18, suddenly buy themselves a massive sports car collection or something similar," he told an Australian interviewer earlier this month. "I don\'t think I\'ll be particularly extravagant. "The things I like buying are things that cost about 10 pounds -- books and CDs and DVDs." At 18, Radcliffe will be able to gamble in a casino, buy a drink in a pub or see the horror film "Hostel: Part II," currently six places below his number one movie on the UK bo

AttributeError: 'int' object has no attribute 'bool'

In [44]:
import torch

input_ids = torch.tensor([
    [101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 102, 111, 112, 113, 114, 115, 102, 116, 117, 102],
    [101, 102, 103, 104, 105, 106, 102, 107, 108, 109, 110, 111, 102, 112, 113, 114, 115, 102, 116, 117]
])

special_token_ids = [101, 102]



class Processor:
    def __init__(self, segment_size, rmt_config, special_token_ids):
        self.segment_size = segment_size
        self.rmt_config = rmt_config
        self.special_token_ids = special_token_ids

    def pad_and_segment(self, input_ids):
        segmented_batch = []
        for seq in input_ids:
            print('seq:', seq)
            drop_mask = sum([seq == t for t in self.special_token_ids])
            print('drop_mask:', drop_mask)
            seq = seq[(1 - drop_mask).bool()]
            print('seq:', seq)
            seq = seq[:self.segment_size * self.rmt_config['max_n_segments']]
            segmented_batch.append(seq.tolist())
        return torch.tensor(segmented_batch)

# 假设我们有一个名为 rmt_config 的配置字典和 segment_size 变量
rmt_config = {'max_n_segments': 3}
segment_size = 5

processor = Processor(segment_size, rmt_config, special_token_ids)
output = processor.pad_and_segment(input_ids)

print("Original input_ids:")
print(input_ids)
print("Processed input_ids:")
print(output)


seq: tensor([101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 102, 111, 112, 113,
        114, 115, 102, 116, 117, 102])
drop_mask: tensor([1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1])
seq: tensor([103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116,
        117])
seq: tensor([101, 102, 103, 104, 105, 106, 102, 107, 108, 109, 110, 111, 102, 112,
        113, 114, 115, 102, 116, 117])
drop_mask: tensor([1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0])
seq: tensor([103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116,
        117])
Original input_ids:
tensor([[101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 102, 111, 112, 113,
         114, 115, 102, 116, 117, 102],
        [101, 102, 103, 104, 105, 106, 102, 107, 108, 109, 110, 111, 102, 112,
         113, 114, 115, 102, 116, 117]])
Processed input_ids:
tensor([[103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116,
         117],
        [103, 104, 105, 106,