In [1]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, TextGenerationPipeline, GPT2Model

tokenizer = AutoTokenizer.from_pretrained("uer/gpt2-chinese-cluecorpussmall")
model = AutoModelForCausalLM.from_pretrained("uer/gpt2-chinese-cluecorpussmall")
print(model)

GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(21128, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2SdpaAttention(
          (c_attn): Conv1D(nf=2304, nx=768)
          (c_proj): Conv1D(nf=768, nx=768)
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D(nf=3072, nx=768)
          (c_proj): Conv1D(nf=768, nx=3072)
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=768, out_features=21128, bias=False)
)


In [2]:
model = GPT2Model.from_pretrained("uer/gpt2-chinese-cluecorpussmall")
print(model)

GPT2Model(
  (wte): Embedding(21128, 768)
  (wpe): Embedding(1024, 768)
  (drop): Dropout(p=0.1, inplace=False)
  (h): ModuleList(
    (0-11): 12 x GPT2Block(
      (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (attn): GPT2SdpaAttention(
        (c_attn): Conv1D(nf=2304, nx=768)
        (c_proj): Conv1D(nf=768, nx=768)
        (attn_dropout): Dropout(p=0.1, inplace=False)
        (resid_dropout): Dropout(p=0.1, inplace=False)
      )
      (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (mlp): GPT2MLP(
        (c_fc): Conv1D(nf=3072, nx=768)
        (c_proj): Conv1D(nf=768, nx=3072)
        (act): NewGELUActivation()
        (dropout): Dropout(p=0.1, inplace=False)
      )
    )
  )
  (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
)


In [33]:
lm_weight = (model.lm_head.state_dict()["weight"])
torch.save(lm_weight,"./dataset/lm_weight.pth")

AttributeError: 'GPT2Model' object has no attribute 'lm_head'

In [3]:
import torch.nn as nn
class GPT2(nn.Module):
    def __init__(self):
        super().__init__()

        
        self.model =  GPT2Model.from_pretrained("uer/gpt2-chinese-cluecorpussmall")

        self.lm_head = torch.nn.Linear(768,21128,bias=False)
        weight = torch.load("./dataset/lm_weight.pth")
        self.lm_head.weight = nn.Parameter(weight)

        self.value_layer = torch.nn.Sequential(torch.nn.Linear(768,1),torch.nn.Tanh(),torch.nn.Dropout(0.1))

    def forward(self,token_inputs):

        embedding = self.model(token_inputs)
        embedding = embedding["last_hidden_state"]

        embedding = torch.nn.Dropout(0.1)(embedding)
        logits = self.lm_head(embedding)

        return logits

In [4]:
token_list = []
with open("./dataset/ChnSentiCorp.txt", "r", encoding="UTF-8") as emotion_file:
    for line in emotion_file.readlines():
        line = line.strip().split(",")
        text = "".join(line[1:])
        inputs = tokenizer(text, return_tensors="pt")
        token = input_ids = inputs["input_ids"]
        attention_mask = inputs["attention_mask"]
        for id in token[0]:
            token_list.append(id.item())
token_list = torch.tensor(token_list * 5)

class TextSamplerDataset(torch.utils.data.Dataset):
    def __init__(self, data, seq_len):
        super().__init__()
        self.data = data
        self.seq_len = seq_len

    def __getitem__(self, index):
	    #下面的写法是为了遵守GPT2数据输入输出格式而特定的写法
        rand_start = torch.randint(0, self.data.size(0) - self.seq_len, (1,))
        full_seq = self.data[rand_start : rand_start + self.seq_len + 1].long()
        return full_seq[:-1],full_seq[1:]


    def __len__(self):
        return self.data.size(0) // self.seq_len

Token indices sequence length is longer than the specified maximum sequence length for this model (1064 > 1024). Running this sequence through the model will result in indexing errors


In [5]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
import torch
from tqdm import tqdm
from torch.utils.data import DataLoader
max_length = 128 + 1
batch_size = 2

device = "cuda"


save_path = "./train_model_emo.pth"
glm_model = GPT2()
glm_model.to(device)
#glm_model.load_state_dict(torch.load(save_path),strict=False)
optimizer = torch.optim.AdamW(glm_model.parameters(), lr=2e-4)
lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer,T_max = 1200,eta_min=2e-6,last_epoch=-1)
criterion = torch.nn.CrossEntropyLoss()


train_dataset = TextSamplerDataset(token_list,max_length)
loader = DataLoader(train_dataset,batch_size=batch_size,shuffle=True,num_workers=0,pin_memory=True)

for epoch in range(30):
    pbar = tqdm(loader, total=len(loader))
    for token_inp,token_tgt in pbar:
        token_inp = token_inp.to(device)
        token_tgt = token_tgt.to(device)

        logits = glm_model(token_inp)
        loss = criterion(logits.view(-1,logits.size(-1)),token_tgt.view(-1))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        lr_scheduler.step()  # 执行优化器
        pbar.set_description(f"epoch:{epoch +1}, train_loss:{loss.item():.5f}, lr:{lr_scheduler.get_last_lr()[0]*100:.5f}")
    if (epoch + 1) % 2 == 0:
        torch.save(glm_model.state_dict(),save_path)


  weight = torch.load("./dataset/lm_weight.pth")
epoch:1, train_loss:1.58591, lr:0.01990: 100%|██████████| 19145/19145 [42:45<00:00,  7.46it/s]  
epoch:2, train_loss:1.27436, lr:0.01959: 100%|██████████| 19145/19145 [54:56<00:00,  5.81it/s] 
epoch:3, train_loss:0.65167, lr:0.01909: 100%|██████████| 19145/19145 [9:57:17<00:00,  1.87s/it]       
epoch:4, train_loss:0.72051, lr:0.01840: 100%|██████████| 19145/19145 [49:11<00:00,  6.49it/s] 
epoch:5, train_loss:1.55126, lr:0.01754: 100%|██████████| 19145/19145 [48:59<00:00,  6.51it/s]
epoch:6, train_loss:0.41094, lr:0.01653: 100%|██████████| 19145/19145 [54:13<00:00,  5.88it/s]
epoch:7, train_loss:0.53372, lr:0.01538: 100%|██████████| 19145/19145 [57:32<00:00,  5.54it/s] 
epoch:8, train_loss:0.21933, lr:0.01413: 100%|██████████| 19145/19145 [58:10<00:00,  5.48it/s]
epoch:9, train_loss:0.24982, lr:0.01279: 100%|██████████| 19145/19145 [56:49<00:00,  5.61it/s] 
epoch:10, train_loss:0.20603, lr:0.01139: 100%|██████████| 19145/19145 [59:03<00: