In [1]:
%run common.ipynb

tokenizer.decode(tokenizer.get_data(third_number=True))

  from .autonotebook import tqdm as notebook_tqdm


'S-92.12=-24.36/91.91+-91.85E'

In [2]:
def get_batch_data(batch_size=64):
    """Build one batch of (chosen, rejected) preference pairs.

    Every chosen sample is a full question+answer sequence obtained from
    ``tokenizer.get_data(third_number=True)``. The matching rejected sample
    keeps the question (everything up to and including the first '=' token)
    and replaces the answer with an empty one, i.e. just the 'E' token.

    Args:
        batch_size: number of pairs to generate. Defaults to 64, the value
            that used to be hard-coded.

    Returns:
        A ``(choice, reject)`` tuple. Each element is a dict with
        ``'input_ids'``, ``'attention_mask'`` and ``'label'`` tensors of shape
        ``(batch_size, longest chosen sequence)`` on ``device``. In ``label``
        the question and padding positions are set to -100.

    Raises:
        ValueError: if ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError(f'batch_size must be >= 1, got {batch_size}')

    pad_id = tokenizer.encoder['P']
    eq_id = tokenizer.encoder['=']
    end_id = tokenizer.encoder['E']

    def pad(data, split, lens):
        # Start from a blank canvas filled with the pad token.
        input_ids = torch.full((len(data), lens), pad_id, device=device)

        # Paste every sequence onto the left edge of its row.
        for row, seq in enumerate(data):
            input_ids[row, :len(seq)] = torch.tensor(seq, dtype=torch.long)

        attention_mask = (input_ids != pad_id).long()

        # Labels: only answer tokens are kept; the question (first `split`
        # positions of each row) and the padding are set to -100.
        label = input_ids.clone()
        question_len = torch.tensor(split, device=device).unsqueeze(1)
        position = torch.arange(lens, device=device).unsqueeze(0)
        label[position < question_len] = -100
        label[label == pad_id] = -100

        return {
            'input_ids': input_ids,
            'attention_mask': attention_mask,
            'label': label
        }

    # Correct question+answer sequences.
    choice = [
        tokenizer.get_data(third_number=True) for _ in range(batch_size)
    ]

    # A wrong answer is simply defined as an empty answer: keep the question
    # up to and including '=', then close the sequence with 'E'.
    split = [seq.index(eq_id) + 1 for seq in choice]
    reject = [seq[:s] + [end_id] for seq, s in zip(choice, split)]

    # Pad both halves to the longest chosen sequence so that they can be
    # concatenated into a single forward pass later.
    lens = max(len(seq) for seq in choice)

    return pad(choice, split, lens), pad(reject, split, lens)


# Smoke test: build one batch and display the (choice, reject) dicts of tensors.
get_batch_data()

({'input_ids': tensor([[ 1,  7, 11,  ...,  0,  0,  0],
          [ 1, 15, 11,  ...,  2,  0,  0],
          [ 1, 15, 11,  ...,  2,  0,  0],
          ...,
          [ 1, 15,  5,  ..., 12,  2,  0],
          [ 1, 15, 12,  ..., 11,  2,  0],
          [ 1, 15, 12,  ...,  0,  0,  0]], device='cuda:0'),
  'attention_mask': tensor([[1, 1, 1,  ..., 0, 0, 0],
          [1, 1, 1,  ..., 1, 0, 0],
          [1, 1, 1,  ..., 1, 0, 0],
          ...,
          [1, 1, 1,  ..., 1, 1, 0],
          [1, 1, 1,  ..., 1, 1, 0],
          [1, 1, 1,  ..., 0, 0, 0]], device='cuda:0'),
  'label': tensor([[-100, -100, -100,  ..., -100, -100, -100],
          [-100, -100, -100,  ...,    2, -100, -100],
          [-100, -100, -100,  ...,    2, -100, -100],
          ...,
          [-100, -100, -100,  ...,   12,    2, -100],
          [-100, -100, -100,  ...,   11,    2, -100],
          [-100, -100, -100,  ..., -100, -100, -100]], device='cuda:0')},
 {'input_ids': tensor([[ 1,  7, 11,  ...,  0,  0,  0],
          

In [3]:
# Policy model: this copy is optimised below, so it stays in training mode.
# NOTE(review): torch.load unpickles a complete module object, which can
# execute arbitrary code -- only load 'gen.model' from a trusted source.
model_gen = torch.load('gen.model')
model_gen.to(device)
model_gen.train()

# Reference model: a second copy of the same checkpoint that is only ever
# evaluated, never updated. It is frozen and switched to eval mode so that its
# log-probabilities are deterministic and it cannot accumulate gradients.
model_gen_ref = torch.load('gen.model')
model_gen_ref.to(device)
model_gen_ref.requires_grad_(False)
model_gen_ref.eval()

ModelGEN(
  (feature): LlamaModel(
    (embed_tokens): Embedding(22, 64, padding_idx=0)
    (layers): ModuleList(
      (0-3): 4 x LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): Linear(in_features=64, out_features=64, bias=False)
          (k_proj): Linear(in_features=64, out_features=64, bias=False)
          (v_proj): Linear(in_features=64, out_features=64, bias=False)
          (o_proj): Linear(in_features=64, out_features=64, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear(in_features=64, out_features=64, bias=False)
          (up_proj): Linear(in_features=64, out_features=64, bias=False)
          (down_proj): Linear(in_features=64, out_features=64, bias=False)
          (act_fn): SiLUActivation()
        )
        (input_layernorm): LlamaRMSNorm()
        (post_attention_layernorm): LlamaRMSNorm()
      )
    )
    (norm): LlamaRMSNorm()
  )
  (fc_out): Linear(in_features=64, 

In [4]:
def get_loss(model_gen, choice, reject):
    """Return the per-sample log-probability margin between chosen and rejected answers.

    Despite the name, the result is not yet a loss: it is
    ``log p(chosen answer) - log p(rejected answer)`` under ``model_gen``,
    where each term is the sum of token log-probabilities over the positions
    whose label is not -100.

    Args:
        model_gen: callable invoked as
            ``model_gen(input_ids=..., attention_mask=...)`` that returns
            logits indexed as ``[batch, position, vocab]``.
        choice: dict with ``'input_ids'``, ``'attention_mask'`` and
            ``'label'`` tensors for the chosen sequences.
        reject: dict with the same keys and shapes for the rejected sequences.

    Returns:
        1-D tensor with one margin per pair (length = ``choice`` batch size).
    """
    b = choice['input_ids'].shape[0]

    # Stack both halves so that a single forward pass handles them together.
    # [2b, lens]
    input_ids = torch.cat([choice['input_ids'], reject['input_ids']], dim=0)
    attention_mask = torch.cat(
        [choice['attention_mask'], reject['attention_mask']], dim=0)
    label = torch.cat([choice['label'], reject['label']], dim=0)

    # [2b, lens, vocab]
    out = model_gen(input_ids=input_ids, attention_mask=attention_mask)

    # Shift by one so that the logits at position t are scored against the
    # token at position t + 1.
    # [2b, lens - 1]
    label = label[:, 1:]
    # [2b, lens - 1, vocab]
    out = out[:, :-1]

    # Log-probabilities of every token. log_softmax is computed in log space,
    # so very unlikely tokens keep their true (large negative) log-probability
    # and a usable gradient; the former `(softmax + 1e-8).log()` saturated at
    # log(1e-8) ~= -18.4.
    out = out.log_softmax(dim=2)

    # gather() cannot take negative indices, so ignored positions temporarily
    # point at token 0; their values are zeroed out again below.
    keep = label != -100
    # [2b, lens - 1, 1]
    index = label.masked_fill(~keep, 0).unsqueeze(2)

    # Log-probability assigned to the label token at every position.
    # [2b, lens - 1]
    prob = torch.gather(out, dim=2, index=index).squeeze(2)

    # Joint log-probability of the answer: sum over answer positions only.
    prob = prob.masked_fill(~keep, 0.0).sum(1)

    # First b rows are the chosen sequences, last b rows the rejected ones.
    return prob[:b] - prob[b:]


# Smoke test: push one freshly generated batch through the policy model and
# display the resulting per-sample values.
demo_choice, demo_reject = get_batch_data()
get_loss(model_gen, demo_choice, demo_reject)

tensor([-81.4473, -54.0714, -70.3097, -55.9786, -66.7143, -74.3108, -52.3986,
        -70.3757, -77.1062, -42.0191, -50.4737, -53.8479, -44.9187, -42.3186,
        -72.4670, -48.3093, -86.0694, -76.4324, -55.6429, -59.1275, -78.3820,
        -50.7187, -69.7550, -95.5008, -77.0167, -50.7966, -47.0271, -54.3163,
        -81.2113, -65.7755, -55.3272, -56.4524, -64.8713, -79.5861, -56.2123,
        -78.0945, -73.5479, -39.5135, -41.1529, -38.9173, -74.6226, -67.9325,
        -72.8875, -36.5037, -51.1867, -68.7703, -57.0807, -58.2965, -69.4282,
        -60.8845, -53.3348, -52.7771, -65.3301, -52.3360, -76.4893, -69.4408,
        -46.7414, -87.0744, -62.7352, -55.8983, -74.1987, -43.9923, -38.6290,
        -67.7652], device='cuda:0', grad_fn=<SubBackward0>)

In [None]:
optimizer = torch.optim.Adam(model_gen.parameters(),
                             lr=1e-4,
                             betas=(0.9, 0.999),
                             eps=1e-8)

# Scale applied to the policy-vs-reference margin difference inside logsigmoid.
beta = 0.1

for i in range(100_000):
    choice, reject = get_batch_data()

    # get_loss returns the chosen-minus-rejected log-probability margin per
    # sample; the actual training loss is formed a few lines below.
    loss = get_loss(model_gen, choice, reject)
    with torch.no_grad():
        # The reference model only supplies a baseline; no gradients needed.
        loss_ref = get_loss(model_gen_ref, choice, reject)

    # logsigmoid is a smooth activation that pushes positive inputs toward
    # zero, so the loss shrinks as the policy's margin exceeds the reference's.
    loss = -torch.nn.functional.logsigmoid(beta * (loss - loss_ref)).mean()
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()

    if i % 2000 == 0:
        # Periodically sample a question (everything up to and including '=')
        # and print what the current policy generates for it.
        question = tokenizer.get_data(third_number=True)
        question = question[:question.index(tokenizer.encoder['=']) + 1]
        question = torch.LongTensor(question).unsqueeze(0).to(device)

        # Sampling is for inspection only, so skip autograd bookkeeping.
        with torch.no_grad():
            gen = Generater(model_gen).generate(question, max_length=35)[0]
        print(i, tokenizer.decode(gen.tolist()))

# Move the tuned policy to the CPU before saving it as a whole module.
model_gen.to('cpu')
torch.save(model_gen, 'dpo.model')

0 S90.06=9.62--7.69E
2000 S-91.98=-2.44*14.44+-94.44E
4000 S-81.36=-2.22/-22.22+-9.22E
6000 S-61.47=-1.11*15.45+-3.84E
8000 S-67.18=-1.06*24.45+-3.05E
10000 S-25.05=-9.16/-1.19+-1.19E
12000 S-89.50=-1.22*19.49+-39.00E
14000 S-4419.54=-52.85*82.55+-77.05E
16000 S5128.30=-62.64*-74.66+-6.66E
18000 S15.07=-72.98/-2.29+1.89E
20000 S5.12=-1.21*-1.21+-1.22E
22000 S33.73=-1.41*-1.44+39.33E
24000 S-138.93=-92.22-94.23+59.23E
26000 S53.01=-1.21*-22.44+2.19E
28000 S-86.73=-4.31*1.34+-97.00E
30000 S69.94=-1.11*-19.84+29.10E
32000 S88.63=-1.44*-3.44+70.14E
34000 S48.71=-1.47*-1.47+67.47E
36000 S-30.06=-1.23*-1.87+-37.02E
38000 S-2084.97=-32.66*66.66+-67.17E
40000 S65.84=-1.41/-41.44+66.02E
42000 S71.95=-1.11*-36.16+-1.12E
44000 S1.46=-1.10*-1.10+-1.10E
46000 S298.70=12.92*22.54+-47.17E
48000 S36.17=-1.41*-1.44+39.03E
50000 S3181.44=-42.99*-76.99+-5.99E
52000 S119.07=92.15--39.44+-4.00E
54000 S87.30=-1.44*-31.44+19.48E
56000 S-45.69=-1.21*12.49+-24.22E
58000 S-136.53=-9.11*12.17+-20.07E
60000 S-86.