Skip to content

Commit

Permalink
update code
Browse files Browse the repository at this point in the history
  • Loading branch information
liucongg committed Jun 13, 2023
1 parent e39cfef commit 710e6e6
Show file tree
Hide file tree
Showing 3 changed files with 424 additions and 2 deletions.
20 changes: 18 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

由于官方模型和代码一直在更新,请使用项目中的代码,对应版本模型见[百度网盘](https://pan.baidu.com/s/1-UrZWnqw6Ciyo5K2NLraDg),提取码:jh0l

- update-2023.06.12 [**增加流水线并行训练方法**](https://zhuanlan.zhihu.com/p/636488690)
- update-2023.04.18 **增加文本生成任务评测**
- update-2023.04.05 **增加信息抽取任务评测**

Expand Down Expand Up @@ -171,7 +172,22 @@ CUDA_VISIBLE_DEVICES=0 nohup deepspeed --master_port 5555 finetuning_freeze.py -
| 分数 | 51.75 | 73.75 | 87.75 | 79.25 | 86.75 |




### 文本分类
待补充

## 流水线并行训练
代码说明见:[大模型流水线并行(Pipeline)实战](https://zhuanlan.zhihu.com/p/636488690)

模型训练详细代码见Github中train_pipeline.py文件。 训练脚本:
```
CUDA_VISIBLE_DEVICES=0,1,2,3 deepspeed --master_port 5524 train_pipeline.py --train_path data/spo_0.json --model_name_or_path ./ChatGLM-6B/ --per_device_train_batch_size 14 --max_len 1024 --max_src_len 512 --num_train_epochs 5 --gradient_accumulation_steps 1 --seed 1234 --show_loss_step 20 --num_stages 4 --save_model_step 100 --output_dir ./output-glm-pp
```
模型转换详细代码见Github中convert_model_to_hf.py文件。模型转换脚本:
```
python3 convert_model_to_hf.py --ori_model_dir ./ChatGLM-6B/ --pipeline_model_dir output-glm-pp/global_step300/ --save_model_dir output-glm-pp/gs300/
```
| 步数 | 100 | 200 | 300 | 400 | 500 |
| ------- | ------ | ------ | ------ | ------ | ------ |
| F1值 | 0.4931 | 0.5132 | 0.5882 | 0.5793 | 0.5874 |

相比于之前其他微调方法(PT、Freeze、Lora等)来说,全量参数微调效果并不是最好,可能由于数据量不足导致。
55 changes: 55 additions & 0 deletions convert_model_to_hf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# -*- coding:utf-8 -*-
# @project: ChatGLM-Finetuning
# @filename: convert_model_to_hf
# @author: 刘聪NLP
# @zhihu: https://www.zhihu.com/people/LiuCongNLP
# @contact: logcongcong@gmail.com
# @time: 2023/6/5 11:06
"""
文件说明:
"""
import torch
from pathlib import Path
import os
from os.path import join
from shutil import copy
import argparse


def convert_model_to_hf(ori_model_dir, pipeline_model_dir, save_model_dir):
    """Merge DeepSpeed pipeline-parallel layer checkpoints into one HF-style model dir.

    Reads every ``layer_XX-*`` state file produced by DeepSpeed pipeline training,
    remaps its keys to the HuggingFace ChatGLM-6B layout, and writes a single
    ``pytorch_model.bin`` plus the config/tokenizer files copied from the original
    model directory.

    Args:
        ori_model_dir: original HF ChatGLM-6B directory; ``config.json``,
            ``tokenizer_config.json`` and ``ice_text.model`` are copied from here.
        pipeline_model_dir: directory containing DeepSpeed ``layer_XX-...`` files.
        save_model_dir: output directory; created if it does not exist.
    """
    # torch.save / copy fail if the target directory is missing — create it up front.
    os.makedirs(save_model_dir, exist_ok=True)
    model_static_dict = {}
    for path in Path(pipeline_model_dir).iterdir():
        # Skip non-layer artifacts (e.g. optimizer/engine states) before logging,
        # so we only report files that are actually merged.
        if not path.name.startswith('layer'):
            continue
        print("已经处理文件:{}".format(path))
        small_static_dict = torch.load(path, map_location="cpu")
        # File names look like "layer_07-model_states.pt" → stage index 7.
        layer_i = int(path.name.split('-')[0].replace('layer_', ''))
        if layer_i == 0:
            # Stage 0 holds the input word embeddings.
            model_static_dict["transformer.word_embeddings.weight"] = small_static_dict["word_embeddings.weight"]
        elif layer_i == 30:
            # Last stage: embedding weights reused as the LM head (hard-coded for
            # ChatGLM-6B's 28 transformer blocks + embeddings + final norm + head).
            model_static_dict["lm_head.weight"] = small_static_dict["word_embeddings.weight"]
        elif layer_i == 29:
            # Final layer norm — keys already match the HF layout under "transformer.".
            for k, v in small_static_dict.items():
                model_static_dict["transformer." + k] = v
        else:
            # Transformer blocks: pipeline stages 1..28 map to transformer.layers.0..27.
            for k, v in small_static_dict.items():
                model_static_dict["transformer." + k.replace("layer.", "layers.{}.".format(layer_i - 1))] = v

    torch.save(model_static_dict, join(save_model_dir, "pytorch_model.bin"))
    # Copy config/tokenizer assets so the output dir is directly loadable with HF.
    copy(join(ori_model_dir, "config.json"), join(save_model_dir, "config.json"))
    copy(join(ori_model_dir, "tokenizer_config.json"), join(save_model_dir, "tokenizer_config.json"))
    copy(join(ori_model_dir, "ice_text.model"), join(save_model_dir, "ice_text.model"))


def set_args():
    """Build and parse the command-line arguments for the conversion script."""
    parser = argparse.ArgumentParser()
    # Register the three directory options from a table instead of repeating boilerplate.
    for flag, default_dir in (
        ('--ori_model_dir', 'ChatGLM-6B/'),
        ('--pipeline_model_dir', 'output-glm-pp/global_step300/'),
        ('--save_model_dir', 'output-glm-pp/gs300/'),
    ):
        parser.add_argument(flag, default=default_dir, type=str, help='')
    return parser.parse_args()


if __name__ == '__main__':
    # Script entry point: parse CLI options, then merge pipeline checkpoints into HF format.
    args = set_args()
    convert_model_to_hf(args.ori_model_dir, args.pipeline_model_dir, args.save_model_dir)

0 comments on commit 710e6e6

Please sign in to comment.