# sft_config_ptv2.py
# -*- coding: utf-8 -*-
# @Time : 2023/5/16 10:11
import json
from config.constant_map import train_info_models
# Switchable quantized models for p-tuning-v2 training
train_model_config = train_info_models['chatglm']
# train_model_config = train_info_models['chatglm-6b-int4']
# train_model_config = train_info_models['chatglm-6b-int8']
# Global settings
global_args = {
"load_in_8bit": False,  # QLoRA int8
"load_in_4bit": False,  # QLoRA int4
# quantization config used when load_in_4bit is enabled
"quantization_config": None,
"num_layers_freeze": -1,  # for non-LoRA, non-p-tuning modes; must be <= num_layers in config.json
"pre_seq_len": 32,  # p-tuning-v2 parameter; None disables p-tuning-v2
"prefix_projection": False,  # p-tuning-v2 parameter
"num_layers": -1,  # number of backbone layers to use (1 to 28); -1 means all layers, otherwise only the first N layers
}
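# A minimal sketch (not part of this config) of what enabling 4-bit QLoRA could
# look like, assuming transformers' BitsAndBytesConfig is available; the values
# below are illustrative, not recommendations:
#
# import torch
# from transformers import BitsAndBytesConfig
# global_args.update({
#     "load_in_4bit": True,
#     "quantization_config": BitsAndBytesConfig(
#         load_in_4bit=True,
#         bnb_4bit_compute_dtype=torch.float16,  # compute dtype for 4-bit matmuls
#         bnb_4bit_use_double_quant=True,        # nested quantization of quant constants
#         bnb_4bit_quant_type="nf4",             # NormalFloat4 data type
#     ),
#     "pre_seq_len": None,  # p-tuning-v2 and QLoRA are typically not combined
# })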
train_info_args = {
'devices': 1,
'data_backend': 'record',  # one of [record, lmdb]; lmdb suits very large datasets, but note it uses more storage than record
# pretrained model path; leave empty to train from scratch
**train_model_config,
'convert_onnx': False,  # export the model to ONNX
'do_train': True,
'train_file': [ './data/finetune_train_examples.json'],
'max_epochs': 20,
'max_steps': -1,
'optimizer': 'lion',  # one of [lamb, adam, adamw_hf, adamw, adamw_torch, adamw_torch_fused, adamw_torch_xla, adamw_apex_fused, adafactor, adamw_anyprecision, sgd, adagrad, adamw_bnb_8bit, adamw_8bit, lion_8bit, lion_32bit, paged_adamw_32bit, paged_adamw_8bit, paged_lion_32bit, paged_lion_8bit]
'scheduler_type': 'CAWR',  # one of [linear, WarmupCosine, CAWR, CAL, Step, ReduceLROnPlateau, cosine, cosine_with_restarts, polynomial, constant, constant_with_warmup, inverse_sqrt, reduce_lr_on_plateau]
'scheduler': {'T_mult': 1,
'rewarm_epoch_num': 0.5,  # used when max_epochs is not None
# 'T_0': 50000,  # when max_epochs is None, set the restart period in steps directly
'verbose': False},
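# CAWR corresponds to a cosine-annealing-with-warm-restarts schedule. A rough
# torch equivalent is sketched below; deriving T_0 from rewarm_epoch_num and
# the steps per epoch is an assumption about the framework's internals:
#
# from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts
# steps_per_epoch = num_samples // (train_batch_size * gradient_accumulation_steps)
# scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=max(1, int(0.5 * steps_per_epoch)), T_mult=1)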
# 'scheduler_type': 'linear',
# 'scheduler': None,
# Alternative scheduler configurations:
# 'scheduler_type': 'WarmupCosine',
# 'scheduler': None,
# 'scheduler_type': 'ReduceLROnPlateau',
# 'scheduler': None,
# 'scheduler_type': 'Step',
# 'scheduler':{ 'decay_rate': 0.999,'decay_steps': 100,'verbose': True},
# 'scheduler_type': 'CAWR',
# 'scheduler':{'T_mult': 1, 'rewarm_epoch_num': 2, 'verbose': True},
# 'scheduler_type': 'CAL',
# 'scheduler': {'rewarm_epoch_num': 2,'verbose': True},
'optimizer_betas': (0.9, 0.999),
'train_batch_size': 4,
'eval_batch_size': 2,
'test_batch_size': 2,
'learning_rate': 1e-3,
'adam_epsilon': 1e-8,
'gradient_accumulation_steps': 1,
'max_grad_norm': 1.0,
'weight_decay': 0,
'warmup_steps': 0,
'output_dir': './output',
'max_seq_length': 1024,  # 2048 recommended if resources allow, matching the official setting
'max_target_length': 100,  # maximum generation length; reserved field
'use_fast_tokenizer': False,
'do_lower_case': False,
}
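
# A minimal usage sketch (illustrative; not this repo's actual training entry
# point): sanity-check the two dicts before handing them to a trainer.
if __name__ == '__main__':
    assert not (global_args['load_in_8bit'] and global_args['load_in_4bit']), \
        'load_in_8bit and load_in_4bit are mutually exclusive'
    # effective batch size = per-device batch * gradient accumulation * devices
    effective_batch = (train_info_args['train_batch_size']
                       * train_info_args['gradient_accumulation_steps']
                       * train_info_args['devices'])
    print('effective train batch size:', effective_batch)
    # dump the scalar-valued options for a quick visual check
    print(json.dumps({k: v for k, v in train_info_args.items()
                      if isinstance(v, (bool, int, float, str))},
                     ensure_ascii=False, indent=2))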