forked from mindspore-lab/mindformers
-
Notifications
You must be signed in to change notification settings - Fork 0
/
gpt2_52b.yaml
37 lines (36 loc) · 837 Bytes
/
gpt2_52b.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
# Model section of gpt2_52b.yaml.
#
# NOTE(review): this nesting was reconstructed from a copy whose indentation
# had been stripped. In the flat form `model` and `model_config` were null and
# the two `type` keys below collided as duplicate top-level keys. Placement of
# `arch` under `model` (rather than at top level) is inferred from the key
# order; confirm against the upstream file.
model:
  # Settings for the config class named by `type` (presumably consumed by the
  # loader that instantiates it; verify against the caller).
  model_config:
    type: GPT2Config

    # Network dimensions.
    seq_length: 1024
    vocab_size: 50257
    embedding_size: 8192
    num_layers: 64
    num_heads: 32
    expand_ratio: 4
    hidden_act: "fast_gelu"

    # Dropout and weight initialisation.
    dropout_prob: 0.0
    hidden_dropout_prob: 0.1
    attention_probs_dropout_prob: 0.1
    initializer_range: 0.02

    # Per-component dtype names; layernorm is the only one set to float32.
    param_init_type: "float16"
    layernorm_dtype: "float32"
    softmax_dtype: "float16"
    compute_dtype: "float16"

    # Empty string: no checkpoint name or path is configured here.
    checkpoint_name_or_path: ""

    # Decoding-related settings (grouping inferred from key names).
    # Here `eos_token` is an integer id, unlike the string-valued `eos_token`
    # used in the tokenizer section of this file.
    eos_token: 50256
    repetition_penalty: 1
    max_decode_length: 1024
    top_k: 5
    top_p: 1
    # Canonical lowercase boolean; parses to the same value as `True`.
    do_sample: true

  # Model class selected for this configuration.
  arch:
    type: GPT2LMHeadModel
# Processor section of gpt2_52b.yaml.
#
# NOTE(review): this nesting was reconstructed from a copy whose indentation
# had been stripped. In the flat form `processor` and `tokenizer` were null and
# the two `type` keys collided as duplicates. The key order (tokenizer fields,
# then `type: GPT2Tokenizer`, then `type: GPT2Processor`) is what places the
# first `type` inside `tokenizer` and the second on `processor`; confirm
# against the upstream file.
processor:
  return_tensors: ms

  tokenizer:
    # All four special tokens are set to the same '<|endoftext|>' string.
    unk_token: '<|endoftext|>'
    bos_token: '<|endoftext|>'
    eos_token: '<|endoftext|>'
    pad_token: '<|endoftext|>'
    type: GPT2Tokenizer

  type: GPT2Processor