forked from mindspore-lab/mindformers
-
Notifications
You must be signed in to change notification settings - Fork 0
/
vit_base_p16.yaml
32 lines (29 loc) · 1.16 KB
/
vit_base_p16.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
# ViT-Base (patch size 16) image-classification model definition.
# NOTE(review): nesting restored from a copy whose indentation was lost; the
# repeated `type` keys require `arch` and `model_config` to be separate
# mappings. Confirm the layout against the upstream file.
model:
  arch:
    type: ViTForImageClassification
  model_config:
    type: ViTConfig
    image_size: 224  # input image size
    patch_size: 16  # patch size
    num_channels: 3  # channels of input images
    initializer_range: 0.02  # initial std of cls_tokens, pos_embed and linear weight
    hidden_size: 768  # embedding dimension
    num_hidden_layers: 12  # number of transformer blocks
    num_attention_heads: 12  # number of attention heads
    intermediate_size: 3072  # ffn_hidden_size of encoder
    qkv_bias: true  # whether the qkv dense layers have a bias
    hidden_act: gelu  # activation of MLP
    post_layernorm_residual: false  # whether to use post layernorm residual
    layer_norm_eps: 0.000001  # eps of layer_norm
    attention_probs_dropout_prob: 0.0  # drop rate of Attention
    hidden_dropout_prob: 0.0  # drop rate of MLP
    drop_path_rate: 0.1  # drop path rate of Block
    use_mean_pooling: true  # whether to use global mean pooling
    num_labels: 1000  # number of classes
    loss_type: SoftTargetCrossEntropy  # loss type
    checkpoint_name_or_path: 'vit_base_p16'
# Input preprocessing. `size` is the same value as the model's image_size (224).
# NOTE(review): nesting restored from a copy whose indentation was lost; the
# repeated `type` keys require `image_processor` to be its own mapping.
# Confirm the layout against the upstream file.
processor:
  type: ViTProcessor
  image_processor:
    type: ViTImageProcessor
    size: 224  # input image size