mae_vit_base_p16.yaml (forked from mindspore-lab/mindformers)
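# MindFormers model config: MAE (masked autoencoder) pre-training with a
# ViT-Base backbone, 224x224 inputs and 16x16 patches.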
model:
  arch:
    type: ViTMAEForPreTraining
  model_config:
    type: ViTMAEConfig
    mask_ratio: 0.75                    # fraction of patches masked during pre-training
    image_size: 224                     # input image size
    patch_size: 16                      # patch size
    num_channels: 3                     # channels of input images
    initializer_range: 0.02             # initial std of cls_tokens and mask_tokens
    hidden_size: 768                    # encoder embedding dimension
    num_hidden_layers: 12               # number of encoder transformer blocks
    num_attention_heads: 12             # number of encoder attention heads
    intermediate_size: 3072             # ffn hidden size of the encoder
    qkv_bias: True                      # whether the qkv dense layers have bias
    hidden_act: gelu                    # activation of the MLP
    post_layernorm_residual: False      # whether to use post-layernorm residual
    layer_norm_eps: 0.000001            # eps of layer_norm
    attention_probs_dropout_prob: 0.    # dropout rate of attention
    hidden_dropout_prob: 0.             # dropout rate of the MLP
    decoder_hidden_size: 512            # decoder embedding dimension
    decoder_num_hidden_layers: 8        # number of decoder transformer blocks
    decoder_num_attention_heads: 16     # number of decoder attention heads
    decoder_intermediate_size: 2048     # ffn hidden size of the decoder
    norm_pix_loss: True                 # whether to normalize pixel targets in the reconstruction loss
    checkpoint_name_or_path: 'mae_vit_base_p16'
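
# --- Usage sketch (comments only; not part of the config) --------------------
# A minimal sketch of how a model config like this is typically consumed,
# assuming MindFormers' HuggingFace-style Auto classes; 'mae_vit_base_p16' is
# the registered name from checkpoint_name_or_path above. Treat the exact
# calls as an assumption, not a confirmed API for this revision of the repo.
#
#   from mindformers import AutoConfig, AutoModel
#
#   config = AutoConfig.from_pretrained('mae_vit_base_p16')  # assumed registered name
#   model = AutoModel.from_pretrained('mae_vit_base_p16')    # should build ViTMAEForPreTraining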