Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 7 additions & 4 deletions examples/bert/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ python3 examples/bert/bert_preprocess.py \
--output_file $OUTPUT_DIR/pretraining-data/pretraining.tfrecord
# Run pretraining for 100 train steps only.
python3 examples/bert/bert_train.py \
--input_files $OUTPUT_DIR/pretraining-data/ \
--input_directory $OUTPUT_DIR/pretraining-data/ \
--vocab_file $OUTPUT_DIR/bert_vocab_uncased.txt \
--saved_model_output $OUTPUT_DIR/model/ \
--num_train_steps 100
Expand Down Expand Up @@ -197,12 +197,14 @@ python3 -c "from examples.utils.data_utils import preview_tfrecord; preview_tfre

After preprocessing, we can run pretraining with the `bert_train.py`
script. This will train a model and save it to the `--saved_model_output`
directory.
directory. If you want to train on data stored in a Google Cloud Storage (GCS) bucket, you can do so by setting the file path to
the URL of the GCS bucket. For example, `--input_directory=gs://your-bucket-name/your-data-path`. You can also save models directly to GCS using the same approach.

```shell
python3 examples/bert/bert_train.py \
--input_files path/to/data/ \
--input_directory path/to/data/ \
--vocab_file path/to/bert_vocab_uncased.txt \
--model_size tiny \
--saved_model_output path/to/model/
```

Expand All @@ -219,7 +221,8 @@ training for a few epochs to finetune the model.
```shell
python3 examples/bert/bert_finetune_glue.py \
--saved_model_input path/to/model/ \
--vocab_file path/to/bert_vocab_uncased.txt
--vocab_file path/to/bert_vocab_uncased.txt \
--task_name mrpc
```

The script could be easily adapted to any other text classification finetuning
Expand Down
18 changes: 6 additions & 12 deletions examples/bert/bert_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,59 +16,53 @@
"tiny": {
"num_layers": 2,
"hidden_size": 128,
"hidden_dropout": 0.1,
"dropout": 0.1,
"num_attention_heads": 2,
"attention_dropout": 0.1,
"inner_size": 512,
"inner_activation": "gelu",
"initializer_range": 0.02,
},
"mini": {
"num_layers": 4,
"hidden_size": 256,
"hidden_dropout": 0.1,
"dropout": 0.1,
"num_attention_heads": 4,
"attention_dropout": 0.1,
"inner_size": 1024,
"inner_activation": "gelu",
"initializer_range": 0.02,
},
"small": {
"num_layers": 4,
"hidden_size": 512,
"hidden_dropout": 0.1,
"dropout": 0.1,
"num_attention_heads": 8,
"attention_dropout": 0.1,
"inner_size": 2048,
"inner_activation": "gelu",
"initializer_range": 0.02,
},
"medium": {
"num_layers": 8,
"hidden_size": 512,
"hidden_dropout": 0.1,
"dropout": 0.1,
"num_attention_heads": 8,
"attention_dropout": 0.1,
"inner_size": 2048,
"inner_activation": "gelu",
"initializer_range": 0.02,
},
"base": {
"num_layers": 12,
"hidden_size": 768,
"hidden_dropout": 0.1,
"dropout": 0.1,
"num_attention_heads": 12,
"attention_dropout": 0.1,
"inner_size": 3072,
"inner_activation": "gelu",
"initializer_range": 0.02,
},
"large": {
"num_layers": 24,
"hidden_size": 1024,
"hidden_dropout": 0.1,
"dropout": 0.1,
"num_attention_heads": 16,
"attention_dropout": 0.1,
"inner_size": 4096,
"inner_activation": "gelu",
"initializer_range": 0.02,
Expand Down
Loading