From 595496b94695909928a9e90811e5826ffe935e7b Mon Sep 17 00:00:00 2001 From: rraminen Date: Wed, 21 Apr 2021 17:38:19 -0400 Subject: [PATCH 1/2] pipecleaned bert_large_lamb.json --- bing_bert/bert_large_lamb_pipeclean.json | 59 ++++++++++++++++++++++++ bing_bert/bing_bert_dataset_provider.py | 7 +-- 2 files changed, 63 insertions(+), 3 deletions(-) create mode 100644 bing_bert/bert_large_lamb_pipeclean.json diff --git a/bing_bert/bert_large_lamb_pipeclean.json b/bing_bert/bert_large_lamb_pipeclean.json new file mode 100644 index 000000000000..e3e3d85590a6 --- /dev/null +++ b/bing_bert/bert_large_lamb_pipeclean.json @@ -0,0 +1,59 @@ +{ + "name": "bing_bert_large_lamb_seq", + "bert_token_file": "bert-large-cased", + "bert_model_file": "bert-large-cased", + "bert_model_config": { + "vocab_size_or_config_json_file": 119547, + "hidden_size": 1024, + "num_hidden_layers": 24, + "num_attention_heads": 16, + "intermediate_size": 4096, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "attention_probs_dropout_prob": 0.1, + "max_position_embeddings": 512, + "type_vocab_size": 2, + "initializer_range": 0.02 + }, + "data": { + "flags": { + "pretrain_dataset": true, + "pretrain_type": "wiki_bc" + }, + "mixed_seq_datasets": { + "128": { + "wiki_pretrain_dataset": "/data/DeepSpeed_data/wikipedia/seq128/text_npy/", + "bc_pretrain_dataset": "" + }, + "512": { + "wiki_pretrain_dataset": "/data/DeepSpeed_data/wikipedia/seq512/text_npy/", + "bc_pretrain_dataset": "" + } + } + }, + "mixed_seq_training": { + "128": { + "num_epochs": 150, + "warmup_proportion": 0.06, + "learning_rate": 11e-3, + "num_workers": 0, + "async_worker": true, + "decay_rate": 0.90, + "decay_step": 250, + "total_training_steps": 7500 + }, + "512": { + "num_epochs": 160, + "warmup_proportion": 0.02, + "learning_rate": 2e-3, + "num_workers": 0, + "async_worker": true, + "decay_rate": 0.90, + "decay_step": 150, + "total_training_steps": 7500 + } + }, + "validation": { + "path": "validation_set/" + } +} diff --git a/bing_bert/bing_bert_dataset_provider.py b/bing_bert/bing_bert_dataset_provider.py index 97c96962b37e..e9c48a9f8952 100755 --- a/bing_bert/bing_bert_dataset_provider.py +++ b/bing_bert/bing_bert_dataset_provider.py @@ -21,9 +21,10 @@ def __init__(self, args): # Initialize dataset paths self.dataset_paths = [] for dataset in ['wiki_pretrain_dataset', 'bc_pretrain_dataset']: - self.dataset_paths.append( - os.path.join(args.data_path_prefix, - args.config["data"]["datasets"][dataset])) + if (args.config["data"]["datasets"][dataset]): + self.dataset_paths.append( + os.path.join(args.data_path_prefix, + args.config["data"]["datasets"][dataset])) self.max_seq_length = args.max_seq_length self.max_predictions_per_seq = args.max_predictions_per_seq From f81f1330f187525b7f1e6abe865c71fd23279212 Mon Sep 17 00:00:00 2001 From: rraminen Date: Wed, 21 Apr 2021 17:40:58 -0400 Subject: [PATCH 2/2] Pipecleaned deepspeed_bsz32k_lamb_config_seq512.json to run on 8 GPUs --- ...d_bsz32k_lamb_config_seq512_pipeclean.json | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 bing_bert/deepspeed_bsz32k_lamb_config_seq512_pipeclean.json diff --git a/bing_bert/deepspeed_bsz32k_lamb_config_seq512_pipeclean.json b/bing_bert/deepspeed_bsz32k_lamb_config_seq512_pipeclean.json new file mode 100644 index 000000000000..c0e21d88b0dd --- /dev/null +++ b/bing_bert/deepspeed_bsz32k_lamb_config_seq512_pipeclean.json @@ -0,0 +1,24 @@ +{ + "train_batch_size": 128, + "train_micro_batch_size_per_gpu": 16, + "steps_per_print": 1000, + "prescale_gradients": false, + "optimizer": { + "type": "Lamb", + "params": { + "lr": 2e-3, + "weight_decay": 0.01, + "bias_correction": false, + "max_coeff": 0.3, + "min_coeff": 0.01 + } + }, + "gradient_clipping": 1.0, + + "wall_clock_breakdown": false, + + "fp16": { + "enabled": true, + "loss_scale": 0 + } +}