Skip to content

Commit

Permalink
Merge pull request microsoft#2 from rraminen/pipeclean_seq512_json_and_dataset
Browse files Browse the repository at this point in the history

Pipe cleaned files to use Wikipedia dataset and to run on 8 devices
  • Loading branch information
jithunnair-amd committed Apr 21, 2021
2 parents ea3bdc2 + f81f133 commit fb62e6e
Show file tree
Hide file tree
Showing 3 changed files with 87 additions and 3 deletions.
59 changes: 59 additions & 0 deletions bing_bert/bert_large_lamb_pipeclean.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
{
"name": "bing_bert_large_lamb_seq",
"bert_token_file": "bert-large-cased",
"bert_model_file": "bert-large-cased",
"bert_model_config": {
"vocab_size_or_config_json_file": 119547,
"hidden_size": 1024,
"num_hidden_layers": 24,
"num_attention_heads": 16,
"intermediate_size": 4096,
"hidden_act": "gelu",
"hidden_dropout_prob": 0.1,
"attention_probs_dropout_prob": 0.1,
"max_position_embeddings": 512,
"type_vocab_size": 2,
"initializer_range": 0.02
},
"data": {
"flags": {
"pretrain_dataset": true,
"pretrain_type": "wiki_bc"
},
"mixed_seq_datasets": {
"128": {
"wiki_pretrain_dataset": "/data/DeepSpeed_data/wikipedia/seq128/text_npy/",
"bc_pretrain_dataset": ""
},
"512": {
"wiki_pretrain_dataset": "/data/DeepSpeed_data/wikipedia/seq512/text_npy/",
"bc_pretrain_dataset": ""
}
}
},
"mixed_seq_training": {
"128": {
"num_epochs": 150,
"warmup_proportion": 0.06,
"learning_rate": 11e-3,
"num_workers": 0,
"async_worker": true,
"decay_rate": 0.90,
"decay_step": 250,
"total_training_steps": 7500
},
"512": {
"num_epochs": 160,
"warmup_proportion": 0.02,
"learning_rate": 2e-3,
"num_workers": 0,
"async_worker": true,
"decay_rate": 0.90,
"decay_step": 150,
"total_training_steps": 7500
}
},
"validation": {
"path": "validation_set/"
}
}
7 changes: 4 additions & 3 deletions bing_bert/bing_bert_dataset_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,10 @@ def __init__(self, args):
# Initialize dataset paths
self.dataset_paths = []
for dataset in ['wiki_pretrain_dataset', 'bc_pretrain_dataset']:
self.dataset_paths.append(
os.path.join(args.data_path_prefix,
args.config["data"]["datasets"][dataset]))
if (args.config["data"]["datasets"][dataset]):
self.dataset_paths.append(
os.path.join(args.data_path_prefix,
args.config["data"]["datasets"][dataset]))

self.max_seq_length = args.max_seq_length
self.max_predictions_per_seq = args.max_predictions_per_seq
Expand Down
24 changes: 24 additions & 0 deletions bing_bert/deepspeed_bsz32k_lamb_config_seq512_pipeclean.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
{
"train_batch_size": 128,
"train_micro_batch_size_per_gpu": 16,
"steps_per_print": 1000,
"prescale_gradients": false,
"optimizer": {
"type": "Lamb",
"params": {
"lr": 2e-3,
"weight_decay": 0.01,
"bias_correction": false,
"max_coeff": 0.3,
"min_coeff": 0.01
}
},
"gradient_clipping": 1.0,

"wall_clock_breakdown": false,

"fp16": {
"enabled": true,
"loss_scale": 0
}
}

0 comments on commit fb62e6e

Please sign in to comment.