remove distributed requirement from model building (microsoft#31)
* Remove the distributed requirement from model building; this is needed for MPI/AML support for this model.

* Update modelingpreln.py

Removed the print statement; without the rank check it would otherwise be printed many times.

Co-authored-by: Samyam Rajbhandari <samyamr@microsoft.com>
jeffra and samyam committed Aug 13, 2020
1 parent 9e2c34e commit 71ab29d
Showing 2 changed files with 3 additions and 7 deletions.
3 changes: 0 additions & 3 deletions bing_bert/deepspeed_train.py
@@ -368,9 +368,6 @@ def prepare_optimizer_parameters(args, model):


 def prepare_model_optimizer(args):
-    # Initialize torch distributed
-    torch.distributed.init_process_group(backend="nccl")
-
     # Loading Model
     model = BertMultiTask(args)
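With the unconditional NCCL init gone from prepare_model_optimizer, process-group setup is left to whatever launched the job (an MPI/AML launcher or the DeepSpeed runtime). As one illustration of deferring that setup, here is a minimal sketch using a hypothetical maybe_init_distributed helper that is not part of this commit:

import os
import torch

def maybe_init_distributed(backend="nccl"):
    # Hypothetical helper (not part of this commit): initialize torch.distributed
    # only when a launcher has exported the usual env:// variables and no process
    # group exists yet, so MPI/AML launchers or the DeepSpeed runtime can own
    # distributed setup instead of the model-building code.
    if not torch.distributed.is_available() or torch.distributed.is_initialized():
        return
    if "RANK" in os.environ and "WORLD_SIZE" in os.environ:
        torch.distributed.init_process_group(backend=backend)
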
7 changes: 3 additions & 4 deletions bing_bert/nvidia/modelingpreln.py
@@ -739,10 +739,9 @@ def init_bert_weights(self, module):
             num_layers = self.config.num_hidden_layers
             std = self.config.initializer_range
             if hasattr(module, 'bert_output_layer'):
-                if torch.distributed.get_rank() == 0:
-                    print("Accounting for accumulation on the residual path")
-                std = self.config.initializer_range / math.sqrt(
-                    2.0 * num_layers)
+                #print("Accounting for accumulation on the residual path")
+                std = self.config.initializer_range / math.sqrt(
+                    2.0 * num_layers)
             module.weight.data.normal_(mean=0.0, std=std)
         elif isinstance(module, BertLayerNorm):
             module.bias.data.zero_()
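For reference, the hunk above keeps the scaled initialization for output layers: the standard deviation is divided by sqrt(2 * num_layers) to account for accumulation on the residual path. A standalone sketch with assumed values (typical BERT-large settings, not taken from this diff):

import math
import torch.nn as nn

# Assumed illustrative values (BERT-large style defaults, not from this commit).
initializer_range = 0.02
num_layers = 24

# Output-projection weights get a smaller std so that deeper stacks add less
# variance on the residual path: 0.02 / sqrt(2 * 24) ~= 0.0029.
std = initializer_range / math.sqrt(2.0 * num_layers)

output_layer = nn.Linear(1024, 1024)
output_layer.weight.data.normal_(mean=0.0, std=std)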
