This repository was archived by the owner on Jan 21, 2025. It is now read-only.
Merged
2 changes: 1 addition & 1 deletion mesh_tensorflow/beam_search.py
@@ -125,7 +125,7 @@ def beam_search(logits_fn,

Args:
logits_fn: Interface to the model, to provide logits.
- Shoud take:
+ Should take:
step_num - mtf Scalar
ids - mtf Tensor with shape [batch, beam, length]
Should return:
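For orientation, here is a minimal sketch of a logits_fn that matches the corrected docstring. The mesh setup, dimension sizes, and the zero-logits stand-in are invented for illustration, and the [batch, beam, vocab] return shape is an assumption, since the hunk cuts off after "Should return:".

import mesh_tensorflow as mtf

graph = mtf.Graph()
mesh = mtf.Mesh(graph, "example_mesh")
batch = mtf.Dimension("batch", 2)
beam = mtf.Dimension("beam", 4)
vocab = mtf.Dimension("vocab", 100)

def my_logits_fn(step_num, ids):
  # step_num: an mtf Scalar giving the current decode position (unused here).
  # ids: an mtf Tensor with shape [batch, beam, length] of tokens decoded so far.
  del step_num, ids  # a real model would condition on these
  # Stand-in for a decoder call: uniform (all-zero) logits of the assumed shape.
  return mtf.zeros(mesh, mtf.Shape([batch, beam, vocab]))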
6 changes: 3 additions & 3 deletions mesh_tensorflow/ops.py
@@ -3157,7 +3157,7 @@ def einsum(xs, output_shape=None, reduced_dims=None, name=None):
shape is set to the contain all dimensions that appear exactly once in the
inputs, in order of appearance.

- If output_shape is not specifed, then the output shape is set to the contain
+ If output_shape is not specified, then the output shape is set to the contain
all dimensions that appear in xs but not in reduced_dims, in the order
that they appear in xs. If reduced_dims is also not specified, then
reduced_dims is set to the set of all dimensions that appear at least twice in
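As a rough, hedged illustration of the default behaviour the corrected sentence describes (dimension names and the mtf.zeros placeholders are made up; not part of this change):

import mesh_tensorflow as mtf

graph = mtf.Graph()
mesh = mtf.Mesh(graph, "example_mesh")
batch = mtf.Dimension("batch", 8)
d_in = mtf.Dimension("d_in", 16)
d_out = mtf.Dimension("d_out", 32)

x = mtf.zeros(mesh, mtf.Shape([batch, d_in]))   # [batch, d_in]
w = mtf.zeros(mesh, mtf.Shape([d_in, d_out]))   # [d_in, d_out]

# d_in appears in both inputs, so with no arguments it is treated as reduced,
# and the output keeps the remaining dims in order of appearance: [batch, d_out].
y_default = mtf.einsum([x, w])
# Equivalent call with the output shape spelled out explicitly.
y_explicit = mtf.einsum([x, w], output_shape=mtf.Shape([batch, d_out]))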
@@ -3488,7 +3488,7 @@ def sub(x1, x2, output_shape=None, name=None):


def multiply(x1, x2, output_shape=None, name=None):
"""Binary multiplication with broadcsting.
"""Binary multiplication with broadcasting.

Args:
x1: a Tensor
@@ -3509,7 +3509,7 @@ def multiply(x1, x2, output_shape=None, name=None):


def divide(x1, x2, output_shape=None, name=None):
"""Binary division with broadcsting.
"""Binary division with broadcasting.

Args:
x1: a Tensor
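Both docstrings touched above describe broadcasting binary ops. A small self-contained sketch of that broadcasting, assuming the mtf.zeros/mtf.ones helpers and with invented names and sizes:

import mesh_tensorflow as mtf

graph = mtf.Graph()
mesh = mtf.Mesh(graph, "example_mesh")
batch = mtf.Dimension("batch", 8)
d_model = mtf.Dimension("d_model", 32)

x = mtf.zeros(mesh, mtf.Shape([batch, d_model]))  # [batch, d_model]
b = mtf.ones(mesh, mtf.Shape([d_model]))          # [d_model]

# With no output_shape given, the result carries the union of the input
# dimensions, so b is broadcast across "batch" in both cases.
prod = mtf.multiply(x, b)  # [batch, d_model]
quot = mtf.divide(x, b)    # [batch, d_model]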
4 changes: 2 additions & 2 deletions mesh_tensorflow/optimize.py
@@ -34,14 +34,14 @@ def make_optimizer(hparams, lr):


class Optimizer(object):
"""Base optmizer class."""
"""Base optimizer class."""

def apply_grad(self, grad, var):
raise ValueError("Apply_Grad not implemented %s %s" % (grad, var))


class SgdOptimizer(Optimizer):
"""oOptimizer implementing SGD."""
"""Optimizer implementing SGD."""

def __init__(self, lr):
self._lr = lr
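To make the corrected base-class docstring concrete, a hedged sketch of how a subclass might satisfy the apply_grad contract follows. The use of mtf.assign_sub and the list return value are assumptions based on the public mtf ops, not something this hunk shows.

import mesh_tensorflow as mtf
from mesh_tensorflow import optimize as mtf_optimize

class MyConstantLrSgd(mtf_optimize.Optimizer):
  """Sketch of an Optimizer subclass: plain SGD with a fixed learning rate."""

  def __init__(self, lr):
    self._lr = lr

  def apply_grad(self, grad, var):
    # var <- var - lr * grad; mtf.assign_sub and the list return are assumptions.
    return [mtf.assign_sub(var, grad * self._lr)]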
2 changes: 1 addition & 1 deletion mesh_tensorflow/placement_mesh_impl.py
@@ -78,7 +78,7 @@ def __init__(self, variable, mesh_impl):
base_name = variable.name
if self.slice_is_master:
tf.logging.info(
"Single slice is indentical to master - avoid creating extra vars.")
"Single slice is identical to master - avoid creating extra vars.")
slices = [variable.get_master()]
self._laid_out_tensor = mesh_impl.LaidOutTensor(slices)
self._copy_slices_to_master = tf.group([])
4 changes: 2 additions & 2 deletions mesh_tensorflow/simd_mesh_impl.py
@@ -154,7 +154,7 @@ def _generate_copy_master_to_slices_op(self, master_variable, master_shape,
Args:
master_variable: The master variable.
master_shape: The shape of master variable.
- slices: The list of sliced varialbes.
+ slices: The list of sliced variables.
slice_shape: The shape of the slice variable.
Returns:
A grouped tf.assign ops.
@@ -365,7 +365,7 @@ def receive(self, x, mesh_axis, source_pcoord):
return tpu_ops.collective_permute(t, source_target_pairs)

def slice(self, tf_tensor, tensor_shape):
""""Slice out the correspoding part of tensor given the pnum variable."""
""""Slice out the corresponding part of tensor given the pnum variable."""
tensor_layout = self.tensor_layout(tensor_shape)

if tensor_layout.is_fully_replicated:
2 changes: 1 addition & 1 deletion mesh_tensorflow/utils.py
@@ -33,7 +33,7 @@ def outside_all_rewrites():


class BalancedVariablePlacer(object):
"""Place the variable on different device and blance the memory usage."""
"""Place the variable on different device and balance the memory usage."""

def __init__(self, devices, init_usage=None):
init_usage = init_usage if init_usage else [0] * len(devices)
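Finally, a short hedged sketch of how the placer above might be wired up. The device strings and init_usage values are invented, and passing the placer as the third argument to mtf.Mesh reflects the usual variable-placer hookup rather than anything this hunk changes.

import mesh_tensorflow as mtf
from mesh_tensorflow import utils as mtf_utils

devices = ["/cpu:0", "/cpu:1"]  # hypothetical device list
placer = mtf_utils.BalancedVariablePlacer(devices, init_usage=[0, 0])

graph = mtf.Graph()
# Variables created on this mesh are placed so as to balance usage across devices.
mesh = mtf.Mesh(graph, "example_mesh", placer)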