contrib: minor spelling tweaks #18955

Merged
merged 1 commit on Aug 14, 2018
@@ -204,7 +204,7 @@ def find_best_candidate_distribution(objective_vector,
assert best_pp is not None

# Throughout this loop, a maximum_violation of "lower" is not achievable,
- # but a maximum_violation of "upper" is achiveable.
+ # but a maximum_violation of "upper" is achievable.
while True:
middle = 0.5 * (lower + upper)
if (middle - lower <= epsilon) or (upper - middle <= epsilon):
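As a side note on the hunk above: a minimal, standalone Python sketch of the bisection invariant the fixed comment describes (the helper name and the feasibility predicate are illustrative assumptions, not part of the TensorFlow source):

```python
# Sketch only: "lower" is kept infeasible and "upper" feasible throughout,
# and the loop stops once the bracket is within epsilon.
def bisect_smallest_feasible(is_feasible, lower, upper, epsilon=1e-6):
    """Returns an approximately smallest value in [lower, upper] that is feasible."""
    assert is_feasible(upper) and not is_feasible(lower)
    while True:
        middle = 0.5 * (lower + upper)
        if (middle - lower <= epsilon) or (upper - middle <= epsilon):
            return upper
        if is_feasible(middle):
            upper = middle   # invariant preserved: "upper" stays feasible
        else:
            lower = middle   # invariant preserved: "lower" stays infeasible


# Example: smallest x in [0, 10] with x*x >= 2, i.e. roughly sqrt(2).
print(bisect_smallest_feasible(lambda v: v * v >= 2.0, 0.0, 10.0))
```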
8 changes: 4 additions & 4 deletions tensorflow/contrib/kfac/examples/convnet.py
@@ -202,7 +202,7 @@ def minimize_loss_single_machine(loss,
accuracy: 0-D Tensor. Accuracy of classifier on current minibatch.
layer_collection: LayerCollection instance describing model architecture.
Used by K-FAC to construct preconditioner.
- device: string, Either '/cpu:0' or '/gpu:0'. The covaraince and invserse
+ device: string, Either '/cpu:0' or '/gpu:0'. The covariance and inverse
update ops are run on this device.
session_config: None or tf.ConfigProto. Configuration for tf.Session().

@@ -470,7 +470,7 @@ def train_mnist_single_machine(data_dir,
data_dir: string. Directory to read MNIST examples from.
num_epochs: int. Number of passes to make over the training set.
use_fake_data: bool. If True, generate a synthetic dataset.
- device: string, Either '/cpu:0' or '/gpu:0'. The covaraince and inverse
+ device: string, Either '/cpu:0' or '/gpu:0'. The covariance and inverse
update ops are run on this device.

Returns:
@@ -509,7 +509,7 @@ def train_mnist_multitower(data_dir, num_epochs, num_towers,
num_epochs: int. Number of passes to make over the training set.
num_towers: int. Number of CPUs to split inference across.
use_fake_data: bool. If True, generate a synthetic dataset.
- devices: string, Either list of CPU or GPU. The covaraince and inverse
+ devices: string, Either list of CPU or GPU. The covariance and inverse
update ops are run on this device.

Returns:
@@ -621,7 +621,7 @@ def train_mnist_distributed_sync_replicas(task_id,
data_dir: string. Directory to read MNIST examples from.
num_epochs: int. Number of passes to make over the training set.
op_strategy: `string`, Strategy to run the covariance and inverse
- ops. If op_strategy == `chief_worker` then covaraiance and inverse
+ ops. If op_strategy == `chief_worker` then covariance and inverse
update ops are run on chief worker otherwise they are run on dedicated
workers.

6 changes: 3 additions & 3 deletions tensorflow/contrib/kfac/python/ops/estimator.py
@@ -97,8 +97,8 @@ def __init__(self,
and to regularize the update direction by making it closer to the
gradient. (Higher damping means the update looks more like a standard
gradient update - see Tikhonov regularization.)
- layer_collection: The layer collection object, which holds the fisher
- blocks, kronecker factors, and losses associated with the
+ layer_collection: The layer collection object, which holds the Fisher
+ blocks, Kronecker factors, and losses associated with the
graph.
exps: List of floats or ints. These represent the different matrix
powers of the approximate Fisher that the FisherEstimator will be able
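The damping remark in this docstring can be made concrete with a small NumPy sketch (an illustration of Tikhonov regularization, not the K-FAC code): solving (F + damping * I) d = g gives a step that approaches the plain gradient direction as damping grows.

```python
import numpy as np

def damped_step(fisher, grad, damping):
    """Solves (F + damping * I) d = grad for the update direction d."""
    dim = fisher.shape[0]
    return np.linalg.solve(fisher + damping * np.eye(dim), grad)

fisher = np.array([[2.0, 0.5],
                   [0.5, 1.0]])
grad = np.array([1.0, -1.0])
for damping in (0.01, 1.0, 100.0):
    d = damped_step(fisher, grad, damping)
    # For large damping, damping * d is essentially the raw gradient again.
    print(damping, d, damping * d)
```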
@@ -464,7 +464,7 @@ def _get_grads_lists_gradients(self, tensors):

def _get_grads_lists_empirical(self, tensors):
# Passing in a list of loss values is better than passing in the sum as
- # the latter creates unnessesary ops on the default device
+ # the latter creates unnecessary ops on the default device
grads_flat = gradients_impl.gradients(
self._layers.eval_losses(),
nest.flatten(tensors),
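The comment fixed in this hunk makes a real point about graph construction; a hedged sketch using the public TF 1.x API (the contrib code calls gradients_impl directly) shows the two call patterns it contrasts:

```python
import tensorflow as tf  # assumes the TF 1.x graph-mode API

x = tf.Variable([1.0, 2.0])
loss_a = tf.reduce_sum(tf.square(x))
loss_b = tf.reduce_sum(x)

# Gradients of a list of losses: no explicit sum op is added to the graph.
grads_from_list = tf.gradients([loss_a, loss_b], [x])
# Gradients of the summed loss: builds an extra add op (on the default device)
# before differentiation, which is what the comment above warns about.
grads_from_sum = tf.gradients(loss_a + loss_b, [x])
```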
2 changes: 1 addition & 1 deletion tensorflow/contrib/kfac/python/ops/fisher_blocks.py
@@ -870,7 +870,7 @@ class ConvKFCBasicFB(InputOutputMultiTower, KroneckerProductFB):
Estimates the Fisher Information matrix's blog for a convolutional
layer.

- Consider a convoluational layer in this model with (unshared) filter matrix
+ Consider a convolutional layer in this model with (unshared) filter matrix
'w'. For a minibatch that produces inputs 'a' and output preactivations 's',
this FisherBlock estimates,

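The expression this docstring leads into sits in the collapsed lines, so it is not reproduced here; purely as context, the general Kronecker-factored (K-FAC) approximation that such blocks are built on has the form

```latex
% General K-FAC form, stated as background -- not quoted from the source file.
F \;\approx\; \mathbb{E}\!\left[a\,a^{\top}\right] \otimes
    \mathbb{E}\!\left[\tfrac{\partial \mathcal{L}}{\partial s}
                      \left(\tfrac{\partial \mathcal{L}}{\partial s}\right)^{\!\top}\right]
```

i.e. the second moment of the layer's (patch-extracted) inputs `a`, Kronecker-multiplied with the second moment of the loss gradients with respect to the preactivations `s`.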
12 changes: 6 additions & 6 deletions tensorflow/contrib/kfac/python/ops/fisher_factors.py
@@ -71,15 +71,15 @@
# factor. This parameter is used only if `_SUB_SAMPLE_INPUTS` is True.
_INPUTS_TO_EXTRACT_PATCHES_FACTOR = 0.5

- # If True, then subsamples the tensor passed to compute the covaraince matrix.
+ # If True, then subsamples the tensor passed to compute the covariance matrix.
_SUB_SAMPLE_OUTER_PRODUCTS = False

- # If True, then subsamples the tensor passed to compute the covaraince matrix.
+ # If True, then subsamples the tensor passed to compute the covariance matrix.
_SUB_SAMPLE_INPUTS = False

# TOWER_STRATEGY can be one of "concat" or "separate". If "concat", the data
# passed to the factors from the blocks will be concatenated across towers
- # (lazilly via PartitionedTensor objects). Otherwise a tuple of tensors over
+ # (lazily via PartitionedTensor objects). Otherwise a tuple of tensors over
# towers will be passed in, and the factors will iterate over this and do the
# cov computations separately for each one, averaging the results together.
TOWER_STRATEGY = "concat"
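A hedged NumPy illustration (not the contrib implementation) of the two strategies described in the fixed comment, for per-tower activations of equal batch size; with uncentered second-moment matrices the "concat" and "separate" paths agree:

```python
import numpy as np

towers = [np.random.randn(32, 8) for _ in range(4)]  # 4 towers, batch 32, dim 8

# "concat": one covariance over the (lazily) concatenated data.
stacked = np.concatenate(towers, axis=0)
cov_concat = stacked.T @ stacked / stacked.shape[0]

# "separate": per-tower covariances computed independently, then averaged.
cov_separate = np.mean([t.T @ t / t.shape[0] for t in towers], axis=0)

print(np.allclose(cov_concat, cov_separate))  # True when tower sizes match
```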
@@ -309,7 +309,7 @@ def _subsample_for_cov_computation(array, name=None):


def _random_tensor_gather(array, max_size):
"""Generates a random set of indices and gathers the value at the indcices.
"""Generates a random set of indices and gathers the value at the indices.

Args:
array: Tensor, of shape `[batch_size, dim_2]`.
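As an illustration of the behaviour this docstring describes (not the contrib implementation, whose body is in the collapsed lines), a subsampled gather can be written with stock TF 1.x ops:

```python
import tensorflow as tf

def random_tensor_gather_sketch(array, max_size):
    """Gathers `max_size` randomly chosen rows of a `[batch_size, dim_2]` tensor."""
    batch_size = tf.shape(array)[0]
    indices = tf.random_shuffle(tf.range(batch_size))[:max_size]
    return tf.gather(array, indices)
```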
@@ -1762,8 +1762,8 @@ def make_inverse_update_ops(self):
# Might need to enforce symmetry lost due to numerical issues.
invsqrtC0 = (invsqrtC0 + array_ops.transpose(invsqrtC0)) / 2.0

- # The following line imposses the symmetry assumed by "Option 1" on C1.
- # Stangely the code can work okay with this line commented out,
+ # The following line imposes the symmetry assumed by "Option 1" on C1.
+ # Strangely the code can work okay with this line commented out,
# depending on how psd_eig is defined. I'm not sure why.
C1 = (C1 + array_ops.transpose(C1)) / 2.0

8 changes: 4 additions & 4 deletions tensorflow/contrib/kfac/python/ops/layer_collection.py
@@ -609,7 +609,7 @@ def register_fully_connected(self,
outputs,
approx=None,
reuse=VARIABLE_SCOPE):
"""Registers a fully connnected layer.
"""Registers a fully connected layer.

Args:
params: Tensor or 2-tuple of Tensors corresponding to weight and bias of
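For orientation, a hedged usage sketch of the registration API documented here; the import path and variable names are assumptions based on the contrib layout at the time, not code from this PR:

```python
import tensorflow as tf
from tensorflow.contrib.kfac import layer_collection as lc  # assumed path

x = tf.placeholder(tf.float32, [None, 784])
w = tf.get_variable("w", [784, 10])
b = tf.get_variable("b", [10])
logits = tf.matmul(x, w) + b

layers = lc.LayerCollection()
# params is the (weight, bias) pair; inputs/outputs are the layer's input
# tensor and pre-activation output, as the Args section above describes.
layers.register_fully_connected(params=(w, b), inputs=x, outputs=logits)
```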
@@ -975,7 +975,7 @@ def register_fully_connected_multi(self, params, inputs, outputs,
block for this layer (which must have already been registered). If
"VARIABLE_SCOPE", use tf.get_variable_scope().reuse. (Note that the
word `use` here has a completely different meaning to "use in the graph"
- as it perturns to the `inputs`, `outputs`, and `num_uses` arguments.)
+ as it pertains to the `inputs`, `outputs`, and `num_uses` arguments.)
(Default: "VARIABLE_SCOPE")

Raises:
@@ -1045,7 +1045,7 @@ def register_conv2d_multi(self,
block for this layer (which must have already been registered). If
"VARIABLE_SCOPE", use tf.get_variable_scope().reuse. (Note that the
word `use` here has a completely different meaning to "use in the graph"
- as it perturns to the `inputs`, `outputs`, and `num_uses` arguments.)
+ as it pertains to the `inputs`, `outputs`, and `num_uses` arguments.)
(Default: "VARIABLE_SCOPE")

Raises:
@@ -1116,7 +1116,7 @@ def register_embedding_multi(self,
block for this layer (which must have already been registered). If
"VARIABLE_SCOPE", use tf.get_variable_scope().reuse. (Note that the
word `use` here has a completely different meaning to "use in the graph"
- as it perturns to the `inputs`, `outputs`, and `num_uses` arguments.)
+ as it pertains to the `inputs`, `outputs`, and `num_uses` arguments.)
(Default: "VARIABLE_SCOPE")

Raises:
6 changes: 3 additions & 3 deletions tensorflow/contrib/kfac/python/ops/loss_functions.py
@@ -214,7 +214,7 @@ def multiply_fisher_factor(self, vector):

Here the 'Fisher' is the Fisher information matrix (i.e. expected outer-
product of gradients) with respect to the parameters of the underlying
- probability distribtion (whose log-prob defines the loss). Typically this
+ probability distribution (whose log-prob defines the loss). Typically this
will be block-diagonal across different cases in the batch, since the
distribution is usually (but not always) conditionally iid across different
cases.
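In symbols, the standard definition this paragraph paraphrases (not quoted from the source) is

```latex
F(\theta) \;=\; \mathbb{E}_{x \sim p(\cdot \mid \theta)}
  \left[\,\nabla_{\theta}\log p(x \mid \theta)\,
         \nabla_{\theta}\log p(x \mid \theta)^{\top}\right]
```

with the block-diagonal structure mentioned above arising when the distribution factorizes (conditionally i.i.d.) across cases in the batch.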
@@ -238,7 +238,7 @@ def multiply_fisher_factor_transpose(self, vector):

Here the 'Fisher' is the Fisher information matrix (i.e. expected outer-
product of gradients) with respect to the parameters of the underlying
- probability distribtion (whose log-prob defines the loss). Typically this
+ probability distribution (whose log-prob defines the loss). Typically this
will be block-diagonal across different cases in the batch, since the
distribution is usually (but not always) conditionally iid across different
cases.
@@ -262,7 +262,7 @@ def multiply_fisher_factor_replicated_one_hot(self, index):

Here the 'Fisher' is the Fisher information matrix (i.e. expected outer-
product of gradients) with respect to the parameters of the underlying
- probability distribtion (whose log-prob defines the loss). Typically this
+ probability distribution (whose log-prob defines the loss). Typically this
will be block-diagonal across different cases in the batch, since the
distribution is usually (but not always) conditionally iid across different
cases.
8 changes: 4 additions & 4 deletions tensorflow/contrib/kfac/python/ops/optimizer.py
@@ -72,7 +72,7 @@ def __init__(self,
(Higher damping means the update looks more like a standard gradient
update - see Tikhonov regularization.)
layer_collection: The layer collection object, which holds the fisher
- blocks, kronecker factors, and losses associated with the
+ blocks, Kronecker factors, and losses associated with the
graph. The layer_collection cannot be modified after KfacOptimizer's
initialization.
var_list: Optional list or tuple of variables to train. Defaults to the
@@ -99,7 +99,7 @@ def __init__(self,
placement_strategy: string, Device placement strategy used when creating
covariance variables, covariance ops, and inverse ops.
(Default: `None`)
- **kwargs: Arguments to be passesd to specific placement
+ **kwargs: Arguments to be passed to specific placement
strategy mixin. Check `placement.RoundRobinPlacementMixin` for example.

Raises:
@@ -120,7 +120,7 @@ def __init__(self,
self._estimation_mode = estimation_mode
self._colocate_gradients_with_ops = colocate_gradients_with_ops

- # The below parameters are required only if damping needs to be adapated.
+ # The below parameters are required only if damping needs to be adapted.
# These parameters can be set by calling
# set_damping_adaptation_params() explicitly.
self._damping_adaptation_decay = 0.95
@@ -574,7 +574,7 @@ def _compute_qmodel_hyperparams_wrapper(self, grads_and_vars,
"""Wrapper function for `self._compute_qmodel_hyperparams`.

Constructs a list of preconditioned gradients and variables. Also creates a
- op to asssign the computed q model change to `self._q_model_change`.
+ op to assign the computed q model change to `self._q_model_change`.

Args:
grads_and_vars: List of (gradient, variable) pairs.
2 changes: 1 addition & 1 deletion tensorflow/contrib/layers/python/layers/initializers.py
@@ -111,7 +111,7 @@ def variance_scaling_initializer(factor=2.0, mode='FAN_IN', uniform=False,
if not dtype.is_floating:
raise TypeError('Cannot create initializer for non-floating point type.')
if mode not in ['FAN_IN', 'FAN_OUT', 'FAN_AVG']:
- raise TypeError('Unknow mode %s [FAN_IN, FAN_OUT, FAN_AVG]', mode)
+ raise TypeError('Unknown mode %s [FAN_IN, FAN_OUT, FAN_AVG]', mode)

# pylint: disable=unused-argument
def _initializer(shape, dtype=dtype, partition_info=None):
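A hedged usage sketch for the initializer whose error message is fixed above; the arguments shown (factor, mode, uniform) are the ones visible in the hunk header, with values chosen purely for illustration:

```python
import tensorflow as tf  # TF 1.x, where tf.contrib.layers is available

# He-style initialization for ReLU networks (the defaults shown in the hunk).
he_init = tf.contrib.layers.variance_scaling_initializer(
    factor=2.0, mode='FAN_IN', uniform=False)

# Glorot/Xavier-style averaging variant.
avg_init = tf.contrib.layers.variance_scaling_initializer(
    factor=1.0, mode='FAN_AVG', uniform=True)

w = tf.get_variable("w", shape=[784, 256], initializer=he_init)
# An unsupported mode (e.g. 'FAN_MAX') raises the TypeError whose message
# this PR corrects to "Unknown mode ...".
```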
6 changes: 3 additions & 3 deletions tensorflow/contrib/linear_optimizer/python/sdca_optimizer.py
@@ -158,7 +158,7 @@ def _training_examples_and_variables():
# exactly 2 (i.e., its shape should be [batch_size, column.dim]).
check_rank_op = control_flow_ops.Assert(
math_ops.less_equal(array_ops.rank(transformed_tensor), 2),
- ['transformed_tensor shouls have rank at most 2.'])
+ ['transformed_tensor should have rank at most 2.'])
# Reshape to [batch_size, dense_column_dimension].
with ops.control_dependencies([check_rank_op]):
transformed_tensor = array_ops.reshape(transformed_tensor, [
@@ -172,7 +172,7 @@ def _training_examples_and_variables():
elif isinstance(column, layers.feature_column._BucketizedColumn): # pylint: disable=protected-access
# A bucketized column corresponds to a sparse feature in SDCA. The
# bucketized feature is "sparsified" for SDCA by converting it to a
- # SparseFeatureColumn respresenting the one-hot encoding of the
+ # SparseFeatureColumn representing the one-hot encoding of the
# bucketized feature.
#
# TODO(sibyl-vie3Poto): Explore whether it is more efficient to translate a
@@ -220,7 +220,7 @@ def _training_examples_and_variables():
# occur multiple times for a single example.
projected_ids = projection_length * example_ids + flat_ids

- # Remove any redudant ids.
+ # Remove any redundant ids.
ids, idx = array_ops.unique(projected_ids)
# Keep only one example id per duplicated ids.
example_ids_filtered = math_ops.unsorted_segment_min(
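Finally, a hedged NumPy illustration (not the SDCA code) of the id-projection trick in the last hunk: folding (example id, feature id) pairs into single integers so that duplicates of the same feature within one example collapse to a single id.

```python
import numpy as np

example_ids = np.array([0, 0, 0, 1, 1])   # which example each entry belongs to
flat_ids    = np.array([3, 7, 3, 2, 2])   # feature ids; note the repeats
projection_length = 10                    # assumed to exceed every feature id

projected_ids = projection_length * example_ids + flat_ids   # [3, 7, 3, 12, 12]
ids, idx = np.unique(projected_ids, return_inverse=True)
print(ids)   # [ 3  7 12] -- each (example, feature) pair kept exactly once
print(idx)   # [0 1 0 2 2] -- maps every original entry to its deduplicated id
```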