tensorflow · martinwicke · Jul 15, 2016 · Jul 13, 2016 · Jul 14, 2016
diff --git a/tensorflow/contrib/factorization/python/ops/kmeans.py b/tensorflow/contrib/factorization/python/ops/kmeans.py
@@ -54,12 +54,8 @@ def __init__(self,
                distance_metric=clustering_ops.SQUARED_EUCLIDEAN_DISTANCE,
                random_seed=0,
                use_mini_batch=True,
-               batch_size=128,
-               steps=10,
                kmeans_plus_plus_num_retries=2,
-               continue_training=False,
-               config=None,
-               verbose=1):
+               config=None):
     """Creates a model for running KMeans training and inference.
 
     Args:
@@ -72,25 +68,17 @@ def __init__(self,
       random_seed: Python integer. Seed for PRNG used to initialize centers.
       use_mini_batch: If true, use the mini-batch k-means algorithm. Else assume
         full batch.
-      batch_size: See TensorFlowEstimator
-      steps: See TensorFlowEstimator
       kmeans_plus_plus_num_retries: For each point that is sampled during
         kmeans++ initialization, this parameter specifies the number of
         additional points to draw from the current distribution before selecting
         the best. If a negative value is specified, a heuristic is used to
         sample O(log(num_to_sample)) additional points.
-      continue_training: See TensorFlowEstimator
-      config: See TensorFlowEstimator
-      verbose: See TensorFlowEstimator
+      config: See Estimator
     """
     super(KMeansClustering, self).__init__(
         model_dir=model_dir,
         config=config)
-    self.batch_size = batch_size
-    self.steps = steps
     self.kmeans_plus_plus_num_retries = kmeans_plus_plus_num_retries
-    self.continue_training = continue_training
-    self.verbose = verbose
     self._num_clusters = num_clusters
     self._training_initial_clusters = initial_clusters
     self._training_graph = None
@@ -99,10 +87,10 @@ def __init__(self,
     self._random_seed = random_seed
     self._initialized = False
 
-  def fit(self, x, y=None, monitors=None, logdir=None, steps=None):
+  def fit(self, x, y=None, monitors=None, logdir=None, steps=None, batch_size=128):
     """Trains a k-means clustering on x.
 
-    Note: See TensorFlowEstimator for logic for continuous training and graph
+    Note: See Estimator for logic for continuous training and graph
       construction across multiple calls to fit.
 
     Args:
@@ -122,10 +110,10 @@ def fit(self, x, y=None, monitors=None, logdir=None, steps=None):
     if logdir is not None:
       self._model_dir = logdir
     self._data_feeder = data_feeder.setup_train_data_feeder(
-        x, None, self._num_clusters, self.batch_size)
+        x, None, self._num_clusters, batch_size)
     self._train_model(input_fn=self._data_feeder.input_builder,
                       feed_fn=self._data_feeder.get_feed_dict_fn(),
-                      steps=steps or self.steps,
+                      steps=steps,
                       monitors=monitors,
                       init_feed_fn=self._data_feeder.get_feed_dict_fn())
     return self

diff --git a/tensorflow/contrib/factorization/python/ops/kmeans_test.py b/tensorflow/contrib/factorization/python/ops/kmeans_test.py
@@ -53,13 +53,14 @@ def setUp(self):
 
     self.kmeans = KMeans(self.num_centers,
                          initial_clusters=kmeans_ops.RANDOM_INIT,
-                         batch_size=self.batch_size,
                          use_mini_batch=self.use_mini_batch,
-                         steps=30,
-                         continue_training=True,
-                         config=run_config.RunConfig(tf_random_seed=14),
+                         config=self.config(14),
                          random_seed=12)
 
+  @staticmethod
+  def config(tf_random_seed):
+    return run_config.RunConfig(tf_random_seed=tf_random_seed)
+
   @property
   def batch_size(self):
     return self.num_points
@@ -86,7 +87,7 @@ def make_random_points(centers, num_points, max_offset=20):
 
   def test_clusters(self):
     kmeans = self.kmeans
-    kmeans.fit(x=self.points, steps=0)
+    kmeans.fit(x=self.points, steps=1, batch_size=8)
     clusters = kmeans.clusters()
     self.assertAllEqual(list(clusters.shape),
                         [self.num_centers, self.num_dims])
@@ -97,32 +98,33 @@ def test_fit(self):
       return
     kmeans = self.kmeans
     kmeans.fit(x=self.points,
-               steps=1)
+               steps=1, batch_size=self.batch_size)
     score1 = kmeans.score(x=self.points)
     kmeans.fit(x=self.points,
-               steps=15 * self.num_points // self.batch_size)
+               steps=15 * self.num_points // self.batch_size,
+               batch_size=self.batch_size)
     score2 = kmeans.score(x=self.points)
     self.assertTrue(score1 > score2)
     self.assertNear(self.true_score, score2, self.true_score * 0.05)
 
   def test_infer(self):
     kmeans = self.kmeans
-    kmeans.fit(x=self.points)
+    kmeans.fit(x=self.points, steps=10, batch_size=128)
     clusters = kmeans.clusters()
 
     # Make a small test set
     points, true_assignments, true_offsets = self.make_random_points(clusters,
                                                                      10)
     # Test predict
-    assignments = kmeans.predict(points)
+    assignments = kmeans.predict(points, batch_size=self.batch_size)
     self.assertAllEqual(assignments, true_assignments)
 
     # Test score
-    score = kmeans.score(points)
+    score = kmeans.score(points, batch_size=128)
     self.assertNear(score, np.sum(true_offsets), 0.01 * score)
 
     # Test transform
-    transform = kmeans.transform(points)
+    transform = kmeans.transform(points, batch_size=128)
     true_transform = np.maximum(
         0,
         np.sum(np.square(points), axis=1, keepdims=True) -
@@ -140,12 +142,9 @@ def test_fit_with_cosine_distance(self):
                     initial_clusters=kmeans_ops.RANDOM_INIT,
                     distance_metric=kmeans_ops.COSINE_DISTANCE,
                     use_mini_batch=self.use_mini_batch,
-                    batch_size=4,
-                    steps=30,
-                    continue_training=True,
-                    config=run_config.RunConfig(tf_random_seed=2),
+                    config=self.config(2),
                     random_seed=12)
-    kmeans.fit(x=points)
+    kmeans.fit(x=points, steps=10, batch_size=4)
     centers = normalize(kmeans.clusters())
     self.assertAllClose(np.sort(centers, axis=0),
                         np.sort(true_centers, axis=0))
@@ -163,18 +162,16 @@ def test_transform_with_cosine_distance(self):
                     initial_clusters=kmeans_ops.RANDOM_INIT,
                     distance_metric=kmeans_ops.COSINE_DISTANCE,
                     use_mini_batch=self.use_mini_batch,
-                    batch_size=8,
-                    continue_training=True,
-                    config=run_config.RunConfig(tf_random_seed=3))
-    kmeans.fit(x=points, steps=30)
+                    config=self.config(3))
+    kmeans.fit(x=points, steps=30, batch_size=8)
 
     centers = normalize(kmeans.clusters())
     self.assertAllClose(np.sort(centers, axis=0),
                         np.sort(true_centers, axis=0),
                         atol=1e-2)
 
     true_transform = 1 - cosine_similarity(points, centers)
-    transform = kmeans.transform(points)
+    transform = kmeans.transform(points, batch_size=8)
     self.assertAllClose(transform, true_transform, atol=1e-3)
 
   def test_predict_with_cosine_distance(self):
@@ -196,20 +193,18 @@ def test_predict_with_cosine_distance(self):
                     initial_clusters=kmeans_ops.RANDOM_INIT,
                     distance_metric=kmeans_ops.COSINE_DISTANCE,
                     use_mini_batch=self.use_mini_batch,
-                    batch_size=8,
-                    continue_training=True,
-                    config=run_config.RunConfig(tf_random_seed=3))
-    kmeans.fit(x=points, steps=30)
+                    config=self.config(3))
+    kmeans.fit(x=points, steps=30, batch_size=8)
 
     centers = normalize(kmeans.clusters())
     self.assertAllClose(np.sort(centers, axis=0),
                         np.sort(true_centers, axis=0), atol=1e-2)
 
-    assignments = kmeans.predict(points)
+    assignments = kmeans.predict(points, batch_size=8)
     self.assertAllClose(centers[assignments],
                         true_centers[true_assignments], atol=1e-2)
 
-    score = kmeans.score(points)
+    score = kmeans.score(points, batch_size=8)
     self.assertAllClose(score, true_score, atol=1e-2)
 
   def test_predict_with_cosine_distance_and_kmeans_plus_plus(self):
@@ -233,29 +228,27 @@ def test_predict_with_cosine_distance_and_kmeans_plus_plus(self):
                     initial_clusters=kmeans_ops.KMEANS_PLUS_PLUS_INIT,
                     distance_metric=kmeans_ops.COSINE_DISTANCE,
                     use_mini_batch=self.use_mini_batch,
-                    batch_size=12,
-                    continue_training=True,
-                    config=run_config.RunConfig(tf_random_seed=3))
-    kmeans.fit(x=points, steps=30)
+                    config=self.config(3))
+    kmeans.fit(x=points, steps=30, batch_size=12)
 
     centers = normalize(kmeans.clusters())
     self.assertAllClose(sorted(centers.tolist()),
                         sorted(true_centers.tolist()),
                         atol=1e-2)
 
-    assignments = kmeans.predict(points)
+    assignments = kmeans.predict(points, batch_size=12)
     self.assertAllClose(centers[assignments],
                         true_centers[true_assignments], atol=1e-2)
 
-    score = kmeans.score(points)
+    score = kmeans.score(points, batch_size=12)
     self.assertAllClose(score, true_score, atol=1e-2)
 
   def test_fit_raise_if_num_clusters_larger_than_num_points_random_init(self):
     points = np.array([[2.0, 3.0], [1.6, 8.2]])
 
     with self.assertRaisesOpError('less'):
       kmeans = KMeans(num_clusters=3, initial_clusters=kmeans_ops.RANDOM_INIT)
-      kmeans.fit(x=points)
+      kmeans.fit(x=points, steps=10, batch_size=8)
 
   def test_fit_raise_if_num_clusters_larger_than_num_points_kmeans_plus_plus(
       self):
@@ -264,7 +257,7 @@ def test_fit_raise_if_num_clusters_larger_than_num_points_kmeans_plus_plus(
     with self.assertRaisesOpError(AssertionError):
       kmeans = KMeans(num_clusters=3,
                       initial_clusters=kmeans_ops.KMEANS_PLUS_PLUS_INIT)
-      kmeans.fit(x=points)
+      kmeans.fit(x=points, steps=10, batch_size=8)
 
 
 class MiniBatchKMeansTest(KMeansTest):