allow switching between tiled and non-tiled projection

kreativai · Jan 16, 2020 · 2036fb8
1 parent f1bd21b
commit 2036fb8
Show file tree

Hide file tree

Showing 2 changed files with 18 additions and 5 deletions.
diff --git a/project_images.py b/project_images.py
@@ -87,6 +87,10 @@ def main():
     parser.add_argument('--initial-learning-rate', type=float, default=0.1, help='Initial learning rate')
     parser.add_argument('--initial-noise-factor', type=float, default=0.05, help='Initial noise factor')
     parser.add_argument('--verbose', type=bool, default=False, help='Verbose output')
+    tiled_parser = parser.add_mutually_exclusive_group(required=False)
+    tiled_parser.add_argument('--tiled', dest='tiled', action='store_true', help='Tiled dlatents (default)')
+    tiled_parser.add_argument('--no-tiled', dest='tiled', action='store_false', help='Non-tiled dlatents')
+    parser.set_defaults(tiled=True)
     parser.add_argument('--video', type=bool, default=False, help='Render video of the optimization process')
     parser.add_argument('--video-mode', type=int, default=1, help='Video mode: 1 for optimization only, 2 for source + optimization')
     parser.add_argument('--video-size', type=int, default=1024, help='Video size (height in px)')
@@ -102,7 +106,8 @@ def main():
         num_steps             = args.num_steps,
         initial_learning_rate = args.initial_learning_rate,
         initial_noise_factor  = args.initial_noise_factor,
-        verbose               = args.verbose
+        verbose               = args.verbose,
+        tiled                 = args.tiled
     )
     proj.set_network(Gs)
 

diff --git a/projector.py b/projector.py
@@ -19,7 +19,8 @@ def __init__(self,
         num_steps                       = 1000,
         initial_learning_rate           = 0.1,
         initial_noise_factor            = 0.05,
-        verbose                         = False
+        verbose                         = False,
+        tiled                           = True
     ):
 
         self.vgg16_pkl                  = vgg16_pkl
@@ -32,6 +33,7 @@ def __init__(self,
         self.noise_ramp_length          = 0.75
         self.regularize_noise_weight    = 1e5
         self.verbose                    = verbose
+        self.tiled                      = tiled
         self.clone_net                  = True
 
         self._Gs                    = None
@@ -71,8 +73,11 @@ def set_network(self, Gs, minibatch_size=1):
         # Find dlatent stats.
         self._info('Finding W midpoint and stddev using %d samples...' % self.dlatent_avg_samples)
         latent_samples = np.random.RandomState(123).randn(self.dlatent_avg_samples, *self._Gs.input_shapes[0][1:])
-        dlatent_samples = self._Gs.components.mapping.run(latent_samples, None) # [N, 18, 512]
-        self._dlatent_avg = np.mean(dlatent_samples, axis=0, keepdims=True) # [1, 18, 512]
+        if self.tiled:
+            dlatent_samples = self._Gs.components.mapping.run(latent_samples, None)[:, :1, :] # [N, 1, 512]
+        else:
+            dlatent_samples = self._Gs.components.mapping.run(latent_samples, None) # [N, 18, 512]
+        self._dlatent_avg = np.mean(dlatent_samples, axis=0, keepdims=True) # [1, 1 or 18, 512]
         self._dlatent_std = (np.sum((dlatent_samples - self._dlatent_avg) ** 2) / self.dlatent_avg_samples) ** 0.5
         self._info('std = %g' % self._dlatent_std)
 
@@ -100,7 +105,10 @@ def set_network(self, Gs, minibatch_size=1):
         self._dlatents_var = tf.Variable(tf.zeros([self._minibatch_size] + list(self._dlatent_avg.shape[1:])), name='dlatents_var')
         self._noise_in = tf.placeholder(tf.float32, [], name='noise_in')
         dlatents_noise = tf.random.normal(shape=self._dlatents_var.shape) * self._noise_in
-        self._dlatents_expr = self._dlatents_var + dlatents_noise
+        if self.tiled:
+            self._dlatents_expr = tf.tile(self._dlatents_var + dlatents_noise, [1, self._Gs.components.synthesis.input_shape[1], 1])
+        else:
+            self._dlatents_expr = self._dlatents_var + dlatents_noise
         self._images_expr = self._Gs.components.synthesis.get_output_for(self._dlatents_expr, randomize_noise=False)
 
         # Downsample image to 256x256 if it's larger than that. VGG was built for 224x224 images.