diff --git a/project_images.py b/project_images.py
index 50a2ff2d..2c68af48 100644
--- a/project_images.py
+++ b/project_images.py
@@ -87,6 +87,10 @@ def main():
     parser.add_argument('--initial-learning-rate', type=float, default=0.1, help='Initial learning rate')
     parser.add_argument('--initial-noise-factor', type=float, default=0.05, help='Initial noise factor')
     parser.add_argument('--verbose', type=bool, default=False, help='Verbose output')
+    tiled_parser = parser.add_mutually_exclusive_group(required=False)
+    tiled_parser.add_argument('--tiled', dest='tiled', action='store_true', help='Tiled dlatents (default)')
+    tiled_parser.add_argument('--no-tiled', dest='tiled', action='store_false', help='Non-tiled dlatents')
+    parser.set_defaults(tiled=True)
     parser.add_argument('--video', type=bool, default=False, help='Render video of the optimization process')
     parser.add_argument('--video-mode', type=int, default=1, help='Video mode: 1 for optimization only, 2 for source + optimization')
     parser.add_argument('--video-size', type=int, default=1024, help='Video size (height in px)')
@@ -102,7 +106,8 @@ def main():
         num_steps = args.num_steps,
         initial_learning_rate = args.initial_learning_rate,
         initial_noise_factor = args.initial_noise_factor,
-        verbose = args.verbose
+        verbose = args.verbose,
+        tiled = args.tiled
     )
     proj.set_network(Gs)
 
diff --git a/projector.py b/projector.py
index edb39c20..44758da4 100644
--- a/projector.py
+++ b/projector.py
@@ -19,7 +19,8 @@ def __init__(self,
         num_steps = 1000,
         initial_learning_rate = 0.1,
         initial_noise_factor = 0.05,
-        verbose = False
+        verbose = False,
+        tiled = True
     ):
 
         self.vgg16_pkl = vgg16_pkl
@@ -32,6 +33,7 @@ def __init__(self,
         self.noise_ramp_length = 0.75
         self.regularize_noise_weight = 1e5
         self.verbose = verbose
+        self.tiled = tiled
         self.clone_net = True
 
         self._Gs = None
@@ -71,8 +73,11 @@ def set_network(self, Gs, minibatch_size=1):
         # Find dlatent stats.
         self._info('Finding W midpoint and stddev using %d samples...' % self.dlatent_avg_samples)
         latent_samples = np.random.RandomState(123).randn(self.dlatent_avg_samples, *self._Gs.input_shapes[0][1:])
-        dlatent_samples = self._Gs.components.mapping.run(latent_samples, None) # [N, 18, 512]
-        self._dlatent_avg = np.mean(dlatent_samples, axis=0, keepdims=True) # [1, 18, 512]
+        if self.tiled:
+            dlatent_samples = self._Gs.components.mapping.run(latent_samples, None)[:, :1, :] # [N, 1, 512]
+        else:
+            dlatent_samples = self._Gs.components.mapping.run(latent_samples, None) # [N, 18, 512]
+        self._dlatent_avg = np.mean(dlatent_samples, axis=0, keepdims=True) # [1, 1 or 18, 512]
         self._dlatent_std = (np.sum((dlatent_samples - self._dlatent_avg) ** 2) / self.dlatent_avg_samples) ** 0.5
         self._info('std = %g' % self._dlatent_std)
 
@@ -100,7 +105,10 @@ def set_network(self, Gs, minibatch_size=1):
         self._dlatents_var = tf.Variable(tf.zeros([self._minibatch_size] + list(self._dlatent_avg.shape[1:])), name='dlatents_var')
         self._noise_in = tf.placeholder(tf.float32, [], name='noise_in')
         dlatents_noise = tf.random.normal(shape=self._dlatents_var.shape) * self._noise_in
-        self._dlatents_expr = self._dlatents_var + dlatents_noise
+        if self.tiled:
+            self._dlatents_expr = tf.tile(self._dlatents_var + dlatents_noise, [1, self._Gs.components.synthesis.input_shape[1], 1])
+        else:
+            self._dlatents_expr = self._dlatents_var + dlatents_noise
         self._images_expr = self._Gs.components.synthesis.get_output_for(self._dlatents_expr, randomize_noise=False)
 
         # Downsample image to 256x256 if it's larger than that. VGG was built for 224x224 images.
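
Note (usage sketch, not part of the diff): with tiled=True the projector optimizes a single [1, 512] dlatent and tiles it across every synthesis layer at graph-build time; with tiled=False it keeps the original per-layer [1, 18, 512] optimization. The snippet below shows how the new keyword might be passed through. The class name Projector, the module name projector, and the pre-loaded generator Gs are assumptions inferred from the call sites visible above, not shown in this diff.

    import projector

    proj = projector.Projector(   # class name assumed; only __init__ appears in the diff
        num_steps=1000,
        initial_learning_rate=0.1,
        initial_noise_factor=0.05,
        verbose=True,
        tiled=True,               # new flag: optimize one [1, 512] w and tile it over all layers
    )
    proj.set_network(Gs)          # Gs: an already-loaded StyleGAN2 generator (assumed)

From the command line, the same choice is exposed through the new mutually exclusive flags in project_images.py:

    python project_images.py --no-tiled ...   # remaining arguments unchanged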