#
# Copyright (C) 2021 NVIDIA Corporation. All rights reserved.
# Licensed under the NVIDIA Source Code License.
# See LICENSE at https://github.com/nv-tlabs/ATISS.
# Authors: Despoina Paschalidou, Amlan Kar, Maria Shugrina, Karsten Kreis,
# Andreas Geiger, Sanja Fidler
#
"""Script used for generating scenes using a previously trained model."""
import argparse
import logging
import os
import sys
import numpy as np
import torch
from training_utils import load_config
from utils import floor_plan_from_scene, export_scene, get_textured_objects_in_scene
from scene_diffusion.datasets import filter_function, get_dataset_raw_and_encoded
from scene_diffusion.datasets.threed_front import ThreedFront
from scene_diffusion.datasets.threed_future_dataset import ThreedFutureDataset
from scene_diffusion.networks import build_network
from scene_diffusion.utils import get_textured_objects, get_textured_objects_based_on_objfeats
from scene_diffusion.stats_logger import AverageAggregator
from simple_3dviz import Scene
#from simple_3dviz.window import show
from simple_3dviz.behaviours.keyboard import SnapshotOnKey, SortTriangles
from simple_3dviz.behaviours.misc import LightToCamera
from simple_3dviz.behaviours.movements import CameraTrajectory
from simple_3dviz.behaviours.trajectory import Circle
from simple_3dviz.behaviours.io import SaveFrames, SaveGif
from simple_3dviz.utils import render
import matplotlib.pyplot as plt
from pyrr import Matrix44
from utils import render as render_top2down
from utils import merge_meshes, computer_intersection, computer_symmetry
import trimesh
import open3d as o3d


def categorical_kl(p, q):
    """KL divergence between two categorical distributions (a small epsilon is added for numerical stability)."""
    return (p * (np.log(p + 1e-6) - np.log(q + 1e-6))).sum()
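

# Worked example (illustrative only, not used by the script): for
# p = [0.5, 0.5] and q = [0.9, 0.1],
#   KL(p || q) = 0.5 * ln(0.5 / 0.9) + 0.5 * ln(0.5 / 0.1) ~= 0.51 nats.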


def main(argv):
    parser = argparse.ArgumentParser(
        description="Generate scenes using a previously trained model"
    )
    parser.add_argument(
        "config_file",
        help="Path to the file that contains the experiment configuration"
    )
    parser.add_argument(
        "output_directory",
        default="/tmp/",
        help="Path to the output directory"
    )
    parser.add_argument(
        "path_to_pickled_3d_futute_models",
        help="Path to the 3D-FUTURE model meshes"
    )
    parser.add_argument(
        "--path_to_floor_plan_textures",
        default="../demo/floor_plan_texture_images",
        help="Path to floor texture images"
    )
    parser.add_argument(
        "--weight_file",
        default=None,
        help="Path to a pretrained model"
    )
    parser.add_argument(
        "--n_sequences",
        default=10,
        type=int,
        help="The number of sequences to be generated"
    )
    parser.add_argument(
        "--background",
        type=lambda x: list(map(float, x.split(","))),
        default="1,1,1,1",
        help="Set the background of the scene"
    )
    parser.add_argument(
        "--up_vector",
        type=lambda x: tuple(map(float, x.split(","))),
        default="0,1,0",
        help="Up vector of the scene"
    )
    parser.add_argument(
        "--camera_position",
        type=lambda x: tuple(map(float, x.split(","))),
        default="-0.10923499,1.9325259,-7.19009",
        help="Camera position in the scene"
    )
    parser.add_argument(
        "--camera_target",
        type=lambda x: tuple(map(float, x.split(","))),
        default="0,0,0",
        help="Set the target for the camera"
    )
    parser.add_argument(
        "--window_size",
        type=lambda x: tuple(map(int, x.split(","))),
        default="512,512",
        help="Define the size of the scene and the window"
    )
    parser.add_argument(
        "--with_rotating_camera",
        action="store_true",
        help="Use a camera rotating around the object"
    )
    parser.add_argument(
        "--save_frames",
        help="Path to save the visualization frames to"
    )
    parser.add_argument(
        "--n_frames",
        type=int,
        default=360,
        help="Number of frames to be rendered"
    )
    parser.add_argument(
        "--without_screen",
        action="store_true",
        help="Perform no screen rendering"
    )
    parser.add_argument(
        "--scene_id",
        default=None,
        help="The scene id to be used for conditioning"
    )
    parser.add_argument(
        "--render_top2down",
        action="store_true",
        help="Perform top-down orthographic rendering"
    )
    parser.add_argument(
        "--without_floor",
        action="store_true",
        help="Remove the floor plane from the rendering"
    )
    parser.add_argument(
        "--no_texture",
        action="store_true",
        help="Render the objects without textures"
    )
    parser.add_argument(
        "--save_mesh",
        action="store_true",
        help="Save the generated scene as a mesh"
    )
    parser.add_argument(
        "--mesh_format",
        type=str,
        default=".ply",
        help="File format for the exported scene mesh"
    )
    parser.add_argument(
        "--clip_denoised",
        action="store_true",
        help="Clip the denoised samples during diffusion sampling"
    )
    parser.add_argument(
        "--retrive_objfeats",
        action="store_true",
        help="Retrieve the most similar object meshes based on the generated object latent features"
    )
    parser.add_argument(
        "--fix_order",
        action="store_true",
        help="Iterate over the scenes in a fixed order instead of sampling them randomly"
    )
    parser.add_argument(
        "--compute_intersec",
        action="store_true",
        help="Compute bounding-box intersection and symmetry statistics for the generated scenes"
    )

    args = parser.parse_args(argv)
    # Disable trimesh's logger
    logging.getLogger("trimesh").setLevel(logging.ERROR)

    if torch.cuda.is_available():
        device = torch.device("cuda:0")
    else:
        device = torch.device("cpu")
    print("Running code on", device)

    # Check if the output directory exists and if it doesn't create it
    if not os.path.exists(args.output_directory):
        os.makedirs(args.output_directory)

    config = load_config(args.config_file)

    # Adjust the encoding type for evaluation: switch text conditioning to the
    # fixed text encoding and disable permutation augmentation at test time.
    if 'text' in config["data"]["encoding_type"]:
        if 'textfix' not in config["data"]["encoding_type"]:
            config["data"]["encoding_type"] = config["data"]["encoding_type"].replace('text', 'textfix')

    if "no_prm" not in config["data"]["encoding_type"]:
        print('NO PERM AUG in test')
        config["data"]["encoding_type"] = config["data"]["encoding_type"] + "_no_prm"
    print('encoding type :', config["data"]["encoding_type"])
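    # For illustration (hypothetical value): an encoding_type of
    # "diffusion_text" becomes "diffusion_textfix_no_prm" after the two
    # adjustments above.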
    raw_dataset, train_dataset = get_dataset_raw_and_encoded(
        config["data"],
        filter_fn=filter_function(
            config["data"],
            split=config["training"].get("splits", ["train", "val"])
        ),
        split=config["training"].get("splits", ["train", "val"])
    )

    # Build the dataset of 3D models
    objects_dataset = ThreedFutureDataset.from_pickled_dataset(
        args.path_to_pickled_3d_futute_models
    )
    print("Loaded {} 3D-FUTURE models".format(len(objects_dataset)))

    raw_dataset, dataset = get_dataset_raw_and_encoded(
        config["data"],
        filter_fn=filter_function(
            config["data"],
            split=config["validation"].get("splits", ["test"])
        ),
        split=config["validation"].get("splits", ["test"])
    )
    print("Loaded {} scenes with {} object types:".format(
        len(dataset), dataset.n_object_types)
    )
    network, _, _ = build_network(
        dataset.feature_size, dataset.n_classes,
        config, args.weight_file, device=device
    )
    network.eval()

    # Create the scene and the behaviour list for simple-3dviz
    # scene = Scene(size=args.window_size)
    # scene.up_vector = args.up_vector
    # scene.camera_target = args.camera_target
    # scene.camera_position = args.camera_position
    # scene.light = args.camera_position

    # Create the scene for simple-3dviz top-down orthographic rendering;
    # the arguments are the same as in preprocess_data.py
    if args.render_top2down:
        if args.without_floor:
            scene_top2down = Scene(size=(256, 256), background=[1, 1, 1, 1])
        else:
            scene_top2down = Scene(size=(256, 256), background=[0, 0, 0, 1])
        scene_top2down.up_vector = (0, 0, -1)
        scene_top2down.camera_target = (0, 0, 0)
        scene_top2down.camera_position = (0, 4, 0)
        scene_top2down.light = (0, 4, 0)
        scene_top2down.camera_matrix = Matrix44.orthogonal_projection(
            left=-3.1, right=3.1,
            bottom=3.1, top=-3.1,
            near=0.1, far=6
        )
        print('init scene top2down')
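        # Minimal sketch (an illustrative assumption, never called by the
        # script): with this orthographic camera the visible world extent is
        # x, z in [-3.1, 3.1] m, so a point on the floor maps linearly onto
        # the 256x256 image. The exact axis orientation depends on the
        # camera/up-vector setup above.
        def world_to_pixel(x, z, extent=3.1, resolution=256):
            # Linear map from world coordinates to pixel coordinates.
            u = (x + extent) / (2.0 * extent) * resolution
            v = (z + extent) / (2.0 * extent) * resolution
            return u, v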
    given_scene_id = None
    if args.scene_id:
        for i, di in enumerate(raw_dataset):
            if str(di.scene_id) == args.scene_id:
                given_scene_id = i

    if args.compute_intersec:
        num_objects_counter = []
        total_num_symmetry, total_num_pairs = 0, 0
        NUM_OBJ = AverageAggregator()
        NUM_PAIRS = AverageAggregator()
        BOX_IOU = AverageAggregator()
        BOX_INSEC = AverageAggregator()
        OVERLAP_RATIO = AverageAggregator()

        num_objects_counter_onlysize = []
        total_num_symmetry_onlysize, total_num_pairs_onlysize = 0, 0
        NUM_OBJ_ONLYSIZE = AverageAggregator()
        NUM_PAIRS_ONLYSIZE = AverageAggregator()
        BOX_IOU_ONLYSIZE = AverageAggregator()
        BOX_INSEC_ONLYSIZE = AverageAggregator()
        OVERLAP_RATIO_ONLYSIZE = AverageAggregator()

    classes = np.array(dataset.class_labels)
    print('class labels:', classes, len(classes))
    for i in range(args.n_sequences):
        if args.fix_order:
            if i < len(dataset):
                scene_idx = given_scene_id or i
            else:
                scene_idx = given_scene_id or (i % len(dataset))
        else:
            scene_idx = given_scene_id or np.random.choice(len(dataset))

        current_scene = raw_dataset[scene_idx]
        samples = dataset[scene_idx]
        print("{} / {}: Using the {} floor plan of scene {}".format(
            i, args.n_sequences, scene_idx, current_scene.scene_id)
        )

        # Get a floor plan
        floor_plan, tr_floor, room_mask = floor_plan_from_scene(
            current_scene, args.path_to_floor_plan_textures, no_texture=args.no_texture
        )

        bbox_params = network.generate_layout(
            room_mask=room_mask.to(device),
            num_points=config["network"]["sample_num_points"],
            point_dim=config["network"]["point_dim"],
            # text=torch.from_numpy(samples['desc_emb'])[None, :].to(device) if 'desc_emb' in samples.keys() else None,  # GloVe embedding
            text=samples['description'] if 'description' in samples.keys() else None,  # BERT
            device=device,
            clip_denoised=args.clip_denoised,
            batch_seeds=torch.arange(i, i + 1),
        )
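        # Note (an interpretation, not from the original comments): room_mask
        # conditions the diffusion model on the floor plan, num_points is the
        # maximum number of objects sampled per scene, and point_dim is the
        # per-object attribute dimensionality.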
        boxes = dataset.post_process(bbox_params)
        bbox_params_t = torch.cat([
            boxes["class_labels"],
            boxes["translations"],
            boxes["sizes"],
            boxes["angles"]
        ], dim=-1).cpu().numpy()
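        # Layout sketch (an assumption about the encoding): each row of
        # bbox_params_t[0] is [one-hot class | translation (3) | size (3) |
        # angle], so the last dimension is len(classes) plus the pose
        # dimensions; the leading batch dimension is 1.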
        print('Generated bbox:', bbox_params_t.shape)

        if args.retrive_objfeats:
            objfeats = boxes["objfeats"].cpu().numpy()
            print('shape retrieval based on obj latent feats')
            renderables, trimesh_meshes, model_jids = get_textured_objects_based_on_objfeats(
                bbox_params_t, objects_dataset, classes, diffusion=True,
                no_texture=args.no_texture, query_objfeats=objfeats,
            )
            renderables_onlysize, trimesh_meshes_onlysize, model_jids_onlysize = get_textured_objects(
                bbox_params_t, objects_dataset, classes, diffusion=True, no_texture=args.no_texture
            )
        else:
            renderables, trimesh_meshes, model_jids = get_textured_objects(
                bbox_params_t, objects_dataset, classes, diffusion=True, no_texture=args.no_texture
            )

        if not args.without_floor:
            renderables += floor_plan
            trimesh_meshes += tr_floor
        if args.render_top2down:
            path_to_image = "{}/{}_{}_{:03d}.png".format(
                args.output_directory,
                current_scene.scene_id,
                scene_idx,
                i
            )
            render_top2down(
                scene_top2down,
                renderables,
                color=None,
                mode="shading",
                frame_path=path_to_image,
            )
            if args.retrive_objfeats:
                # Save the results when retrieving objects by size only
                path_to_image_onlysize = "{}/{}".format(
                    args.output_directory,
                    "retrive_only_size",
                )
                if not os.path.exists(path_to_image_onlysize):
                    os.mkdir(path_to_image_onlysize)
                path_to_image_onlysize = "{}/{}/{}_{}_{:03d}.png".format(
                    args.output_directory,
                    "retrive_only_size",
                    current_scene.scene_id,
                    scene_idx,
                    i
                )
                render_top2down(
                    scene_top2down,
                    renderables_onlysize,
                    color=None,
                    mode="shading",
                    frame_path=path_to_image_onlysize,
                )
        if args.compute_intersec:
            num_objects, num_pairs, avg_iou, avg_insec, overlap_ratio = computer_intersection(trimesh_meshes)
            num_objects_counter.append(num_objects)
            NUM_OBJ.value = num_objects
            NUM_PAIRS.value = num_pairs
            BOX_IOU.value = avg_iou
            BOX_INSEC.value = avg_insec
            OVERLAP_RATIO.value = overlap_ratio
            num_symmetries = computer_symmetry(trimesh_meshes, boxes["class_labels"][0, :, :].cpu().numpy(), model_jids)
            total_num_symmetry += num_symmetries
            total_num_pairs += num_pairs
            string = (
                "num scenes: {:d} - num objects avg: {:f} - std: {:f} - num pairs: {:f} "
                "- box iou: {:f} - box intersec: {:f} - overlap ratio: {:f} "
                "- total num symmetries: {:d} - total num pairs: {:d} retrive objfeats"
            ).format(
                i + 1, NUM_OBJ.value, np.array(num_objects_counter).std(),
                NUM_PAIRS.value, BOX_IOU.value, BOX_INSEC.value,
                OVERLAP_RATIO.value, total_num_symmetry, total_num_pairs
            )
            print(string)
            with open(os.path.join(args.output_directory, "iou_states.txt"), 'a') as f:
                f.write(string + "\n")

            if args.retrive_objfeats:
                num_objects, num_pairs, avg_iou, avg_insec, overlap_ratio = computer_intersection(trimesh_meshes_onlysize)
                num_objects_counter_onlysize.append(num_objects)
                NUM_OBJ_ONLYSIZE.value = num_objects
                NUM_PAIRS_ONLYSIZE.value = num_pairs
                BOX_IOU_ONLYSIZE.value = avg_iou
                BOX_INSEC_ONLYSIZE.value = avg_insec
                OVERLAP_RATIO_ONLYSIZE.value = overlap_ratio
                num_symmetries = computer_symmetry(trimesh_meshes_onlysize, boxes["class_labels"][0, :, :].cpu().numpy(), model_jids_onlysize)
                total_num_symmetry_onlysize += num_symmetries
                total_num_pairs_onlysize += num_pairs
                string = (
                    "num scenes: {:d} - num objects avg: {:f} - std: {:f} - num pairs: {:f} "
                    "- box iou: {:f} - box intersec: {:f} - overlap ratio: {:f} "
                    "- total num symmetries: {:d} - total num pairs: {:d} retrive only size"
                ).format(
                    i + 1, NUM_OBJ_ONLYSIZE.value, np.array(num_objects_counter_onlysize).std(),
                    NUM_PAIRS_ONLYSIZE.value, BOX_IOU_ONLYSIZE.value, BOX_INSEC_ONLYSIZE.value,
                    OVERLAP_RATIO_ONLYSIZE.value, total_num_symmetry_onlysize, total_num_pairs_onlysize
                )
                print(string)
                with open(os.path.join(args.output_directory, "iou_states.txt"), 'a') as f:
                    f.write(string + "\n")
        if args.save_mesh:
            if trimesh_meshes is not None:
                # Create a trimesh scene and export it
                path_to_objs = os.path.join(
                    args.output_directory,
                    "scene_mesh",
                )
                if not os.path.exists(path_to_objs):
                    os.mkdir(path_to_objs)
                filename = "{}_{}_{:03d}".format(current_scene.scene_id, scene_idx, i)
                path_to_scene = os.path.join(path_to_objs, filename + args.mesh_format)
                whole_scene_mesh = merge_meshes(trimesh_meshes)
                o3d.io.write_triangle_mesh(path_to_scene, whole_scene_mesh)

            if args.retrive_objfeats:
                if trimesh_meshes_onlysize is not None:
                    # Create a trimesh scene and export it
                    path_to_objs_retrive_onlysize = os.path.join(
                        args.output_directory,
                        "scene_mesh_retrive_onlysize",
                    )
                    if not os.path.exists(path_to_objs_retrive_onlysize):
                        os.mkdir(path_to_objs_retrive_onlysize)
                    filename = "{}_{}_{:03d}".format(current_scene.scene_id, scene_idx, i)
                    path_to_scene_onlysize = os.path.join(path_to_objs_retrive_onlysize, filename + args.mesh_format)
                    whole_scene_mesh_onlysize = merge_meshes(trimesh_meshes_onlysize)
                    o3d.io.write_triangle_mesh(path_to_scene_onlysize, whole_scene_mesh_onlysize)
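        # Usage note (not part of the original pipeline): an exported scene
        # can be inspected later with Open3D, e.g.
        #   o3d.io.read_triangle_mesh(path_to_scene)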
if "description" in samples.keys():
path_to_texts = os.path.join(
args.output_directory,
"{}_{}_{:03d}_text.txt".format(current_scene.scene_id, scene_idx, i)
)
print('the length of samples[description]: {:d}'.format( len(samples['description']) ) )
print('text description {}'.format( samples['description']) )
open(path_to_texts, 'w').write( ''.join(samples['description']) )
if __name__ == "__main__":
main(sys.argv[1:])