In [1]:
# Select the GPU for this notebook. This cell sits before the cell that
# imports TensorFlow; presumably setup_one_gpu has to run first to take effect
# (e.g. by limiting the visible devices) -- TODO confirm against general_tools.
from general_tools.notebook.gpu_utils import setup_one_gpu
GPU = 0  # index of the GPU handed to setup_one_gpu
setup_one_gpu(GPU)

Picking GPU 0


In [3]:
import sys
import time
import numpy as np
import os.path as osp
import tensorflow as tf
import matplotlib.pyplot as plt

from general_tools.notebook.tf import reset_tf_graph
from general_tools.in_out import create_dir
from general_tools.in_out.basics import create_dir, delete_files_in_directory, files_in_subdirs

from geo_tool import Point_Cloud

from tf_lab.in_out.basics import Data_Splitter
from tf_lab.point_clouds.ae_templates import conv_architecture_ala_nips_17, default_train_params_ala_nips_17
from tf_lab.point_clouds.encoders_decoders import encoder_with_convs_and_symmetry, decoder_with_fc_only
from tf_lab.point_clouds.point_net_ae import PointNetAutoEncoder
from tf_lab.point_clouds.autoencoder import Configuration as Conf
from tf_lab.point_clouds.in_out import load_point_clouds_from_filenames, PointCloudDataSet
from tf_lab.data_sets.shape_net import pc_loader as snc_loader
from tf_lab.data_sets.shape_net import snc_category_to_synth_id

from pc_completions.evaluation import basic_comletion_measures

In [4]:
# IPython magics: enable the autoreload extension in mode 2, so edited project
# modules are reloaded before a cell is executed, and render matplotlib
# figures inline in the notebook.
%load_ext autoreload
%autoreload 2
%matplotlib inline

In [5]:
# Number of points per cloud; it also names the data sub-folder that is read.
n_pc_points = 4096

# Root of the data tree, and below it the folder of the clouds that were
# sampled with n_pc_points points.
top_data_dir = '/orions4-zfs/projects/optas/DATA/'
top_pclouds_path = osp.join(
    top_data_dir,
    'Point_Clouds/Shape_Net/Core/from_manifold_meshes/centered/',
    str(n_pc_points)
)

In [6]:
# Alternative split files (currently disabled):
# train_split = osp.join(top_data_dir, 'Point_Clouds/Shape_Net/Splits/wu_small_size_splits/train.txt')
# val_split = osp.join(top_data_dir, 'Point_Clouds/Shape_Net/Splits/wu_small_size_splits/val.txt')
# Active split files, read later through splitter.load_splits.
# NOTE(review): '03001627' is presumably the ShapeNet synset id of the single
# class used, and '85_5_10' presumably the train/val/test percentages -- confirm.
train_split = osp.join(top_data_dir, 'Point_Clouds/Shape_Net/Splits/single_class_splits/03001627/85_5_10/train.txt')
val_split = osp.join(top_data_dir, 'Point_Clouds/Shape_Net/Splits/single_class_splits/03001627/85_5_10/val.txt')

In [7]:
# Run-mode switches read by the cells further down.
do_training = True             # run ae.train(...) in the training cell
load_pretrained_model = False  # when True, configuration and weights are restored from train_dir
load_epoch = None              # epoch to restore; None selects the last saved one
random_seed = 42

# Pass the seed variable rather than a second literal 42, so that editing
# random_seed above cannot silently leave the splitter on the old value.
splitter = Data_Splitter(top_pclouds_path, data_file_ending='.ply', random_seed=random_seed)

In [8]:
bneck_size = 1024     # handed to architectures_to_test as the filter count of the last encoder layer
loss = 'chamfer'      # loss name given to the AE configuration; also part of the experiment name
experiment_id = '1'   # free-form tag embedded in the experiment name

In [9]:
# Build the experiment name from the run settings. The leading 'wu_' token
# already ends in an underscore, so the joined name starts with 'wu__'; the
# literal is kept unchanged because the name determines train_dir.
name_tokens = ['wu_', 'exp', experiment_id, str(n_pc_points), 'pts_bneck', str(bneck_size), loss]
experiment_name = '_'.join(name_tokens)

# Folder of this experiment; the later cells write the configuration, the
# checkpoints and the training log into it.
train_dir = osp.join(top_data_dir, 'OUT/iclr/nn_models/testing_ae_settings', experiment_name)
create_dir(train_dir)

'/orions4-zfs/projects/optas/DATA/OUT/iclr/nn_models/testing_ae_settings/wu__exp_1_4096_pts_bneck_1024_chamfer'

In [10]:
def _load_split(split_file):
    """Read the file list of one split and load the point clouds it names.

    Returns the file names followed by the three values that
    load_point_clouds_from_filenames returns, in their original order.
    """
    file_names = splitter.load_splits(split_file)
    clouds, ids, syns = load_point_clouds_from_filenames(file_names, n_threads=20,
                                                         loader=snc_loader, verbose=True)
    return file_names, clouds, ids, syns


# Training set; every cloud is labelled '<syn_id>_<model_id>'.
tr_files, pclouds, model_ids, syn_ids = _load_split(train_split)
train_data = PointCloudDataSet(pclouds, labels=syn_ids + '_' + model_ids)

# Validation set, built the same way. As before, pclouds / model_ids / syn_ids
# end up holding the validation values once this cell has run.
val_files, pclouds, model_ids, syn_ids = _load_split(val_split)
val_data = PointCloudDataSet(pclouds, labels=syn_ids + '_' + model_ids)

5761 pclouds were loaded. They belong in 1 shape-classes.
338 pclouds were loaded. They belong in 1 shape-classes.


In [12]:
def architectures_to_test(bneck, bnorm, neighborhood, n_pc_points):
    """Assemble the encoder/decoder keyword arguments of one architecture.

    Args:
        bneck: number of filters of the last (fourth) encoder layer.
        bnorm: value stored under 'b_norm' for both the encoder and the decoder.
        neighborhood: filter size of the first encoder layer; the second layer
            uses the same size, capped at 20.
        n_pc_points: number of points per cloud; the last decoder layer has
            n_pc_points * 3 units.

    Returns:
        A dict with the keys 'encoder_args' and 'decoder_args', each holding a
        dict of keyword arguments.
    """
    encoder_args = {
        'filter_sizes': [neighborhood, min(neighborhood, 20), 10, 5],
        'n_filters': [64, 128, 256, bneck],
        'strides': [1, 1, 2, 2],
        'b_norm': bnorm,
    }
    decoder_args = {
        'layer_sizes': [1024, 2048, n_pc_points * 3],
        'b_norm': bnorm,
    }
    return {'encoder_args': encoder_args, 'decoder_args': decoder_args}

In [13]:
# Architecture used in this run: b_norm switched off and a first-layer filter
# size of 20.
b_norm = False
neighb_size = 20
arc_param = architectures_to_test(bneck=bneck_size, bnorm=b_norm,
                                  neighborhood=neighb_size, n_pc_points=n_pc_points)

In [14]:
if load_pretrained_model:
    # Re-use the configuration stored by an earlier run of this experiment.
    conf = Conf.load(osp.join(train_dir, 'configuration'))
    # Function-call form: with a single argument it prints the same text under
    # Python 2 and, unlike the print statement, is also valid Python 3.
    print(conf)
    # The stored train_dir may differ from the current one; the current one wins.
    if conf.train_dir != train_dir:
        conf.train_dir = train_dir
    # The configuration is written back in either case, as before.
    conf.save(osp.join(train_dir, 'configuration'))
else:
    # Fresh configuration for a new training run.
    conf = Conf(
                n_input = [n_pc_points, 3],
                loss = loss,
                training_epochs = 100,
                batch_size = 50,
                denoising = False,
                learning_rate = 0.0005,
                train_dir = train_dir,
                loss_display_step = 1,
                saver_step = 10,
                z_rotate = False,
                encoder = encoder_with_convs_and_symmetry,
                decoder = decoder_with_fc_only,
                encoder_args = arc_param['encoder_args'],
                decoder_args = arc_param['decoder_args'],
               )

    # Extra attributes set after construction: the name is handed to the
    # autoencoder when it is built, and held_out_step is stored with the rest
    # of the configuration (the training log reports the held-out loss every
    # 5 epochs).
    conf.experiment_name = experiment_name
    conf.held_out_step = 5
    conf.save(osp.join(conf.train_dir, 'configuration'))

In [15]:
# Reset the TensorFlow graph (presumably so that re-running this cell does not
# clash with a previously built model -- TODO confirm), then build the
# autoencoder described by conf under the experiment's name.
reset_tf_graph()
ae = PointNetAutoEncoder(conf.experiment_name, conf)

In [16]:
# Optionally restore previously trained weights into the autoencoder.
# NOTE(review): read_saved_epochs is not imported by any cell shown in this
# notebook, so this branch would raise a NameError when load_pretrained_model
# is True and load_epoch is None -- import it from its module before use.
if load_pretrained_model:
    if load_epoch is None: # load last saved.
        # presumably the epochs come back in ascending order, which makes [-1]
        # the most recent one -- TODO confirm
        saved_epochs = read_saved_epochs(conf.train_dir)
        load_epoch = saved_epochs[-1]        
    ae.restore_model(conf.train_dir, load_epoch, verbose=True)

In [17]:
if do_training:
    buf_size = 1 # flush each line
    fout = open(osp.join(conf.train_dir, 'train_stats.txt'), 'a', buf_size)    
    train_stats = ae.train(train_data, conf, log_file=fout, held_out_data=val_data)
    fout.close()

('Epoch:', '0001', 'training time (minutes)=', '0.3366', 'loss=', '0.003380882')
INFO:tensorflow:/orions4-zfs/projects/optas/DATA/OUT/iclr/nn_models/testing_ae_settings/wu__exp_1_4096_pts_bneck_1024_chamfer/models.ckpt-1 is not in all_model_checkpoint_paths. Manually adding it.


INFO:tensorflow:/orions4-zfs/projects/optas/DATA/OUT/iclr/nn_models/testing_ae_settings/wu__exp_1_4096_pts_bneck_1024_chamfer/models.ckpt-1 is not in all_model_checkpoint_paths. Manually adding it.


('Epoch:', '0002', 'training time (minutes)=', '0.3148', 'loss=', '0.001846131')
('Epoch:', '0003', 'training time (minutes)=', '0.3214', 'loss=', '0.001615333')
('Epoch:', '0004', 'training time (minutes)=', '0.3356', 'loss=', '0.001381883')
('Epoch:', '0005', 'training time (minutes)=', '0.3279', 'loss=', '0.001241733')
('Held Out Data :', 'forward time (minutes)=', '0.0050', 'loss=', '0.001258408')
('Epoch:', '0006', 'training time (minutes)=', '0.3266', 'loss=', '0.001109748')
('Epoch:', '0007', 'training time (minutes)=', '0.3307', 'loss=', '0.001035305')
('Epoch:', '0008', 'training time (minutes)=', '0.3255', 'loss=', '0.000952491')
('Epoch:', '0009', 'training time (minutes)=', '0.3275', 'loss=', '0.000880249')
('Epoch:', '0010', 'training time (minutes)=', '0.3345', 'loss=', '0.000826131')
INFO:tensorflow:/orions4-zfs/projects/optas/DATA/OUT/iclr/nn_models/testing_ae_settings/wu__exp_1_4096_pts_bneck_1024_chamfer/models.ckpt-10 is not in all_model_checkpoint_paths. Manually ad

INFO:tensorflow:/orions4-zfs/projects/optas/DATA/OUT/iclr/nn_models/testing_ae_settings/wu__exp_1_4096_pts_bneck_1024_chamfer/models.ckpt-10 is not in all_model_checkpoint_paths. Manually adding it.


('Held Out Data :', 'forward time (minutes)=', '0.0053', 'loss=', '0.000946144')
('Epoch:', '0011', 'training time (minutes)=', '0.3248', 'loss=', '0.000789703')
('Epoch:', '0012', 'training time (minutes)=', '0.3206', 'loss=', '0.000753506')
('Epoch:', '0013', 'training time (minutes)=', '0.3424', 'loss=', '0.000729481')
('Epoch:', '0014', 'training time (minutes)=', '0.3281', 'loss=', '0.000694345')
('Epoch:', '0015', 'training time (minutes)=', '0.3342', 'loss=', '0.000720426')
('Held Out Data :', 'forward time (minutes)=', '0.0049', 'loss=', '0.000925197')
('Epoch:', '0016', 'training time (minutes)=', '0.3300', 'loss=', '0.000682521')
('Epoch:', '0017', 'training time (minutes)=', '0.3284', 'loss=', '0.000646123')
('Epoch:', '0018', 'training time (minutes)=', '0.3278', 'loss=', '0.000613741')
('Epoch:', '0019', 'training time (minutes)=', '0.3329', 'loss=', '0.000597239')
('Epoch:', '0020', 'training time (minutes)=', '0.3248', 'loss=', '0.000580694')
INFO:tensorflow:/orions4-zfs

INFO:tensorflow:/orions4-zfs/projects/optas/DATA/OUT/iclr/nn_models/testing_ae_settings/wu__exp_1_4096_pts_bneck_1024_chamfer/models.ckpt-20 is not in all_model_checkpoint_paths. Manually adding it.


('Held Out Data :', 'forward time (minutes)=', '0.0055', 'loss=', '0.000807113')
('Epoch:', '0021', 'training time (minutes)=', '0.3269', 'loss=', '0.000570991')
('Epoch:', '0022', 'training time (minutes)=', '0.3283', 'loss=', '0.000568392')
('Epoch:', '0023', 'training time (minutes)=', '0.3306', 'loss=', '0.000556921')
('Epoch:', '0024', 'training time (minutes)=', '0.3238', 'loss=', '0.000532802')
('Epoch:', '0025', 'training time (minutes)=', '0.3284', 'loss=', '0.000525494')
('Held Out Data :', 'forward time (minutes)=', '0.0050', 'loss=', '0.000794625')
('Epoch:', '0026', 'training time (minutes)=', '0.3289', 'loss=', '0.000526770')
('Epoch:', '0027', 'training time (minutes)=', '0.3286', 'loss=', '0.000512511')
('Epoch:', '0028', 'training time (minutes)=', '0.3249', 'loss=', '0.000508717')
('Epoch:', '0029', 'training time (minutes)=', '0.3232', 'loss=', '0.000500228')
('Epoch:', '0030', 'training time (minutes)=', '0.3235', 'loss=', '0.000496937')
INFO:tensorflow:/orions4-zfs

INFO:tensorflow:/orions4-zfs/projects/optas/DATA/OUT/iclr/nn_models/testing_ae_settings/wu__exp_1_4096_pts_bneck_1024_chamfer/models.ckpt-30 is not in all_model_checkpoint_paths. Manually adding it.


('Held Out Data :', 'forward time (minutes)=', '0.0055', 'loss=', '0.000775254')
('Epoch:', '0031', 'training time (minutes)=', '0.3225', 'loss=', '0.000495152')
('Epoch:', '0032', 'training time (minutes)=', '0.3236', 'loss=', '0.000487385')
('Epoch:', '0033', 'training time (minutes)=', '0.3225', 'loss=', '0.000472021')
('Epoch:', '0034', 'training time (minutes)=', '0.3284', 'loss=', '0.000466775')
('Epoch:', '0035', 'training time (minutes)=', '0.3232', 'loss=', '0.000455131')
('Held Out Data :', 'forward time (minutes)=', '0.0049', 'loss=', '0.000780727')
('Epoch:', '0036', 'training time (minutes)=', '0.3266', 'loss=', '0.000445644')
('Epoch:', '0037', 'training time (minutes)=', '0.3222', 'loss=', '0.000449642')
('Epoch:', '0038', 'training time (minutes)=', '0.3361', 'loss=', '0.000456020')
('Epoch:', '0039', 'training time (minutes)=', '0.3229', 'loss=', '0.000432731')
('Epoch:', '0040', 'training time (minutes)=', '0.3312', 'loss=', '0.000421089')
INFO:tensorflow:/orions4-zfs

INFO:tensorflow:/orions4-zfs/projects/optas/DATA/OUT/iclr/nn_models/testing_ae_settings/wu__exp_1_4096_pts_bneck_1024_chamfer/models.ckpt-40 is not in all_model_checkpoint_paths. Manually adding it.


('Held Out Data :', 'forward time (minutes)=', '0.0051', 'loss=', '0.000748909')
('Epoch:', '0041', 'training time (minutes)=', '0.3251', 'loss=', '0.000414664')
('Epoch:', '0042', 'training time (minutes)=', '0.3286', 'loss=', '0.000435642')
('Epoch:', '0043', 'training time (minutes)=', '0.3255', 'loss=', '0.000412814')
('Epoch:', '0044', 'training time (minutes)=', '0.3272', 'loss=', '0.000413225')
('Epoch:', '0045', 'training time (minutes)=', '0.3285', 'loss=', '0.000418999')
('Held Out Data :', 'forward time (minutes)=', '0.0052', 'loss=', '0.000743545')
('Epoch:', '0046', 'training time (minutes)=', '0.3231', 'loss=', '0.000409780')
('Epoch:', '0047', 'training time (minutes)=', '0.3234', 'loss=', '0.000413140')
('Epoch:', '0048', 'training time (minutes)=', '0.3225', 'loss=', '0.000401614')
('Epoch:', '0049', 'training time (minutes)=', '0.3224', 'loss=', '0.000390364')
('Epoch:', '0050', 'training time (minutes)=', '0.3263', 'loss=', '0.000384325')
INFO:tensorflow:/orions4-zfs

INFO:tensorflow:/orions4-zfs/projects/optas/DATA/OUT/iclr/nn_models/testing_ae_settings/wu__exp_1_4096_pts_bneck_1024_chamfer/models.ckpt-50 is not in all_model_checkpoint_paths. Manually adding it.


('Held Out Data :', 'forward time (minutes)=', '0.0052', 'loss=', '0.000730725')
('Epoch:', '0051', 'training time (minutes)=', '0.3252', 'loss=', '0.000384923')
('Epoch:', '0052', 'training time (minutes)=', '0.3229', 'loss=', '0.000381082')
('Epoch:', '0053', 'training time (minutes)=', '0.3282', 'loss=', '0.000374425')
('Epoch:', '0054', 'training time (minutes)=', '0.3238', 'loss=', '0.000373968')
('Epoch:', '0055', 'training time (minutes)=', '0.3256', 'loss=', '0.000376388')
('Held Out Data :', 'forward time (minutes)=', '0.0050', 'loss=', '0.000767868')
('Epoch:', '0056', 'training time (minutes)=', '0.3281', 'loss=', '0.000382435')
('Epoch:', '0057', 'training time (minutes)=', '0.3237', 'loss=', '0.000372227')
('Epoch:', '0058', 'training time (minutes)=', '0.3230', 'loss=', '0.000367514')
('Epoch:', '0059', 'training time (minutes)=', '0.3255', 'loss=', '0.000358917')
('Epoch:', '0060', 'training time (minutes)=', '0.3232', 'loss=', '0.000354713')
INFO:tensorflow:/orions4-zfs

INFO:tensorflow:/orions4-zfs/projects/optas/DATA/OUT/iclr/nn_models/testing_ae_settings/wu__exp_1_4096_pts_bneck_1024_chamfer/models.ckpt-60 is not in all_model_checkpoint_paths. Manually adding it.


('Held Out Data :', 'forward time (minutes)=', '0.0053', 'loss=', '0.000700989')
('Epoch:', '0061', 'training time (minutes)=', '0.3266', 'loss=', '0.000364730')
('Epoch:', '0062', 'training time (minutes)=', '0.3277', 'loss=', '0.000358370')
('Epoch:', '0063', 'training time (minutes)=', '0.3232', 'loss=', '0.000348133')
('Epoch:', '0064', 'training time (minutes)=', '0.3288', 'loss=', '0.000342780')
('Epoch:', '0065', 'training time (minutes)=', '0.3315', 'loss=', '0.000351249')
('Held Out Data :', 'forward time (minutes)=', '0.0049', 'loss=', '0.000734599')
('Epoch:', '0066', 'training time (minutes)=', '0.3243', 'loss=', '0.000351536')
('Epoch:', '0067', 'training time (minutes)=', '0.3307', 'loss=', '0.000344179')
('Epoch:', '0068', 'training time (minutes)=', '0.3238', 'loss=', '0.000336846')
('Epoch:', '0069', 'training time (minutes)=', '0.3234', 'loss=', '0.000335677')
('Epoch:', '0070', 'training time (minutes)=', '0.3258', 'loss=', '0.000332953')
INFO:tensorflow:/orions4-zfs

INFO:tensorflow:/orions4-zfs/projects/optas/DATA/OUT/iclr/nn_models/testing_ae_settings/wu__exp_1_4096_pts_bneck_1024_chamfer/models.ckpt-70 is not in all_model_checkpoint_paths. Manually adding it.


('Held Out Data :', 'forward time (minutes)=', '0.0055', 'loss=', '0.000751548')
('Epoch:', '0071', 'training time (minutes)=', '0.3214', 'loss=', '0.000328591')
('Epoch:', '0072', 'training time (minutes)=', '0.3255', 'loss=', '0.000326604')
('Epoch:', '0073', 'training time (minutes)=', '0.3227', 'loss=', '0.000323873')
('Epoch:', '0074', 'training time (minutes)=', '0.3282', 'loss=', '0.000325194')
('Epoch:', '0075', 'training time (minutes)=', '0.3244', 'loss=', '0.000322821')
('Held Out Data :', 'forward time (minutes)=', '0.0047', 'loss=', '0.000757342')
('Epoch:', '0076', 'training time (minutes)=', '0.3320', 'loss=', '0.000322788')
('Epoch:', '0077', 'training time (minutes)=', '0.3258', 'loss=', '0.000317479')
('Epoch:', '0078', 'training time (minutes)=', '0.3323', 'loss=', '0.000310879')
('Epoch:', '0079', 'training time (minutes)=', '0.3283', 'loss=', '0.000307530')
('Epoch:', '0080', 'training time (minutes)=', '0.3225', 'loss=', '0.000309738')
INFO:tensorflow:/orions4-zfs

INFO:tensorflow:/orions4-zfs/projects/optas/DATA/OUT/iclr/nn_models/testing_ae_settings/wu__exp_1_4096_pts_bneck_1024_chamfer/models.ckpt-80 is not in all_model_checkpoint_paths. Manually adding it.


('Held Out Data :', 'forward time (minutes)=', '0.0053', 'loss=', '0.000748870')
('Epoch:', '0081', 'training time (minutes)=', '0.3249', 'loss=', '0.000309352')
('Epoch:', '0082', 'training time (minutes)=', '0.3226', 'loss=', '0.000303970')
('Epoch:', '0083', 'training time (minutes)=', '0.3281', 'loss=', '0.000298700')
('Epoch:', '0084', 'training time (minutes)=', '0.3260', 'loss=', '0.000301767')
('Epoch:', '0085', 'training time (minutes)=', '0.3291', 'loss=', '0.000298950')
('Held Out Data :', 'forward time (minutes)=', '0.0053', 'loss=', '0.000731552')
('Epoch:', '0086', 'training time (minutes)=', '0.3268', 'loss=', '0.000298772')
('Epoch:', '0087', 'training time (minutes)=', '0.3302', 'loss=', '0.000302319')
('Epoch:', '0088', 'training time (minutes)=', '0.3282', 'loss=', '0.000296602')
('Epoch:', '0089', 'training time (minutes)=', '0.3246', 'loss=', '0.000294277')
('Epoch:', '0090', 'training time (minutes)=', '0.3276', 'loss=', '0.000292981')
INFO:tensorflow:/orions4-zfs

INFO:tensorflow:/orions4-zfs/projects/optas/DATA/OUT/iclr/nn_models/testing_ae_settings/wu__exp_1_4096_pts_bneck_1024_chamfer/models.ckpt-90 is not in all_model_checkpoint_paths. Manually adding it.


('Held Out Data :', 'forward time (minutes)=', '0.0053', 'loss=', '0.000727372')
('Epoch:', '0091', 'training time (minutes)=', '0.3269', 'loss=', '0.000288320')
('Epoch:', '0092', 'training time (minutes)=', '0.3278', 'loss=', '0.000287931')
('Epoch:', '0093', 'training time (minutes)=', '0.3244', 'loss=', '0.000284935')
('Epoch:', '0094', 'training time (minutes)=', '0.3359', 'loss=', '0.000290867')
('Epoch:', '0095', 'training time (minutes)=', '0.3226', 'loss=', '0.000286699')
('Held Out Data :', 'forward time (minutes)=', '0.0053', 'loss=', '0.000723344')
('Epoch:', '0096', 'training time (minutes)=', '0.3284', 'loss=', '0.000284524')
('Epoch:', '0097', 'training time (minutes)=', '0.3284', 'loss=', '0.000284057')
('Epoch:', '0098', 'training time (minutes)=', '0.3389', 'loss=', '0.000280941')
('Epoch:', '0099', 'training time (minutes)=', '0.3270', 'loss=', '0.000274134')
('Epoch:', '0100', 'training time (minutes)=', '0.3285', 'loss=', '0.000270106')
INFO:tensorflow:/orions4-zfs

INFO:tensorflow:/orions4-zfs/projects/optas/DATA/OUT/iclr/nn_models/testing_ae_settings/wu__exp_1_4096_pts_bneck_1024_chamfer/models.ckpt-100 is not in all_model_checkpoint_paths. Manually adding it.


('Held Out Data :', 'forward time (minutes)=', '0.0053', 'loss=', '0.000708920')


In [19]:
# Quick check of the reconstruction quality. The batch is drawn from
# train_data, so the numbers printed below describe training shapes, not
# held-out ones.
some_pcs = train_data.next_batch(100)[0]   # element 0 of what next_batch returns
recon_pcs = ae.reconstruct(some_pcs)[0]    # element 0 of what reconstruct returns
# NOTE(review): a and b are presumably the accuracy and coverage values whose
# medians the call prints -- confirm against pc_completions.evaluation.
a, b, _ = basic_comletion_measures(some_pcs, recon_pcs)

Medians of Accuracy=0.935, Coverage=0.969, H-mean=0.951.
