In [1]:
from general_tools.notebook.gpu_utils import setup_one_gpu
# Index of the GPU handed to setup_one_gpu; the recorded cell output
# reports "Picking GPU 3" for this value.
# NOTE(review): presumably this must run before TensorFlow is imported in the
# next cell so the device choice takes effect -- confirm against setup_one_gpu.
GPU = 3
setup_one_gpu(GPU)

Picking GPU 3


In [2]:
import sys
import time
import numpy as np
import os.path as osp
import tensorflow as tf
import matplotlib.pyplot as plt

from general_tools.notebook.tf import reset_tf_graph
from general_tools.in_out import create_dir
from general_tools.in_out.basics import create_dir, delete_files_in_directory, files_in_subdirs

from geo_tool import Point_Cloud

from tf_lab.in_out.basics import Data_Splitter, read_saved_epochs

from tf_lab.point_clouds.encoders_decoders import encoder_with_convs_and_symmetry_new, decoder_with_fc_only
from tf_lab.point_clouds.point_net_ae import PointNetAutoEncoder
from tf_lab.point_clouds.autoencoder import Configuration as Conf
from tf_lab.point_clouds.in_out import load_point_clouds_from_filenames, PointCloudDataSet
from tf_lab.data_sets.shape_net import pc_loader as snc_loader
from tf_lab.data_sets.shape_net import snc_category_to_synth_id

In [3]:
# Load IPython's autoreload extension and use mode 2, so edited project
# modules are re-imported before each cell runs.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inside the notebook output.
%matplotlib inline

In [4]:
# ---- Data / model identity -------------------------------------------------
n_pc_samples = 2048          # points per cloud; also selects the input sub-directory
loss = 'chamfer'             # reconstruction loss name passed to the configuration
z_rotate = True              # forwarded to the configuration as `z_rotate`
seed = 42                    # NOTE(review): not referenced by any cell visible here

# ---- Run-mode switches -----------------------------------------------------
do_training = True           # run the training cell
load_model = True            # load the saved configuration and latest checkpoint
first_time_running = True    # NOTE(review): not referenced by any cell visible here
max_training_epochs = 2000   # used as `training_epochs` when a new configuration is built

# The experiment name doubles as the output directory name under the models root.
experiment_name = ''.join([
    'all_snc_rotated_mlp_arch_with_bnorm_on_encoder_3_pc',
    str(n_pc_samples),
    'pts_',
    loss,
])

In [6]:
# Root of the data tree; inputs are read from and models written under it.
# NOTE(review): absolute, machine-specific path -- adjust when running elsewhere.
top_data_dir = '/orions4-zfs/projects/optas/DATA/'

# Every experiment gets its own directory, named after the experiment.
models_subdir = 'OUT/iclr/nn_models/all_snc'
train_dir = osp.join(top_data_dir, models_subdir, experiment_name)

# Kept as the cell's last expression so the resulting path is displayed.
create_dir(train_dir)

'/orions4-zfs/projects/optas/DATA/OUT/iclr/nn_models/all_snc/all_snc_rotated_mlp_arch_with_bnorm_on_encoder_3_pc2048pts_chamfer'

In [7]:
# Loader settings: number of threads handed to the loader, and whether it
# reports what it loaded (the "... pclouds were loaded" lines in the output).
n_threads = 25
verbose = True

# Three separately sampled versions of the same shapes; each one lives in its
# own sub-directory, further split by the number of points per cloud.
pc_versions = ['centered', 'centered_3rd_version', 'centered_2nd_version']
pclouds_root = osp.join(top_data_dir, 'Point_Clouds/Shape_Net/Core/from_manifold_meshes/')

pclouds = []
for version in pc_versions:
    full_pclouds_path = osp.join(pclouds_root, version, str(n_pc_samples))
    full_file_names = list(files_in_subdirs(full_pclouds_path, '.ply'))
    # Only the first of the three returned values is kept.
    # NOTE(review): the two discarded values are presumably per-cloud names and
    # class ids -- confirm against load_point_clouds_from_filenames.
    pclouds_v, _, _ = load_point_clouds_from_filenames(
        full_file_names, n_threads, snc_loader, verbose)
    pclouds.append(pclouds_v)



57449 pclouds were loaded. They belong in 57 shape-classes.
57449 pclouds were loaded. They belong in 57 shape-classes.
57449 pclouds were loaded. They belong in 57 shape-classes.


In [8]:
# Stack the per-version arrays along the first axis into one training set
# (3 x 57449 = 172347 examples in the recorded output).
all_pclouds = np.vstack(pclouds)
train_data = PointCloudDataSet(all_pclouds)
print(train_data.num_examples)

172347


In [9]:
# Shape of one input cloud: n_pc_samples points with 3 coordinates each.
n_input = [n_pc_samples, 3]

# Encoder: four layers with growing filter counts; every layer uses filter
# size 1 and stride 1.
# NOTE(review): 'b_norm' presumably toggles batch normalisation -- confirm in
# encoder_with_convs_and_symmetry_new.
n_filters = [128, 128, 256, 512]
encoder_args = {
    'n_filters': n_filters,
    'filter_sizes': [1] * len(n_filters),
    'strides': [1] * len(n_filters),
    'b_norm': True,
}

# Decoder: fully connected layers whose last layer has one unit per output
# coordinate (np.prod(n_input) == n_pc_samples * 3).
decoder_args = {
    'layer_sizes': [1024, 2048, np.prod(n_input)],
    'b_norm': False,
}

In [14]:
# Obtain the experiment configuration: build and save a new one, or reload
# the one previously saved in the experiment directory.
if not load_model:
    conf = Conf(
        n_input=n_input,
        loss=loss,
        z_rotate=z_rotate,
        denoising=False,
        encoder=encoder_with_convs_and_symmetry_new,
        encoder_args=encoder_args,
        decoder=decoder_with_fc_only,
        decoder_args=decoder_args,
        training_epochs=max_training_epochs,
        batch_size=50,
        learning_rate=0.0005,
        train_dir=train_dir,
        loss_display_step=1,
        saver_step=10,
    )
    conf.allow_gpu_growth = False
    conf.experiment_name = experiment_name
    # Save it so later runs with load_model=True can reload it.
    conf.save(osp.join(conf.train_dir, 'configuration'))
else:
    conf = Conf.load(osp.join(train_dir, 'configuration'))
    print(conf)

# Start from a clean TensorFlow graph before building the model.
reset_tf_graph()
ae = PointNetAutoEncoder(experiment_name, conf)

              allow_gpu_growth: False
                    batch_size: 50
                 consistent_io: None
                         debug: False
                       decoder: decoder_with_fc_only
                  decoder_args: {'b_norm': False, 'layer_sizes': [1024, 2048, 6144]}
                       encoder: encoder_with_convs_and_symmetry_new
                  encoder_args: {'filter_sizes': [1, 1, 1, 1], 'n_filters': [128, 128, 256, 512], 'b_norm': True, 'strides': [1, 1, 1, 1]}
               experiment_name: all_snc_rotated_mlp_arch_with_bnorm_on_encoder_3_pc2048pts_chamfer
                 gauss_augment: None
                  is_denoising: False
               latent_vs_recon: 1.0
                 learning_rate: 0.0005
                          loss: chamfer
             loss_display_step: 1
                       n_input: [2048, 3]
                      n_output: [2048, 3]
                           n_z: None
             saver_max_to_keep: None
                    saver_

In [15]:
# Resume from the most recent checkpoint found in the experiment directory.
if load_model:
    saved_epochs = read_saved_epochs(conf.train_dir)
    # NOTE(review): assumes saved_epochs is non-empty and ordered so that the
    # last entry is the latest epoch -- confirm against read_saved_epochs.
    last_epoch = saved_epochs[-1]
    ae.restore_model(conf.train_dir, last_epoch, verbose=True)

    if do_training:
        # Only the remaining epochs still need to be trained. This edits conf
        # in place, so re-running this cell without re-running the
        # configuration cell subtracts last_epoch again.
        conf.training_epochs -= last_epoch
        print(conf.training_epochs)

Model restored in epoch 840.
1160


In [16]:
# Train the autoencoder, appending per-epoch statistics to train_stats.txt
# in the experiment directory.
if do_training:
    buf_size = 1  # line-buffered, so each log line reaches disk as it is written
    stats_path = osp.join(conf.train_dir, 'train_stats.txt')
    # The context manager closes the log even when training is interrupted
    # (e.g. by a KeyboardInterrupt), which a trailing fout.close() would miss.
    with open(stats_path, 'a', buf_size) as fout:
        train_stats = ae.train(train_data, conf, log_file=fout)

('Epoch:', '0841', 'training time (minutes)=', '5.6655', 'loss=', '0.000457045')
('Epoch:', '0842', 'training time (minutes)=', '5.8733', 'loss=', '0.000454852')
('Epoch:', '0843', 'training time (minutes)=', '5.8284', 'loss=', '0.000451665')
('Epoch:', '0844', 'training time (minutes)=', '5.9518', 'loss=', '0.000449829')
('Epoch:', '0845', 'training time (minutes)=', '5.8962', 'loss=', '0.000461034')
('Epoch:', '0846', 'training time (minutes)=', '6.1026', 'loss=', '0.000457664')
('Epoch:', '0847', 'training time (minutes)=', '6.0204', 'loss=', '0.000460795')
('Epoch:', '0848', 'training time (minutes)=', '6.0758', 'loss=', '0.000456962')
('Epoch:', '0849', 'training time (minutes)=', '6.0364', 'loss=', '0.000459388')
('Epoch:', '0850', 'training time (minutes)=', '5.8582', 'loss=', '0.000458844')
INFO:tensorflow:/orions4-zfs/projects/optas/DATA/OUT/iclr/nn_models/all_snc/all_snc_rotated_mlp_arch_with_bnorm_on_encoder_3_pc2048pts_chamfer/models.ckpt-850 is not in all_model_checkpoint_

KeyboardInterrupt: 