In [1]:
import sys
import time
import numpy as np
import os.path as osp
import tensorflow as tf
import matplotlib.pyplot as plt

from tf_lab.fundamentals.utils import set_visible_GPUs, reset_tf_graph

import tf_lab.point_clouds.in_out as pio
from tf_lab.point_clouds.in_out import PointCloudDataSet, write_model_ids_of_datasets
from tf_lab.point_clouds.point_net_ae import PointNetAutoEncoder
from tf_lab.point_clouds.autoencoder import Configuration as Conf
import tf_lab.point_clouds.encoders_decoders as enc_dec

from tf_lab.autopredictors.scripts.helper import shape_net_category_to_synth_id, match_incomplete_to_complete_data

from tf_lab.autopredictors.plotting import plot_original_pclouds_vs_reconstructed, \
                                           plot_train_val_test_curves, plot_reconstructions_at_epoch, \
                                           plot_interpolations

from tf_lab.autopredictors.evaluate import eval_model, read_saved_epochs
                                                  

from general_tools.in_out.basics import create_dir, delete_files_in_directory, files_in_subdirs
from general_tools.simpletons import select_first_last_and_k
from geo_tool import Point_Cloud

from tf_lab.nips.shape_net import pc_loader as sn_pc_loader



In [2]:
# IPython autoreload in mode 2: imported modules are re-imported before each
# cell runs, so edits to the project packages are picked up without a restart.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [8]:
# --- Experiment identity and hyper-parameters -------------------------------
n_pc_samples = 2048          # appears in both the input data path and the experiment name
loss = 'chamfer'
max_training_epochs = 1000
seed = 42
GPU = 0

# --- Run-mode switches -------------------------------------------------------
do_training = False
do_plotting = True
first_time_running = False
load_model = True

# Name of the experiment; it is also the name of the model output folder.
experiment_name = '1_three_fc_dec_with_spn_%dpts_%s' % (n_pc_samples, loss)

In [4]:
# Root folder holding both the input point clouds and the model outputs.
top_data_dir = '/orions4-zfs/projects/lins2/Panos_Space/DATA/'

# Input: point clouds, stored in a sub-folder named after the per-cloud point count.
full_pclouds_path = osp.join(
    top_data_dir,
    'Point_Clouds/Shape_Net/Core/from_manifold_meshes/centered/',
    str(n_pc_samples)
)

# Output: one folder per experiment. The call is kept as the cell's last
# expression so its return value is still displayed as the cell output.
train_dir = osp.join(top_data_dir, 'OUT/models/nips/vanilla_ae/', experiment_name)
create_dir(train_dir)

'/orions4-zfs/projects/lins2/Panos_Space/DATA/OUT/models/nips/vanilla_ae/1_three_fc_dec_with_spn_2048pts_chamfer'

In [5]:
# Collect the names of the '.ply' files found under full_pclouds_path;
# verbose=True makes the helper report how many files it found.
full_file_names = pio.load_filenames_of_input_data(full_pclouds_path, '.ply', verbose=True)

57449 files were found.


In [6]:
# Load every discovered file with the ShapeNet loader, using 40 threads. The
# helper returns three results that are indexed in parallel further below.
full_pclouds, full_model_names, full_syn_ids = pio.load_crude_point_clouds(
    file_names=full_file_names, n_threads=40, loader=sn_pc_loader)
print('%d files containing complete point clouds were found.' % (len(full_pclouds), ))

57449 files containing complete point clouds were found.




In [36]:
# NOTE(review): everything in this cell except the `model_unames` assignment is
# disabled. That includes the RNG seeding, the seeded train/validation/test
# split, and the alternative that wraps all clouds into a single data set.
# As a result no active assignment of `train_data` is visible in this notebook,
# although the training cell reads it when do_training is True -- confirm which
# variant should be restored before training again.
# np.random.seed(seed)
# tf.set_random_seed(seed)

# train_data_, val_data_, test_data_, r_indices = pio.train_validate_test_split(
#                                                             [full_pclouds, full_model_names],
#                                                             train_perc=0.85,
#                                                             validate_perc=0.05,
#                                                             test_perc=0.1,
#                                                             seed=seed)

# model_ids = np.vstack((full_model_names, full_syn_ids)).T

# if first_time_running:
#     write_model_ids_of_datasets(train_dir, model_ids, r_indices)

# train_data = PointCloudDataSet(train_data_[0], labels=train_data_[1])
# val_data = PointCloudDataSet(val_data_[0], labels=val_data_[1])
# test_data = PointCloudDataSet(test_data_[0], labels=test_data_[1])

# Per-model key of the form '<model name>.<synset id>'; later cells use it as
# the data-set label and as the stem of the rendered image file names.
# Assumes both operands are arrays whose `+` concatenates strings element-wise
# (e.g. object dtype) -- TODO confirm.
model_unames = full_model_names + '.' + full_syn_ids
# train_data = PointCloudDataSet(full_pclouds, labels=model_unames)

In [9]:
if not load_model:
    # New experiment: describe the auto-encoder and persist its configuration.
    n_input = [n_pc_samples, 3]

    encoder_args = {'spn': True}
    decoder_args = {
        'layer_sizes': [512, 1024, np.prod(n_input)],
        'non_linearity': tf.nn.relu,
    }

    conf = Conf(n_input=n_input,
                training_epochs=max_training_epochs,
                batch_size=50,
                loss=loss,
                denoising=False,
                train_dir=train_dir,
                loss_display_step=1,
                saver_step=10,
                learning_rate=0.0005,
                z_rotate=True,
                encoder=enc_dec.encoder_with_convs_and_symmetry,
                encoder_args=encoder_args,
                decoder=enc_dec.decoder_with_fc_only,
                decoder_args=decoder_args)

    conf.experiment_name = experiment_name
    conf.save(osp.join(conf.train_dir, 'configuration'))
else:
    # Existing experiment: reuse the configuration stored next to the model.
    conf = Conf.load(osp.join(train_dir, 'configuration'))
    print(conf)

# Start from a clean graph on the chosen GPU, then build the network.
reset_tf_graph()
set_visible_GPUs([GPU])
ae = PointNetAutoEncoder(experiment_name, conf)

if load_model:
    # Restore the weights of the last entry reported by read_saved_epochs
    # (assumes the list is in ascending epoch order -- TODO confirm).
    saved_epochs = read_saved_epochs(conf.train_dir)
    last_epoch = saved_epochs[-1]
    ae.restore_model(conf.train_dir, last_epoch, verbose=True)

                    batch_size: 50
                 consistent_io: None
                         debug: False
                       decoder: decoder_with_fc_only
                  decoder_args: {'non_linearity': <function relu at 0x7f8bee5fbe60>, 'layer_sizes': [512, 1024, 6144]}
                       encoder: encoder_with_convs_and_symmetry
                  encoder_args: {}
               experiment_name: 1_three_fc_dec_with_spn_2048pts_chamfer
                 gauss_augment: None
                  is_denoising: False
               latent_vs_recon: 1.0
                 learning_rate: 0.0005
                          loss: chamfer
             loss_display_step: 1
                       n_input: [2048, 3]
                      n_output: [2048, 3]
                           n_z: None
             saver_max_to_keep: None
                    saver_step: 10
                     train_dir: /orions4-zfs/projects/lins2/Panos_Space/DATA/OUT/models/nips/vanilla_ae/1_three_fc_dec_with_spn_20

In [9]:
# Attach a summary FileWriter to the auto-encoder's session graph; the event
# files go to the hard-coded folder below (e.g. for inspection in TensorBoard).
summary_writer = tf.summary.FileWriter('/orions4-zfs/projects/lins2/Panos_Space/test', graph=ae.sess.graph)

In [10]:
if do_training:
    # One training run; its returned statistics are appended (mode 'a') to the
    # experiment's train_stats.txt, so earlier runs stay in the file.
    training_stats = [ae.train(train_data, conf)]
    with open(osp.join(conf.train_dir, 'train_stats.txt'), 'a') as fout:
        np.savetxt(fout, np.array(training_stats)[0])

In [39]:
#TSNE PLOTTING
from tf_lab.autopredictors.scripts.virt_scan_data import all_classes as vscan_categories
from tf_lab.nips.shape_net import shape_net_core_synth_id_to_category as syn_to_cat
from tf_lab.autopredictors.scripts.virt_scan_data import plotting_default_params
from general_tools.clustering.plt import plot_2d_embedding_in_grid_greedy_way, plot_2d_embedding_in_grid_forceful


# Category of every loaded model, looked up from its synset id.
all_categories = [syn_to_cat[s] for s in full_syn_ids]

# Keep only the models whose category is listed in `vscan_categories`.
# The builtin `bool` is used as dtype: the `np.bool` alias was deprecated in
# NumPy 1.20 and removed in 1.24, and was only ever an alias of the builtin.
mask_on_vcats = np.array([c in vscan_categories for c in all_categories], dtype=bool)
in_vcat_index = np.arange(len(mask_on_vcats))[mask_on_vcats]

# Parallel subsets (clouds, synset ids, unique names) of the kept models.
full_pclouds_tmp = full_pclouds[in_vcat_index]
syn_ids_tmp = full_syn_ids[in_vcat_index]
model_unames_tmp = model_unames[in_vcat_index]


def random_samples_from_each_class(syn_ids, samples_per_class=100):
    """Draw up to ``samples_per_class`` random positions for every distinct id.

    Parameters
    ----------
    syn_ids : sequence
        One class id per item. Any sequence is accepted; it is converted with
        ``np.asarray`` so the element-wise comparison below also works for
        plain lists and tuples, which otherwise compare as a single scalar
        and select nothing.
    samples_per_class : int
        Upper bound on the number of positions drawn per class. Classes with
        fewer members contribute all of their members.

    Returns
    -------
    list of int
        Positions into ``syn_ids``, grouped by class in the sorted order that
        ``np.unique`` yields; within a class the order is random. Sampling is
        without replacement and uses NumPy's global random state.
    """
    syn_ids = np.asarray(syn_ids)
    selected = []
    for syn_id in np.unique(syn_ids):
        in_class = np.where(syn_ids == syn_id)[0]
        effective_samples = min(len(in_class), samples_per_class)
        picked = np.random.choice(in_class, effective_samples, replace=False)
        selected.extend(picked.tolist())
    return selected

def save_images_of_pointclouds(pclouds, model_unames, syn_ids, save_image_dir):
    """Render each point cloud and save it as '<model_uname>.png'.

    Parameters
    ----------
    pclouds : iterable
        The point clouds to render; each one is passed to ``Point_Cloud`` as
        its ``points`` argument.
    model_unames : indexable of str
        Name of every cloud, parallel to ``pclouds``; it becomes the stem of
        the image file name.
    syn_ids : indexable
        Synset id of every cloud, parallel to ``pclouds``; it selects the
        category whose default plotting parameters are used.
    save_image_dir : str
        Existing folder that receives the images.
    """
    plt.ioff()  # switch matplotlib's interactive mode off while rendering
    for i, pc in enumerate(pclouds):
        # The synset-id -> category mapping is imported in this notebook under
        # the alias `syn_to_cat`; the unaliased name is never bound and raised
        # a NameError on a fresh kernel.
        category_i = syn_to_cat[syn_ids[i]]
        params = plotting_default_params(category_i)
        save_name = model_unames[i] + '.png'
        fig = Point_Cloud(points=pc).plot(show=False, show_axis=False, **params)
        fig.savefig(osp.join(save_image_dir, save_name))
        # Close this specific figure so figures do not accumulate in the loop.
        plt.close(fig)



# Render a random, per-class capped subset of the selected clouds to PNG files
# in an 'images' folder next to the input point clouds.
top_image_dir = osp.join(full_pclouds_path, 'images')
create_dir(top_image_dir)
samples_per_class = 200

# Seed NumPy's global RNG with the notebook-wide seed so the same models are
# drawn on every run. The only other seeding call in the notebook is commented
# out, which left this sample (and every result derived from it)
# non-reproducible.
np.random.seed(seed)
rand_ind = random_samples_from_each_class(syn_ids_tmp, samples_per_class)
save_images_of_pointclouds(full_pclouds_tmp[rand_ind], model_unames_tmp[rand_ind],
                           syn_ids_tmp[rand_ind], top_image_dir)

In [41]:
from tf_lab.autopredictors.exploration import latent_embedding_of_entire_dataset
from sklearn.manifold import TSNE
# Wrap the sampled clouds, labelled by their unique names, without an initial
# shuffle (presumably so row order stays aligned with rand_ind, which the
# image list built later relies on -- TODO confirm).
tsne_dataset = PointCloudDataSet(full_pclouds_tmp[rand_ind], labels=model_unames_tmp[rand_ind], init_shuffle=False)
# Encode the whole data set with the auto-encoder; only `latent_codes` is used
# by the visible cells that follow.
feed, latent_codes, ids = latent_embedding_of_entire_dataset(tsne_dataset, ae, conf)
# 2-D t-SNE of the latent codes, PCA-initialised and with a fixed random_state.
model = TSNE(n_components=2, random_state=seed, init='pca', verbose=True)
tsne_small = model.fit_transform(latent_codes)

[t-SNE] Computing pairwise distances...
[t-SNE] Computing 91 nearest neighbors...
[t-SNE] Computed conditional probabilities for sample 1000 / 1600
[t-SNE] Computed conditional probabilities for sample 1600 / 1600
[t-SNE] Mean sigma: 1.179427
[t-SNE] KL divergence after 100 iterations with early exaggeration: 0.753273
[t-SNE] Error after 125 iterations: 0.753273


In [42]:
# Path of the rendered image of every sampled model, in sampling order.
image_files = [osp.join(top_image_dir, s_id + '.png')
               for s_id in model_unames_tmp[rand_ind]]

In [43]:
# Combine the rendered images using their 2-D t-SNE coordinates and write the
# result to 'tsne_complete_ae.png' (relative to the working directory).
# big_dim / small_dim are presumably the canvas and per-image sizes in pixels
# -- TODO confirm. The trailing ';' suppresses the cell's output repr.
plot_2d_embedding_in_grid_forceful(tsne_small, image_files, big_dim=8000, small_dim=200, save_file='tsne_complete_ae.png');