In [1]:
import sys
import time
import numpy as np
import os.path as osp
import tensorflow as tf
import matplotlib.pyplot as plt
from scipy.stats import hmean
from sklearn.manifold import TSNE

from tf_lab.fundamentals.utils import set_visible_GPUs, reset_tf_graph

import tf_lab.point_clouds.in_out as pio
from tf_lab.point_clouds.in_out import PointCloudDataSet, write_model_ids_of_datasets
from tf_lab.point_clouds.point_net_ae import PointNetAutoEncoder
from tf_lab.point_clouds.autoencoder import Configuration as Conf
import tf_lab.point_clouds.encoders_decoders as enc_dec


import tf_lab.autopredictors.scripts.virt_scan_data as vscan

from tf_lab.autopredictors.scripts.helper import shape_net_category_to_synth_id, points_extension, \
                                                 shape_net_core_synth_id_to_category


from tf_lab.autopredictors.plotting import plot_original_pclouds_vs_reconstructed, \
                                           plot_train_val_test_curves, plot_reconstructions_at_epoch
        
from tf_lab.autopredictors.evaluate import eval_model, read_saved_epochs, accuracy_of_completion, \
                                           coverage_of_completion, save_reconstructions, \
                                           save_pc_prediction_stats, save_stats_of_multi_class_experiments, \
                                           paper_pc_completion_experiment_id_best_epoch
                                                  
from tf_lab.autopredictors.exploration import latent_embedding_of_entire_dataset

from general_tools.in_out.basics import create_dir, delete_files_in_directory, files_in_subdirs
from general_tools.simpletons import select_first_last_and_k
from geo_tool import Point_Cloud



In [2]:
# Load IPython's autoreload extension and use mode 2, so that imported modules
# are reloaded automatically before code is executed.
%load_ext autoreload
%autoreload 2
# Draw matplotlib figures inline, in the cell output.
%matplotlib inline

In [3]:
# Run-level switches: index of the GPU to make visible and the name of the
# reconstruction loss (the configuration cell distinguishes 'emd' from any other value).
GPU, loss = 0, 'chamfer'

In [4]:
# ---- Experiment configuration -------------------------------------------
# The experiment counter prefixes the experiment name: '9' for the 'emd' loss,
# '10' for any other loss.
exp_counter = '9' if loss == 'emd' else '10'

# Workflow switches consulted by the cells further down.
load_model_conf = True   # load a previously saved configuration instead of building one
do_training = False
do_evaluation = False

# Number of points in the incomplete (input) and complete (target) point clouds.
incomplete_n_samples = 2048
complete_n_samples = 4096

val_percent = .10
dropout_keep_prob = .8
seed = 42

experiment_name = '%s_all_classes_%d_%dpts_%s' % (exp_counter, incomplete_n_samples,
                                                  complete_n_samples, loss)

top_data_dir = '/orions4-zfs/projects/lins2/Panos_Space/DATA/'

# Shapes handed to the model configuration as n_input / n_output.
n_input = [incomplete_n_samples, 3]
n_output = [complete_n_samples, 3]

# Everything produced by this experiment lives in its own sub-directory.
train_dir = osp.join(top_data_dir, 'OUT/models/incomplete_pclouds/paper_vanilla_vscan',
                     experiment_name)
create_dir(train_dir)

max_training_epochs = 100
# Evaluation considers the saved epochs up to the same cap as training.
max_evaluation_epochs = max_training_epochs

In [5]:
# Load the train / validation / test splits of the first class in vscan.all_classes.
class_to_syn_id = shape_net_category_to_synth_id()
all_classes = vscan.all_classes
n_threads = 50

# Keyword arguments of the loader, gathered in one place.
load_options = dict(complete_n_samples=complete_n_samples,
                    incomplete_n_samples=incomplete_n_samples,
                    val_percent=val_percent)

first = class_to_syn_id[all_classes[0]]
train_data, val_data, test_data = vscan.load_train_val_test_vscan_paper(first, n_threads,
                                                                        **load_options)


4045 files containing complete point clouds were found.
19800 incomplete point clouds were loaded.
4470 incomplete point clouds were loaded.


In [5]:
# Load Data of All Classes (Takes Time.)
class_to_syn_id = shape_net_category_to_synth_id()
all_classes = vscan.all_classes
n_threads = 50

# Keyword arguments shared by every loader call below.
load_options = dict(complete_n_samples=complete_n_samples,
                    incomplete_n_samples=incomplete_n_samples,
                    val_percent=val_percent)

# The first class provides the initial data sets ...
first = class_to_syn_id[all_classes[0]]
train_data, val_data, test_data = vscan.load_train_val_test_vscan_paper(first, n_threads,
                                                                        **load_options)

# ... and every remaining class is merged into them, split by split.
for model_class in vscan.all_classes[1:]:
    class_syn_id = class_to_syn_id[model_class]
    curr_train, curr_val, curr_test = vscan.load_train_val_test_vscan_paper(class_syn_id, n_threads,
                                                                            **load_options)
    for accumulated, current in ((train_data, curr_train),
                                 (test_data, curr_test),
                                 (val_data, curr_val)):
        accumulated.merge(current)

# Only the training set is shuffled; validation and test keep their load order.
train_data.shuffle_data();

4045 files containing complete point clouds were found.
19800 incomplete point clouds were loaded.
4470 incomplete point clouds were loaded.
1572 files containing complete point clouds were found.
7800 incomplete point clouds were loaded.
1632 incomplete point clouds were loaded.
7497 files containing complete point clouds were found.
29640 incomplete point clouds were loaded.
5922 incomplete point clouds were loaded.
6778 files containing complete point clouds were found.
30000 incomplete point clouds were loaded.
6000 incomplete point clouds were loaded.
2318 files containing complete point clouds were found.
11100 incomplete point clouds were loaded.
2808 incomplete point clouds were loaded.
3173 files containing complete point clouds were found.
15600 incomplete point clouds were loaded.
3438 incomplete point clouds were loaded.
8509 files containing complete point clouds were found.
30000 incomplete point clouds were loaded.
6000 incomplete point clouds were loaded.
1939 files con

In [33]:
# # Verification that the train, validation and test splits do not overlap (cell disabled).

# print train_data.num_examples + val_data.num_examples
# print test_data.num_examples

# tr = train_data.full_epoch_data(shuffle=False)
# va = val_data.full_epoch_data(shuffle=False)
# te = test_data.full_epoch_data(shuffle=False)

# train_set = set([i[:-6] for i in tr[1]])
# val_set = set([i[:-6] for i in va[1]])
# test_set = set([i[:-6] for i in te[1]])

# c1 = len(test_set.intersection(train_set)) == 0
# c2 = len(test_set.intersection(val_set)) == 0
# c3 = len(train_set.intersection(val_set)) == 0

# assert(c1 and c2 and c3)

# pp = test_data.next_batch(1)
# pinc = pp[2].reshape(incomplete_n_samples, 3)
# pcom = pp[0].reshape(complete_n_samples, 3)
        
# score1 = accuracy_of_completion(pinc, pcom, 0.02, ret_dists=False)
# print score1
# score2, c2 = coverage_of_completion(pcom, pinc, 0.02, ret_dists=True)

# Point_Cloud(points=pinc).plot();
# Point_Cloud(points=pcom).plot(c=c2);
# print pp[1]

In [38]:
# Build (or reload) the experiment configuration, then instantiate the auto-encoder.
if load_model_conf:
    # Re-use the configuration stored under train_dir.
    conf = Conf.load(osp.join(train_dir, 'configuration'))
    print(conf)
else:
    # Fully connected decoder whose last layer emits one value per output coordinate.
    decoder_args = {'layer_sizes': [1024, np.prod(n_output)],
                    'non_linearity': tf.nn.relu
                   }

    # NOTE(review): the key is 'dropout_prob' but the value is named as a
    # keep-probability -- confirm which convention the encoder expects.
    encoder_args = {'dropout_prob': dropout_keep_prob}

    conf = Conf(
                n_input = n_input,
                n_output = n_output,
                denoising = True,
                training_epochs = max_training_epochs,
                batch_size = 50,
                loss = loss,
                train_dir = train_dir,
                loss_display_step = 1,
                saver_step = 1,
                learning_rate = 0.0005,
                encoder = enc_dec.encoder_with_convs_and_symmetry,
                encoder_args = encoder_args,
                decoder = enc_dec.decoder_with_fc_only,
                decoder_args = decoder_args
               )

    conf.allow_gpu_growth = False
    conf.experiment_name = experiment_name
    # Persist the configuration so later runs can take the load_model_conf branch.
    conf.save(osp.join(conf.train_dir, 'configuration'))

conf.consistent_io = None
reset_tf_graph()
# Seed AFTER the reset: tf.set_random_seed sets the graph-level seed of the
# current default graph. reset_tf_graph() presumably installs a fresh default
# graph (TODO confirm), in which case a seed set before it would not apply to
# the graph the auto-encoder is built in.
tf.set_random_seed(seed)
set_visible_GPUs([GPU])
ae = PointNetAutoEncoder(experiment_name, conf)

              allow_gpu_growth: False
                    batch_size: 50
                 consistent_io: False
                         debug: False
                       decoder: decoder_with_fc_only
                  decoder_args: {'non_linearity': <function relu at 0x7fe3c2ad9e60>, 'layer_sizes': [1024, 12288]}
                       encoder: encoder_with_convs_and_symmetry
                  encoder_args: {'dropout_prob': 0.8}
               experiment_name: 10_all_classes_2048_4096pts_chamfer
                 gauss_augment: None
                  is_denoising: True
               latent_vs_recon: 1.0
                 learning_rate: 0.0005
                          loss: chamfer
             loss_display_step: 1
                       n_input: [2048, 3]
                      n_output: [4096, 3]
                           n_z: None
             saver_max_to_keep: None
                    saver_step: 1
                     train_dir: /orions4-zfs/projects/lins2/Panos_Space/DATA/OUT/m

In [39]:
# Optional training pass: whatever ae.train returns is appended to train_stats.txt
# inside the experiment directory.
if do_training:
    training_stats = [ae.train(train_data, conf)]
    stats_file = osp.join(conf.train_dir, 'train_stats.txt')
    with open(stats_file, 'a') as fout:
        np.savetxt(fout, np.array(training_stats)[0])

In [9]:
if do_evaluation:
    # Model selection: evaluate the saved epochs (up to max_evaluation_epochs) on the
    # validation set and keep the epoch whose value in column 1 of val_stats is smallest.
    saved_epochs = np.array(read_saved_epochs(conf.train_dir))
    allowable_epochs = saved_epochs[saved_epochs <= max_evaluation_epochs]
    val_stats = eval_model(ae, conf, val_data, epochs=allowable_epochs, verbose=True)
    val_losses = val_stats[:, 1]
    val_loss = np.min(val_losses)
    best_epoch = int(val_stats[np.argmin(val_losses), 0])
    print('Best epoch = %d.' % (best_epoch,))

    # Restore the selected epoch, run it on the test set and save the reconstructions.
    ae.restore_model(conf.train_dir, best_epoch)
    top_save_dir = osp.join(conf.train_dir, 'output', 'epoch_' + str(best_epoch))
    save_dir = osp.join(top_save_dir, 'test_predictions')
    test_recon, test_loss, test_feed, test_ids, test_gt = ae.evaluate(test_data, conf)
    save_reconstructions(save_dir, test_recon, test_gt, test_feed, test_ids) # save ply files of test data.
    train_loss = ae.evaluate(train_data, conf)[1]

    # Per-example scores on the test set: column 0 is accuracy, column 1 is coverage.
    n_examples = len(test_recon)
    pred_scores = np.zeros((n_examples, 2))
    for i in range(n_examples):
        gt, pred = test_gt[i], test_recon[i]
        pred_scores[i, 0] = accuracy_of_completion(pred, gt, thres=0.02, ret_dists=False)
        pred_scores[i, 1] = coverage_of_completion(gt, pred, thres=0.02, ret_dists=False)

    median_accuracy = np.median(pred_scores[:, 0])
    median_coverage = np.median(pred_scores[:, 1])
    print('Test Median-Accuracy-Coverage: %s %s' % (median_accuracy, median_coverage))

    save_pc_prediction_stats(osp.join(top_save_dir, 'detailed_stats.txt'), test_ids, pred_scores)
    save_stats_of_multi_class_experiments(osp.join(top_save_dir, 'class_stats.txt'), test_ids, pred_scores)

    # Summary file: losses, absolute / relative train-test gap and the median scores.
    generalization_gap = abs(test_loss-train_loss)
    with open(osp.join(top_save_dir, 'stats.txt'), 'w') as fout:
        fout.write('Best Validation Epoch = %d\n' % (best_epoch))
        fout.write('Validation loss = %f\n' % (val_loss))
        fout.write('Train loss = %f\n' % (train_loss))
        fout.write('Test loss = %f\n' % (test_loss))
        fout.write('Gen. Error (abs, per) = %f %f\n' % (generalization_gap, generalization_gap / train_loss))
        fout.write('Test Median-Accuracy-Coverage = %f %f\n' % (median_accuracy, median_coverage))
        # Harmonic mean of (accuracy, coverage) per example, then the median over examples.
        fout.write('Test Median Harmonic Mean = %f' % (np.median(hmean(pred_scores, axis=1))))

# Latent codes:

In [40]:
# Look up the (experiment id, best epoch) pair registered for this class type and
# loss, then restore the auto-encoder weights saved at that epoch.
# Note: this rebinds exp_counter, which the configuration cell also defines.
class_type = 'all_classes'
experiment_id_and_epoch = paper_pc_completion_experiment_id_best_epoch(class_type, loss)
exp_counter, best_epoch = experiment_id_and_epoch
ae.restore_model(conf.train_dir, best_epoch)

In [41]:
# Latent code of every test example, without augmentation.
feed, latent_emb, ids = latent_embedding_of_entire_dataset(test_data, ae, conf,
                                                           feed_original=False,
                                                           apply_augmentation=False)

# Each id is split on '.'; fields 0, 1 and 2 are kept as the syn id, the model
# name and the scan id respectively.
tokens = [identifier.split('.') for identifier in ids]


def _token_field(position):
    """Return field `position` of every split id as an object array."""
    return np.array([parts[position] for parts in tokens], dtype=object)


test_syn_ids = _token_field(0)
test_model_names = _token_field(1)
test_scan_ids = _token_field(2)

# model = TSNE(n_components=2, random_state=seed, init='pca', verbose=True);
# tsne_emb = model.fit_transform(latent_emb)

In [42]:
from general_tools.in_out.basics import pickle_data, unpickle_data

# Store the ids and latent codes in a loss-specific pickle file; the path is
# relative, so the file is written to the current working directory.
tsne_data_file = '%s-tsne-data.pkl' % (loss,)
pickle_data(tsne_data_file, ids, latent_emb)

In [24]:
from tf_lab.autopredictors.scripts.helper import syn_id_to_class_id_dict
from general_tools.arrays.transform import make_contiguous

# Integer class label of every test example, looked up from its syn id.
syn_id_to_cat = shape_net_core_synth_id_to_category
syn_id_to_int = syn_id_to_class_id_dict()
c_int = [syn_id_to_int[syn_id] for syn_id in test_syn_ids]

# Category names of the labels that occur in the test set, in the sorted
# order returned by np.unique; used as colorbar tick labels.
uvalues = np.unique(c_int)
inv_map = dict((class_int, syn_id) for syn_id, class_int in syn_id_to_int.items())
color_bar_labels = [syn_id_to_cat[inv_map[label]] for label in uvalues]

In [155]:
# t-SNE plot of a class-balanced random subset of the test latent codes.
samples_per_class = 100

# Dedicated, seeded generator: the same exemplars are drawn on every run,
# independently of how much of numpy's global random state was used before.
exemplar_rng = np.random.RandomState(seed)

selected = []
for class_id in np.unique(test_syn_ids):
    in_class = np.where(test_syn_ids == class_id)[0]
    # Sampling without replacement cannot draw more items than the class holds,
    # so small classes contribute all of their examples instead of raising.
    n_draw = min(samples_per_class, len(in_class))
    selected.extend(exemplar_rng.choice(in_class, n_draw, replace=False).tolist())


exemplars = np.array(selected)
latent_emb_small = latent_emb[exemplars]
model = TSNE(n_components=2, random_state=seed, init='pca', verbose=True);
tsne_emb_small = model.fit_transform(latent_emb_small)

# Colour every point by its class label.
# NOTE(review): make_contiguous presumably remaps the labels to 0..k-1 -- confirm.
c = np.array(c_int)[exemplars]
c = make_contiguous(c)
plt.scatter(tsne_emb_small[:,0], tsne_emb_small[:,1], c=c)
# One tick per class label, so ticks and tick labels always have the same length
# (the previous hard-coded range(0, 9) assumed nine classes).
cbar = plt.colorbar(ticks=np.arange(len(color_bar_labels)))
cbar.set_ticklabels(color_bar_labels)
plt.grid()
plt.suptitle('TSNE of Test Data - %s Loss.' % (loss, ))

[t-SNE] Computing pairwise distances...
[t-SNE] Computing 91 nearest neighbors...
[t-SNE] Computed conditional probabilities for sample 800 / 800
[t-SNE] Mean sigma: 0.526402
[t-SNE] KL divergence after 100 iterations with early exaggeration: 0.653676
[t-SNE] Error after 175 iterations: 0.653676
