In [1]:
from general_tools.notebook.gpu_utils import setup_one_gpu
# Index of the GPU handed to setup_one_gpu for this notebook session.
GPU = 2
setup_one_gpu(GPU)

Picking GPU 2


In [4]:
import sys
import numpy as np
import os.path as osp
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn import mixture

from geo_tool import Point_Cloud

from general_tools.notebook.tf import reset_tf_graph
from general_tools.in_out.basics import create_dir

from tf_lab.in_out.basics import Data_Splitter, read_saved_epochs
from tf_lab.point_clouds.ae_templates import mlp_architecture_ala_iclr_18, default_train_params
from tf_lab.point_clouds.autoencoder import Configuration as Conf
from tf_lab.point_clouds.point_net_ae import PointNetAutoEncoder
from tf_lab.point_clouds.in_out import load_point_clouds_from_filenames, PointCloudDataSet
from tf_lab.point_clouds.convenience import reconstruct_pclouds, get_latent_codes

from tf_lab.data_sets.shape_net import pc_loader as snc_loader
from tf_lab.data_sets.shape_net import snc_category_to_synth_id
from tf_lab.nips.helper import pclouds_centered_and_half_sphere
from tf_lab.iclr.helper import load_multiple_version_of_pcs, find_best_validation_epoch_from_train_stats

from tf_lab.evaluate.generative_pc_nets import entropy_of_occupancy_grid, jensen_shannon_divergence

In [5]:
# Load the autoreload extension and render matplotlib figures inline.
%load_ext autoreload
%matplotlib inline
# Mode 2 so that edits to imported modules are picked up live.
%autoreload 2

In [8]:
def identity(x):
    """Return `x` unchanged; serves as a no-op point-cloud normalizer."""
    return x

# Normalizer applied to the ground-truth point clouds before the JSD grids are
# computed. The clouds are currently used as loaded (no-op); assign
# `pclouds_centered_and_half_sphere` here instead to switch to that normalizer.
pc_normalizer = identity

In [9]:
# Root directory; the auto-encoder training directories are resolved relative to it.
top_data_dir = '/orions4-zfs/projects/optas/DATA'
# Shape category to analyse and its id, looked up in the mapping returned by
# snc_category_to_synth_id().
class_name = 'chair'
syn_id = snc_category_to_synth_id()[class_name]
# NOTE(review): presumably the number of points per cloud; the visible cells do not
# pass it on (load_an_auto_encoder has its own default of 2048) -- confirm usage.
n_pc_points = 2048
n_pc_versions = 1 # load ae trained with that many versions of PCs per mesh.
# Resolution and `in_sphere` flag passed to entropy_of_occupancy_grid.
voxel_resolution = 28
cmp_in_sphere = True

In [10]:
# Load the 'uniform_one' point clouds of the selected class and keep the
# point clouds of each split under their own name.
in_data = load_multiple_version_of_pcs('uniform_one', syn_id, n_classes=1)
train_data, test_data, val_data = (
    in_data[split].point_clouds for split in ('train', 'test', 'val')
)

Loading test data.
/orions4-zfs/projects/optas/DATA/Point_Clouds/Shape_Net/Splits/single_class_splits/03001627/85_5_10/test.txt
679 pclouds were loaded. They belong in 1 shape-classes.
Loading train data.
/orions4-zfs/projects/optas/DATA/Point_Clouds/Shape_Net/Splits/single_class_splits/03001627/85_5_10/train.txt
5761 pclouds were loaded. They belong in 1 shape-classes.
Loading val data.
/orions4-zfs/projects/optas/DATA/Point_Clouds/Shape_Net/Splits/single_class_splits/03001627/85_5_10/val.txt
338 pclouds were loaded. They belong in 1 shape-classes.


In [11]:
# Ground truth for the JSD comparisons: run every split through the chosen
# normalizer, then compute its occupancy-grid statistics.
train_data, test_data, val_data = (
    pc_normalizer(pclouds) for pclouds in (train_data, test_data, val_data)
)

# Only the second value returned by entropy_of_occupancy_grid is kept.
_, train_grid_var = entropy_of_occupancy_grid(
    train_data, voxel_resolution, in_sphere=cmp_in_sphere)
_, test_grid_var = entropy_of_occupancy_grid(
    test_data, voxel_resolution, in_sphere=cmp_in_sphere)
_, val_grid_var = entropy_of_occupancy_grid(
    val_data, voxel_resolution, in_sphere=cmp_in_sphere)

In [7]:
# Loss of the pre-trained auto-encoder(s) to load and the bottleneck sizes to sweep.
ae_loss = 'emd'
b_necks = [128]

# GMM sweep settings. These must be live definitions: the sweep loop below reads
# `cov_types`, `diag_n_clusters`, `full_n_clusters` and `top_out_dir`, so leaving
# them commented out makes a fresh-kernel run fail with a NameError.
cov_types = ['full', 'diag']
diag_n_clusters = [5, 10, 20, 30, 128]    # component counts tried with 'diag' covariances
full_n_clusters = range(2, 31, 2)         # component counts tried with 'full' covariances

# Directory that receives the decoded GMM samples (one .npz per configuration).
top_out_dir = osp.join('/orions4-zfs/projects/optas/DATA/OUT/iclr/synthetic_samples/gmm', ae_loss)
create_dir(top_out_dir)

'/orions4-zfs/projects/optas/DATA/OUT/iclr/synthetic_samples/gmm/emd'

In [8]:
def load_an_auto_encoder(b_neck, ae_loss, n_pc_versions, n_pc_points=2048):
    """Restore a pre-trained point-cloud auto-encoder at its best validation epoch.

    The training directory is derived from the notebook-level `class_name` and
    `top_data_dir` together with the arguments below.

    Args:
        b_neck: bottleneck size, used in the model id.
        ae_loss: loss tag used in the model id (e.g. 'emd').
        n_pc_versions: number of point-cloud versions per mesh the AE was trained
            with, used in the experiment tag.
        n_pc_points: number of points per cloud, used in the model id.

    Returns:
        The PointNetAutoEncoder with the restored weights.
    """
    ae_experiment_tag = 'mlp_with_split_' + str(n_pc_versions) + 'pc_usampled_bnorm_on_encoder_only'
    ae_id = '_'.join(['ae', class_name, ae_experiment_tag, str(n_pc_points), 'pts', str(b_neck), 'bneck', ae_loss])
    ae_train_dir = osp.join(top_data_dir, 'OUT/iclr/nn_models/', ae_id)
    ae_conf = Conf.load(osp.join(ae_train_dir, 'configuration'))

    # Only the epoch is needed here; the validation error itself is not used.
    _, best_epoch = find_best_validation_epoch_from_train_stats(osp.join(ae_train_dir, 'train_stats.txt'))

    # Presumably checkpoints exist only at multiples of `saver_step` -- TODO confirm.
    # Round the best epoch UP to the next such multiple. Adding the remainder
    # itself (423 % 10 -> 426) would not land on a multiple.
    remainder = best_epoch % ae_conf.saver_step
    if remainder != 0:
        best_epoch += ae_conf.saver_step - remainder

    # Silence the network-construction logging of encoder and decoder.
    ae_conf.encoder_args['verbose'] = False
    ae_conf.decoder_args['verbose'] = False

    # Start from a clean graph so repeated calls do not clash with a previous model.
    reset_tf_graph()
    ae = PointNetAutoEncoder(ae_conf.experiment_name, ae_conf)
    ae.restore_model(ae_conf.train_dir, best_epoch, verbose=True)
    return ae

def jsd_on_reconstructed_data(ae_model, pclouds, cmp_grid_var, voxel_resolution, cmp_in_sphere):
    """Jensen-Shannon divergence between AE reconstructions and a reference grid.

    `pclouds` are reconstructed with `ae_model` (batches of 100), passed through
    `pclouds_centered_and_half_sphere`, turned into occupancy-grid statistics with
    `entropy_of_occupancy_grid`, and compared against `cmp_grid_var`.

    Returns:
        The value of jensen_shannon_divergence(reconstruction grid, cmp_grid_var).
    """
    decoded, _ = reconstruct_pclouds(ae_model, pclouds, batch_size=100)
    decoded = pclouds_centered_and_half_sphere(decoded)
    _, decoded_grid_var = entropy_of_occupancy_grid(
        decoded, voxel_resolution, in_sphere=cmp_in_sphere)
    return jensen_shannon_divergence(decoded_grid_var, cmp_grid_var)

In [9]:
for b_neck in b_necks:
    
    ae_model = load_an_auto_encoder(b_neck, ae_loss, n_pc_versions)
    latent_codes = get_latent_codes(ae_model, train_data)
    val_latent_codes = get_latent_codes(ae_model, val_data)
    
    print 'bneck size:', b_neck
    j1 = jsd_on_reconstructed_data(ae_model, train_data, train_grid_var, voxel_resolution, cmp_in_sphere)
    j2 = jsd_on_reconstructed_data(ae_model, test_data, test_grid_var, voxel_resolution, cmp_in_sphere)
    j3 = jsd_on_reconstructed_data(ae_model, val_data, val_grid_var, voxel_resolution, cmp_in_sphere)
    
    print 'Train-Test JSD of the AE-decoded data: (Train-Test-Val)'
    print j1, j2, j3
    
    for cov_t in cov_types:
        if cov_t == 'diag':
            choose_from = diag_n_clusters
        else: 
            choose_from = full_n_clusters
        
        for n_cluster in choose_from:            
            gmm = mixture.GaussianMixture(n_cluster, cov_t)
            gmm.fit(latent_codes)
            sample_codes = gmm.sample(len(latent_codes))[0]
            gmm_pcs = ae_model.decode(sample_codes)            
            tag = '_'.join(['bneck', str(b_neck), cov_t, str(n_cluster), 'gaussians'])
            np.savez(osp.join(top_out_dir, tag), gmm_pcs)
                        
            gmm_pcs = pclouds_centered_and_half_sphere(gmm_pcs)            
            _, gmm_grid_var = entropy_of_occupancy_grid(gmm_pcs, voxel_resolution, in_sphere=cmp_in_sphere)
            
            tr_jsd = jensen_shannon_divergence(gmm_grid_var, train_grid_var)
            te_jsd = jensen_shannon_divergence(gmm_grid_var, test_grid_var)
            val_jsd = jensen_shannon_divergence(gmm_grid_var, val_grid_var)
            
            print 'cov-type n_clusters -train_jsd -test_jsd, -val_jsd'
            print cov_t, n_cluster, tr_jsd, te_jsd, val_jsd
            print gmm.bic(latent_codes), gmm.bic(val_latent_codes), gmm.aic(latent_codes), gmm.aic(val_latent_codes)

Model restored in epoch 430.
bneck size: 128
Train-Test JSD of the AE-decoded data: (Train-Test-Val)
0.0027059559316 0.00701035696729 0.0108586368304
cov-type n_clusters -train_jsd -test_jsd, -val_jsd
full 2 0.0117282525845 0.015247552279 0.0195558837596
-3629612.51416 -119813.926707 -3741275.04398 -183922.583329
cov-type n_clusters -train_jsd -test_jsd, -val_jsd
full 4 0.00821402134792 0.0115882067153 0.0162379955711
-3673056.27915 -31211.3533302 -3896387.99765 -159432.489619
cov-type n_clusters -train_jsd -test_jsd, -val_jsd
full 6 0.00732867060932 0.0111244447567 0.0154588609194
-3597034.96303 64118.4995923 -3932035.87022 -128215.116364
cov-type n_clusters -train_jsd -test_jsd, -val_jsd
full 8 0.00664067836693 0.01027715697 0.0150655312804
-3517182.34679 162055.537506 -3963852.44266 -94390.5581167
cov-type n_clusters -train_jsd -test_jsd, -val_jsd
full 10 0.00602765581759 0.010000038167 0.0138865495735
-3428084.97657 266285.41688 -3986424.26113 -54273.1584102
cov-type n_clusters -tr