In [1]:
# Select the single GPU this notebook will work on through the project helper.
# NOTE(review): presumably this must run before TensorFlow is first imported - confirm.
from general_tools.notebook.gpu_utils import setup_one_gpu
GPU = 2
setup_one_gpu(GPU)

Picking GPU 2


In [16]:
import numpy as np
import os.path as osp
import matplotlib.pylab as plt
from sklearn.manifold import TSNE

from general_tools.in_out.basics import unpickle_data
from general_tools.in_out import create_dir
from general_tools.notebook.tf import reset_tf_graph

from general_tools.clustering.plt import plot_2d_embedding_in_grid_greedy_way, \
                                         plot_2d_embedding_in_grid_forceful

from general_tools.plotting import stack_images_horizontally, stack_images_in_square_grid

from geo_tool import Point_Cloud

import tf_lab.point_clouds.in_out as pio

from tf_lab.data_sets.shape_net import  pc_loader as sn_pc_loader

from tf_lab.point_clouds.point_net_ae import PointNetAutoEncoder
from tf_lab.point_clouds.autoencoder import Configuration as Conf
from tf_lab.point_clouds.in_out import PointCloudDataSet

from tf_lab.in_out.basics import read_saved_epochs
from tf_lab.data_sets.shape_net import snc_category_to_synth_id

from sklearn.ensemble import IsolationForest
from sklearn.metrics.pairwise import pairwise_distances
import random

from general_tools.clustering.dist import incremental_farthest_sampling, evaluate_solution
from general_tools.in_out.basics import files_in_subdirs

In [7]:
def detect_outliers(features, outlier_fraction, random_seed):
    '''
    '''
    clf = IsolationForest(max_samples=100, random_state=random_seed, contamination=outlier_fraction)
    clf.fit(features)
    y_pred = clf.predict(features)
    outliers = np.where(y_pred == -1)[0]
    print 'Outliers detected = ', len(outliers)
    return outliers

In [8]:
# Re-import edited modules automatically before a cell runs, and render figures inline.
%load_ext autoreload
%autoreload 2
%matplotlib inline

In [9]:
random_seed = 42        # Seed shared by the outlier detector, the exemplar search and t-SNE.
outlier_fraction = 0.3  # Fraction of the data instances that will be automatically disregarded.
n_exemplars = 140       # Source instances that will be used to create contrasting triplets.
top_data_dir = '/orions4-zfs/projects/optas/DATA/'

In [13]:
# load AE model
# The settings below pick which trained auto-encoder is restored: object class,
# reconstruction loss and architecture id.
class_name = 'chair'
syn_id = snc_category_to_synth_id()[class_name]
ae_loss = 'emd'
ae_id = '12' # 128 bottleneck

# The directory of the trained model is named after class, architecture id and loss.
ae_net_name = 'ae_farm_' + class_name + '_conv_arch_' + ae_id + '_2048pts_' + ae_loss
ae_net_dir = osp.join(top_data_dir, 'OUT/icml/nn_models/ae_farming', ae_net_name)
ae_conf = Conf.load(osp.join(ae_net_dir, 'configuration'))

# When the stored configuration points to a different training directory, re-point it
# to the directory used here. Note that this also re-saves the configuration on disk.
if ae_conf.train_dir != ae_net_dir:
    ae_conf.train_dir = ae_net_dir
    ae_conf.save(osp.join(ae_conf.train_dir, 'configuration'))
    
# NOTE(review): presumably clears any previously built TF graph so the cell can be re-run - confirm.
reset_tf_graph()
ae = PointNetAutoEncoder(ae_net_name, ae_conf)

# Restore the weights of the last entry reported by read_saved_epochs.
# NOTE(review): assumes that list is sorted so the last entry is the latest epoch - confirm.
saved_epochs = read_saved_epochs(ae_conf.train_dir)
load_epoch = saved_epochs[-1]
ae.restore_model(ae_conf.train_dir, load_epoch, verbose=True)

Model restored in epoch 1000.


In [17]:
# Load the raw point-clouds of the class: every .ply file found under the class directory.
n_pc_samples = 2048
pclouds_path = osp.join(top_data_dir,
                        'Point_Clouds/Shape_Net/Core/from_manifold_meshes/centered/',
                        str(n_pc_samples),
                        syn_id)
file_names = list(files_in_subdirs(pclouds_path, '.ply'))
pclouds, model_ids, syn_ids = pio.load_crude_point_clouds(file_names=file_names, n_threads=50, loader=sn_pc_loader)
print('%d files containing complete point clouds were found.' % len(pclouds))

6778 files containing complete point clouds were found.


In [18]:
# Push every point-cloud through the restored auto-encoder and keep its 'bottleneck' activation.
# NOTE(review): init_shuffle=False presumably keeps the data-set in the order of `pclouds` - confirm.
pcloud_data = PointCloudDataSet(pclouds, labels=model_ids, init_shuffle=False)
_, latent_codes, temp_names = ae.embedding_at_tensor(pcloud_data, ae_conf, tensor_name='bottleneck')
# Later cells index latent_codes and model_ids with the same integers, so the labels
# returned with the embedding must come back in exactly the order of model_ids.
# np.all is used instead of np.alltrue, which is deprecated and no longer exists in NumPy 2.0.
assert np.all(temp_names == model_ids), 'Embedding order does not match model_ids.'

In [24]:
# Store the latent codes as a pickled DataFrame, one row per model, indexed by model id.
# NOTE(review): the file name is relative, so the pickle lands in the kernel's working directory.
from pandas import DataFrame
df = DataFrame(data=latent_codes, index=model_ids)
df.to_pickle('pilot_chair_embeddings.pkl')

In [9]:
# Directory with the images of the class' models, and the file name of the view that is used.
top_image_dir = '/orions4-zfs/projects/lins2/Panos_Space/DATA/Meshes/Shape_Net_Core/2015_Summer_OUT/Images/'
top_image_dir = osp.join(top_image_dir, syn_id)
image_view_tag = 'image_p020_t337_r005.png'

# All outputs go under a directory named after the outlier percentage.
# The product is rounded before the int() conversion: a float product such as
# 0.29 * 100 == 28.999999999999996 would otherwise be truncated to 28.
top_out_dir = '/orions4-zfs/projects/lins2/Panos_Space/DATA/OUT/3d_neighbs_as_context/pilot_triplets_2/'
top_out_dir = osp.join(top_out_dir, 'outlier_frac_' + str(int(round(outlier_fraction * 100))))
create_dir(top_out_dir)

'/orions4-zfs/projects/lins2/Panos_Space/DATA/OUT/3d_neighbs_as_context/pilot_triplets_2/outlier_frac_30'

In [22]:
# Distances between every pair of latent codes (pairwise_distances is called with its default metric).
all_pdists = pairwise_distances(latent_codes)
# Instances flagged as outliers are handed to the exemplar search and, later, to the triplet rules.
outliers = detect_outliers(latent_codes, outlier_fraction, random_seed)
# NOTE(review): the import cell brings in `incremental_farthest_sampling`, not
# `incremental_farthest_search`; on a fresh kernel this name looks undefined - confirm which is meant.
fps_sol = incremental_farthest_search(all_pdists, n_exemplars, outliers, random_seed)
print 'Average Min Distance of FPS exemplars:', evaluate_solution(latent_codes[fps_sol])

Outliers detected =  2034
Average Min Distance of FPS exemplars: 317.139815331


In [23]:
# Embed the latent codes of the exemplars in 2-D with t-SNE and draw their images at those positions.
model = TSNE(n_components=2, random_state=random_seed, init='pca')
tsne_coords = model.fit_transform(latent_codes[fps_sol])

# File-names of the images to draw, in the same order as the rows of tsne_coords.
image_files = [osp.join(top_image_dir, model_ids[i], image_view_tag) for i in fps_sol]

save_file = osp.join(top_out_dir, 'tsne_fps_%d_examplars.png' % (n_exemplars, ))

# Side of the canvas: enough small_dim-sized cells per side for a square grid holding all exemplars.
small_dim = 400
big_dim = small_dim * int(np.ceil(np.sqrt(n_exemplars)))
plot_2d_embedding_in_grid_forceful(tsne_coords, image_files, big_dim, small_dim, save_file=save_file);

In [24]:
from shutil import copyfile

def find_first_distant_enough(dists, dist_thres=0.01):
    '''Position of the first distance that is strictly larger than the threshold.

    Input:
        dists: iterable of distances, scanned in its own order.
        dist_thres: a distance must exceed this value to qualify.
    Returns:
        The position of the first qualifying distance, or -1 when there is none.
    '''
    hits = (pos for pos, dist in enumerate(dists) if dist > dist_thres)
    return next(hits, -1)

def sorted_indices_given_exclusions(source, all_pdists, excluded):
    '''Order all observations by their distance from `source`, pushing excluded ones to the end.

    Input:
        source: index of the observation the distances are measured from.
        all_pdists: 2-D numpy array, all_pdists[i, j] is the distance between observations i and j.
        excluded: indices of observations that must not be ranked as neighbors.
    Returns:
        The indices of all observations sorted by increasing distance from `source`.
        `source` itself and the `excluded` observations get an infinite distance,
        so they occupy the last positions of the result.
    '''
    # all_pdists[source, :] is a view of the matrix: writing np.inf into it would silently
    # corrupt the caller's distances. A float copy is modified instead (the float dtype
    # also makes it possible to store np.inf when the matrix holds integers).
    s_dists = np.array(all_pdists[source, :], dtype=float)
    s_dists[source] = np.inf
    s_dists[excluded] = np.inf
    return np.argsort(s_dists)

def far_from_two_observations(all_dists, obs_1, obs_2, excluded):
    '''Rank the remaining observations by their summed distance from two given observations.

    Input:
        all_dists: 2-D numpy array, all_dists[i, j] is the distance between observations i and j.
        obs_1, obs_2: indices of the two reference observations.
        excluded: indices of observations that may not appear in the result.
    Returns:
        The indices of all observations except obs_1, obs_2 and the excluded ones, sorted by
        increasing (distance from obs_1 + distance from obs_2). The observations that are far
        from both references are therefore found towards the end of the result.
    '''
    # The distances are read from the `all_dists` argument; the body used to read the
    # notebook-level `all_pdists` instead, ignoring whatever matrix the caller passed.
    n = all_dists.shape[0]
    candidates = np.setdiff1d(np.arange(n), excluded)
    candidates = np.setdiff1d(candidates, [obs_1, obs_2])
    sum_dist = all_dists[obs_1, candidates] + all_dists[obs_2, candidates]
    return candidates[np.argsort(sum_dist)]
    
def make_triplets(sources, all_pdists, excluded, rule, far_threshold=0.9):
    '''Build one triplet [source, second, third] of observation indices for every source.

    Input:
        sources: indices of the observations each triplet starts from.
        all_pdists: 2-D numpy array, all_pdists[i, j] is the distance between observations i and j.
        excluded: indices of observations that may never be picked as second or third member.
        rule: how the two companions are picked among the non-excluded observations, which
            are ranked by increasing distance from the source:
            'closest_nn': the nearest and the second nearest neighbor.
            'one_far':    the nearest neighbor and the observation at the `far_threshold`
                          fraction of the ranking.
            'both_far':   the observation at the `far_threshold` fraction of the ranking (third
                          member) and, as second member, the observation at the `far_threshold`
                          fraction of the ranking by summed distance from the source and that
                          third member (see far_from_two_observations).
        far_threshold: fraction of a ranking at which a "far" observation is read.
    Returns:
        A list with one [source, second, third] list per source, in the order of `sources`.
    Raises:
        ValueError: for an unknown rule, or when a source has fewer than two candidates.
    '''
    known_rules = ('closest_nn', 'one_far', 'both_far')
    if rule not in known_rules:
        raise ValueError('Unknown rule %r, expected one of: %s.' % (rule, ', '.join(known_rules)))

    def far_position(n_items):
        # Rounding n_items * far_threshold can yield n_items itself (e.g. 4 items at 0.9),
        # which is one past the end of the ranking; clamp to the last valid position.
        return min(int(np.round(n_items * far_threshold)), n_items - 1)

    n = all_pdists.shape[0]
    # The non-excluded observations are the same for every source, so compute them once.
    allowed = np.setdiff1d(np.arange(n), excluded)

    triplets = []
    for s in sources:
        candidates = allowed[allowed != s]
        if len(candidates) < 2:
            raise ValueError('Source %s has fewer than two candidates to form a triplet.' % (s, ))

        # Candidates ranked from the closest to the farthest from the source.
        aso = candidates[np.argsort(all_pdists[s, candidates])]

        if rule == 'closest_nn':
            triplets.append([s, aso[0], aso[1]])
        elif rule == 'one_far':
            triplets.append([s, aso[0], aso[far_position(len(aso))]])
        else:  # 'both_far'
            # The second reference must be the far observation itself, not its position in
            # the ranking (the position is unrelated to the indices of the distance matrix).
            far_obs = aso[far_position(len(aso))]
            aso_2 = far_from_two_observations(all_pdists, s, far_obs, excluded)
            triplets.append([s, aso_2[far_position(len(aso_2))], far_obs])
    return triplets


def save_triplets(triplets, top_image_dir, top_out_dir, model_ids):
    '''Copy the images of every triplet into its own directory and log the model names.

    For the i-th triplet the image of each of its three models is copied from
    top_image_dir/<model id>/<view image> to top_out_dir/<i>/<k>_<model id>.png, where k is
    the position (0, 1 or 2) of the model inside the triplet. One line per triplet,
    '<i>\\t<id 0>\\t<id 1>\\t<id 2>', is written to top_out_dir/model_names_of_triplets.txt.

    Input:
        triplets: iterable of triplets; each one holds three indices into model_ids.
        top_image_dir: directory that has one sub-directory of images per model id.
        top_out_dir: directory under which the copies and the log file are written.
        model_ids: sequence of model-id strings.
    '''
    image_view_tag = 'image_p020_t337_r005.png'
    log_file = osp.join(top_out_dir, 'model_names_of_triplets.txt')
    # `with` closes (and flushes) the log even when copying an image fails midway.
    with open(log_file, 'w') as logger:
        for i, t in enumerate(triplets):
            # One output directory per triplet; creating it once is enough.
            save_dir = create_dir(osp.join(top_out_dir, str(i)))
            names = [model_ids[t[k]] for k in range(3)]
            for k, name in enumerate(names):
                source = osp.join(top_image_dir, name, image_view_tag)
                dest = osp.join(save_dir, str(k) + '_' + name + '.png')
                copyfile(source, dest)
            logger.write('\t'.join([str(i)] + names) + '\n')
    
def plot_triplets(triplets, top_image_dir, top_out_dir, model_ids):
    '''Save one picture per triplet that shows the images of its three models side by side.

    The picture of the i-th triplet is stored at top_out_dir/<i>.png.

    Input:
        triplets: iterable of triplets; each one holds three indices into model_ids.
        top_image_dir: directory that has one sub-directory of images per model id.
        top_out_dir: directory where the stacked pictures are saved.
        model_ids: sequence of model-id strings.
    '''
    image_view_tag = 'image_p020_t337_r005.png'
    for idx, triplet in enumerate(triplets):
        image_files = [osp.join(top_image_dir, model_ids[triplet[k]], image_view_tag) for k in range(3)]
        stack_images_horizontally(image_files, save_file=osp.join(top_out_dir, str(idx) + '.png'))
        
def plot_triplets_in_multiple_contexts(triplets, top_image_dir, top_out_dir, model_ids):
    '''Save, per example, one picture with the triplets that every context produced for it.

    Input:
        triplets: list with one entry per context; triplets[c][i] is the triplet (three
            indices into model_ids) of the i-th example under context c. The number of
            examples is taken from the first context.
        top_image_dir: directory that has one sub-directory of images per model id.
        top_out_dir: directory where the picture of the i-th example is saved as <i>.png.
        model_ids: sequence of model-id strings.
    '''
    image_view_tag = 'image_p020_t337_r005.png'
    n_examples = len(triplets[0])
    for example in range(n_examples):
        # Images are listed context by context, three per context.
        image_files = [osp.join(top_image_dir, model_ids[context[example][k]], image_view_tag)
                       for context in triplets
                       for k in range(3)]
        stack_images_in_square_grid(image_files, save_file=osp.join(top_out_dir, str(example) + '.png'))

In [27]:
# Build the triplets of a single rule around the FPS exemplars; outliers are never picked as companions.
rule = 'both_far'
triplets = make_triplets(fps_sol, all_pdists, outliers, rule)
# <top_out_dir>/<rule>/data: per-triplet copies of the images plus the log with the model names.
triplet_out_dir = create_dir(osp.join(top_out_dir, rule, 'data'))
save_triplets(triplets, top_image_dir, triplet_out_dir, model_ids)
# <top_out_dir>/<rule>/images: one horizontally stacked picture per triplet.
triplet_out_dir = create_dir(osp.join(top_out_dir, rule, 'images'))
plot_triplets(triplets, top_image_dir, triplet_out_dir, model_ids)

In [28]:
# Build the triplets of every rule for the same exemplars: triplets[c][i] is the triplet of
# the i-th exemplar under the c-th rule. The loop variable has its own name so that the
# notebook-level `rule` is neither needed by this cell nor overwritten by it.
triplets = []
for context_rule in ['closest_nn', 'one_far', 'both_far']:
    triplets.append(make_triplets(fps_sol, all_pdists, outliers, context_rule))

# One picture per exemplar, showing its triplets under all the rules.
triplet_out_dir = create_dir(osp.join(top_out_dir, 'all_rules', 'images'))
plot_triplets_in_multiple_contexts(triplets, top_image_dir, triplet_out_dir, model_ids)