In [181]:
import numpy as np
import os.path as osp
from collections import defaultdict
from sklearn.neighbors import NearestNeighbors  # Students: you can use this implementation to find the 
                                                # nearest neighbors
from cs233_gtda_hw4.in_out.plotting import plot_3d_point_cloud

# Imports

In [182]:
import torch
# Prefer the first CUDA GPU when one is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
device

device(type='cuda', index=0)

In [183]:
# Summarise the CUDA setup. The device-name lookup is guarded because
# torch.cuda.get_device_name raises when no CUDA device is available, which
# would otherwise break this cell on CPU-only machines.
cuda_available = torch.cuda.is_available()
n_cuda_devices = torch.cuda.device_count()
gpu_name = torch.cuda.get_device_name(0) if cuda_available else 'no CUDA device available'
gpu_name

'NVIDIA Tesla K80'

In [184]:
import torch
import numpy as np
import os
import os.path as osp
import tqdm
import matplotlib.pylab as plt
from torch import nn
from torch import optim
from collections import defaultdict

## Imports based on our ready-to-use code (after you pip-install the cs233_gtda_hw4 package)
from cs233_gtda_hw4.in_out.utils import make_data_loaders
from cs233_gtda_hw4.in_out.utils import save_state_dicts, load_state_dicts
from cs233_gtda_hw4.in_out import pointcloud_dataset
from cs233_gtda_hw4.in_out.plotting import plot_3d_point_cloud


## Imports you might use if you follow our scaffold code (it is OK to use your own structure for the models)
from cs233_gtda_hw4.models import PointcloudAutoencoder
from cs233_gtda_hw4.models import PartAwarePointcloudAutoencoder
from cs233_gtda_hw4.models.point_net import PointNet
from cs233_gtda_hw4.models.mlp import MLP
from cs233_gtda_hw4.models.part_classifier import part_classifier

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [185]:
##
## Fixed Settings (we do not expect you to change these)
##

n_points = 1024  # number of points in each point cloud
n_parts = 4      # maximum number of parts in each shape
n_train_epochs = 400  # presumably the number of training epochs (not used in the cells visible here)

# Students: feel free to change the values below -ONLY- for the bonus question,
# i.e., use THESE hyper-parameters when you train for the non-bonus questions.

part_lambda = 0.005  # for the part-aware AE you will be using (summing) two losses:
                     # chamfer + cross-entropy
                     # do it like this: chamfer + (part_lambda * cross-entropy),
                     # i.e. the cross-entropy term is scaled down
init_lr = 0.009  # initial learning rate, tested by us with the ADAM optimizer

In [186]:
## Students: feel free to change below:

# batch-size of data loaders
batch_size = 128 # if you can keep this too as is keep it,
                 # but if it is too big for your GPU, feel free to change it.

# Which device to use: cpu or cuda?
# Use CUDA when it is available and fall back to the CPU otherwise, instead of
# hard-coding 'cuda' (which makes later cells fail on CPU-only machines).
# Note: only the "alternative" (slower) chamfer_loss in losses/nn_distance can run on cpu.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

top_in_dir = '../data/'
top_out_dir = '../data/out/'
# exist_ok avoids the check-then-create race and keeps the cell idempotent.
os.makedirs(top_out_dir, exist_ok=True)

In [187]:
# PREPARE DATA:

loaders = make_data_loaders(top_in_dir, batch_size)

# Report how many examples each split holds.
for split, loader in loaders.items():
    print('N-examples', split, len(loader.dataset))

# BUILD MODELS:
NUM_POINTS = n_points   # reuse the fixed setting instead of repeating the literal 1024
NUM_CHANNELS = 3
LATENT_DIM = 128
# batch_size, num_channels, num_points

# Import the classifier class under an alias. The instance created below is bound
# to the name `part_classifier`, which shadows the class imported at the top of the
# notebook; without the alias, re-running this cell would try to call the instance.
from cs233_gtda_hw4.models.part_classifier import part_classifier as PartClassifier

encoder = PointNet(NUM_CHANNELS)
decoder = MLP(LATENT_DIM, NUM_POINTS)
# NOTE(review): the third argument (0.005) has the same value as part_lambda; confirm
# against the part_classifier signature whether it should reference that setting.
part_classifier = PartClassifier(LATENT_DIM + NUM_CHANNELS, n_parts, 0.005)

N-examples train 750
N-examples test 150
N-examples val 50


In [188]:
# Students: default location of the latent codes saved by the last cell of main.ipynb;
# change these paths if you saved the codes somewhere else.
vanilla_ae_emb_file = '../data/out/pc_ae_latent_codes.npz'  # embeddings of the vanilla AE
part_ae_emb_file = '../data/out/part_pc_ae_latent_codes.npz'  # embeddings of the 2-branch (part-aware) AE

In [189]:
# Load golden distances (pairwise matrix, or corresponding model/part names in golden_names)
golden_part_dist_file = '../data/golden_dists.npz'

# NOTE(security): allow_pickle=True can execute arbitrary code while loading; only use
# it on the trusted course-provided file.
# The NpzFile keeps the archive open, so read the arrays inside a context manager
# that closes it afterwards.
with np.load(golden_part_dist_file, allow_pickle=True) as golden_data:
    golden_part_dist = golden_data['golden_part_dist']
    golden_names = golden_data['golden_names']

print(len(golden_names))  # models-name/part combinations
print(golden_names[0])

484
c59cdd1537bd75ddd0818327fc390a5__2__


# 1. Compare distances with vanilla model

In [190]:
# Selects which saved embeddings the loading cell below reads:
# True -> vanilla AE, False -> 2-branch (part-aware) AE.
vanilla = True # or False

In [191]:
# Organize the golden part-aware distances.
# Each golden name splits on '_' into exactly five fields; the first is the
# shape-net model id and the third is the part id.
golden_keys = []
for golden_name in golden_names:
    sn_id, _, part_id, _, _ = golden_name.split('_')
    golden_keys.append((sn_id, part_id))

# (model id, part id) -> location in the distance matrix (the order is the same).
id_to_part_loc = {key: loc for loc, key in enumerate(golden_keys)}

# model id -> list of its part ids, in golden order.
sn_id_to_parts = defaultdict(list)
for sn_id, part_id in golden_keys:
    sn_id_to_parts[sn_id].append(part_id)

In [192]:
# Pick the embedding file: vanilla AE when `vanilla` is truthy, otherwise the part-aware AE.
emb_file = vanilla_ae_emb_file if vanilla else part_ae_emb_file

# np.load on an .npz archive returns an NpzFile that keeps the file open; the
# context manager closes it once both arrays have been read into memory.
with np.load(emb_file) as in_d:    # Students: assuming you used the numpy.savez
    latent_codes = in_d['latent_codes']
    test_names = in_d['test_names']

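Using the provided part-distance function $d_P$, compare the cumulative distances of the encoding space learned by the vanilla AE of (d) vs. the part-aware AE
of (f). Compute the cumulative distance of an encoding space by accumulating the part
distances of the parts of every chair in the test split to those of its nearest neighbor (NN)
in the encoding. Use the Euclidean distance between the latent vectors to compute the
neighborhoods. Let $M(A)$ denote all parts of chair $A$ and $\tilde{M}(A, k)$ its $k$-th part. Define the
one-way (part-based) distance of chair $A$ from $B$ as (this is the form implemented in the code cell below):

$$D(A, B) = \sum_{k \in M(A)} \begin{cases} d_P\big(\tilde{M}(A, k), \tilde{M}(B, k)\big) & \text{if } k \in M(B) \\ \max_{u \in M(B)} d_P\big(\tilde{M}(A, k), \tilde{M}(B, u)\big) & \text{otherwise} \end{cases}$$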

In [194]:
# Index the latent codes; two neighbours are requested because a query with an
# indexed point returns that point itself as one of the hits.
neigh = NearestNeighbors(n_neighbors=2).fit(latent_codes)
neigh

NearestNeighbors(n_neighbors=2)

In [196]:
# Question (g): accumulate the golden part distances between every test chair and its
# nearest neighbour in the latent space.

# Query all nearest neighbours in one batched call. Without a query array,
# scikit-learn returns the neighbours of the indexed points and does not treat a
# point as its own neighbour, so there is no need to query twice per sample or to
# skip "column 0", and duplicated latent codes cannot make a chair match itself.
nn_dists, nn_indices = neigh.kneighbors(n_neighbors=1, return_distance=True)
assert nn_indices.shape[0] == len(test_names), "`neigh` was fit on a different set of latent codes"

distances = []             # one-way part distance of each chair to its neighbour
num_parts_shared_all = []  # number of part ids each chair shares with its neighbour
latent_distances = []      # Euclidean latent distance of each chair to its neighbour

for i, sn_name in enumerate(test_names):
    parts_of_model = set(sn_id_to_parts[sn_name])

    # Nearest neighbour of model sn_name in the latent space.
    nn_i = int(nn_indices[i, 0])
    nn_dist = float(nn_dists[i, 0])
    latent_distances.append(nn_dist)
    matched_neighbor = test_names[nn_i]
    parts_of_neighbor = set(sn_id_to_parts[matched_neighbor])

    distance = 0  # per-model distance
    num_parts_shared = 0

    for k in parts_of_model:
        a_id = id_to_part_loc[(sn_name, k)]
        if k in parts_of_neighbor:
            # Part present in both chairs: distance between the matching parts.
            num_parts_shared += 1
            b_id = id_to_part_loc[(matched_neighbor, k)]
            distance += golden_part_dist[a_id, b_id]
        else:
            # Part missing from the neighbour: largest distance to any of its parts.
            distance += max(
                golden_part_dist[a_id, id_to_part_loc[(matched_neighbor, u)]]
                for u in parts_of_neighbor
            )
    distances.append(distance)
    num_parts_shared_all.append(num_parts_shared)

In [197]:
# Aggregate the per-chair statistics collected by the analysis loop.
cumulative_distance = sum(distances)
mean_shared_parts = sum(num_parts_shared_all) / len(num_parts_shared_all)
mean_nn_distance = sum(latent_distances) / len(latent_distances)

print("cumulative distance: ", cumulative_distance)
print("average number of shared parts: ", mean_shared_parts)
print("average euclidean distance between nearest neighbors: ", mean_nn_distance)

cumulative distance:  401.75394278764725
average number of shared parts:  3.1533333333333333
average euclidean distance between nearest neighbors:  0.21923460632562639


# 2. Compare distances with part-aware model

In [198]:
# Selects which saved embeddings the loading cell below reads:
# True -> vanilla AE, False -> 2-branch (part-aware) AE.
vanilla = False # or True

In [199]:
# Organize the golden part-aware distances.
# Each golden name splits on '_' into exactly five fields; the first is the
# shape-net model id and the third is the part id.
golden_keys = []
for golden_name in golden_names:
    sn_id, _, part_id, _, _ = golden_name.split('_')
    golden_keys.append((sn_id, part_id))

# (model id, part id) -> location in the distance matrix (the order is the same).
id_to_part_loc = {key: loc for loc, key in enumerate(golden_keys)}

# model id -> list of its part ids, in golden order.
sn_id_to_parts = defaultdict(list)
for sn_id, part_id in golden_keys:
    sn_id_to_parts[sn_id].append(part_id)

In [200]:
# Pick the embedding file: vanilla AE when `vanilla` is truthy, otherwise the part-aware AE.
emb_file = vanilla_ae_emb_file if vanilla else part_ae_emb_file

# np.load on an .npz archive returns an NpzFile that keeps the file open; the
# context manager closes it once both arrays have been read into memory.
with np.load(emb_file) as in_d:    # Students: assuming you used the numpy.savez
    latent_codes = in_d['latent_codes']
    test_names = in_d['test_names']

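Using the provided part-distance function $d_P$, compare the cumulative distances of the encoding space learned by the vanilla AE of (d) vs. the part-aware AE
of (f). Compute the cumulative distance of an encoding space by accumulating the part
distances of the parts of every chair in the test split to those of its nearest neighbor (NN)
in the encoding. Use the Euclidean distance between the latent vectors to compute the
neighborhoods. Let $M(A)$ denote all parts of chair $A$ and $\tilde{M}(A, k)$ its $k$-th part. Define the
one-way (part-based) distance of chair $A$ from $B$ as (this is the form implemented in the code cell below):

$$D(A, B) = \sum_{k \in M(A)} \begin{cases} d_P\big(\tilde{M}(A, k), \tilde{M}(B, k)\big) & \text{if } k \in M(B) \\ \max_{u \in M(B)} d_P\big(\tilde{M}(A, k), \tilde{M}(B, u)\big) & \text{otherwise} \end{cases}$$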

In [202]:
# Index the latent codes; two neighbours are requested because a query with an
# indexed point returns that point itself as one of the hits.
neigh = NearestNeighbors(n_neighbors=2).fit(latent_codes)
neigh

NearestNeighbors(n_neighbors=2)

In [203]:
# Spot-check: distances and indices of the two nearest indexed codes for test sample 100.
query_code = latent_codes[100].reshape(1, -1)
neigh.kneighbors(query_code, return_distance=True)

(array([[3.7252903e-09, 1.6370015e-01]], dtype=float32), array([[100, 120]]))

In [206]:
# Question (g): accumulate the golden part distances between every test chair and its
# nearest neighbour in the latent space.

# Query all nearest neighbours in one batched call. Without a query array,
# scikit-learn returns the neighbours of the indexed points and does not treat a
# point as its own neighbour, so there is no need to query twice per sample or to
# skip "column 0", and duplicated latent codes cannot make a chair match itself.
nn_dists, nn_indices = neigh.kneighbors(n_neighbors=1, return_distance=True)
assert nn_indices.shape[0] == len(test_names), "`neigh` was fit on a different set of latent codes"

distances = []             # one-way part distance of each chair to its neighbour
num_parts_shared_all = []  # number of part ids each chair shares with its neighbour
latent_distances = []      # Euclidean latent distance of each chair to its neighbour

for i, sn_name in enumerate(test_names):
    parts_of_model = set(sn_id_to_parts[sn_name])

    # Nearest neighbour of model sn_name in the latent space.
    nn_i = int(nn_indices[i, 0])
    nn_dist = float(nn_dists[i, 0])
    latent_distances.append(nn_dist)
    matched_neighbor = test_names[nn_i]
    parts_of_neighbor = set(sn_id_to_parts[matched_neighbor])

    distance = 0  # per-model distance
    num_parts_shared = 0

    for k in parts_of_model:
        a_id = id_to_part_loc[(sn_name, k)]
        if k in parts_of_neighbor:
            # Part present in both chairs: distance between the matching parts.
            num_parts_shared += 1
            b_id = id_to_part_loc[(matched_neighbor, k)]
            distance += golden_part_dist[a_id, b_id]
        else:
            # Part missing from the neighbour: largest distance to any of its parts.
            distance += max(
                golden_part_dist[a_id, id_to_part_loc[(matched_neighbor, u)]]
                for u in parts_of_neighbor
            )
    distances.append(distance)
    num_parts_shared_all.append(num_parts_shared)

In [207]:
# Aggregate the per-chair statistics collected by the analysis loop.
cumulative_distance = sum(distances)
mean_shared_parts = sum(num_parts_shared_all) / len(num_parts_shared_all)
mean_nn_distance = sum(latent_distances) / len(latent_distances)

print("cumulative distance: ", cumulative_distance)
print("average number of shared parts: ", mean_shared_parts)
print("average euclidean distance between nearest neighbors: ", mean_nn_distance)

cumulative distance:  396.53958693146706
average number of shared parts:  3.18
average euclidean distance between nearest neighbors:  0.26441301822662355
