In [8]:
import os
import glob
from tqdm import tqdm
import open3d as o3d
from src.data import tk_data
import numpy as np
import MinkowskiEngine as ME
import torch

from tk_umos_publication_evaluation_script import load_data

### Information about paths, args, and which model to visualize.

In [38]:
data_root_path = "/workspace/approaches/WACV_UMOS/data" ## TO_MODIFY: Modify this to data root path
model_root_path = "/workspace/approaches/WACV_UMOS/models"  ## TO_MODIFY: Modify this to your model root path

scenes = {"Campus1" : "22",
          "City1" :  "23",
           "City2" : "24"}

eval_scene = "City1"
eval_scene = scenes[eval_scene]
scene_path = os.path.join(data_root_path, eval_scene)
window_size = 20

# prediction
model = "1683644585490"
n_clusters = 10
preds = sorted(glob.glob("models/%s/predictions/%s/k%s-0/raw_*.npy" % (model, eval_scene, n_clusters)))
mapped_preds = sorted(glob.glob("models/%s/predictions/%s/k%s-0/mapped_*.npy" % (model, eval_scene, n_clusters)))
predictions = [np.load(p) for p in preds] 
mapped_predictions = [np.load(p) for p in mapped_preds] 

In [16]:
scene_path

'/workspace/approaches/WACV_UMOS/data/23'

### Read the data and the ground truth labels

In [17]:
_, pcs, labels = load_data(scene_path, window_size=window_size, voxel_size=0.1)

22it [00:00, 214.81it/s]

1. Read scans


114it [00:00, 209.67it/s]
46it [00:00, 458.46it/s]

2. Ego-motion compensation


114it [00:00, 352.61it/s]
  9%|▉         | 9/95 [00:00<00:00, 88.65it/s]

Normalizing frames to positive voxel coordinates


100%|██████████| 95/95 [00:12<00:00,  7.90it/s]


Sorting voxels and labels to allign with MOTS predictions
Create sliding windows...


### Visualize RAW Clustering

An open3d window will open on your screen.

Navigate forward or backward using left or right arrow, close using ESC

In [45]:
import distinctipy
# get colors for the number of clusters
colors = distinctipy.get_colors(n_clusters)

In [38]:
# assign colors to each voxel in the pointcloud
seq = []
for pc, lbl in tqdm(zip(pcs, predictions)):  
    pcd = o3d.geometry.PointCloud(o3d.utility.Vector3dVector(pc))
    pcd.colors = o3d.utility.Vector3dVector([colors[cluster] for cluster in lbl])
    seq.append(pcd)

95it [00:04, 19.13it/s]


In [None]:
tk_data.vis_sequence(seq, name="visualization/demo", capture=False)

### Visualize Ground Truth or Mapped Predictions

An open3d window will open on your screen.

Navigate forward or backward using left or right arrow, close using ESC

In [19]:
# fg and bg color
bg_color = (0.8, 0.8, 0.8)
fg_color = (0.2, 0.2, 0.7)
seq = []

#for pc, lbl in tqdm(zip(pcs, labels)):  
for pc, lbl in tqdm(zip(pcs, mapped_predictions)):  
    pcd = o3d.geometry.PointCloud(o3d.utility.Vector3dVector(pc))
    pcd.colors = o3d.utility.Vector3dVector([fg_color if l==1 else bg_color for l in lbl])
    seq.append(pcd)

0it [00:00, ?it/s]


In [14]:
# Navigate forward or backward using left or right arrow, close using ESC
tk_data.vis_sequence(seq, name="visualization/demo", capture=False)

n_frames=167
end reached, restarting
end reached, restarting
end reached, restarting
end reached, restarting


### Save as GIF

set capture=True and your respective output path. It will save one image for each frame. 

-> visualization/demo_1.png, ... , visualization/demo_N.png

Afterward, use the script "make_vid.sh", which will take the path to the files as an input, and will generate a gif in the same path.

./make_vid.sh visualization/demo

creates visualization/demo.gif

In [19]:
### Save frames to make GIF -> capture=True 
# -> is laggy, but just step through the sequence once, it will save one figure for each frame
tk_data.vis_sequence(seq, name="visualization/demo", capture=True)

n_frames=167


### Get predictions from data, model and clustering.

This section serves as an example on how to obtain predictions. We will load the gmm and model that achieved the best results in our publication in this example. The code is almost identical to the evaluation script.

Using this example, you should be able to adapt it and obtain predictions for any LiDAR dataset of your choice.

If you do not have any labels, adapt the "load_data" part accordingly to not load any labels (dataloader has an option with_labels=False) and just obtain the voxelized sliding windows. Just study the "load_data" method a bit and you just understand easily. :)


In [6]:
import joblib

scene_path = "data/rawkitti/22/"
window_size = 20
start_frame = 0
end_frame = -1
radius = 2
embedding_dim = 32
n_clusters = 20

model_id = "1661045943760"
#gmm = joblib.load("models/%s/predictions/22/k20-0/gmm.joblib" % model_id) #-> how to load from predictions folder.
gmm = joblib.load("models/%s/gmms/gmm_22.joblib"%model_id) # load publication gmm for scene 22
checkpoint_path = "models/%s/checkpoint_1.pth.tar" % model_id


In [10]:
from src.models.umos_ae import UMOSAE
from src.data.MOTS_Datasets import MOTS_Dataset

print("Reading and preparing data....")
# reads sliding window frames and voxelizes them
frames, vs, labels = load_data(scene_path, window_size=window_size, start_frame=start_frame, end_frame=end_frame)
dataset = MOTS_Datasets.MOTS_Dataset(frames, r=radius, window_size=window_size)

print("Loading model...")
model = UMOSAE(input_channels=dataset.mots_utils.d.shape[0], window_size=window_size, embedding_dim=embedding_dim).cuda()
checkpoint = torch.load(checkpoint_path)
model.load_state_dict(checkpoint["state_dict"])


18it [00:00, 178.13it/s]

Reading and preparing data....
1. Read scans


186it [00:00, 392.97it/s]
0it [00:00, ?it/s]

2. Ego-motion compensation


186it [00:01, 117.06it/s]
  3%|▎         | 5/167 [00:00<00:03, 48.12it/s]

Normalizing frames to positive voxel coordinates


100%|██████████| 167/167 [00:03<00:00, 48.38it/s]


Sorting voxels and labels to allign with MOTS predictions
Create sliding windows...
Loading model...


<All keys matched successfully>

In [12]:
# Now that we have loaded the model and data, we make our predictions.
# First encode all mots
embs = []
voxels_frame = []
for i in tqdm(range(len(frames))):
    with torch.no_grad():
        features, vxls = dataset[i] # one batch == all MOTS of a single frame -> shape (n_voxels, n_channels, window_size)
        # voxels in here correspond to the voxels in vs, they are not ordered the same though.
        embs.append(model.projection_head(torch.flatten(model.encoder(features.cuda()), start_dim=1)).cpu().detach().numpy())
        voxels_frame.append(vxls)

# -> Train clustering model here. In this example, we used a pre-trained one.
# clustering = ClusterModel(...).fit(np.vstack(embs))

100%|██████████| 167/167 [04:48<00:00,  1.73s/it]


In [14]:
import distinctipy
n_clusters = 20
colors = distinctipy.get_colors(n_clusters)

In [16]:
# Afterward, prediction + visualization.
seq = []
for emb, pc in tqdm(zip(embs, voxels_frame)):  # -> window size not alligned
    pcd = o3d.geometry.PointCloud(o3d.utility.Vector3dVector(pc))
    preds = gmm.predict(emb)
    pcd.colors = o3d.utility.Vector3dVector([colors[cluster] for cluster in preds])
    seq.append(pcd)

# The clustering model partitions our data into clusters, which reflect individual structures identified within the data. 
# -> Since the approach is not supervised, the the approach does not know what these structures are, it just finds them.
# To give an interpretation to the clusters, you can
# a) use labels
# b) do it manually, i.e., identify yourself which clusters belong to moving by watching a video of the raw clustering predictions.
# c) Implement an automated method that includes some domain knowledge about the structure you expect in a cluster.
#        -> e.g., instance segmentation+tracking => traces, shape consistency, etc.


167it [04:30,  1.62s/it]


In [17]:
tk_data.vis_sequence(seq)

n_frames=167
end reached, restarting
