In [1]:
import os
import random
import numpy as np
import logging
import argparse
import urllib
from glob import glob
from os.path import join, exists

import torch
import torch.backends.cudnn as cudnn
import torch.nn.parallel
import torch.optim
import torch.utils.data
import torch.multiprocessing as mp
import torch.distributed as dist
from util import metric
from torch.utils import model_zoo

from MinkowskiEngine import SparseTensor
from util import config
from util.util import export_pointcloud, get_palette, \
    convert_labels_with_palette, visualize_labels, extract_clip_feature
from tqdm import tqdm
from run.distill import get_model

from dataset.label_constants import *
from dataset.feature_loader import FusedFeatureLoader, collation_fn_eval_all, collation_fn
from dataset.point_loader import Point3DLoader, collation_fn_eval_all

Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


In [2]:
def get_logger():
    '''Return the shared "main-logger" logger, configured for DEBUG output.

    ``logging.getLogger`` hands back the same logger object for a given name,
    so the stream handler is attached only when the logger has none yet.
    Without this guard every additional call (for example re-running the
    notebook cell) would add another handler and each message would be
    printed once per handler.

    Returns:
        logging.Logger: the logger named "main-logger", level DEBUG, with a
        ``StreamHandler`` that formats records as
        "[time filename line N] message".
    '''

    logger_name = "main-logger"
    logger_in = logging.getLogger(logger_name)
    logger_in.setLevel(logging.DEBUG)
    if not logger_in.handlers:
        handler = logging.StreamHandler()
        fmt = "[%(asctime)s %(filename)s line %(lineno)d] %(message)s"
        handler.setFormatter(logging.Formatter(fmt))
        logger_in.addHandler(handler)
    return logger_in

In [3]:
def extract_text_feature(labelset):
    '''Encode a list of class names as CLIP text features.

    Every name is wrapped in the prompt "a <name> in a scene"; the last
    prompt is then replaced by the bare word 'other'. The prompts are passed
    to ``extract_clip_feature`` with the "ViT-L/14@336px" model and its
    result is returned unchanged. The caller's list is not modified.
    '''

    # a bit of prompt engineering
    print('Use prompt engineering: a XX in a scene')
    prompts = []
    for name in labelset:
        prompts.append("a " + name + " in a scene")
    # The final entry is always sent to CLIP as the plain word 'other'.
    prompts[-1] = 'other'

    return extract_clip_feature(prompts, model_name="ViT-L/14@336px")

def precompute_text_related_properties(labelset_name):
    '''Pre-compute text features, labelset, mapper, and palette.

    Only ScanNet label sets are supported: ``labelset_name`` must contain the
    substring 'scannet'.

    Args:
        labelset_name: name of the label set, e.g. 'scannet_3d'.

    Returns:
        tuple ``(text_features, labelset, mapper, palette)`` where
        ``text_features`` is the result of ``extract_text_feature`` for the
        20 ScanNet labels (last one renamed to 'other'), ``labelset`` is that
        label list with 'unlabeled' appended afterwards (so it has one more
        entry than there are text features), ``mapper`` is always ``None``,
        and ``palette`` is ``get_palette(colormap='scannet')``.

    Raises:
        ValueError: if ``labelset_name`` does not contain 'scannet'.
            Previously this case fell through and failed later with an
            UnboundLocalError because ``labelset``/``palette`` were never set.
    '''

    if 'scannet' not in labelset_name:
        raise ValueError(
            "Unsupported labelset name {!r}: only names containing 'scannet' "
            "are handled.".format(labelset_name))

    labelset = list(SCANNET_LABELS_20)
    labelset[-1] = 'other' # change 'other furniture' to 'other'
    palette = get_palette(colormap='scannet')
    mapper = None

    # Text features are computed before the extra 'unlabeled' entry is added.
    text_features = extract_text_feature(labelset)
    # labelset.append('unknown')
    labelset.append('unlabeled')
    return text_features, labelset, mapper, palette

In [4]:
from models.mink_unet import mink_unet as model3D

def constructor3d(**kwargs):
    '''Build the 3D backbone by forwarding all keyword arguments to ``model3D``.'''
    return model3D(**kwargs)


class DisNet(torch.nn.Module):
    '''3D sparse UNet used for distillation.

    Thin wrapper around a "MinkUNet18A" backbone built by ``constructor3d``
    with 3 input channels and 768 output channels.
    '''

    def __init__(self):
        super().__init__()
        # Width of the per-point output feature produced by the backbone.
        feature_dim = 768

        # MinkowskiNet for 3D point clouds
        self.net3d = constructor3d(
            in_channels=3,
            out_channels=feature_dim,
            D=3,
            arch="MinkUNet18A",
        )

    def forward(self, sparse_3d):
        '''Run the backbone on ``sparse_3d`` and return its output unchanged.'''
        backbone_out = self.net3d(sparse_3d)
        return backbone_out


In [5]:
# Seed every random number generator used by the notebook with one value so
# that runs are repeatable.
manual_seed = 1453
for _seed_fn in (
    random.seed,
    np.random.seed,
    torch.manual_seed,
    torch.cuda.manual_seed,
    torch.cuda.manual_seed_all,
):
    _seed_fn(manual_seed)

In [6]:
# URL of the pretrained checkpoint to load into the distillation network.
model_path = 'https://cvg-data.inf.ethz.ch/openscene/models/scannet_openseg.pth.tar'

# Build the network and move it to the GPU in one step.
model = DisNet().cuda()

# Fetch the checkpoint and load its 'state_dict' entry; strict=True requires
# the checkpoint keys to match the model's keys.
checkpoint = model_zoo.load_url(model_path, progress=True)
model.load_state_dict(checkpoint['state_dict'], strict=True)

Downloading: "https://cvg-data.inf.ethz.ch/openscene/models/scannet_openseg.pth.tar" to /root/.cache/torch/hub/checkpoints/scannet_openseg.pth.tar


  0%|          | 0.00/178M [00:00<?, ?B/s]

<All keys matched successfully>

In [7]:
# Configuration for the feature-extraction run.
#
# Alternative (currently disabled) path set:
#DATA_ROOT = "/mnt/project/AT3DCV_Data/Preprocessed_OpenScene/data/scannet_3d"
#DATA_ROOT_FUSED = "/mnt/project/AT3DCV_Data/Preprocessed_OpenScene/data/scannet_example_fused"
#SAVE_FOLDER = "/mnt/project/AT3DCV_Data/3D_features"

# Active path set: the augmented "birds_new" data.
# DATA_ROOT       - 3D scene files; its last path component is also used as the labelset name.
# DATA_ROOT_FUSED - passed to the dataset loader as the feature path prefix.
# SAVE_FOLDER     - directory the extracted per-scene features are written to.
DATA_ROOT = "/mnt/project/AT3DCV_Data/Preprocessed_OpenScene/data/augmented/birds_new/scannet_3d"
DATA_ROOT_FUSED = "/mnt/project/AT3DCV_Data/Preprocessed_OpenScene/data/augmented/birds_new/fused"
SAVE_FOLDER = "/mnt/project/AT3DCV_Data/Preprocessed_OpenScene/data/augmented/birds_new/features_3D"


INPUT_COLOR = False  # forwarded to the dataset loader as `input_color`
BATCH_SIZE = 1  # DataLoader batch size
VOXEL_SIZE = 0.02  # forwarded to the dataset loader as `voxel_size` (units not shown here - TODO confirm)
FEATURE_TYPE = "distill"  # feature-type tag; matches the "distill" suffix of the saved file names
SAVE_FEATURES_AS_NUMPY = True  # flag for saving the point features as .npy files
TEST_REPEATS = 5  # number of passes the inference loop makes over the data

In [8]:
# Notebook-wide logger. A `global` statement is a no-op at module level, so a
# plain assignment is all that is needed here.
logger = get_logger()

In [9]:
# Gather every *.pth scene file of the "example" split, in sorted order.
data_paths = glob(join(DATA_ROOT, "example", '*.pth'))
data_paths.sort()

In [10]:
# Sanity check: show which scene files were found.
data_paths

['/mnt/project/AT3DCV_Data/Preprocessed_OpenScene/data/augmented/birds_new/scannet_3d/example/scene0024_00_vh_clean_2.pth']

In [11]:
# Preview the last path component of DATA_ROOT.
DATA_ROOT.rsplit('/', 1)[-1]

'scannet_3d'

In [12]:
# This re-import is load-bearing: the top-of-notebook imports bind
# `collation_fn_eval_all` first from dataset.feature_loader and then again
# from dataset.point_loader, so the name ends up pointing at the point_loader
# version. Importing here rebinds it to the feature_loader version.
from dataset.feature_loader import FusedFeatureLoader, collation_fn_eval_all

# Dataset over the "example" split, without augmentation.
# NOTE(review): the meaning of identifier=6797 is not visible here - confirm
# against FusedFeatureLoader.
val_data = FusedFeatureLoader(
    datapath_prefix=DATA_ROOT,
    datapath_prefix_feat=DATA_ROOT_FUSED,
    voxel_size=VOXEL_SIZE,
    split="example",
    aug=False,
    memcache_init=False,
    eval_all=True,
    identifier=6797,
    input_color=INPUT_COLOR,
)

# No custom sampler; batches are drawn in dataset order (shuffle=False).
val_sampler = None
val_loader = torch.utils.data.DataLoader(
    val_data,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=4,
    pin_memory=True,
    drop_last=False,
    collate_fn=collation_fn_eval_all,
    sampler=val_sampler,
)


In [13]:
# The labelset name is the last path component of DATA_ROOT.
labelset_name = DATA_ROOT.rsplit('/', 1)[-1]

In [14]:
# Sanity check: show the derived labelset name.
labelset_name

'scannet_3d'

In [15]:
# Disabled setup code: everything below is wrapped in a triple-quoted string,
# so none of it runs (the notebook merely echoes the string as the cell
# output). It is not necessary if a folder already exists at the SAVE_FOLDER
# path.
"""
torch.backends.cudnn.enabled = False

if not os.path.exists(SAVE_FOLDER):
    os.makedirs(SAVE_FOLDER, exist_ok=True)


if SAVE_FEATURES_AS_NUMPY: # save point features to folder
    out_root = os.path.commonprefix([SAVE_FOLDER, "model"])
    saved_feature_folder = os.path.join(out_root, 'saved_feature')
    os.makedirs(saved_feature_folder, exist_ok=True)
"""



'\ntorch.backends.cudnn.enabled = False\n\nif not os.path.exists(SAVE_FOLDER):\n    os.makedirs(SAVE_FOLDER, exist_ok=True)\n\n\nif SAVE_FEATURES_AS_NUMPY: # save point features to folder\n    out_root = os.path.commonprefix([SAVE_FOLDER, "model"])\n    saved_feature_folder = os.path.join(out_root, \'saved_feature\')\n    os.makedirs(saved_feature_folder, exist_ok=True)\n'

In [16]:
# Compute the CLIP text features plus the matching labelset, mapper and
# palette for the chosen labelset name.
(
    text_features,
    labelset,
    mapper,
    palette,
) = precompute_text_related_properties(labelset_name)

Use prompt engineering: a XX in a scene
Loading CLIP ViT-L/14@336px model...
Finish loading


In [17]:
# Inference: run the distilled 3D model over every scene in `val_loader` and
# write the per-point output features to SAVE_FOLDER as .npy files.

# np.save does not create missing directories, and the setup cell that would
# create SAVE_FOLDER is disabled, so make sure the folder exists first.
os.makedirs(SAVE_FOLDER, exist_ok=True)

with torch.no_grad():
    model.eval()
    store = 0.0
    for rep_i in range(TEST_REPEATS):
        preds, gts = [], []
        # Each repetition sets a different `offset` on the dataset.
        val_loader.dataset.offset = rep_i
        logger.info("\nEvaluation {} out of {} runs...\n".format(rep_i+1, TEST_REPEATS))
        if rep_i>0:
            # Re-seed all RNGs with a fresh seed for every repetition after the first.
            seed = np.random.randint(10000)
            random.seed(seed)
            np.random.seed(seed)
            torch.manual_seed(seed)
            torch.cuda.manual_seed(seed)
            torch.cuda.manual_seed_all(seed)

        for i, (coords, feat, label, feat_3d, mask, inds_reverse) in enumerate(tqdm(val_loader)):
            sinput = SparseTensor(feat.cuda(non_blocking=True), coords.cuda(non_blocking=True))
            # `inds_reverse` re-indexes the rows of both the coordinates and
            # the model output (presumably voxels back to the original
            # points - TODO confirm against the collation function).
            coords = coords[inds_reverse, :]
            # Drop the first coordinate column (presumably the batch index - TODO confirm).
            pcl = coords[:, 1:].cpu().numpy()

            predictions = model(sinput)
            predictions = predictions[inds_reverse, :]
            # Similarity of every point feature to every text feature, and the
            # index of the best-matching label per point. These are not saved
            # in this cell but are kept available as notebook variables.
            pred = predictions.half() @ text_features.t()
            logits_pred = torch.max(pred, 1)[1].cpu()

            # NOTE(review): indexing data_paths with the batch index `i` names
            # the right scene only while BATCH_SIZE == 1 and shuffle is off.
            scene_name = val_loader.dataset.data_paths[i].split('/')[-1].split('.pth')[0]
            print(scene_name)
            # NOTE(review): the file name does not contain `rep_i`, so every
            # repetition overwrites the previous file and only the features of
            # the last repetition remain on disk.
            if SAVE_FEATURES_AS_NUMPY:
                np.save(os.path.join(SAVE_FOLDER, '{}_openscene_feat_{}.npy'.format(scene_name, FEATURE_TYPE)), predictions.cpu().numpy())
                

[2023-07-19 15:31:54,652 2931514810.py line 7] 
Evaluation 1 out of 5 runs...

  0%|                                                                                             | 0/1 [00:00<?, ?it/s]

scene0024_00_vh_clean_2


100%|█████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:03<00:00,  3.17s/it]
[2023-07-19 15:31:57,822 2931514810.py line 7] 
Evaluation 2 out of 5 runs...

  0%|                                                                                             | 0/1 [00:00<?, ?it/s]

scene0024_00_vh_clean_2


100%|█████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:03<00:00,  3.68s/it]
[2023-07-19 15:32:01,507 2931514810.py line 7] 
Evaluation 3 out of 5 runs...

  0%|                                                                                             | 0/1 [00:00<?, ?it/s]

scene0024_00_vh_clean_2


100%|█████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.79s/it]
[2023-07-19 15:32:04,299 2931514810.py line 7] 
Evaluation 4 out of 5 runs...

  0%|                                                                                             | 0/1 [00:00<?, ?it/s]

scene0024_00_vh_clean_2


100%|█████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.88s/it]
[2023-07-19 15:32:07,183 2931514810.py line 7] 
Evaluation 5 out of 5 runs...

  0%|                                                                                             | 0/1 [00:00<?, ?it/s]

scene0024_00_vh_clean_2


100%|█████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.79s/it]
