In [1]:
import argparse
import os
import sys
import pickle

import numpy as np
import torch
from torch.multiprocessing import set_start_method
from torch.utils.data import DataLoader, DistributedSampler

# 3DETR codebase specific imports
from datasets import build_dataset_SDCoT
from engine import evaluate, train_one_epoch, evaluate_incremental
from models import build_model
from optimizer import build_optimizer
from criterion import build_criterion
from utils.dist import init_distributed, is_distributed, is_primary, get_rank, barrier
from utils.misc import my_worker_init_fn
from utils.io import save_checkpoint, resume_if_possible, resume_if_possible_SDCoT
from utils.logger import Logger
from utils.ap_calculator import APCalculator, get_ap_config_dict, parse_predictions_SDCoT, get_ap_config_dict

In [2]:
class TempArgs:
    def __init__(self) -> None:
        self.dataset_name = 'scannet'
        self.num_base_class = 17
        self.num_novel_class = 1
        self.dataset_root_dir = None
        self.meta_data_dir = None
        self.use_color = False
        self.seed = 42
        self.checkpoint_dir = 'ckpts_scannet/scannet_SDCoT_17'
        self.checkpoint_name = 'checkpoint_best_6270.pth'
        self.enc_dim = 256
        self.dec_dim = 512
        self.nqueries = 256
        self.mlp_dropout = 0.3
        self.model_name = '3detr'
        self.preenc_npoints = 2048
        self.enc_type = 'vanilla'
        self.enc_nhead = 4
        self.enc_ffn_dim = 128
        self.enc_dropout = 0.1
        self.enc_activation = 'relu'
        self.enc_nlayers = 3

        # define for the decoder
        self.dec_nhead = 4
        self.dec_ffn_dim = 256
        self.dec_dropout = 0.1
        self.dec_nlayers = 8
        self.dec_dim = 256


        
_args = TempArgs()

In [3]:
        # init_distributed(
        #     local_rank,
        #     global_rank=local_rank,
        #     world_size=args.ngpus,
        #     dist_url=args.dist_url,
        #     dist_backend="nccl",
        # )

# Single-GPU notebook run: everything is pinned to one CUDA device (TODO add ddp).
DEVICE_ID = 0
torch.cuda.set_device(DEVICE_ID)

# Seed every RNG from the config object so the value lives in one place
# (`_args.seed`) instead of being repeated as a literal in this cell.
np.random.seed(_args.seed)
torch.manual_seed(_args.seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(_args.seed)

# For incremental learning, the train and test datasets are different:
# the train dataset only contains NOVEL classes,
# the test dataset contains both base and novel classes.
datasets, dataset_config_train, dataset_config_val, dataset_config_base = build_dataset_SDCoT(_args)

# AP is computed against the validation config at IoU thresholds 0.25 and 0.5.
ap_calculator = APCalculator(
    dataset_config=dataset_config_val,
    ap_iou_thresh=[0.25, 0.5],
    class2type_map=dataset_config_val.class2type,
    exact_eval=True,
)

# Build the base detection model from the base-class config and load its weights.
base_detection_model, _ = build_model(_args, dataset_config_base)
base_detection_model = base_detection_model.cuda(DEVICE_ID)
resume_if_possible(
    checkpoint_dir=_args.checkpoint_dir,
    model_no_ddp=base_detection_model,
    optimizer=None,
    checkpoint_name=_args.checkpoint_name,
)

# Give the train split the base detector and the AP config built from the
# train dataset config.
datasets['train'].set_base_detector(base_detection_model)
ap_config_dict = get_ap_config_dict(
    dataset_config=dataset_config_train, remove_empty_box=True
)
datasets['train'].set_ap_config_dict(ap_config_dict)

# NOTE(review): later cells read `ret_`, `pseudo_labels`, `instance_bboxes` and
# `temp_input`, which are only assigned in the commented-out lines below. Those
# cells cannot run on a fresh kernel until one of these is restored.
# pseudo_labels, instance_bboxes = datasets['train'][0]
# ret_ = datasets['train'][0]
# temp_input = datasets['train'][0]

# temp_input['point_clouds'] = torch.tensor(temp_input['point_clouds']).cuda(0)
# parsed_prediction = datasets['train'].generate_pseudo_labels(temp_input['point_clouds'], temp_input['point_cloud_dims_min'], temp_input['point_cloud_dims_max'])

kept 1199 scans out of 1201
kept 312 scans out of 312
last_checkpoint ckpts_scannet/scannet_SDCoT_17/checkpoint_best_6270.pth
resuming ............
Found checkpoint at 700. Resuming.
optimizer is None
Loaded model and optimizer state at 700. Loaded best val metrics so far.


In [5]:
# Fetch the first sample of the test split.
# NOTE(review): the recorded output of this cell is
# "AttributeError: 'ScannetDatasetConfig_base' object has no attribute 'nyu40ids_novel'".
# Presumably the test split is handed the base-only config where it needs the
# novel-class ids; confirm in build_dataset_SDCoT.
datasets['test'][0]

AttributeError: 'ScannetDatasetConfig_base' object has no attribute 'nyu40ids_novel'

In [7]:
# Shape of the ground-truth corner array; the recorded output is (64, 8, 3).
# NOTE(review): `ret_` is only assigned in a commented-out line of the setup cell.
ret_['gt_box_corners'].shape

(64, 8, 3)

In [None]:
# Class label (last element) of every converted pseudo box.
[bbox[-1] for bbox in converted_instance_bboxes]

In [None]:
# Write the class label of each converted pseudo box into the leading slots
# of target_bboxes_semcls.
target_bboxes_semcls[:len(converted_instance_bboxes)] = [
    bbox[-1] for bbox in converted_instance_bboxes
]

In [None]:
# Display the semantic-class targets after the pseudo-box labels were written in.
# NOTE(review): `target_bboxes_semcls` is not assigned in any cell shown here.
target_bboxes_semcls

In [None]:
# rtarget_bboxes_mask.shape # (64,)
# NOTE(review): 67 rows here versus 64 in the mask comment above and in
# gt_box_corners. Presumably the prepended pseudo boxes push the count past
# the fixed 64-box budget; confirm and truncate if so.
box_centers_normalized.shape # (67, 3)

In [None]:
# instance_bboxes[0] is an np array of shape (7, ).
# The first 6 elements are the box coordinates, the last element is the class label.
# The box coordinates mean (x, y, z, dx, dy, dz), where (x, y, z) is the center of the box, and (dx, dy, dz) is the size of the box.
# pseudo_labels[0] is a list of length 3. The first element is the class index.
# The second element is an array of (8, 3), which is the x,y,z coordinates of 8 corners of the bounding box.
# The third element is the probability of the class.
# Convert the pseudo labels to the format of instance_bboxes
def pseudo_label_to_instance_bbox(pseudo_label):
    """Convert one pseudo label into the 7-element instance-bbox layout.

    Args:
        pseudo_label: sequence whose first element is the class index and
            whose second element holds the box corners as an (N, 3) array-like
            of x, y, z coordinates (8 corners per the notes in this cell).
            Any further elements, such as the class probability, are ignored.

    Returns:
        np.ndarray of shape (7,): ``(cx, cy, cz, dx, dy, dz, class_index)``.
        The center is the mean of the corners and the size is the per-axis
        extent (max - min) of the corners.

    Raises:
        ValueError: if the corners are not a 2-D array with 3 columns.
    """
    # np.asarray lets plain nested lists/tuples through as well as ndarrays;
    # an ndarray input is used as-is, so its dtype and the results are unchanged.
    corners = np.asarray(pseudo_label[1])
    if corners.ndim != 2 or corners.shape[1] != 3:
        raise ValueError(
            f"expected box corners of shape (N, 3), got {corners.shape}"
        )

    instance_bbox = np.zeros((7, ))
    instance_bbox[0:3] = corners.mean(axis=0)
    instance_bbox[3:6] = corners.max(axis=0) - corners.min(axis=0)
    instance_bbox[6] = pseudo_label[0]
    return instance_bbox

# Run every pseudo label in pseudo_labels[0] through the converter so the
# results share the instance_bboxes layout.
converted_instance_bboxes = [
    pseudo_label_to_instance_bbox(label) for label in pseudo_labels[0]
]

print('converted_instance_bboxes', converted_instance_bboxes)
# pseudo_labels[0][0][1].shape

In [None]:
# Keep only the six box-geometry values, dropping the trailing class label.
converted_instance_bboxes_no_cls = [bbox[:6] for bbox in converted_instance_bboxes]
converted_instance_bboxes_no_cls

In [None]:
# instance_bboxes.shape
# Prepend the converted pseudo boxes to the existing instance boxes (row-wise concat).
# NOTE(review): this rebinds `instance_bboxes` from itself, so re-running the
# cell prepends the pseudo boxes again. Binding the result to a new name would
# make the cell idempotent, but the later display cells would need updating too.
instance_bboxes = np.concatenate((converted_instance_bboxes, instance_bboxes), axis=0)
print('instance_bboxes', instance_bboxes)

In [None]:
# Check the row count after the pseudo boxes were prepended in the previous cell.
instance_bboxes.shape

In [None]:
# Inspect one raw pseudo label. Per the notes in the conversion cell it is
# [class index, (8, 3) corner array, class probability].
pseudo_labels[0][0]

In [None]:
# Mean over the corner axis: the same expression pseudo_label_to_instance_bbox
# uses for the box center.
pseudo_labels[0][0][1].mean(axis=0)

In [None]:
# Get an np array of size (3,), where each value is the difference between the
# max and min of pseudo_labels[0][0][1] along the first dimension. This is the
# same expression pseudo_label_to_instance_bbox uses for the box size.
pseudo_labels[0][0][1].max(axis=0) - pseudo_labels[0][0][1].min(axis=0)

In [None]:
# Display the instance boxes.
instance_bboxes

# An instance box is an array like array([1.98960567, 0.49231637, 0.89994586, 0.31689119, 0.87098539, 1.00589061, 9.        ]).
# The first 6 elements are the box coordinates; the last element is the class label.
# The box coordinates are (x, y, z, dx, dy, dz), where (x, y, z) is the center of the box and (dx, dy, dz) is its size.


In [None]:
# NOTE(review): `temp` is not assigned in any cell shown here. Later cells index
# temp['outputs'] and mention temp['aux_outputs'], so it is presumably the result
# of a model forward pass; confirm and add the cell that produces it.
temp

In [None]:
# temp_input_unsqueezed is a detached copy of temp_input['point_clouds'] with an extra dimension
# temp_input_unsqueezed = temp_input['point_clouds'].clone().detach().unsqueeze_(0)
# temp_input_unsqueezed = temp_input['point_clouds'].unsqueeze_(0)

In [None]:
# Same display expression as an earlier cell; candidate for removal.
instance_bboxes

In [None]:
# len(temp_input)
# Dump the full pseudo-label structure.
# NOTE(review): `pseudo_labels` is only assigned in a commented-out line of the setup cell.
pseudo_labels

In [None]:
# Print the shape of the second field of every entry in temp_input[0].
# NOTE(review): the commented-out setup lines index `temp_input` by string key
# ('point_clouds'), which does not match the integer index used here.
# Presumably `temp_input` held parsed predictions when this cell was run; confirm.
for entry in temp_input[0]:
    print(entry[1].shape)

In [None]:
# Display the train-split dataset object returned by build_dataset_SDCoT.
datasets['train']

In [None]:
# NOTE(review): `temp_input` is only assigned in a commented-out line of the setup cell.
temp_input

In [None]:
# List the entries available under temp['outputs']; the recorded result is kept below.
temp['outputs'].keys() 
# dict_keys(['sem_cls_logits', 'center_normalized', 'center_unnormalized', 'size_normalized', 'size_unnormalized', 'angle_logits', 'angle_residual', 'angle_residual_normalized', 'angle_continuous', 'objectness_prob', 'sem_cls_prob', 'box_corners'])

# aux_outputs
# temp['aux_outputs'][2]

In [None]:
# Dump the whole temp['outputs'] dict (its keys are listed in the previous cell).
temp['outputs']

In [None]:
# ap_calculator.step_SDCoT(
#     predicted_box_corners=temp['outputs']['box_corners'],
#     sem_cls_probs = temp['outputs']['sem_cls_prob'],
#     objectness_probs = temp['outputs']['objectness_prob'],
#     point_cloud = temp_input['point_clouds'],
# )




In [None]:
# Length of the first parsed prediction of the first batch element.
# NOTE(review): `parsed_predictions` is not assigned in any cell shown here
# (only a singular `parsed_prediction` appears, commented out, in the setup cell).
len(parsed_predictions[0][0]) # 1x85

In [None]:
# Print the first and last field of every parsed prediction of the first batch
# element. Judging from the sample output below, these are presumably the class
# index and the class probability.
for prediction in parsed_predictions[0]:
    prob = prediction[-1]
    print(prediction[0], prob)

# In SDCoT, the thresholds are set to 0.95 (objectness score) and 0.9 (classification prob)
# 4 0.90740836
# 9 0.9623784
# 15 0.9973775

In [None]:
# Shape of the objectness probabilities stored under temp['outputs'].
temp['outputs']['objectness_prob'].shape

In [None]:
# Fetch the first train-split sample; the next cell reads x["gt_box_corners"].
x = datasets['train'][0]

In [None]:
# Shape of the sample's ground-truth corner array. The trailing note is a
# reminder of the intended change: prepend the pseudo boxes and drop the last
# few rows so the array length stays the same.
x["gt_box_corners"].shape # prepend and remove last few

In [None]:
# Shape of the second field of the first parsed prediction.
# Presumably the (8, 3) corner array, as in the pseudo labels; confirm.
parsed_predictions[0][0][1].shape