In [1]:
import argparse
import os
import sys
import pickle

import numpy as np
import torch
from torch.multiprocessing import set_start_method
from torch.utils.data import DataLoader, DistributedSampler

# 3DETR codebase specific imports
from datasets import build_dataset_SDCoT
from engine import evaluate, train_one_epoch, evaluate_incremental
from models import build_model
from optimizer import build_optimizer
from criterion import build_criterion
from utils.dist import init_distributed, is_distributed, is_primary, get_rank, barrier
from utils.misc import my_worker_init_fn
from utils.io import save_checkpoint, resume_if_possible, resume_if_possible_SDCoT
from utils.logger import Logger
from utils.ap_calculator import APCalculator, get_ap_config_dict, parse_predictions_SDCoT, get_ap_config_dict

In [2]:
class TempArgs:
    """Hand-written stand-in for the parsed command-line arguments.

    An instance is handed to ``build_dataset_SDCoT`` and ``build_model`` and
    provides the checkpoint location passed to ``resume_if_possible``.
    Every option is assigned exactly once, so the value written here is the
    value in effect.
    """

    def __init__(self) -> None:
        # Dataset options.
        self.dataset_name = 'scannet'
        self.num_base_class = 17
        self.num_novel_class = 1
        self.dataset_root_dir = None
        self.meta_data_dir = None
        self.use_color = False
        self.seed = 42

        # Checkpoint to load.
        self.checkpoint_dir = 'ckpts_scannet/scannet_SDCoT_17'
        self.checkpoint_name = 'checkpoint_best_6270.pth'

        # General model options.
        self.model_name = '3detr'
        self.nqueries = 256
        self.mlp_dropout = 0.3
        self.preenc_npoints = 2048

        # Encoder options.
        self.enc_type = 'vanilla'
        self.enc_dim = 256
        self.enc_nhead = 4
        self.enc_ffn_dim = 128
        self.enc_dropout = 0.1
        self.enc_activation = 'relu'
        self.enc_nlayers = 3

        # Decoder options.
        # dec_dim used to be assigned twice (512, then 256). Only the later
        # assignment ever took effect, so 256 is the value kept here.
        self.dec_dim = 256
        self.dec_nhead = 4
        self.dec_ffn_dim = 256
        self.dec_dropout = 0.1
        self.dec_nlayers = 8


_args = TempArgs()

In [3]:
        # init_distributed(
        #     local_rank,
        #     global_rank=local_rank,
        #     world_size=args.ngpus,
        #     dist_url=args.dist_url,
        #     dist_backend="nccl",
        # )

# Single-process session pinned to one CUDA device; the index is named once
# so the model and the input tensor are guaranteed to land on the same GPU.
device_id = 0
torch.cuda.set_device(device_id)

# Seed every RNG from the configured value instead of a repeated literal, so
# changing `_args.seed` is enough to change the run.
seed = _args.seed
np.random.seed(seed)
torch.manual_seed(seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(seed)

# For incremental learning, the train and test dataset are different,
# The train dataset only contains NOVEL classes.
# The test dataset contains both base and novel classes.
datasets, dataset_config_train, dataset_config_val, dataset_config_base = build_dataset_SDCoT(_args)

ap_calculator = APCalculator(
    dataset_config=dataset_config_val,
    ap_iou_thresh=[0.25, 0.5],
    class2type_map=dataset_config_val.class2type,
    exact_eval=True,
)

# define the base detection model and load weights
base_detection_model, _ = build_model(_args, dataset_config_base)
base_detection_model = base_detection_model.cuda(device_id)  # TODO add ddp
resume_if_possible(
    checkpoint_dir=_args.checkpoint_dir,
    model_no_ddp=base_detection_model,
    optimizer=None,
    checkpoint_name=_args.checkpoint_name,
)

# For the train set, set the base detector
datasets['train'].set_base_detector(base_detection_model)
ap_config_dict = get_ap_config_dict(
    dataset_config=dataset_config_train, remove_empty_box=True
)
# set set_ap_config_dict
datasets['train'].set_ap_config_dict(ap_config_dict)

# Take the sample at index 0 and move its point cloud to the GPU before
# asking the dataset to generate pseudo labels for it.
temp_input = datasets['train'][0]
temp_input['point_clouds'] = torch.tensor(temp_input['point_clouds']).cuda(device_id)

# NOTE(review): later cells read `parsed_predictions` (plural), which no
# visible cell assigns; confirm whether this result is the intended source.
parsed_prediction = datasets['train'].generate_pseudo_labels(
    temp_input['point_clouds'],
    temp_input['point_cloud_dims_min'],
    temp_input['point_cloud_dims_max'],
)

kept 1199 scans out of 1201
kept 312 scans out of 312
last_checkpoint ckpts_scannet/scannet_SDCoT_17/checkpoint_best_6270.pth
resuming ............
Found checkpoint at 700. Resuming.
optimizer is None
Loaded model and optimizer state at 700. Loaded best val metrics so far.


  batch_data_label["point_clouds"] = torch.tensor(


In [7]:
# Display `temp`.
# NOTE(review): no cell visible here assigns `temp`; confirm where it is
# produced, otherwise this cell fails on a fresh kernel.
temp

[[(4,
   array([[-0.93295187, -0.01727048,  0.7384062 ],
          [-0.93295187, -0.01727048,  0.24609195],
          [-1.6295767 , -0.01727048,  0.24609195],
          [-1.6295767 , -0.01727048,  0.7384062 ],
          [-0.93295187, -0.5254606 ,  0.7384062 ],
          [-0.93295187, -0.5254606 ,  0.24609195],
          [-1.6295767 , -0.5254606 ,  0.24609195],
          [-1.6295767 , -0.5254606 ,  0.7384062 ]], dtype=float32),
   0.90740836),
  (9,
   array([[-0.11904451,  0.01944979,  0.5837554 ],
          [-0.11904451,  0.01944979,  0.25368482],
          [-0.40251604,  0.01944979,  0.25368482],
          [-0.40251604,  0.01944979,  0.5837554 ],
          [-0.11904451, -0.39607286,  0.5837554 ],
          [-0.11904451, -0.39607286,  0.25368482],
          [-0.40251604, -0.39607286,  0.25368482],
          [-0.40251604, -0.39607286,  0.5837554 ]], dtype=float32),
   0.9623784),
  (15,
   array([[-0.2253964 ,  0.015304  ,  0.7392752 ],
          [-0.2253964 ,  0.015304  , -0.17880258]

In [5]:
# temp_input_unsqueezed is a detached copy of temp_input['point_clouds'] with an extra dimension
# temp_input_unsqueezed = temp_input['point_clouds'].clone().detach().unsqueeze_(0)
# temp_input_unsqueezed = temp_input['point_clouds'].unsqueeze_(0)

TypeError: list indices must be integers or slices, not str

In [5]:
# Display `instance_bboxes`.
# NOTE(review): no cell visible here assigns `instance_bboxes`; confirm its origin.
instance_bboxes

array([[ 1.98960567,  0.49231637,  0.89994586,  0.31689119,  0.87098539,
         1.00589061,  9.        ],
       [ 1.97557068, -1.19884264,  0.8656804 ,  0.26435578,  0.67966241,
         1.12072575,  9.        ],
       [ 2.03211308, -2.61495972,  0.35489422,  0.22199845,  0.49363828,
         0.15295288,  9.        ]])

In [7]:
# Display `pseudo_labels`.
# NOTE(review): no cell visible here assigns `pseudo_labels`; confirm its origin.
# len(temp_input)
pseudo_labels

[[(4,
   array([[ 1.5885948 , -0.04509287, -0.30919978],
          [ 1.5885948 , -0.04509287, -0.73816586],
          [ 0.9208852 , -0.04509287, -0.73816586],
          [ 0.9208852 , -0.04509287, -0.30919978],
          [ 1.5885948 , -0.49846047, -0.30919978],
          [ 1.5885948 , -0.49846047, -0.73816586],
          [ 0.9208852 , -0.49846047, -0.73816586],
          [ 0.9208852 , -0.49846047, -0.30919978]], dtype=float32),
   0.9625753),
  (8,
   array([[ 2.0904934 , -0.32871246,  3.2535353 ],
          [ 2.0904934 , -0.32871246,  2.0823412 ],
          [ 1.8958637 , -0.32871246,  2.0823412 ],
          [ 1.8958637 , -0.32871246,  3.2535353 ],
          [ 2.0904934 , -1.7971337 ,  3.2535353 ],
          [ 2.0904934 , -1.7971337 ,  2.0823412 ],
          [ 1.8958637 , -1.7971337 ,  2.0823412 ],
          [ 1.8958637 , -1.7971337 ,  3.2535353 ]], dtype=float32),
   0.93744236),
  (9,
   array([[ 0.36006266,  0.01849274, -0.23532286],
          [ 0.36006266,  0.01849274, -0.54561067],

In [11]:
# Print the shape of the item at index 1 of every entry in `temp_input[0]`.
for entry in temp_input[0]:
    second_field = entry[1]
    print(second_field.shape)

(8, 3)
(8, 3)
(8, 3)
(8, 3)


In [None]:
# Show the 'train' entry of `datasets` (as returned by build_dataset_SDCoT).
datasets['train']

<datasets.scannet_SDCoT.ScannetDetectionDataset_SDCoT at 0x7f28b076dd90>

In [None]:
# Display `temp_input`.
# NOTE(review): the saved output of this cell is a dict with an 'outputs' key,
# whereas the setup cell assigns `temp_input = datasets['train'][0]` and reads
# 'point_clouds' from it; the saved output appears to come from a different
# run. Confirm which value is meant.
temp_input

{'outputs': {'sem_cls_logits': tensor([[[-10.1757,  -7.6815,  -7.8094,  ...,  -7.8561, -10.9767,   4.9599],
           [-11.6661,  -8.9641,  -9.2035,  ...,  -9.2340, -11.4091,   5.0689],
           [-13.5923, -10.4847,  -7.0154,  ...,  -7.9290, -11.9607,   4.1387],
           ...,
           [-13.8350,  -9.4825,  -9.3383,  ...,  -4.7228, -15.0598,   3.6305],
           [-11.8198,  -9.4931,  -9.0573,  ...,  -8.6720, -12.3282,   5.1943],
           [-14.8208, -11.1866,  -7.8178,  ...,  -9.7698, -14.6232,   4.2465]]],
         device='cuda:0'),
  'center_normalized': tensor([[[ 0.3169,  0.6594,  0.2300],
           [ 0.2840,  0.0158, -0.0635],
           [ 0.9216,  0.3351,  0.6078],
           [ 0.9478,  0.9884,  0.8340],
           [ 0.0445,  0.3316,  0.6770],
           [ 0.0929,  0.9903,  0.9629],
           [ 0.9383,  0.6460,  0.5755],
           [ 0.5184,  0.3171,  0.2891],
           [ 0.5305,  0.9379,  0.3095],
           [ 0.0158,  0.6096,  0.8556],
           [ 0.9243,  0.2026,  

In [5]:
# List the keys stored under temp['outputs']; the comment below records the
# result of an earlier run.
# NOTE(review): no cell visible here assigns `temp`; the last saved run of
# this cell ended in NameError.
temp['outputs'].keys() 
# dict_keys(['sem_cls_logits', 'center_normalized', 'center_unnormalized', 'size_normalized', 'size_unnormalized', 'angle_logits', 'angle_residual', 'angle_residual_normalized', 'angle_continuous', 'objectness_prob', 'sem_cls_prob', 'box_corners'])

# aux_outputs
# temp['aux_outputs'][2]

NameError: name 'temp' is not defined

In [None]:
# Display everything stored under temp['outputs'].
# NOTE(review): no cell visible here assigns `temp`; confirm its origin.
temp['outputs']

{'sem_cls_logits': tensor([[[ -9.8974,  -7.3756,  -8.4300,  ...,  -9.1686, -12.6970,   4.9194],
          [-11.3731,  -9.3842,  -8.2803,  ..., -10.8258, -10.3145,   5.1706],
          [-13.8235, -11.1196,  -6.1872,  ...,  -8.0389, -11.4558,   4.4387],
          ...,
          [-13.0471, -10.1218,  -9.6415,  ...,  -5.7342, -14.9513,   3.8883],
          [-11.1809,  -9.9146,  -9.5117,  ...,  -9.3230, -13.3664,   5.1888],
          [-13.3934, -11.3304,  -7.3510,  ..., -10.4634, -12.6790,   4.3721]]],
        device='cuda:0'),
 'center_normalized': tensor([[[ 7.5555e-01,  3.2098e-01,  2.0089e-01],
          [ 6.4203e-01,  9.8475e-01, -8.8191e-02],
          [ 7.7279e-02,  6.4264e-01,  6.1996e-01],
          [ 1.4358e-01,  4.0981e-02,  8.0997e-01],
          [ 9.0456e-01,  6.6881e-01,  6.9110e-01],
          [ 9.2768e-01,  4.1624e-02,  9.4109e-01],
          [ 9.2935e-02,  3.3163e-01,  5.3604e-01],
          [ 4.3970e-01,  6.8482e-01,  2.9256e-01],
          [ 5.3175e-01,  6.6805e-02,  2.80

In [None]:
# ap_calculator.step_SDCoT(
#     predicted_box_corners=temp['outputs']['box_corners'],
#     sem_cls_probs = temp['outputs']['sem_cls_prob'],
#     objectness_probs = temp['outputs']['objectness_prob'],
#     point_cloud = temp_input['point_clouds'],
# )




In [None]:
# Length of the first entry in the first prediction list.
# NOTE(review): `parsed_predictions` (plural) is not assigned in any visible
# cell; the setup cell assigns `parsed_prediction`. Confirm the intended name.
len(parsed_predictions[0][0]) # original note said "1x85"; the recorded output of this cell is 3

3

In [None]:
# Print the first and last field of every entry in the first prediction list.
# Presumably these are the class id and its score; verify against the code
# that builds `parsed_predictions`.
for prediction in parsed_predictions[0]:
    prob = prediction[-1]
    print(prediction[0], prob)

# In SDCoT, the thresholds are set to 0.95 (objectness score) and 0.9 (classification prob)
# 4 0.90740836
# 9 0.9623784
# 15 0.9973775

4 0.90740836
9 0.9623784
15 0.9973775


In [None]:
# Shape of the 'objectness_prob' entry under temp['outputs'].
# NOTE(review): no cell visible here assigns `temp`; confirm its origin.
temp['outputs']['objectness_prob'].shape

torch.Size([1, 256])

In [None]:
# Fetch the item at index 0 of the 'train' dataset for inspection.
x = datasets['train'][0]

In [None]:
# Shape of the sample's "gt_box_corners" entry.
# NOTE(review): the trailing reminder presumably refers to prepending other
# boxes to this array and dropping trailing slots — TODO confirm the intent.
x["gt_box_corners"].shape # prepend and remove last few

(64, 8, 3)

In [None]:
# Shape of the item at index 1 of the first entry in the first prediction list.
# NOTE(review): `parsed_predictions` is not assigned in any visible cell; confirm its origin.
parsed_predictions[0][0][1].shape

(8, 3)