In [2]:
from pathlib import Path
import sys

# Locate the project root relative to the notebook's working directory.
THIS_DIR = Path.cwd().resolve()
PROJECT_ROOT = THIS_DIR.parents[1]  # two levels above the working directory -> ptta/
print(PROJECT_ROOT)

# Put the project root at the front of the import search path
# (presumably so project-local packages can be imported by later cells).
sys.path.insert(0, str(PROJECT_ROOT))

/workspace/ptta


In [3]:
import sys
from pathlib import Path

# Current working directory: ptta/other_method/DUA/
# Add the directory two levels above the working directory to sys.path.
# NOTE(review): despite the variable name, parents[1] of ptta/other_method/DUA/
# is ptta/ itself, not the parent of ptta/ (the preceding cell prints
# /workspace/ptta for the equivalent expression).
PROJECT_PARENT = Path.cwd().parents[1]  # -> ptta/
sys.path.insert(0, str(PROJECT_PARENT))

from os import path

import torch
from torch import nn, optim
from torch.utils.data import DataLoader

from ttadapters.datasets import BaseDataset, DatasetHolder, DataLoaderHolder
from ttadapters.datasets import SHIFTClearDatasetForObjectDetection, SHIFTCorruptedDatasetForObjectDetection, SHIFTDataSubsetForObjectDetection
from transformers import Trainer, TrainingArguments, EarlyStoppingCallback
from accelerate import Accelerator, notebook_launcher

from supervision.metrics.mean_average_precision import MeanAveragePrecision
from supervision.detection.core import Detections

# import wandb
from tqdm.auto import tqdm
import matplotlib.pyplot as plt

## Check GPU Availability

In [4]:
!nvidia-smi

Sat Aug 23 11:14:38 2025       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.183.01             Driver Version: 535.183.01   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla P100-PCIE-16GB           Off | 00000000:04:00.0 Off |                    0 |
| N/A   46C    P0              41W / 250W |   6382MiB / 16384MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
|   1  Tesla P100-PCIE-16GB           Off | 00000000:06:00.0 Off |  

In [5]:
# Device selection. DEVICE_NUM is the CUDA index to use; a truthy ADDITIONAL_GPU
# makes that index the process-wide default instead of pinning it in `device`.
DEVICE_NUM = 1
ADDITIONAL_GPU = 0
DATA_TYPE = torch.bfloat16

if not torch.cuda.is_available():
    # CPU fallback; DEVICE_NUM is set to -1 to mark "no CUDA device".
    device = torch.device("cpu")
    DEVICE_NUM = -1
elif ADDITIONAL_GPU:
    # Select the default CUDA device globally and refer to it generically.
    torch.cuda.set_device(DEVICE_NUM)
    device = torch.device("cuda")
else:
    # Address the chosen GPU explicitly.
    device = torch.device(f"cuda:{DEVICE_NUM}")

# The index suffix is only appended when `device` itself does not carry it.
print(f"INFO: Using device - {device}" + (f":{DEVICE_NUM}" if ADDITIONAL_GPU else ""))

INFO: Using device - cuda:1


In [6]:
# Experiment identifiers.
# NOTE(review): neither name is referenced by the other visible cells (the wandb
# import is commented out) - confirm they are still needed.
PROJECT_NAME = "DUA test"
RUN_NAME = "RT-DETR_R50_DUA"

## Dataset

In [7]:
# Dataset root: "<working directory>/../../data", normalized lexically.
DATA_ROOT = path.normpath(path.join(Path.cwd(), "..", "..", "data"))
print(DATA_ROOT)
# Train/valid use the clear dataset; the "test" slot holds the corrupted dataset
# constructed with valid=True.
dataset = DatasetHolder(
    train=SHIFTClearDatasetForObjectDetection(root=DATA_ROOT, train=True),
    valid=SHIFTClearDatasetForObjectDetection(root=DATA_ROOT, valid=True),
    test=SHIFTCorruptedDatasetForObjectDetection(root=DATA_ROOT, valid=True)
)
DATA_ROOT

[08/23/2025 11:14:38] SHIFT DevKit - INFO - Base: /workspace/ptta/data/SHIFT/discrete/images/train. Backend: <shift_dev.utils.backend.ZipBackend object at 0x7f89491c6b10>
[08/23/2025 11:14:38] SHIFT DevKit - INFO - Loading annotation from '/workspace/ptta/data/SHIFT_SUBSET/normal/discrete/images/train/front/det_2d.json' ...


/workspace/ptta/data
INFO: Downloading 'SHIFT_SUBSET' from file server to /workspace/ptta/data/SHIFT/discrete...
INFO: Dataset archive found in the root directory. Skipping download.
INFO: Subset split for 'SHIFT_SUBSET' dataset is already done. Skipping...
INFO: Downloading 'SHIFT_SUBSET' from file server to /workspace/ptta/data/SHIFT/discrete...
INFO: Dataset archive found in the root directory. Skipping download.


[08/23/2025 11:14:41] SHIFT DevKit - INFO - Loading annotation from '/workspace/ptta/data/SHIFT_SUBSET/normal/discrete/images/train/front/det_2d.json' Done.
[08/23/2025 11:14:51] SHIFT DevKit - INFO - Loading annotation takes 12.44 seconds.


Batch 0:

Item                 Shape                               Min        Max       
--------------------------------------------------------------------------------
original_hw          [tensor([800]), tensor([1280])]
input_hw             [tensor([800]), tensor([1280])]
frame_ids            torch.Size([1])                           0.00       0.00
name                 ['00000000_img_front.jpg']
videoName            ['0016-1b62']
intrinsics           torch.Size([1, 3, 3])                     0.00     640.00
extrinsics           torch.Size([1, 4, 4])                    -7.53     219.91
boxes2d              torch.Size([1, 26, 4])                    5.00     974.00
boxes2d_classes      torch.Size([1, 26])                       0.00       3.00
boxes2d_track_ids    torch.Size([1, 26])                       0.00      25.00
images               torch.Size([1, 1, 3, 800, 1280])          0.00     255.00



[08/23/2025 11:14:56] SHIFT DevKit - INFO - Base: /workspace/ptta/data/SHIFT/discrete/images/val. Backend: <shift_dev.utils.backend.ZipBackend object at 0x7f89491c6b10>
[08/23/2025 11:14:56] SHIFT DevKit - INFO - Loading annotation from '/workspace/ptta/data/SHIFT_SUBSET/normal/discrete/images/val/front/det_2d.json' ...
[08/23/2025 11:14:56] SHIFT DevKit - INFO - Loading annotation from '/workspace/ptta/data/SHIFT_SUBSET/normal/discrete/images/val/front/det_2d.json' Done.


Video name: 0016-1b62
Sample indices within a video: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
INFO: Downloading 'SHIFT_SUBSET' from file server to /workspace/ptta/data/SHIFT/discrete...
INFO: Dataset archive found in the root directory. Skipping download.
INFO: Subset split for 'SHIFT_SUBSET' dataset is already done. Skipping...
INFO: Downloading 'SHIFT_SUBSET' from file server to /workspace/ptta/data/SHIFT/discrete...
INFO: Dataset archive found in the root directory. Skipping download.


[08/23/2025 11:14:58] SHIFT DevKit - INFO - Loading annotation takes 1.52 seconds.


Batch 0:

Item                 Shape                               Min        Max       
--------------------------------------------------------------------------------
original_hw          [tensor([800]), tensor([1280])]
input_hw             [tensor([800]), tensor([1280])]
frame_ids            torch.Size([1])                           0.00       0.00
name                 ['00000000_img_front.jpg']
videoName            ['0116-4859']
intrinsics           torch.Size([1, 3, 3])                     0.00     640.00
extrinsics           torch.Size([1, 4, 4])                    -0.90     138.34
boxes2d              torch.Size([1, 6, 4])                   246.00     859.00
boxes2d_classes      torch.Size([1, 6])                        1.00       5.00
boxes2d_track_ids    torch.Size([1, 6])                        0.00       5.00
images               torch.Size([1, 1, 3, 800, 1280])          0.00     255.00



[08/23/2025 11:14:58] SHIFT DevKit - INFO - Base: /workspace/ptta/data/SHIFT/discrete/images/val. Backend: <shift_dev.utils.backend.ZipBackend object at 0x7f89491c6b10>
[08/23/2025 11:14:58] SHIFT DevKit - INFO - Loading annotation from '/workspace/ptta/data/SHIFT_SUBSET/corrupted/discrete/images/val/front/det_2d.json' ...


Video name: 0116-4859
Sample indices within a video: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
INFO: Downloading 'SHIFT_SUBSET' from file server to /workspace/ptta/data/SHIFT/discrete...
INFO: Dataset archive found in the root directory. Skipping download.
INFO: Subset split for 'SHIFT_SUBSET' dataset is already done. Skipping...
INFO: Downloading 'SHIFT_SUBSET' from file server to /workspace/ptta/data/SHIFT/discrete...
INFO: Dataset archive found in the root directory. Skipping download.


[08/23/2025 11:15:00] SHIFT DevKit - INFO - Loading annotation from '/workspace/ptta/data/SHIFT_SUBSET/corrupted/discrete/images/val/front/det_2d.json' Done.
[08/23/2025 11:15:10] SHIFT DevKit - INFO - Loading annotation takes 12.27 seconds.


Batch 0:

Item                 Shape                               Min        Max       
--------------------------------------------------------------------------------
original_hw          [tensor([800]), tensor([1280])]
input_hw             [tensor([800]), tensor([1280])]
frame_ids            torch.Size([1])                           0.00       0.00
name                 ['00000000_img_front.jpg']
videoName            ['007b-4e72']
intrinsics           torch.Size([1, 3, 3])                     0.00     640.00
extrinsics           torch.Size([1, 4, 4])                  -311.22     226.46
boxes2d              torch.Size([1, 3, 4])                   233.00     802.00
boxes2d_classes      torch.Size([1, 3])                        0.00       1.00
boxes2d_track_ids    torch.Size([1, 3])                        0.00       2.00
images               torch.Size([1, 1, 3, 800, 1280])          0.00     255.00

Video name: 007b-4e72
Sample indices within a video: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,

'/workspace/ptta/data'

In [8]:
from typing import Iterable, List

def task_to_subset_types(task: str):
    """Translate a task name into a ``SHIFTDataSubsetForObjectDetection.SubsetType`` member.

    Args:
        task: One of ``"cloudy"``, ``"overcast"``, ``"rainy"`` (alias ``"rain"``),
            ``"foggy"``, ``"night"``, ``"dawn"`` (alias ``"dawn/dusk"``),
            ``"clear"``, ``"normal"`` or ``"corrupted"``.

    Returns:
        The ``SubsetType`` member associated with ``task``.

    Raises:
        ValueError: If ``task`` is not one of the names listed above.
    """
    T = SHIFTDataSubsetForObjectDetection.SubsetType

    # Task name -> SubsetType member name. The member is resolved with getattr
    # only after the lookup succeeds, so only the requested member is accessed.
    member_names = {
        # weather
        "cloudy": "CLOUDY_DAYTIME",
        "overcast": "OVERCAST_DAYTIME",
        "rainy": "RAINY_DAYTIME",
        "rain": "RAINY_DAYTIME",  # alias accepted for callers that pass "rain"
        "foggy": "FOGGY_DAYTIME",
        # time
        "night": "CLEAR_NIGHT",
        "dawn": "CLEAR_DAWN",
        "dawn/dusk": "CLEAR_DAWN",
        "clear": "CLEAR_DAYTIME",
        # simple
        "normal": "NORMAL",
        "corrupted": "CORRUPTED",
    }

    if task not in member_names:
        raise ValueError(f"Unknown task: {task}")
    return getattr(T, member_names[task])

In [9]:
from typing import Optional, Callable

class SHIFTCorruptedTaskDatasetForObjectDetection(SHIFTDataSubsetForObjectDetection):
    """SHIFT detection subset that is selected by a task name.

    Thin wrapper over ``SHIFTDataSubsetForObjectDetection``: the ``task`` string is
    converted with ``task_to_subset_types`` and forwarded as ``subset_type``; every
    other argument is passed through to the parent constructor.
    """

    def __init__(
            self, root: str, force_download: bool = False,
            train: bool = True, valid: bool = False,
            transform: Optional[Callable] = None, task: str = "clear", target_transform: Optional[Callable] = None
    ):
        # Resolve the task name up front; an unknown name raises ValueError here,
        # before the parent constructor is invoked.
        subset = task_to_subset_types(task)
        # The parent receives the target transform under the keyword `tar_transform`.
        super().__init__(
            root=root,
            force_download=force_download,
            train=train,
            valid=valid,
            subset_type=subset,
            transform=transform,
            tar_transform=target_transform,
        )

In [10]:
# Set Batch Size
# NOTE(review): the first preset is overwritten immediately by the second, so
# only the A6000 values take effect.
BATCH_SIZE = 2, 8, 8, 8  # 4070 Ti
BATCH_SIZE = 32, 64, 64, 32  # A6000

# Dataset Configs
CLASSES = dataset.train.classes  # class names exposed by the training split
NUM_CLASSES = len(CLASSES)

# Only the first three BATCH_SIZE entries (train, valid, test) are reported.
print(f"INFO: Set batch size - Train: {BATCH_SIZE[0]}, Valid: {BATCH_SIZE[1]}, Test: {BATCH_SIZE[2]}")
print(f"INFO: Number of classes - {NUM_CLASSES} {CLASSES}")

INFO: Set batch size - Train: 32, Valid: 64, Test: 64
INFO: Number of classes - 6 ['pedestrian', 'car', 'truck', 'bus', 'motorcycle', 'bicycle']


In [11]:
class DatasetAdapterForTransformers(BaseDataset):
    """Expose a wrapped dataset as ``{image, target}`` samples in COCO-detection layout.

    Each sample of the wrapped dataset is indexed by camera name; the selected
    view must provide ``images``, ``boxes2d`` and ``boxes2d_classes``.
    """

    def __init__(self, original_dataset, camera='front'):
        self.dataset = original_dataset
        self.camera = camera

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        view = self.dataset[idx][self.camera]
        image = view['images'].squeeze(0)  # drop the leading singleton dimension

        def as_coco(box, cls):
            # Pascal VOC corners (x1, y1, x2, y2) -> COCO [x, y, width, height]
            left, top, right, bottom = box.tolist()
            box_w, box_h = right - left, bottom - top
            return dict(
                bbox=[left, top, box_w, box_h],
                category_id=cls.item(),
                area=box_w * box_h,
                iscrowd=0
            )

        coco_annotations = [
            as_coco(box, cls)
            for box, cls in zip(view['boxes2d'], view['boxes2d_classes'])
        ]

        # Layout expected by prepare_coco_detection_annotation.
        # The RT-DETR image processor turns COCO boxes into center format
        # (cx, cy, w, h) while preprocessing and back into Pascal VOC
        # (x1, y1, x2, y2) during post-processing.
        return dict(
            image=image,
            target=dict(image_id=idx, annotations=coco_annotations),
        )

In [12]:
def collate_fn(batch, preprocessor=None):
    """Collate dataset samples into a model-ready batch.

    Args:
        batch: List of samples. Each sample has an ``image`` entry and either a
            COCO-style ``target`` (as produced by ``DatasetAdapterForTransformers``)
            or raw ``boxes2d`` / ``boxes2d_classes`` tensors.
        preprocessor: Optional callable invoked as
            ``preprocessor(images=..., annotations=..., return_tensors="pt")``.
            When given, its return value is returned unchanged.

    Returns:
        The preprocessor output, or - without a preprocessor - a dict with
        ``pixel_values`` (the stacked image tensors) and ``labels`` (one dict per
        sample holding ``class_labels`` as long and ``boxes`` as float xyxy).
    """
    images = [item['image'] for item in batch]
    if preprocessor is not None:
        targets = [item['target'] for item in batch]
        return preprocessor(images=images, annotations=targets, return_tensors="pt")

    # No preprocessor: assume the images are already tensors of equal shape.
    # `pixel_values` is the stacked tensor itself (not a nested dict) so the
    # result has the same top-level keys that callers read from the
    # preprocessor output.
    return dict(
        pixel_values=torch.stack(images),
        labels=[_labels_without_preprocessor(item) for item in batch],
    )


def _labels_without_preprocessor(item):
    """Build ``class_labels`` / xyxy ``boxes`` for one sample without a preprocessor."""
    # Raw samples already carry the tensors; only cast them.
    if 'boxes2d' in item and 'boxes2d_classes' in item:
        return dict(
            class_labels=item['boxes2d_classes'].long(),
            boxes=item['boxes2d'].float(),
        )

    # Adapter samples carry COCO annotations: [x, y, w, h] -> (x1, y1, x2, y2).
    annotations = item['target']['annotations']
    class_labels = torch.tensor([ann['category_id'] for ann in annotations], dtype=torch.long)
    # Explicit row count: reshape(-1, 4) is ambiguous for zero annotations.
    boxes = torch.tensor(
        [ann['bbox'] for ann in annotations], dtype=torch.float32
    ).reshape(len(annotations), 4)
    boxes[:, 2:] = boxes[:, 2:] + boxes[:, :2]
    return dict(class_labels=class_labels, boxes=boxes)

## Define Model

In [13]:
from transformers import RTDetrForObjectDetection, RTDetrImageProcessorFast, RTDetrConfig
from transformers.image_utils import AnnotationFormat
from safetensors.torch import load_file

In [14]:
# Square edge length used below for the image processor's `size`.
# NOTE(review): the dataset summaries printed earlier report 800x1280 inputs and
# the processor has resizing disabled - confirm 800 is the intended value.
IMG_SIZE = 800

In [15]:
reference_model_id = "PekingU/rtdetr_r50vd"

# Load the reference model configuration
reference_config = RTDetrConfig.from_pretrained(reference_model_id, torch_dtype=torch.float32, return_dict=True)
# Use the dataset-derived class count instead of a literal so the detection
# head cannot silently diverge from CLASSES.
reference_config.num_labels = NUM_CLASSES

# Set the image size and preprocessor size from the single IMG_SIZE constant
reference_config.image_size = IMG_SIZE

# Load the reference model image processor
reference_preprocessor = RTDetrImageProcessorFast.from_pretrained(reference_model_id)
reference_preprocessor.format = AnnotationFormat.COCO_DETECTION  # COCO Format / Detection BBOX Format
reference_preprocessor.size = {"height": IMG_SIZE, "width": IMG_SIZE}
reference_preprocessor.do_resize = False  # images are passed through at their native resolution

In [16]:
# Build the architecture from the reference config, then load the fine-tuned weights.
model_pretrained = RTDetrForObjectDetection(config=reference_config)
# NOTE(review): absolute checkpoint path - consider deriving it from the project root.
model_states = load_file("/workspace/ptta/RT-DETR_R50vd_SHIFT_CLEAR_42.42.safetensors", device="cpu")

# strict=False tolerates key mismatches, so report them instead of discarding
# the result: silently missing weights would leave layers randomly initialized.
load_result = model_pretrained.load_state_dict(model_states, strict=False)
if load_result.missing_keys or load_result.unexpected_keys:
    print(
        f"WARNING: checkpoint key mismatch - "
        f"missing: {len(load_result.missing_keys)}, unexpected: {len(load_result.unexpected_keys)}"
    )

# Freeze every parameter
model_pretrained.requires_grad_(False)

# Initialize Model
model_pretrained.to(device)

RTDetrForObjectDetection(
  (model): RTDetrModel(
    (backbone): RTDetrConvEncoder(
      (model): RTDetrResNetBackbone(
        (embedder): RTDetrResNetEmbeddings(
          (embedder): Sequential(
            (0): RTDetrResNetConvLayer(
              (convolution): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
              (normalization): RTDetrFrozenBatchNorm2d()
              (activation): ReLU()
            )
            (1): RTDetrResNetConvLayer(
              (convolution): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
              (normalization): RTDetrFrozenBatchNorm2d()
              (activation): ReLU()
            )
            (2): RTDetrResNetConvLayer(
              (convolution): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
              (normalization): RTDetrFrozenBatchNorm2d()
              (activation): ReLU()
            )
          )
          (pooler): MaxPool2d(

## NORM

In [17]:
from functools import partial
# Loader over the "test" split, batched 4 at a time through the reference preprocessor.
# NOTE(review): test() and NORM() build their own local loaders under the same
# name; this global does not appear to be read by the visible cells.
dataloader_discrete = DataLoader(DatasetAdapterForTransformers(dataset.test), batch_size=4, collate_fn=partial(collate_fn, preprocessor=reference_preprocessor))

In [18]:
class LabelDataset(BaseDataset):
    """View of a wrapped dataset that yields only the 2D boxes and their classes.

    Indexing returns the pair ``(boxes2d, boxes2d_classes)`` taken from the
    selected camera of the wrapped sample.
    """

    def __init__(self, original_dataset, camera='front'):
        self.dataset = original_dataset
        self.camera = camera

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        sample = self.dataset[idx]
        view = sample[self.camera]
        boxes = view['boxes2d']
        classes = view['boxes2d_classes']
        return boxes, classes

In [19]:
def naive_collate_fn(batch):
    """Identity collate function: return the list of samples exactly as received."""
    return batch

In [20]:
from functools import partial

def test(model, task, batch_size):
    """Evaluate ``model`` on one SHIFT task and print/return its mAP metrics.

    Two loaders iterate the same dataset in lockstep: one yields the raw
    ``(boxes2d, boxes2d_classes)`` labels, the other the preprocessed model inputs.

    Args:
        model: Detection model called as ``model(pixel_values=...)``.
        task: Task name understood by ``task_to_subset_types``.
        batch_size: Batch size used for both loaders.

    Returns:
        dict with keys ``"mAP@0.95"``, ``"mAP50"``, ``"mAP75"`` and
        ``"per_class_mAP@0.95"`` (a dict keyed ``"<class>_mAP@0.95"``).
    """
    targets = []
    predictions = []
    # NOTE(review): only valid=True is passed, so the wrapper's default
    # train=True applies as well - confirm this selects the intended split.
    dataset = SHIFTCorruptedTaskDatasetForObjectDetection(root=DATA_ROOT, valid=True, task=task)

    raw_data = DataLoader(LabelDataset(dataset), batch_size=batch_size, collate_fn=naive_collate_fn)
    dataloader_discrete = DataLoader(DatasetAdapterForTransformers(dataset), batch_size=batch_size, collate_fn=partial(collate_fn, preprocessor=reference_preprocessor))
    for labels, inputs in zip(tqdm(raw_data), dataloader_discrete):
        sizes = [label['orig_size'].cpu().tolist() for label in inputs['labels']]

        with torch.no_grad():
            outputs = model(pixel_values=inputs['pixel_values'].to(device))

        # threshold=0.0 keeps every prediction
        results = reference_preprocessor.post_process_object_detection(
            outputs, target_sizes=sizes, threshold=0.0
        )

        # Iterate over the samples actually present: the final batch may hold
        # fewer than `batch_size` items.
        for result, (boxes, classes) in zip(results, labels):
            predictions.append(Detections.from_transformers(result))
            targets.append(Detections(
                xyxy=boxes.cpu().numpy(),
                class_id=classes.cpu().numpy(),
            ))

    mean_average_precision = MeanAveragePrecision().update(
        predictions=predictions,
        targets=targets,
    ).compute()
    # Rows of `ap_per_class` line up with `matched_classes` by position, so index
    # the AP table by row and use the class id only to look up the class name.
    per_class_map = {
        f"{CLASSES[int(class_id)]}_mAP@0.95": mean_average_precision.ap_per_class[row].mean()
        for row, class_id in enumerate(mean_average_precision.matched_classes)
    }

    print(f"mAP@0.95_{task}: {mean_average_precision.map50_95:.2f}")
    print(f"mAP50_{task}: {mean_average_precision.map50:.2f}")
    print(f"mAP75_{task}: {mean_average_precision.map75:.2f}")
    for key, value in per_class_map.items():
        print(f"{key}_{task}: {value:.2f}")

    return {"mAP@0.95" : mean_average_precision.map50_95,
            "mAP50" : mean_average_precision.map50,
            "mAP75" : mean_average_precision.map75,
            "per_class_mAP@0.95" : per_class_map
            }

In [21]:
from collections import defaultdict

def agg_per_class(dicts):
    """Average per-class metrics over several runs.

    Args:
        dicts: List of per-class metric dicts,
            e.g. ``[{"car_mAP@0.95": 0.41, ...}, {...}]``.

    Returns:
        dict mapping every key seen in any input dict to the mean of its values,
        averaged over the dicts that contain that key.
    """
    sums = defaultdict(float)
    counts = defaultdict(int)
    for d in dicts:
        for cls, val in d.items():
            sums[cls] += float(val)
            counts[cls] += 1
    return {cls: sums[cls] / counts[cls] for cls in sums}


def aggregate_runs(results_list):
    """Combine the per-task result dicts returned by ``test`` into sums and means.

    Args:
        results_list: List of dicts with keys ``"mAP@0.95"``, ``"mAP50"``,
            ``"mAP75"`` and ``"per_class_mAP@0.95"``.

    Returns:
        dict with ``"overall_sum"`` and ``"overall_mean"`` (each keyed by the three
        overall metrics) and ``"per_class_mean@0.95"`` (per-class means across
        runs). An empty input yields zero means and an empty per-class dict.
    """
    overall_sum = {"mAP@0.95": 0.0, "mAP50": 0.0, "mAP75": 0.0}
    n = len(results_list)

    # Collect every run's per-class dict first; the averaging helper expects the
    # whole list, not one dict at a time.
    per_class_maps = []
    for r in results_list:
        for key in overall_sum:
            overall_sum[key] += float(r[key])
        per_class_maps.append(r["per_class_mAP@0.95"])

    overall_mean = {k: (overall_sum[k] / n if n > 0 else 0.0) for k in overall_sum}
    per_class_means = agg_per_class(per_class_maps)

    return {
        "overall_sum": overall_sum,               # {"mAP@0.95": ..., "mAP50": ..., "mAP75": ...}
        "overall_mean": overall_mean,             # the sums above divided by the run count
        "per_class_mean@0.95": per_class_means,   # {"car_mAP@0.95": mean, ...}
    }

def print_results(result):
    """Print overall and per-class mAP values with two decimals.

    Accepts either shape used in this notebook:

    * the aggregate returned by ``aggregate_runs`` (overall metrics under
      ``"overall_mean"``, per-class values under ``"per_class_mean@0.95"``), or
    * a flat result as returned by ``test`` (overall metrics at the top level,
      per-class values under ``"per_class_mAP@0.95"``).

    Args:
        result: Result dict in one of the two shapes above.
    """
    # Aggregates nest the overall metrics; flat results keep them at the top level.
    overall = result["overall_mean"] if "overall_mean" in result else result
    print(f"mAP@0.95: {float(overall['mAP@0.95']):.2f}")
    print(f"mAP50: {float(overall['mAP50']):.2f}")
    print(f"mAP75: {float(overall['mAP75']):.2f}")

    per_class = result.get("per_class_mean@0.95")
    if per_class is None:
        per_class = result.get("per_class_mAP@0.95", {})
    for k, v in per_class.items():
        print(f"{k}: {v:.2f}")

In [22]:
class EMABatchNorm(nn.Module):
    """Wrap a BatchNorm layer so every forward pass first refreshes its running stats.

    ``forward`` runs the wrapped layer twice on the same input: once in training
    mode (result discarded, running statistics updated) and once in eval mode
    (normalizing with the stored statistics). ``adapt_model`` swaps every
    ``nn.BatchNorm2d`` found in a model for such a wrapper.
    """

    def __init__(self, layer):
        super().__init__()
        self.layer = layer

    def forward(self, x):
        wrapped = self.layer
        # Pass 1 (train mode): only the running-statistics update is wanted.
        wrapped.train()
        wrapped(x)
        # Pass 2 (eval mode): normalize with the statistics stored so far.
        wrapped.eval()
        return wrapped(x)

    @staticmethod
    def reset_stats(module):
        """Clear the running statistics of ``module``, set its momentum to None, and return it."""
        module.reset_running_stats()
        module.momentum = None
        return module

    @staticmethod
    def find_bns(parent):
        """Return ``(parent, child_name, wrapper)`` triples for every BatchNorm2d under ``parent``.

        Side effects: every visited child has gradients disabled, and each
        BatchNorm2d found has its statistics reset before it is wrapped.
        """
        if parent is None:
            return []

        found = []
        for child_name, submodule in parent.named_children():
            submodule.requires_grad_(False)
            if not isinstance(submodule, nn.BatchNorm2d):
                # Not a BatchNorm2d itself: search its children.
                found.extend(EMABatchNorm.find_bns(submodule))
                continue
            wrapper = EMABatchNorm(EMABatchNorm.reset_stats(submodule))
            found.append((parent, child_name, wrapper))
        return found

    @staticmethod
    def adapt_model(model):
        """Replace every BatchNorm2d in ``model`` with an EMABatchNorm wrapper, in place."""
        replacements = EMABatchNorm.find_bns(model)
        print(f"| Found {len(replacements)} modules to be replaced.")
        for owner, attr_name, wrapper in replacements:
            setattr(owner, attr_name, wrapper)
        return model

In [None]:
def set_bn_momentum(model, momentum: float | None):
    # momentum=None 이면 CMA(누적 평균), 수치(0<α≤1)이면 EMA
    for m in model.modules():
        if isinstance(m, nn.BatchNorm2d):
            m.momentum = momentum

In [None]:
from transformers.models.rt_detr.modeling_rt_detr import RTDetrFrozenBatchNorm2d
from torchvision import transforms

def NORM(model, MOMENTOM):
    """Adapt BatchNorm running statistics per task, then evaluate each task.

    For every task, the task's training split is streamed through ``model`` one
    image at a time with all ``nn.BatchNorm2d`` modules in training mode, so
    their running statistics are updated with momentum ``MOMENTOM``. The model is
    then evaluated on that task with ``test``. Statistics are not reset between
    tasks, so adaptation accumulates across the task sequence.

    Args:
        model: A pre-trained model; it is modified in place (BatchNorm momentum
            and running statistics).
        MOMENTOM: Momentum assigned to every ``nn.BatchNorm2d`` module.

    Returns:
        The aggregate produced by ``aggregate_runs`` over all tasks (also printed).
    """
    all_results = []

    device = next(model.parameters()).device

    # "rainy" is the spelling accepted by task_to_subset_types.
    for task in ["cloudy", "overcast", "foggy", "rainy", "dawn", "night", "clear"]:
        adaptation_set = SHIFTCorruptedTaskDatasetForObjectDetection(root=DATA_ROOT, train=True, valid=False, task=task)
        dataloader_discrete = DataLoader(DatasetAdapterForTransformers(adaptation_set), batch_size=1, collate_fn=partial(collate_fn, preprocessor=reference_preprocessor))

        # Whole model in eval mode; only BatchNorm2d layers run in training mode
        # so that their running statistics are updated.
        # NOTE(review): the printed model lists RTDetrFrozenBatchNorm2d layers in
        # the backbone; if those are not nn.BatchNorm2d subclasses they are left
        # untouched by this loop - confirm that is intended.
        model.eval()
        for module in model.modules():
            if isinstance(module, torch.nn.BatchNorm2d):
                module.momentum = MOMENTOM
                module.train()

        # Only the side effect on the running statistics is needed, so no
        # autograd bookkeeping is required for these forward passes.
        with torch.no_grad():
            for batch in tqdm(dataloader_discrete):
                model(batch['pixel_values'].to(device, non_blocking=True))
        model.eval()

        all_results.append(test(model, task, batch_size=8))

    each_task_mAP_list = aggregate_runs(all_results)

    print_results(each_task_mAP_list)
    return each_task_mAP_list

In [None]:
# Run the per-task BatchNorm adaptation and evaluation with momentum 0.2.
NORM(model_pretrained, MOMENTOM=0.2)

[08/23/2025 06:47:00] SHIFT DevKit - INFO - Base: /workspace/ptta/data/SHIFT/discrete/images/train. Backend: <shift_dev.utils.backend.ZipBackend object at 0x7f6a701706d0>
[08/23/2025 06:47:00] SHIFT DevKit - INFO - Loading annotation from '/workspace/ptta/data/SHIFT_SUBSET/cloudy_daytime/discrete/images/train/front/det_2d.json' ...


INFO: Downloading 'SHIFT_SUBSET' from file server to /workspace/ptta/data/SHIFT/discrete...
INFO: Dataset archive found in the root directory. Skipping download.
INFO: Subset split for 'SHIFT_SUBSET' dataset is already done. Skipping...
INFO: Downloading 'SHIFT_SUBSET' from file server to /workspace/ptta/data/SHIFT/discrete...
INFO: Dataset archive found in the root directory. Skipping download.


[08/23/2025 06:47:02] SHIFT DevKit - INFO - Loading annotation from '/workspace/ptta/data/SHIFT_SUBSET/cloudy_daytime/discrete/images/train/front/det_2d.json' Done.
[08/23/2025 06:47:07] SHIFT DevKit - INFO - Loading annotation takes 6.95 seconds.


Batch 0:

Item                 Shape                               Min        Max       
--------------------------------------------------------------------------------
original_hw          [tensor([800]), tensor([1280])]
input_hw             [tensor([800]), tensor([1280])]
frame_ids            torch.Size([1])                           0.00       0.00
name                 ['00000000_img_front.jpg']
videoName            ['01b2-4042']
intrinsics           torch.Size([1, 3, 3])                     0.00     640.00
extrinsics           torch.Size([1, 4, 4])                    -0.80      46.71
boxes2d              torch.Size([1, 2, 4])                     0.00     476.00
boxes2d_classes      torch.Size([1, 2])                        1.00       2.00
boxes2d_track_ids    torch.Size([1, 2])                        0.00       1.00
images               torch.Size([1, 1, 3, 800, 1280])          0.00     255.00

Video name: 01b2-4042
Sample indices within a video: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,

  0%|          | 0/13850 [00:00<?, ?it/s]

[08/23/2025 06:47:13] SHIFT DevKit - INFO - Base: /workspace/ptta/data/SHIFT/discrete/images/val. Backend: <shift_dev.utils.backend.ZipBackend object at 0x7f6a701706d0>
[08/23/2025 06:47:13] SHIFT DevKit - INFO - Loading annotation from '/workspace/ptta/data/SHIFT_SUBSET/cloudy_daytime/discrete/images/val/front/det_2d.json' ...


INFO: Downloading 'SHIFT_SUBSET' from file server to /workspace/ptta/data/SHIFT/discrete...
INFO: Dataset archive found in the root directory. Skipping download.
INFO: Subset split for 'SHIFT_SUBSET' dataset is already done. Skipping...
INFO: Downloading 'SHIFT_SUBSET' from file server to /workspace/ptta/data/SHIFT/discrete...
INFO: Dataset archive found in the root directory. Skipping download.


[08/23/2025 06:47:14] SHIFT DevKit - INFO - Loading annotation from '/workspace/ptta/data/SHIFT_SUBSET/cloudy_daytime/discrete/images/val/front/det_2d.json' Done.
[08/23/2025 06:47:20] SHIFT DevKit - INFO - Loading annotation takes 7.23 seconds.


ValueError: Value '01b2-4042/00000000_img_front.jpg' not found in /workspace/ptta/data/SHIFT/discrete/images/val/front/img.zip!

In [None]:
from zipfile import ZipFile
# Diagnostic: check whether the image named in the error above is present in
# the *train* archive (the error reported it missing from the val archive).
zip_path = "/workspace/ptta/data/SHIFT/discrete/images/train/front/img.zip"
target = "01b2-4042/00000000_img_front.jpg"

with ZipFile(zip_path) as z:
    # Membership test against the archive's entry names.
    print(target in z.namelist())

True
