# DINO: DETR with Improved DeNoising Anchor Boxes for End-to-End Object Detection

https://github.com/IDEA-Research/DINO

[Papers With Code Link](https://paperswithcode.com/paper/focal-modulation-networks)

In [1]:
!pip install --quiet pylance duckdb torch torchvision transforms

## Build and install [DINO]() Model

In [2]:
!git -C DINO pull || git clone https://github.com/IDEACVR/DINO
%cd DINO

!pip install --quiet -r requirements.txt \
  && cd models/dino/ops \
  && python setup.py -q build install

Already up to date.
/home/lei/work/lance/python/notebooks/DINO
zip_safe flag not set; analyzing archive contents...
__pycache__.MultiScaleDeformableAttention.cpython-310: module references __file__


In [3]:
# See https://github.com/IDEA-Research/DINO/blob/main/inference_and_visualization.ipynb
# for instruction to load model
from util.slconfig import SLConfig
from main import build_model_main
model_config_path = "config/DINO/DINO_4scale.py"

args = SLConfig.fromfile(model_config_path) 
args.device = 'cuda' 
model, criterion, postprocessors = build_model_main(args)

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /home/lei/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth


  0%|          | 0.00/97.8M [00:00<?, ?B/s]

In [4]:
# Downloads weights

# Download DINO-4scale weights
! [[ -f /tmp/model.pt ]] || gsutil cp gs://eto-public/models/dino/checkpoint0033_4scale.pth /tmp/model.pt
import torch
model_checkpoint_path = "/tmp/model.pt"
checkpoint = torch.load(model_checkpoint_path)
model.load_state_dict(checkpoint['model'])
_ = model.cuda().eval()

zsh:1: command not found: gsutil


FileNotFoundError: [Errno 2] No such file or directory: '/tmp/model.pt'

## Prepare COCO validation dataset to [Lance](https://github.com/eto-ai/lance) format.

In [None]:
! [[ -f annotations/instances_val2017.json ]] || ( \
  wget -O /tmp/annotations.zip http://images.cocodataset.org/annotations/annotations_trainval2017.zip && \
  unzip -o -qq /tmp/annotations.zip && rm annotations.zip \
)
! [[ -d val2017/ ]] || ( \
  wget -O /tmp/val2017.zip http://images.cocodataset.org/zips/val2017.zip && \
  unzip -o -qq /tmp/val2017.zip && \
  rm val2017.zip )

import pandas as pd
import json
with open("annotations/instances_val2017.json") as fobj:
  data = json.load(fobj)

print(data.keys())
images_df = (
    pd
    .DataFrame(data=data["images"])
    .rename(columns={"id": "image_id"}) 
)

print(images_df)
annos_df = (pd.DataFrame(data=data["annotations"]))
annos_df

In [None]:
from lance.pytorch import Dataset
import torchvision.transforms as T
import pandas as pd

transform = T.Compose([
    T.Resize(400),
    T.ToTensor(),
    T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

threshold = 0.5

dataset = Dataset(
    "s3://eto-public/datasets/coco/coco.lance",
    columns=["image", "split", "image_id"],
    # mode="batch",
    batch_size=8)
results = []
with torch.no_grad():
  for batch in dataset:
    image_id = batch[2].cpu().item()
    imgs = [transform(batch[0]).cuda()]
    # print(batch, batch.shape)
    output = model(imgs)
    output = postprocessors['bbox'](output, torch.Tensor([[1.0, 1.0]]).cuda())[0]
    mask = output["scores"] > threshold
    pred = {
        "image_id": image_id,
        "boxes": output["boxes"][mask].cpu().numpy(),
        "labels": output["labels"][mask].cpu().numpy(),
        "scores": output["scores"][mask].cpu().numpy(),
    }
    del output
    results.append(pred)

df = pd.DataFrame(data=results)
df