<a href="https://colab.research.google.com/github/thanhdangg/Knowledge-Distillation-for-Object-Detection/blob/main/Distillation_model_yolo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets

# Check if GPU is available, and if not, use the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [2]:
import torch.nn.functional as F
from torchvision.datasets import VOCDetection
from torchvision import transforms
import torch
from torch.utils.data import DataLoader

In [3]:
student_model = torch.hub.load("ultralytics/yolov5", "yolov5s")

Using cache found in /root/.cache/torch/hub/ultralytics_yolov5_master
YOLOv5 🚀 2024-8-31 Python-3.10.12 torch-2.4.0+cu121 CUDA:0 (Tesla T4, 15102MiB)

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients, 16.4 GFLOPs
Adding AutoShape... 


In [4]:
!pip install dill



In [5]:
teacher_model = torch.hub.load("ultralytics/yolov5", "yolov5x6")
teacher_model.eval()

Using cache found in /root/.cache/torch/hub/ultralytics_yolov5_master
YOLOv5 🚀 2024-8-31 Python-3.10.12 torch-2.4.0+cu121 CUDA:0 (Tesla T4, 15102MiB)

Fusing layers... 
YOLOv5x6 summary: 574 layers, 140730220 parameters, 0 gradients, 209.6 GFLOPs
Adding AutoShape... 


AutoShape(
  (model): DetectMultiBackend(
    (model): DetectionModel(
      (model): Sequential(
        (0): Conv(
          (conv): Conv2d(3, 80, kernel_size=(6, 6), stride=(2, 2), padding=(2, 2))
          (act): SiLU(inplace=True)
        )
        (1): Conv(
          (conv): Conv2d(80, 160, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
          (act): SiLU(inplace=True)
        )
        (2): C3(
          (cv1): Conv(
            (conv): Conv2d(160, 80, kernel_size=(1, 1), stride=(1, 1))
            (act): SiLU(inplace=True)
          )
          (cv2): Conv(
            (conv): Conv2d(160, 80, kernel_size=(1, 1), stride=(1, 1))
            (act): SiLU(inplace=True)
          )
          (cv3): Conv(
            (conv): Conv2d(160, 160, kernel_size=(1, 1), stride=(1, 1))
            (act): SiLU(inplace=True)
          )
          (m): Sequential(
            (0): Bottleneck(
              (cv1): Conv(
                (conv): Conv2d(80, 80, kernel_size=(1, 1), stride=(1, 1

In [6]:
transform = transforms.Compose([
    transforms.Resize((640, 640)),
    transforms.ToTensor(),
])

In [7]:
# Load the VOC2012 dataset
train_dataset = VOCDetection(root='./data', year='2012', image_set='train', download=True, transform=transform)
val_dataset = VOCDetection(root='./data', year='2012', image_set='val', download=True, transform=transform)

Using downloaded and verified file: ./data/VOCtrainval_11-May-2012.tar
Extracting ./data/VOCtrainval_11-May-2012.tar to ./data
Using downloaded and verified file: ./data/VOCtrainval_11-May-2012.tar
Extracting ./data/VOCtrainval_11-May-2012.tar to ./data


In [8]:
def custom_collate_fn(batch):
    # Loại bỏ các phần tử None
    batch = [b for b in batch if b is not None]

    # Tạo một danh sách chứa các ảnh và nhãn riêng biệt
    images = torch.stack([item[0] for item in batch], dim=0)
    targets = [item[1] for item in batch]

    return images, targets


In [9]:
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=2, collate_fn=custom_collate_fn)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=2, collate_fn=custom_collate_fn)


In [10]:
import numpy as np

In [11]:
import torch
import numpy as np
import torch.optim as optim
import torch.nn as nn
from PIL import UnidentifiedImageError  # Import the exception class


def train(model, train_loader, epochs, learning_rate, device):
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    model.train()

    for epoch in range(epochs):
        running_loss = 0.0

        for batch in train_loader:
          try:
            images, targets = batch
            images = images.to(device)

            if images.size(0) == 0:
                continue  # Skip empty batch

            # YOLOv5 returns a list of outputs
            outputs = model(images)[0]  # Select the first output

            # Debugging: Print type and content of targets
            print(f"Type of targets: {type(targets)}")
            print(f"Content of targets: {targets}")

            # Extract numeric data from targets
            if isinstance(targets, dict):
                # Assuming the dictionary contains tensors or lists of tensors
                target_values = []
                for key, value in targets.items():
                    if isinstance(value, (list, np.ndarray, torch.Tensor)):
                        # Convert to numpy array if it's a tensor
                        if isinstance(value, torch.Tensor):
                            value = value.numpy()
                        target_values.append(value)
                    else:
                        print(f"Unexpected type in targets dictionary for key {key}: {type(value)}")

                if len(target_values) == 0:
                    print("No valid numeric data found in targets.")
                    continue

                # Flatten the list of arrays/tensors if necessary
                target_values = np.concatenate([np.array(v) for v in target_values], axis=0)
                targets_tensor = torch.tensor(target_values, dtype=torch.float).to(device)
            else:
                # Handle if targets is not a dictionary
                if isinstance(targets, (list, np.ndarray, torch.Tensor)):
                    # Convert to tensor directly
                    if isinstance(targets, torch.Tensor):
                        targets_tensor = targets.to(device)
                    else:
                        targets_tensor = torch.tensor(targets, dtype=torch.float).to(device)
                elif isinstance(targets, dict):
                    print(f"Unexpected dictionary type for targets: {targets}")
                    # Handle the dictionary case, e.g., extract relevant values
                    # targets_tensor = handle_dict(targets)
                else:
                    print(f"Unexpected type for targets: {type(targets)}")
                    continue


            # Compute the loss
            loss_fn = nn.MSELoss()  # Example loss function
            loss = loss_fn(outputs, targets_tensor)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
          except UnidentifiedImageError as e:
            print(f"Warning: Failed to load image: {e}")
            continue
          except Exception as e:
            print(f"Error during training: {e}")
            continue

        print(f"Epoch {epoch+1}/{epochs}, Loss: {running_loss / len(train_loader)}")


In [12]:
epochs = 5
learning_rate = 0.001

In [13]:
train(teacher_model, train_loader, epochs, learning_rate, device)

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'filename': '2010_005229.jpg', 'folder': 'VOC2012', 'object': [{'name': 'dog', 'bndbox': {'xmax': '282', 'xmin': '62', 'ymax': '400', 'ymin': '83'}, 'difficult': '0', 'occluded': '0', 'pose': 'Unspecified', 'truncated': '1'}], 'segmented': '0', 'size': {'depth': '3', 'height': '400', 'width': '293'}, 'source': {'annotation': 'PASCAL VOC2010', 'database': 'The VOC2010 Database', 'image': 'flickr'}}}, {'annotation': {'filename': '2010_002734.jpg', 'folder': 'VOC2012', 'object': [{'name': 'horse', 'bndbox': {'xmax': '222', 'xmin': '129', 'ymax': '152', 'ymin': '66'}, 'difficult': '0', 'occluded': '0', 'pose': 'Right', 'truncated': '0'}, {'name': 'horse', 'bndbox': {'xmax': '500', 'xmin': '261', 'ymax': '333', 'ymin': '1'}, 'difficult': '0', 'occluded': '0', 'pose': 'Frontal', 'truncated': '1'}], 'segmented': '0', 'size': {'depth': '3', 'height': '333', 'width': '500'}, 'source': {'annotation': 'PASCAL VOC2010', 'database

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'folder': 'VOC2012', 'filename': '2008_001986.jpg', 'source': {'database': 'The VOC2008 Database', 'annotation': 'PASCAL VOC2008', 'image': 'flickr'}, 'size': {'width': '300', 'height': '500', 'depth': '3'}, 'segmented': '0', 'object': [{'name': 'bicycle', 'pose': 'Left', 'truncated': '0', 'occluded': '1', 'bndbox': {'xmin': '49', 'ymin': '241', 'xmax': '286', 'ymax': '379'}, 'difficult': '0'}, {'name': 'person', 'pose': 'Left', 'truncated': '0', 'occluded': '1', 'bndbox': {'xmin': '69', 'ymin': '190', 'xmax': '236', 'ymax': '354'}, 'difficult': '0'}, {'name': 'person', 'pose': 'Frontal', 'truncated': '1', 'occluded': '1', 'bndbox': {'xmin': '192', 'ymin': '1', 'xmax': '276', 'ymax': '151'}, 'difficult': '0'}, {'name': 'person', 'pose': 'Frontal', 'truncated': '1', 'occluded': '1', 'bndbox': {'xmin': '91', 'ymin': '1', 'xmax': '183', 'ymax': '151'}, 'difficult': '0'}, {'name': 'person', 'pose': 'Frontal', 'truncated':

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'folder': 'VOC2012', 'filename': '2008_002281.jpg', 'source': {'database': 'The VOC2008 Database', 'annotation': 'PASCAL VOC2008', 'image': 'flickr'}, 'size': {'width': '500', 'height': '375', 'depth': '3'}, 'segmented': '0', 'object': [{'name': 'boat', 'pose': 'Unspecified', 'truncated': '0', 'occluded': '0', 'bndbox': {'xmin': '409', 'ymin': '183', 'xmax': '456', 'ymax': '217'}, 'difficult': '0'}, {'name': 'boat', 'pose': 'Unspecified', 'truncated': '0', 'occluded': '0', 'bndbox': {'xmin': '417', 'ymin': '186', 'xmax': '425', 'ymax': '195'}, 'difficult': '1'}, {'name': 'boat', 'pose': 'Unspecified', 'truncated': '0', 'occluded': '0', 'bndbox': {'xmin': '425', 'ymin': '186', 'xmax': '431', 'ymax': '194'}, 'difficult': '1'}, {'name': 'boat', 'pose': 'Unspecified', 'truncated': '0', 'occluded': '0', 'bndbox': {'xmin': '429', 'ymin': '184', 'xmax': '434', 'ymax': '192'}, 'difficult': '1'}, {'name': 'boat', 'pose': 'Unsp

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'filename': '2010_001143.jpg', 'folder': 'VOC2012', 'object': [{'name': 'person', 'bndbox': {'xmax': '359', 'xmin': '337', 'ymax': '186', 'ymin': '132'}, 'difficult': '0', 'occluded': '0', 'pose': 'Rear', 'truncated': '0'}, {'name': 'person', 'bndbox': {'xmax': '376', 'xmin': '356', 'ymax': '185', 'ymin': '132'}, 'difficult': '0', 'occluded': '1', 'pose': 'Rear', 'truncated': '0'}, {'name': 'person', 'bndbox': {'xmax': '293', 'xmin': '282', 'ymax': '186', 'ymin': '126'}, 'difficult': '1', 'occluded': '1', 'pose': 'Unspecified', 'truncated': '1'}, {'name': 'train', 'bndbox': {'xmax': '304', 'xmin': '94', 'ymax': '251', 'ymin': '85'}, 'difficult': '0', 'occluded': '0', 'pose': 'Unspecified', 'truncated': '0'}], 'segmented': '0', 'size': {'depth': '3', 'height': '279', 'width': '500'}, 'source': {'annotation': 'PASCAL VOC2010', 'database': 'The VOC2010 Database', 'image': 'flickr'}}}, {'annotation': {'filename': '2011_00

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'filename': '2009_004829.jpg', 'folder': 'VOC2012', 'object': [{'name': 'bus', 'bndbox': {'xmax': '392', 'xmin': '86', 'ymax': '341', 'ymin': '37'}, 'difficult': '0', 'occluded': '0', 'pose': 'Rear', 'truncated': '0'}], 'segmented': '1', 'size': {'depth': '3', 'height': '375', 'width': '500'}, 'source': {'annotation': 'PASCAL VOC2009', 'database': 'The VOC2009 Database', 'image': 'flickr'}}}, {'annotation': {'filename': '2011_002940.jpg', 'folder': 'VOC2012', 'object': [{'name': 'bottle', 'bndbox': {'xmax': '244', 'xmin': '216', 'ymax': '299', 'ymin': '208'}, 'difficult': '0', 'occluded': '1', 'pose': 'Unspecified', 'truncated': '0'}, {'name': 'bottle', 'bndbox': {'xmax': '260', 'xmin': '243', 'ymax': '232', 'ymin': '170'}, 'difficult': '0', 'occluded': '0', 'pose': 'Unspecified', 'truncated': '0'}, {'name': 'diningtable', 'bndbox': {'xmax': '386', 'xmin': '107', 'ymax': '375', 'ymin': '198'}, 'difficult': '0', 'occlu

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'folder': 'VOC2012', 'filename': '2008_000512.jpg', 'source': {'database': 'The VOC2008 Database', 'annotation': 'PASCAL VOC2008', 'image': 'flickr'}, 'size': {'width': '418', 'height': '500', 'depth': '3'}, 'segmented': '0', 'object': [{'name': 'bird', 'pose': 'Left', 'truncated': '0', 'occluded': '0', 'bndbox': {'xmin': '53', 'ymin': '50', 'xmax': '352', 'ymax': '498'}, 'difficult': '0'}]}}, {'annotation': {'folder': 'VOC2012', 'filename': '2008_000729.jpg', 'source': {'database': 'The VOC2008 Database', 'annotation': 'PASCAL VOC2008', 'image': 'flickr'}, 'size': {'width': '320', 'height': '240', 'depth': '3'}, 'segmented': '0', 'object': [{'name': 'person', 'pose': 'Frontal', 'truncated': '0', 'occluded': '1', 'bndbox': {'xmin': '4', 'ymin': '4', 'xmax': '295', 'ymax': '200'}, 'difficult': '0'}, {'name': 'person', 'pose': 'Frontal', 'truncated': '1', 'occluded': '1', 'bndbox': {'xmin': '1', 'ymin': '1', 'xmax': '32

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'filename': '2011_000882.jpg', 'folder': 'VOC2012', 'object': [{'name': 'aeroplane', 'bndbox': {'xmax': '334', 'xmin': '2', 'ymax': '500', 'ymin': '162'}, 'difficult': '0', 'occluded': '0', 'pose': 'Rear', 'truncated': '1'}, {'name': 'person', 'bndbox': {'xmax': '237', 'xmin': '136', 'ymax': '179', 'ymin': '80'}, 'difficult': '0', 'occluded': '1', 'pose': 'Rear', 'truncated': '1'}], 'segmented': '1', 'size': {'depth': '3', 'height': '500', 'width': '334'}, 'source': {'annotation': 'PASCAL VOC2011', 'database': 'The VOC2011 Database', 'image': 'flickr'}}}, {'annotation': {'filename': '2010_002955.jpg', 'folder': 'VOC2012', 'object': [{'name': 'cat', 'bndbox': {'xmax': '328', 'xmin': '133', 'ymax': '174', 'ymin': '7'}, 'difficult': '0', 'occluded': '1', 'pose': 'Unspecified', 'truncated': '0'}, {'name': 'bird', 'bndbox': {'xmax': '341', 'xmin': '244', 'ymax': '302', 'ymin': '176'}, 'difficult': '0', 'occluded': '0', 'po

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'filename': '2010_001562.jpg', 'folder': 'VOC2012', 'object': [{'name': 'tvmonitor', 'bndbox': {'xmax': '500', 'xmin': '413', 'ymax': '252', 'ymin': '120'}, 'difficult': '0', 'occluded': '0', 'pose': 'Frontal', 'truncated': '1'}], 'segmented': '1', 'size': {'depth': '3', 'height': '334', 'width': '500'}, 'source': {'annotation': 'PASCAL VOC2010', 'database': 'The VOC2010 Database', 'image': 'flickr'}}}, {'annotation': {'folder': 'VOC2012', 'filename': '2008_008411.jpg', 'source': {'database': 'The VOC2008 Database', 'annotation': 'PASCAL VOC2008', 'image': 'flickr'}, 'size': {'width': '500', 'height': '375', 'depth': '3'}, 'segmented': '0', 'object': [{'name': 'aeroplane', 'pose': 'Frontal', 'truncated': '1', 'occluded': '0', 'bndbox': {'xmin': '58', 'ymin': '75', 'xmax': '500', 'ymax': '304'}, 'difficult': '0'}]}}, {'annotation': {'filename': '2010_003264.jpg', 'folder': 'VOC2012', 'object': [{'name': 'bicycle', 'bnd

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'folder': 'VOC2012', 'filename': '2008_006625.jpg', 'source': {'database': 'The VOC2008 Database', 'annotation': 'PASCAL VOC2008', 'image': 'flickr'}, 'size': {'width': '500', 'height': '375', 'depth': '3'}, 'segmented': '0', 'object': [{'name': 'diningtable', 'pose': 'Unspecified', 'truncated': '1', 'occluded': '1', 'bndbox': {'xmin': '1', 'ymin': '200', 'xmax': '436', 'ymax': '375'}, 'difficult': '1'}, {'name': 'person', 'pose': 'Unspecified', 'truncated': '1', 'occluded': '1', 'bndbox': {'xmin': '62', 'ymin': '1', 'xmax': '498', 'ymax': '311'}, 'difficult': '0'}, {'name': 'chair', 'pose': 'Frontal', 'truncated': '1', 'occluded': '1', 'bndbox': {'xmin': '29', 'ymin': '1', 'xmax': '126', 'ymax': '188'}, 'difficult': '1'}]}}, {'annotation': {'filename': '2009_002271.jpg', 'folder': 'VOC2012', 'object': [{'name': 'bicycle', 'bndbox': {'xmax': '300', 'xmin': '93', 'ymax': '310', 'ymin': '174'}, 'difficult': '0', 'occlud

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'filename': '2011_002273.jpg', 'folder': 'VOC2012', 'object': [{'name': 'person', 'bndbox': {'xmax': '309', 'xmin': '68', 'ymax': '500', 'ymin': '98'}, 'difficult': '0', 'occluded': '1', 'pose': 'Unspecified', 'truncated': '1'}], 'segmented': '0', 'size': {'depth': '3', 'height': '500', 'width': '333'}, 'source': {'annotation': 'PASCAL VOC2011', 'database': 'The VOC2011 Database', 'image': 'flickr'}}}, {'annotation': {'folder': 'VOC2012', 'filename': '2008_001444.jpg', 'source': {'database': 'The VOC2008 Database', 'annotation': 'PASCAL VOC2008', 'image': 'flickr'}, 'size': {'width': '500', 'height': '375', 'depth': '3'}, 'segmented': '0', 'object': [{'name': 'bottle', 'pose': 'Unspecified', 'truncated': '0', 'occluded': '1', 'bndbox': {'xmin': '291', 'ymin': '198', 'xmax': '342', 'ymax': '369'}, 'difficult': '0'}, {'name': 'person', 'pose': 'Unspecified', 'truncated': '1', 'occluded': '1', 'bndbox': {'xmin': '9', 'ym

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'filename': '2011_000359.jpg', 'folder': 'VOC2012', 'object': [{'name': 'aeroplane', 'bndbox': {'xmax': '284', 'xmin': '1', 'ymax': '355', 'ymin': '249'}, 'difficult': '0', 'occluded': '0', 'pose': 'Unspecified', 'truncated': '1'}], 'segmented': '1', 'size': {'depth': '3', 'height': '375', 'width': '500'}, 'source': {'annotation': 'PASCAL VOC2011', 'database': 'The VOC2011 Database', 'image': 'flickr'}}}, {'annotation': {'filename': '2010_003677.jpg', 'folder': 'VOC2012', 'object': [{'name': 'chair', 'bndbox': {'xmax': '375', 'xmin': '1', 'ymax': '500', 'ymin': '1'}, 'difficult': '0', 'occluded': '1', 'pose': 'Frontal', 'truncated': '1'}, {'name': 'person', 'bndbox': {'xmax': '271', 'xmin': '38', 'ymax': '500', 'ymin': '94'}, 'difficult': '0', 'occluded': '0', 'pose': 'Frontal', 'truncated': '1'}], 'segmented': '0', 'size': {'depth': '3', 'height': '500', 'width': '375'}, 'source': {'annotation': 'PASCAL VOC2010', 'da

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'folder': 'VOC2012', 'filename': '2008_002482.jpg', 'source': {'database': 'The VOC2008 Database', 'annotation': 'PASCAL VOC2008', 'image': 'flickr'}, 'size': {'width': '500', 'height': '375', 'depth': '3'}, 'segmented': '0', 'object': [{'name': 'boat', 'pose': 'Rear', 'truncated': '0', 'occluded': '0', 'bndbox': {'xmin': '110', 'ymin': '263', 'xmax': '162', 'ymax': '298'}, 'difficult': '0'}, {'name': 'person', 'pose': 'Rear', 'truncated': '1', 'occluded': '0', 'bndbox': {'xmin': '109', 'ymin': '234', 'xmax': '143', 'ymax': '278'}, 'difficult': '0'}, {'name': 'person', 'pose': 'Rear', 'truncated': '1', 'occluded': '1', 'bndbox': {'xmin': '134', 'ymin': '240', 'xmax': '156', 'ymax': '276'}, 'difficult': '1'}]}}, {'annotation': {'folder': 'VOC2012', 'filename': '2008_001267.jpg', 'source': {'database': 'The VOC2008 Database', 'annotation': 'PASCAL VOC2008', 'image': 'flickr'}, 'size': {'width': '500', 'height': '375', '

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'filename': '2009_003000.jpg', 'folder': 'VOC2012', 'object': [{'name': 'person', 'bndbox': {'xmax': '500', 'xmin': '340', 'ymax': '252', 'ymin': '104'}, 'difficult': '0', 'occluded': '0', 'pose': 'Unspecified', 'truncated': '1'}, {'name': 'person', 'bndbox': {'xmax': '356', 'xmin': '271', 'ymax': '280', 'ymin': '116'}, 'difficult': '0', 'occluded': '1', 'pose': 'Unspecified', 'truncated': '1'}, {'name': 'person', 'bndbox': {'xmax': '284', 'xmin': '145', 'ymax': '277', 'ymin': '99'}, 'difficult': '0', 'occluded': '0', 'pose': 'Frontal', 'truncated': '0', 'part': [{'name': 'head', 'bndbox': {'xmin': '208', 'ymin': '97', 'xmax': '251', 'ymax': '151'}}, {'name': 'hand', 'bndbox': {'xmin': '205', 'ymin': '211', 'xmax': '228', 'ymax': '230'}}, {'name': 'hand', 'bndbox': {'xmin': '230', 'ymin': '208', 'xmax': '253', 'ymax': '229'}}, {'name': 'foot', 'bndbox': {'xmin': '238', 'ymin': '239', 'xmax': '282', 'ymax': '276'}}, {'

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'filename': '2009_004681.jpg', 'folder': 'VOC2012', 'object': [{'name': 'bus', 'bndbox': {'xmax': '395', 'xmin': '96', 'ymax': '302', 'ymin': '54'}, 'difficult': '0', 'occluded': '0', 'pose': 'Unspecified', 'truncated': '0'}, {'name': 'person', 'bndbox': {'xmax': '292', 'xmin': '264', 'ymax': '232', 'ymin': '190'}, 'difficult': '0', 'occluded': '1', 'pose': 'Frontal', 'truncated': '1'}, {'name': 'bus', 'bndbox': {'xmax': '96', 'xmin': '3', 'ymax': '248', 'ymin': '162'}, 'difficult': '0', 'occluded': '0', 'pose': 'Unspecified', 'truncated': '1'}], 'segmented': '0', 'size': {'depth': '3', 'height': '324', 'width': '500'}, 'source': {'annotation': 'PASCAL VOC2009', 'database': 'The VOC2009 Database', 'image': 'flickr'}}}, {'annotation': {'filename': '2009_001828.jpg', 'folder': 'VOC2012', 'object': [{'name': 'dog', 'bndbox': {'xmax': '248', 'xmin': '146', 'ymax': '441', 'ymin': '172'}, 'difficult': '0', 'occluded': '0', 

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'filename': '2010_003717.jpg', 'folder': 'VOC2012', 'object': [{'name': 'sofa', 'bndbox': {'xmax': '498', 'xmin': '34', 'ymax': '242', 'ymin': '1'}, 'difficult': '0', 'occluded': '0', 'pose': 'Unspecified', 'truncated': '1'}, {'name': 'bottle', 'bndbox': {'xmax': '500', 'xmin': '432', 'ymax': '332', 'ymin': '305'}, 'difficult': '0', 'occluded': '1', 'pose': 'Unspecified', 'truncated': '0'}], 'segmented': '1', 'size': {'depth': '3', 'height': '375', 'width': '500'}, 'source': {'annotation': 'PASCAL VOC2010', 'database': 'The VOC2010 Database', 'image': 'flickr'}}}, {'annotation': {'filename': '2010_003773.jpg', 'folder': 'VOC2012', 'object': [{'name': 'cat', 'bndbox': {'xmax': '374', 'xmin': '1', 'ymax': '500', 'ymin': '1'}, 'difficult': '0', 'occluded': '0', 'pose': 'Frontal', 'truncated': '1'}], 'segmented': '0', 'size': {'depth': '3', 'height': '500', 'width': '375'}, 'source': {'annotation': 'PASCAL VOC2010', 'data

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'folder': 'VOC2012', 'filename': '2008_001052.jpg', 'source': {'database': 'The VOC2008 Database', 'annotation': 'PASCAL VOC2008', 'image': 'flickr'}, 'size': {'width': '500', 'height': '375', 'depth': '3'}, 'segmented': '0', 'object': [{'name': 'person', 'pose': 'Unspecified', 'truncated': '0', 'occluded': '1', 'bndbox': {'xmin': '196', 'ymin': '117', 'xmax': '347', 'ymax': '375'}, 'difficult': '0'}, {'name': 'person', 'pose': 'Unspecified', 'truncated': '1', 'occluded': '1', 'bndbox': {'xmin': '96', 'ymin': '41', 'xmax': '354', 'ymax': '373'}, 'difficult': '0'}]}}, {'annotation': {'folder': 'VOC2012', 'filename': '2008_007038.jpg', 'source': {'database': 'The VOC2008 Database', 'annotation': 'PASCAL VOC2008', 'image': 'flickr'}, 'size': {'width': '500', 'height': '375', 'depth': '3'}, 'segmented': '0', 'object': [{'name': 'person', 'pose': 'Right', 'truncated': '1', 'occluded': '0', 'bndbox': {'xmin': '108', 'ymin':

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'filename': '2009_000409.jpg', 'folder': 'VOC2012', 'object': [{'name': 'horse', 'bndbox': {'xmax': '119', 'xmin': '40', 'ymax': '274', 'ymin': '209'}, 'difficult': '0', 'occluded': '0', 'pose': 'Right', 'truncated': '1'}, {'name': 'horse', 'bndbox': {'xmax': '243', 'xmin': '147', 'ymax': '263', 'ymin': '203'}, 'difficult': '0', 'occluded': '1', 'pose': 'Left', 'truncated': '0'}, {'name': 'horse', 'bndbox': {'xmax': '217', 'xmin': '121', 'ymax': '275', 'ymin': '201'}, 'difficult': '0', 'occluded': '0', 'pose': 'Right', 'truncated': '0'}, {'name': 'horse', 'bndbox': {'xmax': '294', 'xmin': '237', 'ymax': '307', 'ymin': '205'}, 'difficult': '0', 'occluded': '0', 'pose': 'Frontal', 'truncated': '0'}], 'segmented': '1', 'size': {'depth': '3', 'height': '375', 'width': '500'}, 'source': {'annotation': 'PASCAL VOC2009', 'database': 'The VOC2009 Database', 'image': 'flickr'}}}, {'annotation': {'folder': 'VOC2012', 'filename'

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'filename': '2010_004025.jpg', 'folder': 'VOC2012', 'object': [{'name': 'sofa', 'bndbox': {'xmax': '469', 'xmin': '1', 'ymax': '361', 'ymin': '99'}, 'difficult': '0', 'occluded': '1', 'pose': 'Unspecified', 'truncated': '1'}, {'name': 'person', 'bndbox': {'xmax': '422', 'xmin': '85', 'ymax': '279', 'ymin': '150'}, 'difficult': '0', 'occluded': '0', 'pose': 'Unspecified', 'truncated': '0'}], 'segmented': '1', 'size': {'depth': '3', 'height': '376', 'width': '500'}, 'source': {'annotation': 'PASCAL VOC2010', 'database': 'The VOC2010 Database', 'image': 'flickr'}}}, {'annotation': {'filename': '2011_001479.jpg', 'folder': 'VOC2012', 'object': [{'name': 'bus', 'bndbox': {'xmax': '497', 'xmin': '14', 'ymax': '363', 'ymin': '1'}, 'difficult': '0', 'occluded': '0', 'pose': 'Unspecified', 'truncated': '0'}], 'segmented': '1', 'size': {'depth': '3', 'height': '375', 'width': '500'}, 'source': {'annotation': 'PASCAL VOC2011', '

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'folder': 'VOC2012', 'filename': '2008_001501.jpg', 'source': {'database': 'The VOC2008 Database', 'annotation': 'PASCAL VOC2008', 'image': 'flickr'}, 'size': {'width': '500', 'height': '366', 'depth': '3'}, 'segmented': '0', 'object': [{'name': 'person', 'pose': 'Frontal', 'truncated': '0', 'occluded': '0', 'bndbox': {'xmin': '20', 'ymin': '135', 'xmax': '83', 'ymax': '268'}, 'difficult': '0', 'part': [{'name': 'head', 'bndbox': {'xmin': '53', 'ymin': '137', 'xmax': '72', 'ymax': '157'}}, {'name': 'hand', 'bndbox': {'xmin': '17', 'ymin': '183', 'xmax': '28', 'ymax': '191'}}, {'name': 'hand', 'bndbox': {'xmin': '70', 'ymin': '188', 'xmax': '80', 'ymax': '197'}}, {'name': 'foot', 'bndbox': {'xmin': '64', 'ymin': '252', 'xmax': '73', 'ymax': '269'}}, {'name': 'foot', 'bndbox': {'xmin': '45', 'ymin': '255', 'xmax': '54', 'ymax': '269'}}]}, {'name': 'person', 'pose': 'Frontal', 'truncated': '0', 'occluded': '0', 'bndbox':

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'folder': 'VOC2012', 'filename': '2008_002248.jpg', 'source': {'database': 'The VOC2008 Database', 'annotation': 'PASCAL VOC2008', 'image': 'flickr'}, 'size': {'width': '500', 'height': '333', 'depth': '3'}, 'segmented': '1', 'object': [{'name': 'person', 'pose': 'Unspecified', 'truncated': '1', 'occluded': '0', 'bndbox': {'xmin': '291', 'ymin': '126', 'xmax': '500', 'ymax': '333'}, 'difficult': '0'}]}}, {'annotation': {'folder': 'VOC2012', 'filename': '2008_004284.jpg', 'source': {'database': 'The VOC2008 Database', 'annotation': 'PASCAL VOC2008', 'image': 'flickr'}, 'size': {'width': '500', 'height': '334', 'depth': '3'}, 'segmented': '0', 'object': [{'name': 'dog', 'pose': 'Left', 'truncated': '0', 'occluded': '1', 'bndbox': {'xmin': '116', 'ymin': '64', 'xmax': '343', 'ymax': '293'}, 'difficult': '0'}, {'name': 'cat', 'pose': 'Unspecified', 'truncated': '0', 'occluded': '1', 'bndbox': {'xmin': '314', 'ymin': '146'

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'folder': 'VOC2012', 'filename': '2008_005609.jpg', 'source': {'database': 'The VOC2008 Database', 'annotation': 'PASCAL VOC2008', 'image': 'flickr'}, 'size': {'width': '375', 'height': '500', 'depth': '3'}, 'segmented': '0', 'object': [{'name': 'tvmonitor', 'pose': 'Unspecified', 'truncated': '1', 'occluded': '0', 'bndbox': {'xmin': '1', 'ymin': '1', 'xmax': '375', 'ymax': '433'}, 'difficult': '0'}, {'name': 'person', 'pose': 'Frontal', 'truncated': '1', 'occluded': '1', 'bndbox': {'xmin': '29', 'ymin': '43', 'xmax': '214', 'ymax': '240'}, 'difficult': '0'}]}}, {'annotation': {'filename': '2009_001704.jpg', 'folder': 'VOC2012', 'object': [{'name': 'pottedplant', 'bndbox': {'xmax': '89', 'xmin': '13', 'ymax': '193', 'ymin': '51'}, 'difficult': '0', 'occluded': '0', 'pose': 'Unspecified', 'truncated': '0'}, {'name': 'bicycle', 'bndbox': {'xmax': '500', 'xmin': '38', 'ymax': '367', 'ymin': '45'}, 'difficult': '0', 'occl

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'filename': '2011_002724.jpg', 'folder': 'VOC2012', 'object': [{'name': 'cat', 'bndbox': {'xmax': '500', 'xmin': '2', 'ymax': '375', 'ymin': '1'}, 'difficult': '0', 'occluded': '0', 'pose': 'Unspecified', 'truncated': '1'}], 'segmented': '0', 'size': {'depth': '3', 'height': '375', 'width': '500'}, 'source': {'annotation': 'PASCAL VOC2011', 'database': 'The VOC2011 Database', 'image': 'flickr'}}}, {'annotation': {'folder': 'VOC2012', 'filename': '2008_003665.jpg', 'source': {'database': 'The VOC2008 Database', 'annotation': 'PASCAL VOC2008', 'image': 'flickr'}, 'size': {'width': '500', 'height': '375', 'depth': '3'}, 'segmented': '1', 'object': [{'name': 'dog', 'pose': 'Unspecified', 'truncated': '0', 'occluded': '0', 'bndbox': {'xmin': '29', 'ymin': '58', 'xmax': '414', 'ymax': '375'}, 'difficult': '0'}, {'name': 'pottedplant', 'pose': 'Unspecified', 'truncated': '1', 'occluded': '0', 'bndbox': {'xmin': '70', 'ymin':

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'filename': '2009_001163.jpg', 'folder': 'VOC2012', 'object': [{'name': 'cow', 'bndbox': {'xmax': '239', 'xmin': '70', 'ymax': '373', 'ymin': '90'}, 'difficult': '0', 'occluded': '0', 'pose': 'Frontal', 'truncated': '0'}, {'name': 'cow', 'bndbox': {'xmax': '421', 'xmin': '241', 'ymax': '373', 'ymin': '73'}, 'difficult': '0', 'occluded': '1', 'pose': 'Frontal', 'truncated': '0'}], 'segmented': '1', 'size': {'depth': '3', 'height': '373', 'width': '500'}, 'source': {'annotation': 'PASCAL VOC2009', 'database': 'The VOC2009 Database', 'image': 'flickr'}}}, {'annotation': {'filename': '2009_002289.jpg', 'folder': 'VOC2012', 'object': [{'name': 'person', 'bndbox': {'xmax': '212', 'xmin': '197', 'ymax': '299', 'ymin': '279'}, 'difficult': '0', 'occluded': '1', 'pose': 'Unspecified', 'truncated': '0'}, {'name': 'person', 'bndbox': {'xmax': '164', 'xmin': '137', 'ymax': '323', 'ymin': '292'}, 'difficult': '0', 'occluded': '0',

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'folder': 'VOC2012', 'filename': '2008_001413.jpg', 'source': {'database': 'The VOC2008 Database', 'annotation': 'PASCAL VOC2008', 'image': 'flickr'}, 'size': {'width': '500', 'height': '375', 'depth': '3'}, 'segmented': '1', 'object': [{'name': 'diningtable', 'pose': 'Unspecified', 'truncated': '0', 'occluded': '0', 'bndbox': {'xmin': '205', 'ymin': '146', 'xmax': '500', 'ymax': '375'}, 'difficult': '1'}, {'name': 'person', 'pose': 'Unspecified', 'truncated': '1', 'occluded': '0', 'bndbox': {'xmin': '65', 'ymin': '41', 'xmax': '334', 'ymax': '375'}, 'difficult': '0'}]}}, {'annotation': {'filename': '2010_005028.jpg', 'folder': 'VOC2012', 'object': [{'name': 'bird', 'bndbox': {'xmax': '268', 'xmin': '216', 'ymax': '183', 'ymin': '138'}, 'difficult': '0', 'occluded': '0', 'pose': 'Unspecified', 'truncated': '0'}], 'segmented': '1', 'size': {'depth': '3', 'height': '385', 'width': '500'}, 'source': {'annotation': 'PASCA

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'filename': '2010_001922.jpg', 'folder': 'VOC2012', 'object': [{'name': 'bicycle', 'bndbox': {'xmax': '412', 'xmin': '114', 'ymax': '289', 'ymin': '48'}, 'difficult': '0', 'occluded': '1', 'pose': 'Left', 'truncated': '0'}, {'name': 'bicycle', 'bndbox': {'xmax': '418', 'xmin': '180', 'ymax': '289', 'ymin': '62'}, 'difficult': '0', 'occluded': '1', 'pose': 'Right', 'truncated': '0'}], 'segmented': '1', 'size': {'depth': '3', 'height': '375', 'width': '500'}, 'source': {'annotation': 'PASCAL VOC2010', 'database': 'The VOC2010 Database', 'image': 'flickr'}}}, {'annotation': {'filename': '2009_004279.jpg', 'folder': 'VOC2012', 'object': [{'name': 'horse', 'bndbox': {'xmax': '224', 'xmin': '86', 'ymax': '256', 'ymin': '35'}, 'difficult': '0', 'occluded': '1', 'pose': 'Frontal', 'truncated': '1'}, {'name': 'person', 'bndbox': {'xmax': '322', 'xmin': '67', 'ymax': '334', 'ymin': '116'}, 'difficult': '0', 'occluded': '1', 'po

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'filename': '2011_001498.jpg', 'folder': 'VOC2012', 'object': [{'name': 'motorbike', 'bndbox': {'xmax': '459', 'xmin': '22', 'ymax': '307', 'ymin': '28'}, 'difficult': '0', 'occluded': '0', 'pose': 'Right', 'truncated': '0'}], 'segmented': '0', 'size': {'depth': '3', 'height': '375', 'width': '500'}, 'source': {'annotation': 'PASCAL VOC2011', 'database': 'The VOC2011 Database', 'image': 'flickr'}}}, {'annotation': {'folder': 'VOC2012', 'filename': '2008_001112.jpg', 'source': {'database': 'The VOC2008 Database', 'annotation': 'PASCAL VOC2008', 'image': 'flickr'}, 'size': {'width': '375', 'height': '500', 'depth': '3'}, 'segmented': '1', 'object': [{'name': 'person', 'pose': 'Left', 'truncated': '1', 'occluded': '0', 'bndbox': {'xmin': '142', 'ymin': '164', 'xmax': '330', 'ymax': '500'}, 'difficult': '0'}, {'name': 'horse', 'pose': 'Right', 'truncated': '1', 'occluded': '1', 'bndbox': {'xmin': '1', 'ymin': '205', 'xmax

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'filename': '2009_003539.jpg', 'folder': 'VOC2012', 'object': [{'name': 'motorbike', 'bndbox': {'xmax': '386', 'xmin': '112', 'ymax': '392', 'ymin': '48'}, 'difficult': '0', 'occluded': '0', 'pose': 'Unspecified', 'truncated': '0'}], 'segmented': '1', 'size': {'depth': '3', 'height': '414', 'width': '500'}, 'source': {'annotation': 'PASCAL VOC2009', 'database': 'The VOC2009 Database', 'image': 'flickr'}}}, {'annotation': {'filename': '2010_001973.jpg', 'folder': 'VOC2012', 'object': [{'name': 'chair', 'bndbox': {'xmax': '326', 'xmin': '19', 'ymax': '491', 'ymin': '32'}, 'difficult': '0', 'occluded': '0', 'pose': 'Unspecified', 'truncated': '0'}], 'segmented': '0', 'size': {'depth': '3', 'height': '500', 'width': '333'}, 'source': {'annotation': 'PASCAL VOC2010', 'database': 'The VOC2010 Database', 'image': 'flickr'}}}, {'annotation': {'filename': '2010_001401.jpg', 'folder': 'VOC2012', 'object': [{'name': 'cat', 'bndb

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'filename': '2011_002821.jpg', 'folder': 'VOC2012', 'object': [{'name': 'boat', 'bndbox': {'xmax': '166', 'xmin': '28', 'ymax': '261', 'ymin': '142'}, 'difficult': '0', 'occluded': '1', 'pose': 'Unspecified', 'truncated': '1'}, {'name': 'train', 'bndbox': {'xmax': '448', 'xmin': '1', 'ymax': '336', 'ymin': '1'}, 'difficult': '0', 'occluded': '1', 'pose': 'Unspecified', 'truncated': '1'}, {'name': 'person', 'bndbox': {'xmax': '99', 'xmin': '23', 'ymax': '294', 'ymin': '134'}, 'difficult': '0', 'occluded': '0', 'pose': 'Right', 'truncated': '0'}], 'segmented': '0', 'size': {'depth': '3', 'height': '336', 'width': '448'}, 'source': {'annotation': 'PASCAL VOC2011', 'database': 'The VOC2011 Database', 'image': 'flickr'}}}, {'annotation': {'folder': 'VOC2012', 'filename': '2008_002665.jpg', 'source': {'database': 'The VOC2008 Database', 'annotation': 'PASCAL VOC2008', 'image': 'flickr'}, 'size': {'width': '500', 'height': '

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'filename': '2009_002077.jpg', 'folder': 'VOC2012', 'object': [{'name': 'motorbike', 'bndbox': {'xmax': '483', 'xmin': '17', 'ymax': '375', 'ymin': '1'}, 'difficult': '0', 'occluded': '0', 'pose': 'Right', 'truncated': '1'}], 'segmented': '0', 'size': {'depth': '3', 'height': '375', 'width': '500'}, 'source': {'annotation': 'PASCAL VOC2009', 'database': 'The VOC2009 Database', 'image': 'flickr'}}}, {'annotation': {'folder': 'VOC2012', 'filename': '2008_003060.jpg', 'source': {'database': 'The VOC2008 Database', 'annotation': 'PASCAL VOC2008', 'image': 'flickr'}, 'size': {'width': '500', 'height': '375', 'depth': '3'}, 'segmented': '1', 'object': [{'name': 'bus', 'pose': 'Rear', 'truncated': '1', 'occluded': '0', 'bndbox': {'xmin': '1', 'ymin': '132', 'xmax': '31', 'ymax': '202'}, 'difficult': '0'}, {'name': 'bus', 'pose': 'Unspecified', 'truncated': '1', 'occluded': '1', 'bndbox': {'xmin': '31', 'ymin': '148', 'xmax':

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'filename': '2009_002472.jpg', 'folder': 'VOC2012', 'object': [{'name': 'cow', 'bndbox': {'xmax': '368', 'xmin': '31', 'ymax': '333', 'ymin': '37'}, 'difficult': '0', 'occluded': '0', 'pose': 'Frontal', 'truncated': '1'}], 'segmented': '1', 'size': {'depth': '3', 'height': '339', 'width': '500'}, 'source': {'annotation': 'PASCAL VOC2009', 'database': 'The VOC2009 Database', 'image': 'flickr'}}}, {'annotation': {'filename': '2009_000209.jpg', 'folder': 'VOC2012', 'object': [{'name': 'chair', 'bndbox': {'xmax': '315', 'xmin': '115', 'ymax': '475', 'ymin': '209'}, 'difficult': '0', 'occluded': '0', 'pose': 'Left', 'truncated': '0'}], 'segmented': '0', 'size': {'depth': '3', 'height': '500', 'width': '375'}, 'source': {'annotation': 'PASCAL VOC2009', 'database': 'The VOC2009 Database', 'image': 'flickr'}}}, {'annotation': {'filename': '2010_004467.jpg', 'folder': 'VOC2012', 'object': [{'name': 'cat', 'bndbox': {'xmax': '3

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'folder': 'VOC2012', 'filename': '2008_008345.jpg', 'source': {'database': 'The VOC2008 Database', 'annotation': 'PASCAL VOC2008', 'image': 'flickr'}, 'size': {'width': '500', 'height': '223', 'depth': '3'}, 'segmented': '0', 'object': [{'name': 'aeroplane', 'pose': 'Right', 'truncated': '0', 'occluded': '0', 'bndbox': {'xmin': '73', 'ymin': '57', 'xmax': '415', 'ymax': '151'}, 'difficult': '0'}, {'name': 'aeroplane', 'pose': 'Left', 'truncated': '0', 'occluded': '0', 'bndbox': {'xmin': '290', 'ymin': '29', 'xmax': '320', 'ymax': '44'}, 'difficult': '0'}]}}, {'annotation': {'filename': '2011_001135.jpg', 'folder': 'VOC2012', 'object': [{'name': 'cat', 'bndbox': {'xmax': '271', 'xmin': '13', 'ymax': '426', 'ymin': '130'}, 'difficult': '0', 'occluded': '0', 'pose': 'Frontal', 'truncated': '0'}, {'name': 'chair', 'bndbox': {'xmax': '375', 'xmin': '138', 'ymax': '500', 'ymin': '410'}, 'difficult': '0', 'occluded': '1', 'p

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'filename': '2009_002611.jpg', 'folder': 'VOC2012', 'object': [{'name': 'sofa', 'bndbox': {'xmax': '182', 'xmin': '1', 'ymax': '370', 'ymin': '152'}, 'difficult': '0', 'occluded': '1', 'pose': 'Unspecified', 'truncated': '0'}], 'segmented': '0', 'size': {'depth': '3', 'height': '375', 'width': '500'}, 'source': {'annotation': 'PASCAL VOC2009', 'database': 'The VOC2009 Database', 'image': 'flickr'}}}, {'annotation': {'folder': 'VOC2012', 'filename': '2008_008097.jpg', 'source': {'database': 'The VOC2008 Database', 'annotation': 'PASCAL VOC2008', 'image': 'flickr'}, 'size': {'width': '500', 'height': '333', 'depth': '3'}, 'segmented': '0', 'object': [{'name': 'car', 'pose': 'Right', 'truncated': '1', 'occluded': '0', 'bndbox': {'xmin': '424', 'ymin': '105', 'xmax': '500', 'ymax': '194'}, 'difficult': '0'}, {'name': 'bicycle', 'pose': 'Right', 'truncated': '0', 'occluded': '1', 'bndbox': {'xmin': '163', 'ymin': '180', 'x

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'folder': 'VOC2012', 'filename': '2008_007098.jpg', 'source': {'database': 'The VOC2008 Database', 'annotation': 'PASCAL VOC2008', 'image': 'flickr'}, 'size': {'width': '500', 'height': '333', 'depth': '3'}, 'segmented': '0', 'object': [{'name': 'bird', 'pose': 'Right', 'truncated': '0', 'occluded': '0', 'bndbox': {'xmin': '65', 'ymin': '137', 'xmax': '209', 'ymax': '212'}, 'difficult': '0'}]}}, {'annotation': {'folder': 'VOC2012', 'filename': '2008_007717.jpg', 'source': {'database': 'The VOC2008 Database', 'annotation': 'PASCAL VOC2008', 'image': 'flickr'}, 'size': {'width': '500', 'height': '333', 'depth': '3'}, 'segmented': '0', 'object': [{'name': 'chair', 'pose': 'Left', 'truncated': '1', 'occluded': '1', 'bndbox': {'xmin': '403', 'ymin': '204', 'xmax': '463', 'ymax': '333'}, 'difficult': '1'}, {'name': 'chair', 'pose': 'Right', 'truncated': '1', 'occluded': '1', 'bndbox': {'xmin': '37', 'ymin': '206', 'xmax': '

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'folder': 'VOC2012', 'filename': '2008_005810.jpg', 'source': {'database': 'The VOC2008 Database', 'annotation': 'PASCAL VOC2008', 'image': 'flickr'}, 'size': {'width': '500', 'height': '375', 'depth': '3'}, 'segmented': '0', 'object': [{'name': 'bird', 'pose': 'Rear', 'truncated': '0', 'occluded': '0', 'bndbox': {'xmin': '194', 'ymin': '107', 'xmax': '328', 'ymax': '272'}, 'difficult': '0'}]}}, {'annotation': {'filename': '2009_002229.jpg', 'folder': 'VOC2012', 'object': [{'name': 'cow', 'bndbox': {'xmax': '417', 'xmin': '129', 'ymax': '286', 'ymin': '1'}, 'difficult': '0', 'occluded': '0', 'pose': 'Unspecified', 'truncated': '0'}], 'segmented': '1', 'size': {'depth': '3', 'height': '299', 'width': '448'}, 'source': {'annotation': 'PASCAL VOC2009', 'database': 'The VOC2009 Database', 'image': 'flickr'}}}, {'annotation': {'filename': '2009_002429.jpg', 'folder': 'VOC2012', 'object': [{'name': 'cat', 'bndbox': {'xmax':

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'filename': '2010_005306.jpg', 'folder': 'VOC2012', 'object': [{'name': 'dog', 'bndbox': {'xmax': '347', 'xmin': '166', 'ymax': '222', 'ymin': '7'}, 'difficult': '0', 'occluded': '0', 'pose': 'Unspecified', 'truncated': '0'}, {'name': 'person', 'bndbox': {'xmax': '99', 'xmin': '1', 'ymax': '341', 'ymin': '1'}, 'difficult': '1', 'occluded': '0', 'pose': 'Unspecified', 'truncated': '1'}], 'segmented': '0', 'size': {'depth': '3', 'height': '375', 'width': '500'}, 'source': {'annotation': 'PASCAL VOC2010', 'database': 'The VOC2010 Database', 'image': 'flickr'}}}, {'annotation': {'filename': '2009_003416.jpg', 'folder': 'VOC2012', 'object': [{'name': 'motorbike', 'bndbox': {'xmax': '323', 'xmin': '89', 'ymax': '283', 'ymin': '124'}, 'difficult': '0', 'occluded': '1', 'pose': 'Left', 'truncated': '0'}, {'name': 'car', 'bndbox': {'xmax': '227', 'xmin': '1', 'ymax': '228', 'ymin': '114'}, 'difficult': '0', 'occluded': '1', 'p

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'folder': 'VOC2012', 'filename': '2008_006096.jpg', 'source': {'database': 'The VOC2008 Database', 'annotation': 'PASCAL VOC2008', 'image': 'flickr'}, 'size': {'width': '500', 'height': '333', 'depth': '3'}, 'segmented': '0', 'object': [{'name': 'horse', 'pose': 'Left', 'truncated': '0', 'occluded': '1', 'bndbox': {'xmin': '132', 'ymin': '76', 'xmax': '344', 'ymax': '238'}, 'difficult': '0'}, {'name': 'horse', 'pose': 'Left', 'truncated': '1', 'occluded': '1', 'bndbox': {'xmin': '65', 'ymin': '83', 'xmax': '184', 'ymax': '198'}, 'difficult': '0'}, {'name': 'person', 'pose': 'Left', 'truncated': '0', 'occluded': '0', 'bndbox': {'xmin': '213', 'ymin': '36', 'xmax': '257', 'ymax': '166'}, 'difficult': '0'}, {'name': 'person', 'pose': 'Left', 'truncated': '0', 'occluded': '1', 'bndbox': {'xmin': '134', 'ymin': '52', 'xmax': '170', 'ymax': '148'}, 'difficult': '0'}, {'name': 'person', 'pose': 'Left', 'truncated': '1', 'occ

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'folder': 'VOC2012', 'filename': '2008_006365.jpg', 'source': {'database': 'The VOC2008 Database', 'annotation': 'PASCAL VOC2008', 'image': 'flickr'}, 'size': {'width': '500', 'height': '330', 'depth': '3'}, 'segmented': '0', 'object': [{'name': 'car', 'pose': 'Right', 'truncated': '1', 'occluded': '1', 'bndbox': {'xmin': '1', 'ymin': '229', 'xmax': '38', 'ymax': '258'}, 'difficult': '0'}, {'name': 'car', 'pose': 'Right', 'truncated': '1', 'occluded': '1', 'bndbox': {'xmin': '23', 'ymin': '234', 'xmax': '78', 'ymax': '254'}, 'difficult': '0'}, {'name': 'car', 'pose': 'Right', 'truncated': '1', 'occluded': '1', 'bndbox': {'xmin': '66', 'ymin': '225', 'xmax': '102', 'ymax': '249'}, 'difficult': '1'}, {'name': 'train', 'pose': 'Right', 'truncated': '0', 'occluded': '0', 'bndbox': {'xmin': '89', 'ymin': '77', 'xmax': '436', 'ymax': '301'}, 'difficult': '0'}]}}, {'annotation': {'filename': '2010_000415.jpg', 'folder': 'VOC

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'filename': '2009_003837.jpg', 'folder': 'VOC2012', 'object': [{'name': 'bird', 'bndbox': {'xmax': '402', 'xmin': '115', 'ymax': '220', 'ymin': '62'}, 'difficult': '0', 'occluded': '0', 'pose': 'Left', 'truncated': '0'}], 'segmented': '0', 'size': {'depth': '3', 'height': '333', 'width': '500'}, 'source': {'annotation': 'PASCAL VOC2009', 'database': 'The VOC2009 Database', 'image': 'flickr'}}}, {'annotation': {'folder': 'VOC2012', 'filename': '2008_006992.jpg', 'source': {'database': 'The VOC2008 Database', 'annotation': 'PASCAL VOC2008', 'image': 'flickr'}, 'size': {'width': '375', 'height': '500', 'depth': '3'}, 'segmented': '0', 'object': [{'name': 'bottle', 'pose': 'Unspecified', 'truncated': '0', 'occluded': '0', 'bndbox': {'xmin': '99', 'ymin': '379', 'xmax': '146', 'ymax': '410'}, 'difficult': '0'}, {'name': 'car', 'pose': 'Rear', 'truncated': '1', 'occluded': '0', 'bndbox': {'xmin': '315', 'ymin': '63', 'xmax'

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'filename': '2010_000463.jpg', 'folder': 'VOC2012', 'object': [{'name': 'diningtable', 'bndbox': {'xmax': '370', 'xmin': '116', 'ymax': '375', 'ymin': '236'}, 'difficult': '0', 'occluded': '1', 'pose': 'Unspecified', 'truncated': '1'}, {'name': 'chair', 'bndbox': {'xmax': '241', 'xmin': '127', 'ymax': '375', 'ymin': '242'}, 'difficult': '0', 'occluded': '0', 'pose': 'Rear', 'truncated': '1'}, {'name': 'chair', 'bndbox': {'xmax': '326', 'xmin': '234', 'ymax': '375', 'ymin': '232'}, 'difficult': '0', 'occluded': '1', 'pose': 'Rear', 'truncated': '1'}, {'name': 'chair', 'bndbox': {'xmax': '401', 'xmin': '316', 'ymax': '365', 'ymin': '208'}, 'difficult': '0', 'occluded': '1', 'pose': 'Left', 'truncated': '1'}, {'name': 'chair', 'bndbox': {'xmax': '146', 'xmin': '112', 'ymax': '328', 'ymin': '196'}, 'difficult': '0', 'occluded': '1', 'pose': 'Frontal', 'truncated': '1'}, {'name': 'chair', 'bndbox': {'xmax': '216', 'xmin': 

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'filename': '2009_004943.jpg', 'folder': 'VOC2012', 'object': [{'name': 'sheep', 'bndbox': {'xmax': '155', 'xmin': '38', 'ymax': '420', 'ymin': '341'}, 'difficult': '0', 'occluded': '0', 'pose': 'Right', 'truncated': '0'}, {'name': 'sheep', 'bndbox': {'xmax': '152', 'xmin': '97', 'ymax': '346', 'ymin': '313'}, 'difficult': '0', 'occluded': '0', 'pose': 'Right', 'truncated': '0'}, {'name': 'sheep', 'bndbox': {'xmax': '249', 'xmin': '201', 'ymax': '331', 'ymin': '299'}, 'difficult': '0', 'occluded': '0', 'pose': 'Right', 'truncated': '0'}, {'name': 'sheep', 'bndbox': {'xmax': '309', 'xmin': '267', 'ymax': '332', 'ymin': '292'}, 'difficult': '0', 'occluded': '0', 'pose': 'Unspecified', 'truncated': '0'}, {'name': 'sheep', 'bndbox': {'xmax': '361', 'xmin': '308', 'ymax': '315', 'ymin': '282'}, 'difficult': '0', 'occluded': '0', 'pose': 'Right', 'truncated': '0'}, {'name': 'sheep', 'bndbox': {'xmax': '286', 'xmin': '248', 

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'folder': 'VOC2012', 'filename': '2008_008325.jpg', 'source': {'database': 'The VOC2008 Database', 'annotation': 'PASCAL VOC2008', 'image': 'flickr'}, 'size': {'width': '333', 'height': '500', 'depth': '3'}, 'segmented': '0', 'object': [{'name': 'bicycle', 'pose': 'Unspecified', 'truncated': '1', 'occluded': '1', 'bndbox': {'xmin': '1', 'ymin': '327', 'xmax': '333', 'ymax': '500'}, 'difficult': '0'}, {'name': 'bicycle', 'pose': 'Unspecified', 'truncated': '1', 'occluded': '1', 'bndbox': {'xmin': '266', 'ymin': '238', 'xmax': '333', 'ymax': '322'}, 'difficult': '0'}, {'name': 'person', 'pose': 'Unspecified', 'truncated': '1', 'occluded': '1', 'bndbox': {'xmin': '3', 'ymin': '125', 'xmax': '333', 'ymax': '500'}, 'difficult': '0'}, {'name': 'person', 'pose': 'Unspecified', 'truncated': '1', 'occluded': '0', 'bndbox': {'xmin': '266', 'ymin': '197', 'xmax': '333', 'ymax': '308'}, 'difficult': '0'}]}}, {'annotation': {'file

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'filename': '2010_004657.jpg', 'folder': 'VOC2012', 'object': [{'name': 'bird', 'bndbox': {'xmax': '352', 'xmin': '200', 'ymax': '195', 'ymin': '101'}, 'difficult': '0', 'occluded': '0', 'pose': 'Right', 'truncated': '0'}], 'segmented': '0', 'size': {'depth': '3', 'height': '375', 'width': '500'}, 'source': {'annotation': 'PASCAL VOC2010', 'database': 'The VOC2010 Database', 'image': 'flickr'}}}, {'annotation': {'folder': 'VOC2012', 'filename': '2008_004217.jpg', 'source': {'database': 'The VOC2008 Database', 'annotation': 'PASCAL VOC2008', 'image': 'flickr'}, 'size': {'width': '500', 'height': '378', 'depth': '3'}, 'segmented': '0', 'object': [{'name': 'tvmonitor', 'pose': 'Frontal', 'truncated': '0', 'occluded': '1', 'bndbox': {'xmin': '41', 'ymin': '93', 'xmax': '227', 'ymax': '272'}, 'difficult': '0'}, {'name': 'person', 'pose': 'Left', 'truncated': '1', 'occluded': '0', 'bndbox': {'xmin': '170', 'ymin': '17', 'xm

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'filename': '2010_001458.jpg', 'folder': 'VOC2012', 'object': [{'name': 'bus', 'bndbox': {'xmax': '453', 'xmin': '63', 'ymax': '318', 'ymin': '67'}, 'difficult': '0', 'occluded': '0', 'pose': 'Unspecified', 'truncated': '0'}, {'name': 'bus', 'bndbox': {'xmax': '73', 'xmin': '1', 'ymax': '248', 'ymin': '162'}, 'difficult': '0', 'occluded': '0', 'pose': 'Unspecified', 'truncated': '1'}, {'name': 'person', 'bndbox': {'xmax': '470', 'xmin': '455', 'ymax': '288', 'ymin': '223'}, 'difficult': '0', 'occluded': '0', 'pose': 'Left', 'truncated': '0'}, {'name': 'person', 'bndbox': {'xmax': '489', 'xmin': '469', 'ymax': '282', 'ymin': '231'}, 'difficult': '0', 'occluded': '0', 'pose': 'Rear', 'truncated': '0'}], 'segmented': '0', 'size': {'depth': '3', 'height': '333', 'width': '500'}, 'source': {'annotation': 'PASCAL VOC2010', 'database': 'The VOC2010 Database', 'image': 'flickr'}}}, {'annotation': {'filename': '2009_001285.jpg

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'folder': 'VOC2012', 'filename': '2008_004239.jpg', 'source': {'database': 'The VOC2008 Database', 'annotation': 'PASCAL VOC2008', 'image': 'flickr'}, 'size': {'width': '500', 'height': '347', 'depth': '3'}, 'segmented': '0', 'object': [{'name': 'aeroplane', 'pose': 'Left', 'truncated': '0', 'occluded': '0', 'bndbox': {'xmin': '146', 'ymin': '158', 'xmax': '431', 'ymax': '251'}, 'difficult': '0'}]}}, {'annotation': {'folder': 'VOC2012', 'filename': '2008_006564.jpg', 'source': {'database': 'The VOC2008 Database', 'annotation': 'PASCAL VOC2008', 'image': 'flickr'}, 'size': {'width': '500', 'height': '334', 'depth': '3'}, 'segmented': '0', 'object': [{'name': 'aeroplane', 'pose': 'Left', 'truncated': '0', 'occluded': '0', 'bndbox': {'xmin': '121', 'ymin': '166', 'xmax': '306', 'ymax': '212'}, 'difficult': '0'}, {'name': 'aeroplane', 'pose': 'Unspecified', 'truncated': '1', 'occluded': '0', 'bndbox': {'xmin': '1', 'ymin'

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'filename': '2010_000746.jpg', 'folder': 'VOC2012', 'object': [{'name': 'dog', 'bndbox': {'xmax': '365', 'xmin': '14', 'ymax': '462', 'ymin': '47'}, 'difficult': '0', 'occluded': '0', 'pose': 'Unspecified', 'truncated': '0'}], 'segmented': '1', 'size': {'depth': '3', 'height': '500', 'width': '375'}, 'source': {'annotation': 'PASCAL VOC2010', 'database': 'The VOC2010 Database', 'image': 'flickr'}}}, {'annotation': {'filename': '2011_001956.jpg', 'folder': 'VOC2012', 'object': [{'name': 'horse', 'bndbox': {'xmax': '287', 'xmin': '109', 'ymax': '449', 'ymin': '72'}, 'difficult': '0', 'occluded': '0', 'pose': 'Frontal', 'truncated': '0'}, {'name': 'person', 'bndbox': {'xmax': '275', 'xmin': '122', 'ymax': '302', 'ymin': '37'}, 'difficult': '0', 'occluded': '1', 'pose': 'Frontal', 'truncated': '0'}], 'segmented': '0', 'size': {'depth': '3', 'height': '500', 'width': '333'}, 'source': {'annotation': 'PASCAL VOC2011', 'data

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'filename': '2010_003719.jpg', 'folder': 'VOC2012', 'object': [{'name': 'chair', 'bndbox': {'xmax': '500', 'xmin': '383', 'ymax': '368', 'ymin': '153'}, 'difficult': '0', 'occluded': '1', 'pose': 'Unspecified', 'truncated': '1'}, {'name': 'chair', 'bndbox': {'xmax': '140', 'xmin': '1', 'ymax': '373', 'ymin': '164'}, 'difficult': '0', 'occluded': '1', 'pose': 'Unspecified', 'truncated': '1'}, {'name': 'tvmonitor', 'bndbox': {'xmax': '360', 'xmin': '300', 'ymax': '108', 'ymin': '51'}, 'difficult': '0', 'occluded': '0', 'pose': 'Frontal', 'truncated': '0'}, {'name': 'tvmonitor', 'bndbox': {'xmax': '218', 'xmin': '147', 'ymax': '119', 'ymin': '41'}, 'difficult': '0', 'occluded': '1', 'pose': 'Frontal', 'truncated': '0'}, {'name': 'person', 'bndbox': {'xmax': '312', 'xmin': '150', 'ymax': '338', 'ymin': '14'}, 'difficult': '0', 'occluded': '1', 'pose': 'Rear', 'truncated': '0'}, {'name': 'person', 'bndbox': {'xmax': '162',

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'filename': '2010_005725.jpg', 'folder': 'VOC2012', 'object': [{'name': 'bird', 'bndbox': {'xmax': '423', 'xmin': '168', 'ymax': '376', 'ymin': '100'}, 'difficult': '0', 'occluded': '0', 'pose': 'Unspecified', 'truncated': '1'}], 'segmented': '1', 'size': {'depth': '3', 'height': '376', 'width': '500'}, 'source': {'annotation': 'PASCAL VOC2010', 'database': 'The VOC2010 Database', 'image': 'flickr'}}}, {'annotation': {'filename': '2009_001368.jpg', 'folder': 'VOC2012', 'object': [{'name': 'dog', 'bndbox': {'xmax': '381', 'xmin': '124', 'ymax': '333', 'ymin': '78'}, 'difficult': '0', 'occluded': '0', 'pose': 'Unspecified', 'truncated': '1'}, {'name': 'dog', 'bndbox': {'xmax': '211', 'xmin': '21', 'ymax': '333', 'ymin': '81'}, 'difficult': '0', 'occluded': '1', 'pose': 'Unspecified', 'truncated': '1'}], 'segmented': '0', 'size': {'depth': '3', 'height': '333', 'width': '500'}, 'source': {'annotation': 'PASCAL VOC2009', 

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'folder': 'VOC2012', 'filename': '2008_001716.jpg', 'source': {'database': 'The VOC2008 Database', 'annotation': 'PASCAL VOC2008', 'image': 'flickr'}, 'size': {'width': '498', 'height': '288', 'depth': '3'}, 'segmented': '1', 'object': [{'name': 'car', 'pose': 'Unspecified', 'truncated': '0', 'occluded': '0', 'bndbox': {'xmin': '201', 'ymin': '128', 'xmax': '429', 'ymax': '272'}, 'difficult': '0'}]}}, {'annotation': {'folder': 'VOC2012', 'filename': '2008_005231.jpg', 'source': {'database': 'The VOC2008 Database', 'annotation': 'PASCAL VOC2008', 'image': 'flickr'}, 'size': {'width': '375', 'height': '500', 'depth': '3'}, 'segmented': '1', 'object': [{'name': 'tvmonitor', 'pose': 'Frontal', 'truncated': '0', 'occluded': '0', 'bndbox': {'xmin': '177', 'ymin': '71', 'xmax': '326', 'ymax': '181'}, 'difficult': '0'}, {'name': 'person', 'pose': 'Rear', 'truncated': '1', 'occluded': '0', 'bndbox': {'xmin': '7', 'ymin': '174'

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'folder': 'VOC2012', 'filename': '2008_008757.jpg', 'source': {'database': 'The VOC2008 Database', 'annotation': 'PASCAL VOC2008', 'image': 'flickr'}, 'size': {'width': '500', 'height': '375', 'depth': '3'}, 'segmented': '0', 'object': [{'name': 'train', 'pose': 'Unspecified', 'truncated': '1', 'occluded': '0', 'bndbox': {'xmin': '282', 'ymin': '1', 'xmax': '500', 'ymax': '375'}, 'difficult': '0'}, {'name': 'person', 'pose': 'Frontal', 'truncated': '0', 'occluded': '1', 'bndbox': {'xmin': '266', 'ymin': '106', 'xmax': '280', 'ymax': '148'}, 'difficult': '1'}, {'name': 'person', 'pose': 'Rear', 'truncated': '1', 'occluded': '1', 'bndbox': {'xmin': '232', 'ymin': '106', 'xmax': '241', 'ymax': '120'}, 'difficult': '1'}, {'name': 'person', 'pose': 'Frontal', 'truncated': '1', 'occluded': '1', 'bndbox': {'xmin': '212', 'ymin': '107', 'xmax': '226', 'ymax': '147'}, 'difficult': '1'}, {'name': 'person', 'pose': 'Right', 'tru

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'folder': 'VOC2012', 'filename': '2008_005794.jpg', 'source': {'database': 'The VOC2008 Database', 'annotation': 'PASCAL VOC2008', 'image': 'flickr'}, 'size': {'width': '500', 'height': '375', 'depth': '3'}, 'segmented': '0', 'object': [{'name': 'pottedplant', 'pose': 'Unspecified', 'truncated': '0', 'occluded': '0', 'bndbox': {'xmin': '406', 'ymin': '70', 'xmax': '497', 'ymax': '179'}, 'difficult': '0'}, {'name': 'person', 'pose': 'Left', 'truncated': '0', 'occluded': '0', 'bndbox': {'xmin': '308', 'ymin': '69', 'xmax': '358', 'ymax': '231'}, 'difficult': '0'}, {'name': 'person', 'pose': 'Right', 'truncated': '0', 'occluded': '0', 'bndbox': {'xmin': '179', 'ymin': '135', 'xmax': '237', 'ymax': '215'}, 'difficult': '0'}, {'name': 'person', 'pose': 'Right', 'truncated': '0', 'occluded': '0', 'bndbox': {'xmin': '137', 'ymin': '52', 'xmax': '178', 'ymax': '208'}, 'difficult': '0'}, {'name': 'person', 'pose': 'Frontal', '

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'folder': 'VOC2012', 'filename': '2008_007573.jpg', 'source': {'database': 'The VOC2008 Database', 'annotation': 'PASCAL VOC2008', 'image': 'flickr'}, 'size': {'width': '500', 'height': '375', 'depth': '3'}, 'segmented': '0', 'object': [{'name': 'car', 'pose': 'Rear', 'truncated': '1', 'occluded': '0', 'bndbox': {'xmin': '173', 'ymin': '1', 'xmax': '500', 'ymax': '375'}, 'difficult': '0'}]}}, {'annotation': {'folder': 'VOC2012', 'filename': '2008_000973.jpg', 'source': {'database': 'The VOC2008 Database', 'annotation': 'PASCAL VOC2008', 'image': 'flickr'}, 'size': {'width': '500', 'height': '367', 'depth': '3'}, 'segmented': '0', 'object': [{'name': 'dog', 'pose': 'Unspecified', 'truncated': '1', 'occluded': '0', 'bndbox': {'xmin': '141', 'ymin': '1', 'xmax': '500', 'ymax': '367'}, 'difficult': '0'}]}}, {'annotation': {'folder': 'VOC2012', 'filename': '2008_007746.jpg', 'source': {'database': 'The VOC2008 Database', '

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'filename': '2011_000052.jpg', 'folder': 'VOC2012', 'object': [{'name': 'boat', 'bndbox': {'xmax': '480', 'xmin': '411', 'ymax': '249', 'ymin': '235'}, 'difficult': '0', 'occluded': '1', 'pose': 'Unspecified', 'truncated': '0'}, {'name': 'person', 'bndbox': {'xmax': '475', 'xmin': '459', 'ymax': '267', 'ymin': '236'}, 'difficult': '0', 'occluded': '0', 'pose': 'Frontal', 'truncated': '0'}], 'segmented': '0', 'size': {'depth': '3', 'height': '375', 'width': '500'}, 'source': {'annotation': 'PASCAL VOC2011', 'database': 'The VOC2011 Database', 'image': 'flickr'}}}, {'annotation': {'folder': 'VOC2012', 'filename': '2008_008281.jpg', 'source': {'database': 'The VOC2008 Database', 'annotation': 'PASCAL VOC2008', 'image': 'flickr'}, 'size': {'width': '500', 'height': '334', 'depth': '3'}, 'segmented': '0', 'object': [{'name': 'bus', 'pose': 'Frontal', 'truncated': '0', 'occluded': '0', 'bndbox': {'xmin': '111', 'ymin': '44'

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'folder': 'VOC2012', 'filename': '2008_005283.jpg', 'source': {'database': 'The VOC2008 Database', 'annotation': 'PASCAL VOC2008', 'image': 'flickr'}, 'size': {'width': '500', 'height': '375', 'depth': '3'}, 'segmented': '0', 'object': [{'name': 'car', 'pose': 'Rear', 'truncated': '0', 'occluded': '1', 'bndbox': {'xmin': '33', 'ymin': '45', 'xmax': '433', 'ymax': '302'}, 'difficult': '0'}]}}, {'annotation': {'folder': 'VOC2012', 'filename': '2008_007504.jpg', 'source': {'database': 'The VOC2008 Database', 'annotation': 'PASCAL VOC2008', 'image': 'flickr'}, 'size': {'width': '500', 'height': '357', 'depth': '3'}, 'segmented': '0', 'object': [{'name': 'aeroplane', 'pose': 'Left', 'truncated': '1', 'occluded': '0', 'bndbox': {'xmin': '15', 'ymin': '110', 'xmax': '500', 'ymax': '264'}, 'difficult': '0'}]}}, {'annotation': {'filename': '2010_001980.jpg', 'folder': 'VOC2012', 'object': [{'name': 'bottle', 'bndbox': {'xmax':

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'filename': '2010_000490.jpg', 'folder': 'VOC2012', 'object': [{'name': 'person', 'bndbox': {'xmax': '210', 'xmin': '106', 'ymax': '327', 'ymin': '20'}, 'difficult': '0', 'occluded': '1', 'pose': 'Frontal', 'truncated': '0'}, {'name': 'person', 'bndbox': {'xmax': '296', 'xmin': '180', 'ymax': '190', 'ymin': '14'}, 'difficult': '0', 'occluded': '0', 'pose': 'Right', 'truncated': '1'}, {'name': 'motorbike', 'bndbox': {'xmax': '464', 'xmin': '47', 'ymax': '335', 'ymin': '82'}, 'difficult': '0', 'occluded': '0', 'pose': 'Right', 'truncated': '0'}], 'segmented': '0', 'size': {'depth': '3', 'height': '375', 'width': '500'}, 'source': {'annotation': 'PASCAL VOC2010', 'database': 'The VOC2010 Database', 'image': 'flickr'}}}, {'annotation': {'filename': '2010_005967.jpg', 'folder': 'VOC2012', 'object': [{'name': 'chair', 'bndbox': {'xmax': '444', 'xmin': '227', 'ymax': '375', 'ymin': '214'}, 'difficult': '0', 'occluded': '1', 

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'folder': 'VOC2012', 'filename': '2008_004112.jpg', 'source': {'database': 'The VOC2008 Database', 'annotation': 'PASCAL VOC2008', 'image': 'flickr'}, 'size': {'width': '500', 'height': '375', 'depth': '3'}, 'segmented': '1', 'object': [{'name': 'car', 'pose': 'Left', 'truncated': '0', 'occluded': '0', 'bndbox': {'xmin': '56', 'ymin': '103', 'xmax': '441', 'ymax': '268'}, 'difficult': '0'}]}}, {'annotation': {'filename': '2009_002674.jpg', 'folder': 'VOC2012', 'object': [{'name': 'aeroplane', 'bndbox': {'xmax': '493', 'xmin': '11', 'ymax': '231', 'ymin': '51'}, 'difficult': '0', 'occluded': '0', 'pose': 'Frontal', 'truncated': '0'}], 'segmented': '1', 'size': {'depth': '3', 'height': '333', 'width': '500'}, 'source': {'annotation': 'PASCAL VOC2009', 'database': 'The VOC2009 Database', 'image': 'flickr'}}}, {'annotation': {'filename': '2010_003804.jpg', 'folder': 'VOC2012', 'object': [{'name': 'sofa', 'bndbox': {'xmax'

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'filename': '2009_001550.jpg', 'folder': 'VOC2012', 'object': [{'name': 'cat', 'bndbox': {'xmax': '485', 'xmin': '32', 'ymax': '374', 'ymin': '28'}, 'difficult': '0', 'occluded': '0', 'pose': 'Unspecified', 'truncated': '0'}], 'segmented': '0', 'size': {'depth': '3', 'height': '375', 'width': '500'}, 'source': {'annotation': 'PASCAL VOC2009', 'database': 'The VOC2009 Database', 'image': 'flickr'}}}, {'annotation': {'filename': '2009_003054.jpg', 'folder': 'VOC2012', 'object': [{'name': 'train', 'bndbox': {'xmax': '500', 'xmin': '129', 'ymax': '233', 'ymin': '175'}, 'difficult': '0', 'occluded': '1', 'pose': 'Left', 'truncated': '1'}], 'segmented': '1', 'size': {'depth': '3', 'height': '375', 'width': '500'}, 'source': {'annotation': 'PASCAL VOC2009', 'database': 'The VOC2009 Database', 'image': 'flickr'}}}, {'annotation': {'filename': '2010_005506.jpg', 'folder': 'VOC2012', 'object': [{'name': 'train', 'bndbox': {'xma

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'filename': '2009_004285.jpg', 'folder': 'VOC2012', 'object': [{'name': 'person', 'bndbox': {'xmax': '322', 'xmin': '1', 'ymax': '500', 'ymin': '39'}, 'difficult': '0', 'occluded': '0', 'pose': 'Unspecified', 'truncated': '1'}], 'segmented': '0', 'size': {'depth': '3', 'height': '500', 'width': '333'}, 'source': {'annotation': 'PASCAL VOC2009', 'database': 'The VOC2009 Database', 'image': 'flickr'}}}, {'annotation': {'filename': '2011_001134.jpg', 'folder': 'VOC2012', 'object': [{'name': 'bird', 'bndbox': {'xmax': '460', 'xmin': '240', 'ymax': '419', 'ymin': '70'}, 'difficult': '0', 'occluded': '1', 'pose': 'Right', 'truncated': '0'}, {'name': 'bird', 'bndbox': {'xmax': '447', 'xmin': '336', 'ymax': '338', 'ymin': '208'}, 'difficult': '0', 'occluded': '1', 'pose': 'Unspecified', 'truncated': '0'}, {'name': 'bird', 'bndbox': {'xmax': '363', 'xmin': '175', 'ymax': '496', 'ymin': '141'}, 'difficult': '0', 'occluded': '0'

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'filename': '2010_001279.jpg', 'folder': 'VOC2012', 'object': [{'name': 'dog', 'bndbox': {'xmax': '452', 'xmin': '280', 'ymax': '321', 'ymin': '179'}, 'difficult': '0', 'occluded': '0', 'pose': 'Left', 'truncated': '0'}, {'name': 'dog', 'bndbox': {'xmax': '243', 'xmin': '150', 'ymax': '325', 'ymin': '135'}, 'difficult': '0', 'occluded': '0', 'pose': 'Rear', 'truncated': '0'}, {'name': 'dog', 'bndbox': {'xmax': '309', 'xmin': '135', 'ymax': '289', 'ymin': '104'}, 'difficult': '0', 'occluded': '1', 'pose': 'Unspecified', 'truncated': '0'}], 'segmented': '1', 'size': {'depth': '3', 'height': '375', 'width': '500'}, 'source': {'annotation': 'PASCAL VOC2010', 'database': 'The VOC2010 Database', 'image': 'flickr'}}}, {'annotation': {'filename': '2010_005835.jpg', 'folder': 'VOC2012', 'object': [{'name': 'person', 'bndbox': {'xmax': '407', 'xmin': '179', 'ymax': '363', 'ymin': '102'}, 'difficult': '0', 'occluded': '0', 'pose

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'filename': '2009_003975.jpg', 'folder': 'VOC2012', 'object': [{'name': 'cow', 'bndbox': {'xmax': '334', 'xmin': '266', 'ymax': '453', 'ymin': '211'}, 'difficult': '0', 'occluded': '0', 'pose': 'Unspecified', 'truncated': '1'}, {'name': 'cow', 'bndbox': {'xmax': '264', 'xmin': '79', 'ymax': '476', 'ymin': '135'}, 'difficult': '0', 'occluded': '0', 'pose': 'Frontal', 'truncated': '0'}], 'segmented': '1', 'size': {'depth': '3', 'height': '500', 'width': '334'}, 'source': {'annotation': 'PASCAL VOC2009', 'database': 'The VOC2009 Database', 'image': 'flickr'}}}, {'annotation': {'filename': '2010_002702.jpg', 'folder': 'VOC2012', 'object': [{'name': 'person', 'bndbox': {'xmax': '253', 'xmin': '141', 'ymax': '374', 'ymin': '167'}, 'difficult': '0', 'occluded': '0', 'pose': 'Frontal', 'truncated': '1'}, {'name': 'boat', 'bndbox': {'xmax': '487', 'xmin': '77', 'ymax': '247', 'ymin': '73'}, 'difficult': '0', 'occluded': '1', '

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'filename': '2009_000502.jpg', 'folder': 'VOC2012', 'object': [{'name': 'diningtable', 'bndbox': {'xmax': '500', 'xmin': '1', 'ymax': '375', 'ymin': '40'}, 'difficult': '0', 'occluded': '1', 'pose': 'Unspecified', 'truncated': '1'}, {'name': 'bottle', 'bndbox': {'xmax': '296', 'xmin': '239', 'ymax': '187', 'ymin': '4'}, 'difficult': '0', 'occluded': '0', 'pose': 'Unspecified', 'truncated': '0'}], 'segmented': '0', 'size': {'depth': '3', 'height': '375', 'width': '500'}, 'source': {'annotation': 'PASCAL VOC2009', 'database': 'The VOC2009 Database', 'image': 'flickr'}}}, {'annotation': {'filename': '2011_002144.jpg', 'folder': 'VOC2012', 'object': [{'name': 'person', 'bndbox': {'xmax': '364', 'xmin': '157', 'ymax': '375', 'ymin': '70'}, 'difficult': '0', 'occluded': '0', 'pose': 'Frontal', 'truncated': '1'}], 'segmented': '0', 'size': {'depth': '3', 'height': '375', 'width': '500'}, 'source': {'annotation': 'PASCAL VOC2

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'filename': '2009_003555.jpg', 'folder': 'VOC2012', 'object': [{'name': 'bus', 'bndbox': {'xmax': '301', 'xmin': '82', 'ymax': '279', 'ymin': '38'}, 'difficult': '0', 'occluded': '0', 'pose': 'Frontal', 'truncated': '0'}], 'segmented': '1', 'size': {'depth': '3', 'height': '338', 'width': '500'}, 'source': {'annotation': 'PASCAL VOC2009', 'database': 'The VOC2009 Database', 'image': 'flickr'}}}, {'annotation': {'folder': 'VOC2012', 'filename': '2008_008517.jpg', 'source': {'database': 'The VOC2008 Database', 'annotation': 'PASCAL VOC2008', 'image': 'flickr'}, 'size': {'width': '500', 'height': '375', 'depth': '3'}, 'segmented': '0', 'object': [{'name': 'sofa', 'pose': 'Unspecified', 'truncated': '1', 'occluded': '1', 'bndbox': {'xmin': '8', 'ymin': '135', 'xmax': '500', 'ymax': '375'}, 'difficult': '0'}, {'name': 'pottedplant', 'pose': 'Unspecified', 'truncated': '1', 'occluded': '1', 'bndbox': {'xmin': '1', 'ymin': '

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'filename': '2010_004592.jpg', 'folder': 'VOC2012', 'object': [{'name': 'chair', 'bndbox': {'xmax': '44', 'xmin': '1', 'ymax': '441', 'ymin': '337'}, 'difficult': '0', 'occluded': '0', 'pose': 'Frontal', 'truncated': '1'}, {'name': 'chair', 'bndbox': {'xmax': '359', 'xmin': '308', 'ymax': '374', 'ymin': '312'}, 'difficult': '0', 'occluded': '0', 'pose': 'Frontal', 'truncated': '1'}, {'name': 'chair', 'bndbox': {'xmax': '299', 'xmin': '244', 'ymax': '479', 'ymin': '348'}, 'difficult': '0', 'occluded': '1', 'pose': 'Frontal', 'truncated': '1'}, {'name': 'diningtable', 'bndbox': {'xmax': '375', 'xmin': '269', 'ymax': '500', 'ymin': '369'}, 'difficult': '0', 'occluded': '0', 'pose': 'Unspecified', 'truncated': '1'}], 'segmented': '0', 'size': {'depth': '3', 'height': '500', 'width': '375'}, 'source': {'annotation': 'PASCAL VOC2010', 'database': 'The VOC2010 Database', 'image': 'flickr'}}}, {'annotation': {'filename': '201

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'folder': 'VOC2012', 'filename': '2008_008048.jpg', 'source': {'database': 'The VOC2008 Database', 'annotation': 'PASCAL VOC2008', 'image': 'flickr'}, 'size': {'width': '500', 'height': '375', 'depth': '3'}, 'segmented': '0', 'object': [{'name': 'aeroplane', 'pose': 'Unspecified', 'truncated': '0', 'occluded': '0', 'bndbox': {'xmin': '303', 'ymin': '82', 'xmax': '339', 'ymax': '119'}, 'difficult': '0'}, {'name': 'aeroplane', 'pose': 'Unspecified', 'truncated': '0', 'occluded': '0', 'bndbox': {'xmin': '259', 'ymin': '79', 'xmax': '297', 'ymax': '116'}, 'difficult': '0'}, {'name': 'aeroplane', 'pose': 'Unspecified', 'truncated': '0', 'occluded': '0', 'bndbox': {'xmin': '270', 'ymin': '115', 'xmax': '307', 'ymax': '144'}, 'difficult': '0'}, {'name': 'aeroplane', 'pose': 'Unspecified', 'truncated': '0', 'occluded': '1', 'bndbox': {'xmin': '226', 'ymin': '143', 'xmax': '259', 'ymax': '172'}, 'difficult': '0'}, {'name': 'ae

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'filename': '2010_005253.jpg', 'folder': 'VOC2012', 'object': [{'name': 'dog', 'bndbox': {'xmax': '327', 'xmin': '134', 'ymax': '301', 'ymin': '189'}, 'difficult': '0', 'occluded': '0', 'pose': 'Left', 'truncated': '0'}], 'segmented': '0', 'size': {'depth': '3', 'height': '375', 'width': '500'}, 'source': {'annotation': 'PASCAL VOC2010', 'database': 'The VOC2010 Database', 'image': 'flickr'}}}, {'annotation': {'filename': '2010_001700.jpg', 'folder': 'VOC2012', 'object': [{'name': 'cat', 'bndbox': {'xmax': '312', 'xmin': '131', 'ymax': '500', 'ymin': '178'}, 'difficult': '0', 'occluded': '1', 'pose': 'Frontal', 'truncated': '0'}], 'segmented': '0', 'size': {'depth': '3', 'height': '500', 'width': '375'}, 'source': {'annotation': 'PASCAL VOC2010', 'database': 'The VOC2010 Database', 'image': 'flickr'}}}, {'annotation': {'filename': '2009_005308.jpg', 'folder': 'VOC2012', 'object': [{'name': 'bottle', 'bndbox': {'xmax':

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'filename': '2010_000447.jpg', 'folder': 'VOC2012', 'object': [{'name': 'chair', 'bndbox': {'xmax': '188', 'xmin': '89', 'ymax': '179', 'ymin': '36'}, 'difficult': '0', 'occluded': '0', 'pose': 'Unspecified', 'truncated': '0'}], 'segmented': '0', 'size': {'depth': '3', 'height': '347', 'width': '500'}, 'source': {'annotation': 'PASCAL VOC2010', 'database': 'The VOC2010 Database', 'image': 'flickr'}}}, {'annotation': {'filename': '2010_000137.jpg', 'folder': 'VOC2012', 'object': [{'name': 'bottle', 'bndbox': {'xmax': '228', 'xmin': '32', 'ymax': '500', 'ymin': '116'}, 'difficult': '0', 'occluded': '0', 'pose': 'Unspecified', 'truncated': '1'}], 'segmented': '0', 'size': {'depth': '3', 'height': '500', 'width': '333'}, 'source': {'annotation': 'PASCAL VOC2010', 'database': 'The VOC2010 Database', 'image': 'flickr'}}}, {'annotation': {'filename': '2011_001030.jpg', 'folder': 'VOC2012', 'object': [{'name': 'bicycle', 'bnd

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'filename': '2011_003065.jpg', 'folder': 'VOC2012', 'object': [{'name': 'bus', 'bndbox': {'xmax': '241', 'xmin': '207', 'ymax': '216', 'ymin': '188'}, 'difficult': '0', 'occluded': '0', 'pose': 'Unspecified', 'truncated': '0'}, {'name': 'car', 'bndbox': {'xmax': '117', 'xmin': '88', 'ymax': '224', 'ymin': '206'}, 'difficult': '0', 'occluded': '0', 'pose': 'Unspecified', 'truncated': '0'}, {'name': 'car', 'bndbox': {'xmax': '263', 'xmin': '249', 'ymax': '217', 'ymin': '204'}, 'difficult': '1', 'occluded': '0', 'pose': 'Unspecified', 'truncated': '0'}, {'name': 'car', 'bndbox': {'xmax': '287', 'xmin': '279', 'ymax': '211', 'ymin': '196'}, 'difficult': '1', 'occluded': '0', 'pose': 'Unspecified', 'truncated': '0'}, {'name': 'car', 'bndbox': {'xmax': '376', 'xmin': '271', 'ymax': '244', 'ymin': '209'}, 'difficult': '0', 'occluded': '1', 'pose': 'Left', 'truncated': '0'}, {'name': 'car', 'bndbox': {'xmax': '315', 'xmin': '

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'filename': '2010_005985.jpg', 'folder': 'VOC2012', 'object': [{'name': 'chair', 'bndbox': {'xmax': '297', 'xmin': '251', 'ymax': '374', 'ymin': '245'}, 'difficult': '0', 'occluded': '1', 'pose': 'Unspecified', 'truncated': '1'}, {'name': 'chair', 'bndbox': {'xmax': '277', 'xmin': '170', 'ymax': '405', 'ymin': '255'}, 'difficult': '0', 'occluded': '1', 'pose': 'Unspecified', 'truncated': '1'}, {'name': 'chair', 'bndbox': {'xmax': '163', 'xmin': '140', 'ymax': '267', 'ymin': '231'}, 'difficult': '0', 'occluded': '0', 'pose': 'Unspecified', 'truncated': '1'}, {'name': 'chair', 'bndbox': {'xmax': '140', 'xmin': '107', 'ymax': '276', 'ymin': '237'}, 'difficult': '0', 'occluded': '1', 'pose': 'Unspecified', 'truncated': '1'}, {'name': 'chair', 'bndbox': {'xmax': '158', 'xmin': '58', 'ymax': '399', 'ymin': '243'}, 'difficult': '0', 'occluded': '1', 'pose': 'Unspecified', 'truncated': '0'}, {'name': 'chair', 'bndbox': {'xmax

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'folder': 'VOC2012', 'filename': '2008_002977.jpg', 'source': {'database': 'The VOC2008 Database', 'annotation': 'PASCAL VOC2008', 'image': 'flickr'}, 'size': {'width': '500', 'height': '375', 'depth': '3'}, 'segmented': '0', 'object': [{'name': 'car', 'pose': 'Unspecified', 'truncated': '1', 'occluded': '0', 'bndbox': {'xmin': '371', 'ymin': '225', 'xmax': '491', 'ymax': '271'}, 'difficult': '0'}, {'name': 'aeroplane', 'pose': 'Right', 'truncated': '1', 'occluded': '0', 'bndbox': {'xmin': '1', 'ymin': '1', 'xmax': '494', 'ymax': '375'}, 'difficult': '0'}]}}, {'annotation': {'folder': 'VOC2012', 'filename': '2008_005400.jpg', 'source': {'database': 'The VOC2008 Database', 'annotation': 'PASCAL VOC2008', 'image': 'flickr'}, 'size': {'width': '363', 'height': '500', 'depth': '3'}, 'segmented': '0', 'object': [{'name': 'bird', 'pose': 'Right', 'truncated': '0', 'occluded': '0', 'bndbox': {'xmin': '106', 'ymin': '130', 'x

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'folder': 'VOC2012', 'filename': '2008_002501.jpg', 'source': {'database': 'The VOC2008 Database', 'annotation': 'PASCAL VOC2008', 'image': 'flickr'}, 'size': {'width': '500', 'height': '375', 'depth': '3'}, 'segmented': '0', 'object': [{'name': 'person', 'pose': 'Frontal', 'truncated': '1', 'occluded': '0', 'bndbox': {'xmin': '125', 'ymin': '33', 'xmax': '328', 'ymax': '375'}, 'difficult': '0'}, {'name': 'person', 'pose': 'Frontal', 'truncated': '1', 'occluded': '1', 'bndbox': {'xmin': '1', 'ymin': '105', 'xmax': '167', 'ymax': '375'}, 'difficult': '0'}, {'name': 'car', 'pose': 'Left', 'truncated': '1', 'occluded': '0', 'bndbox': {'xmin': '431', 'ymin': '180', 'xmax': '500', 'ymax': '232'}, 'difficult': '0'}, {'name': 'car', 'pose': 'Right', 'truncated': '1', 'occluded': '1', 'bndbox': {'xmin': '34', 'ymin': '152', 'xmax': '65', 'ymax': '173'}, 'difficult': '0'}, {'name': 'person', 'pose': 'Unspecified', 'truncated':

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'filename': '2009_004322.jpg', 'folder': 'VOC2012', 'object': [{'name': 'sofa', 'bndbox': {'xmax': '375', 'xmin': '71', 'ymax': '500', 'ymin': '119'}, 'difficult': '0', 'occluded': '1', 'pose': 'Unspecified', 'truncated': '1'}, {'name': 'person', 'bndbox': {'xmax': '375', 'xmin': '129', 'ymax': '439', 'ymin': '127'}, 'difficult': '0', 'occluded': '1', 'pose': 'Unspecified', 'truncated': '1'}], 'segmented': '0', 'size': {'depth': '3', 'height': '500', 'width': '375'}, 'source': {'annotation': 'PASCAL VOC2009', 'database': 'The VOC2009 Database', 'image': 'flickr'}}}, {'annotation': {'filename': '2011_002920.jpg', 'folder': 'VOC2012', 'object': [{'name': 'bottle', 'bndbox': {'xmax': '336', 'xmin': '232', 'ymax': '268', 'ymin': '187'}, 'difficult': '0', 'occluded': '1', 'pose': 'Unspecified', 'truncated': '0'}, {'name': 'person', 'bndbox': {'xmax': '333', 'xmin': '36', 'ymax': '374', 'ymin': '69'}, 'difficult': '0', 'occ

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'filename': '2009_002763.jpg', 'folder': 'VOC2012', 'object': [{'name': 'motorbike', 'bndbox': {'xmax': '419', 'xmin': '58', 'ymax': '297', 'ymin': '54'}, 'difficult': '0', 'occluded': '0', 'pose': 'Left', 'truncated': '0'}], 'segmented': '1', 'size': {'depth': '3', 'height': '375', 'width': '500'}, 'source': {'annotation': 'PASCAL VOC2009', 'database': 'The VOC2009 Database', 'image': 'flickr'}}}, {'annotation': {'filename': '2011_000954.jpg', 'folder': 'VOC2012', 'object': [{'name': 'car', 'bndbox': {'xmax': '385', 'xmin': '1', 'ymax': '375', 'ymin': '108'}, 'difficult': '0', 'occluded': '0', 'pose': 'Frontal', 'truncated': '1'}, {'name': 'car', 'bndbox': {'xmax': '489', 'xmin': '296', 'ymax': '235', 'ymin': '124'}, 'difficult': '0', 'occluded': '1', 'pose': 'Frontal', 'truncated': '0'}, {'name': 'person', 'bndbox': {'xmax': '134', 'xmin': '69', 'ymax': '200', 'ymin': '134'}, 'difficult': '0', 'occluded': '0', 'pose

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'folder': 'VOC2012', 'filename': '2008_003414.jpg', 'source': {'database': 'The VOC2008 Database', 'annotation': 'PASCAL VOC2008', 'image': 'flickr'}, 'size': {'width': '500', 'height': '500', 'depth': '3'}, 'segmented': '0', 'object': [{'name': 'train', 'pose': 'Frontal', 'truncated': '0', 'occluded': '0', 'bndbox': {'xmin': '35', 'ymin': '84', 'xmax': '369', 'ymax': '462'}, 'difficult': '0'}, {'name': 'person', 'pose': 'Rear', 'truncated': '0', 'occluded': '1', 'bndbox': {'xmin': '348', 'ymin': '284', 'xmax': '399', 'ymax': '422'}, 'difficult': '0'}, {'name': 'train', 'pose': 'Unspecified', 'truncated': '0', 'occluded': '0', 'bndbox': {'xmin': '412', 'ymin': '302', 'xmax': '470', 'ymax': '372'}, 'difficult': '1'}]}}, {'annotation': {'filename': '2010_004009.jpg', 'folder': 'VOC2012', 'object': [{'name': 'tvmonitor', 'bndbox': {'xmax': '253', 'xmin': '114', 'ymax': '271', 'ymin': '125'}, 'difficult': '0', 'occluded':

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'filename': '2010_003815.jpg', 'folder': 'VOC2012', 'object': [{'name': 'boat', 'bndbox': {'xmax': '469', 'xmin': '29', 'ymax': '241', 'ymin': '134'}, 'difficult': '0', 'occluded': '0', 'pose': 'Right', 'truncated': '0'}, {'name': 'boat', 'bndbox': {'xmax': '428', 'xmin': '376', 'ymax': '38', 'ymin': '24'}, 'difficult': '1', 'occluded': '0', 'pose': 'Right', 'truncated': '0'}, {'name': 'boat', 'bndbox': {'xmax': '145', 'xmin': '1', 'ymax': '66', 'ymin': '25'}, 'difficult': '1', 'occluded': '0', 'pose': 'Right', 'truncated': '1'}, {'name': 'boat', 'bndbox': {'xmax': '106', 'xmin': '1', 'ymax': '34', 'ymin': '1'}, 'difficult': '1', 'occluded': '1', 'pose': 'Unspecified', 'truncated': '1'}], 'segmented': '0', 'size': {'depth': '3', 'height': '333', 'width': '500'}, 'source': {'annotation': 'PASCAL VOC2010', 'database': 'The VOC2010 Database', 'image': 'flickr'}}}, {'annotation': {'filename': '2010_005458.jpg', 'folder': 

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'filename': '2010_000991.jpg', 'folder': 'VOC2012', 'object': [{'name': 'motorbike', 'bndbox': {'xmax': '389', 'xmin': '115', 'ymax': '372', 'ymin': '115'}, 'difficult': '0', 'occluded': '1', 'pose': 'Unspecified', 'truncated': '0'}, {'name': 'person', 'bndbox': {'xmax': '342', 'xmin': '204', 'ymax': '342', 'ymin': '14'}, 'difficult': '0', 'occluded': '1', 'pose': 'Unspecified', 'truncated': '0'}], 'segmented': '0', 'size': {'depth': '3', 'height': '375', 'width': '500'}, 'source': {'annotation': 'PASCAL VOC2010', 'database': 'The VOC2010 Database', 'image': 'flickr'}}}, {'annotation': {'filename': '2009_005037.jpg', 'folder': 'VOC2012', 'object': [{'name': 'cat', 'bndbox': {'xmax': '440', 'xmin': '221', 'ymax': '242', 'ymin': '61'}, 'difficult': '0', 'occluded': '1', 'pose': 'Left', 'truncated': '0'}], 'segmented': '1', 'size': {'depth': '3', 'height': '331', 'width': '500'}, 'source': {'annotation': 'PASCAL VOC2009'

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'filename': '2010_003512.jpg', 'folder': 'VOC2012', 'object': [{'name': 'bottle', 'bndbox': {'xmax': '496', 'xmin': '103', 'ymax': '250', 'ymin': '140'}, 'difficult': '0', 'occluded': '0', 'pose': 'Unspecified', 'truncated': '0'}], 'segmented': '0', 'size': {'depth': '3', 'height': '313', 'width': '500'}, 'source': {'annotation': 'PASCAL VOC2010', 'database': 'The VOC2010 Database', 'image': 'flickr'}}}, {'annotation': {'filename': '2009_004764.jpg', 'folder': 'VOC2012', 'object': [{'name': 'bottle', 'bndbox': {'xmax': '300', 'xmin': '183', 'ymax': '360', 'ymin': '1'}, 'difficult': '0', 'occluded': '0', 'pose': 'Unspecified', 'truncated': '0'}], 'segmented': '0', 'size': {'depth': '3', 'height': '360', 'width': '500'}, 'source': {'annotation': 'PASCAL VOC2009', 'database': 'The VOC2009 Database', 'image': 'flickr'}}}, {'annotation': {'filename': '2010_001976.jpg', 'folder': 'VOC2012', 'object': [{'name': 'dog', 'bndbo

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'folder': 'VOC2012', 'filename': '2008_003252.jpg', 'source': {'database': 'The VOC2008 Database', 'annotation': 'PASCAL VOC2008', 'image': 'flickr'}, 'size': {'width': '500', 'height': '375', 'depth': '3'}, 'segmented': '1', 'object': [{'name': 'chair', 'pose': 'Frontal', 'truncated': '0', 'occluded': '0', 'bndbox': {'xmin': '136', 'ymin': '77', 'xmax': '436', 'ymax': '373'}, 'difficult': '0'}]}}, {'annotation': {'filename': '2011_002464.jpg', 'folder': 'VOC2012', 'object': [{'name': 'sheep', 'bndbox': {'xmax': '367', 'xmin': '86', 'ymax': '340', 'ymin': '60'}, 'difficult': '0', 'occluded': '0', 'pose': 'Right', 'truncated': '0'}, {'name': 'sheep', 'bndbox': {'xmax': '162', 'xmin': '98', 'ymax': '109', 'ymin': '1'}, 'difficult': '0', 'occluded': '0', 'pose': 'Frontal', 'truncated': '0'}, {'name': 'sheep', 'bndbox': {'xmax': '486', 'xmin': '364', 'ymax': '215', 'ymin': '56'}, 'difficult': '0', 'occluded': '0', 'pose':

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'filename': '2009_001339.jpg', 'folder': 'VOC2012', 'object': [{'name': 'bottle', 'bndbox': {'xmax': '256', 'xmin': '154', 'ymax': '375', 'ymin': '191'}, 'difficult': '0', 'occluded': '0', 'pose': 'Unspecified', 'truncated': '1'}, {'name': 'bottle', 'bndbox': {'xmax': '244', 'xmin': '204', 'ymax': '339', 'ymin': '194'}, 'difficult': '0', 'occluded': '1', 'pose': 'Unspecified', 'truncated': '1'}, {'name': 'person', 'bndbox': {'xmax': '500', 'xmin': '219', 'ymax': '284', 'ymin': '56'}, 'difficult': '0', 'occluded': '0', 'pose': 'Frontal', 'truncated': '1'}, {'name': 'person', 'bndbox': {'xmax': '199', 'xmin': '27', 'ymax': '278', 'ymin': '11'}, 'difficult': '0', 'occluded': '1', 'pose': 'Unspecified', 'truncated': '1'}, {'name': 'diningtable', 'bndbox': {'xmax': '500', 'xmin': '1', 'ymax': '375', 'ymin': '275'}, 'difficult': '0', 'occluded': '1', 'pose': 'Unspecified', 'truncated': '1'}], 'segmented': '1', 'size': {'dep

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'filename': '2011_002927.jpg', 'folder': 'VOC2012', 'object': [{'name': 'bottle', 'bndbox': {'xmax': '355', 'xmin': '342', 'ymax': '223', 'ymin': '192'}, 'difficult': '0', 'occluded': '0', 'pose': 'Unspecified', 'truncated': '0'}, {'name': 'chair', 'bndbox': {'xmax': '340', 'xmin': '234', 'ymax': '356', 'ymin': '233'}, 'difficult': '0', 'occluded': '1', 'pose': 'Unspecified', 'truncated': '0'}, {'name': 'chair', 'bndbox': {'xmax': '276', 'xmin': '211', 'ymax': '357', 'ymin': '221'}, 'difficult': '0', 'occluded': '1', 'pose': 'Unspecified', 'truncated': '1'}, {'name': 'chair', 'bndbox': {'xmax': '159', 'xmin': '108', 'ymax': '238', 'ymin': '200'}, 'difficult': '0', 'occluded': '0', 'pose': 'Unspecified', 'truncated': '1'}, {'name': 'diningtable', 'bndbox': {'xmax': '355', 'xmin': '73', 'ymax': '357', 'ymin': '227'}, 'difficult': '0', 'occluded': '1', 'pose': 'Unspecified', 'truncated': '1'}, {'name': 'chair', 'bndbox':

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'filename': '2010_005318.jpg', 'folder': 'VOC2012', 'object': [{'name': 'car', 'bndbox': {'xmax': '445', 'xmin': '55', 'ymax': '252', 'ymin': '111'}, 'difficult': '0', 'occluded': '1', 'pose': 'Left', 'truncated': '0'}, {'name': 'person', 'bndbox': {'xmax': '278', 'xmin': '247', 'ymax': '161', 'ymin': '126'}, 'difficult': '0', 'occluded': '1', 'pose': 'Unspecified', 'truncated': '1'}, {'name': 'person', 'bndbox': {'xmax': '100', 'xmin': '70', 'ymax': '151', 'ymin': '74'}, 'difficult': '1', 'occluded': '1', 'pose': 'Unspecified', 'truncated': '1'}, {'name': 'person', 'bndbox': {'xmax': '128', 'xmin': '90', 'ymax': '148', 'ymin': '62'}, 'difficult': '0', 'occluded': '0', 'pose': 'Right', 'truncated': '1'}, {'name': 'person', 'bndbox': {'xmax': '179', 'xmin': '144', 'ymax': '146', 'ymin': '101'}, 'difficult': '0', 'occluded': '0', 'pose': 'Unspecified', 'truncated': '1'}, {'name': 'person', 'bndbox': {'xmax': '278', 'xmi

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'folder': 'VOC2012', 'filename': '2008_007812.jpg', 'source': {'database': 'The VOC2008 Database', 'annotation': 'PASCAL VOC2008', 'image': 'flickr'}, 'size': {'width': '500', 'height': '375', 'depth': '3'}, 'segmented': '0', 'object': [{'name': 'bird', 'pose': 'Unspecified', 'truncated': '0', 'occluded': '1', 'bndbox': {'xmin': '93', 'ymin': '90', 'xmax': '325', 'ymax': '375'}, 'difficult': '0'}]}}, {'annotation': {'filename': '2010_005782.jpg', 'folder': 'VOC2012', 'object': [{'name': 'bicycle', 'bndbox': {'xmax': '459', 'xmin': '11', 'ymax': '331', 'ymin': '76'}, 'difficult': '0', 'occluded': '0', 'pose': 'Left', 'truncated': '0'}], 'segmented': '0', 'size': {'depth': '3', 'height': '375', 'width': '500'}, 'source': {'annotation': 'PASCAL VOC2010', 'database': 'The VOC2010 Database', 'image': 'flickr'}}}, {'annotation': {'filename': '2009_001952.jpg', 'folder': 'VOC2012', 'object': [{'name': 'person', 'bndbox': {'x

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'filename': '2010_000765.jpg', 'folder': 'VOC2012', 'object': [{'name': 'chair', 'bndbox': {'xmax': '463', 'xmin': '341', 'ymax': '271', 'ymin': '71'}, 'difficult': '0', 'occluded': '0', 'pose': 'Frontal', 'truncated': '1'}, {'name': 'chair', 'bndbox': {'xmax': '216', 'xmin': '104', 'ymax': '274', 'ymin': '156'}, 'difficult': '0', 'occluded': '1', 'pose': 'Frontal', 'truncated': '1'}], 'segmented': '0', 'size': {'depth': '3', 'height': '375', 'width': '500'}, 'source': {'annotation': 'PASCAL VOC2010', 'database': 'The VOC2010 Database', 'image': 'flickr'}}}, {'annotation': {'filename': '2011_000297.jpg', 'folder': 'VOC2012', 'object': [{'name': 'bicycle', 'bndbox': {'xmax': '499', 'xmin': '1', 'ymax': '375', 'ymin': '7'}, 'difficult': '0', 'occluded': '0', 'pose': 'Left', 'truncated': '1'}], 'segmented': '0', 'size': {'depth': '3', 'height': '375', 'width': '500'}, 'source': {'annotation': 'PASCAL VOC2011', 'database'

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'filename': '2011_000834.jpg', 'folder': 'VOC2012', 'object': [{'name': 'bird', 'bndbox': {'xmax': '179', 'xmin': '136', 'ymax': '167', 'ymin': '140'}, 'difficult': '0', 'occluded': '0', 'pose': 'Right', 'truncated': '0'}, {'name': 'bird', 'bndbox': {'xmax': '160', 'xmin': '142', 'ymax': '138', 'ymin': '128'}, 'difficult': '1', 'occluded': '0', 'pose': 'Right', 'truncated': '0'}, {'name': 'sheep', 'bndbox': {'xmax': '429', 'xmin': '329', 'ymax': '255', 'ymin': '168'}, 'difficult': '0', 'occluded': '0', 'pose': 'Right', 'truncated': '0'}, {'name': 'sheep', 'bndbox': {'xmax': '194', 'xmin': '89', 'ymax': '256', 'ymin': '153'}, 'difficult': '0', 'occluded': '0', 'pose': 'Right', 'truncated': '0'}, {'name': 'person', 'bndbox': {'xmax': '44', 'xmin': '35', 'ymax': '52', 'ymin': '33'}, 'difficult': '1', 'occluded': '0', 'pose': 'Unspecified', 'truncated': '0'}, {'name': 'person', 'bndbox': {'xmax': '32', 'xmin': '24', 'ymax

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'filename': '2009_000642.jpg', 'folder': 'VOC2012', 'object': [{'name': 'horse', 'bndbox': {'xmax': '364', 'xmin': '76', 'ymax': '343', 'ymin': '132'}, 'difficult': '0', 'occluded': '1', 'pose': 'Unspecified', 'truncated': '0'}, {'name': 'person', 'bndbox': {'xmax': '269', 'xmin': '183', 'ymax': '233', 'ymin': '45'}, 'difficult': '0', 'occluded': '1', 'pose': 'Unspecified', 'truncated': '0'}, {'name': 'person', 'bndbox': {'xmax': '216', 'xmin': '187', 'ymax': '305', 'ymin': '236'}, 'difficult': '0', 'occluded': '1', 'pose': 'Unspecified', 'truncated': '1'}, {'name': 'person', 'bndbox': {'xmax': '283', 'xmin': '251', 'ymax': '300', 'ymin': '251'}, 'difficult': '0', 'occluded': '1', 'pose': 'Unspecified', 'truncated': '1'}], 'segmented': '0', 'size': {'depth': '3', 'height': '396', 'width': '500'}, 'source': {'annotation': 'PASCAL VOC2009', 'database': 'The VOC2009 Database', 'image': 'flickr'}}}, {'annotation': {'filen

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'folder': 'VOC2012', 'filename': '2008_008649.jpg', 'source': {'database': 'The VOC2008 Database', 'annotation': 'PASCAL VOC2008', 'image': 'flickr'}, 'size': {'width': '500', 'height': '375', 'depth': '3'}, 'segmented': '0', 'object': [{'name': 'tvmonitor', 'pose': 'Unspecified', 'truncated': '0', 'occluded': '0', 'bndbox': {'xmin': '223', 'ymin': '79', 'xmax': '291', 'ymax': '143'}, 'difficult': '0'}, {'name': 'person', 'pose': 'Unspecified', 'truncated': '1', 'occluded': '0', 'bndbox': {'xmin': '347', 'ymin': '15', 'xmax': '396', 'ymax': '93'}, 'difficult': '0'}, {'name': 'sofa', 'pose': 'Unspecified', 'truncated': '1', 'occluded': '0', 'bndbox': {'xmin': '296', 'ymin': '192', 'xmax': '500', 'ymax': '375'}, 'difficult': '0'}, {'name': 'sofa', 'pose': 'Unspecified', 'truncated': '1', 'occluded': '1', 'bndbox': {'xmin': '1', 'ymin': '231', 'xmax': '159', 'ymax': '371'}, 'difficult': '1'}, {'name': 'chair', 'pose': 'U

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'folder': 'VOC2012', 'filename': '2008_001431.jpg', 'source': {'database': 'The VOC2008 Database', 'annotation': 'PASCAL VOC2008', 'image': 'flickr'}, 'size': {'width': '375', 'height': '500', 'depth': '3'}, 'segmented': '0', 'object': [{'name': 'person', 'pose': 'Frontal', 'truncated': '1', 'occluded': '1', 'bndbox': {'xmin': '26', 'ymin': '88', 'xmax': '373', 'ymax': '500'}, 'difficult': '0'}, {'name': 'bottle', 'pose': 'Unspecified', 'truncated': '0', 'occluded': '1', 'bndbox': {'xmin': '102', 'ymin': '249', 'xmax': '160', 'ymax': '429'}, 'difficult': '0'}, {'name': 'bottle', 'pose': 'Unspecified', 'truncated': '0', 'occluded': '1', 'bndbox': {'xmin': '265', 'ymin': '227', 'xmax': '324', 'ymax': '403'}, 'difficult': '0'}]}}, {'annotation': {'filename': '2011_000221.jpg', 'folder': 'VOC2012', 'object': [{'name': 'train', 'bndbox': {'xmax': '399', 'xmin': '166', 'ymax': '217', 'ymin': '97'}, 'difficult': '0', 'occlud

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'filename': '2010_004256.jpg', 'folder': 'VOC2012', 'object': [{'name': 'train', 'bndbox': {'xmax': '378', 'xmin': '102', 'ymax': '254', 'ymin': '142'}, 'difficult': '0', 'occluded': '0', 'pose': 'Unspecified', 'truncated': '0'}], 'segmented': '0', 'size': {'depth': '3', 'height': '311', 'width': '500'}, 'source': {'annotation': 'PASCAL VOC2010', 'database': 'The VOC2010 Database', 'image': 'flickr'}}}, {'annotation': {'filename': '2010_000248.jpg', 'folder': 'VOC2012', 'object': [{'name': 'car', 'bndbox': {'xmax': '500', 'xmin': '3', 'ymax': '315', 'ymin': '105'}, 'difficult': '0', 'occluded': '1', 'pose': 'Left', 'truncated': '0'}, {'name': 'car', 'bndbox': {'xmax': '182', 'xmin': '1', 'ymax': '163', 'ymin': '5'}, 'difficult': '0', 'occluded': '0', 'pose': 'Left', 'truncated': '1'}], 'segmented': '0', 'size': {'depth': '3', 'height': '375', 'width': '500'}, 'source': {'annotation': 'PASCAL VOC2010', 'database': 'The

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'filename': '2009_000557.jpg', 'folder': 'VOC2012', 'object': [{'name': 'bus', 'bndbox': {'xmax': '489', 'xmin': '21', 'ymax': '325', 'ymin': '70'}, 'difficult': '0', 'occluded': '0', 'pose': 'Unspecified', 'truncated': '0'}, {'name': 'bus', 'bndbox': {'xmax': '500', 'xmin': '363', 'ymax': '297', 'ymin': '137'}, 'difficult': '0', 'occluded': '1', 'pose': 'Unspecified', 'truncated': '1'}, {'name': 'car', 'bndbox': {'xmax': '23', 'xmin': '1', 'ymax': '234', 'ymin': '214'}, 'difficult': '0', 'occluded': '0', 'pose': 'Right', 'truncated': '1'}], 'segmented': '0', 'size': {'depth': '3', 'height': '375', 'width': '500'}, 'source': {'annotation': 'PASCAL VOC2009', 'database': 'The VOC2009 Database', 'image': 'flickr'}}}, {'annotation': {'filename': '2010_003509.jpg', 'folder': 'VOC2012', 'object': [{'name': 'cat', 'bndbox': {'xmax': '500', 'xmin': '2', 'ymax': '332', 'ymin': '1'}, 'difficult': '0', 'occluded': '0', 'pose': '

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'folder': 'VOC2012', 'filename': '2008_003870.jpg', 'source': {'database': 'The VOC2008 Database', 'annotation': 'PASCAL VOC2008', 'image': 'flickr'}, 'size': {'width': '500', 'height': '375', 'depth': '3'}, 'segmented': '0', 'object': [{'name': 'boat', 'pose': 'Frontal', 'truncated': '0', 'occluded': '0', 'bndbox': {'xmin': '143', 'ymin': '117', 'xmax': '209', 'ymax': '204'}, 'difficult': '0'}, {'name': 'boat', 'pose': 'Unspecified', 'truncated': '0', 'occluded': '1', 'bndbox': {'xmin': '209', 'ymin': '114', 'xmax': '263', 'ymax': '194'}, 'difficult': '0'}, {'name': 'boat', 'pose': 'Unspecified', 'truncated': '0', 'occluded': '0', 'bndbox': {'xmin': '278', 'ymin': '162', 'xmax': '332', 'ymax': '203'}, 'difficult': '0'}, {'name': 'boat', 'pose': 'Unspecified', 'truncated': '0', 'occluded': '0', 'bndbox': {'xmin': '269', 'ymin': '128', 'xmax': '289', 'ymax': '143'}, 'difficult': '1'}, {'name': 'boat', 'pose': 'Unspecif

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'folder': 'VOC2012', 'filename': '2008_005639.jpg', 'source': {'database': 'The VOC2008 Database', 'annotation': 'PASCAL VOC2008', 'image': 'flickr'}, 'size': {'width': '500', 'height': '375', 'depth': '3'}, 'segmented': '0', 'object': [{'name': 'tvmonitor', 'pose': 'Frontal', 'truncated': '0', 'occluded': '0', 'bndbox': {'xmin': '14', 'ymin': '100', 'xmax': '115', 'ymax': '192'}, 'difficult': '0'}, {'name': 'bottle', 'pose': 'Unspecified', 'truncated': '1', 'occluded': '0', 'bndbox': {'xmin': '394', 'ymin': '347', 'xmax': '422', 'ymax': '375'}, 'difficult': '0'}, {'name': 'bottle', 'pose': 'Unspecified', 'truncated': '1', 'occluded': '0', 'bndbox': {'xmin': '477', 'ymin': '341', 'xmax': '500', 'ymax': '375'}, 'difficult': '0'}]}}, {'annotation': {'folder': 'VOC2012', 'filename': '2008_007870.jpg', 'source': {'database': 'The VOC2008 Database', 'annotation': 'PASCAL VOC2008', 'image': 'flickr'}, 'size': {'width': '500

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'filename': '2009_003609.jpg', 'folder': 'VOC2012', 'object': [{'name': 'person', 'bndbox': {'xmax': '334', 'xmin': '1', 'ymax': '500', 'ymin': '1'}, 'difficult': '0', 'occluded': '0', 'pose': 'Unspecified', 'truncated': '1'}, {'name': 'bottle', 'bndbox': {'xmax': '334', 'xmin': '253', 'ymax': '500', 'ymin': '277'}, 'difficult': '0', 'occluded': '1', 'pose': 'Unspecified', 'truncated': '1'}], 'segmented': '0', 'size': {'depth': '3', 'height': '500', 'width': '334'}, 'source': {'annotation': 'PASCAL VOC2009', 'database': 'The VOC2009 Database', 'image': 'flickr'}}}, {'annotation': {'filename': '2010_000310.jpg', 'folder': 'VOC2012', 'object': [{'name': 'bottle', 'bndbox': {'xmax': '303', 'xmin': '29', 'ymax': '308', 'ymin': '1'}, 'difficult': '0', 'occluded': '0', 'pose': 'Unspecified', 'truncated': '1'}, {'name': 'bottle', 'bndbox': {'xmax': '500', 'xmin': '195', 'ymax': '308', 'ymin': '130'}, 'difficult': '0', 'occlu

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'folder': 'VOC2012', 'filename': '2008_004844.jpg', 'source': {'database': 'The VOC2008 Database', 'annotation': 'PASCAL VOC2008', 'image': 'flickr'}, 'size': {'width': '500', 'height': '375', 'depth': '3'}, 'segmented': '0', 'object': [{'name': 'bus', 'pose': 'Right', 'truncated': '0', 'occluded': '0', 'bndbox': {'xmin': '126', 'ymin': '178', 'xmax': '449', 'ymax': '303'}, 'difficult': '0'}, {'name': 'person', 'pose': 'Frontal', 'truncated': '0', 'occluded': '0', 'bndbox': {'xmin': '23', 'ymin': '224', 'xmax': '51', 'ymax': '312'}, 'difficult': '0'}, {'name': 'person', 'pose': 'Frontal', 'truncated': '1', 'occluded': '0', 'bndbox': {'xmin': '1', 'ymin': '233', 'xmax': '24', 'ymax': '313'}, 'difficult': '0'}]}}, {'annotation': {'folder': 'VOC2012', 'filename': '2008_003075.jpg', 'source': {'database': 'The VOC2008 Database', 'annotation': 'PASCAL VOC2008', 'image': 'flickr'}, 'size': {'width': '500', 'height': '318', 

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'filename': '2010_004738.jpg', 'folder': 'VOC2012', 'object': [{'name': 'dog', 'bndbox': {'xmax': '255', 'xmin': '1', 'ymax': '396', 'ymin': '107'}, 'difficult': '0', 'occluded': '0', 'pose': 'Frontal', 'truncated': '0'}, {'name': 'chair', 'bndbox': {'xmax': '347', 'xmin': '3', 'ymax': '500', 'ymin': '2'}, 'difficult': '0', 'occluded': '1', 'pose': 'Unspecified', 'truncated': '1'}], 'segmented': '0', 'size': {'depth': '3', 'height': '500', 'width': '347'}, 'source': {'annotation': 'PASCAL VOC2010', 'database': 'The VOC2010 Database', 'image': 'flickr'}}}, {'annotation': {'filename': '2010_005090.jpg', 'folder': 'VOC2012', 'object': [{'name': 'dog', 'bndbox': {'xmax': '435', 'xmin': '89', 'ymax': '329', 'ymin': '17'}, 'difficult': '0', 'occluded': '0', 'pose': 'Right', 'truncated': '0'}], 'segmented': '0', 'size': {'depth': '3', 'height': '334', 'width': '500'}, 'source': {'annotation': 'PASCAL VOC2010', 'database': 'T

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'folder': 'VOC2012', 'filename': '2008_007161.jpg', 'source': {'database': 'The VOC2008 Database', 'annotation': 'PASCAL VOC2008', 'image': 'flickr'}, 'size': {'width': '500', 'height': '370', 'depth': '3'}, 'segmented': '0', 'object': [{'name': 'aeroplane', 'pose': 'Left', 'truncated': '0', 'occluded': '0', 'bndbox': {'xmin': '32', 'ymin': '86', 'xmax': '468', 'ymax': '258'}, 'difficult': '0'}]}}, {'annotation': {'filename': '2010_005791.jpg', 'folder': 'VOC2012', 'object': [{'name': 'cat', 'bndbox': {'xmax': '377', 'xmin': '221', 'ymax': '243', 'ymin': '65'}, 'difficult': '0', 'occluded': '1', 'pose': 'Frontal', 'truncated': '0'}], 'segmented': '1', 'size': {'depth': '3', 'height': '375', 'width': '500'}, 'source': {'annotation': 'PASCAL VOC2010', 'database': 'The VOC2010 Database', 'image': 'flickr'}}}, {'annotation': {'filename': '2011_002134.jpg', 'folder': 'VOC2012', 'object': [{'name': 'cow', 'bndbox': {'xmax':

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'filename': '2010_005201.jpg', 'folder': 'VOC2012', 'object': [{'name': 'dog', 'bndbox': {'xmax': '450', 'xmin': '1', 'ymax': '374', 'ymin': '33'}, 'difficult': '0', 'occluded': '0', 'pose': 'Right', 'truncated': '1'}], 'segmented': '0', 'size': {'depth': '3', 'height': '375', 'width': '500'}, 'source': {'annotation': 'PASCAL VOC2010', 'database': 'The VOC2010 Database', 'image': 'flickr'}}}, {'annotation': {'filename': '2009_001145.jpg', 'folder': 'VOC2012', 'object': [{'name': 'cow', 'bndbox': {'xmax': '333', 'xmin': '84', 'ymax': '270', 'ymin': '68'}, 'difficult': '0', 'occluded': '0', 'pose': 'Left', 'truncated': '0'}], 'segmented': '1', 'size': {'depth': '3', 'height': '333', 'width': '500'}, 'source': {'annotation': 'PASCAL VOC2009', 'database': 'The VOC2009 Database', 'image': 'flickr'}}}, {'annotation': {'filename': '2010_004620.jpg', 'folder': 'VOC2012', 'object': [{'name': 'dog', 'bndbox': {'xmax': '212', 'x

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'filename': '2010_001406.jpg', 'folder': 'VOC2012', 'object': [{'name': 'cow', 'bndbox': {'xmax': '105', 'xmin': '7', 'ymax': '268', 'ymin': '219'}, 'difficult': '0', 'occluded': '0', 'pose': 'Left', 'truncated': '1'}, {'name': 'cow', 'bndbox': {'xmax': '193', 'xmin': '47', 'ymax': '306', 'ymin': '232'}, 'difficult': '0', 'occluded': '1', 'pose': 'Left', 'truncated': '0'}, {'name': 'cow', 'bndbox': {'xmax': '315', 'xmin': '216', 'ymax': '241', 'ymin': '189'}, 'difficult': '0', 'occluded': '0', 'pose': 'Left', 'truncated': '0'}, {'name': 'cow', 'bndbox': {'xmax': '436', 'xmin': '349', 'ymax': '236', 'ymin': '174'}, 'difficult': '0', 'occluded': '0', 'pose': 'Rear', 'truncated': '0'}], 'segmented': '0', 'size': {'depth': '3', 'height': '375', 'width': '500'}, 'source': {'annotation': 'PASCAL VOC2010', 'database': 'The VOC2010 Database', 'image': 'flickr'}}}, {'annotation': {'folder': 'VOC2012', 'filename': '2008_003386.

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'filename': '2009_005016.jpg', 'folder': 'VOC2012', 'object': [{'name': 'diningtable', 'bndbox': {'xmax': '500', 'xmin': '298', 'ymax': '375', 'ymin': '127'}, 'difficult': '0', 'occluded': '1', 'pose': 'Unspecified', 'truncated': '1'}, {'name': 'person', 'bndbox': {'xmax': '340', 'xmin': '171', 'ymax': '373', 'ymin': '75'}, 'difficult': '0', 'occluded': '1', 'pose': 'Right', 'truncated': '1'}, {'name': 'chair', 'bndbox': {'xmax': '456', 'xmin': '327', 'ymax': '128', 'ymin': '55'}, 'difficult': '0', 'occluded': '0', 'pose': 'Frontal', 'truncated': '1'}, {'name': 'chair', 'bndbox': {'xmax': '51', 'xmin': '1', 'ymax': '252', 'ymin': '70'}, 'difficult': '0', 'occluded': '0', 'pose': 'Unspecified', 'truncated': '1'}, {'name': 'chair', 'bndbox': {'xmax': '295', 'xmin': '60', 'ymax': '375', 'ymin': '153'}, 'difficult': '0', 'occluded': '1', 'pose': 'Right', 'truncated': '1'}], 'segmented': '1', 'size': {'depth': '3', 'height

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'filename': '2009_000777.jpg', 'folder': 'VOC2012', 'object': [{'name': 'car', 'bndbox': {'xmax': '478', 'xmin': '5', 'ymax': '369', 'ymin': '32'}, 'difficult': '0', 'occluded': '0', 'pose': 'Left', 'truncated': '0'}, {'name': 'car', 'bndbox': {'xmax': '83', 'xmin': '1', 'ymax': '165', 'ymin': '101'}, 'difficult': '0', 'occluded': '1', 'pose': 'Left', 'truncated': '1'}, {'name': 'car', 'bndbox': {'xmax': '176', 'xmin': '66', 'ymax': '172', 'ymin': '98'}, 'difficult': '0', 'occluded': '1', 'pose': 'Rear', 'truncated': '1'}, {'name': 'person', 'bndbox': {'xmax': '481', 'xmin': '473', 'ymax': '153', 'ymin': '119'}, 'difficult': '0', 'occluded': '0', 'pose': 'Rear', 'truncated': '1'}, {'name': 'car', 'bndbox': {'xmax': '500', 'xmin': '486', 'ymax': '141', 'ymin': '124'}, 'difficult': '0', 'occluded': '0', 'pose': 'Right', 'truncated': '1'}], 'segmented': '0', 'size': {'depth': '3', 'height': '375', 'width': '500'}, 'sourc

  with amp.autocast(autocast):


Type of targets: <class 'list'>
Content of targets: [{'annotation': {'filename': '2009_004984.jpg', 'folder': 'VOC2012', 'object': [{'name': 'bicycle', 'bndbox': {'xmax': '462', 'xmin': '46', 'ymax': '329', 'ymin': '52'}, 'difficult': '0', 'occluded': '0', 'pose': 'Left', 'truncated': '0'}, {'name': 'bottle', 'bndbox': {'xmax': '250', 'xmin': '197', 'ymax': '220', 'ymin': '160'}, 'difficult': '0', 'occluded': '0', 'pose': 'Unspecified', 'truncated': '0'}], 'segmented': '0', 'size': {'depth': '3', 'height': '375', 'width': '500'}, 'source': {'annotation': 'PASCAL VOC2009', 'database': 'The VOC2009 Database', 'image': 'flickr'}}}, {'annotation': {'filename': '2010_002143.jpg', 'folder': 'VOC2012', 'object': [{'name': 'cat', 'bndbox': {'xmax': '284', 'xmin': '136', 'ymax': '316', 'ymin': '23'}, 'difficult': '0', 'occluded': '0', 'pose': 'Unspecified', 'truncated': '0'}], 'segmented': '0', 'size': {'depth': '3', 'height': '375', 'width': '500'}, 'source': {'annotation': 'PASCAL VOC2010', '

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7885b7d65fc0>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1477, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1441, in _shutdown_workers
    w.join(timeout=_utils.MP_STATUS_CHECK_INTERVAL)
  File "/usr/lib/python3.10/multiprocessing/process.py", line 149, in join
    res = self._popen.wait(timeout)
  File "/usr/lib/python3.10/multiprocessing/popen_fork.py", line 40, in wait
    if not wait([self.sentinel], timeout):
  File "/usr/lib/python3.10/multiprocessing/connection.py", line 931, in wait
    ready = selector.select(timeout)
  File "/usr/lib/python3.10/selectors.py", line 416, in select
    fd_event_list = self._selector.poll(timeout)
KeyboardInterrupt: 


KeyboardInterrupt: 

In [None]:
def val(model, val_loader, device):
    model.to(device)
    model.eval()

    correct = 0
    total = 0

    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)

            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    print(f"Test Accuracy: {accuracy:.2f}%")
    return accuracy