In [1]:
import torch
import torchvision
import argparse

In [2]:
import arrayfire as af
import array

In [3]:
def toArrayFire(x):
    """Build an ArrayFire array from a torch tensor.

    The tensor is moved to the CPU, detached from autograd, made contiguous
    and viewed as a NumPy array. The ArrayFire array is then constructed from
    that buffer's address, using the NumPy dimensions in reverse order
    (presumably to account for ArrayFire's column-major layout) and the NumPy
    dtype character as the element type.

    Args:
        x: torch tensor to convert.

    Returns:
        The ``af.Array`` built from the tensor's data.
    """
    host = x.cpu().detach().contiguous().numpy()
    # A zero-dimensional tensor has an empty shape; give it dims (1,) instead.
    dims = host.shape[::-1] if len(host.shape) != 0 else (1,)
    return af.Array(host.ctypes.data, dims, host.dtype.char)

def saveStateDict(model, filepath):
    """Append the entries of ``model.state_dict()`` to an ArrayFire array file.

    Every entry is written with
    ``af.array.save_array(key, array, filepath, True)`` in state-dict
    iteration order, subject to these rules:

    * Entries whose name contains ``'running'`` are not written.
    * Zero-dimensional entries are not written.
    * Packed attention projections (name contains ``'in_proj'``) are split
      into three equal chunks along dim 0 and buffered under the keys
      ``'0q_'`` / ``'1k_'`` / ``'2v_'`` + ``'0'`` (weight) or ``'1'`` (bias)
      + the original name. When the ``in_proj_bias`` entry arrives the buffer
      is flushed in sorted key order (q-weight, q-bias, k-weight, k-bias,
      v-weight, v-bias), transposing the weight chunks.
    * ``input_proj.bias`` is reshaped to ``(1, 1, N)`` before conversion.
    * Weights whose name contains ``'fc'``, ``'proj'``, ``'linear'`` or
      ``'embed'`` (except ``'query_embed'``) are transposed before saving.

    Args:
        model: module whose ``state_dict()`` is exported.
        filepath: path of the ArrayFire array file to append to.

    Returns:
        None.
    """
    # Chunks of the attention layer currently being assembled.
    # NOTE: a packed weight that is never followed by an ``in_proj_bias``
    # entry stays in this buffer and is not written.
    pending = {}
    for name, param in model.state_dict().items():
        if 'running' in name:
            continue

        if 'in_proj' in name and name != 'in_proj.weight' and name != 'in_proj.bias':
            is_bias = 'in_proj_bias' in name
            # '0' sorts the weight chunk ahead of the matching '1' bias chunk.
            order = '1' if is_bias else '0'
            q, k, v = param.chunk(3, dim=0)
            pending['0q_' + order + name] = q
            pending['1k_' + order + name] = k
            pending['2v_' + order + name] = v
            if is_bias:
                for key in sorted(pending):
                    af_array = toArrayFire(pending[key])
                    if 'weight' in key:
                        af_array = af.array.transpose(af_array)
                    af.array.save_array(key, af_array, filepath, True)
                pending = {}
            continue

        if len(param.size()) == 0:
            continue

        if 'input_proj.bias' in name:
            # -1 infers the channel count instead of assuming 256.
            param = param.reshape((1, 1, -1))
        af_array = toArrayFire(param)

        if 'weight' in name and (
                any(tag in name for tag in ('fc', 'proj', 'linear'))
                or ('embed' in name and 'query_embed' not in name)):
            af_array = af.array.transpose(af_array)

        af.array.save_array(name, af_array, filepath, True)
    
def create_parser():
    """Build the command-line parser holding every detector option.

    The parser is created with ``'Set transformer detector'`` as its program
    name and with ``add_help=False``, so no automatic ``-h/--help`` flag is
    registered.

    Returns:
        argparse.ArgumentParser: the populated parser.
    """
    parser = argparse.ArgumentParser('Set transformer detector', add_help=False)
    add = parser.add_argument

    # Optimisation and schedule
    add('--lr', type=float, default=1e-4)
    add('--lr_backbone', type=float, default=1e-5)
    add('--batch_size', type=int, default=2)
    add('--weight_decay', type=float, default=1e-4)
    add('--epochs', type=int, default=300)
    add('--lr_drop', type=int, default=200)
    add('--optimizer', type=str, default="adam")
    add('--clip_max_norm', type=float, default=0.1,
        help='gradient clipping max norm')
    add('--eval_skip', type=int, default=1,
        help='do evaluation every "eval_skip" frames')
    add('--schedule', type=str, default='step',
        choices=('step', 'multistep'))

    # Model: pretrained weights and backbone
    add('--frozen_weights', type=str, default=None,
        help="Path to the pretrained model. If set, only the mask head will be trained")
    add('--backbone', type=str, default='resnet50',
        help="Name of the convolutional backbone to use")
    add('--dilation', action='store_true',
        help="If true, we replace stride with dilation in the last convolutional block (DC5)")
    add('--position_embedding', type=str, default='sine',
        choices=('sine', 'learned'),
        help="Type of positional embedding to use on top of the image features")

    # Model: transformer
    add('--enc_layers', type=int, default=6,
        help="Number of encoding layers in the transformer")
    add('--dec_layers', type=int, default=6,
        help="Number of decoding layers in the transformer")
    add('--dim_feedforward', type=int, default=2048,
        help="Intermediate size of the feedforward layers in the transformer blocks")
    add('--hidden_dim', type=int, default=256,
        help="Size of the embeddings (dimension of the transformer)")
    add('--dropout', type=float, default=0.1,
        help="Dropout applied in the transformer")
    add('--nheads', type=int, default=8,
        help="Number of attention heads inside the transformer's attentions")
    add('--num_queries', type=int, default=100,
        help="Number of query slots")
    add('--pre_norm', action='store_true')
    add('--no_pass_pos_and_query', action='store_false',
        dest='pass_pos_and_query',
        help="Disables passing the positional encodings to each attention layers")

    # Model: segmentation head
    add('--mask_model', type=str, default='none',
        choices=("none", "smallconv", "v2"),
        help="Segmentation head to be used (if None, segmentation will not be trained)")

    # Loss
    add('--no_aux_loss', action='store_false', dest='aux_loss',
        help="Disables auxiliary decoding losses (loss at each layer)")
    add('--set_loss', type=str, default='hungarian',
        choices=('sequential', 'hungarian', 'lexicographical'),
        help="Type of matching to perform in the loss")
    add('--bcl', action='store_true', dest='use_bcl',
        help="Use balanced classification loss")

    # Loss: matcher costs
    add('--set_cost_class', type=float, default=1,
        help="Class coefficient in the matching cost")
    add('--set_cost_bbox', type=float, default=5,
        help="L1 box coefficient in the matching cost")
    add('--set_cost_giou', type=float, default=2,
        help="giou box coefficient in the matching cost")

    # Loss: term coefficients
    add('--mask_loss_coef', type=float, default=1)
    add('--dice_loss_coef', type=float, default=1)
    add('--bbox_loss_coef', type=float, default=5)
    add('--giou_loss_coef', type=float, default=2)
    add('--eos_coef', type=float, default=0.1,
        help="Relative classification weight of the no-object class")

    # Dataset
    add('--dataset_file', default='coco')
    add('--coco_path', type=str, default='/datasets01/COCO/022719')
    add('--coco_panoptic_path', type=str, default='/datasets01/COCO/060419')
    add('--remove_difficult', action='store_true')
    add('--masks', action='store_true')

    # Run control
    add('--output-dir', default='',
        help='path where to save, empty for no saving')
    add('--device', default='cuda',
        help='device to use for training / testing')
    add('--seed', type=int, default=42)
    add('--resume', default='', help='resume from checkpoint')
    add('--start-epoch', type=int, default=0, metavar='N',
        help='start epoch')
    add('--eval', action='store_true')
    add('--num_workers', type=int, default=2)

    # Distributed training
    add('--world-size', type=int, default=1,
        help='number of distributed processes')
    add('--dist-url', default='env://',
        help='url used to set up distributed training')
    return parser

In [21]:
import models.detr
import datasets.coco

parser = create_parser()
# Single source of truth for the checkpoint location; it is passed to the
# parser through --resume below.
pretrained_path = '/private/home/padentomasello/scratch/pytorch_testing/detr-r50-e632da11.pth'
# Dropout is disabled for this run. Use parser.parse_args([]) instead to get
# the pure defaults (no checkpoint).
args = parser.parse_args(["--dropout=0.0", "--eos_coef=0.1", "--resume=" + pretrained_path])

# Build the model, its criterion and the third object returned by the
# project's builder, plus the COCO 'train' dataset, from the parsed options.
model, criterion, post = models.detr.build(args)
dataset = datasets.coco.build('train', args)

if args.resume:
    if args.resume.startswith('https'):
        checkpoint = torch.hub.load_state_dict_from_url(
            args.resume, map_location='cpu', check_hash=True)
    else:
        # NOTE(review): torch.load unpickles the file; only load checkpoints
        # from a trusted source.
        checkpoint = torch.load(args.resume, map_location='cpu')
    # The weights are stored under the 'model' key of the checkpoint.
    model.load_state_dict(checkpoint['model'])

loading annotations into memory...
Done (t=21.31s)
creating index...
index created!


In [13]:
    from torch.utils.data import DataLoader, DistributedSampler
    import util.misc as utils

    # Walk the dataset in its stored order, one sample per batch. The batch
    # size is fixed to 1 here rather than taken from args.batch_size.
    dataset_train = dataset
    sampler_train = torch.utils.data.SequentialSampler(dataset_train)
    batch_sampler_train = torch.utils.data.BatchSampler(
        sampler_train, batch_size=1, drop_last=True)
    data_loader_train = DataLoader(
        dataset_train,
        batch_sampler=batch_sampler_train,
        collate_fn=utils.collate_fn,
        num_workers=1,
    )

In [22]:
device = torch.device(args.device)
# Only the first batch is needed: move it to the target device, then stop.
for sample_tmp, target_tmp in data_loader_train:
    samples = sample_tmp.to(device)
    targets = [
        {key: value.to(device) for key, value in target.items()}
        for target in target_tmp
    ]
    break

In [23]:
# Wildcard imports: every public name of the project's transformer and
# backbone modules is pulled into the notebook namespace.
from models.transformer import *


from models.backbone import *
# Destination file for every array saved by this cell.
filepath = '/private/home/padentomasello/scratch/pytorch_testing/detr.array'

# NOTE(review): N, C, H and W are not referenced again in the visible cells.
N = 2
C = 3
H = 224
W = 224


# Take the image tensor and mask of the batch; gradients are enabled on the
# image tensor before the forward pass.
image = samples.tensors
image.requires_grad = True
mask = samples.mask

# Dump the inputs first. 'image' is the only save that passes False as the
# last argument; every later save passes True (presumably the append flag, so
# this call starts the file -- TODO confirm against the ArrayFire docs).
af.array.save_array('image', toArrayFire(image), filepath, False)
# The mask and the labels are cast to float before conversion.
af.array.save_array('mask', toArrayFire(mask.float()), filepath, True)
# Only the first target of the batch is exported.
target_boxes = targets[0]['boxes']
target_classes = targets[0]['labels']
af.array.save_array('target_boxes', toArrayFire(target_boxes), filepath, True)
af.array.save_array('target_labels', toArrayFire(target_classes.float()), filepath, True)
#af.array.save_array('pos', toArrayFire(pos), filepath, True)


# Run the forward pass in eval mode, then write the model weights followed by
# the two prediction tensors to the same file.
model.to(device)
model.eval()
output = model(samples)
saveStateDict(model, filepath)
af.array.save_array('pred_logits', toArrayFire(output['pred_logits']), filepath, True)
af.array.save_array('pred_boxes', toArrayFire(output['pred_boxes']), filepath, True)


429

In [24]:
# Display the class logits returned by the forward pass.
output["pred_logits"]

tensor([[[-16.0538,  -1.3937,  -7.2307,  ..., -17.5276,  -6.5040,   7.9455],
         [-15.8562,  -0.7358,  -8.1099,  ..., -13.9172,  -5.6755,   7.3155],
         [-16.5377,  -2.3803,  -5.3548,  ..., -17.0237,  -8.8624,   7.2895],
         ...,
         [-16.0461,   0.1208,  -6.3159,  ..., -12.1571,  -8.2185,   6.3225],
         [-16.3355,  -0.9242,  -5.6127,  ..., -18.6856, -12.2565,   8.1797],
         [-15.3957,  -0.2684,  -6.8669,  ..., -14.4996,  -6.7047,   7.0722]]],
       device='cuda:1', grad_fn=<SelectBackward>)

criterion(output, targets)

In [25]:
# Evaluate the criterion on the model output against the batch targets.
losses = criterion(output, targets)

In [26]:
# Back-propagate only the 'loss_giou' term of the loss dictionary.
losses['loss_giou'].backward()

In [27]:
# Display the class logits again, this time after the backward call.
output['pred_logits']

tensor([[[-16.0538,  -1.3937,  -7.2307,  ..., -17.5276,  -6.5040,   7.9455],
         [-15.8562,  -0.7358,  -8.1099,  ..., -13.9172,  -5.6755,   7.3155],
         [-16.5377,  -2.3803,  -5.3548,  ..., -17.0237,  -8.8624,   7.2895],
         ...,
         [-16.0461,   0.1208,  -6.3159,  ..., -12.1571,  -8.2185,   6.3225],
         [-16.3355,  -0.9242,  -5.6127,  ..., -18.6856, -12.2565,   8.1797],
         [-15.3957,  -0.2684,  -6.8669,  ..., -14.4996,  -6.7047,   7.0722]]],
       device='cuda:1', grad_fn=<SelectBackward>)

In [28]:
# NOTE(review): no visible cell defines `backbone`, so this cell raises
# NameError; remove it or reference the intended object.
backbone

NameError: name 'backbone' is not defined

In [29]:
# Display the full dictionary of loss terms returned by the criterion.
losses

{'loss_ce': tensor(0.9591, device='cuda:1', grad_fn=<NllLoss2DBackward>),
 'class_error': tensor(66.6667, device='cuda:1'),
 'loss_bbox': tensor(0.0881, device='cuda:1', grad_fn=<DivBackward0>),
 'loss_giou': tensor(0.2855, device='cuda:1', grad_fn=<DivBackward0>),
 'cardinality_error': tensor(3., device='cuda:1'),
 'loss_ce_0': tensor(1.2122, device='cuda:1', grad_fn=<NllLoss2DBackward>),
 'loss_bbox_0': tensor(0.0504, device='cuda:1', grad_fn=<DivBackward0>),
 'loss_giou_0': tensor(0.1980, device='cuda:1', grad_fn=<DivBackward0>),
 'cardinality_error_0': tensor(3., device='cuda:1'),
 'loss_ce_1': tensor(0.9716, device='cuda:1', grad_fn=<NllLoss2DBackward>),
 'loss_bbox_1': tensor(0.0953, device='cuda:1', grad_fn=<DivBackward0>),
 'loss_giou_1': tensor(0.2961, device='cuda:1', grad_fn=<DivBackward0>),
 'cardinality_error_1': tensor(3., device='cuda:1'),
 'loss_ce_2': tensor(0.7762, device='cuda:1', grad_fn=<NllLoss2DBackward>),
 'loss_bbox_2': tensor(0.1252, device='cuda:1', grad_fn=<