In [1]:
from parts import *

In [2]:
import argparse
import datetime
import json
import random
import time
from pathlib import Path
import numpy as np
import torch
from torch.utils.data import Dataset,DataLoader, DistributedSampler

import datasets
import util.misc as utils
from datasets import build_dataset, get_coco_api_from_dataset
from engine import evaluate, train_one_epoch
from models import build_model
# import torch.nn.functional as F
import matplotlib.pyplot as plt
from models.segmentation import VisTRsegm
import pycocotools.mask as mask_util
from util.box_ops import box_xyxy_to_cxcywh
##
import glob
import re
import os
from torchvision.ops import masks_to_boxes
from PIL import Image
from tqdm.notebook import tqdm
import torchvision.transforms.functional as F
from engine import train_one_epoch

In [3]:
# Captures the last run of digits in a string: digits followed only by
# non-digits up to the end. Written as a raw string so that "\d" and "\D" are
# handed to the regex engine verbatim instead of being parsed as (invalid)
# string escape sequences.
pat = re.compile(r"(\d+)\D*$")


def key_func(x):
    """Sort key that orders paths by the last number in their file name.

    Args:
        x: Path string; only its final component (the file name) is inspected.

    Returns:
        The last digit run of the file name, right-aligned with spaces to a
        width of 10 so that shorter digit runs sort before longer ones, or
        ``x`` unchanged when the file name contains no digits.
    """
    mat = pat.search(os.path.split(x)[-1])  # match last group of digits
    if mat is None:
        return x
    return "{:>10}".format(mat.group(1))  # right align to 10 characters

# Root folder of the dataset; the Train/ and Test/ sub-folders each hold one
# directory per recording.
# (Alternative location used previously: './aster_updated_data_22_01_2022'.)
DATA_ROOT = '../Dissertation/aster_updated_data_22_01_2022'

n_frames = 36  # number of consecutive frames in one clip


def build_clip_lists(recording_dirs, clip_len):
    """Slice every recording into overlapping clips of ``clip_len`` frames.

    For each directory the frame (``*.jpg``) and mask (``*.png``) paths are
    sorted with ``key_func`` and a window of ``clip_len`` items is slid over
    them one frame at a time.

    Args:
        recording_dirs: Iterable of recording directory paths.
        clip_len: Number of consecutive frames per clip.

    Returns:
        ``(image_clips, mask_clips)``: two parallel lists in which entry ``k``
        holds the ``clip_len`` frame paths / mask paths of clip ``k``.
        Recordings with fewer than ``clip_len`` frames contribute no clips.

    Raises:
        ValueError: If a recording has a different number of frames and masks,
            because windows over both lists would no longer be aligned.
    """
    image_clips, mask_clips = [], []
    for path in recording_dirs:
        frames = sorted(glob.glob(path + '/*_0001_IMAGES/images/*.jpg'), key=key_func)
        masks = sorted(glob.glob(path + '/*_0001_IMAGES/masks/*.png'), key=key_func)
        if len(frames) != len(masks):
            raise ValueError(
                f'{path}: found {len(frames)} frames but {len(masks)} masks; '
                'frames and masks must pair up one-to-one'
            )
        for start in range(len(frames) - clip_len + 1):
            image_clips.append(frames[start:start + clip_len])
            mask_clips.append(masks[start:start + clip_len])
    return image_clips, mask_clips


# glob returns entries in arbitrary, filesystem-dependent order; sorting keeps
# the clip indices stable across machines and runs.
train_file_dir = sorted(glob.glob(DATA_ROOT + '/Train/*'))
train_image_list, train_mask_list = build_clip_lists(train_file_dir, n_frames)

test_file_dir = sorted(glob.glob(DATA_ROOT + '/Test/*'))
test_image_list, test_mask_list = build_clip_lists(test_file_dir, n_frames)

In [4]:
def make_transform(image_set):
    """Build the transform pipeline for one dataset split.

    Args:
        image_set: Either ``'train'`` or ``'val'``.

    Returns:
        A ``T.Compose``. For ``'train'`` it applies a random horizontal flip
        followed by tensor conversion and normalisation; for ``'val'`` it
        applies tensor conversion and normalisation only.

    Raises:
        ValueError: If ``image_set`` is neither ``'train'`` nor ``'val'``.
    """
    # Reject unknown splits up front instead of silently returning None, which
    # a caller would interpret as "apply no transform at all".
    if image_set not in ('train', 'val'):
        raise ValueError(f'unknown image_set: {image_set!r}')

    normalize = T.Compose([
        T.ToTensor(),
        T.Normalize([0.2316], [0.2038]),  # mean, standard deviation (one channel)
    ])
    if image_set == 'train':
        return T.Compose([
            T.RandomHorizontalFlip(),
            normalize,
        ])

    return T.Compose([normalize])

In [5]:
def get_bbox(mask_list):
    """Compute boxes for every mask tensor and stack them into one tensor.

    Args:
        mask_list: Iterable of mask tensors; each one is passed unchanged to
            ``torchvision.ops.masks_to_boxes``.

    Returns:
        The per-mask results of ``masks_to_boxes`` concatenated along
        dimension 0. An empty ``mask_list`` yields an empty ``(0, 4)`` float
        tensor.
    """
    # NOTE(review): a mask with no non-zero pixel has nothing to bound; confirm
    # how masks_to_boxes behaves for such frames in the installed torchvision.
    boxes = [masks_to_boxes(mask) for mask in mask_list]
    if not boxes:
        # torch.cat refuses an empty list, so return an explicit empty result.
        return torch.zeros((0, 4))
    return torch.cat(boxes, dim=0)

In [6]:
class ImagePathDataset(Dataset):
    """Dataset of fixed-length clips described by frame and mask file paths.

    Args:
        image_path: Sequence of clips; each clip is a sequence of image file
            paths.
        mask_path: Sequence of clips; each clip is a sequence of mask file
            paths, indexed in parallel with ``image_path``.
        n_frames: Number of frames read from each clip.
        transform: Optional callable ``(images, target) -> (images, target)``
            applied to the list of PIL images and the target dict. When it is
            omitted the frames are only converted to tensors.
    """

    def __init__(self, image_path, mask_path, n_frames, transform=None):
        self.image_path = image_path
        self.mask_path = mask_path
        self.n_frames = n_frames
        self.transform = transform

    def __len__(self):
        """Return the number of clips."""
        return len(self.image_path)

    def __getitem__(self, idx):
        """Load clip ``idx``.

        Returns:
            ``(frames, target)``. ``frames`` is the concatenation along
            dimension 0 of all frame tensors of the clip, each tiled three
            times along dimension 0. ``target`` is a dict with the keys
            ``'labels'``, ``'valid'``, ``'masks'`` and ``'boxes'`` (possibly
            modified by ``transform``).
        """
        frame_paths = self.image_path[idx]
        mask_paths = self.mask_path[idx]

        image = [Image.open(frame_paths[i]) for i in range(self.n_frames)]
        mask = [F.to_tensor(Image.open(mask_paths[i]))
                for i in range(self.n_frames)]

        target = {
            # One entry per frame. Sized from n_frames (not a literal 36) so the
            # target stays consistent with the masks for any clip length.
            'labels': torch.ones(self.n_frames).long(),
            'valid': torch.ones(self.n_frames).long(),
            'masks': torch.cat(mask, dim=0),
            'boxes': get_bbox(mask),
        }

        if self.transform is not None:
            image, target = self.transform(image, target)
        else:
            # Without a transform the frames are still PIL images; convert them
            # so the tensor operations below work.
            image = [F.to_tensor(img) for img in image]

        # Tile each frame three times along dimension 0.
        # NOTE(review): presumably turns single-channel frames into
        # three-channel input for the backbone - confirm the frames are
        # grayscale.
        image = [img.repeat(3, 1, 1) for img in image]

        return torch.cat(image, dim=0), target

In [7]:
parser = argparse.ArgumentParser('VisTR training and evaluation script',
                                 parents=[get_args_parser()])
# Parse an empty argument list so that every option takes its parser default
# (a notebook kernel has no meaningful command line of its own).
args = parser.parse_args("")
if args.output_dir:
    Path(args.output_dir).mkdir(parents=True, exist_ok=True)

# args.pretrained_weights = 'pretrained/r101.pth'
args.masks = True

# Prefer the second GPU, but fall back to the first GPU or the CPU so the
# notebook still runs on machines that do not have two CUDA devices.
if torch.cuda.device_count() > 1:
    args.device = 'cuda:1'
elif torch.cuda.is_available():
    args.device = 'cuda:0'
else:
    args.device = 'cpu'

In [None]:
# import torch
# import torch.utils.data
# import torchvision
# from pycocotools.ytvos import YTVOS
# from pycocotools.ytvoseval import YTVOSeval
# import datasets.transforms as T
# from pycocotools import mask as coco_mask
# import os
# from PIL import Image
# from random import randint
# import cv2
# import random

In [8]:
utils.init_distributed_mode(args)
device = torch.device(args.device)

# Seed every random number generator in use, offset by the process rank.
seed = args.seed + utils.get_rank()
for seed_fn in (torch.manual_seed, np.random.seed, random.seed):
    seed_fn(seed)

model, criterion, postprocessors = build_model(args)
model.to(device)

# Keep a handle on the bare module so parameter names stay free of the wrapper.
if args.distributed:
    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
    model_without_ddp = model.module
else:
    model_without_ddp = model

n_parameters = sum(param.numel() for param in model.parameters() if param.requires_grad)
print('number of params:', n_parameters)


def _trainable_params(module, in_backbone):
    """Trainable parameters of ``module`` whose name does (or does not) contain "backbone"."""
    return [
        param
        for name, param in module.named_parameters()
        if ("backbone" in name) == in_backbone and param.requires_grad
    ]


# Two optimizer groups: everything outside the backbone uses the default
# learning rate, the backbone gets its own.
param_dicts = [
    {"params": _trainable_params(model_without_ddp, in_backbone=False)},
    {
        "params": _trainable_params(model_without_ddp, in_backbone=True),
        "lr": args.lr_backbone,
    },
]
optimizer = torch.optim.AdamW(
    param_dicts,
    lr=args.lr,
    weight_decay=args.weight_decay,
)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=args.lr_drop)

Not using distributed mode




number of params: 75709217


In [9]:
# Display the names of all entries in the model's state dict.
model.state_dict().keys()

odict_keys(['vistr.transformer.encoder.layers.0.self_attn.in_proj_weight', 'vistr.transformer.encoder.layers.0.self_attn.in_proj_bias', 'vistr.transformer.encoder.layers.0.self_attn.out_proj.weight', 'vistr.transformer.encoder.layers.0.self_attn.out_proj.bias', 'vistr.transformer.encoder.layers.0.linear1.weight', 'vistr.transformer.encoder.layers.0.linear1.bias', 'vistr.transformer.encoder.layers.0.linear2.weight', 'vistr.transformer.encoder.layers.0.linear2.bias', 'vistr.transformer.encoder.layers.0.norm1.weight', 'vistr.transformer.encoder.layers.0.norm1.bias', 'vistr.transformer.encoder.layers.0.norm2.weight', 'vistr.transformer.encoder.layers.0.norm2.bias', 'vistr.transformer.encoder.layers.1.self_attn.in_proj_weight', 'vistr.transformer.encoder.layers.1.self_attn.in_proj_bias', 'vistr.transformer.encoder.layers.1.self_attn.out_proj.weight', 'vistr.transformer.encoder.layers.1.self_attn.out_proj.bias', 'vistr.transformer.encoder.layers.1.linear1.weight', 'vistr.transformer.encoder.

In [10]:
# Display the current value of the `labels_loss` option.
args.labels_loss

True

In [None]:
# Load COCO-pretrained weights.
# NOTE: torch.load unpickles the file; only load checkpoints from a trusted source.
checkpoint = torch.load(args.pretrained_weights, map_location='cpu')['model']

# Drop the class and query embeddings so they are not copied from the checkpoint
# (presumably their shapes depend on the pretraining setup - TODO confirm).
# pop() with a default tolerates checkpoints that already lack one of them.
for key in ("vistr.class_embed.weight",
            "vistr.class_embed.bias",
            "vistr.query_embed.weight"):
    checkpoint.pop(key, None)

# Load into the unwrapped module: a DistributedDataParallel wrapper exposes its
# parameters under a "module." prefix, so with strict=False no key would match
# and nothing would be loaded, without any error.
load_result = model_without_ddp.load_state_dict(checkpoint, strict=False)
# strict=False hides mismatches, so report them explicitly.
print('missing keys:', load_result.missing_keys)
print('unexpected keys:', load_result.unexpected_keys)

args.resume = False
# if args.resume:
#     if args.resume.startswith('https'):
#         checkpoint = torch.hub.load_state_dict_from_url(
#             args.resume, map_location='cpu', check_hash=True)
#     else:
#         checkpoint = torch.load(args.resume, map_location='cpu')
#     model_without_ddp.load_state_dict(checkpoint['model'])
#     if not args.eval and 'optimizer' in checkpoint and 'lr_scheduler' in checkpoint and 'epoch' in checkpoint:
#         optimizer.load_state_dict(checkpoint['optimizer'])
#         lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
#         args.start_epoch = checkpoint['epoch'] + 1

In [None]:
# Only a training loader is built here; no validation set is used.
dataset_train = ImagePathDataset(
    train_image_list,
    train_mask_list,
    n_frames,
    transform=make_transform(image_set='train'),
)

# One sampler per mode: distributed runs use DistributedSampler, everything else
# shuffles with RandomSampler.
sampler_train = (
    DistributedSampler(dataset_train)
    if args.distributed
    else torch.utils.data.RandomSampler(dataset_train)
)

# Group sampled indices into batches, discarding a final incomplete batch.
batch_sampler_train = torch.utils.data.BatchSampler(
    sampler_train,
    args.batch_size,
    drop_last=True,
)

data_loader_train = DataLoader(
    dataset_train,
    batch_sampler=batch_sampler_train,
    collate_fn=utils.collate_fn,
    num_workers=args.num_workers,
)

output_dir = Path(args.output_dir)

In [None]:
print("Start training")
start_time = time.time()
for epoch in range(args.start_epoch, args.epochs):
    if args.distributed:
        # Pass the epoch number to the distributed sampler before iterating.
        sampler_train.set_epoch(epoch)

    train_stats = train_one_epoch(
        model,
        criterion,
        data_loader_train,
        optimizer,
        device,
        epoch,
        args.clip_max_norm,
    )
    lr_scheduler.step()

    if args.output_dir:
        # 'checkpoint.pth' is rewritten every epoch; the numbered file is an
        # additional per-epoch copy.
        checkpoint_paths = [output_dir / 'checkpoint.pth']
        at_lr_drop = (epoch + 1) % args.lr_drop == 0
        every_epoch = (epoch + 1) % 1 == 0  # always true for an integer epoch
        if at_lr_drop or every_epoch:
            checkpoint_paths.append(output_dir / f'checkpoint{epoch:04}.pth')

        # Everything needed to restore this training state.
        snapshot = {
            'model': model_without_ddp.state_dict(),
            'optimizer': optimizer.state_dict(),
            'lr_scheduler': lr_scheduler.state_dict(),
            'epoch': epoch,
            'args': args,
        }
        for checkpoint_path in checkpoint_paths:
            utils.save_on_master(snapshot, checkpoint_path)