In [16]:
%matplotlib inline

from typing import Optional, Union

import os
import sys
import argparse

import torch
import torchvision
from torch import Tensor
import torch.nn.functional as F
from torchvision.ops import boxes as box_ops
from torchvision.ops.boxes import box_area
from torchvision.ops.roi_align import _bilinear_interpolate, roi_align
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor

import numpy as np
import matplotlib.pyplot as plt

sys.path.append('../../../')
from video_processing.mask_rcnn.dataset.penn_fudan_ped import PennFudanDataset, get_transform


# Seed PyTorch's and NumPy's global random generators with the same value.
SEED = 0
torch.manual_seed(SEED)
np.random.seed(SEED)

In [23]:
# Run configuration, expressed as command-line style options.
parser = argparse.ArgumentParser(description='MaskRCNN')

# One row per option: (flag, type, default, help). Rows are registered in order,
# which is also the order the fields appear in the parsed Namespace.
_option_table = (
    ('--rpn-objectness-weight', float, 10., 'weight to the loss of RPN objectness'),
    ('--rpn-box-regression-weight', float, 50., 'weight to the loss of RPN box regression'),
    ('--roi-classifier-weight', float, 1., 'weight to the loss of roi-head for object class'),
    ('--roi-box-regression-weight', float, 5., 'weight to the loss for roi-head box regression'),
    ('--mask-weight', float, 0.5, 'weight to mask loss'),
    ('--n-training', int, 160, 'number of training data'),
    ('--max-epochs', int, 160, 'maximum number of epochs'),
    ('--print-freq', int, 5, 'how many batch before printing'),
)
for _flag, _kind, _default, _help in _option_table:
    parser.add_argument(_flag, type=_kind, default=_default, help=_help)

# The argument list is passed explicitly instead of being read from sys.argv;
# only the epoch count is overridden here.
args = parser.parse_args(['--max-epochs', '4'])
args

Namespace(rpn_objectness_weight=10.0, rpn_box_regression_weight=50.0, roi_classifier_weight=1.0, roi_box_regression_weight=5.0, mask_weight=0.5, n_training=160, max_epochs=4, print_freq=5)

In [20]:
# Use the GPU when PyTorch can see one, otherwise run on the CPU.
device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('device ', device)

# Root folder that holds the datasets. The original machine-specific location is
# kept as the default; set the MASK_RCNN_DATA_DIR environment variable to run the
# notebook on another machine without editing this cell.
main_dirpath=os.environ.get('MASK_RCNN_DATA_DIR', 'D:/data/')
tensor_dirpath=os.path.join(main_dirpath, 'mask_rcnn')

device  cpu


In [3]:
# Fetch torchvision's detection reference helpers into the working directory so
# that `import utils` below can find them. Files that already exist are skipped.
# The standard library downloader is used because `wget` is never imported in
# this notebook, so the previous call failed with NameError on a fresh kernel.
from urllib.request import urlretrieve

main_url='https://raw.githubusercontent.com/pytorch/vision/main/references/detection'
for file in ['engine.py', 'utils.py','coco_utils.py','coco_eval.py', 'transforms.py']:
    if os.path.isfile(file): continue
    # Download under a temporary name first: an interrupted transfer must not
    # leave a truncated file that the isfile() check would later accept.
    partial=f'{file}.part'
    urlretrieve(f'{main_url}/{file}', filename=partial)
    os.replace(partial, file)
    out=file
    print(file, out)

import utils

In [4]:
num_classes=2  # presumably background + pedestrian -- confirm against the dataset labels

# Instance segmentation model pre-trained on COCO, built with custom proposal
# counts and sampling batch sizes for the RPN and the box head.
detector_options=dict(
    rpn_pre_nms_top_n_train=800,
    rpn_pre_nms_top_n_test=500,
    rpn_post_nms_top_n_train=800,
    rpn_post_nms_top_n_test=500,
    box_detections_per_img=100,
    box_batch_size_per_image=400,
    rpn_batch_size_per_image=100,
)
model=torchvision.models.detection.maskrcnn_resnet50_fpn_v2(weights='DEFAULT', **detector_options)
roi_heads=model.roi_heads

# Box head: read the width of the classifier input, then swap in a fresh
# predictor sized for `num_classes`.
in_features=roi_heads.box_predictor.cls_score.in_features
print('the number of inpute features for classifiers ', in_features)
roi_heads.box_predictor=FastRCNNPredictor(in_features,  num_classes)

# Mask head: same idea, keyed on the channel count feeding the last mask conv.
in_features_mask=roi_heads.mask_predictor.conv5_mask.in_channels
print('the number of input features for mask ', in_features_mask)
hidden_layer=256
roi_heads.mask_predictor=MaskRCNNPredictor(in_features_mask, hidden_layer, num_classes)

# Place the model on the selected device before the optimizer collects its parameters.
model=model.to(device)

optimizer=torch.optim.AdamW(
    model.parameters(),
    lr=0.001,
    betas=(0.9, 0.999),
    weight_decay=0.01,
)


the number of inpute features for classifiers  1024
the number of input features for mask  256


In [6]:
# There are 170 images: the first `args.n_training` go to training and the rest
# to validation. The split used to be hard-coded to 160, which silently ignored
# the --n-training option; its default (160) reproduces the previous split.
n_images=170
if not 0<args.n_training<n_images:
    raise ValueError(f'--n-training must be between 1 and {n_images-1}, got {args.n_training}')

# Folder layout shared by both datasets.
dataset_layout=dict(root=os.path.join(main_dirpath, 'PennFudanPed'), image_dirname='PNGImages', mask_dirname='PedMasks',
                    annotation_dirname='Annotation')
train_dataset=PennFudanDataset(**dataset_layout, transforms=get_transform(is_train=True),
                               indices=np.arange(args.n_training))
val_dataset=PennFudanDataset(**dataset_layout, transforms=get_transform(is_train=False),
                             indices=np.arange(args.n_training, n_images))
print('train_dataset ', len(train_dataset), ' val_dataset ', len(val_dataset))
# define training and validation data loaders; only the training loader shuffles
train_loader=torch.utils.data.DataLoader(train_dataset, batch_size=2,shuffle=True,collate_fn=utils.collate_fn)
val_loader=torch.utils.data.DataLoader(val_dataset, batch_size=2,shuffle=False,collate_fn=utils.collate_fn)

train_dataset  160  val_dataset  10


In [25]:
def train_epoch(model, train_loader, optimizer,weights, print_freq=1):
    """Run one optimisation pass over ``train_loader`` and return the mean weighted loss.

    The model is not switched to training mode here; the caller is expected to
    have done so.

    Args:
        model: module called as ``model(images, targets)`` that returns a dict
            mapping loss names to scalar loss tensors.
        train_loader: sized iterable yielding ``(images, targets)`` batches.
            Each target is assumed to be a dict, as torchvision detection
            models require.
        optimizer: optimizer that updates the model parameters.
        weights: dict with a weight for every loss name the model returns.
        print_freq: print a progress line every ``print_freq`` batches; a value
            ``<= 0`` disables printing.

    Returns:
        float: sum of the per-batch weighted losses divided by ``len(train_loader)``.

    Raises:
        KeyError: if the model returns a loss name missing from ``weights``.
        ZeroDivisionError: if ``train_loader`` is empty.
    """
    # Batches are moved to wherever the model lives; without this a model on the
    # GPU receives CPU tensors from the loader and the forward pass fails.
    first_param=next(model.parameters(), None)
    device=None if first_param is None else first_param.device
    n_batches=len(train_loader)

    running_loss=0.
    for i, (images, targets) in enumerate(train_loader, 1):
        if device is not None:
            images=[image.to(device) for image in images]
            targets=[{k: (v.to(device) if isinstance(v, torch.Tensor) else v) for k, v in target.items()}
                     for target in targets]

        # zero gradients
        optimizer.zero_grad()

        # estimate
        outputs=model(images, targets)

        # compute the weighted total; the log records each raw term on its own
        # (it used to print the running total under every term's name)
        loss, log=0., ''
        for name, loss_term in outputs.items():
            loss+=weights[name]*loss_term
            log+=f' {name}:{loss_term.item():.3f},'
        loss.backward()

        # adjusting parameters
        optimizer.step()

        # gather data and report
        running_loss+=loss.item()
        if print_freq>0 and i%print_freq==0:
            # ':.2f' gives two decimals; the former ':2f' was a width of 2 with six decimals
            print(f'Iteration {i} [{100*i/n_batches:.2f}%]: total:{loss.item():.3f}, {log}')
    last_loss=running_loss/n_batches
    return last_loss

def validation(model, val_loader, weights):
    """Compute the mean weighted loss and mean per-term losses over ``val_loader``.

    Gradients are disabled for the forward passes. The model's mode is not
    changed here: torchvision detection models return the loss dict only in
    training mode, so the caller must leave the model in that mode.

    Args:
        model: module called as ``model(images, targets)`` that returns a dict
            mapping loss names to scalar loss tensors.
        val_loader: sized iterable yielding ``(images, targets)`` batches.
            Each target is assumed to be a dict, as torchvision detection
            models require.
        weights: dict with a weight for every loss name the model returns.

    Returns:
        tuple: ``(mean_weighted_loss, loss_per_task)``. The first item is a
        float averaged over all batches. The second maps each loss name to its
        unweighted value averaged over all batches.

    Raises:
        KeyError: if the model returns a loss name missing from ``weights``.
        ZeroDivisionError: if ``val_loader`` is empty.
    """
    # Move each batch to the model's device (a no-op when both are on the CPU).
    first_param=next(model.parameters(), None)
    device=None if first_param is None else first_param.device

    running_loss, loss_per_task=0., dict()
    # Every batch is visited: a leftover `break` used to stop after the first
    # one while the result was still divided by the number of batches.
    for images, targets in val_loader:
        if device is not None:
            images=[image.to(device) for image in images]
            targets=[{k: (v.to(device) if isinstance(v, torch.Tensor) else v) for k, v in target.items()}
                     for target in targets]
        with torch.no_grad(): outputs=model(images, targets)
        # compute loss
        loss=0.
        for name, loss_term in outputs.items():
            loss+=weights[name]*loss_term
            loss_per_task[name]=loss_per_task.get(name, 0.)+loss_term.item()
        running_loss+=float(loss)
    n_batches=len(val_loader)
    # Average the accumulated total, not just the last batch's loss.
    loss_per_task={k:n/n_batches for k, n in loss_per_task.items()}
    return running_loss/n_batches, loss_per_task

In [None]:
# Weight applied to each loss term returned by the model, taken from the parsed arguments.
weights={'loss_classifier':args.roi_classifier_weight, 'loss_box_reg':args.roi_box_regression_weight, 
         'loss_mask':args.mask_weight,'loss_objectness':args.rpn_objectness_weight, 
         'loss_rpn_box_reg':args.rpn_box_regression_weight}

# Training mode is set once and kept for the whole run, validation included.
model.train()
best_vloss=np.inf
for epoch in range(1, args.max_epochs+1):
    # one optimisation pass over the training data
    t_loss=train_epoch(model, train_loader, optimizer, weights=weights, print_freq=args.print_freq)

    # validation
    v_loss, v_loss_per_task=validation(model, val_loader, weights)

    # print progress; the format spec now sits inside the braces, so the
    # completed fraction is rendered with two decimals instead of a literal ':.2f'
    log=''
    for k, v in v_loss_per_task.items(): log+=f' {k}:{v:.2f},'
    print(f'Epoch {epoch} [{epoch/args.max_epochs:.2f}]: training loss {t_loss:.3f}, val_loss: {v_loss:.3f}, {log}')

    # keep a numbered checkpoint whenever the validation loss improves;
    # the first epoch never produces one (epoch>1)
    if v_loss<best_vloss and epoch>1:
        best_vloss=v_loss
        torch.save(model.state_dict(), f'checkpoint{epoch}.pth')
        
    # always overwrite the rolling checkpoint with the latest weights
    torch.save(model.state_dict(), 'checkpoint.pth')

In [29]:
# model.load_state_dict(torch.load('checkpoint.pth', weights_only=True))

<All keys matched successfully>