In [1]:
%matplotlib notebook
from utils.dataloader import PascalVOCDataset
from utils.dataloader import myimshow
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as td
import torchvision as tv
import numpy as np
from matplotlib import pyplot as plt
from utils.PositionSensitiveScoreMap import PositionSensitiveScoreMap

# Pick the compute device: use CUDA when PyTorch reports it as available,
# otherwise fall back to the CPU. The chosen name is echoed for the reader.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(device)

cuda


In [2]:
# Location of the Pascal VOC 2012 data; swap in the alternative below when
# running against a different copy of the dataset.
dataset_root_dir = "/datasets/ee285f-public/PascalVOC2012/"
# dataset_root_dir = "../Datasets/VOCtrainval_11-May-2012/VOCdevkit/VOC2012"

# Build the training and validation sets (in that order). See
# utils/dataloader.py for the important PascalVOCDataset arguments & attributes.
train_set, val_set = (
    PascalVOCDataset(dataset_root_dir, mode=split_name)
    for split_name in ('train', 'val')
)

In [8]:
from __future__ import  absolute_import
# cupy is not used directly, but without this import errors are raised...
import cupy as cp
import os

import matplotlib
from tqdm import tqdm

import torch as t
from utils.config import opt
from model.rfcn_resnet101 import RFCNResnet101
from torch.utils import data as data_
from trainer import FasterRCNNTrainer
from utils import array_tool as at
from utils.vis_tool import visdom_bbox
from utils.eval_tool import eval_detection_voc

# fix for ulimit
# https://github.com/pytorch/pytorch/issues/973#issuecomment-346405667
import resource

# Raise the soft open-file limit to 20480 where possible. The request is capped
# at the hard limit (setrlimit rejects a soft limit above it) and is skipped
# when the soft limit is already at least that high, so it is never lowered.
rlimit = resource.getrlimit(resource.RLIMIT_NOFILE)
_soft_nofile, _hard_nofile = rlimit
if _hard_nofile == resource.RLIM_INFINITY:
    _target_nofile = 20480
else:
    _target_nofile = min(20480, _hard_nofile)
if _soft_nofile != resource.RLIM_INFINITY and _soft_nofile < _target_nofile:
    try:
        resource.setrlimit(resource.RLIMIT_NOFILE, (_target_nofile, _hard_nofile))
    except (ValueError, OSError) as err:
        # Best-effort workaround: report the failure and keep the current limit
        # instead of aborting the whole cell.
        print('could not raise RLIMIT_NOFILE to %d: %s' % (_target_nofile, err))

# Select matplotlib's 'agg' backend for the rest of the session.
# NOTE(review): the first cell enables `%matplotlib notebook`; this call
# presumably replaces that interactive backend -- confirm this is intended.
matplotlib.use('agg')


def eval(dataloader, faster_rcnn, test_num=10000):
    """Run the detector on at most ``test_num`` batches and score the result.

    Note: this function shadows the builtin ``eval``; the name is kept so that
    existing callers keep working.

    Args:
        dataloader: iterable of 4-tuples, unpacked here as
            ``(imgs, sizes, gt_bboxes_, gt_labels_)``. The ground-truth items
            must expose ``.numpy()``.
        faster_rcnn: detector exposing ``predict(imgs)`` that returns three
            lists: predicted boxes, labels and scores.
        test_num: maximum number of batches to evaluate. ``None`` or a
            negative value means "no limit".

    Returns:
        The value returned by ``eval_detection_voc`` (called with
        ``use_07_metric=True``).
    """
    from itertools import islice

    pred_bboxes, pred_labels, pred_scores = [], [], []
    gt_bboxes, gt_labels = [], []

    # islice stops after exactly `test_num` batches without fetching an extra
    # one (the previous `ii == test_num` check evaluated test_num + 1 batches).
    limit = test_num if (test_num is not None and test_num >= 0) else None

    # NOTE(review): the training loop in this notebook unpacks batches as
    # (img, bbox, label, scale), which does not match the order used here.
    # Confirm what the validation dataset actually yields.
    for imgs, _sizes, gt_bboxes_, gt_labels_ in tqdm(islice(dataloader, limit)):
        pred_bboxes_, pred_labels_, pred_scores_ = faster_rcnn.predict(imgs)
        gt_bboxes.extend(gt_bboxes_.numpy())
        gt_labels.extend(gt_labels_.numpy())
        pred_bboxes.extend(pred_bboxes_)
        pred_labels.extend(pred_labels_)
        pred_scores.extend(pred_scores_)

    # No per-object "difficult" flags are collected, so pass None rather than
    # an empty list whose length would not match the other inputs.
    # NOTE(review): assumes eval_detection_voc accepts None for gt_difficults,
    # as the ChainerCV-style implementation does -- confirm.
    return eval_detection_voc(
        pred_bboxes, pred_labels, pred_scores,
        gt_bboxes, gt_labels, None,
        use_07_metric=True)


def train(train_set, val_set, load_path = False, epochs = 1, lr=1e-3, record_every = 300, lr_decay = 1e-3,test_num=500):
    """Train an RFCN-ResNet101 detector and checkpoint it periodically.

    Args:
        train_set: dataset whose items unpack as ``(img, bbox, label, scale)``.
        val_set: accepted for interface compatibility; not used by this
            function (no evaluation is run during training).
        load_path: checkpoint path to resume from, or a falsy value to start
            from scratch. On resume the recorded loss history and iteration
            numbers are restored from the checkpoint.
        epochs: number of passes over ``train_set``.
        lr: accepted for interface compatibility; currently unused.
        record_every: record the losses and save a checkpoint every this many
            iterations within an epoch.
        lr_decay: accepted for interface compatibility; currently unused.
        test_num: accepted for interface compatibility; currently unused.

    Returns:
        None. Progress is printed and checkpoints are written through
        ``trainer.save``.
    """
    train_dataloader = td.DataLoader(train_set, batch_size = 1, pin_memory = False, shuffle = True)
    faster_rcnn = RFCNResnet101().cuda()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()

    saved_loss = []
    iterations = []
    if load_path:
        trainer.load(load_path)
        print('load pretrained model from %s' % load_path)
        state_dict = t.load(load_path)
        # The history is saved below via trainer.save(saved_loss=..., iterations=...)
        # and is read back elsewhere in this notebook from
        # state_dict["other_info"]; the old top-level 'losses' key is kept
        # only as a fallback for older checkpoints.
        other_info = state_dict.get('other_info', state_dict)
        saved_loss = list(other_info.get('saved_loss', other_info.get('losses', [])))
        iterations = list(other_info.get('iterations', []))

    # Count iterations across epochs (and across resumed runs) so the recorded
    # iteration numbers keep increasing instead of restarting every epoch.
    global_step = iterations[-1] if iterations else 0

    for epoch in range(epochs):
        trainer.reset_meters()
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(train_dataloader)):
            scale = at.scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            losses = trainer.train_step(img, bbox, label, scale)
            global_step += 1

            # Pull the five scalar losses to Python floats once and reuse them
            # for both the log line and the saved history.
            loss_values = [losses[k].cpu().item() for k in range(5)]
            loss_info = 'Iter {}; Losses: RPN loc {}, RPN cls: {}, ROI loc {}, ROI cls {}, Total:{}'.format(
                str(ii), *["%.3f" % value for value in loss_values])
            print(loss_info)

            if (ii + 1) % record_every == 0:
                iterations.append(global_step)
                saved_loss.append(loss_values)
                trainer.save(saved_loss = saved_loss, iterations = iterations)
                print("new model saved")

In [9]:
'''
CHANGE load_path TO THE PATH TO YOUR CHECKPOINT IF YOU ARE RESUMING TRAINING. "checkpoints/rfcnBLAHBLAHBLAH"
'''
# Fresh run: one epoch, recording losses and checkpointing every 300 iterations.
train(
    train_set,
    val_set,
    load_path=False,
    epochs=1,
    lr=1e-3,
    record_every=300,
)
# Resuming variant (fill in the checkpoint path before enabling):
#train(train_set, val_set, load_path = "checkpoints/...", epochs = 3, lr=1e-3, record_every = 300)

  nn.init.normal(self.cls_layer.weight.data, 0.0, 0.01)
  nn.init.normal(self.reg_layer.weight.data, 0.0, 0.01)

0it [00:00, ?it/s][A

model construct completed


  scores = self.softmax(F.adaptive_avg_pool2d(pooling_track.float(),(1,1))[:,:])

1it [00:00,  2.29it/s][A

Iter 0; Losses: RPN loc 2.196, RPN cls: 0.340, ROI loc 8.534, ROI cls 2.994, Total:14.063


  h = xp.exp(dh) * src_height[:, xp.newaxis]

2it [00:01,  1.84it/s][A

Iter 1; Losses: RPN loc 88.663, RPN cls: 31.089, ROI loc 142.840, ROI cls 2.600, Total:265.192



3it [00:01,  1.96it/s][A

Iter 2; Losses: RPN loc 34.476, RPN cls: 52.984, ROI loc 279.405, ROI cls 2.739, Total:369.605



4it [00:03,  1.23it/s][A

Iter 3; Losses: RPN loc 104.092, RPN cls: 98.988, ROI loc 682.364, ROI cls 2.811, Total:888.255


  w = xp.exp(dw) * src_width[:, xp.newaxis]

5it [00:04,  1.09it/s][A

Iter 4; Losses: RPN loc 1.375, RPN cls: 47.994, ROI loc 86.882, ROI cls 2.257, Total:138.507



6it [00:04,  1.30it/s][A

Iter 5; Losses: RPN loc 226.017, RPN cls: 133.182, ROI loc 171.451, ROI cls 2.695, Total:533.345



7it [00:06,  1.04s/it][A

Iter 6; Losses: RPN loc 1.607, RPN cls: 8.673, ROI loc 28.534, ROI cls 2.162, Total:40.976



8it [00:08,  1.48s/it][A

Iter 7; Losses: RPN loc 13.987, RPN cls: 10.936, ROI loc 114.155, ROI cls 2.181, Total:141.259



9it [00:11,  1.78s/it][A

Iter 8; Losses: RPN loc 4.109, RPN cls: 6.376, ROI loc 63.828, ROI cls 2.145, Total:76.458



10it [00:13,  1.87s/it][A

Iter 9; Losses: RPN loc 0.544, RPN cls: 0.516, ROI loc 412.442, ROI cls 2.266, Total:415.768



11it [00:15,  1.98s/it][A

Iter 10; Losses: RPN loc 1.465, RPN cls: 1.396, ROI loc 283.612, ROI cls 2.252, Total:288.725



12it [00:17,  1.87s/it][A

Iter 11; Losses: RPN loc 1.544, RPN cls: 56.077, ROI loc 97.213, ROI cls 2.158, Total:156.991



13it [00:19,  1.96s/it][A

Iter 12; Losses: RPN loc 42.101, RPN cls: 16.315, ROI loc 638.351, ROI cls 2.357, Total:699.124



14it [00:21,  2.11s/it][A

Iter 13; Losses: RPN loc 6.875, RPN cls: 10.673, ROI loc 256.121, ROI cls 2.221, Total:275.890



15it [00:28,  3.29s/it][A

Iter 14; Losses: RPN loc 2.078, RPN cls: 5.091, ROI loc 44.846, ROI cls 2.146, Total:54.161



16it [00:34,  4.39s/it][A

Iter 15; Losses: RPN loc 0.787, RPN cls: 0.829, ROI loc 154.555, ROI cls 2.199, Total:158.370



17it [00:39,  4.38s/it][A

Iter 16; Losses: RPN loc 0.956, RPN cls: 0.984, ROI loc 82.762, ROI cls 2.172, Total:86.873



18it [00:43,  4.30s/it][A

Iter 17; Losses: RPN loc 5.196, RPN cls: 11.167, ROI loc 199.902, ROI cls 2.239, Total:218.503



19it [00:50,  5.17s/it][A

Iter 18; Losses: RPN loc 0.331, RPN cls: 0.849, ROI loc 12.305, ROI cls 2.131, Total:15.617
Iter 19; Losses: RPN loc 2.159, RPN cls: 2.534, ROI loc 165.224, ROI cls 2.243, Total:172.160



20it [00:59,  6.14s/it][A

new model saved



21it [01:04,  6.03s/it][A

Iter 20; Losses: RPN loc 0.496, RPN cls: 0.821, ROI loc 17.071, ROI cls 2.135, Total:20.523



22it [01:11,  6.12s/it][A

Iter 21; Losses: RPN loc 1.234, RPN cls: 0.201, ROI loc 23.498, ROI cls 2.146, Total:27.080


KeyboardInterrupt: 

In [21]:
'''This will load the state dict which will allow you to obtain losses'''
# Uncomment the lines below and fill in the checkpoint path to read back the
# recorded loss history and the iteration numbers it was recorded at.
#state_dict = t.load("checkpoints/...")
#saved_losses = state_dict["other_info"]["saved_loss"]
#iterations = state_dict["other_info"]["iterations"]

# The state dict holds other entries besides "other_info".

5