In [1]:
import os
import cv2
import sys
import time
import collections
import torch
import argparse
import numpy as np
import torch.nn as nn
import torch.nn.functional as F

from torch.autograd import Variable
from torch.utils import data

from dataset import IC15TestLoader
import models
import util
# c++ version pse based on opencv 3+
#from pse import pse
#python pse
from pypse import pse as pypse

In [2]:
def extend_3c(img):
    """Replicate a single-channel image into three identical channels.

    The input is first viewed as ``(img.shape[0], img.shape[1], 1)`` and that
    lone channel is then copied three times along the last axis.

    Args:
        img: numpy array whose elements fit the shape
            ``(img.shape[0], img.shape[1], 1)``.

    Returns:
        A new array of shape ``(img.shape[0], img.shape[1], 3)`` with the same
        dtype as ``img``.
    """
    rows, cols = img.shape[0], img.shape[1]
    single_channel = img.reshape(rows, cols, 1)
    return np.repeat(single_channel, 3, axis=2)

In [3]:
def debug(idx, img_paths, imgs, output_root):
    """Tile a grid of images into one picture and save it for inspection.

    Args:
        idx: index into ``img_paths`` selecting the image whose file name is
            reused for the output file.
        img_paths: sequence of source image paths.
        imgs: nested sequence of images; each inner sequence is joined
            left-to-right (axis 1) and the resulting strips are stacked
            top-to-bottom (axis 0), so shapes must be compatible for
            ``np.concatenate``.
        output_root: directory that receives the tiled image; it is created
            if it does not exist.  A trailing slash is optional.
    """
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(output_root, exist_ok=True)

    strips = [np.concatenate(list(row), axis=1) for row in imgs]
    mosaic = np.concatenate(strips, axis=0)

    img_name = os.path.basename(img_paths[idx])
    # Join instead of string concatenation so a root without a trailing
    # separator still writes *inside* the directory rather than next to it.
    cv2.imwrite(os.path.join(output_root, img_name), mosaic)

In [4]:
def write_result_as_txt(image_name, bboxes, path):
    """Write one line of eight comma-separated integers per box.

    The output file is ``res_<image_name>.txt`` joined onto ``path`` with
    ``util.io.join_path`` and written with ``util.io.write_lines``.

    Args:
        image_name: name inserted into the ``res_%s.txt`` file name.
        bboxes: iterable of boxes; each box must yield exactly eight values
            convertible with ``int`` (the row template has eight ``%d``
            fields).
        path: location passed to ``util.io.join_path`` as the first argument.
    """
    row_template = "%d, %d, %d, %d, %d, %d, %d, %d\n"
    target = util.io.join_path(path, 'res_%s.txt'%(image_name))
    rows = [row_template % tuple(int(coord) for coord in box) for box in bboxes]
    util.io.write_lines(target, rows)

In [5]:
def polygon_from_points(points):
    """
    Build a ``plg.Polygon`` from eight coordinates x1,y1,x2,y2,x3,y3,x4,y4.

    The first eight entries of ``points`` are converted with ``int`` and
    arranged into a 4x2 int32 matrix whose rows are the (x, y) corners.
    """
    # NOTE(review): `plg` is not imported anywhere in the visible notebook
    # cells, so calling this raises NameError unless it is defined elsewhere.
    # The x coordinates (even indices) come first, then the y coordinates (odd
    # indices), so the 2x4 reshape yields one row of xs and one row of ys.
    xs_then_ys = np.array(
        [int(points[i]) for i in (0, 2, 4, 6, 1, 3, 5, 7)],
        dtype='int32',
    )
    corner_matrix = xs_then_ys.reshape([2, 4]).T
    return plg.Polygon(corner_matrix)


In [10]:
from tqdm import tqdm_notebook as tqdm
def test(args):
    """Run the detector over the IC15 test loader and save its detections.

    For every image the model output is thresholded into kernel maps, grouped
    into labelled regions by ``pypse``, filtered by pixel count and mean
    score, and converted into minimum-area rectangles scaled back to the
    original image size.  Boxes are written through ``write_result_as_txt``
    into ``outputs/submit_ic15/``, an annotated image is written through
    ``debug`` into ``outputs/vis_ic15/``, and finally the result files are
    zipped by a shell command run through ``util.cmd.cmd``.

    Args:
        args: namespace providing ``arch``, ``resume``, ``binary_th``,
            ``kernel_num``, ``scale``, ``long_size``, ``min_kernel_area``,
            ``min_area`` and ``min_score``.

    Raises:
        ValueError: if ``args.arch`` is not one of the supported backbones.

    Note:
        The model and every input batch are moved to CUDA unconditionally.
    """
    data_loader = IC15TestLoader(long_size=args.long_size)
    test_loader = torch.utils.data.DataLoader(
        data_loader,
        batch_size=1,
        shuffle=False,
        num_workers=2,
        drop_last=True)

    # Setup Model
    if args.arch == "resnet50":
        model = models.resnet50(pretrained=True, num_classes=7, scale=args.scale)
    elif args.arch == "resnet101":
        model = models.resnet101(pretrained=True, num_classes=7, scale=args.scale)
    elif args.arch == "resnet152":
        model = models.resnet152(pretrained=True, num_classes=7, scale=args.scale)
    else:
        # Fail here with a clear message instead of an unbound `model` below.
        raise ValueError(
            "Unsupported arch '{}'; expected resnet50, resnet101 or resnet152"
            .format(args.arch))

    for param in model.parameters():
        param.requires_grad = False

    model = model.cuda()

    if args.resume is not None:
        if os.path.isfile(args.resume):
            print(("Loading model and optimizer from checkpoint '{}'".format(args.resume)))
            # NOTE(review): torch.load unpickles the file; only load
            # checkpoints from a trusted source.
            checkpoint = torch.load(args.resume)

            # Drop a leading 'module.' from parameter names, but only when it
            # is actually there, so un-prefixed checkpoints keep their keys.
            prefix = 'module.'
            state = collections.OrderedDict()
            for key, value in checkpoint['state_dict'].items():
                name = key[len(prefix):] if key.startswith(prefix) else key
                state[name] = value
            model.load_state_dict(state)

            print(("Loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch'])))
            sys.stdout.flush()
        else:
            print(("No checkpoint found at '{}'".format(args.resume)))
            sys.stdout.flush()

    model.eval()

    total_frame = 0.0
    total_time = 0.0
    # Stream batches from the loader; `total` keeps the progress bar sized
    # without first materialising every image in memory.
    for idx, (org_img, img) in tqdm(enumerate(test_loader), total=len(test_loader)):
        with torch.no_grad():
            img = img.cuda()
            org_img = org_img.numpy().astype('uint8')[0]
            text_box = org_img.copy()

            # Synchronise so the timer brackets the GPU work as well.
            torch.cuda.synchronize()
            start = time.time()

            outputs = model(img)
            outputs = outputs.detach()

            # Channel 0 gives the per-pixel score; every channel is then
            # binarised to {0, 1} around args.binary_th.
            score = torch.sigmoid(outputs[:, 0, :, :])
            outputs = (torch.sign(outputs - args.binary_th) + 1) / 2

            text = outputs[:, 0, :, :]
            # Mask the kernel maps with the channel-0 map.
            kernels = outputs[:, 0:args.kernel_num, :, :] * text

            score = score.cpu().numpy()[0].astype(np.float32)
            text = text.cpu().numpy()[0].astype(np.uint8)
            kernels = kernels.cpu().numpy()[0].astype(np.uint8)

            # python version pse (the C++ `pse` import is disabled in this notebook)
            pred = pypse(kernels, args.min_kernel_area / (args.scale * args.scale))

            # (x, y) factors mapping prediction coordinates to the original image.
            scale = (org_img.shape[1] * 1.0 / pred.shape[1], org_img.shape[0] * 1.0 / pred.shape[0])
            label = pred
            label_num = np.max(label) + 1
            bboxes = []
            for i in range(1, label_num):
                # np.where yields (row, col); reverse to (x, y) for OpenCV.
                points = np.array(np.where(label == i)).transpose((1, 0))[:, ::-1]

                if points.shape[0] < args.min_area / (args.scale * args.scale):
                    continue

                score_i = np.mean(score[label == i])
                if score_i < args.min_score:
                    continue

                rect = cv2.minAreaRect(points)
                bbox = cv2.boxPoints(rect) * scale
                bbox = bbox.astype('int32')
                bboxes.append(bbox.reshape(-1))

            torch.cuda.synchronize()
            end = time.time()
            total_frame += 1
            total_time += (end - start)

            image_name = data_loader.img_paths[idx].split('/')[-1].split('.')[0]
            write_result_as_txt(image_name, bboxes, 'outputs/submit_ic15/')

            for bbox in bboxes:
                cv2.drawContours(text_box, [bbox.reshape(4, 2)], -1, (0, 255, 0), 2)

            text_box = cv2.resize(text_box, (text.shape[1], text.shape[0]))
            debug(idx, data_loader.img_paths, [[text_box]], 'outputs/vis_ic15/')

    # Report the throughput once, after the progress bar has finished.
    if total_time > 0:
        print(('fps: %.2f'%(total_frame / total_time)))
        sys.stdout.flush()

    cmd = 'cd %s;zip -j %s %s/*'%('./outputs/', 'submit_ic15.zip', 'submit_ic15')
    print(cmd)
    sys.stdout.flush()
    util.cmd.cmd(cmd)

In [None]:
def _build_arg_parser():
    """Create the argument parser holding the inference hyper-parameters."""
    # One row per option: (flag, type, default, help text).
    option_table = (
        ('--arch', str, 'resnet50', None),
        ('--resume', str, './checkpoints/pse_net_checkpoint_538.pth.tar',
         'Path to previous saved model to restart from'),
        ('--binary_th', float, 1.0, 'binary th'),
        ('--kernel_num', int, 7, ''),
        ('--scale', int, 1, 'scale'),
        ('--long_size', int, 2240, 'long size'),
        ('--min_kernel_area', float, 5.0, 'min kernel area'),
        ('--min_area', float, 800.0, 'min area'),
        ('--min_score', float, 0.93, 'min score'),
    )
    built = argparse.ArgumentParser(description='Hyperparams')
    for flag, caster, fallback, blurb in option_table:
        built.add_argument(flag, nargs='?', type=caster, default=fallback, help=blurb)
    return built


parser = _build_arg_parser()
# Parse an empty argument list so the defaults are used inside the notebook
# instead of the kernel's own command line.
args = parser.parse_args([])
test(args)

Loading model and optimizer from checkpoint './checkpoints/pse_net_checkpoint_538.pth.tar'
Loaded checkpoint './checkpoints/pse_net_checkpoint_538.pth.tar' (epoch 536)


HBox(children=(IntProgress(value=0, max=150), HTML(value='')))

0 / 150 img_84.jpg
1 / 150 img_95.jpg
2 / 150 img_94.jpg
3 / 150 img_123.jpg
4 / 150 img_92.jpg
5 / 150 img_109.jpg
6 / 150 img_28.jpg
7 / 150 img_59.jpg
8 / 150 img_134.jpg
9 / 150 img_117.jpg
10 / 150 img_89.jpg
11 / 150 img_108.jpg
12 / 150 img_22.jpg
13 / 150 img_144.jpg
14 / 150 img_87.jpg
15 / 150 img_31.jpg
16 / 150 img_71.jpg
17 / 150 img_103.jpg
18 / 150 img_47.jpg
19 / 150 img_141.jpg
20 / 150 img_122.jpg
21 / 150 img_142.jpg
22 / 150 img_146.jpg
23 / 150 img_27.jpg
24 / 150 img_74.jpg
25 / 150 img_110.jpg
26 / 150 img_125.jpg
27 / 150 img_99.jpg
28 / 150 img_113.jpg
29 / 150 img_69.jpg
30 / 150 img_124.jpg
31 / 150 img_39.jpg
32 / 150 img_119.jpg
33 / 150 img_17.jpg
34 / 150 img_96.jpg
35 / 150 img_150.jpg
36 / 150 img_115.jpg
37 / 150 img_16.jpg
38 / 150 img_50.jpg
39 / 150 img_111.jpg
40 / 150 img_80.jpg
41 / 150 img_60.jpg
42 / 150 img_62.jpg
43 / 150 img_75.jpg
