In [25]:
import cv2
import numpy as np
import torch
import torch.backends.cudnn as cudnn
import torch.utils.data as data
from dataset import TotalText, Ctw1500Text, Icdar15Text, Mlt2017Text, TD500Text
from network.textnet import TextNet
from util.augmentation import BaseTransform,Augmentation
from cfglib.config import config as cfg, update_config, print_config
from cfglib.option import BaseOptions
from util.visualize import visualize_detection, visualize_gt
from util.misc import to_device, mkdirs,rescale_result
from util.eval import deal_eval_total_text, deal_eval_ctw1500, deal_eval_icdar15, \
    deal_eval_TD500, data_transfer_ICDAR, data_transfer_TD500, data_transfer_MLT2017
import sys
sys.argv=['']
option = BaseOptions()
args = option.initialize()
args.cuda=False
args.gpu=-1
args.num_workers=0
args.checkepoch=-1
args.pretrain = True
update_config(cfg, args)
print_config(cfg)

gpu: -1
num_workers: 0
batch_size: 12
max_epoch: 200
start_epoch: 0
lr: 0.001
cuda: False
output_dir: output
input_size: 640
max_annotation: 64
num_points: 20
adj_num: 4
max_points: 20
use_hard: True
scale: 1
grad_clip: 0
dis_threshold: 0.3
cls_threshold: 0.875
approx_factor: 0.007
exp_name: Totaltext
resume: None
mgpu: False
save_dir: ./model/
vis_dir: ./vis/
log_dir: ./logs/
loss: CrossEntropyLoss
pretrain: True
verbose: True
viz: False
lr_adjust: fix
stepvalues: []
weight_decay: 0.0
gamma: 0.1
momentum: 0.9
optim: Adam
save_freq: 5
display_freq: 10
viz_freq: 50
log_freq: 10000
val_freq: 1000
net: resnet50
rescale: 255.0
means: [0.485, 0.456, 0.406]
stds: [0.229, 0.224, 0.225]
test_size: [640, 1024]
checkepoch: -1
img_root: None
device: cpu


In [11]:
model = TextNet(is_training=True, backbone=cfg.net,)
model.train()

load the resnet50 weight from ./cache


TextNet(
  (fpn): FPN(
    (backbone): ResNet(
      (stage1): Sequential(
        (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
        (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU(inplace=True)
        (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      )
      (stage2): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
         

In [17]:
import time
from torch.optim import lr_scheduler
class AverageMeter(object):
    """Computes and stores the average and current value"""
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count
global train_step

losses = AverageMeter()
batch_time = AverageMeter()
data_time = AverageMeter()
end = time.time()
model.train()
optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
scheduler = lr_scheduler.StepLR(optimizer, step_size=100, gamma=0.1)
scheduler.step()

In [22]:
print_config(cfg)

gpu: -1
num_workers: 0
batch_size: 12
max_epoch: 200
start_epoch: 0
lr: 0.001
cuda: False
output_dir: output
input_size: 640
max_annotation: 64
num_points: 20
adj_num: 4
max_points: 20
use_hard: True
scale: 1
grad_clip: 0
dis_threshold: 0.3
cls_threshold: 0.875
approx_factor: 0.007
exp_name: Totaltext
resume: None
mgpu: False
save_dir: ./model/
vis_dir: ./vis/
log_dir: ./logs/
loss: CrossEntropyLoss
pretrain: True
verbose: True
viz: False
lr_adjust: fix
stepvalues: []
weight_decay: 0.0
gamma: 0.1
momentum: 0.9
optim: Adam
save_freq: 5
display_freq: 10
viz_freq: 50
log_freq: 10000
val_freq: 1000
net: resnet50
rescale: 255.0
means: [0.485, 0.456, 0.406]
stds: [0.229, 0.224, 0.225]
test_size: [640, 1024]
checkepoch: -1
img_root: None
device: cpu


In [28]:
from network.loss import  TextLoss

In [66]:
trainset = TD500Text(
            data_root='TD500',
            is_training=True,
            transform=Augmentation(size=512, mean=cfg.means, std=cfg.stds)
        )

In [75]:
def train(model, train_loader, criterion, scheduler, optimizer, epoch):
    
    global train_step

    losses = AverageMeter()
    batch_time = AverageMeter()
    data_time = AverageMeter()
    end = time.time()
    model.train()
    scheduler.step()

    print('Epoch: {} : LR = {}'.format(epoch, lr))

    for i, (img, train_mask, tr_mask, distance_field,
            direction_field, weight_matrix, gt_points,
            proposal_points, ignore_tags) in enumerate(train_loader):
        data_time.update(time.time() - end)

        train_step += 1

        img, train_mask, tr_mask, distance_field, \
        direction_field, weight_matrix, gt_points, \
        proposal_points, ignore_tags = to_device(img, 
                                                train_mask, tr_mask, distance_field,
                                                direction_field, weight_matrix, gt_points,
                                                proposal_points, ignore_tags)

        output = model(img)
        tr_loss, tcl_loss, sin_loss, cos_loss, radii_loss = \
            criterion(output, tr_mask, tcl_mask, sin_map, cos_map, radius_map, train_mask)
        loss = tr_loss + tcl_loss + sin_loss + cos_loss + radii_loss

        # backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        losses.update(loss.item())
        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if cfg.viz and i % cfg.viz_freq == 0:
            visualize_network_output(output, tr_mask, tcl_mask, mode='train')

        if i % cfg.display_freq == 0:
            print('({:d} / {:d}) - Loss: {:.4f} - tr_loss: {:.4f} - tcl_loss: {:.4f} - sin_loss: {:.4f} - cos_loss: {:.4f} - radii_loss: {:.4f}'.format(
                i, len(train_loader), loss.item(), tr_loss.item(), tcl_loss.item(), sin_loss.item(), cos_loss.item(), radii_loss.item())
            )

        if i % cfg.log_freq == 0:
            logger.write_scalars({
                'loss': loss.item(),
                'tr_loss': tr_loss.item(),
                'tcl_loss': tcl_loss.item(),
                'sin_loss': sin_loss.item(),
                'cos_loss': cos_loss.item(),
                'radii_loss': radii_loss.item()
            }, tag='train', n_iter=train_step)

    if epoch % cfg.save_freq == 0:
        save_model(model, epoch, scheduler.get_lr(), optimizer)

    print('Training Loss: {}'.format(losses.avg))


In [76]:
cfg.batch_size

12

In [77]:
criterion = TextLoss()
lr = cfg.lr
optimizer = torch.optim.Adam(model.parameters(), lr=cfg.lr)
scheduler = lr_scheduler.StepLR(optimizer, step_size=100, gamma=0.1)
train_loader = train_loader = data.DataLoader(trainset, batch_size=cfg.batch_size, shuffle=True, num_workers=cfg.num_workers)
train_step = 0
for epoch in range(cfg.start_epoch, cfg.max_epoch):
    train(model, train_loader, criterion, scheduler, optimizer, epoch)

torch.Size([1, 400, 2])
Epoch: 0 : LR = 0.001


TypeError: new(): invalid data type 'str'

In [None]:
for epoch in range(cfg.start_epoch, cfg.max_epoch):

In [7]:
input_dict = dict()
x=cv2.imread('/home/mansour/Pictures/1.jpg')
x=np.moveaxis(x, -1,0)
im=np.array([x])
im=torch.tensor(im)
input_dict['img'] = to_device(im)
output_dict = model(input_dict)
output_dict

RuntimeError: No CUDA GPUs are available

In [12]:
image = im

In [15]:
idx = 0  # test mode can only run with batch_size == 1
# visualization
img_show = image[idx].permute(1, 2, 0).cpu().numpy()
img_show = ((img_show * cfg.stds + cfg.means) * 255).astype(np.uint8)


image_show = img_show.copy()
image_show = np.ascontiguousarray(image_show[:, :, ::-1])

cls_preds = output_dict["fy_preds"][0].data.cpu().numpy()
py_preds = output_dict["py_preds"]
init_polys = output_dict["init_polys"]
shows = []

init_py = init_polys.data.cpu().numpy()

In [27]:
from matplotlib import pyplot as plt
for idx, py in enumerate(py_preds):
        contours = py.data.cpu().numpy()
        im_show = image_show.copy()
        cv2.drawContours(im_show, init_py.astype(np.int32), -1, (255, 255, 0), 2)
        cv2.drawContours(im_show, contours.astype(np.int32), -1, (0, 255, 0), 3)
        print(im_show.shape)
        plt.imshow(im_show)
        plt.show()

(332, 500, 3)
(332, 500, 3)
(332, 500, 3)
