In [2]:
import os
import warnings

import cv2
import numpy as np

In [3]:
from data.dataset import load_annoataion, check_and_validate_polys, crop_area, generate_rbox, get_images

In [4]:
def _pad_to_square(im, min_side):
    '''
    zero-pad `im` (anchored at the top-left corner) into a square 3-channel
    uint8 canvas whose side is max(image height, image width, min_side)
    '''
    src_h, src_w = im.shape[:2]
    side = max(src_h, src_w, min_side)
    canvas = np.zeros((side, side, 3), dtype=np.uint8)
    canvas[:src_h, :src_w, :] = im
    return canvas


def _find_annotation(txt_root, im_name):
    '''
    return the path of the '.txt' annotation matching `im_name` inside
    `txt_root`, or None when no candidate exists on disk
    '''
    # The historical lookups come first (strip a 3-, then a 4-character
    # extension) so existing datasets resolve exactly as before; the real
    # extension split only kicks in where those two used to find nothing.
    stems = (im_name[0:-4], im_name[0:-5], os.path.splitext(im_name)[0])
    for stem in stems:
        candidate = os.path.join(txt_root, stem + '.txt')
        if os.path.exists(candidate):
            return candidate
    return None


def image_label(txt_root, image_list, img_name, index,
                input_size=512, random_scale=np.array([0.5, 1, 2.0, 3.0]),
                background_ratio=3. / 8):
    '''
    get image's corresponding matrix and ground truth

    txt_root         : directory holding the '.txt' annotation files
    image_list       : sequence of image paths, read with cv2.imread
    img_name         : sequence of image file names, parallel to image_list,
                       used to derive the annotation file name
    index            : position of the sample in image_list / img_name
    input_size       : side of the square image handed back
    random_scale     : candidate scale factors, one is drawn per call
    background_ratio : probability of producing a background-only sample

    returns (images, score_maps, geo_maps, training_masks), all float32:
    the image with its channel axis reversed (BGR -> RGB for images read by
    cv2.imread) and the three maps subsampled by keeping every 4th row and
    column.

    This is best effort: when the sample cannot be built the function returns
    (None, None, None, None) instead of raising, so callers must skip such
    results. Failures are reported through warnings.warn rather than being
    silently discarded.
    '''
    failed = (None, None, None, None)

    try:
        im_fn = image_list[index]
        im_name = img_name[index]

        # cv2.imread signals an unreadable/missing file by returning None
        im = cv2.imread(im_fn)
        if im is None:
            warnings.warn("image_label: cannot read image %r" % (im_fn,),
                          RuntimeWarning, stacklevel=2)
            return failed
        h, w, _ = im.shape

        txt_fn = _find_annotation(txt_root, im_name)
        if txt_fn is None:
            warnings.warn("image_label: no annotation file for %r in %r"
                          % (im_name, txt_root), RuntimeWarning, stacklevel=2)
            return failed

        text_polys, text_tags = load_annoataion(txt_fn)
        text_polys, text_tags = check_and_validate_polys(text_polys, text_tags, (h, w))

        # random rescale of the image and its polygons
        rd_scale = np.random.choice(random_scale)
        im = cv2.resize(im, dsize=None, fx=rd_scale, fy=rd_scale)
        text_polys *= rd_scale

        # random crop a area from image
        if np.random.rand() < background_ratio:
            # crop background
            im, text_polys, text_tags = crop_area(im, text_polys, text_tags, crop_background=True)
            # NOTE(review): if polygons survive the crop, the crop still
            # contains text, so all-zero maps would be a wrong label. The
            # sample is skipped instead - confirm crop_area returns only the
            # polygons that lie inside the crop.
            if len(text_polys) > 0:
                return failed
            im = cv2.resize(_pad_to_square(im, input_size), dsize=(input_size, input_size))
            score_map = np.zeros((input_size, input_size), dtype=np.uint8)
            geo_map = np.zeros((input_size, input_size, 5), dtype=np.float32)
            training_mask = np.ones((input_size, input_size), dtype=np.uint8)
        else:
            im, text_polys, text_tags = crop_area(im, text_polys, text_tags, crop_background=False)

            # pad the image to the training input size or the longer side of image
            im = _pad_to_square(im, input_size)
            padded_h, padded_w = im.shape[:2]
            im = cv2.resize(im, dsize=(input_size, input_size))
            # polygons follow the same resize as the padded image
            text_polys[:, :, 0] *= input_size / float(padded_w)
            text_polys[:, :, 1] *= input_size / float(padded_h)
            score_map, geo_map, training_mask = generate_rbox((input_size, input_size), text_polys, text_tags)

        images = im[:, :, ::-1].astype(np.float32)
        score_maps = score_map[::4, ::4, np.newaxis].astype(np.float32)
        geo_maps = geo_map[::4, ::4, :].astype(np.float32)
        training_masks = training_mask[::4, ::4, np.newaxis].astype(np.float32)

    except Exception as exc:
        # keep the "return Nones on failure" contract, but say what went wrong
        warnings.warn("image_label: skipping sample at index %r: %s: %s"
                      % (index, type(exc).__name__, exc),
                      RuntimeWarning, stacklevel=2)
        return failed

    return images, score_maps, geo_maps, training_masks

In [5]:
# Dataset location. Set the ICDAR_2015_ROOT environment variable to use a
# different copy of the data; without it the original local path is used.
# Joining with '' guarantees a trailing separator, so code that concatenates
# onto root_path keeps working whatever the override looks like.
root_path = os.path.join(
    os.environ.get('ICDAR_2015_ROOT', 'D:/_data/personal_project/ICDAR_2015/'), '')
train_img = os.path.join(root_path, 'train_img')
train_txt = os.path.join(root_path, 'train_gt')

# get_images is imported from data.dataset; its result is unpacked into the
# two parallel sequences that image_label indexes (paths and file names).
image_list, img_name = get_images(train_img)