In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
import random

import cv2
from PIL import Image, ImagePath, ImageDraw
import os
import glob
import csv
import pathlib
from pathlib import Path
import re
import math
from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow.keras.layers import Input, Conv2D, BatchNormalization, Activation, MaxPooling2D, Reshape, Dense, GRU, Bidirectional
from tensorflow.keras.callbacks import ReduceLROnPlateau, ModelCheckpoint, EarlyStopping, TensorBoard
from tensorflow.keras.models import Model, load_model
import tensorflow.keras.backend as K
import scipy.io as sio
from shapely.geometry import Polygon

import warnings
warnings.filterwarnings("ignore")
colab_path = "/content/drive/MyDrive/"

In [None]:
# Load the SynthText ground-truth annotations (MATLAB .mat file).
dataset = sio.loadmat('SynthText/gt.mat')

# Extract the image paths and save them to a file, one path per line.
image_paths = [names[0] for names in dataset['imnames'][0]]
os.makedirs('synthesized', exist_ok=True)  # the output folder may not exist on a fresh run
with open('synthesized/image_paths.txt', 'w') as f:
    f.writelines("%s\n" % path for path in image_paths)

# Extract the words of every image. Each entry of dataset['txt'] holds text
# instances that may span several lines and several space-separated words;
# they are flattened into one list of non-empty words per image.
# The later cells (annotation writer, vocab creation) read `dataset_word_list`,
# so it has to be defined here for a Restart & Run All to succeed.
dataset_word_list = []
for img_words in dataset['txt'][0, :]:
    img_word_list = []
    for text_block in img_words:
        for text_line in text_block.split('\n'):
            img_word_list.extend(tok for tok in text_line.strip().split(' ') if tok != '')
    dataset_word_list.append(img_word_list)

In [None]:
# Write one annotation file per image, mirroring the image's folder layout,
# ex: 1/ant+hill_100_0.jpg and its text file will be in 1/ant+hill_100_0.txt
# Every line of a file is "x1,y1,x2,y2,x3,y3,x4,y4,word" (coordinates rounded to 1 decimal).
itera = 0
for polys, words, img_path in tqdm(zip(dataset['wordBB'][0, :], dataset_word_list, image_paths)):

    # An image with a single word has a 2-D box array: add the trailing word axis.
    if len(polys.shape) == 2:
        polys = polys[:, :, np.newaxis]

    # Transpose so the first axis indexes words
    # (presumably (2, 4, n_words) -> (n_words, 4, 2) — confirm against gt.mat).
    polys = polys.T

    # Stop at the first image whose box count and word count disagree.
    if polys.shape[0] != len(words):
        print('number of polys and words do not match')
        print(itera)
        break
    itera += 1

    # Split the relative image path once: "<folder>/<file name>".
    path_parts = img_path.split("/")
    folder_name = path_parts[0]
    file_stem = path_parts[1].split('.')[0]

    # makedirs also creates missing parent folders and tolerates existing ones.
    folder_path = 'synthesized/annotate_with_folders/' + folder_name
    os.makedirs(folder_path, exist_ok=True)

    with open('synthesized/annotate_with_folders/{}/{}.txt'.format(folder_name, file_stem), 'w') as f:
        for poly, word in zip(polys, words):
            # Concatenating with the word casts the rounded coordinates to strings.
            fields = np.concatenate([np.around(poly, 1).ravel(), [word]], axis=0)
            f.write(",".join(fields) + "\n")

858750it [23:47, 601.72it/s] 


In [None]:
# Vocabulary creation: flatten the per-image word lists into a single list of words.
total_words_list = [word for image_words in dataset_word_list for word in image_words]

# NOTE(review): despite its name, NUM_CLASSES is the *set* of unique words, not a count.
# generate_rbox evaluates `NUM_CLASSES - 2`, which is not defined for a set and an int —
# confirm what this constant is meant to hold.
NUM_CLASSES = set(total_words_list)

In [None]:
# Build a dataframe pairing every image path with its ground-truth path.
# Both trees are laid out as <root>/<sub-folder>/<file>.
image_paths = "synthetic_data/images/"
gt_path = "synthetic_data/ground_truth/"

# Collect every image file, one sub-folder at a time, then sort the paths.
images_list = np.array(sorted(
    image_paths + str(sub_dir) + "/" + file_name
    for sub_dir in os.listdir(image_paths)
    for file_name in os.listdir(image_paths + str(sub_dir) + "/")
))

# Same walk for the ground-truth files; sorting both lists is what lines them up.
gt_path_list = np.array(sorted(
    gt_path + str(sub_dir) + "/" + file_name
    for sub_dir in os.listdir(gt_path)
    for file_name in os.listdir(gt_path + str(sub_dir) + "/")
))

# zip() pairs the two sorted lists positionally (and stops at the shorter one).
path_pairs = list(zip(images_list, gt_path_list))
df = pd.DataFrame(path_pairs, columns = ['images_path', 'gt_path'])

In [None]:
# images_list
df.head()

Unnamed: 0,images_path,gt_path
0,synthetic_data/images/1/ant+hill_100_0.jpg,synthetic_data/ground_truth/1/ant+hill_100_0.txt
1,synthetic_data/images/1/ant+hill_100_1.jpg,synthetic_data/ground_truth/1/ant+hill_100_1.txt
2,synthetic_data/images/1/ant+hill_100_10.jpg,synthetic_data/ground_truth/1/ant+hill_100_10.txt
3,synthetic_data/images/1/ant+hill_100_100.jpg,synthetic_data/ground_truth/1/ant+hill_100_100...
4,synthetic_data/images/1/ant+hill_100_101.jpg,synthetic_data/ground_truth/1/ant+hill_100_101...


In [None]:
# Check that every image path is paired with the ground-truth file of the same image.
# The flag is initialised before the loop so the final report also works for empty
# lists, and a length mismatch (which zip() would silently hide) counts as an error.
is_ok = len(images_list) == len(gt_path_list)
if is_ok:
    for i, j in zip(images_list, gt_path_list):
        # Compare the file names without their extensions.
        image_stem = os.path.splitext(os.path.basename(i))[0]
        gt_stem = os.path.splitext(os.path.basename(j))[0]
        if image_stem != gt_stem:
            is_ok = False
            break

if is_ok:
    print("everything ok")
else:
    print("error")

everything ok


In [None]:
def load_annotation(p):
    '''
    Load the polygon coordinates and the text of every word from an annotation file.

    Each non-blank line is expected to look like ``x1,y1,x2,y2,x3,y3,x4,y4,<text>``.

    :param p: path of the annotation text file
    :return: (text_polys, text_tags)
             text_polys -- float32 array with one [[x1, y1], ..., [x4, y4]] entry per line
             text_tags  -- array with one label per line; ``None`` replaces the
                           placeholder labels '*', '###' and ''
             Two empty arrays are returned when the file does not exist.
    '''
    text_polys = []
    text_tags = []
    if not os.path.exists(p):
        return np.array(text_polys, dtype=np.float32), np.array(text_tags, dtype=np.int32)
    with open(p, 'r') as f:
        for line in f:
            line = line.replace("\n", "")
            # Drop a UTF-8 byte-order mark, either decoded ('\ufeff') or read as
            # single-byte characters ('\xef\xbb\xbf'); left in place it makes
            # float() fail on the first coordinate.
            line = line.replace('\ufeff', '').replace('\xef\xbb\xbf', '')
            line = line.replace('\xe2\x80\x8d', '')
            line = line.strip()
            if not line:
                # Blank (e.g. trailing) lines carry no annotation.
                continue
            line = line.split(',')
            # Word-level corner coordinates, e.g. 377,117,463,117,465,130,378,130
            x1, y1, x2, y2, x3, y3, x4, y4 = list(map(float, line[:8]))
            text_polys.append([[x1, y1], [x2, y2], [x3, y3], [x4, y4]])

            # NOTE(review): with more than 9 fields the 9th field is skipped, which
            # looks like a format with an extra column before the text. For the
            # "8 coordinates + word" files a word containing ',' loses everything
            # before its first comma — confirm which format is intended.
            if len(line) > 9:
                label = ",".join(line[9:])
            else:
                label = line[-1]

            # Placeholder labels are stored as None.
            if label == '*' or label == '###' or label == '':
                text_tags.append(None)
            else:
                text_tags.append(label)

    return np.array(text_polys, dtype=np.float32), np.array(text_tags)

In [None]:
# Signed area of a four-point polygon.
def polygon_area(poly):
    '''
    Compute the signed area of a quadrilateral.

    One term (x_next - x_cur) * (y_next + y_cur) is accumulated per edge and the
    sum is halved. The sign depends on the vertex order: for example
    [[0, 0], [1, 0], [1, 1], [0, 1]] gives -1.0 and the reversed order gives +1.0.
    '''
    terms = []
    for idx in range(4):
        cur = poly[idx]
        nxt = poly[(idx + 1) % 4]
        terms.append((nxt[0] - cur[0]) * (nxt[1] + cur[1]))
    return np.sum(terms)/2.

In [None]:
#https://github.com/argman/EAST/blob/master/icdar.py

def is_polygon(poly):
    '''
    Reject degenerate quadrilaterals.

    For the vertex triples starting at indices 0, 1 and 2 the polygon is rejected
    (False) when two of the three points coincide, when both consecutive edges are
    vertical, or when both edges are non-vertical and have the same slope
    (within 1e-6). True is returned when no triple is rejected.

    NOTE(review): a triple is also rejected when both edges are non-vertical with
    different slopes and the second edge is horizontal (b.y == c.y). That looks
    unintended, but it is the existing behaviour and is kept as is — confirm.
    NOTE(review): the triple starting at index 3 is never examined.
    '''
    for start in range(3):
        a = poly[start]
        b = poly[(start + 1) % 4]
        c = poly[(start + 2) % 4]

        # Any two of the three points coincide.
        same_ab = a[0] == b[0] and b[1] == a[1]
        same_ac = a[0] == c[0] and c[1] == a[1]
        same_bc = b[0] == c[0] and b[1] == c[1]
        if same_ab or same_ac or same_bc:
            return False

        first_vertical = a[0] == b[0]
        second_vertical = b[0] == c[0]

        if first_vertical:
            # Two consecutive vertical edges are collinear.
            if second_vertical:
                return False
            continue
        if second_vertical:
            # Non-vertical edge followed by a vertical one: nothing more to check.
            continue

        slope_ab = (b[1] - a[1]) / (b[0] - a[0])
        slope_bc = (c[1] - b[1]) / (c[0] - b[0])
        if abs(slope_ab - slope_bc) < 1e-6:
            return False
        if b[1] == c[1]:
            return False

    return True

In [None]:
#3
# This function is used to discard invalid polygons and check direction of them
# https://github.com/Masao-Taketani/FOTS_OCR


#https://github.com/argman/EAST/blob/master/icdar.py
def check_and_validate_polys(polys, tags, xxx_todo_changeme):
    '''
    Clip the polygons to the image, drop the invalid ones and give the remaining
    ones a common vertex direction.

    :param polys: array of quadrilaterals indexed as polys[n, vertex, (x, y)];
                  it is clipped IN PLACE to the image bounds
    :param tags: one tag per polygon
    :param xxx_todo_changeme: (h, w) of the image
    :return: (validated_polys, validated_tags) -- always a pair, so callers can
             unpack the result even when there is no polygon at all
    '''
    # Height and width of the image.
    (h, w) = xxx_todo_changeme
    if polys.shape[0] == 0:
        # Every caller unpacks two values; returning the bare array here made
        # images without annotations fail on unpacking.
        return polys, []
    polys[:, :, 0] = np.clip(polys[:, :, 0], 0, w-1)
    polys[:, :, 1] = np.clip(polys[:, :, 1], 0, h-1)

    validated_polys = []
    validated_tags = []
    for poly, tag in zip(polys, tags):
        # Degenerate shapes are skipped (the original author noted a memory error
        # further down the pipeline when they were let through).
        if is_polygon(poly) is False:
            continue

        p_area = polygon_area(poly)
        if abs(p_area) < 1:
            # Area too small to be a usable polygon.
            continue
        if p_area > 0:
            # Vertices run in the opposite direction: reverse the order, keeping vertex 0.
            poly = poly[(0, 3, 2, 1), :]
        validated_polys.append(poly)
        validated_tags.append(tag)

    return np.array(validated_polys), validated_tags


In [None]:
#https://github.com/argman/EAST/blob/master/icdar.py

# Polygon shrinkage used to build the score map.
def shrink_poly(poly, r):
    '''
    Shrink a quadrilateral in place by pulling the two ends of each edge towards
    each other, and return it.

    :param poly: four points, modified in place (pass a copy to keep the original)
    :param r: one reference length per vertex; vertex i moves by 0.3 * r[i] per step
    :return: the same ``poly`` object after shrinking

    The longer pair of opposite edges is processed first, then the other pair.
    '''
    # shrink ratio
    R = 0.3

    def _pull(start, end, use_xy_angle):
        # Move `start` towards `end` and `end` towards `start` along their edge.
        # The angle is taken either from (dy, dx) or from (dx, dy); the second
        # form swaps the roles of sin and cos, so both give the edge direction.
        if use_xy_angle:
            theta = np.arctan2((poly[end][1] - poly[start][1]), (poly[end][0] - poly[start][0]))
            step_x, step_y = np.cos(theta), np.sin(theta)
        else:
            theta = np.arctan2((poly[end][0] - poly[start][0]), (poly[end][1] - poly[start][1]))
            step_x, step_y = np.sin(theta), np.cos(theta)
        poly[start][0] += R * r[start] * step_x
        poly[start][1] += R * r[start] * step_y
        poly[end][0] -= R * r[end] * step_x
        poly[end][1] -= R * r[end] * step_y

    def _shrink_p0p1_and_p2p3():
        _pull(0, 1, True)
        _pull(3, 2, True)

    def _shrink_p0p3_and_p1p2():
        _pull(0, 3, False)
        _pull(1, 2, False)

    len_01_23 = np.linalg.norm(poly[0] - poly[1]) + np.linalg.norm(poly[2] - poly[3])
    len_03_12 = np.linalg.norm(poly[0] - poly[3]) + np.linalg.norm(poly[1] - poly[2])

    # Each step reads the already-updated points, so the order matters.
    if len_01_23 > len_03_12:
        _shrink_p0p1_and_p2p3()
        _shrink_p0p3_and_p1p2()
    else:
        _shrink_p0p3_and_p1p2()
        _shrink_p0p1_and_p2p3()
    return poly



#Compute distance between p1-p2 and p3
def point_dist_to_line(p1, p2, p3):
    '''
    Compute the distance from point p3 to the line through p1 and p2.

    :param p1, p2: two points of the line (numpy arrays)
    :param p3: the point whose distance is measured (numpy array)
    :return: |cross(p2 - p1, p1 - p3)| / |p2 - p1|
             (division by zero if p1 and p2 coincide)
    '''
    direction = p2 - p1
    offset = p1 - p3
    if direction.shape[-1] == 2:
        # np.cross on 2-D vectors is deprecated since NumPy 2.0, so the scalar
        # cross product is written out explicitly.
        cross_z = direction[0] * offset[1] - direction[1] * offset[0]
        return np.abs(cross_z) / np.linalg.norm(direction)
    # Other vector sizes keep the original formulation.
    return np.linalg.norm(np.cross(direction, offset)) / np.linalg.norm(direction)

#Find equation of line using two 2D points
def fit_line(p1, p2):
    '''
    Fit a line a*x + b*y + c = 0 through two points and return [a, b, c].

    Despite the parameter names, ``p1`` holds the two x-coordinates and ``p2``
    the two y-coordinates (the values are passed to np.polyfit as x and y).

    A vertical line (equal x-coordinates) is returned as [1., 0., -x];
    any other line as [k, -1., b], i.e. y = k*x + b.
    '''
    xs, ys = p1, p2
    if xs[0] != xs[1]:
        slope, intercept = np.polyfit(xs, ys, deg=1)
        return [slope, -1., intercept]
    return [1., 0., -xs[0]]

#Find intersection point of 2 lines
def line_cross_point(line1, line2):
    '''
    Compute the intersection point of two lines given as [a, b, c] (a*x + b*y + c = 0).

    The lines follow the convention used by fit_line: a vertical line is stored
    as [1, 0, -x0] (b == 0) and every other line as [k, -1, b0], i.e. y = k*x + b0.

    :return: float32 array [x, y], or None (after printing a message) when the
             lines are parallel.

    Verticality is tested before the slopes are compared: the first coefficient
    of a vertical line is not a slope, so comparing it with the slope of the other
    line used to report a vertical line and a slope-1 line as parallel.
    '''
    vertical1 = line1[1] == 0
    vertical2 = line2[1] == 0

    if vertical1 and vertical2:
        print('Cross point does not exist')
        return None

    if vertical1:
        x = -line1[2]
        y = line2[0] * x + line2[2]
    elif vertical2:
        x = -line2[2]
        y = line1[0] * x + line1[2]
    else:
        k1, _, b1 = line1
        k2, _, b2 = line2
        if k1 == k2:
            # Same slope (including two horizontal lines): no single crossing point.
            print('Cross point does not exist')
            return None
        x = -(b1-b2)/(k1-k2)
        y = k1*x + b1
    return np.array([x, y], dtype=np.float32)

#Get the equation of the line perpendicular to a line and passing through a point
def line_verticle(line, point):
    '''
    Return the line [a, b, c] that is perpendicular to ``line`` and passes
    through ``point``.

    * ``line`` vertical (b == 0)    -> horizontal line [0, -1, point_y]
    * ``line`` horizontal (a == 0)  -> vertical line [1, 0, -point_x]
    * otherwise                     -> slope -1/a through the point
    '''
    slope, y_coeff = line[0], line[1]
    if y_coeff == 0:
        return [0, -1, point[1]]
    if slope == 0:
        return [1, 0, -point[0]]
    return [-1./slope, -1, point[1] - (-1/slope * point[0])]

# Convert a parallelogram to a rectangle
def rectangle_from_parallelogram(poly):
    '''
    Fit a rectangle from a parallelogram.

    Two opposite corners are kept and the other two are replaced by the feet of
    the perpendiculars dropped from the kept corners onto the opposite sides.
    Which corners are kept depends on whether the angle at p0 is below 90 degrees
    and on which of the two sides meeting at p0 is longer.

    :param poly: four points p0..p3 (numpy arrays)
    :return: float32 array with the four corners of the rectangle
    '''
    p0, p1, p2, p3 = poly

    def _foot(corner, seg_a, seg_b):
        # Foot of the perpendicular dropped from `corner` onto the line seg_a-seg_b.
        side = fit_line([seg_a[0], seg_b[0]], [seg_a[1], seg_b[1]])
        return line_cross_point(side, line_verticle(side, corner))

    cos_p0 = np.dot(p1-p0, p3-p0)/(np.linalg.norm(p0-p1) * np.linalg.norm(p3-p0))
    acute_at_p0 = np.arccos(cos_p0) < 0.5 * np.pi
    p0p1_is_longer = np.linalg.norm(p0 - p1) > np.linalg.norm(p0-p3)

    if acute_at_p0:
        # p0 and p2 are kept.
        if p0p1_is_longer:
            new_p3 = _foot(p0, p2, p3)
            new_p1 = _foot(p2, p0, p1)
        else:
            new_p1 = _foot(p0, p1, p2)
            new_p3 = _foot(p2, p0, p3)
        return np.array([p0, new_p1, p2, new_p3], dtype=np.float32)

    # p1 and p3 are kept.
    if p0p1_is_longer:
        new_p2 = _foot(p1, p2, p3)
        new_p0 = _foot(p3, p0, p1)
    else:
        new_p0 = _foot(p1, p0, p3)
        new_p2 = _foot(p3, p1, p2)
    return np.array([new_p0, p1, new_p2, p3], dtype=np.float32)

#Sorting a rectangle to get all points in clockwise manner
def sort_rectangle(poly):
    '''
    Re-order the four corners of a rectangle and return (sorted_poly, angle).

    The input points are expected to be already sorted clockwise; only the
    starting corner is changed (the order is rotated, never shuffled).

    * Bottom side parallel to the x-axis: start at the corner with the smallest
      x + y, angle 0.
    * Otherwise the angle of the side between the lowest point and its
      predecessor decides whether the lowest point becomes p2 (angle above
      45 degrees, returned angle -(pi/2 - angle)) or p3 (returned angle unchanged).
    '''
    def _starting_at(first):
        # Same cyclic order, beginning at index `first`.
        return poly[[first, (first + 1) % 4, (first + 2) % 4, (first + 3) % 4]]

    # Lowest point = largest y (image coordinates).
    lowest = np.argmax(poly[:, 1])
    if np.count_nonzero(poly[:, 1] == poly[lowest, 1]) == 2:
        # Bottom line parallel to the x-axis: p0 must be the upper-left corner.
        return _starting_at(np.argmin(np.sum(poly, axis=1))), 0.

    # The point that sits to the right of the lowest point.
    lowest_right = (lowest - 1) % 4
    angle = np.arctan(-(poly[lowest][1] - poly[lowest_right][1])/(poly[lowest][0] - poly[lowest_right][0]))
    # A positive angle is expected; anything else is reported.
    if angle <= 0:
        print(angle, poly[lowest], poly[lowest_right])

    if angle/np.pi * 180 > 45:
        # The lowest point is p2, so p0 sits two positions before it.
        return _starting_at((lowest - 2) % 4), -(np.pi/2 - angle)

    # The lowest point is p3, so p0 is the next point.
    return _starting_at((lowest + 1) % 4), angle


def restore_rectangle_rbox(origin, geometry):
    '''
    Restore the four corners of rotated boxes from per-point geometry.

    :param origin: one 2-D reference point per box
                   (assumed shape (N, 2) — TODO confirm against the caller)
    :param geometry: array whose first four columns are distances and whose fifth
                     column is the rotation angle (presumably the top/right/bottom/left
                     distances stored by generate_rbox in geo_map — TODO confirm)
    :return: array of shape (N, 4, 2)

    NOTE: rows with angle >= 0 are processed first and rows with angle < 0 second,
    and the two results are concatenated in that order. When the input mixes both
    signs, the output rows are therefore NOT in the input order.
    '''
    d = geometry[:, :4]
    angle = geometry[:, 4]
    # --- boxes with angle >= 0 ---
    origin_0 = origin[angle >= 0]
    d_0 = d[angle >= 0]
    angle_0 = angle[angle >= 0]
    if origin_0.shape[0] > 0:
        # Ten coordinate rows = five (x, y) points per box in the unrotated frame:
        # four corners plus a fifth point (index 4) built from d[:, 3] and d[:, 2].
        p = np.array([np.zeros(d_0.shape[0]), -d_0[:, 0] - d_0[:, 2],
                      d_0[:, 1] + d_0[:, 3], -d_0[:, 0] - d_0[:, 2],
                      d_0[:, 1] + d_0[:, 3], np.zeros(d_0.shape[0]),
                      np.zeros(d_0.shape[0]), np.zeros(d_0.shape[0]),
                      d_0[:, 3], -d_0[:, 2]])
        p = p.transpose((1, 0)).reshape((-1, 5, 2))  # N*5*2

        # Rows of the rotation matrix, repeated so they can be multiplied
        # element-wise with the five points of each box.
        rotate_matrix_x = np.array([np.cos(angle_0), np.sin(angle_0)]).transpose((1, 0))
        rotate_matrix_x = np.repeat(rotate_matrix_x, 5, axis=1).reshape(-1, 2, 5).transpose((0, 2, 1))  # N*5*2

        rotate_matrix_y = np.array([-np.sin(angle_0), np.cos(angle_0)]).transpose((1, 0))
        rotate_matrix_y = np.repeat(rotate_matrix_y, 5, axis=1).reshape(-1, 2, 5).transpose((0, 2, 1))

        # Rotated x and y of every point (dot product of a matrix row with the point).
        p_rotate_x = np.sum(rotate_matrix_x * p, axis=2)[:, :, np.newaxis]  # N*5*1
        p_rotate_y = np.sum(rotate_matrix_y * p, axis=2)[:, :, np.newaxis]  # N*5*1

        p_rotate = np.concatenate([p_rotate_x, p_rotate_y], axis=2)  # N*5*2

        # Translation that moves the rotated fifth point onto `origin`;
        # it is then applied to the four corners.
        p3_in_origin = origin_0 - p_rotate[:, 4, :]
        new_p0 = p_rotate[:, 0, :] + p3_in_origin  # N*2
        new_p1 = p_rotate[:, 1, :] + p3_in_origin
        new_p2 = p_rotate[:, 2, :] + p3_in_origin
        new_p3 = p_rotate[:, 3, :] + p3_in_origin

        new_p_0 = np.concatenate([new_p0[:, np.newaxis, :], new_p1[:, np.newaxis, :],
                                  new_p2[:, np.newaxis, :], new_p3[:, np.newaxis, :]], axis=1)  # N*4*2
    else:
        new_p_0 = np.zeros((0, 4, 2))
    # --- boxes with angle < 0 ---
    origin_1 = origin[angle < 0]
    d_1 = d[angle < 0]
    angle_1 = angle[angle < 0]
    if origin_1.shape[0] > 0:
        # Same construction as above with a different corner layout; the fifth
        # point is built from d[:, 1] and d[:, 2] here.
        p = np.array([-d_1[:, 1] - d_1[:, 3], -d_1[:, 0] - d_1[:, 2],
                      np.zeros(d_1.shape[0]), -d_1[:, 0] - d_1[:, 2],
                      np.zeros(d_1.shape[0]), np.zeros(d_1.shape[0]),
                      -d_1[:, 1] - d_1[:, 3], np.zeros(d_1.shape[0]),
                      -d_1[:, 1], -d_1[:, 2]])
        p = p.transpose((1, 0)).reshape((-1, 5, 2))  # N*5*2

        # The rotation uses the negated angle and the opposite sign on the sine terms.
        rotate_matrix_x = np.array([np.cos(-angle_1), -np.sin(-angle_1)]).transpose((1, 0))
        rotate_matrix_x = np.repeat(rotate_matrix_x, 5, axis=1).reshape(-1, 2, 5).transpose((0, 2, 1))  # N*5*2

        rotate_matrix_y = np.array([np.sin(-angle_1), np.cos(-angle_1)]).transpose((1, 0))
        rotate_matrix_y = np.repeat(rotate_matrix_y, 5, axis=1).reshape(-1, 2, 5).transpose((0, 2, 1))

        p_rotate_x = np.sum(rotate_matrix_x * p, axis=2)[:, :, np.newaxis]  # N*5*1
        p_rotate_y = np.sum(rotate_matrix_y * p, axis=2)[:, :, np.newaxis]  # N*5*1

        p_rotate = np.concatenate([p_rotate_x, p_rotate_y], axis=2)  # N*5*2

        # Translate so that the rotated fifth point lands on `origin`.
        p3_in_origin = origin_1 - p_rotate[:, 4, :]
        new_p0 = p_rotate[:, 0, :] + p3_in_origin  # N*2
        new_p1 = p_rotate[:, 1, :] + p3_in_origin
        new_p2 = p_rotate[:, 2, :] + p3_in_origin
        new_p3 = p_rotate[:, 3, :] + p3_in_origin

        new_p_1 = np.concatenate([new_p0[:, np.newaxis, :], new_p1[:, np.newaxis, :],
                                  new_p2[:, np.newaxis, :], new_p3[:, np.newaxis, :]], axis=1)  # N*4*2
    else:
        new_p_1 = np.zeros((0, 4, 2))
    # Non-negative-angle boxes first, negative-angle boxes second.
    return np.concatenate([new_p_0, new_p_1])


# Some geometrical helper functions used by the code below
def restore_rectangle(origin, geometry):
    '''Thin alias: forward both arguments unchanged to restore_rectangle_rbox.'''
    restored = restore_rectangle_rbox(origin, geometry)
    return restored

def getRotateRect(box):
    '''
    Fit a minimum-area rotated rectangle around ``box`` with cv2.minAreaRect.

    :return: (center + size as one array, angle, corner points from cv2.boxPoints)

    For an angle below -45 the returned angle is increased by 90 and the returned
    size is swapped; the corner points are always computed from the rectangle
    exactly as OpenCV reported it.

    NOTE(review): the code assumes angles in [-90, 0); the angle range returned by
    cv2.minAreaRect depends on the OpenCV version — confirm.
    '''
    center, dims, raw_angle = cv2.minAreaRect(box)
    rect = (center, (dims[0], dims[1]), raw_angle)

    angle = raw_angle  # angle = [-90, 0)
    if angle < -45:
        angle += 90
        size = (dims[1], dims[0])
    else:
        size = rect[1]

    box_ = cv2.boxPoints(rect)
    return np.concatenate([rect[0], size]), angle, box_


#These functions generate the ROI parameters (out box, crop box and angle) used to crop text from the image
def generate_roiRotatePara(box, angle, expand_w = 60):
    '''
    Generate the ROI parameters of one rectangle.

    :param box: four corner points p0..p3 (numpy arrays)
    :param angle: rotation angle of the rectangle; it is returned negated
    :param expand_w: tolerance below zero; the box is rejected when any value of
                     its bounding box is smaller than -expand_w
    :return: None when rejected, otherwise (bbox, rrect, -angle) where
             bbox  = int32 [xmin, ymin, width, height] of the integer-truncated corners
             rrect = int32 [x, y, width, height] of the rectangle relative to bbox,
                     clipped to the bbox extent
    '''
    corner0, corner1, corner2, corner3 = box
    center = (corner0 + corner2) / 2.
    extent = np.array([np.linalg.norm(corner0 - corner1), np.linalg.norm(corner0 - corner3)])
    rrect = np.concatenate([center, extent])  # [cx, cy, w, h]

    # Axis-aligned bounding box of the corners truncated to integers.
    int_pts = np.array(box, dtype=np.int32)
    xs = int_pts[:, 0]
    ys = int_pts[:, 1]
    bbox = np.array([np.min(xs), np.min(ys), np.max(xs), np.max(ys)])
    if np.any(bbox < -expand_w):
        return None

    # Centre relative to the bbox corner, then centre -> top-left corner,
    # then size -> bottom-right corner: rrect becomes [x0, y0, x1, y1].
    rrect[:2] -= bbox[:2]
    rrect[:2] -= rrect[2:] / 2
    rrect[2:] += rrect[:2]

    # bbox becomes [xmin, ymin, width, height].
    bbox[2:] -= bbox[:2]

    # Clip x values to [0, width] and y values to [0, height] of the bbox,
    # then turn the bottom-right corner back into a size.
    rrect[::2] = np.clip(rrect[::2], 0, bbox[2])
    rrect[1::2] = np.clip(rrect[1::2], 0, bbox[3])
    rrect[2:] -= rrect[:2]

    return bbox.astype(np.int32), rrect.astype(np.int32), -angle

def restore_roiRotatePara(box):
    '''Sort the corners of ``box`` with sort_rectangle, then build its ROI parameters.'''
    sorted_box, theta = sort_rectangle(box)
    return generate_roiRotatePara(sorted_box, theta)

# This function generates the score map, geometry map, training mask and the
# recognition ROI parameters (out boxes, crop boxes, angles) used while training the model.
def generate_rbox(im_size, polys, tags):
    '''
    Generate the score map, the geometry map and the ROI parameters of one image.

    :param im_size: (h, w) of the image; every per-pixel map is created with this size
    :param polys: quadrilaterals, one per text instance
                  (assumed shape (N, 4, 2) — TODO confirm against the caller)
    :param tags: one tag per polygon; ``None`` marks a polygon to ignore
    :return: (score_map, geo_map, training_mask, (outBoxs, cropBoxs, angles),
              text_tags, recg_masks)
             score_map     -- uint8 (h, w), 1 inside every shrunk polygon
             geo_map       -- float32 (h, w, 5): for pixels of a shrunk polygon, the
                              distance to the four sides of its fitted rectangle
                              (each + 3) and the rotation angle
             training_mask -- uint8 (h, w), 0 inside polygons considered invalid
             outBoxs, cropBoxs -- int32 arrays, angles -- float32 array
             text_tags     -- tags of the accepted ROIs, cut to 16 characters
             recg_masks    -- 1. per accepted ROI; a single 0. when only the
                              placeholder ROI is present
    '''
    h, w = im_size
    # NOTE(review): polygon ids (poly_idx + 1) are written into a uint8 mask, so
    # they cannot be told apart beyond 255 polygons per image — confirm this is fine.
    poly_mask = np.zeros((h, w), dtype=np.uint8)
    score_map = np.zeros((h, w), dtype=np.uint8)
    geo_map = np.zeros((h, w, 5), dtype=np.float32)

    outBoxs = []
    cropBoxs = []
    angles = []
    text_tags = []
    recg_masks = []
    # mask used during training, to ignore some hard areas
    training_mask = np.ones((h, w), dtype=np.uint8)
    for poly_idx, poly_tag in enumerate(zip(polys, tags)):
        poly = poly_tag[0]
        #print(poly)
        tag = poly_tag[1]
        #print(tag)
        # r[i]: length of the shorter of the two edges meeting at vertex i
        # (reference length used by shrink_poly).
        r = [None, None, None, None]
        for i in range(4):
            r[i] = min(np.linalg.norm(poly[i] - poly[(i + 1) % 4]),
                       np.linalg.norm(poly[i] - poly[(i - 1) % 4]))
        # score map: fill the shrunk polygon (a copy is shrunk, poly stays intact)
        shrinked_poly = shrink_poly(poly.copy(), r).astype(np.int32)[np.newaxis, :, :]
        cv2.fillPoly(score_map, shrinked_poly, 1)
        cv2.fillPoly(poly_mask, shrinked_poly, poly_idx + 1)

        # if geometry == 'RBOX':
        # generate a parallelogram for any combination of two vertices
        # (two candidates per starting vertex, eight in total)
        fitted_parallelograms = []
        for i in range(4):
            p0 = poly[i]
            p1 = poly[(i + 1) % 4]
            p2 = poly[(i + 2) % 4]
            p3 = poly[(i + 3) % 4]
            edge = fit_line([p0[0], p1[0]], [p0[1], p1[1]])
            backward_edge = fit_line([p0[0], p3[0]], [p0[1], p3[1]])
            forward_edge = fit_line([p1[0], p2[0]], [p1[1], p2[1]])
            # The line parallel to `edge` goes through whichever of p2 / p3 is
            # farther from `edge`.
            if point_dist_to_line(p0, p1, p2) > point_dist_to_line(p0, p1, p3):
                #  parallel lines through p2
                if edge[1] == 0:
                    edge_opposite = [1, 0, -p2[0]]
                else:
                    edge_opposite = [edge[0], -1, p2[1] - edge[0] * p2[0]]
            else:
                # after p3
                if edge[1] == 0:
                    edge_opposite = [1, 0, -p3[0]]
                else:
                    edge_opposite = [edge[0], -1, p3[1] - edge[0] * p3[0]]
            # move forward edge
            new_p0 = p0
            new_p1 = p1
            new_p2 = p2
            new_p3 = p3
            new_p2 = line_cross_point(forward_edge, edge_opposite)
            if point_dist_to_line(p1, new_p2, p0) > point_dist_to_line(p1, new_p2, p3):
                # across p0
                if forward_edge[1] == 0:
                    forward_opposite = [1, 0, -p0[0]]
                else:
                    forward_opposite = [forward_edge[0], -1, p0[1] - forward_edge[0] * p0[0]]
            else:
                # across p3
                if forward_edge[1] == 0:
                    forward_opposite = [1, 0, -p3[0]]
                else:
                    forward_opposite = [forward_edge[0], -1, p3[1] - forward_edge[0] * p3[0]]
            new_p0 = line_cross_point(forward_opposite, edge)
            new_p3 = line_cross_point(forward_opposite, edge_opposite)
            # The first point is repeated at the end to close the ring.
            fitted_parallelograms.append([new_p0, new_p1, new_p2, new_p3, new_p0])
            # or move backward edge
            new_p0 = p0
            new_p1 = p1
            new_p2 = p2
            new_p3 = p3
            new_p3 = line_cross_point(backward_edge, edge_opposite)
            if point_dist_to_line(p0, p3, p1) > point_dist_to_line(p0, p3, p2):
                # across p1
                if backward_edge[1] == 0:
                    backward_opposite = [1, 0, -p1[0]]
                else:
                    backward_opposite = [backward_edge[0], -1, p1[1] - backward_edge[0] * p1[0]]
            else:
                # across p2
                if backward_edge[1] == 0:
                    backward_opposite = [1, 0, -p2[0]]
                else:
                    backward_opposite = [backward_edge[0], -1, p2[1] - backward_edge[0] * p2[0]]
            new_p1 = line_cross_point(backward_opposite, edge)
            new_p2 = line_cross_point(backward_opposite, edge_opposite)
            fitted_parallelograms.append([new_p0, new_p1, new_p2, new_p3, new_p0])
        # Keep the candidate with the smallest area (the closing point is dropped).
        # NOTE(review): line_cross_point can return None for parallel lines; a
        # candidate containing None is not handled here.
        areas = [Polygon(t).area for t in fitted_parallelograms]
        parallelogram = np.array(fitted_parallelograms[np.argmin(areas)][:-1], dtype=np.float32)
        # sort the polygon: start at the vertex with the smallest x + y
        parallelogram_coord_sum = np.sum(parallelogram, axis=1)
        min_coord_idx = np.argmin(parallelogram_coord_sum)
        parallelogram = parallelogram[
            [min_coord_idx, (min_coord_idx + 1) % 4, (min_coord_idx + 2) % 4, (min_coord_idx + 3) % 4]]

        rectange = rectangle_from_parallelogram(parallelogram)
        rectange, rotate_angle = sort_rectangle(rectange)

        p0_rect, p1_rect, p2_rect, p3_rect = rectange

        # if the poly is too small, then ignore it during training
        poly_h = min(np.linalg.norm(p0_rect - p3_rect), np.linalg.norm(p1_rect - p2_rect))
        poly_w = min(np.linalg.norm(p0_rect - p1_rect), np.linalg.norm(p2_rect - p3_rect))

        # Invalid when the shorter side is below 6, when there is no tag, or when the
        # rectangle is more than twice as tall as wide (`True and` has no effect).
        invaild = (min(poly_h, poly_w) < 6) or tag is None or (True and poly_h > poly_w * 2)

        if invaild:
            # Mask out the whole (unshrunk) polygon.
            cv2.fillPoly(training_mask, poly.astype(np.int32)[np.newaxis, :, :], 0)
        # (row, col) coordinates of the pixels belonging to this shrunk polygon
        xy_in_poly = np.argwhere(poly_mask == (poly_idx + 1))
        
        if not invaild:
            roiRotatePara = generate_roiRotatePara(rectange, rotate_angle)
            if roiRotatePara:
                outBox, cropBox, angle = roiRotatePara
                if min(cropBox[2:]) > 6:
                    # NOTE(review): this rebinds the image-level h and w unpacked from
                    # im_size; they are not read again afterwards in this function.
                    w , h = cropBox[2:]
                    textImgW = np.ceil(min(w / float(h) * 32, 256) / 4 /1)
                    #print(tag)
                    if textImgW >= 2 * min(len(tag), 16):  # avoid CTC error
                        outBoxs.append(outBox)
                        cropBoxs.append(cropBox)
                        angles.append(angle)
                        text_tags.append(tag[:16])
                        recg_masks.append(1.)

        # Geometry is written for every pixel of the shrunk polygon, whether or not
        # the polygon was flagged invalid above.
        for y, x in xy_in_poly:
            point = np.array([x, y], dtype=np.float32)
            # top
            geo_map[y, x, 0] = point_dist_to_line(p0_rect, p1_rect, point) + 3
            # right
            geo_map[y, x, 1] = point_dist_to_line(p1_rect, p2_rect, point) + 3
            # down
            geo_map[y, x, 2] = point_dist_to_line(p2_rect, p3_rect, point) + 3
            # left
            geo_map[y, x, 3] = point_dist_to_line(p3_rect, p0_rect, point) + 3
            # angle
            geo_map[y, x, 4] = rotate_angle
    if len(outBoxs) == 0:
        # No usable ROI: add one placeholder with recognition mask 0.
        outBoxs.append([0, 0, 2 * 4, 2 * 4]) # keep extract From sharedConv feature map not zero
        cropBoxs.append([0, 0, 2 * 4, 2 * 4])
        angles.append(0.)
        # NOTE(review): this subtraction needs NUM_CLASSES to be a number; the vocab
        # cell of this notebook assigns a set to it, which would raise a TypeError
        # here — confirm the intended value.
        text_tags.append([NUM_CLASSES - 2])
        recg_masks.append(0.)

    outBoxs = np.array(outBoxs, np.int32)
    cropBoxs = np.array(cropBoxs, np.int32)
    angles = np.array(angles, np.float32)

    return score_map, geo_map, training_mask, (outBoxs, cropBoxs, angles), text_tags, recg_masks
    

In [None]:
# This function prepares cropped word images from the original images.
def text_image_generation(input_size=512,random_scale=np.array([0.5, 3.0]),vis=False):
    '''
    Generate cropped word images from the SynthText dataset.

    Up to 5000 images found under 'synthtext' are sampled; for each one the
    annotation file sharing the image's base name (extension '.txt') is read and
    every accepted word box is cropped, resized to 128x64 and written to
    'text_box_synth/word_<n>.png'.

    :param input_size: unused, kept for interface compatibility
    :param random_scale: unused, kept for interface compatibility
    :param vis: unused, kept for interface compatibility
    :return: DataFrame with the columns "paths" (written image) and "words" (its text)

    NOTE(review): a later cell defines another text_image_generation(data), which
    replaces this definition once that cell has run.
    '''
    import traceback

    # Every file that is not an annotation (.txt) is treated as an image.
    image_list = []
    for root, directories, files in os.walk('synthtext'):
        for filename in files:
            if not filename.endswith('.txt'):
                # join the two strings in order to form the full filepath.
                image_list.append(os.path.join(root, filename))

    # Take a sample of (at most) 5k images; random.sample raises when asked for
    # more items than are available.
    image_list5k = random.sample(image_list, min(5000, len(image_list)))
    index = np.arange(0, len(image_list5k))
    np.random.shuffle(index)

    # cv2.imwrite does not create missing folders.
    os.makedirs('text_box_synth', exist_ok=True)

    c = 0
    paths = []
    words = []
    for i in tqdm(index):
        im_fn = image_list5k[i]
        try:
            im = cv2.imread(im_fn, cv2.IMREAD_UNCHANGED)
            h, w, _ = im.shape

            # The annotation sits next to the image with a .txt extension
            # (previously the image path itself was parsed as the annotation).
            txt_fn = os.path.splitext(im_fn)[0] + '.txt'
            if not os.path.exists(txt_fn):
                print('text file {} does not exists'.format(txt_fn))
                continue

            text_polys, text_tags = load_annotation(txt_fn)
            text_polys, text_tags = check_and_validate_polys(text_polys, text_tags, (h, w))

            score_map, geo_map, training_mask, rbox, text_tags, recg_mask = generate_rbox((h, w), text_polys, text_tags)

            outbox, cropbox, angle = rbox
            for my in range(len(outbox)):
                # A recognition mask of 0 marks the placeholder ROI.
                if recg_mask[my] == 0:
                    continue
                out = outbox[my]
                # The box [x, y, width, height] must lie inside the image.
                fits = (im.shape[0] > out[3] + out[1] and im.shape[1] > out[2] + out[0]
                        and out[2] >= 0 and out[3] >= 0 and out[1] >= 0 and out[0] >= 0)
                if not fits:
                    continue

                ang = angle[my]
                img1 = tf.image.crop_to_bounding_box(im, out[1], out[0], out[3], out[2])
                # NOTE(review): random_rotation takes a rotation *range* in degrees,
                # so the crop is rotated by a random amount within that range rather
                # than by exactly `ang` — confirm this is intended.
                img2 = tf.keras.preprocessing.image.random_rotation(img1, ang*180/np.pi,)
                if not isinstance(img2, np.ndarray):
                    img2 = img2.numpy()

                img3 = cv2.resize(img2, (128, 64), interpolation = cv2.INTER_AREA)
                img3 = cv2.detailEnhance(img3)
                c += 1
                out_path = 'text_box_synth/word_' + str(c) + '.png'
                cv2.imwrite(out_path, img3)
                paths.append(out_path)
                words.append(text_tags[my])
        except Exception as e:
            # Best effort: report the image that failed (from the sampled list) and go on.
            print(im_fn)
            traceback.print_exc()
            continue
    data = pd.DataFrame({"paths": paths, "words": words})
    return data

In [None]:
# df.to_csv("22k_images_syntext.csv", index = False)

In [None]:
# This function is used to prepare all images from the ICDAR 2015 original images
#https://github.com/argman/EAST/blob/master/icdar.py

def text_image_generation(data):
    '''Generate cropped word images from the SynthText dataset.

    For every (image, ground-truth) pair in ``data`` the annotation file is
    loaded and validated, ``generate_rbox`` is asked for the per-word boxes,
    and every word whose recognition-mask entry is non-zero and whose box lies
    fully inside the image is cropped, rotated, resized to 128x64,
    detail-enhanced and written to
    ``synthetic_data/cropped_images/synthetic_img_<n>.png``.

    Parameters
    ----------
    data : pandas.DataFrame
        Must provide the columns ``images_path`` (image file) and ``gt_path``
        (matching ground-truth file), row-aligned.

    Returns
    -------
    pandas.DataFrame
        One row per written crop with the columns ``paths`` (crop file) and
        ``words`` (the text tag reported for that word).

    Notes
    -----
    Any exception raised while handling one image is printed together with the
    image path, counted, and the image is skipped; the total is printed as
    ``total_errors`` at the end. A missing ground-truth file is reported and
    skipped without being counted as an error.
    '''
    import traceback  # local import: not part of the notebook's import cell

    # cv2.imwrite does not create missing directories, it only returns False,
    # so make sure the target folder exists before the (long) loop starts.
    os.makedirs('synthetic_data/cropped_images', exist_ok=True)

    paths = []
    words = []
    count = 0
    error_count = 0
    # Iterating the two columns in lockstep keeps img_path bound for the error
    # report below and does not depend on the frame's index labels.
    for img_path, gt in tqdm(zip(data['images_path'], data['gt_path']), total=len(data)):
        try:
            img = cv2.imread(img_path, cv2.IMREAD_UNCHANGED)
            if img is None:
                # cv2.imread signals an unreadable file by returning None.
                raise FileNotFoundError("could not read image {}".format(img_path))
            h, w, _ = img.shape

            if not os.path.exists(gt):
                print("file -> {} does not exist".format(gt))
                continue
            text_polys, text_tags = load_annotation(gt) #getting coordinates and text from ground truth file
            text_polys, text_tags = check_and_validate_polys(text_polys, text_tags, (h, w)) #validating all coordinates

            score_map, geo_map, training_mask, rbox, text_tags, recg_mask = generate_rbox((h, w), text_polys, text_tags)

            outbox, _cropbox, angle = rbox
            for j in range(len(outbox)):
                # Only words flagged in the recognition mask are cropped. The
                # bounds check below must stay inside this guard, otherwise a
                # masked word would be cropped with the previous word's box.
                if recg_mask[j] == 0:
                    continue

                # out is used as (x offset, y offset, width, height).
                out = outbox[j]
                inside_image = (img.shape[0] > out[3] + out[1] and img.shape[1] > out[2] + out[0]
                                and out[2] >= 0 and out[3] >= 0 and out[1] >= 0 and out[0] >= 0)
                if not inside_image:
                    continue

                ang = angle[j]
                img1 = tf.image.crop_to_bounding_box(img, out[1], out[0], out[3], out[2])
                # NOTE(review): random_rotation treats its second argument as a
                # range and, per the Keras docs, defaults to channels-first axes;
                # confirm a random (rather than exact) rotation of an HWC crop is
                # what is wanted here. Behaviour is kept as in the original.
                img2 = tf.keras.preprocessing.image.random_rotation(img1, ang*180/np.pi,)
                if not isinstance(img2, np.ndarray):
                    img2 = img2.numpy()

                img3 = cv2.resize(img2, (128, 64), interpolation = cv2.INTER_AREA)
                img3 = cv2.detailEnhance(img3)
                count += 1
                out_path = 'synthetic_data/cropped_images/synthetic_img_'+str(count)+'.png'
                cv2.imwrite(out_path, img3)
                paths.append(out_path)
                words.append(text_tags[j])

        except Exception:
            # Best effort: report the offending image and carry on with the rest.
            print(img_path)
            error_count += 1
            traceback.print_exc()
            continue
    print("total_errors", error_count)
    data_new = pd.DataFrame({"paths":paths,"words":words})
    return data_new
        

In [None]:
def data_sample(data, size):
    '''Draw a random sample of (image, ground-truth) path pairs.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with the columns ``images_path`` and ``gt_path``.
    size : int
        Number of rows to draw without replacement. ``random.sample`` raises
        ``ValueError`` when it exceeds ``len(data)``.

    Returns
    -------
    pandas.DataFrame or None
        A new frame (fresh 0..size-1 index) holding the sampled
        ``images_path`` / ``gt_path`` pairs, or ``None`` if any sampled pair
        does not share the same file name once the extension is removed.
    '''
    # Sample row *positions* and select with .iloc so the result is correct
    # even when the frame's index is not the default 0..n-1 range.
    positions = random.sample(range(len(data)), size)
    new_data = (
        data[['images_path', 'gt_path']]
        .iloc[positions]
        .reset_index(drop=True)
    )

    # Sanity check: each image must sit next to the ground truth of the same
    # name. splitext copes with extensions of any length (.jpg, .jpeg, .txt).
    for im_p, gt_p in zip(new_data['images_path'], new_data['gt_path']):
        im_stem = os.path.splitext(os.path.basename(im_p))[0]
        gt_stem = os.path.splitext(os.path.basename(gt_p))[0]
        if im_stem != gt_stem:
            print("there is problem with data, col values not aligned")
            return None

    # Reached for an empty sample as well, which previously hit an unbound flag.
    return new_data

In [None]:
# Draw a random sample of 10000 (image, ground-truth) path pairs from df.
# NOTE: data_sample returns None when a sampled pair is misaligned, in which
# case the call below fails; check `nn` before starting the long run.
nn = data_sample(df, 10000)
# Crop the usable words out of the sampled images; the result is a DataFrame
# with the columns "paths" (crop file) and "words" (text label).
final_synthetic_text_data = text_image_generation(nn)

  2%|█▋                                                                         | 217/10000 [08:52<10:09:56,  3.74s/it]Traceback (most recent call last):
  File "<ipython-input-255-e6b4a3e5c545>", line 23, in text_image_generation
    score_map, geo_map, training_mask, rbox, text_tags, recg_mask = generate_rbox((h, w), text_polys, text_tags)
  File "<ipython-input-148-1eb73bf4c7ef>", line 483, in generate_rbox
    text_tags.append([NUM_CLASSES - 2])
TypeError: unsupported operand type(s) for -: 'set' and 'int'
  2%|█▋                                                                          | 218/10000 [08:52<7:25:26,  2.73s/it]

synthetic_data/images/10/baroque_137_89.jpg


  3%|██                                                                         | 278/10000 [12:09<11:18:54,  4.19s/it]Traceback (most recent call last):
  File "<ipython-input-255-e6b4a3e5c545>", line 23, in text_image_generation
    score_map, geo_map, training_mask, rbox, text_tags, recg_mask = generate_rbox((h, w), text_polys, text_tags)
  File "<ipython-input-148-1eb73bf4c7ef>", line 483, in generate_rbox
    text_tags.append([NUM_CLASSES - 2])
TypeError: unsupported operand type(s) for -: 'set' and 'int'
  3%|██                                                                          | 279/10000 [12:09<8:01:35,  2.97s/it]

synthetic_data/images/10/baroque_147_102.jpg


  4%|██▋                                                                         | 356/10000 [15:50<3:27:50,  1.29s/it]Traceback (most recent call last):
  File "<ipython-input-255-e6b4a3e5c545>", line 23, in text_image_generation
    score_map, geo_map, training_mask, rbox, text_tags, recg_mask = generate_rbox((h, w), text_polys, text_tags)
  File "<ipython-input-148-1eb73bf4c7ef>", line 483, in generate_rbox
    text_tags.append([NUM_CLASSES - 2])
TypeError: unsupported operand type(s) for -: 'set' and 'int'
  4%|██▋                                                                         | 357/10000 [15:50<2:34:46,  1.04it/s]

synthetic_data/images/10/baroque_20_84.jpg


 11%|████████▎                                                                  | 1100/10000 [51:34<7:38:41,  3.09s/it]Traceback (most recent call last):
  File "<ipython-input-255-e6b4a3e5c545>", line 23, in text_image_generation
    score_map, geo_map, training_mask, rbox, text_tags, recg_mask = generate_rbox((h, w), text_polys, text_tags)
  File "<ipython-input-148-1eb73bf4c7ef>", line 483, in generate_rbox
    text_tags.append([NUM_CLASSES - 2])
TypeError: unsupported operand type(s) for -: 'set' and 'int'
 11%|████████▎                                                                  | 1101/10000 [51:35<5:29:21,  2.22s/it]

synthetic_data/images/4/aquarium_39_90.jpg


 17%|████████████▎                                                            | 1689/10000 [1:24:01<7:04:29,  3.06s/it]Traceback (most recent call last):
  File "<ipython-input-255-e6b4a3e5c545>", line 23, in text_image_generation
    score_map, geo_map, training_mask, rbox, text_tags, recg_mask = generate_rbox((h, w), text_polys, text_tags)
  File "<ipython-input-148-1eb73bf4c7ef>", line 483, in generate_rbox
    text_tags.append([NUM_CLASSES - 2])
TypeError: unsupported operand type(s) for -: 'set' and 'int'
 17%|████████████▎                                                            | 1690/10000 [1:24:01<5:09:14,  2.23s/it]

synthetic_data/images/10/baroque_70_0.jpg


 18%|████████████▊                                                            | 1756/10000 [1:27:53<7:49:34,  3.42s/it]Traceback (most recent call last):
  File "<ipython-input-255-e6b4a3e5c545>", line 23, in text_image_generation
    score_map, geo_map, training_mask, rbox, text_tags, recg_mask = generate_rbox((h, w), text_polys, text_tags)
  File "<ipython-input-148-1eb73bf4c7ef>", line 483, in generate_rbox
    text_tags.append([NUM_CLASSES - 2])
TypeError: unsupported operand type(s) for -: 'set' and 'int'
 18%|████████████▊                                                            | 1757/10000 [1:27:53<5:35:42,  2.44s/it]

synthetic_data/images/10/baroque_137_29.jpg


 21%|███████████████▎                                                         | 2092/10000 [1:48:11<4:10:01,  1.90s/it]Traceback (most recent call last):
  File "<ipython-input-255-e6b4a3e5c545>", line 23, in text_image_generation
    score_map, geo_map, training_mask, rbox, text_tags, recg_mask = generate_rbox((h, w), text_polys, text_tags)
  File "<ipython-input-148-1eb73bf4c7ef>", line 483, in generate_rbox
    text_tags.append([NUM_CLASSES - 2])
TypeError: unsupported operand type(s) for -: 'set' and 'int'
 21%|███████████████▎                                                         | 2094/10000 [1:48:14<3:28:15,  1.58s/it]

synthetic_data/images/7/asphalt_50_64.jpg


 23%|████████████████▌                                                        | 2277/10000 [1:58:14<5:00:10,  2.33s/it]Traceback (most recent call last):
  File "<ipython-input-255-e6b4a3e5c545>", line 23, in text_image_generation
    score_map, geo_map, training_mask, rbox, text_tags, recg_mask = generate_rbox((h, w), text_polys, text_tags)
  File "<ipython-input-148-1eb73bf4c7ef>", line 483, in generate_rbox
    text_tags.append([NUM_CLASSES - 2])
TypeError: unsupported operand type(s) for -: 'set' and 'int'
 23%|████████████████▋                                                        | 2278/10000 [1:58:15<4:20:33,  2.02s/it]

synthetic_data/images/1/ant+hill_12_2.jpg


 27%|███████████████████▌                                                     | 2673/10000 [2:21:28<8:01:57,  3.95s/it]Traceback (most recent call last):
  File "<ipython-input-255-e6b4a3e5c545>", line 23, in text_image_generation
    score_map, geo_map, training_mask, rbox, text_tags, recg_mask = generate_rbox((h, w), text_polys, text_tags)
  File "<ipython-input-148-1eb73bf4c7ef>", line 483, in generate_rbox
    text_tags.append([NUM_CLASSES - 2])
TypeError: unsupported operand type(s) for -: 'set' and 'int'
 27%|███████████████████▌                                                     | 2674/10000 [2:21:29<5:57:54,  2.93s/it]

synthetic_data/images/10/baroque_27_105.jpg


 29%|█████████████████████                                                   | 2918/10000 [2:35:58<10:31:50,  5.35s/it]Traceback (most recent call last):
  File "<ipython-input-255-e6b4a3e5c545>", line 23, in text_image_generation
    score_map, geo_map, training_mask, rbox, text_tags, recg_mask = generate_rbox((h, w), text_polys, text_tags)
  File "<ipython-input-148-1eb73bf4c7ef>", line 483, in generate_rbox
    text_tags.append([NUM_CLASSES - 2])
TypeError: unsupported operand type(s) for -: 'set' and 'int'
 29%|█████████████████████▎                                                   | 2919/10000 [2:35:58<7:30:55,  3.82s/it]

synthetic_data/images/10/baroque_137_6.jpg


 30%|█████████████████████▌                                                   | 2960/10000 [2:38:27<8:00:15,  4.09s/it]Traceback (most recent call last):
  File "<ipython-input-255-e6b4a3e5c545>", line 23, in text_image_generation
    score_map, geo_map, training_mask, rbox, text_tags, recg_mask = generate_rbox((h, w), text_polys, text_tags)
  File "<ipython-input-148-1eb73bf4c7ef>", line 483, in generate_rbox
    text_tags.append([NUM_CLASSES - 2])
TypeError: unsupported operand type(s) for -: 'set' and 'int'
 30%|█████████████████████▌                                                   | 2961/10000 [2:38:28<5:45:10,  2.94s/it]

synthetic_data/images/8/ballet_115_92.jpg


 31%|██████████████████████▌                                                  | 3096/10000 [2:47:02<7:43:27,  4.03s/it]Traceback (most recent call last):
  File "<ipython-input-255-e6b4a3e5c545>", line 23, in text_image_generation
    score_map, geo_map, training_mask, rbox, text_tags, recg_mask = generate_rbox((h, w), text_polys, text_tags)
  File "<ipython-input-148-1eb73bf4c7ef>", line 483, in generate_rbox
    text_tags.append([NUM_CLASSES - 2])
TypeError: unsupported operand type(s) for -: 'set' and 'int'
 31%|██████████████████████▌                                                  | 3097/10000 [2:47:02<5:39:43,  2.95s/it]

synthetic_data/images/10/baroque_137_88.jpg


 31%|██████████████████████▍                                                 | 3117/10000 [2:48:31<11:27:28,  5.99s/it]Traceback (most recent call last):
  File "<ipython-input-255-e6b4a3e5c545>", line 23, in text_image_generation
    score_map, geo_map, training_mask, rbox, text_tags, recg_mask = generate_rbox((h, w), text_polys, text_tags)
  File "<ipython-input-148-1eb73bf4c7ef>", line 483, in generate_rbox
    text_tags.append([NUM_CLASSES - 2])
TypeError: unsupported operand type(s) for -: 'set' and 'int'
 31%|██████████████████████▊                                                  | 3118/10000 [2:48:31<8:13:52,  4.31s/it]

synthetic_data/images/1/ant+hill_113_90.jpg


 32%|██████████████████████▋                                                 | 3152/10000 [2:50:51<10:24:04,  5.47s/it]Traceback (most recent call last):
  File "<ipython-input-255-e6b4a3e5c545>", line 23, in text_image_generation
    score_map, geo_map, training_mask, rbox, text_tags, recg_mask = generate_rbox((h, w), text_polys, text_tags)
  File "<ipython-input-148-1eb73bf4c7ef>", line 483, in generate_rbox
    text_tags.append([NUM_CLASSES - 2])
TypeError: unsupported operand type(s) for -: 'set' and 'int'
 32%|███████████████████████                                                  | 3153/10000 [2:50:51<7:32:08,  3.96s/it]

synthetic_data/images/10/baroque_137_42.jpg


 38%|███████████████████████████▍                                             | 3762/10000 [3:22:15<6:48:54,  3.93s/it]Traceback (most recent call last):
  File "<ipython-input-255-e6b4a3e5c545>", line 23, in text_image_generation
    score_map, geo_map, training_mask, rbox, text_tags, recg_mask = generate_rbox((h, w), text_polys, text_tags)
  File "<ipython-input-148-1eb73bf4c7ef>", line 483, in generate_rbox
    text_tags.append([NUM_CLASSES - 2])
TypeError: unsupported operand type(s) for -: 'set' and 'int'
 38%|███████████████████████████▍                                             | 3763/10000 [3:22:15<4:50:44,  2.80s/it]

synthetic_data/images/4/aquarium_146_6.jpg


 40%|█████████████████████████████▎                                           | 4023/10000 [3:35:23<4:02:38,  2.44s/it]Traceback (most recent call last):
  File "<ipython-input-255-e6b4a3e5c545>", line 23, in text_image_generation
    score_map, geo_map, training_mask, rbox, text_tags, recg_mask = generate_rbox((h, w), text_polys, text_tags)
  File "<ipython-input-148-1eb73bf4c7ef>", line 483, in generate_rbox
    text_tags.append([NUM_CLASSES - 2])
TypeError: unsupported operand type(s) for -: 'set' and 'int'
 40%|█████████████████████████████▍                                           | 4024/10000 [3:35:24<3:07:15,  1.88s/it]

synthetic_data/images/10/baroque_56_27.jpg


 42%|██████████████████████████████▊                                          | 4214/10000 [3:43:40<3:06:22,  1.93s/it]Traceback (most recent call last):
  File "<ipython-input-255-e6b4a3e5c545>", line 23, in text_image_generation
    score_map, geo_map, training_mask, rbox, text_tags, recg_mask = generate_rbox((h, w), text_polys, text_tags)
  File "<ipython-input-148-1eb73bf4c7ef>", line 483, in generate_rbox
    text_tags.append([NUM_CLASSES - 2])
TypeError: unsupported operand type(s) for -: 'set' and 'int'
 42%|██████████████████████████████▊                                          | 4215/10000 [3:43:41<2:25:11,  1.51s/it]

synthetic_data/images/4/aquarium_40_33.jpg


 44%|███████████████████████████████▉                                         | 4369/10000 [3:51:09<5:04:23,  3.24s/it]Traceback (most recent call last):
  File "<ipython-input-255-e6b4a3e5c545>", line 23, in text_image_generation
    score_map, geo_map, training_mask, rbox, text_tags, recg_mask = generate_rbox((h, w), text_polys, text_tags)
  File "<ipython-input-148-1eb73bf4c7ef>", line 483, in generate_rbox
    text_tags.append([NUM_CLASSES - 2])
TypeError: unsupported operand type(s) for -: 'set' and 'int'
 44%|███████████████████████████████▉                                         | 4370/10000 [3:51:09<3:40:30,  2.35s/it]

synthetic_data/images/4/aquarium_55_70.jpg


 44%|████████████████████████████████                                         | 4394/10000 [3:52:17<6:27:27,  4.15s/it]Traceback (most recent call last):
  File "<ipython-input-255-e6b4a3e5c545>", line 23, in text_image_generation
    score_map, geo_map, training_mask, rbox, text_tags, recg_mask = generate_rbox((h, w), text_polys, text_tags)
  File "<ipython-input-148-1eb73bf4c7ef>", line 483, in generate_rbox
    text_tags.append([NUM_CLASSES - 2])
TypeError: unsupported operand type(s) for -: 'set' and 'int'
 44%|████████████████████████████████                                         | 4395/10000 [3:52:18<4:37:14,  2.97s/it]

synthetic_data/images/10/baroque_34_19.jpg


 46%|█████████████████████████████████▊                                       | 4633/10000 [4:03:46<3:57:36,  2.66s/it]Traceback (most recent call last):
  File "<ipython-input-255-e6b4a3e5c545>", line 23, in text_image_generation
    score_map, geo_map, training_mask, rbox, text_tags, recg_mask = generate_rbox((h, w), text_polys, text_tags)
  File "<ipython-input-148-1eb73bf4c7ef>", line 483, in generate_rbox
    text_tags.append([NUM_CLASSES - 2])
TypeError: unsupported operand type(s) for -: 'set' and 'int'
 46%|█████████████████████████████████▊                                       | 4634/10000 [4:03:47<2:54:55,  1.96s/it]

synthetic_data/images/4/aquarium_55_42.jpg


 47%|██████████████████████████████████▌                                      | 4735/10000 [4:08:38<5:09:37,  3.53s/it]Traceback (most recent call last):
  File "<ipython-input-255-e6b4a3e5c545>", line 23, in text_image_generation
    score_map, geo_map, training_mask, rbox, text_tags, recg_mask = generate_rbox((h, w), text_polys, text_tags)
  File "<ipython-input-148-1eb73bf4c7ef>", line 483, in generate_rbox
    text_tags.append([NUM_CLASSES - 2])
TypeError: unsupported operand type(s) for -: 'set' and 'int'
 47%|██████████████████████████████████▌                                      | 4736/10000 [4:08:38<3:43:21,  2.55s/it]

synthetic_data/images/10/baroque_66_65.jpg


 48%|██████████████████████████████████▉                                      | 4790/10000 [4:11:25<6:28:50,  4.48s/it]Traceback (most recent call last):
  File "<ipython-input-255-e6b4a3e5c545>", line 23, in text_image_generation
    score_map, geo_map, training_mask, rbox, text_tags, recg_mask = generate_rbox((h, w), text_polys, text_tags)
  File "<ipython-input-148-1eb73bf4c7ef>", line 483, in generate_rbox
    text_tags.append([NUM_CLASSES - 2])
TypeError: unsupported operand type(s) for -: 'set' and 'int'
 48%|██████████████████████████████████▉                                      | 4791/10000 [4:11:25<4:39:23,  3.22s/it]

synthetic_data/images/4/aquarium_40_19.jpg


 48%|███████████████████████████████████▎                                     | 4835/10000 [4:13:39<4:44:13,  3.30s/it]Traceback (most recent call last):
  File "<ipython-input-255-e6b4a3e5c545>", line 23, in text_image_generation
    score_map, geo_map, training_mask, rbox, text_tags, recg_mask = generate_rbox((h, w), text_polys, text_tags)
  File "<ipython-input-148-1eb73bf4c7ef>", line 483, in generate_rbox
    text_tags.append([NUM_CLASSES - 2])
TypeError: unsupported operand type(s) for -: 'set' and 'int'
 48%|███████████████████████████████████▎                                     | 4836/10000 [4:13:39<3:26:03,  2.39s/it]

synthetic_data/images/4/aquarium_40_49.jpg


 50%|████████████████████████████████████▌                                    | 5016/10000 [4:22:16<2:25:18,  1.75s/it]Traceback (most recent call last):
  File "<ipython-input-255-e6b4a3e5c545>", line 23, in text_image_generation
    score_map, geo_map, training_mask, rbox, text_tags, recg_mask = generate_rbox((h, w), text_polys, text_tags)
  File "<ipython-input-148-1eb73bf4c7ef>", line 483, in generate_rbox
    text_tags.append([NUM_CLASSES - 2])
TypeError: unsupported operand type(s) for -: 'set' and 'int'
 50%|████████████████████████████████████▌                                    | 5017/10000 [4:22:16<1:49:47,  1.32s/it]

synthetic_data/images/10/baroque_52_67.jpg


 52%|█████████████████████████████████████▉                                   | 5191/10000 [4:30:45<3:07:27,  2.34s/it]Traceback (most recent call last):
  File "<ipython-input-255-e6b4a3e5c545>", line 23, in text_image_generation
    score_map, geo_map, training_mask, rbox, text_tags, recg_mask = generate_rbox((h, w), text_polys, text_tags)
  File "<ipython-input-148-1eb73bf4c7ef>", line 483, in generate_rbox
    text_tags.append([NUM_CLASSES - 2])
TypeError: unsupported operand type(s) for -: 'set' and 'int'
 52%|█████████████████████████████████████▉                                   | 5192/10000 [4:30:45<2:20:32,  1.75s/it]

synthetic_data/images/10/baroque_20_72.jpg


 58%|██████████████████████████████████████████                               | 5770/10000 [5:00:55<4:46:01,  4.06s/it]Traceback (most recent call last):
  File "<ipython-input-255-e6b4a3e5c545>", line 23, in text_image_generation
    score_map, geo_map, training_mask, rbox, text_tags, recg_mask = generate_rbox((h, w), text_polys, text_tags)
  File "<ipython-input-148-1eb73bf4c7ef>", line 483, in generate_rbox
    text_tags.append([NUM_CLASSES - 2])
TypeError: unsupported operand type(s) for -: 'set' and 'int'
 58%|██████████████████████████████████████████▏                              | 5771/10000 [5:00:55<3:23:30,  2.89s/it]

synthetic_data/images/10/baroque_60_62.jpg


 58%|██████████████████████████████████████████▍                              | 5805/10000 [5:03:50<5:09:49,  4.43s/it]Traceback (most recent call last):
  File "<ipython-input-255-e6b4a3e5c545>", line 23, in text_image_generation
    score_map, geo_map, training_mask, rbox, text_tags, recg_mask = generate_rbox((h, w), text_polys, text_tags)
  File "<ipython-input-148-1eb73bf4c7ef>", line 483, in generate_rbox
    text_tags.append([NUM_CLASSES - 2])
TypeError: unsupported operand type(s) for -: 'set' and 'int'
 58%|██████████████████████████████████████████▍                              | 5806/10000 [5:03:52<4:16:06,  3.66s/it]

synthetic_data/images/1/ant+hill_110_82.jpg


 58%|██████████████████████████████████████████▍                              | 5811/10000 [5:04:12<4:03:23,  3.49s/it]Traceback (most recent call last):
  File "<ipython-input-255-e6b4a3e5c545>", line 23, in text_image_generation
    score_map, geo_map, training_mask, rbox, text_tags, recg_mask = generate_rbox((h, w), text_polys, text_tags)
  File "<ipython-input-148-1eb73bf4c7ef>", line 483, in generate_rbox
    text_tags.append([NUM_CLASSES - 2])
TypeError: unsupported operand type(s) for -: 'set' and 'int'
 58%|██████████████████████████████████████████▍                              | 5812/10000 [5:04:14<3:32:49,  3.05s/it]

synthetic_data/images/1/ant+hill_129_32.jpg


 64%|██████████████████████████████████████████████▊                          | 6417/10000 [5:57:28<3:35:17,  3.61s/it]Traceback (most recent call last):
  File "<ipython-input-255-e6b4a3e5c545>", line 23, in text_image_generation
    score_map, geo_map, training_mask, rbox, text_tags, recg_mask = generate_rbox((h, w), text_polys, text_tags)
  File "<ipython-input-148-1eb73bf4c7ef>", line 483, in generate_rbox
    text_tags.append([NUM_CLASSES - 2])
TypeError: unsupported operand type(s) for -: 'set' and 'int'
 64%|██████████████████████████████████████████████▊                          | 6418/10000 [5:57:29<2:42:53,  2.73s/it]

synthetic_data/images/4/aquarium_40_47.jpg


 67%|████████████████████████████████████████████████▊                        | 6690/10000 [6:20:19<3:02:17,  3.30s/it]Traceback (most recent call last):
  File "<ipython-input-255-e6b4a3e5c545>", line 23, in text_image_generation
    score_map, geo_map, training_mask, rbox, text_tags, recg_mask = generate_rbox((h, w), text_polys, text_tags)
  File "<ipython-input-148-1eb73bf4c7ef>", line 483, in generate_rbox
    text_tags.append([NUM_CLASSES - 2])
TypeError: unsupported operand type(s) for -: 'set' and 'int'
 67%|████████████████████████████████████████████████▊                        | 6691/10000 [6:20:19<2:10:27,  2.37s/it]

synthetic_data/images/10/baroque_27_77.jpg


 70%|██████████████████████████████████████████████████▊                      | 6962/10000 [6:42:36<3:45:29,  4.45s/it]Traceback (most recent call last):
  File "<ipython-input-255-e6b4a3e5c545>", line 23, in text_image_generation
    score_map, geo_map, training_mask, rbox, text_tags, recg_mask = generate_rbox((h, w), text_polys, text_tags)
  File "<ipython-input-148-1eb73bf4c7ef>", line 483, in generate_rbox
    text_tags.append([NUM_CLASSES - 2])
TypeError: unsupported operand type(s) for -: 'set' and 'int'
 70%|██████████████████████████████████████████████████▊                      | 6963/10000 [6:42:36<2:45:21,  3.27s/it]

synthetic_data/images/4/aquarium_39_37.jpg


 75%|██████████████████████████████████████████████████████▉                  | 7521/10000 [7:33:49<3:04:43,  4.47s/it]Traceback (most recent call last):
  File "<ipython-input-255-e6b4a3e5c545>", line 23, in text_image_generation
    score_map, geo_map, training_mask, rbox, text_tags, recg_mask = generate_rbox((h, w), text_polys, text_tags)
  File "<ipython-input-148-1eb73bf4c7ef>", line 483, in generate_rbox
    text_tags.append([NUM_CLASSES - 2])
TypeError: unsupported operand type(s) for -: 'set' and 'int'
 75%|██████████████████████████████████████████████████████▉                  | 7522/10000 [7:33:49<2:12:25,  3.21s/it]

synthetic_data/images/4/aquarium_40_23.jpg


 77%|███████████████████████████████████████████████████████▉                 | 7667/10000 [7:48:47<3:38:44,  5.63s/it]Traceback (most recent call last):
  File "<ipython-input-255-e6b4a3e5c545>", line 23, in text_image_generation
    score_map, geo_map, training_mask, rbox, text_tags, recg_mask = generate_rbox((h, w), text_polys, text_tags)
  File "<ipython-input-148-1eb73bf4c7ef>", line 483, in generate_rbox
    text_tags.append([NUM_CLASSES - 2])
TypeError: unsupported operand type(s) for -: 'set' and 'int'
 77%|███████████████████████████████████████████████████████▉                 | 7668/10000 [7:48:50<3:10:03,  4.89s/it]

synthetic_data/images/1/ant+hill_110_26.jpg


 77%|████████████████████████████████████████████████████████▎                | 7707/10000 [7:52:28<4:50:37,  7.60s/it]Traceback (most recent call last):
  File "<ipython-input-255-e6b4a3e5c545>", line 23, in text_image_generation
    score_map, geo_map, training_mask, rbox, text_tags, recg_mask = generate_rbox((h, w), text_polys, text_tags)
  File "<ipython-input-148-1eb73bf4c7ef>", line 483, in generate_rbox
    text_tags.append([NUM_CLASSES - 2])
TypeError: unsupported operand type(s) for -: 'set' and 'int'
 77%|████████████████████████████████████████████████████████▎                | 7708/10000 [7:52:28<3:27:34,  5.43s/it]

synthetic_data/images/8/ballet_41_105.jpg


 78%|████████████████████████████████████████████████████████▌                | 7753/10000 [7:56:44<2:20:26,  3.75s/it]Traceback (most recent call last):
  File "<ipython-input-255-e6b4a3e5c545>", line 23, in text_image_generation
    score_map, geo_map, training_mask, rbox, text_tags, recg_mask = generate_rbox((h, w), text_polys, text_tags)
  File "<ipython-input-148-1eb73bf4c7ef>", line 483, in generate_rbox
    text_tags.append([NUM_CLASSES - 2])
TypeError: unsupported operand type(s) for -: 'set' and 'int'
 78%|████████████████████████████████████████████████████████▌                | 7754/10000 [7:56:44<1:43:02,  2.75s/it]

synthetic_data/images/10/baroque_137_17.jpg


 80%|██████████████████████████████████████████████████████████               | 7951/10000 [8:15:28<4:36:23,  8.09s/it]Traceback (most recent call last):
  File "<ipython-input-255-e6b4a3e5c545>", line 23, in text_image_generation
    score_map, geo_map, training_mask, rbox, text_tags, recg_mask = generate_rbox((h, w), text_polys, text_tags)
  File "<ipython-input-148-1eb73bf4c7ef>", line 483, in generate_rbox
    text_tags.append([NUM_CLASSES - 2])
TypeError: unsupported operand type(s) for -: 'set' and 'int'
 80%|██████████████████████████████████████████████████████████               | 7952/10000 [8:15:30<3:32:36,  6.23s/it]

synthetic_data/images/10/baroque_40_49.jpg


 83%|████████████████████████████████████████████████████████████▊            | 8323/10000 [8:52:15<3:04:50,  6.61s/it]Traceback (most recent call last):
  File "<ipython-input-255-e6b4a3e5c545>", line 23, in text_image_generation
    score_map, geo_map, training_mask, rbox, text_tags, recg_mask = generate_rbox((h, w), text_polys, text_tags)
  File "<ipython-input-148-1eb73bf4c7ef>", line 483, in generate_rbox
    text_tags.append([NUM_CLASSES - 2])
TypeError: unsupported operand type(s) for -: 'set' and 'int'
 83%|████████████████████████████████████████████████████████████▊            | 8324/10000 [8:52:15<2:13:06,  4.77s/it]

synthetic_data/images/1/ant+hill_3_60.jpg


 84%|█████████████████████████████████████████████████████████████▎           | 8405/10000 [9:00:37<2:30:51,  5.67s/it]Traceback (most recent call last):
  File "<ipython-input-255-e6b4a3e5c545>", line 23, in text_image_generation
    score_map, geo_map, training_mask, rbox, text_tags, recg_mask = generate_rbox((h, w), text_polys, text_tags)
  File "<ipython-input-148-1eb73bf4c7ef>", line 483, in generate_rbox
    text_tags.append([NUM_CLASSES - 2])
TypeError: unsupported operand type(s) for -: 'set' and 'int'
 84%|█████████████████████████████████████████████████████████████▎           | 8406/10000 [9:00:38<1:49:40,  4.13s/it]

synthetic_data/images/4/aquarium_56_7.jpg


 86%|███████████████████████████████████████████████████████████████          | 8632/10000 [9:21:54<1:16:20,  3.35s/it]Traceback (most recent call last):
  File "<ipython-input-255-e6b4a3e5c545>", line 23, in text_image_generation
    score_map, geo_map, training_mask, rbox, text_tags, recg_mask = generate_rbox((h, w), text_polys, text_tags)
  File "<ipython-input-148-1eb73bf4c7ef>", line 483, in generate_rbox
    text_tags.append([NUM_CLASSES - 2])
TypeError: unsupported operand type(s) for -: 'set' and 'int'
 86%|████████████████████████████████████████████████████████████████▋          | 8633/10000 [9:21:55<55:06,  2.42s/it]

synthetic_data/images/10/baroque_60_89.jpg


 89%|█████████████████████████████████████████████████████████████████▎       | 8944/10000 [9:55:18<2:15:25,  7.69s/it]Traceback (most recent call last):
  File "<ipython-input-255-e6b4a3e5c545>", line 23, in text_image_generation
    score_map, geo_map, training_mask, rbox, text_tags, recg_mask = generate_rbox((h, w), text_polys, text_tags)
  File "<ipython-input-148-1eb73bf4c7ef>", line 483, in generate_rbox
    text_tags.append([NUM_CLASSES - 2])
TypeError: unsupported operand type(s) for -: 'set' and 'int'
 89%|█████████████████████████████████████████████████████████████████▎       | 8945/10000 [9:55:19<1:38:50,  5.62s/it]

synthetic_data/images/1/ant+hill_12_47.jpg


 97%|███████████████████████████████████████████████████████████████████████▌  | 9668/10000 [11:07:18<54:35,  9.87s/it]

-0.22671446 [591.8989  361.88324] [492.7 339. ]


100%|█████████████████████████████████████████████████████████████████████████| 10000/10000 [11:41:13<00:00,  4.21s/it]


total_errors 40


In [None]:
# Preview the first rows of the generated crop-path / word table.
final_synthetic_text_data.head()

Unnamed: 0,paths,words
0,synthetic_data/cropped_images/synthetic_img_1.png,which
1,synthetic_data/cropped_images/synthetic_img_2.png,and
2,synthetic_data/cropped_images/synthetic_img_3.png,the
3,synthetic_data/cropped_images/synthetic_img_4.png,with
4,synthetic_data/cropped_images/synthetic_img_5.png,Los


In [None]:
# Save the crop-path / word table to CSV, leaving out the index column.
final_synthetic_text_data.to_csv("synthtext_cropped_data.csv", index = False)