In [1]:
import numpy as np
import pandas as pd
import cv2
import glob
import os
import math
from uuid import uuid4
from preprocessing import preprocess
import config

In [2]:
def load_metadata():
    '''
    Load the ground-truth annotations of every image in the dataset.

    For each ``*.JPG`` file found in ``config.DATASET_PATH`` the sibling
    ``.gt`` file (same base name) is read as a space-delimited, header-less
    table with the columns ``patch_number, difficult, x, y, w, h, angle``.
    A ``filename`` column holding the image base name (without extension)
    is added to every row.

    Images whose ``.gt`` file is missing or empty are reported on stdout and
    skipped. Any other read/parse error is propagated instead of being
    silently reported as "contains no text".

    Returns:
        pandas.DataFrame: all annotation rows concatenated. The per-file row
        index is kept as an ``index`` column (result of ``reset_index()``).
        If no annotation could be loaded, an empty frame with the same
        columns is returned.
    '''
    names = ['patch_number', 'difficult', 'x', 'y', 'w', 'h', 'angle']
    # Builtin types are used because the np.int / np.bool / np.float aliases
    # were removed from NumPy; with the old aliases the AttributeError was
    # swallowed by the except clause and every file was skipped.
    dtype = [int, bool, int, int, int, int, float]
    column_types = dict(zip(names, dtype))

    image_files = glob.glob(os.path.join(config.DATASET_PATH, '*.JPG'))
    dfs = []
    for f in image_files:
        filename = os.path.splitext(os.path.split(f)[1])[0]
        try:
            tmp_df = pd.read_csv(os.path.splitext(f)[0] + '.gt',
                                 delimiter=' ',
                                 header=None,
                                 names=names,
                                 dtype=column_types)
        except (IOError, OSError, pd.errors.EmptyDataError):
            # No (or empty) ground-truth file: the image has no text regions.
            print('{} contains no text'.format(filename))
            continue
        tmp_df['filename'] = filename
        dfs.append(tmp_df)

    if not dfs:
        # pd.concat raises on an empty list; return an empty frame instead.
        return pd.DataFrame(columns=['index'] + names + ['filename'])
    return pd.concat(dfs).reset_index()


In [3]:
def extract_random_text_patch(row, apply_preprocessing=True, patch_size=8):
    '''
    Write random square patches cut from one annotated text region to disk.

    The image ``row.filename + '.JPG'`` is loaded from ``config.DATASET_PATH``
    and rotated about the centre of the annotated box by ``row.angle``
    (converted from radians to degrees). The box ``[y:y+h, x:x+w]`` is then
    cropped from the rotated image and ``config.NUM_PATCHES_PER_TEXT`` random
    ``patch_size`` x ``patch_size`` patches are saved as PNG files with random
    UUID names under ``config.PATCH_PATH``.

    Args:
        row: a metadata row exposing ``filename``, ``x``, ``y``, ``w``, ``h``
            and ``angle`` (e.g. a row of the frame built by ``load_metadata``).
        apply_preprocessing: when true, each patch is passed through
            ``preprocess`` before being written.
        patch_size: side length in pixels of the square patches (default 8).

    Returns:
        None. Nothing is written when the cropped region is smaller than
        ``patch_size`` in either dimension.

    Raises:
        IOError: if the image file cannot be read.
    '''
    # load image
    img_path = os.path.join(config.DATASET_PATH, row.filename + '.JPG')
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise IOError('could not read image {}'.format(img_path))
    rows, cols = img.shape[:2]

    # rotate image to get horizontal
    center = (float(row['x'] + row['w'] / 2.0),
              float(row['y'] + row['h'] / 2.0))
    M = cv2.getRotationMatrix2D(center, float(row.angle) * 180.0 / math.pi, 1)
    rotated_img = cv2.warpAffine(img, M, (cols, rows))

    # cut text; clamp to >= 0 so a negative coordinate cannot wrap the slice
    top, left = max(int(row.y), 0), max(int(row.x), 0)
    bottom = max(int(row.y) + int(row.h), 0)
    right = max(int(row.x) + int(row.w), 0)
    text_img = rotated_img[top:bottom, left:right]

    # Use the size of the actual crop: slicing silently clips boxes that
    # extend past the image border, so it can be smaller than (h, w).
    text_h, text_w = text_img.shape[:2]
    max_x = text_w - patch_size
    max_y = text_h - patch_size
    if max_x < 0 or max_y < 0:
        return

    # extract random patches
    for _ in range(config.NUM_PATCHES_PER_TEXT):
        # randint's upper bound is exclusive, hence the +1 so that the
        # right-most / bottom-most patch position can also be drawn.
        x = np.random.randint(0, max_x + 1)
        y = np.random.randint(0, max_y + 1)
        patch = text_img[y:y + patch_size, x:x + patch_size]
        if apply_preprocessing:
            patch = preprocess(patch)

        # save to file
        cv2.imwrite('{}/{}.png'.format(config.PATCH_PATH, uuid4()), patch)

In [4]:
# Build the annotation table from the dataset's .gt files and preview
# its first ten rows.
df = load_metadata()
df.head(10)

Unnamed: 0,index,patch_number,difficult,x,y,w,h,angle,filename
0,0,0,False,749,860,47,105,-0.04804,IMG_0030
1,1,1,True,728,919,16,44,-0.023252,IMG_0030
2,0,0,False,300,675,1232,112,0.053651,IMG_0059
3,1,1,False,208,801,1489,178,0.101044,IMG_0059
4,0,0,False,830,594,242,72,0.097976,IMG_0063
5,0,0,False,614,574,262,41,-0.303571,IMG_0064
6,1,1,False,675,542,625,98,-0.292443,IMG_0064
7,2,2,False,712,645,609,51,-0.293994,IMG_0064
8,0,0,False,439,670,286,94,-0.320308,IMG_0080
9,1,1,False,855,391,940,129,-0.326279,IMG_0080


In [5]:
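# Disabled: uncomment to run extract_random_text_patch on every row of df
# (this writes the patch files under config.PATCH_PATH).
#df.apply(extract_random_text_patch, axis=1)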

0       None
1       None
2       None
3       None
4       None
5       None
6       None
7       None
8       None
9       None
10      None
11      None
12      None
13      None
14      None
15      None
16      None
17      None
18      None
19      None
20      None
21      None
22      None
23      None
24      None
25      None
26      None
27      None
28      None
29      None
        ... 
1689    None
1690    None
1691    None
1692    None
1693    None
1694    None
1695    None
1696    None
1697    None
1698    None
1699    None
1700    None
1701    None
1702    None
1703    None
1704    None
1705    None
1706    None
1707    None
1708    None
1709    None
1710    None
1711    None
1712    None
1713    None
1714    None
1715    None
1716    None
1717    None
1718    None
dtype: object

In [10]:
def extract_all_windows(stepSize=32, windowSize=(32, 32), apply_preprocessing=True):
    '''
    Slide a window over every dataset image and write each window to disk.

    For each ``*.JPG`` file in ``config.DATASET_PATH`` a directory named after
    the image (base name without extension) is created under
    ``config.WINDOW_PATH`` if it does not exist yet. The image is then scanned
    top-to-bottom, left-to-right in steps of ``stepSize`` pixels and every
    window is saved there as a PNG file with a random UUID name.

    Windows that start near the right or bottom border are clipped by the
    image bounds and may therefore be smaller than ``windowSize``.

    Args:
        stepSize: horizontal and vertical stride in pixels (default 32).
        windowSize: ``(width, height)`` of the window in pixels
            (default ``(32, 32)``).
        apply_preprocessing: accepted for signature compatibility but not
            used; windows are written exactly as cut from the image.
            NOTE(review): confirm whether ``preprocess`` should be applied
            here as it is for the random text patches.

    Returns:
        None.

    Raises:
        IOError: if an image file cannot be read.
    '''
    image_files = glob.glob(os.path.join(config.DATASET_PATH, '*.JPG'))
    win_w, win_h = windowSize

    for f in image_files:
        filename = os.path.splitext(os.path.split(f)[1])[0]

        img = cv2.imread(f)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise IOError('could not read image {}'.format(f))

        out_dir = os.path.join(config.WINDOW_PATH, filename)
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)

        for y in range(0, img.shape[0], stepSize):
            for x in range(0, img.shape[1], stepSize):
                window = img[y:y + win_h, x:x + win_w]
                cv2.imwrite('{}/{}.png'.format(out_dir, uuid4()), window)

In [None]:
# Pass the stride and window size explicitly: extract_all_windows declares
# stepSize and windowSize as parameters, so a bare call raises TypeError
# unless they have defaults.
extract_all_windows(stepSize=32, windowSize=(32, 32))

(0, 0) (32, 32, 3)
(32, 0) (32, 32, 3)
(64, 0) (32, 32, 3)
(96, 0) (32, 32, 3)
(128, 0) (32, 32, 3)
(160, 0) (32, 32, 3)
(192, 0) (32, 32, 3)
(224, 0) (32, 32, 3)
(256, 0) (32, 32, 3)
(288, 0) (32, 32, 3)
(320, 0) (32, 32, 3)
(352, 0) (32, 32, 3)
(384, 0) (32, 32, 3)
(416, 0) (32, 32, 3)
(448, 0) (32, 32, 3)
(480, 0) (32, 32, 3)
(512, 0) (32, 32, 3)
(544, 0) (32, 32, 3)
(576, 0) (32, 32, 3)
(608, 0) (32, 32, 3)
(640, 0) (32, 32, 3)
(672, 0) (32, 32, 3)
(704, 0) (32, 32, 3)
(736, 0) (32, 32, 3)
(768, 0) (32, 32, 3)
(800, 0) (32, 32, 3)
(832, 0) (32, 32, 3)
(864, 0) (32, 32, 3)
(896, 0) (32, 32, 3)
(928, 0) (32, 32, 3)
(960, 0) (32, 32, 3)
(992, 0) (32, 32, 3)
(1024, 0) (32, 32, 3)
(1056, 0) (32, 32, 3)
(1088, 0) (32, 32, 3)
(1120, 0) (32, 32, 3)
(1152, 0) (32, 32, 3)
(1184, 0) (32, 32, 3)
(1216, 0) (32, 32, 3)
(1248, 0) (32, 32, 3)
(1280, 0) (32, 32, 3)
(1312, 0) (32, 32, 3)
(1344, 0) (32, 32, 3)
(1376, 0) (32, 32, 3)
(1408, 0) (32, 32, 3)
(1440, 0) (32, 32, 3)
(1472, 0) (32, 32, 3)
(15