In [1]:
import os
import sys
import cv2
import numpy as np
from torch.autograd import Variable
from matplotlib import pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision

import json
import torch.nn as nn
import torch
import tqdm
import torch.nn.functional as F
import pytorch_lightning as pl
from collections import Counter
from torch.utils.data import DataLoader, random_split, TensorDataset

In [2]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [3]:
# NomeroffNet path: the checkout is expected one directory above the notebook.
NOMEROFF_NET_DIR = os.path.abspath('../')

# Guard the append so that re-running this cell does not pile up duplicate
# sys.path entries.
if NOMEROFF_NET_DIR not in sys.path:
    sys.path.append(NOMEROFF_NET_DIR)

In [4]:
# Import the detector wrapper from the NomeroffNet checkout that was put on sys.path.
from NomeroffNet.BBoxNpPoints import NpPointsCraft, getCvZoneRGB, convertCvZonesRGBtoBGR, reshapePoints
# Build the detector and load its weights; load() reports the checkpoint files it
# reads (see the cell output). `npPointsCraft.net` / `.refine_net` are used as
# default arguments further down, so this cell must run before those definitions.
npPointsCraft = NpPointsCraft()
npPointsCraft.load()

Loading weights from checkpoint (/mnt/store/nomeroff-net/NomeroffNet/tools/../../data/./models/NpPointsCraft/craft_mlt/craft_mlt_25k_2020-02-16.pth)
Loading weights of refiner from checkpoint (/mnt/store/nomeroff-net/NomeroffNet/tools/../../data/./models/NpPointsCraft/craft_refiner/craft_refiner_CTW1500_2020-02-16.pth)


In [5]:
%matplotlib inline

In [6]:
# Default size (in inches) for every figure drawn in this notebook.
plt.rcParams.update({"figure.figsize": (10, 10)})

In [7]:
def check_craft(json_path, img_path):
    """Count how many annotated 4-point regions the CRAFT detector returns points for.

    Parameters
    ----------
    json_path : str
        Path to a VIA annotation file; its ``'_via_img_metadata'`` mapping is read.
    img_path : str
        Directory that contains the image files named by each entry's ``"filename"``.

    Returns
    -------
    collections.Counter
        ``count["all"]``    - number of regions that have exactly 4 x and 4 y points.
        ``count["craft"]``  - total length of the results returned by
        ``npPointsCraft.detect`` over all images.
        ``count["failed"]`` - number of images for which ``detect`` raised (the key
        is only present when at least one failure happened). A missing or
        unreadable image (``cv2.imread`` returns ``None``) is presumably reported
        here as well, since ``None`` is handed to ``detect`` - TODO confirm.
    """
    count = Counter()

    # Read the annotations up front so the file handle is not kept open for the
    # whole (potentially long) detection loop.
    with open(json_path) as json_file:
        data = json.load(json_file)

    for item in tqdm.tqdm(data['_via_img_metadata'].values()):
        image_path = os.path.join(img_path, item["filename"])
        img = cv2.imread(image_path)

        target_boxes = []
        for region in item['regions']:
            shape = region['shape_attributes']
            xs = shape.get('all_points_x', [])
            ys = shape.get('all_points_y', [])
            # Only quadrilateral annotations are evaluated.
            if len(xs) != 4 or len(ys) != 4:
                continue

            xs = np.array(xs)
            ys = np.array(ys)
            # Axis-aligned box: [min_x, min_y, max_x, max_y].
            target_boxes.append([min(xs), min(ys), max(xs), max(ys)])
        count["all"] += len(target_boxes)

        try:
            all_points = npPointsCraft.detect(img, target_boxes, [5, 2, 0])
        except Exception:
            # Best effort: one bad image must not abort the whole evaluation, but
            # the failure is recorded instead of being silently swallowed.
            # `Exception` (not a bare except) keeps Ctrl-C / kernel interrupt working.
            count["failed"] += 1
            all_points = []
        count["craft"] += len(all_points)
    return count

In [8]:
# Root of the dataset release; the train/val splits live in sub-directories and
# each split keeps its VIA annotations next to the images.
DATASET_DIR = "../datasets/autoriaNumberplateDataset-2021-05-12"

val_img_path = f"{DATASET_DIR}/val"
val_json_path = f"{val_img_path}/via_region_data.json"
train_img_path = f"{DATASET_DIR}/train"
train_json_path = f"{train_img_path}/via_region_data.json"

In [9]:
#check_craft(val_json_path, val_img_path)

In [10]:
#check_craft(train_json_path, train_img_path)

In [11]:
class Net(nn.Module):
    """Small CNN classifier with a spatial-transformer front end.

    The layer sizes (``10 * 3 * 3`` after the localization network and ``320``
    before ``fc1``) work out for single-channel 28x28 inputs, i.e. tensors of
    shape ``(batch, 1, 28, 28)``. The output is a ``(batch, 36)`` tensor of
    log-probabilities.

    Attribute names and their registration order are part of the checkpoint
    format (``state_dict`` keys) and must not be changed.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 36)

        # Spatial transformer localization-network
        self.localization = nn.Sequential(
            nn.Conv2d(1, 8, kernel_size=7),
            nn.MaxPool2d(2, stride=2),
            nn.ReLU(True),
            nn.Conv2d(8, 10, kernel_size=5),
            nn.MaxPool2d(2, stride=2),
            nn.ReLU(True)
        )

        # Regressor for the 3 * 2 affine matrix
        self.fc_loc = nn.Sequential(
            nn.Linear(10 * 3 * 3, 32),
            nn.ReLU(True),
            nn.Linear(32, 3 * 2)
        )

        # Initialize the last regressor layer so that it outputs the identity
        # transformation regardless of its input (zero weights, identity bias).
        with torch.no_grad():
            self.fc_loc[2].weight.zero_()
            self.fc_loc[2].bias.copy_(torch.tensor([1, 0, 0, 0, 1, 0], dtype=torch.float))

    def stn(self, x):
        """Spatial transformer: resample ``x`` with a predicted 2x3 affine matrix."""
        xs = self.localization(x)
        # Flatten per sample. Keeping the batch dimension explicit means a
        # wrong-sized input fails loudly in the Linear layer instead of being
        # silently re-batched by view(-1, ...).
        xs = xs.view(xs.size(0), -1)
        theta = self.fc_loc(xs)
        theta = theta.view(-1, 2, 3)

        # align_corners=False is what the implicit default resolves to; passing it
        # explicitly pins the behaviour and avoids the UserWarning on every call.
        grid = F.affine_grid(theta, x.size(), align_corners=False)
        x = F.grid_sample(x, grid, align_corners=False)

        return x

    def forward(self, x):
        """Return per-class log-probabilities for a batch of images."""
        # transform the input
        x = self.stn(x)

        # Perform the usual forward pass
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)


# Build the classifier and place its parameters on the selected device.
model = Net()
model = model.to(device)

In [12]:
# map_location remaps the stored tensors onto `device`, so a checkpoint that was
# saved on a GPU can still be loaded on a CPU-only machine.
model.load_state_dict(torch.load("letter_recognition.ph", map_location=device))
# Inference mode: disables dropout. Left as the last expression so the cell
# still displays the module summary.
model.eval()

Net(
  (conv1): Conv2d(1, 10, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(10, 20, kernel_size=(5, 5), stride=(1, 1))
  (conv2_drop): Dropout2d(p=0.5, inplace=False)
  (fc1): Linear(in_features=320, out_features=50, bias=True)
  (fc2): Linear(in_features=50, out_features=36, bias=True)
  (localization): Sequential(
    (0): Conv2d(1, 8, kernel_size=(7, 7), stride=(1, 1))
    (1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (2): ReLU(inplace=True)
    (3): Conv2d(8, 10, kernel_size=(5, 5), stride=(1, 1))
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): ReLU(inplace=True)
  )
  (fc_loc): Sequential(
    (0): Linear(in_features=90, out_features=32, bias=True)
    (1): ReLU(inplace=True)
    (2): Linear(in_features=32, out_features=6, bias=True)
  )
)

In [13]:
# Class index -> character: A-Z first, then 0-9 (36 entries in total).
whitelist = list("ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789")

In [14]:
import math
import copy
import easyocr
import torch
from craft_mlt import imgproc
from craft_mlt.craft import CRAFT
from craft_mlt.refinenet import RefineNet
from craft_mlt import craft_utils
from typing import List, Dict, Tuple, Any, Union


def get_det_boxes(textmap, linkmap, text_threshold, link_threshold, low_text):
    """Extract one quadrilateral box per connected component of the score maps.

    Parameters
    ----------
    textmap, linkmap : 2-D arrays of the same shape
        Score maps (presumably the CRAFT text/region and link/affinity outputs -
        confirm against the caller). Both are copied, the inputs are not modified.
    text_threshold : float
        A component is kept only if its highest ``textmap`` value reaches this.
    link_threshold : float
        Binarisation threshold applied to ``linkmap``.
    low_text : float
        Binarisation threshold applied to ``textmap``.

    Returns
    -------
    list
        One array of 4 corner points ``(x, y)`` per kept component, in score-map
        coordinates, rolled so that the corner with the smallest ``x + y`` is first.
    """
    # prepare data
    linkmap = linkmap.copy()
    textmap = textmap.copy()
    img_h, img_w = textmap.shape

    """ labeling method """
    # Binarise both maps (threshold type 0 == cv2.THRESH_BINARY: values above the
    # threshold become 1, everything else 0).
    ret, text_score = cv2.threshold(textmap, low_text, 1, 0)
    ret, link_score = cv2.threshold(linkmap, link_threshold, 1, 0)

    # Union of text and link pixels, labelled with 4-connectivity.
    text_score_comb = np.clip(text_score + link_score, 0, 1)
    nLabels, labels, stats, centroids = cv2.connectedComponentsWithStats(text_score_comb.astype(np.uint8),
                                                                         connectivity=4)

    det = []
    # NOTE: `mapper` collects the label of every kept component but is never
    # returned or read.
    mapper = []
    # Label 0 is skipped (the background label of connectedComponentsWithStats).
    for k in range(1, nLabels):
        # size filtering: drop components smaller than 10 pixels
        size = stats[k, cv2.CC_STAT_AREA]
        if size < 10: continue

        # thresholding: the component must contain at least one confident text pixel
        if np.max(textmap[labels == k]) < text_threshold: continue

        # make segmentation map
        segmap = np.zeros(textmap.shape, dtype=np.uint8)
        segmap[labels == k] = 255
        segmap[np.logical_and(link_score == 1, text_score == 0)] = 0  # remove link area
        x, y = stats[k, cv2.CC_STAT_LEFT], stats[k, cv2.CC_STAT_TOP]
        w, h = stats[k, cv2.CC_STAT_WIDTH], stats[k, cv2.CC_STAT_HEIGHT]
        # Dilation size derived from the component area and its bounding-box shape.
        niter = int(math.sqrt(size * min(w, h) / (w * h)) * 2)
        # Window around the component's bounding box, padded by `niter` pixels.
        sx, ex, sy, ey = x - niter, x + w + niter + 1, y - niter, y + h + niter + 1
        # boundary check: clamp the window to the map
        if sx < 0: sx = 0
        if sy < 0: sy = 0
        if ex >= img_w: ex = img_w
        if ey >= img_h: ey = img_h
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1 + niter, 1 + niter))
        # Dilate only inside the window.
        segmap[sy:ey, sx:ex] = cv2.dilate(segmap[sy:ey, sx:ex], kernel)

        # make box
        # np.where yields (rows, cols); rolling the first axis by one turns that
        # into (cols, rows), i.e. one (x, y) pair per non-zero pixel.
        np_contours = np.roll(np.array(np.where(segmap != 0)), 1, axis=0).transpose().reshape(-1, 2)
        rectangle = cv2.minAreaRect(np_contours)
        box = cv2.boxPoints(rectangle)

        # align diamond-shape: when the rotated rectangle is nearly square
        # (side ratio within 0.1 of 1), use the axis-aligned bounding box of the
        # pixels instead.
        w, h = np.linalg.norm(box[0] - box[1]), np.linalg.norm(box[1] - box[2])
        box_ratio = max(w, h) / (min(w, h) + 1e-5)
        if abs(1 - box_ratio) <= 0.1:
            l, r = min(np_contours[:, 0]), max(np_contours[:, 0])
            t, b = min(np_contours[:, 1]), max(np_contours[:, 1])
            box = np.array([[l, t], [r, t], [r, b], [l, b]], dtype=np.float32)

        # make clock-wise order: roll the corners so the one with the smallest
        # x + y comes first
        startidx = box.sum(axis=1).argmin()
        box = np.roll(box, 4 - startidx, 0)
        box = np.array(box)

        det.append(box)
        mapper.append(k)

    return det

@torch.no_grad()
def test_net(image: np.ndarray, 
             net: CRAFT = npPointsCraft.net, 
             text_threshold: float = 0.6,
             link_threshold: float = 0.7, 
             low_text: float = 0.4, 
             cuda: bool = True,
             poly: bool = False, 
             canvas_size: int = 300, 
             refine_net: RefineNet = npPointsCraft.refine_net,
             mag_ratio: float = 1.5) -> Tuple[Any, Any]:
    """Run the detector network on one image and return boxes plus a score heatmap.

    Parameters
    ----------
    image : np.ndarray
        Input image in ``[h, w, c]`` layout.
    net : CRAFT
        Network called as ``net(x)``; must return ``(y, feature)`` where
        ``y[0, :, :, 0]`` is used as the text score map and ``y[0, :, :, 1]`` as
        the link score map.
    text_threshold, link_threshold, low_text : float
        Thresholds forwarded to ``get_det_boxes``.
    cuda : bool
        Move the input tensor to the GPU before the forward pass. Ignored when
        CUDA is not available, so the default no longer crashes on CPU-only
        machines. The network itself is not moved by this function.
    poly : bool
        Unused; kept for interface compatibility.
    canvas_size, mag_ratio :
        Forwarded to ``imgproc.resize_aspect_ratio``.
    refine_net : RefineNet or None
        When given, ``refine_net(y, feature)[0, :, :, 0]`` replaces the link map.

    Returns
    -------
    (boxes, heatmap)
        ``boxes`` is the output of ``get_det_boxes`` after
        ``craft_utils.adjustResultCoordinates`` (presumably mapped back to
        input-image coordinates - confirm in craft_utils); ``heatmap`` is
        ``imgproc.cvt2HeatmapImg`` of the text and link maps placed side by side.
    """

    # resize
    img_resized, target_ratio, _ = imgproc.resize_aspect_ratio(image,
                                                               canvas_size,
                                                               interpolation=cv2.INTER_LINEAR,
                                                               mag_ratio=mag_ratio)
    ratio_h = ratio_w = 1 / target_ratio

    # preprocessing
    x = imgproc.normalizeMeanVariance(img_resized)
    x = torch.from_numpy(x).permute(2, 0, 1)  # [h, w, c] to [c, h, w]
    x = x.unsqueeze(0)  # [c, h, w] to [b, c, h, w]
    # Only request the GPU when one exists; `.cuda()` raises otherwise.
    if cuda and torch.cuda.is_available():
        x = x.cuda()

    # forward pass
    y, feature = net(x)

    # make score and link map
    score_text = y[0, :, :, 0].cpu().numpy()
    score_link = y[0, :, :, 1].cpu().numpy()

    # refine link
    if refine_net is not None:
        y_refiner = refine_net(y, feature)
        score_link = y_refiner[0, :, :, 0].cpu().numpy()

    # Post-processing
    boxes = get_det_boxes(score_text, score_link, text_threshold, link_threshold, low_text)

    # coordinate adjustment
    boxes = craft_utils.adjustResultCoordinates(boxes, ratio_w, ratio_h)

    # render results: text map and link map side by side
    render_img = np.hstack((score_text, score_link))
    ret_score_text = imgproc.cvt2HeatmapImg(render_img)
    return boxes, ret_score_text

In [15]:
def add_pading_to_bbox(bbox, dr = 0.2):
    """Grow an ``(x, y, w, h)`` box around its centre and return its two corners.

    The half-width and half-height are each enlarged by the fraction ``dr``.
    The centre uses floor division (``w // 2``, ``h // 2``) and the resulting
    corner coordinates are truncated with ``int``.

    Returns ``[(left, top), (right, bottom)]``; the values may be negative when
    the box sits close to the origin.
    """
    left, top, width, height = bbox

    # Centre of the box (floor division) and its half extents (true division).
    center_x = left + width // 2
    center_y = top + height // 2
    half_w = width / 2
    half_h = height / 2

    # Half extents after enlarging by the relative margin.
    reach_x = half_w + half_w * dr
    reach_y = half_h + half_h * dr

    top_left = (int(center_x - reach_x), int(center_y - reach_y))
    bottom_right = (int(center_x + reach_x), int(center_y + reach_y))
    return [top_left, bottom_right]

In [16]:
def polygon_to_bbox(polygon):
    """Return the axis-aligned bounding box ``(x, y, w, h)`` of a polygon.

    ``polygon`` is indexed as a 2-D array whose column 0 holds x and column 1
    holds y coordinates.
    """
    xs, ys = polygon[:, 0], polygon[:, 1]
    left, top = np.min(xs), np.min(ys)
    right, bottom = np.max(xs), np.max(ys)
    return (left, top, right - left, bottom - top)

def polygons_to_bboxs(points, dr = 0.2):
    """Convert every polygon in ``points`` into a padded two-corner bounding box.

    Parameters
    ----------
    points : iterable of 2-D arrays
        Polygons, each accepted by ``polygon_to_bbox``.
    dr : float
        Relative padding forwarded to ``add_pading_to_bbox``.

    Returns
    -------
    list
        One ``[(left, top), (right, bottom)]`` entry per polygon; empty for
        empty input.
    """
    # Iterate over the argument itself. The earlier version looped over a global
    # named `polygons` and ignored `points`, so it only worked when the caller
    # happened to have a global of that name.
    return [add_pading_to_bbox(polygon_to_bbox(polygon), dr) for polygon in points]

In [111]:
def find_angle(vector_1, vector_2):
    """Return the angle between two vectors, in degrees (0 to 180).

    Both vectors are normalised first. A zero-length vector cannot be normalised
    and yields NaN.
    """
    unit_vector_1 = vector_1 / np.linalg.norm(vector_1)
    unit_vector_2 = vector_2 / np.linalg.norm(vector_2)
    # Rounding can push the cosine of (anti)parallel vectors marginally outside
    # [-1, 1], where arccos returns NaN; clamp it back into the valid domain.
    dot_product = np.clip(np.dot(unit_vector_1, unit_vector_2), -1.0, 1.0)
    angle = np.arccos(dot_product)
    return math.degrees(angle)

In [156]:

from random import randint
from math import sin, cos, radians


def rotatePolygon(points, degrees_x, degrees_y):
    """Apply a two-angle linear transform to a polygon about its vertex centroid.

    Each vertex is shifted so that the mean of all vertices is the origin, then
    mapped with::

        new_x =  tx * cos(degrees_x) + ty * sin(degrees_y)
        new_y = -tx * sin(degrees_x) + ty * cos(degrees_y)

    and shifted back. This is a plain rotation only when both angles are equal.
    Returns a new list of ``[x, y]`` pairs; ``points`` itself is not modified.
    """
    angle_x = radians(degrees_x)
    angle_y = radians(degrees_y)
    cos_x, sin_x = cos(angle_x), sin(angle_x)
    cos_y, sin_y = cos(angle_y), sin(angle_y)

    # The pivot is the mean of the vertices.
    count = len(points)
    pivot_x = sum(vertex[0] for vertex in points) / count
    pivot_y = sum(vertex[1] for vertex in points) / count

    def _transform(vertex):
        dx = vertex[0] - pivot_x
        dy = vertex[1] - pivot_y
        moved_x = ( dx*cos_x + dy*sin_y) + pivot_x
        moved_y = (-dx*sin_x + dy*cos_y) + pivot_y
        return [moved_x, moved_y]

    return [_transform(vertex) for vertex in points]

In [157]:
# Sanity check: perpendicular axis vectors are 90 degrees apart.
find_angle([0, 1], [1, 0])

90.0

In [185]:
# Select the split (annotation file and image directory) to process below.
json_path, img_path = train_json_path, train_img_path

In [193]:
# Debug limiter: only used by the commented-out early exit at the end of the loop.
N = 10
j = 0


def _classify_letter(letter_img):
    """Return the `whitelist` character that `model` predicts for a BGR crop."""
    letter_img_rgb = cv2.cvtColor(letter_img, cv2.COLOR_BGR2RGB)
    x = cv2.resize(letter_img_rgb, (28, 28))
    x = cv2.cvtColor(x, cv2.COLOR_RGB2GRAY)
    x = cv2.normalize(x, None, alpha=0, beta=1,
                      norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_32F)
    # (28, 28) -> (1, 1, 28, 28): add the batch and channel axes.
    x = torch.from_numpy(x[np.newaxis, np.newaxis, :, :]).to(device)
    # Pure inference: no autograd graph is needed.
    with torch.no_grad():
        scores = model(x).cpu().numpy()[0]
    return whitelist[int(np.argmax(scores))]


with open(json_path) as json_file:
    data = json.load(json_file)

# For every annotated plate region: crop it, detect the individual characters,
# classify each one and replace the image's regions with per-letter polygons.
for image_key, item in tqdm.tqdm(data['_via_img_metadata'].items()):
    filename = item["filename"]
    image_path = os.path.join(img_path, filename)
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread signals a missing/unreadable file with None; fail with a
        # clear message instead of an opaque TypeError further down.
        raise FileNotFoundError(f"cv2.imread could not read {image_path!r}")

    letter_regions = []
    for region in item['regions']:
        shape = region['shape_attributes']
        if 'all_points_x' not in shape:
            continue
        xs = np.array(shape['all_points_x'])
        ys = np.array(shape['all_points_y'])
        # The angle computation below indexes corners 0..3.
        if len(xs) < 4 or len(ys) < 4:
            continue

        # NOTE(review): the angles are taken between the corner *position*
        # vectors (measured from the image origin), not between edge direction
        # vectors. Left as is - confirm that this is the intended tilt estimate.
        angle_xs = [
            find_angle([xs[-1], ys[-1]], [xs[0], ys[0]]),
            find_angle([xs[1], ys[1]], [xs[2], ys[2]]),
        ]
        angle_x = np.mean(angle_xs)

        angle_ys = [
            find_angle([xs[0], ys[0]], [xs[1], ys[1]]),
            find_angle([xs[2], ys[2]], [xs[3], ys[3]]),
        ]
        # Fixed: this used to be np.mean(angle_y), which is a NameError on a
        # fresh kernel and otherwise silently reuses a stale value.
        angle_y = np.mean(angle_ys)

        # Clamp to the image: negative indices would wrap around in NumPy
        # slicing, and min_x/min_y are also the offsets that map crop
        # coordinates back to image coordinates.
        min_x = max(int(min(xs)), 0)
        max_x = max(int(max(xs)), 0)
        min_y = max(int(min(ys)), 0)
        max_y = max(int(max(ys)), 0)
        img_part = img[min_y:max_y, min_x:max_x]
        if img_part.size == 0:
            continue

        polygons, _ = test_net(img_part,
                               refine_net=None,
                               #mag_ratio=1,
                               low_text=0.6)
        bboxs = polygons_to_bboxs(polygons, 0.3)

        for bbox in bboxs:
            (x0, y0), (x1, y1) = bbox
            # The padded box may stick out of the crop. Clamp the slice bounds so
            # that a negative start does not wrap around and yield an empty crop
            # (which used to drop letters near the plate edge).
            letter_img = img_part[max(y0, 0):max(y1, 0), max(x0, 0):max(x1, 0)]
            if letter_img.size == 0:
                continue

            text = _classify_letter(letter_img)

            # Corners of the (unclamped) padded box:
            # top-right, bottom-right, bottom-left, top-left.
            pts = list(zip([x1, x1, x0, x0],
                           [y0, y1, y1, y0]))
            pts = rotatePolygon(pts, -angle_x, 0)
            pts = rotatePolygon(pts, 0, -angle_y)
            pts = np.array(pts, np.int32).tolist()
            letter_regions.append({
                'region_attributes': {
                    'class': text
                },
                'shape_attributes': {
                    'name': 'polygon',
                    # Shift from crop coordinates back to image coordinates.
                    'all_points_x': [pt[0]+min_x for pt in pts],
                    'all_points_y': [pt[1]+min_y for pt in pts],
                }
            })

    data['_via_img_metadata'][image_key]['regions'] = letter_regions
#     j += 1
#     if j >= N:
#         break

100%|██████████| 7666/7666 [04:41<00:00, 27.26it/s]


In [194]:
# Write next to the source annotations as "<name>_letters_bbox.json".
# os.path.splitext only strips a real extension, so dots elsewhere in the path
# (e.g. the leading "../") cannot confuse it.
output_path = f"{os.path.splitext(json_path)[0]}_letters_bbox.json"
with open(output_path, "w") as json_file:
    json.dump(data, json_file)