In [1]:
import cv2
import numpy as np
import os
from PIL import Image

In [2]:
def random_rotate(image, angle):
    """Rotate an image array about its centre and return it as a PIL image.

    Args:
        image: Pixel array whose first two axes are (height, width).
        angle: Rotation angle handed to ``cv2.getRotationMatrix2D`` (degrees).

    Returns:
        A ``PIL.Image`` built from the warped array. The output canvas keeps
        the input's width and height, and the scale factor is fixed at 1.
    """
    rows, cols = image.shape[:2]
    centre = (cols / 2, rows / 2)
    affine = cv2.getRotationMatrix2D(centre, angle, 1)
    # warpAffine takes the output size as (width, height), not (rows, cols).
    return Image.fromarray(cv2.warpAffine(image, affine, (cols, rows)))

def random_blur(image, kernel_size):
    """Apply a square Gaussian blur and return the result as a PIL image.

    Args:
        image: Pixel array accepted by ``cv2.GaussianBlur``.
        kernel_size: Side length of the square kernel.
            NOTE(review): OpenCV expects an odd, positive size -- confirm
            callers never pass an even value.

    Returns:
        A ``PIL.Image`` built from the blurred array.
    """
    ksize = (kernel_size, kernel_size)
    # The sigma argument is passed as 0, exactly as before.
    return Image.fromarray(cv2.GaussianBlur(image, ksize, 0))

def random_add_noise(image, mean=0, stddev=10):
    """Add Gaussian noise to an image array and return it as a PIL image.

    Args:
        image: Pixel array; assumed to hold 8-bit values in [0, 255]
            (TODO confirm against callers).
        mean: Mean of the noise distribution.
        stddev: Standard deviation of the noise distribution.

    Returns:
        A ``PIL.Image`` built from a ``uint8`` array of the same shape as
        ``image``, with values saturated to [0, 255].
    """
    noise = np.random.normal(mean, stddev, image.shape)
    # Add in floating point and clip afterwards. Casting the signed noise
    # straight to uint8 would wrap every negative sample to a large positive
    # value, pushing about half of the pixels to white.
    noisy = image.astype(np.float64) + noise
    noisy_image = np.clip(np.rint(noisy), 0, 255).astype(np.uint8)
    return Image.fromarray(noisy_image)

In [3]:
def resize_image(image, height=32):
    """Resize an image to a fixed height with a width that is a multiple of 32.

    The width is scaled by the same ratio as the height, truncated to an
    integer, then rounded up to the next multiple of 32.

    Args:
        image: Object exposing ``size`` as ``(width, height)`` and a
            ``resize((width, height))`` method (a ``PIL.Image`` in this file).
        height: Target height in pixels.

    Returns:
        Whatever ``image.resize`` returns for the computed
        ``(new_width, height)``.
    """
    block = 32
    w, h = image.size
    ratio = height / h
    scaled_width = int(w * ratio)
    # Round up to a whole number of blocks using integer arithmetic.
    new_width = -(-scaled_width // block) * block
    # A very tall, narrow input scales to width 0, which cannot be resized to;
    # keep at least one block.
    new_width = max(new_width, block)
    return image.resize((new_width, height))

def padding_image(image):
    """Resize an image and left-align it on a fixed 1600x32 RGB canvas.

    Args:
        image: Image accepted by ``resize_image``.

    Returns:
        A new 1600x32 ``RGB`` image with the resized input pasted at the
        top-left corner. Any part of the resized image beyond the canvas is
        not kept.
    """
    resized = resize_image(image)
    canvas = Image.new('RGB', (1600, 32))
    canvas.paste(resized, (0, 0))
    return canvas

In [4]:
# Augmentation settings: one extra sample is written per entry in each list.
# NOTE(review): every angle is 0, so the "rotated" samples appear to be
# unrotated copies of the padded image -- confirm this duplication is intended.
angles = [0, 0, 0]
blurs = [3]

# exist_ok avoids the separate exists() check and also creates missing parents.
os.makedirs('dataset/augment_data', exist_ok=True)


def _save_sample(pil_image, index, label, label_file):
    """Save one image as image_<index>.png and append its label line."""
    pil_image.save(os.path.join('dataset/augment_data/', f'image_{index}.png'))
    label_file.write(f'image_{index}.png--------{label}\n')


idx = 0
with open('dataset/augment_labels.txt', 'w', encoding='utf-8') as file, \
        open('dataset/label.txt', 'r', encoding='utf-8') as f:
    for line in f:
        line = line.rstrip()
        if not line:
            # A blank line carries no "<name>--------<label>" record.
            continue
        image_name, label = line.split('--------')
        image_path = os.path.join('dataset/images', image_name)
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals an unreadable or missing file by returning None.
            raise FileNotFoundError(f'Could not read image: {image_path}')
        # cv2.imread yields BGR channel order, while PIL treats a 3-channel
        # array as RGB; convert so the saved PNGs keep their real colours.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # Root sample: resized and padded, no augmentation.
        root_image = padding_image(Image.fromarray(image))
        _save_sample(root_image, idx, label, file)
        idx += 1

        root_array = np.array(root_image)
        # Rotated samples.
        for angle in angles:
            _save_sample(random_rotate(root_array, angle), idx, label, file)
            idx += 1
        # Blurred samples.
        for blur in blurs:
            _save_sample(random_blur(root_array, blur), idx, label, file)
            idx += 1
print(len(os.listdir('dataset/augment_data')))

24965


In [5]:
import regex

# `characters` collects every distinct grapheme cluster seen in the labels
# (spaces excluded); `l` records the grapheme count of each label.
characters = set()
l = []

with open('dataset/augment_labels.txt', 'r', encoding='utf-8') as f:
    for line in f:
        image_name, label = line.strip().split('--------')
        # \X matches one extended grapheme cluster, so a base letter plus its
        # combining marks is counted as a single character.
        chars = regex.findall(r'\X', label)
        l.append(len(chars))
        characters.update(char for char in chars if char != ' ')

# Longest label (in graphemes) and vocabulary size; default=0 keeps an empty
# label file from raising.
print(max(l, default=0), len(characters))

127 229


In [6]:
# Write one vocabulary entry per line. The entries are sorted because the
# iteration order of a set of strings can differ between interpreter runs,
# which would silently change each character's line number in vocab.txt.
with open('dataset/vocab.txt', 'w', encoding='utf-8') as f:
    f.writelines(char + '\n' for char in sorted(characters))

In [7]:
import random

# Fixed seed so that re-running the notebook reproduces the same split.
SPLIT_SEED = 42

with open('dataset/augment_labels.txt', 'r', encoding='utf-8') as file:
    lines = file.readlines()

n = len(lines)
idx = list(range(n))
# `temp` holds the line indices chosen for the training split (80% of lines).
# NOTE(review): lines are sampled individually, so augmented variants of one
# source image can land on both sides of the split -- confirm whether a
# split grouped by source image is wanted.
temp = random.Random(SPLIT_SEED).sample(idx, int(n * 0.8))

In [8]:
# Display the number of label lines read for the split.
len(lines)

24965

In [9]:
# Membership tests against a set are O(1); testing against the `temp` list
# made building the validation split quadratic in the number of lines.
train_index_set = set(temp)
# Training lines keep the sampled order; validation lines keep file order.
train_line = [lines[i] for i in temp]
val_line = [line for i, line in enumerate(lines) if i not in train_index_set]

In [10]:
# Display the sizes of the training and validation splits.
len(train_line), len(val_line)

(19972, 4993)

In [11]:
# Persist each split to its own file. The lines still end with the newline
# they were read with, so they are written back unchanged.
for split_path, split_lines in (
    ('dataset/train.txt', train_line),
    ('dataset/val.txt', val_line),
):
    with open(split_path, 'w', encoding='utf-8') as file:
        file.writelines(split_lines)

In [12]:
# import regex
# vocab = r'''aAàÀảẢãÃáÁạẠăĂằẰẳẲẵẴắẮặẶâÂầẦẩẨẫẪấẤậẬbBcCdDđĐeEèÈẻẺẽẼéÉẹẸêÊềỀểỂễỄếẾệỆfFgGhHiIìÌỉỈĩĨíÍịỊjJkKlLmMnNoOòÒỏỎõÕóÓọOỌôÔồỒổỔỗÔỖốỐộỘơƠơỜởỞỡỠớỚợỢpPqQrRsStTuUùÙủỦũŨúÚụỤưƯừỪửỬữỮứỨựỰvVwWxXyYỳỲỷỶỹỸýÝỵỴzZ0123456789ōā²ūĀüŪöÖÜ!"#$%&()*+,-./:;<=>?@[\]^_`{|}~' ∈Ō°'''
# 
# tokens = regex.findall(r'\X', vocab)
# vocabs = set()
# with open('D:/DATN_Handle/annotations.txt', 'r', encoding='utf-8') as f:
#     for line in f.readlines():
#         image_name, label = line.strip().split('--------')
#         chars = regex.findall(r'\X', label)
#         for char in line.rstrip():
#             vocabs.add(char)
#     f.close()
# i = 0
# for char in vocabs:
#     if char not in tokens:
#         print(char)
#         i += 1
#         if i == 10:
#             break