In [14]:
from PIL import Image
from PIL import ImageDraw
from PIL import ImageFont
import os 
from glob import glob
from tqdm import tqdm
import numpy as np
import matplotlib.pyplot as plt
import pandas
import shutil


# Create the train/ and test/ roots plus one sub-folder per digit class (0-9).
# os.makedirs(..., exist_ok=True) also creates the missing parent folder and does
# nothing when the folder is already there, so this cell can be re-run safely.
for i in range(10):
    for split in ('train', 'test'):
        os.makedirs(os.path.join(split, str(i)), exist_ok=True)
        
    
    

### checking the fonts

In [15]:
path = '/usr/share/fonts/truetype/custom/'
# Every .ttf file below `path`, nested sub-folders included (recursive "**").
files = glob(path + "**/*.ttf", recursive=True)

# The font at list position 55 is excluded from generation. The pop is guarded so
# that a machine with fewer than 56 fonts no longer fails with IndexError.
# NOTE(review): the order returned by glob() is not guaranteed, so position 55 may
# be a different font on another machine -- excluding it by file name would be safer.
if len(files) > 55:
    files.pop(55)
len(files)

58

In [16]:
def digit_generator(digit = '1', font_name = '/usr/share/fonts/truetype/custom/HindSiliguri-Regular.ttf',
                    font_size = 265, x_pos= 50, y_pos = -60, color = (255,255,255)):
    """Draw `digit` in black on a 256x256 RGB image filled with `color`.

    Parameters
    ----------
    digit : str
        Text to draw.
    font_name : str
        Path of the TrueType font file handed to ``ImageFont.truetype``.
    font_size : int
        Size handed to ``ImageFont.truetype``.
    x_pos, y_pos : int
        Coordinates handed to ``ImageDraw.text``. The default ``y_pos`` is
        negative, i.e. above the top edge of the canvas.
    color : tuple
        Background colour of the canvas.

    Returns
    -------
    The image object created by ``Image.new`` with the text drawn on it.
    """
    canvas = Image.new('RGB', (256, 256), color=color)
    glyph_font = ImageFont.truetype(font=font_name, size=font_size)
    # Text is always drawn in black, whatever the background colour is.
    ImageDraw.Draw(canvas).text((x_pos, y_pos), digit, fill=(0, 0, 0), font=glyph_font)
    return canvas

### Train dataset generation

In [17]:
digits_bns = "০ ১ ২ ৩ ৪ ৫ ৬ ৭ ৮ ৯".split()
digits_ens = "0 1 2 3 4 5 6 7 8 9".split()
color_list = [(255,255,255), (255, 255, 204), (255, 153, 102), (102, 255, 51), (0, 0, 255), (255, 0, 102) ]
color_names = ['white', 'yellow', 'orange', 'green', 'blue', 'red']

# font path -> first error seen for that font, reported once the loops finish
failed_fonts = {}

# One image per (font, digit, background colour), stored under train/<latin digit>/.
# tqdm wraps the list itself (not enumerate) so it knows the total and draws a bar.
for idx, font_name in enumerate(tqdm(files)):
    for jdx, (digit_bn, digit_en) in enumerate(zip(digits_bns, digits_ens)):
        for color, color_name in zip(color_list, color_names):
            try:
                img = digit_generator(digit=digit_bn, font_name=font_name, color=color)
                img.save('train/{}/{}_{}_{}.jpg'.format(digit_en, idx, jdx, color_name))
            except Exception as err:
                # Still best effort (a bad font must not stop the run), but the
                # failure is recorded, and Ctrl-C is no longer swallowed.
                failed_fonts.setdefault(font_name, repr(err))

for bad_font, reason in failed_fonts.items():
    print('train: image(s) skipped for {}: {}'.format(bad_font, reason))

58it [00:06,  9.05it/s]


### Test dataset generation

In [None]:
font_sizes = np.arange(150,200,1)
digits_bns = "০ ১ ২ ৩ ৪ ৫ ৬ ৭ ৮ ৯".split()
digits_ens = "0 1 2 3 4 5 6 7 8 9".split()

# font path -> first error seen for that font, reported once the loops finish
failed_fonts = {}

# One image per (font, font size, digit), stored under test/<latin digit>/.
# tqdm wraps the list itself (not enumerate) so it knows the total and draws a bar.
for idx, font_name in enumerate(tqdm(files)):
    for jdx, font_size in enumerate(font_sizes):
        for kdx, (digit_bn, digit_en) in enumerate(zip(digits_bns, digits_ens)):
            try:
                # int(): pass a plain Python int to the renderer, not a NumPy scalar.
                img = digit_generator(digit=digit_bn, font_name=font_name, font_size=int(font_size))
                img.save('test/{}/{}_{}_{}.jpg'.format(digit_en, idx, jdx, kdx))
            except Exception as err:
                # Still best effort (a bad font must not stop the run), but the
                # failure is recorded, and Ctrl-C is no longer swallowed.
                failed_fonts.setdefault(font_name, repr(err))

for bad_font, reason in failed_fonts.items():
    print('test: image(s) skipped for {}: {}'.format(bad_font, reason))

9it [00:07,  1.19it/s]

### image augmentation


In [18]:
import Augmentor

In [19]:
def augmentation(folder, sample=100000):
    """Build an Augmentor pipeline over `folder` and generate `sample` images.

    Parameters
    ----------
    folder : str
        Directory handed to ``Augmentor.Pipeline``.
    sample : int
        Number of augmented images requested from ``Pipeline.sample``.

    Returns
    -------
    None. The images are produced by Augmentor; the run log in this notebook
    shows them being written to ``<folder>/output``.
    """
    pipeline = Augmentor.Pipeline(folder)

    # Quarter-turn rotations, each registered with probability 0.1.
    pipeline.rotate90(probability=0.1)
    pipeline.rotate270(probability=0.1)

    # Random crop (always applied) followed by an optional slight zoom.
    pipeline.crop_random(probability=1, percentage_area=0.9)
    pipeline.zoom(probability=0.5, min_factor=1.01, max_factor=1.03)

    # Skew operations, each with its own probability.
    pipeline.skew_tilt(probability=0.5)
    pipeline.skew_left_right(probability=0.2)
    pipeline.skew_top_bottom(probability=0.6)
    pipeline.skew_corner(probability=0.1)
    pipeline.skew(probability=0.33)

    pipeline.sample(sample, multi_threaded=True)
    
     

In [20]:
# Request 100 augmented samples built from the images under train/.
augmentation('train/', sample=100)

Executing Pipeline:   0%|          | 0/100 [00:00<?, ? Samples/s]

Initialised with 3420 image(s) found.
Output directory set to train/output.

Processing <PIL.Image.Image image mode=RGB size=230x230 at 0x7F0200C67D68>: 100%|██████████| 100/100 [00:00<00:00, 184.44 Samples/s]


### Copying the augmented images back into the class folders

In [23]:
# moving the augmented images to the corresponding folders 
def copytree(src, dst, symlinks=False, ignore=None):
    """Merge-copy the directory tree at `src` into `dst`.

    `dst` may already exist. A file is copied when it is missing from `dst`, or
    when the source is more than one second newer than the existing copy.

    Parameters
    ----------
    src, dst : str
        Source and destination directories.
    symlinks : bool
        Forwarded to the recursive calls but not acted on: directories are
        detected with ``os.path.isdir`` and files copied with ``shutil.copy2``.
    ignore : callable or None
        Same convention as ``shutil.copytree``: called as ``ignore(directory,
        names)`` and returns the names to skip, e.g.
        ``shutil.ignore_patterns('*.tmp')``. Previously accepted but never applied.

    Returns
    -------
    None
    """
    os.makedirs(dst, exist_ok=True)
    names = os.listdir(src)
    skipped = set(ignore(src, names)) if ignore is not None else set()
    for item in tqdm(names):
        if item in skipped:
            continue
        s = os.path.join(src, item)
        d = os.path.join(dst, item)
        if os.path.isdir(s):
            copytree(s, d, symlinks, ignore)
        elif not os.path.exists(d) or os.stat(s).st_mtime - os.stat(d).st_mtime > 1:
            # copy2 also carries the file metadata over, which keeps the
            # "is the source newer?" check meaningful on later runs.
            shutil.copy2(s, d)

# removing the original augmented folder
def remove_output(path='train/output'):
    """Delete the directory tree at `path`.

    The default, 'train/output', is the folder the Augmentor run in this
    notebook reports as its output directory. Errors from ``shutil.rmtree``
    (for example a missing directory) are not suppressed.
    """
    shutil.rmtree(path)
    
    
# Copy everything under train/output into train/, then delete train/output.
# Order matters: the folder is removed only after its contents were copied.
src = 'train/output'
dst = 'train/'
copytree(src, dst)
remove_output()


  0%|          | 0/10 [00:00<?, ?it/s]
  0%|          | 0/12 [00:00<?, ?it/s][A
100%|██████████| 12/12 [00:00<00:00, 12246.14it/s][A
  0%|          | 0/8 [00:00<?, ?it/s][A
100%|██████████| 8/8 [00:00<00:00, 12995.52it/s][A
  0%|          | 0/14 [00:00<?, ?it/s][A
100%|██████████| 14/14 [00:00<00:00, 8682.58it/s][A
  0%|          | 0/13 [00:00<?, ?it/s][A
100%|██████████| 13/13 [00:00<00:00, 7317.94it/s][A
  0%|          | 0/10 [00:00<?, ?it/s][A
100%|██████████| 10/10 [00:00<00:00, 5366.31it/s][A
  0%|          | 0/11 [00:00<?, ?it/s][A
100%|██████████| 11/11 [00:00<00:00, 6254.21it/s][A
  0%|          | 0/15 [00:00<?, ?it/s][A
100%|██████████| 15/15 [00:00<00:00, 7413.92it/s][A
  0%|          | 0/6 [00:00<?, ?it/s][A
100%|██████████| 6/6 [00:00<00:00, 6011.90it/s][A
  0%|          | 0/8 [00:00<?, ?it/s][A
100%|██████████| 8/8 [00:00<00:00, 7430.12it/s][A
  0%|          | 0/3 [00:00<?, ?it/s][A
100%|██████████| 10/10 [00:00<00:00, 254.07it/s][A


In [27]:
from PIL import Image
from PIL import ImageDraw
from PIL import ImageFont
import os 
from glob import glob
from tqdm import tqdm
import numpy as np
import matplotlib.pyplot as plt
import pandas
import Augmentor
import shutil    

# making directories
def directory_generator():
    """Create train/<d> and test/<d> for every digit d in 0-9.

    Paths are relative to the current working directory. Folders that already
    exist are left untouched, so the function can be called repeatedly.
    """
    for split in ('train', 'test'):
        for i in range(10):
            # exist_ok=True creates the missing parent too and replaces the
            # repeated "if not os.path.exists(...)" checks.
            os.makedirs(os.path.join(split, str(i)), exist_ok=True)


# checking the fonts
def check_fonts(path='/usr/share/fonts/truetype/custom/', skip_index=55):
    """Return the paths of all .ttf fonts found below `path`.

    Setup: make a directory named 'custom' in '/usr/share/fonts/truetype' and
    copy the Bangla fonts there.

    Parameters
    ----------
    path : str
        Folder to search; sub-folders are searched as well.
    skip_index : int or None
        Position in the result list of a font to leave out (the font at the
        default position was noted as causing issues). It is applied only when
        it is a valid non-negative index, so a smaller font collection no longer
        raises IndexError. Pass None to keep every font.
        NOTE(review): glob() does not guarantee ordering, so the same position
        may be a different font on another machine -- confirm, or exclude the
        font by file name instead.

    Returns
    -------
    list of str
    """
    pattern = os.path.join(path, "**", "*.ttf")
    files = glob(pattern, recursive=True)

    if skip_index is not None and 0 <= skip_index < len(files):
        files.pop(skip_index)
    return files


# digit generation
def digit_generator(digit = '1', font_name = '/usr/share/fonts/truetype/custom/HindSiliguri-Regular.ttf',
                    font_size = 265, x_pos= 50, y_pos = -60, color = (255,255,255)):
    """Render `digit` as black text on a 256x256 RGB image with background `color`.

    `font_name` is the path of the TrueType font file and `font_size` the size
    it is loaded at. `x_pos` / `y_pos` are the coordinates passed to
    ``ImageDraw.text``; the default `y_pos` is negative, i.e. above the canvas.

    Returns the image created by ``Image.new`` with the text drawn on it.
    """
    text_font = ImageFont.truetype(font=font_name, size=font_size)
    picture = Image.new('RGB', (256, 256), color=color)

    pen = ImageDraw.Draw(picture)
    # The glyph is always black; only the background colour varies.
    pen.text((x_pos, y_pos), digit, fill=(0, 0, 0), font=text_font)
    return picture


# train data generation
def train_datagen(fonts, color_list, color_names):
    """Render every Bangla digit with every font on every background colour.

    Each image is saved as ``train/<latin digit>/<font idx>_<digit idx>_<color name>.jpg``.

    Parameters
    ----------
    fonts : list of str
        Font file paths passed to ``digit_generator``.
    color_list : list of tuple
        Background colours such as (255, 255, 255).
    color_names : list of str
        Name used in the file name for the colour at the same position in
        `color_list`. The two lists are paired with ``zip``, so surplus
        entries in the longer one are ignored.

    Example
    -------
    color_list = [(255,255,255), (255, 255, 204), (255, 153, 102), (102, 255, 51), (0, 0, 255), (255, 0, 102)]
    color_names = ['white', 'yellow', 'orange', 'green', 'blue', 'red']

    Returns
    -------
    None. Images that cannot be rendered or saved are skipped; a one-line
    summary per affected font is printed at the end.
    """
    digits_bns = "০ ১ ২ ৩ ৪ ৫ ৬ ৭ ৮ ৯".split()
    digits_ens = "0 1 2 3 4 5 6 7 8 9".split()

    # font path -> first error seen for that font
    failed_fonts = {}

    # tqdm wraps the list (not enumerate) so the progress bar knows the total.
    for idx, font_name in enumerate(tqdm(fonts)):
        for jdx, (digit_bn, digit_en) in enumerate(zip(digits_bns, digits_ens)):
            for color, color_name in zip(color_list, color_names):
                try:
                    img = digit_generator(digit=digit_bn, font_name=font_name, color=color)
                    img.save('train/{}/{}_{}_{}.jpg'.format(digit_en, idx, jdx, color_name))
                except Exception as err:
                    # Best effort: one unusable font must not abort the run. Unlike
                    # a bare `except`, KeyboardInterrupt still gets through.
                    failed_fonts.setdefault(font_name, repr(err))

    for bad_font, reason in failed_fonts.items():
        print('train: image(s) skipped for {}: {}'.format(bad_font, reason))


# test data generation
def test_datagen(fonts):
    font_sizes = np.arange(150,200,1)
    digits_bns = "০ ১ ২ ৩ ৪ ৫ ৬ ৭ ৮ ৯".split()
    digits_ens = "0 1 2 3 4 5 6 7 8 9".split()

    for idx, font_name in tqdm(enumerate(fonts)):
        for jdx, font_size in enumerate(font_sizes):
            for kdx, (digit_bn, digit_en) in enumerate(zip(digits_bns,digits_ens)): 
                try:
                    img = digit_generator(digit = digit_bn, font_name = font_name, font_size=font_size)
                    img.save('test/{}/{}_{}_{}.jpg'.format(digit_en,idx,jdx,kdx))
                except:
                    pass



# image augmentation 
def augmentation(folder, sample=100000):
    """Generate `sample` augmented images from the images under `folder`.

    An ``Augmentor.Pipeline`` is created for `folder`, the operations below are
    registered in order with their probabilities, and ``Pipeline.sample`` is
    run with multi-threading enabled. Returns None.
    """
    aug_pipeline = Augmentor.Pipeline(folder)

    # --- rotations -----------------------------------------------------
    aug_pipeline.rotate90(probability=0.1)
    aug_pipeline.rotate270(probability=0.1)

    # --- crop (always) and slight zoom -----------------------------------
    aug_pipeline.crop_random(probability=1, percentage_area=0.9)
    aug_pipeline.zoom(probability=0.5, min_factor=1.01, max_factor=1.03)

    # --- skews -----------------------------------------------------------
    aug_pipeline.skew_tilt(probability=0.5)
    aug_pipeline.skew_left_right(probability=0.2)
    aug_pipeline.skew_top_bottom(probability=0.6)
    aug_pipeline.skew_corner(probability=0.1)
    aug_pipeline.skew(probability=0.33)

    aug_pipeline.sample(sample, multi_threaded=True)


# moving the augmented images to the corresponding folders 
def copytree(src, dst, symlinks=False, ignore=None):
    """Merge-copy the directory tree at `src` into `dst`.

    `dst` may already exist. A file is copied when it is missing from `dst`, or
    when the source is more than one second newer than the existing copy.

    Parameters
    ----------
    src, dst : str
        Source and destination directories.
    symlinks : bool
        Forwarded to the recursive calls but not acted on: directories are
        detected with ``os.path.isdir`` and files copied with ``shutil.copy2``.
    ignore : callable or None
        Same convention as ``shutil.copytree``: called as ``ignore(directory,
        names)`` and returns the names to skip, e.g.
        ``shutil.ignore_patterns('*.tmp')``. Previously accepted but never applied.

    Returns
    -------
    None
    """
    os.makedirs(dst, exist_ok=True)
    names = os.listdir(src)
    skipped = set(ignore(src, names)) if ignore is not None else set()
    for item in tqdm(names):
        if item in skipped:
            continue
        s = os.path.join(src, item)
        d = os.path.join(dst, item)
        if os.path.isdir(s):
            copytree(s, d, symlinks, ignore)
        elif not os.path.exists(d) or os.stat(s).st_mtime - os.stat(d).st_mtime > 1:
            # copy2 also carries the file metadata over, which keeps the
            # "is the source newer?" check meaningful on later runs.
            shutil.copy2(s, d)

# removing the original augmented folder
def remove_output(path='train/output'):
    """Delete the directory tree at `path`.

    The default, 'train/output', is the folder the Augmentor run in this
    notebook reports as its output directory. Errors from ``shutil.rmtree``
    (for example a missing directory) are not suppressed.
    """
    shutil.rmtree(path)
    
# __main__
def main_func():
    """Run the whole dataset pipeline once, in order.

    1. create the train/ and test/ class folders,
    2. collect the font files,
    3. render the training images (one per font, digit and background colour),
    4. render the test images (one per font, font size and digit),
    5. draw 100 augmented samples from train/,
    6. copy train/output back into train/ and delete train/output.
    """
    directory_generator()
    fonts = check_fonts()
    # digit_generator() is not called on its own here: the result would be
    # discarded, and the call would abort the whole run whenever the default
    # font file is not installed.
    color_list = [(255,255,255), (255, 255, 204), (255, 153, 102), (102, 255, 51), (0, 0, 255), (255, 0, 102) ]
    color_names = ['white', 'yellow', 'orange', 'green', 'blue', 'red']
    train_datagen(fonts, color_list, color_names)
    test_datagen(fonts)
    augmentation('train/', sample=100)
    src = 'train/output'
    dst = 'train/'
    copytree(src, dst)
    remove_output()



# Run the full pipeline: folders, fonts, train/test images, augmentation, merge.
main_func()


    





58it [00:05, 10.86it/s]
58it [00:40,  1.37it/s]
Processing <PIL.Image.Image image mode=RGB size=230x230 at 0x7F0200BC2A58>:   1%|          | 1/100 [00:00<00:16,  5.90 Samples/s]

Initialised with 3620 image(s) found.
Output directory set to train/output.

Processing <PIL.Image.Image image mode=RGB size=230x230 at 0x7F0201517AC8>: 100%|██████████| 100/100 [00:00<00:00, 285.54 Samples/s]
  0%|          | 0/10 [00:00<?, ?it/s]
  0%|          | 0/6 [00:00<?, ?it/s][A
100%|██████████| 6/6 [00:00<00:00, 1474.96it/s][A
  0%|          | 0/10 [00:00<?, ?it/s][A
100%|██████████| 10/10 [00:00<00:00, 1714.06it/s][A
  0%|          | 0/9 [00:00<?, ?it/s][A
100%|██████████| 9/9 [00:00<00:00, 896.15it/s][A
  0%|          | 0/10 [00:00<?, ?it/s][A
100%|██████████| 10/10 [00:00<00:00, 4195.56it/s][A
  0%|          | 0/10 [00:00<?, ?it/s][A
100%|██████████| 10/10 [00:00<00:00, 5790.84it/s][A
  0%|          | 0/13 [00:00<?, ?it/s][A
100%|██████████| 13/13 [00:00<00:00, 4775.44it/s][A
  0%|          | 0/7 [00:00<?, ?it/s][A
100%|██████████| 7/7 [00:00<00:00, 3283.40it/s][A
  0%|          | 0/9 [00:00<?, ?it/s][A
100%|██████████| 9/9 [00:00<00:00, 2712.03it/s][A
  0%|          | 0/15 [00:00<?, ?it/s][A
100%|██████████| 15/15 [00:00<00:00, 30