# Create a dataset of captchas

The code to wave-distort the text is taken from https://github.com/ods/kcaptcha. I've added random spacing, random rotation and random fonts for the letters in the images.
I've also added some noise, some blur and random background and text colors.
Please note that no effort has been put into optimizing the code, it's just a quick and dirty script to generate a dataset.

In [121]:
import itertools
import json
import math
import os
import string
from pathlib import Path
from random import Random
from time import time

import numpy as np
import tqdm
from PIL import Image, ImageFont, ImageDraw, ImageOps, ImageFilter

# Read the dataset location from the local configuration file.
# JSON is UTF-8 by specification, so decode it explicitly instead of relying
# on the platform's locale encoding (which is not UTF-8 on most Windows setups).
json_file = Path("./config.json").read_text(encoding="utf-8")
json_data = json.loads(json_file)

# "dataset_dir" is the root directory under which datasets are stored.
data_dir = Path(json_data["dataset_dir"])

folder_name = "rotated-data"

# Directory that receives the generated captcha images.
folder = data_dir / folder_name

In [122]:
class TextGenerator:
    '''
    Callable that produces random strings to render as captchas.

    Adapted from https://github.com/ods/kcaptcha

    Characters are drawn with ``Random.sample``, i.e. without replacement:
    a generated string never contains the same character twice, and asking
    for more characters than ``letters`` holds raises ``ValueError``.
    ``length`` is a plain attribute and may be changed between calls.
    '''

    def __init__(self, length, letters=string.ascii_lowercase, random=None):
        '''
        :param length: number of characters in each generated string
        :param letters: pool of characters to draw from
        :param random: ``random.Random`` instance to use (a fresh one if None)
        '''
        self.length = length
        self.letters = letters
        self.random = Random() if random is None else random

    def __call__(self):
        '''
        :return: a new random string of ``self.length`` distinct characters
        '''
        picked = self.random.sample(self.letters, self.length)
        return ''.join(picked)

In [126]:
class Captcha(object):
    '''
    Renders a text as a distorted, blurred and optionally noisy captcha image.

    The class-level values below are defaults; any of them can be overridden
    per instance through the keyword arguments of the constructor.
    '''
    mode = 'L'              # PIL image mode used for the canvas
    bg_color = 255          # current background intensity (re-drawn on every create())
    color = 0
    min_bg_color = 200      # lower bound for the random background intensity
    max_txt_color = 150     # upper bound for the random text intensity
    max_letter_angle = 30   # letters are rotated by an angle in [-max, +max] degrees

    def __init__(self, size, font, random=None, **kwargs):
        '''
        :param size: (width, height) of the working canvas the text is drawn on
        :param font: PIL font used to draw the letters (can be reassigned later)
        :param random: ``random.Random`` instance to use (a fresh one if None)
        :param kwargs: extra attributes set on the instance, overriding class defaults
        '''
        self.size = size
        self.font = font
        if random is None:
            random = Random()
        self.random = random
        self.__dict__.update(kwargs)
        self.rem_cols = 5

        # Dimensions of the image returned by create()
        self._ideal_w = 200
        self._ideal_h = 70

        self._ratio = self._ideal_w / self._ideal_h

    def _period(self):
        '''Return a random angular frequency for one wave component.'''
        return self.random.uniform(0.075, 0.12)

    def _phase(self):
        '''Return a random phase for one wave component.'''
        return self.random.uniform(0, math.pi)

    def _amplitude(self):
        '''Return a random amplitude (in pixels) for the wave displacement.'''
        return self.random.uniform(3, 3.8)

    def _wave(self, img):
        '''
        Apply a sinusoidal displacement to every non-background pixel.

        From https://github.com/ods/kcaptcha

        :param img: PIL image whose background is exactly ``self.bg_color``
        :return: new PIL image of the same size with the distortion applied
        '''
        dst_img = Image.new(self.mode, img.size, self.bg_color)
        src_data = img.getdata()
        width, height = img.size
        dx_period_x = self._period()
        dx_period_y = self._period()
        dy_period_x = self._period()
        dy_period_y = self._period()
        dx_phase_x = self._phase()
        dx_phase_y = self._phase()
        dy_phase_x = self._phase()
        dy_phase_y = self._phase()
        dx_amplitude = self._amplitude()
        dy_amplitude = self._amplitude()
        # Variable lookup optimization
        sin = math.sin
        bg_color = self.bg_color
        dst_data = [bg_color] * (height*width)
        for x, y in itertools.product(range(width), range(height)):
            color_diff = src_data[x + width*y] - bg_color
            if not color_diff:
                # Background pixel: nothing to move
                continue
            # Displaced x position (float)
            dx_x = sin(x * dx_period_x + dx_phase_x)
            dx_y = sin(y * dx_period_y + dx_phase_y)
            sx = x + (dx_x + dx_y) * dx_amplitude
            if not 0 <= sx < width-1:
                continue
            # Displaced y position (float)
            dy_x = sin(x * dy_period_x + dy_phase_x)
            dy_y = sin(y * dy_period_y + dy_phase_y)
            sy = y + (dy_x + dy_y) * dy_amplitude
            if not 0 <= sy < height-1:
                continue
            sx_i = int(sx)
            sy_i = int(sy)
            frx = sx - sx_i
            fry = sy - sy_i
            # Share the pixel's contrast between the four neighbouring
            # destination pixels, weighted by the fractional position.
            idx1 = sx_i + width*sy_i
            idx2 = idx1 + width
            dst_data[idx1] += int(color_diff * (1-frx) * (1-fry))
            dst_data[idx1+1] += int(color_diff * frx * (1-fry))
            dst_data[idx2] += int(color_diff * (1-frx) * fry)
            dst_data[idx2+1] += int(color_diff * frx * fry)
        dst_img.putdata(dst_data)
        return dst_img

    def _noise_pixel_count(self, surface, min_noise, max_noise):
        '''
        Draw how many pixels one noise pass overwrites.

        The bounds are truncated to integers first: ``randint`` rejects float
        arguments (TypeError on Python 3.12+, ValueError for non-integral
        floats on earlier versions).

        :param surface: number of pixels in the image
        :param min_noise: minimum fraction of the surface to overwrite
        :param max_noise: maximum fraction of the surface to overwrite
        :return: random int between the two truncated bounds (inclusive)
        '''
        return self.random.randint(int(min_noise * surface), int(max_noise * surface))

    def add_noise(self, img, min_noise=0.0, max_noise=0.07, max_light=100, min_dark=150):
        '''
        Add random noise to the image, i.e. random pixels with random intensity.
        Note that the noise is applied two times: a first pass with intensities
        in [0, max_light], a second pass with intensities in [min_dark, 255].
        So min_noise and max_noise are applied twice.
        The array is modified in place.

        :param img: 2D numpy array of the image
        :param min_noise: minimum fraction of pixels overwritten per pass
        :param max_noise: maximum fraction of pixels overwritten per pass
        :param max_light: upper intensity bound used by the first pass
        :param min_dark: lower intensity bound used by the second pass
        :return: PIL image with random noise
        '''

        # Getting the dimensions of the image
        row, col = img.shape
        surf = row * col

        for is_light in [True, False]:

            # Randomly pick how many pixels are overwritten in this pass
            number_of_pixels = self._noise_pixel_count(surf, min_noise, max_noise)

            # Color these pixels with random intensity
            for _ in range(number_of_pixels):
                y_coord = self.random.randint(0, row - 1)
                x_coord = self.random.randint(0, col - 1)

                if is_light:
                    pixel_intensity = self.random.randint(0, max_light)
                else:
                    pixel_intensity = self.random.randint(min_dark, 255)
                img[y_coord][x_coord] = pixel_intensity

        return Image.fromarray(img)

    def _get_random_spacing(self, nb_letters):
        '''
        Determine a random spacing between the characters
        Values were determined empirically

        :param nb_letters: number of letters in the captcha
        :return: list of spacings
        '''

        spacings = []
        for _ in range(nb_letters):
            tmp = self.random.random()

            # 70% of the time, the spacing is between -10 and -7 (close letters)
            if tmp < 0.7:
                spacings.append(self.random.uniform(-10, -7))

            # 20% of the time, the spacing is between 5 and 9 (far letters)
            elif tmp < 0.9:
                spacings.append(self.random.uniform(5, 9))

            # 10% of the time, the spacing is between -1 and 1 (normal spacing)
            else:
                spacings.append(self.random.uniform(-1, 1))

        return spacings

    def create(self, text):
        '''
        Create an image captcha of the given text.
        A new random background intensity is stored in ``self.bg_color`` on every call.

        :param text: text to be displayed on the captcha
        :return: PIL image captcha of the given text
        '''
        self.bg_color = self.random.randint(self.min_bg_color, 255)
        img = Image.new(self.mode, self.size, self.bg_color)

        # Determine a random spacing between the characters
        # Values were determined empirically
        spacings = self._get_random_spacing(len(text))

        # Measure every letter once; the sizes are reused when drawing
        char_sizes = [get_text_dimensions(char, self.font) for char in text]

        # Get total width and max height to center the text (because of the rotation it's an approximation)
        total_width = 0
        max_height = 0
        for (char_w, char_h), spacing in zip(char_sizes, spacings):
            total_width += char_w + spacing
            max_height = max(max_height, char_h)

        x_offset = int((self.size[0] - total_width) / 2)
        y_offset = int((self.size[1] - max_height) / 2)

        # Choose a random color for the text
        color = self.random.randint(0, self.max_txt_color)

        # Add each letter one by one to the image
        for char, (char_w, char_h), spacing in zip(text, char_sizes, spacings):
            # Make the letter canvas bigger because of the rotation
            box_w = char_w * 2 / np.sqrt(2)
            box_h = char_h * 2 / np.sqrt(2)

            # Create an image just for the letter
            char_img = Image.new('L', (int(box_w), int(box_h)), self.bg_color)
            char_draw = ImageDraw.Draw(char_img)

            char_draw.text((0, 0), char, fill=color, font=self.font)
            angle = self.random.uniform(-self.max_letter_angle, self.max_letter_angle)
            char_img = char_img.rotate(angle, expand=1, fillcolor=self.bg_color)

            # Create a mask to remove the background behind the letter
            mask = char_img.point(lambda x: 1 if x < (self.max_txt_color + 1) else 0, mode='1')

            # Paste the letter on the captcha
            img.paste(char_img, (int(x_offset), int(y_offset)), mask)

            # Update the offset
            x_offset += box_w + spacing

        # Make the image wavy (this part is quite slow)
        new_im = self._wave(img)

        # Crop the image to remove the extra background around the text
        pixels = np.array(new_im)
        foreground = pixels != self.bg_color
        rows_with_text = np.any(foreground, axis=1)
        cols_with_text = np.any(foreground, axis=0)
        top = int(np.argmax(rows_with_text))
        left = int(np.argmax(cols_with_text))
        right = pixels.shape[1] - int(np.argmax(cols_with_text[::-1]))
        bottom = pixels.shape[0] - int(np.argmax(rows_with_text[::-1]))
        new_im = new_im.crop((left, top, right, bottom))

        # Add padding to the image to make it fit the ideal size (defined in the constructor)
        ratio = new_im.size[0] / new_im.size[1]
        if ratio > self._ratio:
            # Too wide: fit the width, pad top and bottom.
            # max(1, ...) keeps resize() from being asked for a 0-pixel side.
            fitted_h = max(1, int(self._ideal_w / ratio))
            new_im = new_im.resize((self._ideal_w, fitted_h))
            pad = int((self._ideal_h - new_im.size[1]) / 2)
            new_im = ImageOps.expand(new_im, border=(0, pad), fill=self.bg_color)

        else:
            # Too tall: fit the height, pad left and right
            fitted_w = max(1, int(self._ideal_h * ratio))
            new_im = new_im.resize((fitted_w, self._ideal_h))
            pad = int((self._ideal_w - new_im.size[0]) / 2)
            new_im = ImageOps.expand(new_im, border=(pad, 0), fill=self.bg_color)

        # If one pixel is missing, just resize the image
        new_im = new_im.resize((self._ideal_w, self._ideal_h))

        # Blur a little bit the image
        new_im = new_im.filter(ImageFilter.GaussianBlur(radius=self.random.uniform(0, 1.5)))

        # In 35% of the cases, output it as it is (no more noise)
        if self.random.random() < 0.35:
            return new_im

        # In 35% of the remaining cases, apply an emboss filter to the image
        # (it's still readable but the pixels change a lot)
        if self.random.random() < 0.35:
            new_im = new_im.filter(ImageFilter.EMBOSS)

        # Finally add some random noise to the image and return it
        return self.add_noise(np.array(new_im))
    
def get_text_dimensions(text_string, font):
    '''
    Measure the rendered size of a text with the given font.

    From https://stackoverflow.com/a/46220683/9263761

    :param text_string: text to measure
    :param font: font object providing ``getmetrics()`` and ``getmask()``
    :return: (width, height) tuple; the height includes the font descent
    '''
    _, descent = font.getmetrics()

    # Rasterise the text once and reuse its bounding box for both dimensions
    bbox = font.getmask(text_string).getbbox()
    if bbox is None:
        # Nothing was drawn (e.g. empty or blank text): no width, descent only
        return (0, descent)

    text_width = bbox[2]
    text_height = bbox[3] + descent

    return (text_width, text_height)

In [127]:
# Initial font; it is replaced by the fonts loaded below as soon as generation starts
font = ImageFont.truetype('arial.ttf', 70)
get_text = TextGenerator(length=10)
captcha = Captcha(size=(1000, 350), font=font)#, mode='RGB', color='#033')
random = Random()
start = time()

# Length of the generated texts; it decreases by one for each quarter of the dataset
txt_len = 10

# From windows fonts folder
fonts = sorted(os.listdir('C:/Windows/fonts'))
fonts = [f'C:/Windows/fonts/{f}' for f in fonts if f.endswith('.ttf')]

# Hand-picked index ranges of the sorted font list
filtered_fonts = fonts[:65]
filtered_fonts += fonts[75:80]
filtered_fonts += fonts[90:110]
print(filtered_fonts)
print(len(filtered_fonts))


font_names = [f.split('/')[-1].split('.')[0] for f in filtered_fonts]
fonts_tmp = [f.split('/')[-1] for f in filtered_fonts]

# Load the fonts from their full path rather than relying on PIL's font lookup by file name
truetypes = [ImageFont.truetype(f, 70) for f in filtered_fonts]


images_per_font = 2000
count = images_per_font * len(truetypes)
images_per_length = count // 4

# Make sure the destination exists before saving anything into it
os.makedirs(folder, exist_ok=True)

for i in range(count):
    # Every quarter of the dataset, shorten the text by one character (10, 9, 8, 7)
    if i % images_per_length == 0:
        get_text.length = txt_len
        txt_len -= 1

    # Every `images_per_font` images, switch to the next font (in list order)
    if i % images_per_font == 0:
        captcha.font = truetypes[i // images_per_font]

    text = get_text()

    img = captcha.create(text)

    # The text is the label, so it is used as the file name
    img_f_name = f'{folder}/{text}.png'
    img.save(img_f_name)

# Average generation time per image, in seconds (skipped when nothing was generated)
if count:
    print(f'{(time() - start) / count:.4f}')

['C:/Windows/fonts/Alternate Gothic No2 BT.ttf', 'C:/Windows/fonts/Candara.ttf', 'C:/Windows/fonts/Candarab.ttf', 'C:/Windows/fonts/Candarai.ttf', 'C:/Windows/fonts/Candaral.ttf', 'C:/Windows/fonts/Candarali.ttf', 'C:/Windows/fonts/Candaraz.ttf', 'C:/Windows/fonts/CascadiaCode.ttf', 'C:/Windows/fonts/CascadiaMono.ttf', 'C:/Windows/fonts/Gabriola.ttf', 'C:/Windows/fonts/Inkfree.ttf', 'C:/Windows/fonts/LeelUIsl.ttf', 'C:/Windows/fonts/LeelaUIb.ttf', 'C:/Windows/fonts/LeelawUI.ttf', 'C:/Windows/fonts/Nirmala.ttf', 'C:/Windows/fonts/NirmalaB.ttf', 'C:/Windows/fonts/NirmalaS.ttf', 'C:/Windows/fonts/arial.ttf', 'C:/Windows/fonts/arialbd.ttf', 'C:/Windows/fonts/arialbi.ttf', 'C:/Windows/fonts/ariali.ttf', 'C:/Windows/fonts/ariblk.ttf', 'C:/Windows/fonts/bahnschrift.ttf', 'C:/Windows/fonts/calibri.ttf', 'C:/Windows/fonts/calibrib.ttf', 'C:/Windows/fonts/calibrii.ttf', 'C:/Windows/fonts/calibril.ttf', 'C:/Windows/fonts/calibrili.ttf', 'C:/Windows/fonts/calibriz.ttf', 'C:/Windows/fonts/cambriab.

# Optional Section

In [58]:
# Optional

# Remove all files created in the folder
import os
import shutil

# Best-effort cleanup: delete every entry of the output folder. Regular files
# and symbolic links are unlinked, real directories are removed recursively.
# A failure on one entry is reported and the loop moves on to the next one.
for entry_name in os.listdir(folder):
    entry_path = os.path.join(folder, entry_name)
    is_file_or_link = os.path.isfile(entry_path) or os.path.islink(entry_path)
    try:
        if is_file_or_link:
            os.unlink(entry_path)
        elif os.path.isdir(entry_path):
            shutil.rmtree(entry_path)
    except Exception as err:
        print('Failed to delete %s. Reason: %s' % (entry_path, err))