# Text Augmentation

### Basic Image Augmentation using PIL Image

In [35]:
import cv2
import numpy as np
from PIL import ImageFont, ImageDraw, Image

# Demo: load one image with OpenCV, stamp a word on it with PIL, and keep the
# result as a numpy array for display in the next cell.
im_name = "data/selected_fashion_bl/t_shirt/oblong15.jpg"
img = cv2.imread(im_name)
if img is None:
    # cv2.imread reports a missing/unreadable file by returning None rather
    # than raising, which would otherwise surface later as an obscure error.
    raise FileNotFoundError("cannot read image: " + im_name)

# OpenCV stores pixels as BGR while PIL and matplotlib interpret arrays as RGB,
# so convert before handing the array over to keep the colours correct.
img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

draw = ImageDraw.Draw(img)

# use a truetype font
font = ImageFont.truetype("font/arial.ttf", 100)

draw.text((10, 25), "world", font=font)

img = np.array(img)
name = im_name.split('/')[-1]
name = "labelled_image/" + name
# img is RGB at this point; convert back before writing with OpenCV:
# cv2.imwrite(name, cv2.cvtColor(img, cv2.COLOR_RGB2BGR))

In [36]:
import matplotlib.pyplot as plt
# %matplotlib inline  

# Show the annotated image on a single, manually placed axes whose title is
# the path of the source image.
axes_rect = (0.1, 0.2, 0.8, 0.7)
fig = plt.figure()
ax1 = fig.add_axes(axes_rect)
ax1.set_title(im_name)
plt.imshow(img)


<matplotlib.image.AxesImage at 0x7f7be3ae6d30>

### Convert image from PNG to JPG

[Note] Some of Bukalapak's fashion images cannot be opened in PNG format, so they must be converted to JPG.

In [37]:
# import glob
# import os
# import shutil
# import sys

# # import list of images
# DATA_DIR = "data/"
# im_names = glob.glob(os.path.join(DATA_DIR, 'selected_fashion_bl/', '*/*.png'))


# for im_name in im_names:
# #     print(im_name)
#     img = cv2.imread(im_name)
    
#     name = im_name.split('.')[0]
#     name = name + ".jpg"
    
#     cv2.imwrite(name, img)

### Create Text Augmentation from an Image

In [1]:
import glob
import os
import shutil
import sys
from PIL import ImageFont, ImageDraw, Image
import random

def _text_size(font, text):
    """Return the (width, height) extent of *text* when drawn with *font*.

    Uses ``getbbox`` when the font object provides it and falls back to the
    older ``getsize`` otherwise, so the code works on both sides of the
    Pillow release that removed ``getsize``.
    """
    if hasattr(font, "getbbox"):
        # The right/bottom edges are measured from the drawing origin, which
        # is the extent the caller needs to place a box around the text.
        _, _, right, bottom = font.getbbox(text)
        return right, bottom
    return font.getsize(text)


def augmentText(im_name, text):
    """Draw *text* at a random position on the image stored at *im_name*.

    A font file is picked at random from ``font/*.ttf`` and ``font/*.otf``
    and scaled so the text spans roughly 25-50 % of the image width. Most of
    the time a filled box in a random colour is painted behind the text.

    Args:
        im_name: Path of the image to open.
        text: Non-empty string to render.

    Returns:
        ``(image, rectangle)`` where ``image`` is the modified PIL image and
        ``rectangle`` is ``((x0, y0), (x1, y1))``: the text box grown by the
        padding on every side.

    Raises:
        ValueError: If *text* is empty (its width would never reach the
            target, so the font search could not terminate).
        FileNotFoundError: If no font file is found under ``font/``.
    """
    if not text:
        raise ValueError("text must be a non-empty string")

    image = Image.open(im_name)
    draw = ImageDraw.Draw(image)

    # portion of image width you want text width to be
    img_fraction = float(random.randint(25, 50)) / 100.0
    target_width = img_fraction * image.size[0]

    list_name = glob.glob(os.path.join('font/', '*.ttf')) + glob.glob(os.path.join('font/', '*.otf'))
    if not list_name:
        raise FileNotFoundError("no .ttf or .otf font found in 'font/'")
    list_color = ["orange", "black", "blue", "green", "violet"]

    font_name = random.choice(list_name)
    font_color = random.choice(list_color)

    # Grow the font one point at a time until the text is at least as wide
    # as the target.
    fontsize = 1  # starting font size
    font = ImageFont.truetype(font_name, fontsize)
    while _text_size(font, text)[0] < target_width:
        fontsize += 1
        font = ImageFont.truetype(font_name, fontsize)

    # Step back one size so the text stays below the target width, but never
    # drop to size 0 (possible when the loop above did not run at all).
    fontsize = max(1, fontsize - 1)
    font = ImageFont.truetype(font_name, fontsize)

    w, h = _text_size(font, text)

    padding = 5

    # random.randint raises ValueError on an empty range; if the padded text
    # does not fit inside the image, pin that coordinate to the padding.
    max_x = max(padding, image.size[0] - w - padding)
    max_y = max(padding, image.size[1] - h - padding)
    top_left = (random.randint(padding, max_x), random.randint(padding, max_y))
    rectangle = ((top_left[0] - padding, top_left[1] - padding), (w + top_left[0] + padding, h + top_left[1] + padding))

    # 50 of the 61 possible draws exceed 50, so the background box is drawn
    # for most images but not all of them.
    if random.randint(40, 100) > 50:
        draw.rectangle(rectangle, fill=font_color)
    # font_color is only used for the box; the text uses ImageDraw's default fill.
    draw.text(top_left, text, font=font)  # put the text on the image

    return image, rectangle

def saveImage(image, directory, name):
    """Save *image* as ``directory/name``, creating missing folders first.

    Args:
        image: Object exposing ``save(path)`` (a PIL image in this notebook).
        directory: Root output folder.
        name: File name relative to *directory*; may contain sub-folders
            such as ``"t_shirt/oblong15.jpg"``.
    """
    path = os.path.join(directory, name)
    parent = os.path.dirname(path)
    if parent:
        # The per-category sub-folder usually does not exist on the first run.
        os.makedirs(parent, exist_ok=True)
    image.save(path)

def saveAnnotation(rectangle, directory, im_name, annotation):
    """Write the bounding box and its text to a ``.txt`` file next to the image.

    The file contains a single line ``x0,y0,x1,y1,annotation`` and is named
    after *im_name* with its extension replaced by ``.txt``.

    Args:
        rectangle: ``((x0, y0), (x1, y1))`` corners of the text box.
        directory: Root output folder.
        im_name: Image file name relative to *directory* (may contain
            sub-folders).
        annotation: The text that was drawn inside the rectangle.
    """
    # splitext removes only the final extension, so names or folders that
    # contain additional dots keep their full stem.
    stem, _ = os.path.splitext(im_name)
    filename = os.path.join(directory, stem + ".txt")
    parent = os.path.dirname(filename)
    if parent:
        os.makedirs(parent, exist_ok=True)

    (x0, y0), (x1, y1) = rectangle
    # Explicit UTF-8 so non-ASCII annotations do not depend on the locale.
    with open(filename, "w", encoding="utf-8") as f:
        f.write("%d,%d,%d,%d,%s" % (x0, y0, x1, y1, annotation))

# Try the helpers on one image: draw a fixed caption, then store the image
# and its annotation under "labelled_image/<category folder>/".
im_name = "data/selected_fashion_bl/t_shirt/oblong15.jpg"
text = "Hello World!"
image, rectangle = augmentText(im_name, text)

# Keep only the last two path components: the category folder and file name.
folder, name = im_name.split('/')[-2:]
name = "/".join((folder, name))

saveImage(image, "labelled_image", name)
saveAnnotation(rectangle, "labelled_image", name, text)

### Create Text Augmentation from Selected Directory

In [39]:
import glob
import os
import shutil
import sys

# import list of images
DATA_DIR = "/home/mhilmiasyrofi/ocr/notebook/data/"
im_names = glob.glob(os.path.join(DATA_DIR, 'selected_fashion_bl/*/*.png')) \
        + glob.glob(os.path.join(DATA_DIR, 'selected_fashion_bl/*/*.jpg'))

# Every image receives the same placeholder caption in this cell.
text = "Hello World!"

for im_name in im_names:
    # Use the path produced by the glob; do not overwrite it with a fixed
    # file, otherwise the same image is processed on every iteration.
    image, rectangle = augmentText(im_name, text)
    name = im_name.split('/')[-1]
    folder = im_name.split('/')[-2]
    # Output mirrors the input layout: "<category folder>/<file name>".
    name = folder + "/" + name
    saveImage(image, "labelled_image", name)
    saveAnnotation(rectangle, "labelled_image", name, text)

### Read Corpus 
The corpus is Bukalapak product description data.

In [3]:
import json
import string
from pprint import pprint


def readBukalapakData(filename, min_length=5):
    """Collect words from the ``description`` field of a JSON corpus.

    The file must contain a JSON sequence of objects that each have a
    ``description`` string. Punctuation is removed, the text is split on
    whitespace, and only sufficiently long words are kept.

    Args:
        filename: Path of the JSON file.
        min_length: Minimum number of characters a word needs to be kept.

    Returns:
        A list of words in file order (duplicates are preserved).
    """
    with open(filename, encoding="utf-8") as f:
        data = json.load(f)

    # Delete every ASCII punctuation character in a single pass.
    table = str.maketrans('', '', string.punctuation)

    texts = []
    for d in data:
        # split() without arguments breaks on any whitespace run, so tabs and
        # repeated spaces never end up inside a word or produce empty tokens.
        words = d['description'].translate(table).split()
        texts.extend(word for word in words if len(word) >= min_length)

    return texts

# Load the word list from the fashion product-description corpus.
texts = readBukalapakData(filename='corpus/fashion.json')
            
# print(texts)

## Augment Text using Corpus Data

In [4]:
import glob
import os
import shutil
import sys

# import list of images
DATA_DIR = "/home/mhilmiasyrofi/ocr/notebook/data/"
im_names = glob.glob(os.path.join(DATA_DIR, 'selected_fashion_bl/*/*.png')) \
        + glob.glob(os.path.join(DATA_DIR, 'selected_fashion_bl/*/*.jpg'))


texts = readBukalapakData('corpus/fashion.json')
random.shuffle(texts)
if im_names and not texts:
    raise ValueError("corpus contains no usable words to draw on the images")

for i, im_name in enumerate(im_names):
    # Wrap around the shuffled word list so that having more images than
    # words re-uses words instead of raising IndexError.
    text = texts[i % len(texts)]
    image, rectangle = augmentText(im_name, text)
    name = im_name.split('/')[-1]
    folder = im_name.split('/')[-2]
    # Output mirrors the input layout: "<category folder>/<file name>".
    name = folder + "/" + name
    saveImage(image, "labelled_image", name)
    saveAnnotation(rectangle, "labelled_image", name, text)