In [None]:
import os
from PIL import Image, ImageDraw, ImageFont
import re
import shutil
import zipfile
import pandas as pd
import random
from tqdm.notebook import tqdm

In [None]:
def generator(words, font_folder, image_size, font_size, output_folder):
    '''
    Synthetic VFR Data Generator.

    Renders every word in `words` with every TrueType font found in
    `font_folder` and saves each rendering as a separate image file.

    Parameters:
    - words: List of words to generate images for.
    - font_folder: Path to the folder containing TrueType font files. Files are
      selected by their `.ttf` extension (matched case-insensitively).
    - image_size: Tuple representing the dimensions (width, height) of the generated images.
    - font_size: Font size to be used for rendering the words (passed to ImageFont.truetype).
    - output_folder: Path to the folder where generated images will be saved.

    Returns:
    None. The function works purely through its side effects on disk.

    Output:
    A subfolder named after each font file (without extension) is created inside
    `output_folder`. For the i-th word (1-based) the image is saved there as
    "<font name>__<i>.jpg". Every image is grayscale with a white background and
    the word drawn in black, anchored at the centre of the image.
    Fonts are processed in sorted file-name order. If `font_folder` contains no
    `.ttf` files, only `output_folder` is created.

    Example Usage:

        words = ['hello', 'world', 'python', 'anaconda']
        font_folder = '/path/fonts'
        image_size = (400, 200)
        font_size = 100
        output_folder = '/path/output'

        generator(words, font_folder, image_size, font_size, output_folder)

    Note:
    - Requires the Pillow library for image processing.
    - The output folder will be created if it doesn't exist.
    - Existing images with the same file names are overwritten.
    '''

    # exist_ok=True replaces the check-then-create pattern: it is race-free and
    # makes re-running the cell harmless.
    os.makedirs(output_folder, exist_ok=True)

    # os.listdir returns entries in arbitrary order; sorting keeps the processing
    # order stable between runs. The extension is compared in lower case so that
    # files such as "Font.TTF" are not silently skipped.
    font_files = sorted(
        f for f in os.listdir(font_folder) if f.lower().endswith('.ttf')
    )

    # The text anchor is the centre of the image. It is the same for every
    # image, so it is computed once instead of once per word.
    center = (image_size[0] // 2, image_size[1] // 2)

    for font_file in font_files:
        # Load the TrueType font file and create a font object
        font_name = os.path.splitext(font_file)[0]
        font_path = os.path.join(font_folder, font_file)
        font = ImageFont.truetype(font_path, font_size)
        print(f"Font Name - {font_name}")

        # Create the output subfolder for this font
        font_output_folder = os.path.join(output_folder, font_name)
        os.makedirs(font_output_folder, exist_ok=True)

        for i, word in enumerate(tqdm(words), start=1):
            # Blank grayscale ('L' mode) image filled with white (255)
            word_img = Image.new('L', image_size, 255)
            draw = ImageDraw.Draw(word_img)

            # anchor='mm' places the middle of the text at `center`,
            # i.e. the word is centred horizontally and vertically.
            draw.text(center, word, font=font, fill=0, anchor='mm')

            # Save the word image; the index i is the 1-based position in `words`
            image_filename = f"{font_name}__{i}.jpg"
            word_img.save(os.path.join(font_output_folder, image_filename))


In [None]:
# Define Paths
# NOTE: despite the _DIR suffix, WORD_CSV_DIR is the path of a CSV *file*
# (it is handed to pd.read_csv), not of a directory.
WORD_CSV_DIR = '/content/Bangla Words.csv'
# Folder that generator() scans for .ttf font files.
FONT_DIR = '/content/Fonts/Bangla'
# Root folder under which generator() creates one subfolder per font.
DATA_OUTPUT_DIR = '/content/Bangla'

In [None]:
# Define Generator Parameters
# (width, height) of every generated image; passed to generator() as image_size.
IMG_SIZE = (400, 200)
# Font size passed to generator(), which forwards it to ImageFont.truetype.
FONT_SIZE = 80
# Number of words drawn from the filtered word list by random.sample.
WORD_SAMPLE_SIZE = 5000
# Inclusive lower and upper bounds on len(word) used when filtering the word list.
WORD_MIN_LEN = 9
WORD_MAX_LEN = 10

In [None]:
# Load the CSV containing the candidate words
df = pd.read_csv(WORD_CSV_DIR)

# Make a list of all words from the first column of the DataFrame.
# Empty CSV cells are read by pandas as NaN (a float), on which len() raises
# TypeError, so missing values are dropped before the length filter runs.
words_list = df.iloc[:, 0].dropna().tolist()

# Keep only string entries whose length lies in [WORD_MIN_LEN, WORD_MAX_LEN]
# (both bounds inclusive). The isinstance check skips any non-text value that
# would otherwise make len() fail.
# Note: len() counts Unicode code points, not visually distinct characters.
words_filtered = [
    w for w in words_list
    if isinstance(w, str) and WORD_MIN_LEN <= len(w) <= WORD_MAX_LEN
]
len(words_filtered)

17532

In [None]:
# Randomly select WORD_SAMPLE_SIZE words to be printed.
# A dedicated, seeded random.Random instance makes the selection reproducible
# across kernel restarts (the same words, in the same order, every run) and
# leaves the global `random` state untouched.
# sample() raises ValueError if WORD_SAMPLE_SIZE exceeds len(words_filtered).
WORD_SAMPLE_SEED = 42
words_sample = random.Random(WORD_SAMPLE_SEED).sample(words_filtered, WORD_SAMPLE_SIZE)
words_sample[:10]

['উচ্চশিক্ষা',
 'বিলাসকানন',
 'বিবর্তবাদ',
 'কণ্ঠস্থিত',
 'সন্ধিক্ষণ',
 'স্বধর্মস্থ',
 'কিষ্কিন্ধা',
 'আত্মবিদারণ',
 'হাতিশুঁড়া',
 'গার্জিয়ান']

In [None]:
# Render every sampled word with every font found in FONT_DIR and write the
# images below DATA_OUTPUT_DIR (one subfolder per font).
generator(
    words=words_sample,
    font_folder=FONT_DIR,
    image_size=IMG_SIZE,
    font_size=FONT_SIZE,
    output_folder=DATA_OUTPUT_DIR,
)

Font Name - Jagat Shonkhoneel


  0%|          | 0/5000 [00:00<?, ?it/s]

Font Name - Suborno Jayonti


  0%|          | 0/5000 [00:00<?, ?it/s]

Font Name - Alinur Phulkuri


  0%|          | 0/5000 [00:00<?, ?it/s]

Font Name - Alinur Saikat


  0%|          | 0/5000 [00:00<?, ?it/s]

Font Name - Alinur Sanghoti


  0%|          | 0/5000 [00:00<?, ?it/s]

Font Name - Fazlay Munnisha


  0%|          | 0/5000 [00:00<?, ?it/s]

Font Name - Mahfuz Himadri


  0%|          | 0/5000 [00:00<?, ?it/s]

Font Name - Niladri Russian


  0%|          | 0/5000 [00:00<?, ?it/s]

Font Name - Patabahar


  0%|          | 0/5000 [00:00<?, ?it/s]

Font Name - Upohar 56


  0%|          | 0/5000 [00:00<?, ?it/s]

# **Thank You**