In [1]:
import math
import os
import random

import emnist
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image

# Numeric table read from the EMNIST mapping file. Column 1 is compared
# against ASCII codes and column 0 is returned as the class id
# (see ascii_to_emnist_id).
# NOTE(review): this is the "balanced" mapping while the images below come
# from the "bymerge" split -- confirm both splits share the same class ids.
label_ascii_map = np.loadtxt('./emnist-balanced-mapping.txt')

# Read the whole text in one go; the context manager closes the file even
# if read() raises.
with open("./winnie.txt", "r") as my_file:
    data = my_file.read()

img_emnist, labels_emnist = emnist.extract_training_samples('bymerge')

# np.load on an .npz returns an archive object that keeps the file open,
# so extract the array inside a `with` block to release the handle.
with np.load('./kmnist/k49-train-imgs.npz') as kmnist_img_archive:
    img_kmnist = kmnist_img_archive['arr_0']
with np.load('./kmnist/k49-train-labels.npz') as kmnist_label_archive:
    labels_kmnist = kmnist_label_archive['arr_0']

In [2]:
# Blank 28x28 tile (every pixel 255) used for cells that carry no glyph.
# The dtype is pinned to uint8: np.full would otherwise pick the platform
# integer (typically int64), which promotes every rendered page to int64,
# and PIL's Image.fromarray cannot handle 64-bit integer arrays.
blank_space = np.full((28, 28), 255, dtype=np.uint8)

# label -> indices of all EMNIST training images carrying that label
emnist_img_label_map = {
    label: np.where(labels_emnist == label)[0]
    for label in np.unique(labels_emnist)
}

# label -> indices of all KMNIST training images carrying that label
kmnist_img_label_map = {
    label: np.where(labels_kmnist == label)[0]
    for label in np.unique(labels_kmnist)
}

In [3]:
def ascii_to_emnist_id(ascii):
    """Return the class id that the mapping table assigns to an ASCII code.

    The table is the module-level ``label_ascii_map``: column 1 is matched
    against ``ascii`` and column 0 of the first matching row is returned
    (as stored in the table, i.e. a NumPy scalar).

    Raises
    ------
    IndexError
        If no row of the table has ``ascii`` in column 1.
    """
    has_code = label_ascii_map[:, 1] == ascii
    matching_rows = label_ascii_map[has_code]
    return matching_rows[0, 0]

In [4]:
# ASCII codes listed in the mapping table (its second column).
ascii_in_emnist = label_ascii_map[:,1]

# Codes of the lowercase letters c i j k l m o p s u v w x y z. These get
# replaced by their uppercase form below -- presumably because the label
# set has no separate class for them (TODO confirm against the mapping file).
missing_letters = [
    99, 105, 106, 107, 108,
    109, 111, 112, 115, 117,
    118, 119, 120, 121, 122,
]

# Swap every listed lowercase letter for its uppercase twin (code - 32),
# then cut the text into lines.
winnie_wo_missing = ''.join(
    chr(ord(letter) - 32) if ord(letter) in missing_letters else letter
    for letter in data
).split("\n")

# One list per text line: the character's ASCII code when the mapping
# table knows it, -1 otherwise.
winnie_ascii = [
    [ord(char) if ord(char) in ascii_in_emnist else -1 for char in text_line]
    for text_line in winnie_wo_missing
]

In [5]:
def translate_ascii_to_label(winnie_ascii, line_width=80, lines_per_page=114):
    """Lay out lines of ASCII codes as fixed-size pages of class ids.

    Parameters
    ----------
    winnie_ascii : list of list of int
        One inner list per line of text. Each entry is an ASCII code, or -1
        for a cell that must stay blank.
    line_width : int, optional
        Number of character cells per row (default 80).
    lines_per_page : int, optional
        Number of rows per page (default 114).

    Returns
    -------
    numpy.ndarray
        Array of shape ``(pages, lines_per_page, line_width)``. Each cell is
        the value returned by ``ascii_to_emnist_id`` for that character, or
        -1 for a blank cell. Short rows and the last page are padded with
        -1. An empty input yields an empty array.

    Raises
    ------
    ValueError
        If ``line_width`` or ``lines_per_page`` is smaller than 1.
    """
    if line_width < 1 or lines_per_page < 1:
        raise ValueError("line_width and lines_per_page must be at least 1")

    rows = []
    for line in winnie_ascii:
        labels = [
            -1 if letter == -1 else ascii_to_emnist_id(letter)
            for letter in line
        ]
        # A line wider than the page would make the nested list ragged and
        # break the final np.array call, so the overflow continues on the
        # following row(s). An empty line still occupies one blank row.
        for start in range(0, max(len(labels), 1), line_width):
            row = labels[start:start + line_width]
            row.extend([-1] * (line_width - len(row)))
            rows.append(row)

    book = []
    for start in range(0, len(rows), lines_per_page):
        page = rows[start:start + lines_per_page]
        # Each padding row is a fresh list, so no two rows share an object.
        page.extend([-1] * line_width for _ in range(lines_per_page - len(page)))
        book.append(page)

    return np.array(book)

# Lay the whole text out as an array of pages of label ids (-1 = blank cell).
winnie_label = translate_ascii_to_label(winnie_ascii)

In [6]:
def generate(winnie_label, is_emnist=True):
    """Render each page of label ids as a PNG of randomly drawn glyph images.

    For every non-blank cell a random training image carrying that label is
    picked and inverted (``255 - pixels``) so it matches the 255-valued
    blank tiles. Tiles are joined into rows and rows into one page image,
    which is saved as ``<page_num>.png``.

    Parameters
    ----------
    winnie_label : numpy.ndarray
        Pages of label ids as produced by ``translate_ascii_to_label``
        (indexed page, row, cell); -1 marks a blank cell.
    is_emnist : bool, optional
        True (default) draws glyphs from ``img_emnist`` and writes to
        ``./winnie-emnist``; False draws from ``img_kmnist`` and writes to
        ``./winnie-kmnist``.

    Returns
    -------
    None
        The function only writes files; the output directory is created if
        it does not exist yet.
    """
    if is_emnist:
        images, ids_by_label, out_dir = img_emnist, emnist_img_label_map, './winnie-emnist'
    else:
        images, ids_by_label, out_dir = img_kmnist, kmnist_img_label_map, './winnie-kmnist'

    # Image.save does not create missing directories.
    os.makedirs(out_dir, exist_ok=True)

    for page_num, page in enumerate(winnie_label):
        page_rows = []
        for line in page:
            tiles = []
            for letter in line:
                if letter == -1:
                    tiles.append(blank_space)
                else:
                    # Labels may arrive as floats (the mapping table is read
                    # with np.loadtxt); int() gives a plain key for the lookup.
                    random_id = random.choice(ids_by_label[int(letter)])
                    tiles.append(255 - images[random_id])

            # Rows narrower than 80 cells are padded with blank tiles.
            tiles.extend([blank_space] * (80 - len(tiles)))
            page_rows.append(np.concatenate(tiles, axis=1))

        # Mixing uint8 glyphs with an integer blank tile can promote the page
        # to int64, which Image.fromarray rejects. All values are 0..255, so
        # the cast to uint8 is lossless.
        page_pixels = np.concatenate(page_rows, axis=0).astype(np.uint8)

        im = Image.fromarray(page_pixels).convert('RGB')
        im.save(f'{out_dir}/{page_num}.png')

# Render every page with EMNIST glyphs (is_emnist defaults to True); the PNGs are written to ./winnie-emnist/.
generate(winnie_label)