In [1]:
import os

In [2]:
# Print the current working directory, then the entries it contains, one per line.
print(os.getcwd(), os.listdir(), sep="\n")

C:\Users\Russell\Dropbox\Hobbies\Caligraphy
['.ipynb_checkpoints', 'Books', 'cubicspline.m', 'DotPaper', 'ExtractBooks.ipynb', 'fundamentalforms.jpg', 'lowercasecombos', 'lowercaseletters.jpg']


In [3]:
!pip install minecart



In [4]:
import math
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import minecart
import skimage 

from skimage import io, color, filters
from skimage.transform import resize

In [5]:
# os.listdir returns entries in arbitrary order; sort them so that positional
# look-ups such as all_books[1] select the same book on every run and platform.
all_books = sorted(os.listdir("books"))
print(all_books)

['19th century swedish copybook.pdf', 'Ames Guide to Self-Instruction.pdf', 'Ames Notebook.pdf', 'Bible Pearls of Promise.pdf', 'Compendium of Real Pen Work.pdf', 'Kelchner - Complete Compendium of Plain Practical Penmanship.pdf']


In [26]:
# (height, width) of a patch and its trailing channel axis; concatenated, the two
# tuples form the input shape handed to the Keras model.
GLOBAL_PATCHSIZE = (80, 80)
GLOBAL_CHANNELS = (1,)

In [25]:
# Read one book and keep the first embedded image of every page as a numpy array.
# os.path.join keeps the path separator portable across operating systems.
book_path = os.path.join("books", all_books[1])

pages = []
# The context manager guarantees the PDF handle is closed once extraction is done;
# images are converted to arrays while the file is still open.
with open(book_path, 'rb') as pdffile:
    doc = minecart.Document(pdffile)
    for page in doc.iter_pages():
        if not page.images:
            # A page without embedded images has nothing to extract; skip it
            # instead of failing on images[0].
            continue
        im = page.images[0].as_pil()  # requires pillow
        pages.append(np.asarray(im))


In [56]:
class DataGenerator(tf.keras.utils.Sequence):
    """Keras ``Sequence`` serving batches of random patches cropped from page images.

    Every sample is identified by a "state", a ``(sample_id, 0)`` pair that is used
    as the seed of ``tf.image.stateless_random_crop``. The page a patch is cut from
    is drawn with ``np.random`` on every call and is therefore not tied to the state.

    Each batch is ``(X, Y)`` where ``Y`` is ``X`` with ``TARGET_MARGIN`` pixels
    removed from every side of both spatial axes.

    Args:
        pages: Indexable collection of page images (objects exposing ``.shape``),
            each either 2-D ``(H, W)`` or 3-D ``(H, W, C)``.
        num_of_states: Number of samples (states) per epoch.
        is_grayscale: If true, patches have a single channel and 3-D pages are
            passed through ``tf.image.rgb_to_grayscale`` before cropping.
        batch_size: Number of samples per batch.
        dim: ``(height, width)`` of each patch.
        n_channels: Channels per patch; only used when ``is_grayscale`` is false.
        n_classes: Stored on the instance; not used by the generator itself.
        shuffle: Whether the sample order is reshuffled at the end of every epoch.
    """

    # Pixels trimmed from each border of the input to build the target. A margin of
    # 3 equals the shrinkage of three stacked unpadded 3x3 convolutions.
    TARGET_MARGIN = 3

    def __init__(self, pages, num_of_states, is_grayscale = True, batch_size=32, dim=(80,80), n_channels=3,
                 n_classes=2, shuffle=True):
        """Store the configuration and build the initial sample order."""
        # Newer Keras releases expect the base-class initialiser to run; it is a
        # harmless no-op on versions where Sequence defines no __init__.
        super().__init__()

        self.grayscale = bool(is_grayscale)
        # A grayscale generator always emits one channel, whatever n_channels says.
        self.n_channels = 1 if self.grayscale else n_channels

        self.pages = pages
        # Normalised to a tuple so it can be concatenated with the channel axis
        # even when the caller passes a list.
        self.dim = tuple(dim)
        self.batch_size = batch_size
        self.n_classes = n_classes
        self.shuffle = shuffle
        # One (sample_id, 0) seed pair per sample.
        self.states = [(state_id, 0) for state_id in range(num_of_states)]
        self.on_epoch_end()

    def __len__(self):
        """Return the number of full batches per epoch (a partial last batch is dropped)."""
        return int(np.floor(len(self.states) / self.batch_size))

    def __getitem__(self, index):
        """Build and return batch number ``index`` as an ``(X, Y)`` pair."""
        # Positions of this batch inside the (possibly shuffled) sample order.
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]

        # Seeds of the samples that make up the batch.
        states = [self.states[k] for k in indexes]

        X, Y = self.__data_generation(states)
        return X, Y

    def on_epoch_end(self):
        """Reset the sample order, shuffling it when ``shuffle`` is enabled."""
        self.indexes = np.arange(len(self.states))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __data_generation(self, states):
        """Crop one patch per state and return ``(X, Y)``.

        ``X`` has shape ``(len(states), *dim, n_channels)``; ``Y`` is ``X`` without
        its outer ``TARGET_MARGIN`` pixels on both spatial axes.
        """
        X = np.empty((len(states), *self.dim, self.n_channels))
        crop_shape = self.dim + (self.n_channels, )

        # Draw from the pages held by this instance, not from a notebook global.
        page_indexs = np.random.randint(0, len(self.pages), size = len(states))

        for i, a_state in enumerate(states):
            a_page = self.pages[page_indexs[i]]
            if len(a_page.shape) > 2:
                # Colour pages are reduced to one channel only in grayscale mode;
                # otherwise their channels are kept for the multi-channel crop.
                if self.grayscale:
                    a_page = tf.image.rgb_to_grayscale(a_page)
            else:
                # Give 2-D pages an explicit channel axis so the crop shape matches.
                a_page = tf.expand_dims(a_page, axis=-1)
            X[i,] = tf.image.stateless_random_crop(a_page, crop_shape, a_state)

        # For now the target is the input itself with its border removed.
        margin = self.TARGET_MARGIN
        return X, X[:, margin:-margin, margin:-margin, :]

In [57]:
# Small convolutional network assembled layer by layer. Conv2D uses "valid"
# padding by default, so each 3x3 layer trims one pixel per side and the three of
# them turn an 80x80 input into a 74x74 single-channel output.
model = tf.keras.Sequential()
model.add(tf.keras.Input(shape=GLOBAL_PATCHSIZE + GLOBAL_CHANNELS))
model.add(tf.keras.layers.Conv2D(32, kernel_size=(3, 3), activation="relu"))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.Conv2D(32, kernel_size=(3, 3), activation="relu"))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.Conv2D(1, kernel_size=(3, 3), activation="relu"))

In [58]:
# Adam with its default settings, minimising the mean squared error.
model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.MSE,
)



<tensorflow.python.keras.callbacks.History at 0x19237fe8340>

# Fit on a generator holding 10000 sample states cropped from `pages`; with no
# `epochs` argument Keras runs a single epoch.
train_batches = DataGenerator(pages, 10000)
model.fit(train_batches)