In [None]:
import os
import glob
import random
import datetime

import poppler
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

from PIL import Image
from IPython.display import display

In [None]:
# Fetch the sample PDFs into ../resources with a single wget call.
# --no-clobber skips files that are already present, so re-running the cell is cheap.
os.system(
    'wget -P ../resources --no-clobber --no-verbose '
    'https://www.uibk.ac.at/mathematik/personal/hell/pdfs/skripten/analysis1videohellostermannws16.pdf '
    'https://www.mdy.univie.ac.at/lehre/mathe/skriptum/skrip2b.pdf '
    'https://www.jku.at/fileadmin/gruppen/194/lokalkonvexe.pdf '
    'https://www.jku.at/fileadmin/gruppen/194/Lehre/Analysis_1_2017.pdf '
    'https://www.jku.at/fileadmin/gruppen/194/Lehre/PseudoSkript_ger.pdf'
)

In [None]:
# Number of randomly rotated tiles generated for each training round.
N_SAMPLES = 10000
# Number of rotation classes: a full turn is divided into this many equal bins,
# and the network's final Dense layer has one output per bin.
RESOLUTION = 64
# Side length, in pixels, of the square tiles cut from the pages and fed to the network.
INPUT_SIZE = 192

In [None]:
# Show which GPUs TensorFlow can see; an empty list means none were detected.
print(tf.config.list_physical_devices('GPU'))

In [None]:
# Single poppler renderer shared by every render_page() call.
renderer = poppler.PageRenderer()


def render_page(pdf_page, res) -> Image.Image:
    """Render one PDF page into a single-channel float image.

    Args:
        pdf_page: Page object to render (in this notebook, the result of
            ``create_page`` on a loaded document).
        res: Resolution handed to poppler as both ``xres`` and ``yres``.

    Returns:
        A PIL image in mode ``'F'`` (one 32-bit float channel).
    """
    pimg = renderer.render_page(pdf_page, xres=res, yres=res)
    # Wrap poppler's raw pixel buffer in a PIL image; the raw-decoder mode is
    # taken from the string form of the poppler image format.
    pil_img = Image.frombytes("RGBA", (pimg.width, pimg.height), pimg.data, "raw", str(pimg.format))
    return pil_img.convert('F')


# glob returns paths in arbitrary, filesystem-dependent order. Sort them so that
# the PDF dropped by `[:-1]` is the same one on every machine and every run.
# NOTE(review): the last PDF is left out on purpose, presumably as a hold-out — confirm.
pdfs = sorted(glob.glob('../resources/*.pdf'))[:-1]

# Render every page of every selected PDF (resolution 115) into one flat list.
# A nested comprehension avoids the quadratic list copying of sum(list_of_lists, []).
pages = [
    render_page(document.create_page(page_index), 115)
    for document in map(poppler.load_from_file, pdfs)
    for page_index in range(document.pages)
]
print(f'{len(pages)=}')

# Sanity check: array shape of the first page, then the page itself as 8-bit greyscale.
display(f'First Page {np.asarray(pages[0]).shape}')
display(pages[0].convert('L'))


In [None]:
def split_image(array: np.ndarray):
    """Cut an image into non-overlapping ``INPUT_SIZE`` x ``INPUT_SIZE`` tiles.

    Rows and columns that do not fill a complete tile at the bottom/right edge
    are discarded. Tiles are returned in row-major order (left to right, then
    top to bottom).

    Args:
        array: Image array whose first two axes are height and width. Any
            further axes (e.g. channels) are carried through unchanged.

    Returns:
        Array of shape ``(n_tiles, INPUT_SIZE, INPUT_SIZE, *array.shape[2:])``.
        ``n_tiles`` is 0 when the image is smaller than one tile in either
        direction.
    """
    array = np.asarray(array)
    n_rows = array.shape[0] // INPUT_SIZE
    n_cols = array.shape[1] // INPUT_SIZE
    trailing = array.shape[2:]

    # Crop with explicit end indices. A negative-remainder slice (`[:-rem]`)
    # becomes `[:-0]` == `[:0]` when the size is an exact multiple of
    # INPUT_SIZE and would silently throw the whole image away.
    cropped = array[:n_rows * INPUT_SIZE, :n_cols * INPUT_SIZE]

    # (rows, tile_h, cols, tile_w, ...) -> (rows, cols, tile_h, tile_w, ...)
    grid = cropped.reshape((n_rows, INPUT_SIZE, n_cols, INPUT_SIZE, *trailing)).swapaxes(1, 2)

    # Explicit tile count instead of -1 so that the zero-tile case reshapes cleanly.
    return grid.reshape((n_rows * n_cols, INPUT_SIZE, INPUT_SIZE, *trailing))


# Tile every rendered page and stack all tiles along the first axis.
images = np.concatenate([split_image(np.asarray(page)) for page in pages], 0)

# Rank tiles by their mean pixel value and keep the 80 % with the lowest mean;
# the discarded high-mean tiles are presumably the mostly blank ones.
tile_means = images.mean(axis=-1).mean(axis=-1)
n_kept = int(0.8 * images.shape[0])
images = images[np.argsort(tile_means)[:n_kept]]
print(images.shape)

# 5x5 grid of randomly drawn, unrotated tiles.
display('Random Sections')
fig, axs = plt.subplots(5, 5, figsize=(15, 15))
for ax in axs.flat:
    ax.imshow(random.choice(images), cmap='Greys')
plt.show()

# Same grid, but each tile is rotated by a random angle; the corners uncovered
# by the rotation are filled with the value 255.
display('Rotated Sections')
fig, axs = plt.subplots(5, 5, figsize=(15, 15))
for ax in axs.flat:
    image = Image.fromarray(random.choice(images)).rotate(360 * random.random(), fillcolor=255.)
    ax.imshow(np.asarray(image), cmap='Greys')
plt.show()

In [None]:
# Convolutional classifier: takes one INPUT_SIZE x INPUT_SIZE single-channel tile
# and produces one score per rotation bin (RESOLUTION bins in total).
model = tf.keras.Sequential([
    tf.keras.layers.Input((INPUT_SIZE, INPUT_SIZE, 1)),
    tf.keras.layers.Conv2D(64, 3, activation='relu'),
    tf.keras.layers.Conv2D(64, 3, activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Conv2D(96, 3, activation='relu'),
    tf.keras.layers.MaxPool2D(),
    tf.keras.layers.Conv2D(96, 3, activation='relu'),
    tf.keras.layers.Conv2D(128, 3, activation='relu'),
    tf.keras.layers.MaxPool2D(),
    tf.keras.layers.Conv2D(128, 3, activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Conv2D(128, 3, activation='relu'),
    tf.keras.layers.MaxPool2D(),
    tf.keras.layers.Conv2D(256, 3, activation='relu'),
    tf.keras.layers.MaxPool2D(),
    tf.keras.layers.Conv2D(256, 3, activation='relu'),
    tf.keras.layers.Conv2D(128, 3, activation='relu'),
    tf.keras.layers.MaxPool2D(),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    # No activation here: the layer outputs raw logits, matching from_logits=True below.
    tf.keras.layers.Dense(RESOLUTION),
])

model.summary()
# Labels are integer bin indices, hence the sparse categorical loss.
# `metrics` is passed as a list, the form documented by Keras; a bare string is
# not accepted by every Keras version.
model.compile(
    optimizer='adam',
    loss=tf.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)

In [None]:
# Train in three rounds, each on a freshly sampled set of randomly rotated tiles.
for _ in range(3):
    # One rotation per sample, expressed as a fraction of a full turn in [0, 1).
    rotations = np.random.random(N_SAMPLES)
    # Class label: index of the rotation bin the fraction falls into.
    y = (rotations * RESOLUTION).astype(int)
    # Rotate a random tile for every sampled rotation and divide by 255.
    # fillcolor=255. fills the corners uncovered by the rotation with the same
    # value the preview and evaluation cells use; without it the corners are 0,
    # so the network would be trained on images unlike those it is evaluated on.
    x = np.asarray([
        np.asarray(Image.fromarray(random.choice(images)).rotate(360 * r, fillcolor=255.))
        for r in rotations
    ]) / 255.
    model.fit(x, y, epochs=10)

In [None]:
# Save the trained model under ../models/ with a minute-resolution timestamp
# (YYYYMMDDHHMM) in its name, so saves from different runs do not overwrite each other.
model_name = f'../models/cnn_{datetime.datetime.now():%Y%m%d%H%M}'
model.save(model_name)

In [None]:
# Pick the entry under ../models/ whose name sorts last. With the timestamped
# names used when saving, that is the most recent model — assuming the directory
# holds nothing else. Raises IndexError if the directory is empty.
model_name = sorted(glob.glob(f'../models/*'))[-1]
print(model_name)

In [None]:
# Reload the selected model from disk, replacing the in-memory `model`.
model = tf.keras.models.load_model(model_name)

In [None]:
# Visual check of the Keras model: left column shows a randomly rotated tile,
# right column shows the same tile rotated back by the predicted angle.
fig, axs = plt.subplots(10, 2, figsize=(8, 20))
for in_ax, out_ax in axs:
    rotation = 360 * random.random()
    image = Image.fromarray(random.choice(images)).rotate(rotation, fillcolor=255.)
    array = np.asarray(image)
    in_ax.imshow(array, cmap='Greys')

    # 50 further random tiles, all rotated by the same angle, vote on the rotation.
    samples = [
        np.asarray(Image.fromarray(random.choice(images)).rotate(rotation, fillcolor=255.))
        for _ in range(50)
    ]

    # Divide by 255 exactly as the training inputs were; feeding unscaled pixel
    # values to a network trained on scaled ones gives meaningless logits.
    prediction = model.predict(np.asarray(samples) / 255.)
    # Sum the logits over all samples and take the winning bin. Training labels
    # are floor(fraction * RESOLUTION), so bin k covers [k, k + 1) / RESOLUTION
    # of a turn; decode to the bin centre rather than its lower edge.
    predicted_rotation = 360 * (prediction.sum(axis=0).argmax() + 0.5) / RESOLUTION
    out_ax.imshow(image.rotate(-predicted_rotation, fillcolor=255.), cmap='Greys')
plt.show()

In [None]:
# Convert the SavedModel at `model_name` to TensorFlow Lite.
converter = tf.lite.TFLiteConverter.from_saved_model(model_name)

# First conversion with the converter's default settings; the result is written
# into the saved-model directory itself.
with open(f'{model_name}/model.tflite', 'wb') as f:
    f.write(converter.convert())

# Enable the default optimizations on the same converter, then convert again.
# This must happen after the first convert() so that model.tflite stays unoptimized.
converter.optimizations = [tf.lite.Optimize.DEFAULT]

with open(f'{model_name}/optimized_model.tflite', 'wb') as f:
    f.write(converter.convert())

In [None]:
class TFLiteModel:
    """Callable wrapper around a TensorFlow Lite interpreter for one model file.

    Attributes set by the constructor:
        interpreter: The underlying ``tf.lite.Interpreter``.
        inputs: Result of ``get_input_details()``.
        outputs: Result of ``get_output_details()``.
    """

    def __init__(self, filename, threads=2):
        """Load the model at ``filename`` and allocate its tensors.

        Args:
            filename: Path of the ``.tflite`` file.
            threads: Forwarded to the interpreter as ``num_threads``.
        """
        interpreter = tf.lite.Interpreter(filename, num_threads=threads)
        interpreter.allocate_tensors()

        self.interpreter = interpreter
        self.inputs = interpreter.get_input_details()
        self.outputs = interpreter.get_output_details()

    def __call__(self, x: np.ndarray):
        """Run one inference.

        Args:
            x: Value written to the model's first input tensor.

        Returns:
            The content of the model's first output tensor after invocation.
        """
        input_index = self.inputs[0]['index']
        output_index = self.outputs[0]['index']

        self.interpreter.set_tensor(input_index, x)
        self.interpreter.invoke()
        return self.interpreter.get_tensor(output_index)


In [None]:
# Visual check of the converted (unoptimized) TFLite model, run on 2 threads:
# left column shows a randomly rotated tile, right column the same tile rotated
# back by the predicted angle.
tflite_model = TFLiteModel(f'{model_name}/model.tflite', 2)

fig, axs = plt.subplots(5, 2, figsize=(8, 20))
for in_ax, out_ax in axs:
    rotation = 360 * random.random()
    image = Image.fromarray(random.choice(images)).rotate(rotation, fillcolor=255.)
    array = np.asarray(image)
    in_ax.imshow(array, cmap='Greys')

    # 10 further random tiles, all rotated by the same angle, vote on the rotation.
    samples = np.array([
        np.asarray(Image.fromarray(random.choice(images)).rotate(rotation, fillcolor=255.))
        for _ in range(10)
    ])

    # Add the channel axis and divide by 255 exactly as the training inputs were.
    samples = np.expand_dims(samples, -1) / 255.
    # The interpreter is fed one sample at a time, each wrapped as a batch of one.
    prediction = np.array([tflite_model([sample]) for sample in samples])
    print(prediction.shape)
    # Sum the logits over all samples and take the winning bin. Training labels
    # are floor(fraction * RESOLUTION), so bin k covers [k, k + 1) / RESOLUTION
    # of a turn; the half-bin offset belongs on the bin index, not on RESOLUTION.
    predicted_rotation = 360 * (prediction.sum(axis=0).argmax() + 0.5) / RESOLUTION
    out_ax.imshow(image.rotate(-predicted_rotation, fillcolor=255.), cmap='Greys')
plt.show()