# Teil 1: Vorbereitung der Trainingsdaten

Das neuronale Netz braucht Daten, um trainieren zu können. Dafür nehmen wir die ausgefüllten und eingescannten Blätter und extrahieren die einzelnen Ziffern. Dazu muss die gescannte Seite (oder die gescannten Seiten) im Verzeichnis `01-pages` liegen. Wir führen nun die folgenden Schritte durch:

1. Farbanpassung des gescannten Bildes
2. Erkennung des Rahmens
3. Extrahierung der einzelnen Ziffern

Am Ende landen die einzelnen Ziffern als Trainingsdaten im Verzeichnis `03-digits`.

### Schritt 1: Farbanpassung

In diesem Schritt werden alle gescannten Seiten aus dem Verzeichnis `01-pages` mit Hilfe von ImageMagick leicht angepasst und die Ergebnisse in das Verzeichnis `02-cache` geschrieben.

In [None]:
import glob
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import numpy as np
import os
import math
from PIL import Image
import subprocess

# Directory layout used by the notebook.
PAGES_DIR  = "01-pages"   # scanned input pages
CACHE_DIR  = "02-cache"   # colour-adjusted copies of the pages
DIGITS_DIR = "03-digits"  # individual extracted symbols
SHEETS_DIR = "04-sheets"  # one sprite sheet per digit
MODEL_DIR  = "05-model"   # not used in this part; presumably for the trained model

# Start from an empty cache so results of earlier runs cannot be mixed in.
os.makedirs(CACHE_DIR, exist_ok=True)
for path in glob.glob(os.path.join(CACHE_DIR, "*")):
    try:
        os.remove(path)
    except OSError:
        # Best effort: entries that cannot be deleted (e.g. sub-directories)
        # are simply left in place.
        pass

# Paths of all successfully converted pages; consumed by the following cells.
paths = []

for path in glob.glob(os.path.join(PAGES_DIR, "*")):
    out_path = os.path.join(CACHE_DIR, os.path.basename(path))
    # Argument list instead of a shell string: file names containing quotes,
    # spaces or shell metacharacters are passed to ImageMagick verbatim.
    command = [
        "convert", path,
        "-colorspace", "Gray",
        "-auto-level",
        "-threshold", "90%",
        "-resize", "50%",
        "-resize", "200%",
        out_path,
    ]
    try:
        # run() drains both pipes, so a chatty process cannot block on a full pipe.
        result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except OSError as error:
        # The executable itself could not be started; no page can be converted.
        print(f"❌ ImageMagick (convert) konnte nicht gestartet werden: {error}")
        break

    if result.returncode != 0:
        # Do not queue pages whose conversion failed: the output file may be
        # missing or incomplete.
        details = result.stderr.decode(errors="replace").strip()
        print(f"❌ Farbanpassung fehlgeschlagen: {path} ({details})")
        continue

    paths.append(out_path)
    print(f"✅ Farben angepasst: {path} 🠪 {out_path}")

paths = sorted(paths)

Schauen wir uns einmal das Ergebnis an – die Ziffern sollten jetzt kräftiger sein und mehr Kontrast haben.

In [None]:
# Show, for every page, a centred crop of the original scan next to the same
# crop of the colour-adjusted version.
n = len(paths)
rows = n
cols = 2

s = 120  # half the edge length (in pixels) of the centred preview crop


def _center_crop(image, half_size):
    """Return the region of up to 2*half_size pixels per side around the image centre."""
    center_y = image.shape[0] // 2
    center_x = image.shape[1] // 2
    # Clamp the start indices: a negative start would be interpreted as
    # "count from the end" and yield an empty or wrong crop on small images.
    top = max(center_y - half_size, 0)
    left = max(center_x - half_size, 0)
    return image[top:center_y + half_size, left:center_x + half_size]


if rows == 0:
    # plt.subplots() cannot create a grid with zero rows.
    print(f"⚠️ Keine Seiten gefunden – bitte gescannte Seiten in das Verzeichnis {PAGES_DIR} legen.")
else:
    # squeeze=False keeps axs two-dimensional even for a single page, so
    # axs[i, 0] / axs[i, 1] is always valid.
    fig, axs = plt.subplots(rows, cols, figsize=(5, 2.5*rows), squeeze=False)

    for i, path in enumerate(paths):
        # The cached file keeps the base name of the original page.
        original_path = os.path.join(PAGES_DIR, os.path.basename(path))
        with Image.open(original_path) as page:
            old_image = np.array(page.convert('L'))
        with Image.open(path) as page:
            new_image = np.array(page.convert('L'))

        axs[i, 0].imshow(_center_crop(old_image, s), cmap='gray')
        axs[i, 0].set_title('vorher')
        axs[i, 0].axis('off')

        axs[i, 1].imshow(_center_crop(new_image, s), cmap='gray')
        axs[i, 1].set_title('nachher')
        axs[i, 1].axis('off')

    plt.tight_layout()
    plt.show()

### Schritt 2: Erkennung des Rahmens

Da wir davon ausgehen müssen, dass das Bild etwas schräg eingescannt worden ist, müssen wir zunächst die vier Ecken des Gitters suchen, um danach die einzelnen Ziffern extrahieren zu können.

In [None]:
# Locate the four corners of the printed grid on every page.
# Result per page: (x0, y0, x1, y1, x2, y2, x3, y3) =
# top-left, top-right, bottom-left, bottom-right corner.
frame_for_image = {}
for path in paths:
    img = Image.open(path).convert('L')
    image = np.array(img)

    # Start the search a little inside the page (3 % of the page width)
    # instead of directly at the top/bottom edge.
    margin = int(img.width * 0.03)

    # All four probes start on the vertical centre line of the page.
    x0 = x1 = x2 = x3 = image.shape[1] // 2
    y0 = y1 = margin
    y2 = y3 = image.shape[0] - 1 - margin

    def adjust(x, y):
        """Return -1/+1/0 to move y towards the darker 3-pixel column window at x.

        Compares the window centred on y with the windows shifted one pixel
        up and one pixel down; a smaller sum means darker pixels.
        """
        # ndarray.sum() with an explicit wide dtype: the builtin sum() adds
        # 8-bit scalars to each other, which can wrap around at 255 and
        # corrupt the comparison.
        center = int(image[y-1:y+2, x].sum(dtype=np.int64))
        if int(image[y-2:y+1, x].sum(dtype=np.int64)) < center:
            return -1
        elif int(image[y:y+3, x].sum(dtype=np.int64)) < center:
            return 1
        return 0

    t = 64  # grey values below this threshold count as part of a line

    # Top edge: walk down the centre line until the first dark pixel ...
    while image[y0, x0] >= t:
        y0 += 1
        y1 += 1
    # ... and step one pixel further into the line.
    y0 += 1
    y1 += 1
    # Follow the line to the left until it ends (top-left corner).
    while image[y0, x0] < t:
        x0 -= 1
        y0 += adjust(x0, y0)
    # Follow the line to the right until it ends (top-right corner).
    while image[y1, x1] < t:
        x1 += 1
        y1 += adjust(x1, y1)

    # Bottom edge: same procedure, walking up from the bottom margin.
    while image[y2, x2] >= t:
        y2 -= 1
        y3 -= 1
    y2 -= 1
    y3 -= 1
    # Follow the line to the left (bottom-left corner).
    while image[y2, x2] < t:
        x2 -= 1
        y2 += adjust(x2, y2)
    # Follow the line to the right (bottom-right corner).
    while image[y3, x3] < t:
        x3 += 1
        y3 += adjust(x3, y3)

    frame_for_image[path] = (x0, y0, x1, y1, x2, y2, x3, y3)
    print(f"✅ Rahmen erkannt: {path}")

Wenn alles geklappt hat, sollte der rote Rahmen jetzt genau das Gitter umschließen:

In [None]:
# Draw every page with its detected frame as a red outline, three pages per row.
n = len(paths)
rows = math.ceil(n / 3)
cols = min(3, n)

if n == 0:
    # plt.subplots() cannot create an empty grid.
    print("⚠️ Keine Seiten vorhanden – es gibt keine Rahmen anzuzeigen.")
else:
    # squeeze=False always yields a 2-D array of axes, including the
    # single-page case where subplots() would otherwise return a bare Axes.
    fig, axs = plt.subplots(rows, cols, figsize=(10, 5*rows), squeeze=False)

    for i, path in enumerate(paths):
        ax = axs[i // cols, i % cols]
        with Image.open(path) as page:
            img = page.convert('L')
        frame = frame_for_image[path]
        ax.imshow(img, cmap='gray')
        # Corner order for a closed outline: top-left, top-right,
        # bottom-right, bottom-left.
        outline = [
            (frame[0], frame[1]),
            (frame[2], frame[3]),
            (frame[6], frame[7]),
            (frame[4], frame[5]),
        ]
        ax.add_patch(patches.Polygon(outline, linewidth=1, edgecolor='r', facecolor='none'))
        ax.axis('off')

    # Remove the unused subplots of the last row.
    for extra_ax in axs.flatten()[n:]:
        fig.delaxes(extra_ax)

    plt.tight_layout()
    plt.show()

### Schritt 3: Extraktion der Ziffern

Der nachfolgende Code extrahiert die einzelnen Ziffern aus dem Gitter. Der Code durchläuft jedes Bild im Gitter und führt die folgenden Schritte aus:

1. Das Bild wird auf die Größe von 64x64 Pixeln skaliert.
2. Es wird die Bounding Box des Bildes ermittelt, also der Bereich, der die nicht-weißen Pixel umschließt.
3. Das Bild wird auf die Bounding Box zugeschnitten.
4. Das Bild wird auf eine Größe von 28x28 Pixeln skaliert und mit einem weißen Hintergrund aufgefüllt.
5. Das Bild wird gespeichert.

🕔 Bitte beachte, dass dieser Schritt einige Zeit in Anspruch nehmen kann.

In [None]:
# Start from an empty output directory so digits of earlier runs are not mixed in.
os.makedirs(DIGITS_DIR, exist_ok=True)
for path in glob.glob(os.path.join(DIGITS_DIR, "*")):
    try:
        os.remove(path)
    except OSError:
        # Best effort: entries that cannot be deleted (e.g. sub-directories)
        # are left in place.
        pass

def sample(image, x, y):
    """Return the bilinearly interpolated grey value of *image* at (x, y).

    Args:
        image: 2-D array indexed as image[row, column].
        x: Column coordinate; may be fractional.
        y: Row coordinate; may be fractional.

    Returns:
        The interpolated value, truncated to int. Coordinates outside the
        image are clamped to the nearest border pixel, so sampling on or
        beyond the last row/column does not raise or wrap around.
    """
    height, width = image.shape[:2]
    # Clamp the position into the image; negative indices would otherwise
    # silently wrap around to the opposite side.
    x = min(max(x, 0), width - 1)
    y = min(max(y, 0), height - 1)

    ix = int(x)
    iy = int(y)
    # Right/lower neighbour, repeated at the border instead of indexing
    # one pixel past the end.
    ix1 = min(ix + 1, width - 1)
    iy1 = min(iy + 1, height - 1)

    fx = x - ix
    fy = y - iy

    # Interpolate horizontally on the upper and lower row, then vertically.
    upper = image[iy, ix] * (1 - fx) + image[iy, ix1] * fx
    lower = image[iy1, ix] * (1 - fx) + image[iy1, ix1] * fx
    return int(upper * (1 - fy) + lower * fy)

print("Extrahiere Ziffern...")

GRID_COLS = 20        # cells per grid row
GRID_ROWS = 30        # grid rows per page
CELL_SIZE = 64        # edge length (pixels) every cell is resampled to
CELL_BORDER = 6       # pixels trimmed from each side of a resampled cell
INK_THRESHOLD = 64    # grey values below this count as ink
DIGIT_SIZE = 28       # edge length of the stored images
DIGIT_INNER = 26.0    # the glyph is scaled to fit into this many pixels
PROGRESS_WIDTH = 40   # scale factor of the textual progress bar
LETTER_LABELS = 'ABCDEFGHIJKLMNOPQRSTUVWXYZÄÖÜß'  # one label per grid row on letter pages


def _extract_cell(image, frame, col, row):
    """Resample grid cell (col, row) inside the frame quadrilateral to CELL_SIZE x CELL_SIZE."""
    x0, y0, x1, y1, x2, y2, x3, y3 = frame
    cell = np.zeros((CELL_SIZE, CELL_SIZE))
    for by in range(CELL_SIZE):
        # Relative vertical position (0..1) within the whole grid.
        fy = (by / (CELL_SIZE - 1) + row) / GRID_ROWS
        for bx in range(CELL_SIZE):
            # Relative horizontal position (0..1) within the whole grid.
            fx = (bx / (CELL_SIZE - 1) + col) / GRID_COLS
            # Point on the top edge and on the bottom edge of the frame ...
            top_x = fx * (x1 - x0) + x0
            top_y = fx * (y1 - y0) + y0
            bottom_x = fx * (x3 - x2) + x2
            bottom_y = fx * (y3 - y2) + y2
            # ... and the point between them at the vertical fraction fy.
            px = top_x + (bottom_x - top_x) * fy
            py = top_y + (bottom_y - top_y) * fy
            cell[by, bx] = sample(image, px, py)
    return cell


def _normalize_digit(cell):
    """Crop a cell to its ink and centre it on a white DIGIT_SIZE image; return None if unusable."""
    cell = cell[CELL_BORDER:-CELL_BORDER, CELL_BORDER:-CELL_BORDER]

    # Bounding box of all ink pixels.
    ink_rows, ink_cols = np.nonzero(cell < INK_THRESHOLD)
    if ink_rows.size == 0:
        return None  # empty cell
    left, right = int(ink_cols.min()), int(ink_cols.max())
    top, bottom = int(ink_rows.min()), int(ink_rows.max())

    # Ink in the very last column/row is excluded from the box.
    if right == cell.shape[1] - 1:
        right -= 1
    if bottom == cell.shape[0] - 1:
        bottom -= 1
    if left > right or top > bottom:
        return None
    # Skip marks that are small in both directions.
    if right - left < cell.shape[1] / 5 and bottom - top < cell.shape[0] / 5:
        return None

    glyph = cell[top:bottom+1, left:right+1]
    height, width = glyph.shape

    # Uniform scale so that the larger side fits into DIGIT_INNER pixels.
    scale = min(DIGIT_INNER / width, DIGIT_INNER / height)
    # At least one pixel per side: a very thin stroke would otherwise be
    # scaled to a zero-sized image, which resize() rejects.
    target_size = (max(1, int(width * scale)), max(1, int(height * scale)))

    glyph = Image.fromarray(glyph.astype(np.uint8), mode='L')
    glyph = np.array(glyph.resize(target_size, Image.LANCZOS))

    # Centre the glyph on a white canvas.
    padding_x = (DIGIT_SIZE - glyph.shape[1]) // 2
    padding_y = (DIGIT_SIZE - glyph.shape[0]) // 2
    canvas = np.full((DIGIT_SIZE, DIGIT_SIZE), 255, dtype=np.uint8)
    canvas[padding_y:padding_y+glyph.shape[0], padding_x:padding_x+glyph.shape[1]] = glyph

    # Reject images with ink one pixel inside any of the four corners.
    corners = (canvas[-2, 1], canvas[1, -2], canvas[-2, -2], canvas[1, 1])
    if any(value < INK_THRESHOLD for value in corners):
        return None
    return canvas


# Running index per label, shared across all pages, used for the file names.
count_for_label = {}
for path in paths:
    frame = frame_for_image[path]
    with Image.open(path) as page:
        image = np.array(page.convert('L'))

    # Pages whose file name starts with "l" contain letters, one label per row;
    # all other pages contain digits, three rows per digit.
    is_letter_page = os.path.basename(path).startswith("l")

    cells = GRID_ROWS * GRID_COLS
    total = (cells - 1) * PROGRESS_WIDTH // cells
    count = 0
    found = 0
    for y in range(GRID_ROWS):
        label = LETTER_LABELS[y] if is_letter_page else str(y // 3)
        for x in range(GRID_COLS):
            count += 1
            progress = (y * GRID_COLS + x) * PROGRESS_WIDTH // cells
            print(f"\r{'=' * progress}>{'-' * (total - progress)} [{path}] {found} of {count} usable", end="")

            digit = _normalize_digit(_extract_cell(image, frame, x, y))
            if digit is None:
                continue

            index = count_for_label.get(label, 0)
            dpath = os.path.join(DIGITS_DIR, f"{label}_{index}.png")
            count_for_label[label] = index + 1
            found += 1
            Image.fromarray(digit, mode='L').save(dpath)
    print()
    print(f"✅ {found} Ziffern extrahiert: {path}")

### Schritt 4: Zusammenstellung der Sprite Sheets

Im letzten Schritt kombinieren wir die vielen kleinen Einzelbilder in Sprite Sheets, so dass wir für jede Ziffer eine Bilddatei bekommen, die alle Bilder enthält.

In [None]:
from PIL import Image
import os
import random

# Start from an empty output directory so sheets of earlier runs are not kept.
os.makedirs(SHEETS_DIR, exist_ok=True)
for path in glob.glob(os.path.join(SHEETS_DIR, "*")):
    try:
        os.remove(path)
    except OSError:
        # Best effort: entries that cannot be deleted (e.g. sub-directories)
        # are left in place.
        pass

image_width = 28      # width of a single digit image
image_height = 28     # height of a single digit image
images_per_row = 20   # digit images per sprite-sheet row

# List the directory once instead of once per digit.
digit_files = os.listdir(DIGITS_DIR)

for c in "0123456789":
    image_files = [file for file in digit_files if file.startswith(f"{c}_")]
    if not image_files:
        # Without examples the sheet would have zero height; skip it.
        print(f"⚠️ Symbol {c}: keine Beispiele gefunden, Sprite Sheet wird übersprungen")
        continue

    random.shuffle(image_files)
    # Number of rows, rounded up so that a partially filled last row fits.
    num_rows = (len(image_files) + images_per_row - 1) // images_per_row
    grid_width = image_width * images_per_row
    grid_height = image_height * num_rows
    # White background; unused positions in the last row stay white.
    grid_image = Image.new("L", (grid_width, grid_height), color=255)

    for i, image_file in enumerate(image_files):
        image_path = os.path.join(DIGITS_DIR, image_file)

        row = i // images_per_row
        col = i % images_per_row
        x = col * image_width
        y = row * image_height

        # Close each file as soon as it has been pasted.
        with Image.open(image_path) as image:
            grid_image.paste(image, (x, y))

    path = os.path.join(SHEETS_DIR, f"{c}.png")
    grid_image.save(path)
    print(f"✅ Symbol {c}: {len(image_files)} Beispiele zusammengefasst: {path}")


Wenn dein Notebook bis hierhin durchgelaufen ist, schau dir die Dateien im Verzeichnis `04-sheets` an. Jetzt kannst du mit dem Training des Modells beginnen.