In [1]:
#to upload model to colab from local disk
from google.colab import files
import tensorflow as tf

# Upload model file (.keras or .h5)
uploaded = files.upload()

model_filename = list(uploaded.keys())[0]
model = tf.keras.models.load_model(model_filename)

print("✅ Model loaded successfully")
model.summary()


Saving pen_to_pixel_sd19_2.keras to pen_to_pixel_sd19_2.keras
✅ Model loaded successfully


In [2]:
import os
import zipfile
import requests

# URL for SD-19 ByClass dataset (2nd edition)
url = "https://s3.amazonaws.com/nist-srd/SD19/by_class.zip"
zip_path = "/content/by_class.zip"
extract_dir = "/content/sd19_by_class"

# 1) Download
if not os.path.exists(zip_path):
    print("📥 Downloading NIST SD19 ByClass (~2.5GB, this may take a while)...")
    r = requests.get(url, stream=True)
    with open(zip_path, "wb") as f:
        for chunk in r.iter_content(chunk_size=8192):
            if chunk:
                f.write(chunk)
    print("✅ Download complete")

# 2) Extract
if not os.path.exists(extract_dir):
    print("📂 Extracting dataset...")
    with zipfile.ZipFile(zip_path, "r") as zip_ref:
        zip_ref.extractall(extract_dir)
    print("✅ Extracted to:", extract_dir)

# 3) Verify
print("📊 Classes found:", len(os.listdir(extract_dir)))
print("Example classes:", os.listdir(extract_dir)[:10])


📥 Downloading NIST SD19 ByClass (~2.5GB, this may take a while)...
✅ Download complete
📂 Extracting dataset...
✅ Extracted to: /content/sd19_by_class
📊 Classes found: 1
Example classes: ['by_class']


In [3]:
import os
import glob
from pathlib import Path
from typing import List, Tuple, Dict

import numpy as np
import tensorflow as tf
from PIL import Image

# Acceptable image extensions in SD19 dumps
IMG_EXTS = {".png", ".jpg", ".jpeg", ".bmp", ".tif", ".tiff", ".gif"}

# =========================
# Utilities
# =========================
def is_image_file(p: str) -> bool:
    """Check if file is an image (skip .mit and other non-images)."""
    return Path(p).suffix.lower() in IMG_EXTS

def scan_dataset(by_class_dir: str) -> Tuple[List[str], List[int], Dict[str, int], Dict[int, str]]:
    """
    Scan SD19 by_class directory structure:
      by_class/
        <class_name>/
          hsf_1/*.png
          hsf_2/*.png
          ...
    Returns:
      filepaths: list of image file paths
      labels:    list of integer labels
      label2idx: mapping {label_str -> int}
      idx2label: reverse mapping
    """
    filepaths = []
    str_labels = []

    # Top-level label folders (e.g., "30", "7a", "A", etc.)
    label_dirs = sorted([d for d in glob.glob(os.path.join(by_class_dir, "*")) if os.path.isdir(d)])

    # Build label map (alphabetical order for consistency)
    label_names = [os.path.basename(d) for d in label_dirs]
    label2idx = {lab: i for i, lab in enumerate(label_names)}
    idx2label = {i: lab for lab, i in label2idx.items()}

    # Collect image paths + labels
    for lab_dir in label_dirs:
        lab = os.path.basename(lab_dir)
        for p in glob.glob(os.path.join(lab_dir, "**", "*"), recursive=True):
            if os.path.isfile(p) and is_image_file(p):
                filepaths.append(p)
                str_labels.append(lab)

    # Convert labels to integer indices
    int_labels = [label2idx[s] for s in str_labels]

    return filepaths, int_labels, label2idx, idx2label

def load_and_preprocess_image(path: tf.Tensor, img_size=(128, 128)) -> tf.Tensor:
    """
    Load one image, convert to grayscale, resize, scale to [0,1].
    """
    img = tf.io.read_file(path)
    img = tf.io.decode_image(img, channels=1, expand_animations=False)
    img = tf.image.resize(img, img_size)
    img = tf.cast(img, tf.float32) / 255.0
    return img

# =========================
# Example Usage
# =========================
if __name__ == "__main__":
    DATASET_DIR = ""   # <- change to your SD19 path

    print("🔍 Scanning dataset...")
    filepaths, labels, label2idx, idx2label = scan_dataset(DATASET_DIR)
    print(f"Found {len(filepaths)} images across {len(label2idx)} classes")
    print("Example classes:", list(label2idx.keys())[:10])  # first 10 class names

    # Convert to tf.data.Dataset
    paths_ds = tf.data.Dataset.from_tensor_slices(filepaths)
    labels_ds = tf.data.Dataset.from_tensor_slices(labels)
    dataset = tf.data.Dataset.zip((paths_ds, labels_ds))

    dataset = dataset.map(
        lambda p, y: (load_and_preprocess_image(p), y),
        num_parallel_calls=tf.data.AUTOTUNE
    )
    dataset = dataset.shuffle(10000).batch(128).prefetch(tf.data.AUTOTUNE)

    # Test: fetch one batch
    for images, labs in dataset.take(1):
        print("Batch image shape:", images.shape)
        print("Batch labels:", labs[:10].numpy())


🔍 Scanning dataset...
Found 1545923 images across 2 classes
Example classes: ['sample_data', 'sd19_by_class']
Batch image shape: (128, 128, 128, 1)
Batch labels: [1 1 1 1 1 1 1 1 1 1]


In [4]:
# sd19_pen_to_pixel_finetune.py
import os
import glob
import json
import random
from pathlib import Path
from typing import List, Tuple, Dict

import numpy as np
import tensorflow as tf
from tensorflow import keras

# sklearn used for stratified split and class weight computation
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.utils.class_weight import compute_class_weight

# For auto-download
from google.colab import files


# -----------------------
# User config
# -----------------------
DATASET_DIR = "/content/sd19_by_class/by_class"
IMG_SIZE = (128, 128)
BATCH_SIZE = 64
EPOCHS = 1
VAL_SPLIT = 0.15
RANDOM_SEED = 42
SHUFFLE_BUFFER = 10_000
INVERT = True
PRETRAINED_MODEL = "pen_to_pixel_sd19_2.keras"
LABELMAP_JSON = "pen_to_pixel_labelmap.json"
BEST_MODEL_PATH = "p_to_p_3.keras"

IMG_EXTS = {".png", ".jpg", ".jpeg", ".bmp", ".tif", ".tiff", ".gif"}


# =========================
# Reproducibility
# =========================
def set_all_seeds(seed: int = 42):
    random.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)


# =========================
# Dataset scanning & preprocessing
# =========================
def is_image_file(p: str) -> bool:
    return Path(p).suffix.lower() in IMG_EXTS


def scan_dataset(by_class_dir: str) -> Tuple[List[str], List[str], Dict[str, int], Dict[int, str]]:
    filepaths, str_labels = [], []

    label_dirs = sorted([d for d in glob.glob(os.path.join(by_class_dir, "*")) if os.path.isdir(d)])
    if not label_dirs:
        raise RuntimeError(f"No class folders found under: {by_class_dir}")

    label_names = [os.path.basename(d) for d in label_dirs]
    label2idx = {lab: i for i, lab in enumerate(label_names)}
    idx2label = {i: lab for lab, i in label2idx.items()}

    for lab_dir in label_dirs:
        lab = os.path.basename(lab_dir)
        for p in glob.glob(os.path.join(lab_dir, "**", "*"), recursive=True):
            if os.path.isfile(p) and is_image_file(p):
                filepaths.append(p)
                str_labels.append(lab)

    if len(filepaths) == 0:
        raise RuntimeError("No images found in dataset path!")

    return filepaths, str_labels, label2idx, idx2label


def load_and_preprocess_image(path: tf.Tensor) -> tf.Tensor:
    img = tf.io.read_file(path)
    img = tf.io.decode_image(img, channels=1, expand_animations=False)
    img.set_shape([None, None, 1])
    img = tf.image.resize(img, IMG_SIZE)
    img = tf.cast(img, tf.float32) / 255.0
    if INVERT:
        img = 1.0 - img
    return img


# =========================
# Data augmentation
# =========================
def get_augmentation_model(seed: int = RANDOM_SEED) -> keras.Sequential:
    layers = [
        keras.layers.RandomRotation(0.08, fill_mode="reflect", seed=seed),
        keras.layers.RandomTranslation(0.08, 0.08, fill_mode="reflect", seed=seed+1),
        keras.layers.RandomZoom(0.08, 0.08, seed=seed+2),
        keras.layers.RandomContrast(0.15, seed=seed+3),
        keras.layers.GaussianNoise(0.02),
    ]
    return keras.Sequential(layers, name="data_augmentation")


AUGMENTOR = get_augmentation_model()


def build_dataset(paths: List[str], labels: np.ndarray, training: bool) -> tf.data.Dataset:
    ds_paths = tf.data.Dataset.from_tensor_slices(paths)
    ds_labels = tf.data.Dataset.from_tensor_slices(labels.astype(np.int32))
    ds = tf.data.Dataset.zip((ds_paths, ds_labels))

    def _map_fn(p, y):
        img = load_and_preprocess_image(p)
        return img, y

    ds = ds.map(_map_fn, num_parallel_calls=tf.data.AUTOTUNE)
    if training:
        ds = ds.shuffle(SHUFFLE_BUFFER, seed=RANDOM_SEED, reshuffle_each_iteration=True)

        def _augment(img, y):
            img_aug = AUGMENTOR(tf.expand_dims(img, axis=0), training=True)
            img_aug = tf.squeeze(img_aug, axis=0)
            return img_aug, y

        ds = ds.map(_augment, num_parallel_calls=tf.data.AUTOTUNE)

    ds = ds.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)
    return ds


# =========================
# Model Rebuild
# =========================
def rebuild_model_128(pretrained_path: str, num_classes: int):
    old_model = keras.models.load_model(pretrained_path)

    inp = keras.layers.Input(shape=(128, 128, 1), name="image")

    x = keras.layers.Conv2D(64, 3, padding="same", activation="relu", name="conv2d_3")(inp)
    x = keras.layers.BatchNormalization(name="batch_normalization_3")(x)
    x = keras.layers.MaxPooling2D((2, 2), name="max_pooling2d_3")(x)

    x = keras.layers.Conv2D(128, 3, padding="same", activation="relu", name="conv2d_4")(x)
    x = keras.layers.BatchNormalization(name="batch_normalization_4")(x)
    x = keras.layers.MaxPooling2D((2, 2), name="max_pooling2d_4")(x)

    x = keras.layers.Conv2D(256, 3, padding="same", activation="relu", name="conv2d_5")(x)
    x = keras.layers.BatchNormalization(name="batch_normalization_5")(x)
    x = keras.layers.MaxPooling2D((2, 2), name="max_pooling2d_5")(x)

    x = keras.layers.MaxPooling2D((2, 2), name="max_pooling2d_6")(x)

    x = keras.layers.Flatten(name="flatten_1")(x)
    x = keras.layers.Dense(512, activation="relu", name="dense_2")(x)
    x = keras.layers.Dropout(0.5, name="dropout_1")(x)
    out = keras.layers.Dense(num_classes, activation="softmax", name="dense_3")(x)

    new_model = keras.Model(inp, out, name="pen_to_pixel_128")

    for old_layer, new_layer in zip(old_model.layers[1:], new_model.layers[1:]):
        try:
            new_layer.set_weights(old_layer.get_weights())
            print(f"✅ Copied weights: {old_layer.name} -> {new_layer.name}")
        except Exception as e:
            print(f"⚠️ Skipped layer {old_layer.name}: {e}")

    return new_model


# =========================
# Train / Eval
# =========================
def main():
    set_all_seeds(RANDOM_SEED)

    gpus = tf.config.list_physical_devices("GPU")
    for gpu in gpus:
        try:
            tf.config.experimental.set_memory_growth(gpu, True)
        except Exception:
            pass

    print(f"🔍 Scanning dataset at: {DATASET_DIR}")
    filepaths, str_labels, label2idx, idx2label = scan_dataset(DATASET_DIR)
    print(f"Found {len(filepaths)} images across {len(label2idx)} classes.")

    if len(label2idx) < 2:
        raise RuntimeError(f"❌ Dataset must have at least 2 classes, found {len(label2idx)}.")

    y = np.array([label2idx[s] for s in str_labels], dtype=np.int32)

    sss = StratifiedShuffleSplit(n_splits=1, test_size=VAL_SPLIT, random_state=RANDOM_SEED)
    train_idx, val_idx = next(sss.split(filepaths, y))
    train_paths = [filepaths[i] for i in train_idx]
    val_paths = [filepaths[i] for i in val_idx]
    y_train, y_val = y[train_idx], y[val_idx]

    print(f"Train: {len(train_paths)} | Val: {len(val_paths)}")

    ds_train = build_dataset(train_paths, y_train, training=True)
    ds_val = build_dataset(val_paths, y_val, training=False)

    classes = np.unique(y_train)
    class_weights_vals = compute_class_weight(class_weight="balanced", classes=classes, y=y_train)
    class_weights = {int(c): float(w) for c, w in zip(classes, class_weights_vals)}
    print("Computed class weights (sample):", dict(list(class_weights.items())[:10]))

    print(f"📥 Rebuilding pretrained model with 128x128 input")
    model = rebuild_model_128(PRETRAINED_MODEL, num_classes=len(label2idx))

    model.compile(
        optimizer=keras.optimizers.Adam(1e-4),
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"],
    )
    model.summary()

    # Callbacks
    checkpoint = keras.callbacks.ModelCheckpoint(
        BEST_MODEL_PATH,
        monitor="val_accuracy",
        save_best_only=True,
        mode="max",
        verbose=1
    )
    early = keras.callbacks.EarlyStopping(monitor="val_accuracy", patience=6, restore_best_weights=True, verbose=1)
    lr_reduce = keras.callbacks.ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=3, verbose=1)

    history = model.fit(
        ds_train,
        validation_data=ds_val,
        epochs=EPOCHS,
        callbacks=[checkpoint, early, lr_reduce],
        class_weight=class_weights,
    )

    with open(LABELMAP_JSON, "w") as f:
        json.dump(
            {"label2idx": label2idx, "idx2label": {int(k): v for k, v in idx2label.items()}},
            f,
            indent=2,
        )
    print(f"✅ Label maps saved to {LABELMAP_JSON}")

    val_metrics = model.evaluate(ds_val, verbose=1)
    print("Validation metrics:", dict(zip(model.metrics_names, val_metrics)))


if __name__ == "__main__":
    main()

🔍 Scanning dataset at: /content/sd19_by_class/by_class
Found 1545923 images across 62 classes.
Train: 1314034 | Val: 231889
Computed class weights (sample): {0: 0.3317227273668208, 1: 0.3013093087033487, 2: 0.3357853033079874, 3: 0.32634418536267473, 4: 0.3435135137961287, 5: 0.3684520144326266, 6: 0.33687409438588467, 7: 0.32094761606083877, 8: 0.3394096594419568, 9: 0.3403853974816277}
📥 Rebuilding pretrained model with 128x128 input
✅ Copied weights: conv2d_3 -> conv2d_3
✅ Copied weights: batch_normalization_3 -> batch_normalization_3
✅ Copied weights: max_pooling2d_3 -> max_pooling2d_3
✅ Copied weights: conv2d_4 -> conv2d_4
✅ Copied weights: batch_normalization_4 -> batch_normalization_4
✅ Copied weights: max_pooling2d_4 -> max_pooling2d_4
✅ Copied weights: conv2d_5 -> conv2d_5
✅ Copied weights: batch_normalization_5 -> batch_normalization_5
✅ Copied weights: max_pooling2d_5 -> max_pooling2d_5
✅ Copied weights: max_pooling2d_6 -> max_pooling2d_6
✅ Copied weights: flatten_1 -> flatt

[1m20532/20532[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 288ms/step - accuracy: 0.5241 - loss: 1.7788
Epoch 1: val_accuracy improved from -inf to 0.79827, saving model to p_to_p_3.keras
[1m20532/20532[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6065s[0m 294ms/step - accuracy: 0.5241 - loss: 1.7787 - val_accuracy: 0.7983 - val_loss: 0.5200 - learning_rate: 1.0000e-04
Restoring model weights from the end of the best epoch: 1.
✅ Label maps saved to pen_to_pixel_labelmap.json
[1m3624/3624[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 17ms/step - accuracy: 0.7973 - loss: 0.5217
Validation metrics: {'loss': 0.5200343132019043, 'compile_metrics': 0.7982655763626099}


In [5]:
from google.colab import files

# After training completes and best_model.keras is saved
files.download("p_to_p_3.keras")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>