In [1]:
# ====================================================
# Character Recognition with VGG16 (Transfer Learning, TF/Keras)
# ====================================================

import os
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.applications import VGG16
from sklearn.metrics import classification_report, precision_score, recall_score, f1_score
from collections import defaultdict

2025-09-14 22:40:13.625405: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1757889613.850009      36 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1757889613.917730      36 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [2]:
# -----------------------
# CONFIG
# -----------------------
train_dir = "/kaggle/input/ocr-data/OCR_data/train_data"  
test_dir  = "/kaggle/input/ocr-data/OCR_data/test_data"   
img_size = (128, 128)
batch_size = 16
epochs = 50 

In [3]:
# -----------------------
# CLASS MAPPING
# -----------------------
train_class_names = sorted({f[0] for f in os.listdir(train_dir) if os.path.isdir(os.path.join(train_dir, f))})
print("Train classes (first letters):", train_class_names)

class_to_index = {cls: idx for idx, cls in enumerate(train_class_names)}
index_to_class = {idx: cls for cls, idx in class_to_index.items()}
num_classes = len(class_to_index)

print("Class mapping:", class_to_index)
print("Number of classes:", num_classes)


Train classes (first letters): ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
Class mapping: {'0': 0, '1': 1, '2': 2, '3': 3, '4': 4, '5': 5, '6': 6, '7': 7, '8': 8, '9': 9, 'A': 10, 'B': 11, 'C': 12, 'D': 13, 'E': 14, 'F': 15, 'G': 16, 'H': 17, 'I': 18, 'J': 19, 'K': 20, 'L': 21, 'M': 22, 'N': 23, 'O': 24, 'P': 25, 'Q': 26, 'R': 27, 'S': 28, 'T': 29, 'U': 30, 'V': 31, 'W': 32, 'X': 33, 'Y': 34, 'Z': 35, 'a': 36, 'b': 37, 'c': 38, 'd': 39, 'e': 40, 'f': 41, 'g': 42, 'h': 43, 'i': 44, 'j': 45, 'k': 46, 'l': 47, 'm': 48, 'n': 49, 'o': 50, 'p': 51, 'q': 52, 'r': 53, 's': 54, 't': 55, 'u': 56, 'v': 57, 'w': 58, 'x': 59, 'y': 60, 'z': 61}
Number of classes: 62


In [4]:
# -----------------------
# DATASET LOADER
# -----------------------
keys_tensor = tf.constant(list(class_to_index.keys()))
vals_tensor = tf.constant(list(class_to_index.values()), dtype=tf.int32)
table = tf.lookup.StaticHashTable(
    tf.lookup.KeyValueTensorInitializer(keys_tensor, vals_tensor),
    default_value=-1
)

def process_path(path):
    img = tf.io.read_file(path)
    img = tf.image.decode_png(img, channels=1)   # grayscale
    img = tf.image.resize(img, img_size)
    img = (tf.cast(img, tf.float32) / 127.5) - 1.0

    parts = tf.strings.split(path, os.sep)
    folder_name = parts[-2]
    label_char = tf.strings.substr(folder_name, 0, 1)
    label = table.lookup(label_char)

    return img, label

train_files = tf.data.Dataset.list_files(train_dir + "/*/*", shuffle=True)
test_files  = tf.data.Dataset.list_files(test_dir + "/*/*", shuffle=False)

full_train_ds = train_files.map(process_path, num_parallel_calls=tf.data.AUTOTUNE)

# Split into train/validation (80/20)
train_size = int(0.8 * len(list(train_files)))
train_ds = (full_train_ds.take(train_size)
            .batch(batch_size)
            .prefetch(tf.data.AUTOTUNE))
val_ds = (full_train_ds.skip(train_size)
          .batch(batch_size)
          .prefetch(tf.data.AUTOTUNE))

test_ds = (test_files
           .map(process_path, num_parallel_calls=tf.data.AUTOTUNE)
           .batch(batch_size)
           .prefetch(tf.data.AUTOTUNE))


2025-09-14 22:40:29.445878: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:152] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)


In [5]:
# -----------------------
# MODEL: VGG16 Backbone
# -----------------------
vgg_base = VGG16(
    input_shape=(128,128,3),
    include_top=False,
    weights="imagenet"
)

vgg_base.trainable = False  # freeze backbone

inputs = layers.Input(shape=(128,128,1))
x = layers.Concatenate()([inputs, inputs, inputs])   # grayscale → RGB
x = vgg_base(x, training=False)
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dropout(0.5)(x)
outputs = layers.Dense(num_classes, activation="softmax",
                       kernel_regularizer=tf.keras.regularizers.l2(1e-4))(x)

model = models.Model(inputs, outputs)
model.summary()

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m58889256/58889256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 0us/step


In [6]:
# -----------------------
# COMPILE & TRAIN
# -----------------------
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0005),
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"]
)

early_stop = tf.keras.callbacks.EarlyStopping(
    monitor="val_loss",
    patience=5,
    restore_best_weights=True
)

history = model.fit(
    train_ds,
    epochs=epochs,
    validation_data=val_ds,
    callbacks=[early_stop]
)

Epoch 1/50
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 2s/step - accuracy: 0.0163 - loss: 4.6712 - val_accuracy: 0.0242 - val_loss: 4.2586
Epoch 2/50
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 2s/step - accuracy: 0.0278 - loss: 4.4170 - val_accuracy: 0.1048 - val_loss: 4.0453
Epoch 3/50
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 2s/step - accuracy: 0.0277 - loss: 4.2237 - val_accuracy: 0.0968 - val_loss: 3.9379
Epoch 4/50
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 2s/step - accuracy: 0.0267 - loss: 4.0367 - val_accuracy: 0.1855 - val_loss: 3.7855
Epoch 5/50
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 2s/step - accuracy: 0.0838 - loss: 3.9409 - val_accuracy: 0.2339 - val_loss: 3.7323
Epoch 6/50
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 2s/step - accuracy: 0.0800 - loss: 3.8316 - val_accuracy: 0.2823 - val_loss: 3.6462
Epoch 7/50
[1m31/31[0m [32m━━━━━━━━━━

In [7]:
# -----------------------
# SAVE & LOAD
# -----------------------
model.save("vgg16_char_tf.h5")
print("Model saved!")

loaded_model = tf.keras.models.load_model("vgg16_char_tf.h5")
print("Model loaded!")


Model saved!
Model loaded!


In [8]:
# -----------------------
# FINAL EVALUATION (TEST DATA)
# -----------------------
correct = 0
wrong = 0
total = 0

y_true = []
y_pred = []
class_correct = defaultdict(int)
class_total = defaultdict(int)

for images, labels in test_ds:
    preds = loaded_model.predict(images, verbose=0)
    predicted_classes = np.argmax(preds, axis=1)

    y_true.extend(labels.numpy())
    y_pred.extend(predicted_classes)

    correct += np.sum(predicted_classes == labels.numpy())
    wrong += np.sum(predicted_classes != labels.numpy())
    total += labels.shape[0]

    for true, pred in zip(labels.numpy(), predicted_classes):
        class_total[true] += 1
        if true == pred:
            class_correct[true] += 1

print("\n=== Per-Class Results ===")
for idx in sorted(class_total.keys()):
    total_i = class_total[idx]
    correct_i = class_correct[idx]
    wrong_i = total_i - correct_i
    print(f"Class {index_to_class[idx]}: Correct={correct_i}, Wrong={wrong_i}, Total={total_i}, Acc={100*correct_i/total_i:.2f}%")

print("\n=== Overall Results ===")
print(f"Correct predictions: {correct}")
print(f"Wrong predictions: {wrong}")
print(f"Total images: {total}")
print(f"Accuracy: {100.0 * correct / total:.2f}%")

precision = precision_score(y_true, y_pred, average="macro")
recall = recall_score(y_true, y_pred, average="macro")
f1 = f1_score(y_true, y_pred, average="macro")

print("\n=== Precision / Recall / F1 (Macro) ===")
print(f"Precision: {precision:.4f}")
print(f"Recall:    {recall:.4f}")
print(f"F1 Score:  {f1:.4f}")

print("\n=== Classification Report ===")
print(classification_report(y_true, y_pred, target_names=[index_to_class[i] for i in range(num_classes)]))


=== Per-Class Results ===
Class 0: Correct=7, Wrong=18, Total=25, Acc=28.00%
Class 1: Correct=1, Wrong=24, Total=25, Acc=4.00%
Class 2: Correct=8, Wrong=17, Total=25, Acc=32.00%
Class 3: Correct=15, Wrong=10, Total=25, Acc=60.00%
Class 4: Correct=9, Wrong=16, Total=25, Acc=36.00%
Class 5: Correct=10, Wrong=15, Total=25, Acc=40.00%
Class 6: Correct=15, Wrong=10, Total=25, Acc=60.00%
Class 7: Correct=4, Wrong=21, Total=25, Acc=16.00%
Class 8: Correct=10, Wrong=15, Total=25, Acc=40.00%
Class 9: Correct=8, Wrong=17, Total=25, Acc=32.00%
Class A: Correct=11, Wrong=14, Total=25, Acc=44.00%
Class B: Correct=12, Wrong=13, Total=25, Acc=48.00%
Class C: Correct=18, Wrong=7, Total=25, Acc=72.00%
Class D: Correct=16, Wrong=9, Total=25, Acc=64.00%
Class E: Correct=8, Wrong=17, Total=25, Acc=32.00%
Class F: Correct=13, Wrong=12, Total=25, Acc=52.00%
Class G: Correct=10, Wrong=15, Total=25, Acc=40.00%
Class H: Correct=10, Wrong=15, Total=25, Acc=40.00%
Class I: Correct=7, Wrong=18, Total=25, Acc=28.