In [1]:
import tensorflow as tf
from tensorflow.keras import layers, models
import string

# Character set: uppercase letters followed by digits. Ids start at 1, which
# leaves id 0 free (the notebook later uses 0 as the padding/blank id).
CHARS = string.ascii_uppercase + string.digits
CHAR_TO_ID = dict(zip(CHARS, range(1, len(CHARS) + 1)))
ID_TO_CHAR = {idx: ch for ch, idx in CHAR_TO_ID.items()}
NUM_CLASSES = 1 + len(CHARS)  # one extra class for the CTC blank token

2025-05-05 20:57:20.203083: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1746503840.215527    8970 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1746503840.219283    8970 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1746503840.230005    8970 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1746503840.230019    8970 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1746503840.230020    8970 computation_placer.cc:177] computation placer alr

In [2]:
# Ask TensorFlow to enable memory growth on every GPU it can see.
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        print("Memory growth enabled")
    except RuntimeError as e:
        # Best-effort: report and continue. Presumably raised when the GPUs
        # were already initialized before this cell ran -- TODO confirm.
        print("Error setting memory growth:", e)

# Logs the device placement of every op. This is what produces the long
# "Executing op ... in device ..." output recorded later in this notebook;
# NOTE(review): consider disabling it once placement has been verified.
tf.debugging.set_log_device_placement(True)


Memory growth enabled


In [3]:
from tensorflow.keras.mixed_precision import set_global_policy
# Global side effect: layers created after this call use the 'mixed_float16'
# policy, so this cell has to run before any model-building cell.
set_global_policy('mixed_float16')

In [4]:
class LicensePlateModel(tf.keras.Model):
    """ResNet50 feature extractor + two BiLSTM layers + per-step classifier.

    Input:  images of shape ``(batch, 60, 160, 3)``.
    Output: raw logits of shape ``(batch, steps, NUM_CLASSES)`` (no softmax),
    always in float32 even when a mixed-precision policy is active.
    """

    def __init__(self):
        super().__init__()
        base_model = tf.keras.applications.ResNet50(include_top=False, weights='imagenet', input_shape=(60, 160, 3))
        self.cnn = tf.keras.Sequential([
            base_model,
            # Collapse ALL spatial positions (rows and columns together) into
            # one sequence axis: (B, H, W, C) -> (B, H * W, C).
            # NOTE(review): this is a row-major flatten, so the steps are not a
            # pure left-to-right sweep when the feature map has more than one
            # row -- presumably (2, 5, 2048) for a 60x160 input, TODO confirm.
            layers.Reshape((-1, base_model.output_shape[-1]))
        ])
        self.bilstm = tf.keras.Sequential([
            layers.Bidirectional(layers.LSTM(128, return_sequences=True)),
            layers.Bidirectional(layers.LSTM(128, return_sequences=True))
        ])
        # Keep the output head in float32: under the 'mixed_float16' policy a
        # float16 head would hand half-precision logits to the loss.
        self.classifier = layers.Dense(NUM_CLASSES, dtype="float32")

    def call(self, images, training=False):
        """Run the full pipeline and return per-step logits.

        Args:
            images: image batch fed to the ResNet50 backbone.
            training: forwarded explicitly to the sub-models so training-only
                behaviour follows the caller's flag.
        """
        x = self.cnn(images, training=training)
        x = self.bilstm(x, training=training)
        return self.classifier(x)

In [5]:
class CTCLossLayer(tf.keras.layers.Layer):
    """Per-sample CTC loss computed from padded integer labels and raw logits.

    Conventions used by this notebook:
      * class id 0 is the CTC blank and also the label padding value;
      * real characters use ids 1..len(CHARS);
      * ``y_pred`` holds unnormalised logits (the models end in a linear Dense).

    ``tf.keras.backend.ctc_batch_cost`` is not used here because it takes the
    log of ``y_pred`` (so it expects probabilities, not logits) and reserves
    the LAST class as blank, which would collide with the highest character id.
    """

    def call(self, y_true, y_pred):
        """Return the CTC loss with shape ``(batch, 1)``.

        Args:
            y_true: ``(batch, max_label_len)`` integer labels, zero-padded.
            y_pred: ``(batch, time_steps, num_classes)`` logits.
        """
        labels = tf.cast(y_true, tf.int32)
        # CTC is numerically sensitive; always evaluate it in float32.
        logits = tf.cast(y_pred, tf.float32)

        batch_size = tf.shape(logits)[0]
        time_steps = tf.shape(logits)[1]
        # Every sample uses all time steps produced by the network.
        logit_lengths = tf.fill([batch_size], time_steps)
        # Padding is 0, so the number of non-zero ids is the true label length.
        label_lengths = tf.math.count_nonzero(labels, axis=-1, dtype=tf.int32)

        loss = tf.nn.ctc_loss(
            labels=labels,
            logits=logits,
            label_length=label_lengths,
            logit_length=logit_lengths,
            logits_time_major=False,
            blank_index=0,
        )
        # Keep the (batch, 1) output shape that ctc_batch_cost used to return.
        return tf.expand_dims(loss, axis=-1)

In [6]:
def create_training_model():
    """Build a functional model mapping ``(image, label)`` to the CTC loss.

    Inputs are named "image" (60x160x3) and "label" (variable-length int32);
    the single output is whatever ``CTCLossLayer`` returns for the logits of a
    freshly constructed ``LicensePlateModel``.
    """
    image_in = layers.Input(shape=(60, 160, 3), name="image")
    label_in = layers.Input(shape=(None,), dtype="int32", name="label")

    per_step_logits = LicensePlateModel()(image_in)
    ctc_loss = CTCLossLayer()(label_in, per_step_logits)

    return tf.keras.Model(inputs=[image_in, label_in], outputs=ctc_loss)

In [7]:
def create_prediction_model():
    """Build a functional model mapping a 60x160x3 "image" input to logits.

    A new ``LicensePlateModel`` is instantiated on every call, so the returned
    model does not share weights with any other model built in this notebook.
    """
    image_in = layers.Input(shape=(60, 160, 3), name="image")
    recognizer = LicensePlateModel()
    return tf.keras.Model(inputs=image_in, outputs=recognizer(image_in))


In [8]:
from captcha.image import ImageCaptcha
from datasets import Dataset
import random
import string
from PIL import Image

# Same charset as the model cell (character ids start at 1); id 0 is kept
# for padding / the CTC blank.
CHARS = string.ascii_uppercase + string.digits
CHAR_TO_ID = dict(zip(CHARS, range(1, len(CHARS) + 1)))
BLANK_TOKEN = 0

def create_captcha_dataset(size=100):
    """Generate ``size`` synthetic CAPTCHA samples as a ``Dataset``.

    Each record is ``{'image': <RGB image>, 'label': <5-character string>}``
    where the label is drawn (with replacement) from ``CHARS`` and rendered by
    a 160x60 ``ImageCaptcha`` generator.
    """
    renderer = ImageCaptcha(width=160, height=60)

    def make_sample():
        # Draw the text first, then render it -- same order as a plain loop.
        text = ''.join(random.choices(CHARS, k=5))
        picture = renderer.generate_image(text).convert("RGB")
        return {'image': picture, 'label': text}

    return Dataset.from_list([make_sample() for _ in range(size)])

# 500 training and 100 validation samples, generated independently.
# NOTE(review): no random seed is set in the visible cells, so the generated
# labels differ on every run.
train_dataset = create_captcha_dataset(500)
val_dataset = create_captcha_dataset(100)

In [9]:
import tensorflow as tf
import numpy as np

# Label tokenizer
def encode_label(text):
    """Translate each character of ``text`` into its id from ``CHAR_TO_ID``.

    Returns a list of ints in the same order as the characters; a character
    missing from ``CHAR_TO_ID`` raises ``KeyError``.
    """
    return list(map(CHAR_TO_ID.__getitem__, text))

# Image preprocessing
def process_example(example, augment=False):
    """Convert one dataset record into a normalised image array and label ids.

    Args:
        example: mapping with an "image" entry exposing ``.resize`` (the
            notebook passes PIL images) and a "label" string.
        augment: accepted for interface compatibility; currently ignored --
            no augmentation is implemented.

    Returns:
        ``(img, label)`` where ``img`` is a float32 array (60x160x3 for RGB
        input) scaled to [0, 1] and then standardised per channel, and
        ``label`` is the list of ids produced by ``encode_label``.
    """
    # Per-channel statistics commonly paired with ImageNet-pretrained backbones.
    # They are float32 arrays on purpose: plain Python lists would silently
    # promote the whole image to float64, while every consumer declares float32.
    channel_mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
    channel_std = np.array([0.229, 0.224, 0.225], dtype=np.float32)

    img = example["image"].resize((160, 60))  # PIL takes (width, height)
    img = np.asarray(img, dtype=np.float32) / 255.0  # scale to [0, 1]
    img = (img - channel_mean) / channel_std  # standardise, stays float32
    label = encode_label(example["label"])
    return img, label

def tf_map_fn(example):
    """Wrap ``process_example`` in ``tf.py_function`` and return an image/label dict.

    Not called anywhere in the visible cells (``build_tf_dataset`` uses a
    Python generator instead).

    NOTE(review): this looks unusable as written -- confirm before relying on it:
      * ``example`` is passed as the single element of ``inp``; if it is a
        dict of tensors, ``tf.py_function`` presumably cannot accept it.
      * ``process_example`` calls ``example["image"].resize(...)``, which
        needs a PIL-style image rather than a tensor.
    """
    img, label = tf.py_function(
        func=process_example,
        inp=[example],
        Tout=(tf.float32, tf.int32)
    )
    # Declare static shapes on the results (presumably py_function returns
    # tensors without them -- TODO confirm).
    img.set_shape((60, 160, 3))
    label.set_shape([None])
    return {"image": img, "label": label}


In [10]:
def build_tf_dataset(dataset, batch_size=32, shuffle=True):
    """Turn an iterable of raw examples into a batched ``tf.data`` pipeline.

    Every example goes through ``process_example``; the resulting elements are
    dicts with a float32 "image" of shape (60, 160, 3) and a variable-length
    int32 "label". Labels are padded with ``BLANK_TOKEN`` when batching.

    Args:
        dataset: iterable of examples accepted by ``process_example``.
        batch_size: number of samples per padded batch.
        shuffle: when true, shuffle with a 512-element buffer before batching.
    """
    def sample_stream():
        # Lazily preprocess examples in dataset order.
        yield from map(process_example, dataset)

    def as_feature_dict(image, label_ids):
        return {
            "image": tf.convert_to_tensor(image, dtype=tf.float32),
            "label": tf.convert_to_tensor(label_ids, dtype=tf.int32),
        }

    pipeline = tf.data.Dataset.from_generator(
        sample_stream,
        output_signature=(
            tf.TensorSpec(shape=(60, 160, 3), dtype=tf.float32),
            tf.TensorSpec(shape=(None,), dtype=tf.int32),
        ),
    ).map(as_feature_dict, num_parallel_calls=tf.data.AUTOTUNE)

    if shuffle:
        pipeline = pipeline.shuffle(512)

    batched = pipeline.padded_batch(
        batch_size,
        padded_shapes={"image": [60, 160, 3], "label": [None]},
        padding_values={"image": 0.0, "label": BLANK_TOKEN},
    )
    return batched.prefetch(tf.data.AUTOTUNE)


In [14]:
# train_tfds = build_tf_dataset(train_dataset)
# val_tfds = build_tf_dataset(val_dataset, shuffle=False)

In [12]:
class CTCLossTrainer(tf.keras.Model):
    """Wraps a logits-producing model with custom CTC train/test steps.

    Batches are dicts with an "image" tensor and a zero-padded integer "label"
    tensor. Class id 0 is treated as the CTC blank, matching the padding value
    and the decoder used elsewhere in this notebook.
    """

    def __init__(self, base_model):
        """Store the wrapped model; ``base_model`` must return raw logits."""
        super().__init__()
        self.base_model = base_model
        # Same call signature and (batch, 1) result shape as
        # ``tf.keras.backend.ctc_batch_cost``, but that function takes the log
        # of ``y_pred`` (expects probabilities) and reserves the LAST class as
        # blank, which would collide with the highest character id.
        self.loss_fn = self._ctc_loss_from_logits

    @staticmethod
    def _ctc_loss_from_logits(y_true, y_pred, input_length, label_length):
        """Per-sample CTC loss from raw logits, with class 0 as the blank."""
        loss = tf.nn.ctc_loss(
            labels=tf.cast(y_true, tf.int32),
            # Evaluate CTC in float32 even when the model runs in float16.
            logits=tf.cast(y_pred, tf.float32),
            label_length=tf.cast(tf.squeeze(label_length, axis=-1), tf.int32),
            logit_length=tf.cast(tf.squeeze(input_length, axis=-1), tf.int32),
            logits_time_major=False,
            blank_index=0,
        )
        return tf.expand_dims(loss, axis=-1)

    def _batch_loss(self, data, training):
        """Run the model on one batch and return the (batch, 1) CTC loss."""
        x, y_true = data["image"], data["label"]
        y_pred = self.base_model(x, training=training)

        # Every sample uses all time steps emitted by the network.
        time_steps = tf.cast(tf.shape(y_pred)[1], tf.int64)
        input_len = tf.fill([tf.shape(y_pred)[0], 1], time_steps)
        # Padding is 0, so counting non-zero ids gives the true label length.
        label_len = tf.math.count_nonzero(y_true, axis=-1, keepdims=True, dtype=tf.int64)

        return self.loss_fn(y_true, y_pred, input_len, label_len)

    def train_step(self, data):
        """One optimisation step; returns ``{"loss": mean batch loss}``."""
        with tf.GradientTape() as tape:
            loss = self._batch_loss(data, training=True)
            # Keras 3 optimizers expose ``scale_loss``; under mixed precision
            # the loss-scaling wrapper un-scales gradients when applying them,
            # so the loss must be scaled here to match. Optimizers without the
            # method fall back to the unscaled loss.
            scale_loss = getattr(self.optimizer, "scale_loss", None)
            scaled_loss = scale_loss(loss) if scale_loss is not None else loss

        variables = self.base_model.trainable_variables
        gradients = tape.gradient(scaled_loss, variables)
        self.optimizer.apply_gradients(zip(gradients, variables))
        return {"loss": tf.reduce_mean(loss)}

    def test_step(self, data):
        """Evaluate one batch in inference mode; returns ``{"loss": mean}``."""
        # training=False: validation must not run training-only behaviour.
        loss = self._batch_loss(data, training=False)
        return {"loss": tf.reduce_mean(loss)}


In [15]:
# base_model = create_prediction_model()
# model = CTCLossTrainer(base_model)

# model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=3e-4))

# model.fit(train_tfds, validation_data=val_tfds, epochs=20)


In [None]:
from tensorflow import keras

# Load a full saved model for inference; compile=False is passed so the
# model is loaded uncompiled.
# NOTE(review): this cell has no execution count (In [None]) and the name
# `model` is rebound again by the weight-loading cell at the end of the
# notebook -- confirm which of the two loaders is meant to be used.
model = keras.models.load_model("models/ctc_model_full.keras", compile=False)

In [18]:
def build_model():
    """Build the CNN + BiLSTM recognizer that maps an image to per-step logits.

    Reads the module-level ``IMG_HEIGHT``, ``IMG_WIDTH`` and ``NUM_CLASSES``
    at call time.

    Returns:
        tf.keras.Model taking ``(batch, IMG_HEIGHT, IMG_WIDTH, 3)`` images and
        returning float32 logits of shape ``(batch, time_steps, NUM_CLASSES)``
        (no softmax). ``time_steps`` equals the width of the final feature
        map, i.e. 12 for the notebook's 60x200 (HxW) input.
    """
    inputs = layers.Input(shape=(IMG_HEIGHT, IMG_WIDTH, 3), name="image")
    x = layers.GaussianNoise(0.05)(inputs)

    # Stem. Shape comments below are HxW for the 60x200 input.
    x = layers.Conv2D(32, 3, padding='same', use_bias=False)(x)
    x = layers.BatchNormalization()(x)
    x = layers.ReLU()(x)
    x = layers.MaxPooling2D((2, 2))(x)  # 30x100

    def residual_block(input_tensor, filters):
        """Two 3x3 convs plus a 1x1-projected shortcut, then 2x2 max-pooling."""
        # Main path
        x = layers.Conv2D(filters, 3, padding='same', use_bias=False)(input_tensor)
        x = layers.BatchNormalization()(x)
        x = layers.ReLU()(x)
        x = layers.Conv2D(filters, 3, padding='same', use_bias=False)(x)
        x = layers.BatchNormalization()(x)

        # Shortcut: 1x1 conv only changes the channel count (spatial size is
        # already equal because the main path uses 'same' padding).
        residual = layers.Conv2D(filters, 1)(input_tensor)
        residual = layers.BatchNormalization()(residual)

        x = layers.add([x, residual])
        x = layers.ReLU()(x)
        return layers.MaxPooling2D((2, 2))(x)  # downsample after the addition

    x = residual_block(x, 64)   # 15x50
    x = residual_block(x, 128)  # 7x25

    # Final CNN stage
    x = layers.Conv2D(256, (3, 3), padding='same', use_bias=False)(x)
    x = layers.BatchNormalization()(x)
    x = layers.ReLU()(x)
    x = layers.MaxPooling2D((2, 2))(x)  # 3x12

    # Derive the sequence layout from the actual feature map instead of
    # hard-coding it, so a different IMG_HEIGHT / IMG_WIDTH cannot silently
    # mismatch. For 60x200 this is (12, 3 * 256), the previous literal values.
    _, feat_height, feat_width, feat_channels = x.shape
    time_steps = feat_width
    features_per_step = feat_height * feat_channels

    # NOTE(review): Reshape reinterprets the row-major (H, W, C) buffer; it
    # does not move the width axis first, so a time step is not one image
    # column. Kept unchanged for compatibility with existing checkpoints; a
    # Permute((2, 1, 3)) before the Reshape would give column-wise steps --
    # confirm and retrain before changing.
    x = layers.Reshape((time_steps, features_per_step))(x)

    # RNN section
    x = layers.Bidirectional(layers.LSTM(256, return_sequences=True))(x)
    x = layers.Bidirectional(layers.LSTM(256, return_sequences=True))(x)

    # float32 head: keeps the logits in full precision even when the global
    # policy is 'mixed_float16'.
    logits = layers.Dense(NUM_CLASSES, activation="linear", dtype="float32", name="logits")(x)
    return tf.keras.Model(inputs, logits)

In [19]:
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
import tensorflow as tf
import random

# -- Reuse constants from training
# NOTE(review): the data-pipeline cells earlier in this notebook produce
# 160x60 images; confirm which size the checkpoint was trained with.
IMG_WIDTH = 200
IMG_HEIGHT = 60
CHARS = string.ascii_uppercase + string.digits
ID_TO_CHAR = dict(enumerate(CHARS, start=1))  # CTC labels start at 1
ID_TO_CHAR[0] = ''  # blank token decodes to nothing

# -- Helper to decode prediction
def decode_prediction(pred):
    """Greedy CTC decoding of a batch of per-step class scores.

    For every sequence the best class per step is taken, consecutive repeats
    are collapsed, id 0 (blank) is dropped and the remaining ids are mapped to
    characters through ``ID_TO_CHAR`` (unknown ids become '').

    Returns a list with one decoded string per batch element.
    """
    best_ids = tf.argmax(pred, axis=-1).numpy()

    def collapse(seq):
        # Pair every id with its predecessor; -1 stands in before the first step.
        previous = [-1, *seq[:-1]]
        return ''.join(
            ID_TO_CHAR.get(cur, '')
            for prev, cur in zip(previous, seq)
            if cur != prev and cur != 0
        )

    return [collapse(seq) for seq in best_ids]

# -- Load model
def load_trained_model(weights_path):
    """Create a fresh ``build_model()`` network and load weights into it.

    Args:
        weights_path: path handed unchanged to ``load_weights``.

    Returns:
        The model instance after the weights have been loaded.
    """
    network = build_model()
    network.load_weights(weights_path)
    return network

# -- Plot random images from dataset with ground truth and prediction
def show_predictions(model, dataset, num_samples=5):
    """Plot randomly chosen samples with their ground-truth and decoded labels.

    Args:
        model: callable returning per-step class scores for an image batch.
        dataset: batched ``tf.data`` dataset whose elements are dicts with
            "image" and "label" entries and, optionally, "label_len".
        num_samples: how many samples to show; clamped to the number of
            samples actually available (at most the first 100 are considered).
    """
    # Unbatch and collect a pool of candidate samples.
    samples = list(dataset.unbatch().take(100))
    # Clamp instead of letting random.sample raise on small datasets.
    count = min(num_samples, len(samples))
    if count <= 0:
        print("No samples available to display.")
        return
    chosen = random.sample(samples, count)

    plt.figure(figsize=(15, 4))
    for i, sample in enumerate(chosen):
        img = sample["image"].numpy()
        label = sample["label"].numpy()
        if "label_len" in sample:
            label_len = int(np.ravel(sample["label_len"].numpy())[0])
        else:
            # Pipelines that only emit image/label pad labels with 0, so the
            # number of non-zero ids is the true label length.
            label_len = int(np.count_nonzero(label))

        img_batch = tf.expand_dims(img, 0)  # add batch dim
        pred = model(img_batch, training=False)
        decoded = decode_prediction(pred)[0]

        label_text = ''.join(ID_TO_CHAR[c] for c in label[:label_len])

        plt.subplot(1, count, i + 1)
        # NOTE(review): if the images were mean/std standardised upstream,
        # imshow will clip them -- confirm whether they should be
        # de-normalised for display.
        plt.imshow(img)
        plt.title(f"GT: {label_text}\nPred: {decoded}")
        plt.axis("off")

    plt.tight_layout()
    plt.show()


In [21]:
# Rebuild the architecture and load the best checkpoint's weights.
# NOTE(review): the recorded run of this cell ended with
# "ValueError: A total of 21 objects could not be loaded", i.e. the weights
# file did not match the layers created by build_model() -- presumably the
# checkpoint was saved from a differently structured model; confirm.
model = load_trained_model("checkpoints/license_plate_best.weights.h5")

Executing op _EagerConst in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op Cast in device /job:localhost/replica:0/task:0/device:GPU:0
resource_RetVal: (_Retval): /job:localhost/replica:0/task:0/device:GPU:0
VarHandleOp: (VarHandleOp): /job:localhost/replica:0/task:0/device:GPU:0
Executing op VarHandleOp in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op AssignVariableOp in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op _EagerConst in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op Cast in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op FloorMod in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op Cast in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op _EagerConst in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op StatelessRandomGetKeyCounter in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op StatelessRandomUniformV2 in device /job:loc

2025-05-05 22:03:40.284342: I tensorflow/core/common_runtime/placer.cc:162] resource_RetVal: (_Retval): /job:localhost/replica:0/task:0/device:GPU:0
2025-05-05 22:03:40.284359: I tensorflow/core/common_runtime/placer.cc:162] VarHandleOp: (VarHandleOp): /job:localhost/replica:0/task:0/device:GPU:0
2025-05-05 22:03:40.291268: I tensorflow/core/common_runtime/placer.cc:162] resource_RetVal: (_Retval): /job:localhost/replica:0/task:0/device:GPU:0
2025-05-05 22:03:40.291283: I tensorflow/core/common_runtime/placer.cc:162] VarHandleOp: (VarHandleOp): /job:localhost/replica:0/task:0/device:GPU:0
2025-05-05 22:03:40.295022: I tensorflow/core/common_runtime/placer.cc:162] resource_RetVal: (_Retval): /job:localhost/replica:0/task:0/device:GPU:0
2025-05-05 22:03:40.295035: I tensorflow/core/common_runtime/placer.cc:162] VarHandleOp: (VarHandleOp): /job:localhost/replica:0/task:0/device:GPU:0
2025-05-05 22:03:40.297150: I tensorflow/core/common_runtime/placer.cc:162] resource_RetVal: (_Retval): /j

resource_RetVal: (_Retval): /job:localhost/replica:0/task:0/device:GPU:0
VarHandleOp: (VarHandleOp): /job:localhost/replica:0/task:0/device:GPU:0
Executing op VarHandleOp in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op AssignVariableOp in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op _EagerConst in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op Fill in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op _EagerConst in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op Fill in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op _EagerConst in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op Fill in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op ConcatV2 in device /job:localhost/replica:0/task:0/device:GPU:0
resource_RetVal: (_Retval): /job:localhost/replica:0/task:0/device:GPU:0
VarHandleOp: (VarHandleOp): /job:localhost/replica:0/task:0/device:GPU:0
Executi

2025-05-05 22:03:40.485388: I tensorflow/core/common_runtime/placer.cc:162] resource_RetVal: (_Retval): /job:localhost/replica:0/task:0/device:GPU:0
2025-05-05 22:03:40.485410: I tensorflow/core/common_runtime/placer.cc:162] VarHandleOp: (VarHandleOp): /job:localhost/replica:0/task:0/device:GPU:0
2025-05-05 22:03:40.490327: I tensorflow/core/common_runtime/placer.cc:162] resource_RetVal: (_Retval): /job:localhost/replica:0/task:0/device:GPU:0
2025-05-05 22:03:40.490341: I tensorflow/core/common_runtime/placer.cc:162] VarHandleOp: (VarHandleOp): /job:localhost/replica:0/task:0/device:GPU:0
2025-05-05 22:03:40.496307: I tensorflow/core/common_runtime/placer.cc:162] resource_RetVal: (_Retval): /job:localhost/replica:0/task:0/device:GPU:0
2025-05-05 22:03:40.496326: I tensorflow/core/common_runtime/placer.cc:162] VarHandleOp: (VarHandleOp): /job:localhost/replica:0/task:0/device:GPU:0
2025-05-05 22:03:40.518477: I tensorflow/core/common_runtime/placer.cc:162] resource_RetVal: (_Retval): /j

ValueError: A total of 21 objects could not be loaded. Example error message for object <Conv2D name=conv2d_8, built=True>:

Layer 'conv2d_8' expected 1 variables, but received 0 variables during loading. Expected: ['kernel']

List of objects that could not be loaded:
[<Conv2D name=conv2d_8, built=True>, <BatchNormalization name=batch_normalization_8, built=True>, <Conv2D name=conv2d_9, built=True>, <BatchNormalization name=batch_normalization_9, built=True>, <Conv2D name=conv2d_10, built=True>, <Conv2D name=conv2d_11, built=True>, <BatchNormalization name=batch_normalization_10, built=True>, <BatchNormalization name=batch_normalization_11, built=True>, <Conv2D name=conv2d_12, built=True>, <BatchNormalization name=batch_normalization_12, built=True>, <Conv2D name=conv2d_13, built=True>, <Conv2D name=conv2d_14, built=True>, <BatchNormalization name=batch_normalization_13, built=True>, <BatchNormalization name=batch_normalization_14, built=True>, <Conv2D name=conv2d_15, built=True>, <BatchNormalization name=batch_normalization_15, built=True>, <LSTMCell name=lstm_cell, built=True>, <LSTMCell name=lstm_cell, built=True>, <LSTMCell name=lstm_cell, built=True>, <LSTMCell name=lstm_cell, built=True>, <Dense name=logits, built=True>]