In [1]:
import tensorflow as tf

tf.random.set_seed(50)

batch_size = 8
num_classes = 37   # 36 characters + 1 blank at index 0
max_label_length = 10
time_steps = 284   # flatten H*W from your model output

# Random dense labels (padding with 0 at the end if shorter than max_label_length)
labels = tf.random.uniform([batch_size, max_label_length],
                           minval=1, maxval=num_classes, dtype=tf.int32)

# Each sequence has random true length between 3 and 10
label_length = tf.random.uniform([batch_size], minval=3,
                                 maxval=max_label_length+1, dtype=tf.int32)

# Apply mask → zero out padding positions
label_mask = tf.sequence_mask(label_length, maxlen=max_label_length,
                              dtype=tf.int32)
labels *= label_mask


In [5]:
print(labels.shape)
for i in range(5):
    print(labels[i])

(8, 10)
tf.Tensor([12 27 15 19  0  0  0  0  0  0], shape=(10,), dtype=int32)
tf.Tensor([33 11 24 14  3  0  0  0  0  0], shape=(10,), dtype=int32)
tf.Tensor([ 2 10 25  0  0  0  0  0  0  0], shape=(10,), dtype=int32)
tf.Tensor([ 2 30 15 12  7 22 36  0  0  0], shape=(10,), dtype=int32)
tf.Tensor([16 10  5  9 19 16 34 33  6  8], shape=(10,), dtype=int32)


In [None]:
# Fake logits (batch, time, num_classes)
logits = tf.random.uniform([batch_size, time_steps, num_classes], dtype=tf.float32)
print(logits.shape)
for i in range(5):
    print(logits[i])

# Each input has 284 time steps
logit_length = tf.fill([batch_size], time_steps)


(8, 284, 37)
tf.Tensor(
[[0.63302183 0.4549662  0.13450539 ... 0.90973794 0.19173634 0.59932053]
 [0.72287667 0.58431506 0.8866216  ... 0.34071648 0.6914804  0.91204166]
 [0.55996966 0.33776295 0.71286213 ... 0.3919481  0.9734224  0.98789   ]
 ...
 [0.9272051  0.7476417  0.5433881  ... 0.5210618  0.6206019  0.51927996]
 [0.00536728 0.6365942  0.5971236  ... 0.14164734 0.597612   0.05568016]
 [0.37533653 0.5941305  0.9234253  ... 0.6358385  0.49370325 0.598449  ]], shape=(284, 37), dtype=float32)
tf.Tensor(
[[0.00923419 0.766767   0.1920495  ... 0.97353864 0.863531   0.14868426]
 [0.8987088  0.22154188 0.66756797 ... 0.9135784  0.21104515 0.00705957]
 [0.14524603 0.82534254 0.92810106 ... 0.8524648  0.7753761  0.65505564]
 ...
 [0.01978254 0.14131701 0.26184237 ... 0.40653622 0.83232033 0.79178727]
 [0.74095416 0.6701547  0.6166954  ... 0.226655   0.47121155 0.14657187]
 [0.6168709  0.16097271 0.93752146 ... 0.22719693 0.54741347 0.3883685 ]], shape=(284, 37), dtype=float32)
tf.Tensor(


In [8]:
loss = tf.nn.ctc_loss(
    labels=labels,                # dense padded labels
    logits=logits,                # [B, T, C]
    label_length=label_length,    # [B]
    logit_length=logit_length,    # [B]
    logits_time_major=False,      # since our logits are [B, T, C]
    blank_index=0                 # class 0 is reserved for blank
)

print("CTC loss per example:", loss.numpy())
print("Mean loss:", tf.reduce_mean(loss).numpy())


CTC loss per example: [ 994.7967   995.81903 1005.3434   978.7675   958.4418   967.2306
  999.75867 1011.1817 ]
Mean loss: 988.9174


In [9]:
optimizer = tf.keras.optimizers.Adam(1e-3)

with tf.GradientTape() as tape:
    # logits = model(x)  # your real forward pass
    loss = tf.nn.ctc_loss(
        labels=labels,
        logits=logits,
        label_length=label_length,
        logit_length=logit_length,
        logits_time_major=False,
        blank_index=0
    )
    loss = tf.reduce_mean(loss)

grads = tape.gradient(loss, logits)  # or model.trainable_variables
optimizer.apply_gradients(zip(grads, [logits]))  # replace with real model vars


TypeError: 'NoneType' object is not iterable

## CTC Works

In [10]:
import tensorflow as tf

# ---------- Settings ----------
BATCH_SIZE = 4
NUM_CLASSES = 37    # include blank in this count
MAX_LABEL_LEN = 10  # maximum ground-truth length (you said 10)
BLANK_INDEX = 0     # set to 0 OR (NUM_CLASSES-1) depending on how you encoded labels
# ---------- end settings ----------

# Dummy model_output to simulate your (batch, H, W, C) output:
# For real training: model_output = model(inputs)
model_output = tf.random.normal([BATCH_SIZE, 4, 71, NUM_CLASSES], dtype=tf.float32)

# 1) Collapse height -> use mean over H (same as PyTorch torch.mean(x, dim=2))
logits = tf.reduce_mean(model_output, axis=1)          # shape (BATCH_SIZE, 71, NUM_CLASSES)
time_steps = tf.shape(logits)[1]                       # 71

# 2) Prepare example labels (dense, padded) and their lengths
# Example: random labels in [1..NUM_CLASSES-1], 0 reserved for blank if BLANK_INDEX==0
# In practice convert strings -> ints with your char2idx
labels = tf.random.uniform([BATCH_SIZE, MAX_LABEL_LEN],
                           minval=1, maxval=NUM_CLASSES, dtype=tf.int32)

# Make random true lengths between 1 and MAX_LABEL_LEN:
label_length = tf.random.uniform([BATCH_SIZE], minval=1, maxval=MAX_LABEL_LEN+1, dtype=tf.int32)

# Zero out padding positions:
label_mask = tf.sequence_mask(label_length, maxlen=MAX_LABEL_LEN, dtype=tf.int32)
labels = labels * label_mask

# 3) logit lengths: here time dimension is width (71) for every sample
logit_length = tf.fill([BATCH_SIZE], time_steps)

# 4) Compute CTC loss: (no softmax beforehand)
# tf.nn.ctc_loss returns a vector of shape (batch,)
loss_per_batch = tf.nn.ctc_loss(
    labels=labels,
    logits=logits,            # (batch, time, classes)
    label_length=label_length,
    logit_length=logit_length,
    logits_time_major=False,
    blank_index=BLANK_INDEX
)

loss = tf.reduce_mean(loss_per_batch)
print("CTC mean loss:", loss.numpy())


CTC mean loss: 236.98169
