In [1]:
!pip install albumentations opencv-python



In [3]:
import os
import cv2
import albumentations as A
from albumentations import (
    Rotate, ShiftScaleRotate, RandomBrightnessContrast,
    GaussNoise, Affine
)
from tqdm import tqdm
import numpy as np

# Parameters
base_dir = '/content/drive/MyDrive/datasets/test-yolo-2-4-annotations/char_dataset'
min_samples = 100  # target number of images per class
image_size = 64  # assuming 64x64

# Define safe augmentations
transform = A.Compose([
    A.Rotate(limit=2, p=0.7),  # ±2 degrees
    A.RandomBrightnessContrast(brightness_limit=0.1, contrast_limit=0.1, p=0.5),
    A.GaussNoise(var_limit=(5.0, 20.0), p=0.3),
    A.ShiftScaleRotate(shift_limit=0.02, scale_limit=0.05, rotate_limit=0, p=0.5)
])

# Augment underrepresented classes
for class_name in os.listdir(base_dir):
    class_path = os.path.join(base_dir, class_name)
    if not os.path.isdir(class_path):
        continue

    images = [f for f in os.listdir(class_path) if f.endswith('.png')]
    current_count = len(images)

    if current_count >= min_samples:
        continue

    print(f"🔄 Augmenting class: {class_name} ({current_count} images)")

    needed = min_samples - current_count
    augment_idx = 0

    for i in tqdm(range(needed)):
        img_name = images[i % current_count]
        img_path = os.path.join(class_path, img_name)

        image = cv2.imread(img_path)
        image = cv2.resize(image, (image_size, image_size))

        augmented = transform(image=image)
        aug_image = augmented["image"]

        save_name = f"aug_{augment_idx}_{img_name}"
        cv2.imwrite(os.path.join(class_path, save_name), aug_image)
        augment_idx += 1

  A.GaussNoise(var_limit=(5.0, 20.0), p=0.3),
  original_init(self, **validated_kwargs)


🔄 Augmenting class: ৩ (97 images)


100%|██████████| 3/3 [00:02<00:00,  1.39it/s]


🔄 Augmenting class: হ (37 images)


100%|██████████| 63/63 [00:27<00:00,  2.32it/s]


🔄 Augmenting class: সিলেট (5 images)


100%|██████████| 95/95 [00:04<00:00, 21.02it/s]


🔄 Augmenting class: ল (40 images)


100%|██████████| 60/60 [00:29<00:00,  2.04it/s]


🔄 Augmenting class: মেট্রো (63 images)


100%|██████████| 37/37 [00:25<00:00,  1.47it/s]


🔄 Augmenting class: প (3 images)


100%|██████████| 97/97 [00:03<00:00, 24.34it/s]


🔄 Augmenting class: ন (13 images)


100%|██████████| 87/87 [00:09<00:00,  9.10it/s]


🔄 Augmenting class: ব (7 images)


100%|██████████| 93/93 [00:05<00:00, 15.50it/s]


🔄 Augmenting class: ময়মনসিংহ (1 images)


100%|██████████| 99/99 [00:02<00:00, 35.48it/s]


🔄 Augmenting class: ঢাকা (88 images)


100%|██████████| 12/12 [00:08<00:00,  1.48it/s]


🔄 Augmenting class: গ (46 images)


100%|██████████| 54/54 [00:31<00:00,  1.74it/s]


🔄 Augmenting class: ঘ (4 images)


100%|██████████| 96/96 [00:03<00:00, 24.83it/s]


🔄 Augmenting class: চ (3 images)


100%|██████████| 97/97 [00:03<00:00, 27.57it/s]


🔄 Augmenting class: ৮ (63 images)


100%|██████████| 37/37 [00:24<00:00,  1.49it/s]


🔄 Augmenting class: চট্ট (6 images)


100%|██████████| 94/94 [00:05<00:00, 18.32it/s]


🔄 Augmenting class: ঢ (6 images)


100%|██████████| 94/94 [00:05<00:00, 17.54it/s]


🔄 Augmenting class: খ (2 images)


100%|██████████| 98/98 [00:02<00:00, 36.01it/s]


🔄 Augmenting class: ক (5 images)


100%|██████████| 95/95 [00:04<00:00, 20.77it/s]


🔄 Augmenting class: ৭ (80 images)


100%|██████████| 20/20 [00:13<00:00,  1.53it/s]


🔄 Augmenting class: ৯ (64 images)


100%|██████████| 36/36 [00:22<00:00,  1.62it/s]


🔄 Augmenting class: ৬ (83 images)


100%|██████████| 17/17 [00:10<00:00,  1.56it/s]


🔄 Augmenting class: ৫ (82 images)


100%|██████████| 18/18 [00:11<00:00,  1.58it/s]


🔄 Augmenting class: ৪ (86 images)


100%|██████████| 14/14 [00:08<00:00,  1.57it/s]


In [1]:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.preprocessing import image_dataset_from_directory
import os


In [2]:
# Constants
DATASET_PATH = "/content/drive/MyDrive/datasets/test-yolo-2-4-annotations/char_dataset"
IMAGE_SIZE = (64, 64)  # ResNet expects at least 32×32, we're okay
BATCH_SIZE = 32

train_ds = image_dataset_from_directory(
    DATASET_PATH,
    image_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    label_mode='categorical',  # for softmax classification
    shuffle=True,
    seed=123,
    validation_split=0.2,
    subset="training"
)

val_ds = image_dataset_from_directory(
    DATASET_PATH,
    image_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    label_mode='categorical',
    shuffle=True,
    seed=123,
    validation_split=0.2,
    subset="validation"
)

class_names = train_ds.class_names
num_classes = len(class_names)
print("Classes:", class_names)


Found 2786 files belonging to 26 classes.
Using 2229 files for training.
Found 2786 files belonging to 26 classes.
Using 557 files for validation.
Classes: ['ক', 'খ', 'গ', 'ঘ', 'চ', 'চট্ট', 'ঢ', 'ঢাকা', 'ন', 'প', 'ব', 'ময়মনসিংহ', 'মেট্রো', 'ল', 'সিলেট', 'হ', '০', '১', '২', '৩', '৪', '৫', '৬', '৭', '৮', '৯']


In [3]:
from tensorflow.keras.applications.resnet import preprocess_input

def preprocess_rgb(image, label):
    # Ensure the image has 3 channels before converting to grayscale and then to rgb
    image = tf.image.convert_image_dtype(image, dtype=tf.float32)
    image = tf.image.grayscale_to_rgb(tf.image.rgb_to_grayscale(image))  # Convert 1 channel → 3
    image = preprocess_input(image)
    return image, label

train_ds = train_ds.map(preprocess_rgb).prefetch(buffer_size=tf.data.AUTOTUNE)
val_ds = val_ds.map(preprocess_rgb).prefetch(buffer_size=tf.data.AUTOTUNE)

In [4]:
# Load ResNet50 with pretrained ImageNet weights
base_model = ResNet50(
    weights="imagenet",
    include_top=False,
    input_shape=(64, 64, 3)
)
base_model.trainable = False  # Freeze all layers initially

# Add custom classification head
model = models.Sequential([
    base_model,
    layers.GlobalAveragePooling2D(),
    layers.Dense(256, activation='relu'),
    layers.Dropout(0.3),
    layers.Dense(num_classes, activation='softmax')
])


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m94765736/94765736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [5]:
model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=35
)


Epoch 1/35
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m448s[0m 6s/step - accuracy: 0.4160 - loss: 2.6029 - val_accuracy: 0.8564 - val_loss: 0.5923
Epoch 2/35
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 113ms/step - accuracy: 0.8760 - loss: 0.4768 - val_accuracy: 0.8815 - val_loss: 0.4726
Epoch 3/35
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 119ms/step - accuracy: 0.9169 - loss: 0.3178 - val_accuracy: 0.9031 - val_loss: 0.3822
Epoch 4/35
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 109ms/step - accuracy: 0.9456 - loss: 0.1792 - val_accuracy: 0.9192 - val_loss: 0.3194
Epoch 5/35
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 107ms/step - accuracy: 0.9593 - loss: 0.1537 - val_accuracy: 0.9336 - val_loss: 0.2834
Epoch 6/35
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 112ms/step - accuracy: 0.9781 - loss: 0.0966 - val_accuracy: 0.9264 - val_loss: 0.2715
Epoch 7/35
[1m70/70[0m 

In [6]:
loss, acc = model.evaluate(val_ds)
print(f"Validation Accuracy: {acc:.2%}")

[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 74ms/step - accuracy: 0.9221 - loss: 0.5151
Validation Accuracy: 93.36%


In [8]:
model.save("/content/drive/MyDrive/datasets/test-yolo-2-4-annotations/bangla_ocr_resnet50_v1.keras")