In [None]:
import tensorflow as tf
import numpy as np
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import *
from tensorflow.keras.regularizers import l2
from tensorflow.keras.optimizers import SGD, Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
import tensorflow.keras.backend as K

In [None]:
# All three splits live side by side under one dataset folder.
_DATASET_ROOT = "/kaggle/input/rsbdsl38/RSBDSL38_ver02_aug/RSBDSL38_resized_aug_splitted"

train_dir = f"{_DATASET_ROOT}/train"
test_dir = f"{_DATASET_ROOT}/test"
val_dir = f"{_DATASET_ROOT}/val"

In [None]:
    # Side length (pixels) images are resized to when the datasets are loaded;
    # also defines the model's input shape.
    img_size = 224
    # Number of images per batch produced by the dataset loaders.
    batch_size = 32
    # Learning rate the Adam optimizer starts from (ReduceLROnPlateau may lower it).
    initial_lr = 1e-3

In [None]:
AUTOTUNE = tf.data.AUTOTUNE

# Options shared by every split; only the directory and the shuffle flag differ.
_loader_options = dict(
    image_size=(img_size, img_size),
    batch_size=batch_size,
    label_mode='categorical',
    color_mode="rgb",
    seed=42,
)

# Only the training split is shuffled; validation and test keep directory
# order so predictions can be aligned with labels later on.
train_dataset = tf.keras.utils.image_dataset_from_directory(
    train_dir, shuffle=True, **_loader_options
)
val_dataset = tf.keras.utils.image_dataset_from_directory(
    val_dir, shuffle=False, **_loader_options
)
test_dataset = tf.keras.utils.image_dataset_from_directory(
    test_dir, shuffle=False, **_loader_options
)

# Class names come from the training split's sub-directory names.
class_names = train_dataset.class_names
num_classes = len(class_names)
print(f"Number of classes: {num_classes}")


# Training-time augmentation followed by normalisation to [0, 1].
# Rescaling is deliberately the LAST step: RandomBrightness (and the clipping
# done by RandomContrast) assume pixel values in [0, 255] by default. Running
# them on already-rescaled [0, 1] images would add brightness offsets of up to
# +/-25.5 to values that never exceed 1, destroying most training images.
data_augmentation = tf.keras.Sequential([
    tf.keras.layers.RandomRotation(0.1),
    tf.keras.layers.RandomTranslation(0.1, 0.1),
    tf.keras.layers.RandomZoom(0.08),
    tf.keras.layers.RandomContrast(0.1),
    tf.keras.layers.RandomBrightness(0.1),
    tf.keras.layers.Rescaling(1./255),
])


# Validation / test images are only normalised to [0, 1], never augmented.
rescale = tf.keras.Sequential([
    tf.keras.layers.Rescaling(1./255)
])

def apply_augmentation(images, labels):
    """Augment and rescale a batch of training images; labels pass through unchanged."""
    # training=True keeps the random layers active inside Dataset.map, where
    # Keras cannot infer the training phase on its own.
    return data_augmentation(images, training=True), labels

def apply_rescaling(images, labels):
    """Rescale a batch of images to [0, 1]; labels pass through unchanged."""
    return rescale(images), labels


# Training pipeline: cache the RAW batches, then shuffle and augment.
# Caching after the augmentation map would store one fixed set of augmented
# batches during the first epoch and replay exactly those (in the same order)
# for every later epoch, which defeats both augmentation and shuffling.
# The shuffle works on whole batches: batch composition stays as cached, but
# the order in which batches are seen changes every epoch.
train_dataset = (
    train_dataset
    .cache()
    .shuffle(buffer_size=64, reshuffle_each_iteration=True)
    .map(apply_augmentation, num_parallel_calls=AUTOTUNE)
    .prefetch(AUTOTUNE)
)

# Validation / test preprocessing is deterministic, so caching its result is safe.
val_dataset = (
    val_dataset
    .map(apply_rescaling, num_parallel_calls=AUTOTUNE)
    .cache()
    .prefetch(AUTOTUNE)
)
test_dataset = (
    test_dataset
    .map(apply_rescaling, num_parallel_calls=AUTOTUNE)
    .cache()
    .prefetch(AUTOTUNE)
)

In [None]:
def channel_attention_block(x, ratio=16):
    """Re-weight the channels of `x` with a learned per-channel gate.

    Global average- and max-pooled descriptors of `x` are passed through the
    same two-layer bottleneck (shared weights), summed, squashed with a
    sigmoid and multiplied back onto `x`.

    Args:
        x: 4-D feature map whose last axis is the channel axis and has a
            statically known size.
        ratio: Reduction factor of the bottleneck; the hidden layer has
            `channels // ratio` units, but never fewer than one.

    Returns:
        A tensor with the same shape as `x`, scaled channel-wise by the gate.
    """
    channel = x.shape[-1]
    # channel // ratio is 0 whenever the input has fewer channels than `ratio`,
    # which would request an invalid zero-unit Dense layer; keep at least one.
    hidden_units = max(channel // ratio, 1)

    avg_pool = GlobalAveragePooling2D()(x)
    avg_pool = Reshape((1, 1, channel))(avg_pool)
    max_pool = GlobalMaxPooling2D()(x)
    max_pool = Reshape((1, 1, channel))(max_pool)

    # One pair of Dense layers is applied to both descriptors (shared weights).
    shared_dense_1 = Dense(hidden_units, activation='relu', kernel_initializer='he_normal', use_bias=False)
    shared_dense_2 = Dense(channel, kernel_initializer='he_normal', use_bias=False)
    avg_pool = shared_dense_1(avg_pool)
    avg_pool = shared_dense_2(avg_pool)
    max_pool = shared_dense_1(max_pool)
    max_pool = shared_dense_2(max_pool)

    cbam_feature = Add()([avg_pool, max_pool])
    cbam_feature = Activation('sigmoid')(cbam_feature)
    # The (1, 1, C) gate broadcasts over the spatial dimensions of x.
    return Multiply()([x, cbam_feature])

def improved_spatial_attention_block(x):
    """Re-weight the spatial positions of `x` with a multi-scale attention map.

    The feature map is summarised per pixel by its channel-wise mean and
    maximum. Three parallel 3x3 convolutions with dilation rates 1, 2 and 4
    look at that 2-channel summary, and a final 1x1 sigmoid convolution fuses
    them into a single-channel mask that is multiplied onto `x`.

    Args:
        x: 4-D channels-last feature map.

    Returns:
        A tensor with the same shape as `x`, scaled per spatial position.
    """
    # Pool ACROSS CHANNELS. A 1x1 spatial pooling window is an identity
    # operation, so it would merely hand two copies of x to the convolutions
    # instead of average/max descriptors.
    # NOTE(review): Lambda layers wrap Python functions; loading a saved model
    # that contains them may need unsafe deserialization enabled -- confirm
    # before using this block in a model that is checkpointed and reloaded.
    avg_pool = tf.keras.layers.Lambda(
        lambda t: tf.reduce_mean(t, axis=-1, keepdims=True))(x)
    max_pool = tf.keras.layers.Lambda(
        lambda t: tf.reduce_max(t, axis=-1, keepdims=True))(x)

    concat = Concatenate(axis=3)([avg_pool, max_pool])
    # Same kernel size, growing dilation: progressively larger receptive fields.
    conv1 = Conv2D(8, 3, padding='same', dilation_rate=1, kernel_initializer='he_normal')(concat)
    conv2 = Conv2D(8, 3, padding='same', dilation_rate=2, kernel_initializer='he_normal')(concat)
    conv3 = Conv2D(8, 3, padding='same', dilation_rate=4, kernel_initializer='he_normal')(concat)
    multi_scale = Concatenate()([conv1, conv2, conv3])
    multi_scale = Conv2D(1, 1, padding='same', activation='sigmoid', kernel_initializer='he_normal')(multi_scale)
    # The single-channel mask broadcasts over the channel axis of x.
    return Multiply()([x, multi_scale])


def enhanced_residual_block(x, filters, kernel_size=3, strides=1, use_projection=False, groups=1):
    """Bottleneck residual block with channel and spatial attention.

    Main path: 1x1 conv down to `filters // 2` channels, a (grouped, optionally
    strided) `kernel_size` conv, a 1x1 conv back up to `filters`, then channel
    attention and a 7x7 single-channel sigmoid mask. The result is added to
    the shortcut and passed through swish.

    Args:
        x: Input feature map (channels-last).
        filters: Number of output channels.
        kernel_size: Kernel size of the middle convolution.
        strides: Stride of the middle convolution (and of the shortcut projection).
        use_projection: Force a 1x1 conv + batch norm on the shortcut.
        groups: Number of groups of the middle convolution.

    Returns:
        The block output with `filters` channels.
    """
    def regularized_conv(n_filters, size, **extra):
        # Every convolution on the main path and the shortcut shares these settings.
        return Conv2D(
            n_filters,
            size,
            padding='same',
            kernel_initializer='he_normal',
            kernel_regularizer=l2(1e-5),
            **extra,
        )

    block_input = x
    half_width = filters // 2

    # 1x1 reduction.
    out = regularized_conv(half_width, 1)(block_input)
    out = BatchNormalization()(out)
    out = Activation('swish')(out)

    # Spatial (grouped) convolution; the only main-path layer that may stride.
    out = regularized_conv(half_width, kernel_size, strides=strides, groups=groups)(out)
    out = BatchNormalization()(out)
    out = Activation('swish')(out)

    # 1x1 expansion back to the requested width (no activation before the add).
    out = regularized_conv(filters, 1)(out)
    out = BatchNormalization()(out)

    # Attention: channel gate first, then a 7x7 single-channel spatial mask.
    out = channel_attention_block(out)
    attention_mask = Conv2D(
        1, 7, padding='same', activation='sigmoid',
        kernel_initializer='he_normal', use_bias=False,
    )(out)
    out = Multiply()([out, attention_mask])

    # The shortcut must match the main path in channels and resolution.
    needs_projection = use_projection or strides > 1 or block_input.shape[-1] != filters
    if needs_projection:
        skip = regularized_conv(filters, 1, strides=strides)(block_input)
        skip = BatchNormalization()(skip)
    else:
        skip = block_input

    merged = Add()([out, skip])
    return Activation('swish')(merged)


def hand_feature_block(x, filters, dropout_rate=0.1):
    """Two-branch depthwise feature block followed by spatial dropout.

    A 1x1 convolution projects `x` to `filters` channels. Two depthwise
    convolutions (3x3 and 5x5) then process that projection in parallel, their
    outputs are concatenated and fused back to `filters` channels by another
    1x1 convolution. Every convolution is followed by batch norm and swish.

    Args:
        x: Input feature map (channels-last).
        filters: Number of channels of the projection and of the output.
        dropout_rate: Rate of the final SpatialDropout2D.

    Returns:
        The fused feature map with `filters` channels.
    """
    def bn_swish(tensor):
        # Normalisation + activation applied after every convolution in this block.
        tensor = BatchNormalization()(tensor)
        return Activation('swish')(tensor)

    projected = Conv2D(filters, 1, padding='same', kernel_initializer='he_normal')(x)
    projected = bn_swish(projected)

    # Parallel depthwise branches with different receptive fields.
    branches = []
    for size in (3, 5):
        branch = DepthwiseConv2D(kernel_size=size, padding='same', depthwise_initializer='he_normal')(projected)
        branches.append(bn_swish(branch))

    stacked = Concatenate()(branches)
    fused = Conv2D(filters, 1, padding='same', kernel_initializer='he_normal')(stacked)
    fused = bn_swish(fused)
    return SpatialDropout2D(dropout_rate)(fused)

def create_sign_language_model(input_shape, num_classes):
    """Build the 'Enhanced_Bengali_Sign_Model' classifier.

    Layout: a strided 5x5 stem with max pooling, four stages of attention
    residual blocks each closed by a hand_feature_block (the first two stages
    also halve the resolution), concatenated global max/average pooling, and a
    two-layer dense head ending in a softmax over `num_classes`.

    Args:
        input_shape: Shape of one input image, without the batch dimension.
        num_classes: Number of output classes.

    Returns:
        An uncompiled Keras `Model`.
    """
    # One row per stage:
    # (filters, conv groups, residual blocks, spatial dropout, max-pool afterwards)
    stage_specs = (
        (32, 2, 2, 0.1, True),
        (64, 4, 2, 0.15, True),
        (128, 8, 3, 0.2, False),
        (256, 16, 2, 0.25, False),
    )

    inputs = Input(shape=input_shape)

    # Stem: strided convolution followed by overlapping max pooling.
    features = Conv2D(32, 5, strides=2, padding='same', kernel_initializer='he_normal', kernel_regularizer=l2(1e-5))(inputs)
    features = BatchNormalization()(features)
    features = Activation('swish')(features)
    features = MaxPooling2D(pool_size=3, strides=2, padding='same')(features)

    for width, n_groups, n_blocks, drop, downsample in stage_specs:
        for block_idx in range(n_blocks):
            # Only the first block of a stage forces a projection shortcut.
            features = enhanced_residual_block(
                features, width, use_projection=(block_idx == 0), groups=n_groups
            )
        features = hand_feature_block(features, width, dropout_rate=drop)
        if downsample:
            features = MaxPooling2D(pool_size=2)(features)

    # Global descriptor: max- and average-pooled features side by side.
    pooled_max = GlobalMaxPooling2D()(features)
    pooled_avg = GlobalAveragePooling2D()(features)
    head = Concatenate()([pooled_max, pooled_avg])

    # Classification head.
    head = Dropout(0.4)(head)
    head = Dense(128, kernel_regularizer=l2(1e-5), activation='swish')(head)
    head = BatchNormalization()(head)
    head = Dropout(0.3)(head)
    head = Dense(64, kernel_regularizer=l2(1e-5), activation='swish')(head)
    head = BatchNormalization()(head)

    outputs = Dense(num_classes, activation='softmax')(head)
    return Model(inputs=inputs, outputs=outputs, name='Enhanced_Bengali_Sign_Model')

In [None]:
# Square RGB inputs: height and width both equal img_size, three colour channels.
input_shape = (img_size, img_size, 3)
model = create_sign_language_model(
    input_shape=input_shape,
    num_classes=num_classes,
)

In [None]:
# Print a layer-by-layer overview of the freshly built model.
print("\nEnhanced_Bengali_Sign_Model Summary:")
model.summary()

In [None]:
def get_callbacks(model_name):
    """Create the training callbacks for a model.

    Args:
        model_name: Used in the checkpoint file name
            (`/kaggle/working/best_<model_name>.keras`).

    Returns:
        A list `[ModelCheckpoint, EarlyStopping, ReduceLROnPlateau]`.
    """
    # Keep only the weights of the epoch with the highest validation accuracy.
    checkpoint = ModelCheckpoint(
        f'/kaggle/working/best_{model_name}.keras',
        monitor='val_accuracy',
        save_best_only=True,
        mode='max',
        verbose=1,
    )
    # Stop after 25 epochs without a validation-accuracy improvement and roll
    # the model back to its best weights.
    early_stop = EarlyStopping(
        monitor='val_accuracy',
        patience=25,
        restore_best_weights=True,
        verbose=1,
    )
    # Halve the learning rate when validation loss stalls for 10 epochs.
    lr_on_plateau = ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.5,
        patience=10,
        min_lr=1e-6,
        verbose=1,
    )
    return [checkpoint, early_stop, lr_on_plateau]

callbacks = get_callbacks("Enhanced_Bengali_Sign_Model")

# Labels are one-hot encoded, hence categorical (not sparse) cross-entropy.
model.compile(
    optimizer=Adam(learning_rate=initial_lr),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
print("\nTraining Enhanced_Bengali_Sign_Model...")
# 200 is an upper bound on epochs; the callbacks may stop training earlier.
history = model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=200,
    callbacks=callbacks,
)

In [None]:
import numpy as np
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf

def _true_class_indices(dataset):
    """Return the integer class index of every sample of a batched dataset, in iteration order."""
    per_batch = []
    for _, labels in dataset:
        labels = np.asarray(labels)
        if labels.ndim > 1 and labels.shape[-1] > 1:
            # One-hot (categorical) labels: the class is the position of the maximum.
            per_batch.append(labels.argmax(axis=-1))
        else:
            # Labels that already are class indices (possibly with a trailing unit axis).
            per_batch.append(labels.reshape(-1).astype(int))
    if not per_batch:
        return np.array([], dtype=int)
    return np.concatenate(per_batch)


def _save_confusion_matrix(cm, name):
    """Render the confusion matrix as a heatmap and save it under /kaggle/working."""
    fig, ax = plt.subplots(figsize=(12, 10))
    sns.heatmap(cm, annot=False, cmap='Blues', cbar=True, ax=ax)
    ax.set_title(f"Confusion Matrix - {name}", fontsize=16)
    ax.set_xlabel('Predicted', fontsize=12)
    ax.set_ylabel('True', fontsize=12)
    fig.tight_layout()
    fig.savefig(f'/kaggle/working/confusion_matrix_{name.replace(" ", "_")}.png')
    # Close explicitly so figures do not pile up in memory across calls.
    plt.close(fig)


def _save_training_history(history, name):
    """Plot training/validation loss and accuracy side by side and save the figure."""
    fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 5))
    panels = ((loss_ax, 'loss', 'Loss'), (acc_ax, 'accuracy', 'Accuracy'))
    for ax, key, label in panels:
        ax.plot(history.history[key], color='darkorange', label='Training')
        ax.plot(history.history[f'val_{key}'], color='blue', label='Validation')
        ax.set_title(f'{label} vs Epochs - {name}', fontsize=14)
        ax.set_xlabel('Epochs', fontsize=12)
        ax.set_ylabel(label, fontsize=12)
        ax.legend(fontsize=10)
        ax.grid(True, linestyle='--', alpha=0.6)
    fig.tight_layout()
    fig.savefig(f'/kaggle/working/training_history_{name.replace(" ", "_")}.png')
    plt.close(fig)


def evaluate_model(model, history, name, test_dataset, class_names=None):
    """Evaluate a trained classifier on `test_dataset` and save diagnostic plots.

    Prints the Keras test accuracy, weighted precision/recall/F1 and a
    per-class classification report, then writes a confusion-matrix heatmap
    and the loss/accuracy curves to /kaggle/working.

    Predictions and ground-truth labels are collected in two separate passes
    over `test_dataset`, so the dataset must yield its samples in the same
    order on every iteration (i.e. it must not be shuffled).

    Args:
        model: Compiled Keras model whose `evaluate` returns `(loss, accuracy)`.
        history: `History` object returned by `model.fit`; needs the keys
            'loss', 'val_loss', 'accuracy' and 'val_accuracy'.
        name: Label used in printed messages, plot titles and file names.
        test_dataset: Batched dataset yielding `(images, labels)`; labels may
            be one-hot vectors or integer class indices.
        class_names: Optional sequence of class names, ordered by class index.
            When given, the report shows names instead of indices and both the
            report and the confusion matrix cover every class, including
            classes that never occur in the test data.

    Returns:
        Tuple `(accuracy, y_true, y_pred, y_pred_prob)`.

    Raises:
        ValueError: If the number of predictions and of labels differ.
    """
    print(f"\nEvaluating {name}...")
    _, test_acc = model.evaluate(test_dataset)
    print(f"Test accuracy: {test_acc:.4f}")

    y_pred_prob = model.predict(test_dataset)
    y_pred = np.argmax(y_pred_prob, axis=1)

    # Labels are read batch by batch; iterating single samples is far slower.
    y_true = _true_class_indices(test_dataset)
    if len(y_true) != len(y_pred):
        # Truncating one side would silently score misaligned pairs.
        raise ValueError(
            f"Number of labels ({len(y_true)}) does not match number of predictions ({len(y_pred)})"
        )

    # zero_division=0 keeps the value sklearn already uses for classes that are
    # never predicted, without emitting a warning for each metric.
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, average='weighted', zero_division=0)
    recall = recall_score(y_true, y_pred, average='weighted', zero_division=0)
    f1 = f1_score(y_true, y_pred, average='weighted', zero_division=0)

    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")

    # With class names, pin the label set so rows/columns line up with the names.
    label_ids = None if class_names is None else list(range(len(class_names)))
    target_names = None if class_names is None else list(class_names)

    print("\nClassification Report:")
    print(classification_report(
        y_true, y_pred, labels=label_ids, target_names=target_names, zero_division=0
    ))

    cm = confusion_matrix(y_true, y_pred, labels=label_ids)
    _save_confusion_matrix(cm, name)
    _save_training_history(history, name)

    return accuracy, y_true, y_pred, y_pred_prob

In [None]:
# Final hold-out evaluation; the plots are written to /kaggle/working.
acc1, y_true1, y_pred1, y_pred_prob1 = evaluate_model(
    model,
    history,
    "Enhanced_Bengali_Sign_Model",
    test_dataset,
)