# Sign Language Digits Classifier
Using MobileNet transfer learning to classify ASL digits (0-9)

## 1. Import Dependencies

In [27]:
# Import dependencies
import os
import shutil
import random
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.applications import MobileNetV2, MobileNet, MobileNetV3Small
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from sklearn.metrics import confusion_matrix, classification_report
from tensorflow.keras import regularizers
from tensorflow.keras.layers import Dropout
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping
from tensorflow.keras.applications.mobilenet import preprocess_input
from xgboost import XGBClassifier

# Set random seeds for reproducibility
random.seed(42)
np.random.seed(42)
tf.random.set_seed(42)

In [2]:
import tensorflow as tf
print("TensorFlow version:", tf.__version__)
print("Built with CUDA:", tf.test.is_built_with_cuda())

TensorFlow version: 2.10.1
Built with CUDA: True


In [3]:
physical_devices = tf.config.experimental.list_physical_devices('GPU')
print("Num GPUs Available: ", len(physical_devices))
if physical_devices:
    tf.config.experimental.set_memory_growth(physical_devices[0], True)

Num GPUs Available:  1


## 2. Set Project Constants

In [4]:
# Project constants
IMG_SIZE = (224, 224)
BATCH_SIZE = 32
EPOCHS = 30
VALIDATION_SPLIT = 0.2
NUM_CLASSES = 10

# Paths
DATASET_PATH = 'Sign-Language-Digits-Dataset/Dataset'
TRAIN_PATH = 'Sign-Language-Digits-Dataset/training'
TEST_PATH = 'Sign-Language-Digits-Dataset/test'

## 3. Create Dataset Splits
Creating training/ folder (85% of dataset) and test/ folder (15% of dataset)

In [5]:
# Create training/ and test/ folders with 85/15 split
def create_train_test_split(source_path, train_path, test_path, test_split=0.15):
    """Split dataset into training (85%) and test (15%) folders"""
    
    # Remove destination folders if they exist
    for path in [train_path, test_path]:
        if os.path.exists(path):
            shutil.rmtree(path)
        os.makedirs(path)
    
    total_train_images = 0
    total_test_images = 0
    
    # Process each digit (0-9)
    for digit in range(10):
        digit_source = os.path.join(source_path, str(digit))
        digit_train = os.path.join(train_path, str(digit))
        digit_test = os.path.join(test_path, str(digit))
        
        os.makedirs(digit_train)
        os.makedirs(digit_test)
        
        # Get all images for this digit
        all_images = [f for f in os.listdir(digit_source) if f.endswith('.JPG')]
        
        # Shuffle images
        random.shuffle(all_images)
        
        # Calculate split point (15% for test, 85% for training)
        num_test = int(len(all_images) * test_split)
        num_train = len(all_images) - num_test
        
        # Split images
        test_images = all_images[:num_test]
        train_images = all_images[num_test:]
        
        # Copy test images
        for img in test_images:
            src = os.path.join(digit_source, img)
            dst = os.path.join(digit_test, img)
            shutil.copy2(src, dst)
        
        # Copy training images
        for img in train_images:
            src = os.path.join(digit_source, img)
            dst = os.path.join(digit_train, img)
            shutil.copy2(src, dst)
        
        total_train_images += num_train
        total_test_images += num_test
        
        print(f'Digit {digit}: {num_train} training, {num_test} test')
    
    return total_train_images, total_test_images

print('Creating training/test split...')
num_train, num_test = create_train_test_split(DATASET_PATH, TRAIN_PATH, TEST_PATH)

print(f'\nTotal training images: {num_train} ({num_train/(num_train+num_test)*100:.1f}%)')
print(f'Total test images: {num_test} ({num_test/(num_train+num_test)*100:.1f}%)')

Creating training/test split...
Digit 0: 175 training, 30 test
Digit 1: 176 training, 30 test
Digit 2: 176 training, 30 test
Digit 3: 176 training, 30 test
Digit 4: 176 training, 31 test
Digit 5: 176 training, 31 test
Digit 6: 176 training, 31 test
Digit 7: 176 training, 30 test
Digit 8: 177 training, 31 test
Digit 9: 174 training, 30 test

Total training images: 1758 (85.3%)
Total test images: 304 (14.7%)


## 4. Create Data Generators
Setting up training and validation generators with 80/20 split

In [41]:
# Create image data generators with data augmentation
train_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    validation_split=VALIDATION_SPLIT,
    # Data augmentation parameters
    rotation_range=15,              # Randomly rotate images by up to 15 degrees
    zoom_range=0.15,                # Randomly zoom in/out by up to 15%
    brightness_range=[0.8, 1.2],    # Randomly adjust brightness
    fill_mode='nearest'             # Fill pixels after transformations
)

# Validation generator (no augmentation, only rescaling)
val_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    validation_split=VALIDATION_SPLIT
)

# Training generator
train_generator = train_datagen.flow_from_directory(
    TRAIN_PATH,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    subset='training',
    shuffle=False,
    seed=42
)

# Validation generator
validation_generator = val_datagen.flow_from_directory(
    TRAIN_PATH,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    subset='validation',
    shuffle=False,
    seed=42
)

print(f'\nTraining samples: {train_generator.samples}')
print(f'Validation samples: {validation_generator.samples}')

Found 1409 images belonging to 10 classes.
Found 349 images belonging to 10 classes.

Training samples: 1409
Validation samples: 349


## 5. Compare Mobilenet V1, V2 and V3Small
Tune the model, compile it and train it for each of these

In [8]:
# base_model.trainable = False

# Build model on top of MobileNet
def get_predictions_layer(base_model_output):
    x = GlobalAveragePooling2D()(base_model_output)
    predictions = Dense(
        NUM_CLASSES,
        activation='softmax',
        # kernel_regularizer=regularizers.l2(0.1)
    )(x)
    return predictions

In [10]:
callbacks = [
    ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, min_lr=1e-6),
    EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
]

### V1

In [7]:
base_model = MobileNet(
    include_top=True, 
    weights='imagenet'
)


for layer in base_model.layers[-30:]:
    print(f"Layer: {layer.name}")
    print(f"  Type: {layer.__class__.__name__}")
    print(f"  Output Shape: {layer.output.shape}")
    print(f"  Params: {layer.count_params()}")
    print("-" * 50)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet/mobilenet_1_0_224_tf.h5
Layer: conv_dw_10
  Type: DepthwiseConv2D
  Output Shape: (None, 14, 14, 512)
  Params: 4608
--------------------------------------------------
Layer: conv_dw_10_bn
  Type: BatchNormalization
  Output Shape: (None, 14, 14, 512)
  Params: 2048
--------------------------------------------------
Layer: conv_dw_10_relu
  Type: ReLU
  Output Shape: (None, 14, 14, 512)
  Params: 0
--------------------------------------------------
Layer: conv_pw_10
  Type: Conv2D
  Output Shape: (None, 14, 14, 512)
  Params: 262144
--------------------------------------------------
Layer: conv_pw_10_bn
  Type: BatchNormalization
  Output Shape: (None, 14, 14, 512)
  Params: 2048
--------------------------------------------------
Layer: conv_pw_10_relu
  Type: ReLU
  Output Shape: (None, 14, 14, 512)
  Params: 0
--------------------------------------------------
Layer: conv_dw_11
  Type: Depthw

In [9]:
x = base_model.get_layer("conv_pw_13_relu").output
predictions = get_predictions_layer(x)

model = Model(inputs=base_model.input, outputs=predictions)

for layer in model.layers[:-1]:
    layer.trainable = False

# Compile model
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

print('\nModel architecture:')
for layer in model.layers[-20:]:
    print(f"Layer: {layer.name}")
    print(f"  Type: {layer.__class__.__name__}")
    print(f"  Output Shape: {layer.output.shape}")
    print(f"  Params: {layer.count_params()}")
    print(f" Trainable: {layer.trainable}")
    print("-" * 50)


Model architecture:
Layer: conv_dw_11_bn
  Type: BatchNormalization
  Output Shape: (None, 14, 14, 512)
  Params: 2048
 Trainable: False
--------------------------------------------------
Layer: conv_dw_11_relu
  Type: ReLU
  Output Shape: (None, 14, 14, 512)
  Params: 0
 Trainable: False
--------------------------------------------------
Layer: conv_pw_11
  Type: Conv2D
  Output Shape: (None, 14, 14, 512)
  Params: 262144
 Trainable: False
--------------------------------------------------
Layer: conv_pw_11_bn
  Type: BatchNormalization
  Output Shape: (None, 14, 14, 512)
  Params: 2048
 Trainable: False
--------------------------------------------------
Layer: conv_pw_11_relu
  Type: ReLU
  Output Shape: (None, 14, 14, 512)
  Params: 0
 Trainable: False
--------------------------------------------------
Layer: conv_pad_12
  Type: ZeroPadding2D
  Output Shape: (None, 15, 15, 512)
  Params: 0
 Trainable: False
--------------------------------------------------
Layer: conv_dw_12
  Type

In [11]:
history = model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=EPOCHS,
    callbacks=callbacks,
    verbose=1
)
model.save("mobilenet.h5")

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


### V2

In [None]:
base_model = MobileNetV2(
    include_top=True, 
    weights='imagenet'
)

for layer in base_model.layers[-20:]:
    print(f"Layer: {layer.name}")
    print(f"  Type: {layer.__class__.__name__}")
    print(f"  Output Shape: {layer.output.shape}")
    print(f"  Params: {layer.count_params()}")
    print("-" * 50)

Loading MobileNetV2...
Layer: block_15_expand_relu
  Type: ReLU
  Output Shape: (None, 7, 7, 960)
  Params: 0
--------------------------------------------------
Layer: block_15_depthwise
  Type: DepthwiseConv2D
  Output Shape: (None, 7, 7, 960)
  Params: 8640
--------------------------------------------------
Layer: block_15_depthwise_BN
  Type: BatchNormalization
  Output Shape: (None, 7, 7, 960)
  Params: 3840
--------------------------------------------------
Layer: block_15_depthwise_relu
  Type: ReLU
  Output Shape: (None, 7, 7, 960)
  Params: 0
--------------------------------------------------
Layer: block_15_project
  Type: Conv2D
  Output Shape: (None, 7, 7, 160)
  Params: 153600
--------------------------------------------------
Layer: block_15_project_BN
  Type: BatchNormalization
  Output Shape: (None, 7, 7, 160)
  Params: 640
--------------------------------------------------
Layer: block_15_add
  Type: Add
  Output Shape: (None, 7, 7, 160)
  Params: 0
--------------------

In [13]:
x = base_model.get_layer("out_relu").output
predictions = get_predictions_layer(x)

model = Model(inputs=base_model.input, outputs=predictions)

for layer in model.layers[:-1]:
    layer.trainable = False

# Compile model
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

print('\nModel architecture:')
for layer in model.layers[-20:]:
    print(f"Layer: {layer.name}")
    print(f"  Type: {layer.__class__.__name__}")
    print(f"  Output Shape: {layer.output.shape}")
    print(f"  Params: {layer.count_params()}")
    print(f" Trainable: {layer.trainable}")
    print("-" * 50)


Model architecture:
Layer: block_15_expand_relu
  Type: ReLU
  Output Shape: (None, 7, 7, 960)
  Params: 0
 Trainable: False
--------------------------------------------------
Layer: block_15_depthwise
  Type: DepthwiseConv2D
  Output Shape: (None, 7, 7, 960)
  Params: 8640
 Trainable: False
--------------------------------------------------
Layer: block_15_depthwise_BN
  Type: BatchNormalization
  Output Shape: (None, 7, 7, 960)
  Params: 3840
 Trainable: False
--------------------------------------------------
Layer: block_15_depthwise_relu
  Type: ReLU
  Output Shape: (None, 7, 7, 960)
  Params: 0
 Trainable: False
--------------------------------------------------
Layer: block_15_project
  Type: Conv2D
  Output Shape: (None, 7, 7, 160)
  Params: 153600
 Trainable: False
--------------------------------------------------
Layer: block_15_project_BN
  Type: BatchNormalization
  Output Shape: (None, 7, 7, 160)
  Params: 640
 Trainable: False
-------------------------------------------

In [14]:
history = model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=EPOCHS,
    callbacks=callbacks,
    verbose=1
)
model.save("mobilenet2.h5")

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


### V3

In [15]:
base_model = MobileNetV3Small(
    include_top=True, 
    weights='imagenet'
)


for layer in base_model.layers[-30:]:
    print(f"Layer: {layer.name}")
    print(f"  Type: {layer.__class__.__name__}")
    print(f"  Output Shape: {layer.output.shape}")
    print(f"  Params: {layer.count_params()}")
    print("-" * 50)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v3/weights_mobilenet_v3_small_224_1.0_float.h5
Layer: re_lu_29
  Type: ReLU
  Output Shape: (None, None, None, 576)
  Params: 0
--------------------------------------------------
Layer: tf.math.multiply_24
  Type: TFOpLambda
  Output Shape: (None, None, None, 576)
  Params: 0
--------------------------------------------------
Layer: multiply_16
  Type: Multiply
  Output Shape: (None, None, None, 576)
  Params: 0
--------------------------------------------------
Layer: expanded_conv_10/squeeze_excite/AvgPool
  Type: GlobalAveragePooling2D
  Output Shape: (None, 1, 1, 576)
  Params: 0
--------------------------------------------------
Layer: expanded_conv_10/squeeze_excite/Conv
  Type: Conv2D
  Output Shape: (None, 1, 1, 144)
  Params: 83088
--------------------------------------------------
Layer: expanded_conv_10/squeeze_excite/Relu
  Type: ReLU
  Output Shape: (None, 1, 1, 144)
  Params: 0
--

In [17]:
x = base_model.get_layer("multiply_17").output
predictions = get_predictions_layer(x)

model = Model(inputs=base_model.input, outputs=predictions)

for layer in model.layers[:-1]:
    layer.trainable = False

# Compile model
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

print('\nModel architecture:')
for layer in model.layers[-20:]:
    print(f"Layer: {layer.name}")
    print(f"  Type: {layer.__class__.__name__}")
    print(f"  Output Shape: {layer.output.shape}")
    print(f"  Params: {layer.count_params()}")
    print(f" Trainable: {layer.trainable}")
    print("-" * 50)


Model architecture:
Layer: multiply_16
  Type: Multiply
  Output Shape: (None, None, None, 576)
  Params: 0
 Trainable: False
--------------------------------------------------
Layer: expanded_conv_10/squeeze_excite/AvgPool
  Type: GlobalAveragePooling2D
  Output Shape: (None, 1, 1, 576)
  Params: 0
 Trainable: False
--------------------------------------------------
Layer: expanded_conv_10/squeeze_excite/Conv
  Type: Conv2D
  Output Shape: (None, 1, 1, 144)
  Params: 83088
 Trainable: False
--------------------------------------------------
Layer: expanded_conv_10/squeeze_excite/Relu
  Type: ReLU
  Output Shape: (None, 1, 1, 144)
  Params: 0
 Trainable: False
--------------------------------------------------
Layer: expanded_conv_10/squeeze_excite/Conv_1
  Type: Conv2D
  Output Shape: (None, 1, 1, 576)
  Params: 83520
 Trainable: False
--------------------------------------------------
Layer: tf.__operators__.add_25
  Type: TFOpLambda
  Output Shape: (None, 1, 1, 576)
  Params: 0
 Tr

In [18]:
history = model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=EPOCHS,
    callbacks=callbacks,
    verbose=1
)
model.save("mobilenet3.h5")

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


## 7. MobileNetV1 Feature Extraction + XGBoost
Using frozen MobileNetV1 as feature extractor for XGBoost classifier

In [36]:
# Load MobileNetV1 without head and freeze it
feature_extractor = MobileNet(
    include_top=False,
    weights='imagenet',
    pooling='avg'
)

# Freeze all layers
feature_extractor.trainable = False

# feature_extractor.summary()

  feature_extractor = MobileNet(


In [42]:
# Extract features from training and validation data
print('Extracting training features...')
X_train = feature_extractor.predict(train_generator, verbose=1)
y_train = train_generator.classes

print(f'\nExtracting validation features...')
X_val = feature_extractor.predict(validation_generator, verbose=1)
y_val = validation_generator.classes

print(f'\nFeature shapes:')
print(f'X_train: {X_train.shape}')
print(f'X_val: {X_val.shape}')
print(f'y_train: {y_train.shape}')
print(f'y_val: {y_val.shape}')

Extracting training features...
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 452ms/step

Extracting validation features...
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 378ms/step

Feature shapes:
X_train: (1409, 1024)
X_val: (349, 1024)
y_train: (1409,)
y_val: (349,)


In [43]:
X_val

array([[0.1545032 , 0.        , 0.9261188 , ..., 0.67607176, 0.2801631 ,
        1.4572111 ],
       [0.09016517, 0.        , 2.0843697 , ..., 1.6000296 , 0.26056516,
        1.3161356 ],
       [0.29053462, 0.        , 1.3169994 , ..., 2.1586185 , 0.09628654,
        0.83240616],
       ...,
       [1.9129909 , 0.07743069, 2.4824505 , ..., 1.0810454 , 0.2585196 ,
        2.1517427 ],
       [1.11826   , 0.01619181, 2.0798414 , ..., 1.7273229 , 0.39629543,
        1.4504533 ],
       [1.3605657 , 0.        , 2.4364338 , ..., 1.6396586 , 0.31785315,
        1.0187277 ]], shape=(349, 1024), dtype=float32)

In [44]:
y_val

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
       4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5,
       5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
       5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
       6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
       6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
       7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8,
       8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,

In [None]:
# Train XGBoost classifier
xgb_model = XGBClassifier(
    random_state=42,
    n_estimators=100,
    learning_rate=0.1,
    max_depth=6
)

print('Training XGBoost...')
xgb_model.fit(X_train, y_train)

Training XGBoost...


0,1,2
,"objective  objective: typing.Union[str, xgboost.sklearn._SklObjWProto, typing.Callable[[typing.Any, typing.Any], typing.Tuple[numpy.ndarray, numpy.ndarray]], NoneType] Specify the learning task and the corresponding learning objective or a custom objective function to be used. For custom objective, see :doc:`/tutorials/custom_metric_obj` and :ref:`custom-obj-metric` for more information, along with the end note for function signatures.",'multi:softprob'
,"base_score  base_score: typing.Union[float, typing.List[float], NoneType] The initial prediction score of all instances, global bias.",
,booster,
,"callbacks  callbacks: typing.Optional[typing.List[xgboost.callback.TrainingCallback]] List of callback functions that are applied at end of each iteration. It is possible to use predefined callbacks by using :ref:`Callback API `. .. note::  States in callback are not preserved during training, which means callback  objects can not be reused for multiple training sessions without  reinitialization or deepcopy. .. code-block:: python  for params in parameters_grid:  # be sure to (re)initialize the callbacks before each run  callbacks = [xgb.callback.LearningRateScheduler(custom_rates)]  reg = xgboost.XGBRegressor(**params, callbacks=callbacks)  reg.fit(X, y)",
,colsample_bylevel  colsample_bylevel: typing.Optional[float] Subsample ratio of columns for each level.,
,colsample_bynode  colsample_bynode: typing.Optional[float] Subsample ratio of columns for each split.,
,colsample_bytree  colsample_bytree: typing.Optional[float] Subsample ratio of columns when constructing each tree.,
,"device  device: typing.Optional[str] .. versionadded:: 2.0.0 Device ordinal, available options are `cpu`, `cuda`, and `gpu`.",
,"early_stopping_rounds  early_stopping_rounds: typing.Optional[int] .. versionadded:: 1.6.0 - Activates early stopping. Validation metric needs to improve at least once in  every **early_stopping_rounds** round(s) to continue training. Requires at  least one item in **eval_set** in :py:meth:`fit`. - If early stopping occurs, the model will have two additional attributes:  :py:attr:`best_score` and :py:attr:`best_iteration`. These are used by the  :py:meth:`predict` and :py:meth:`apply` methods to determine the optimal  number of trees during inference. If users want to access the full model  (including trees built after early stopping), they can specify the  `iteration_range` in these inference methods. In addition, other utilities  like model plotting can also use the entire model. - If you prefer to discard the trees after `best_iteration`, consider using the  callback function :py:class:`xgboost.callback.EarlyStopping`. - If there's more than one item in **eval_set**, the last entry will be used for  early stopping. If there's more than one metric in **eval_metric**, the last  metric will be used for early stopping.",
,enable_categorical  enable_categorical: bool See the same parameter of :py:class:`DMatrix` for details.,False


In [46]:
# Evaluate XGBoost on validation set
y_pred = xgb_model.predict(X_val)
accuracy = (y_pred == y_val).mean()

print(f'\nValidation Accuracy: {accuracy:.4f} ({accuracy*100:.2f}%)')
print(f'Previous best (MobileNetV1): 86.25%')


Validation Accuracy: 0.7650 (76.50%)
Previous best (MobileNetV1): 86.25%


## 8. Evaluate Model
Testing on unseen eval dataset

In [None]:
# Evaluate model on eval/ folder
eval_datagen = ImageDataGenerator(rescale=1./255)

eval_generator = eval_datagen.flow_from_directory(
    EVAL_PATH,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    shuffle=False
)

print('Evaluating model on eval dataset...')
eval_loss, eval_accuracy = model.evaluate(eval_generator)
print(f'\nEval Loss: {eval_loss:.4f}')
print(f'Eval Accuracy: {eval_accuracy:.4f}')

## 9. Confusion Matrix & Classification Report

In [None]:
# Generate predictions for confusion matrix
eval_generator.reset()
predictions = model.predict(eval_generator, verbose=1)
predicted_classes = np.argmax(predictions, axis=1)
true_classes = eval_generator.classes

# Create confusion matrix
cm = confusion_matrix(true_classes, predicted_classes)

# Plot confusion matrix
plt.figure(figsize=(10, 8))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', 
            xticklabels=range(10), yticklabels=range(10))
plt.title('Confusion Matrix - Sign Language Digits')
plt.ylabel('True Label')
plt.xlabel('Predicted Label')
plt.show()

# Print classification report
print('\nClassification Report:')
print(classification_report(true_classes, predicted_classes, 
                          target_names=[str(i) for i in range(10)]))