<a href="https://colab.research.google.com/github/tariqshaban/arabic-sign-language-image-classification/blob/master/Arabic%20Sign%20Language%20Image%20Classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Importing Dependencies

In [None]:
# Display generic output messages
!pip install colorama

# Properly display Arabic characters in plots
!pip install arabic_reshaper
!pip install python-bidi

# Download assets from the GitHub repository
!apt install subversion
!svn checkout https://github.com/tariqshaban/arabic-sign-language-image-classification/trunk/assets

import arabic_reshaper
import os
import re
import random
import shutil
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf

from bidi.algorithm import get_display
from colorama import Fore, Style
from keras.callbacks import EarlyStopping
from keras.layers import Dense, Dropout, GlobalAveragePooling2D, Layer
from keras.models import Sequential
from keras_preprocessing.image import ImageDataGenerator
from sklearn.metrics import confusion_matrix
from tensorflow.keras.applications.efficientnet import EfficientNetB0
from glob import glob
from IPython.display import clear_output

# Wipe the installer/download logs from the cell output, then report completion.
# NOTE(review): clear_output() also removes any error printed by the shell
# commands above, and the success message below is printed unconditionally.
clear_output()
print(Fore.GREEN + u'\u2713 ' + 'Successfully downloaded dependencies.')    
# Restore the default colours after the green message.
print(Style.RESET_ALL)

# Global Variables

In [None]:
# Class labels, indexed by ClassId and ordered by their Arabic name.
CLASSES_DF = (
    pd.read_excel('./assets/Labels/ClassLabels.xlsx', index_col='ClassId')
    .sort_values('ClassAr')
)

# Parallel lists (same row order): Latin class names and Arabic class names.
# CLASSES is later handed to the image generators as their `classes` list.
CLASSES = CLASSES_DF['Class'].tolist()
CLASSES_AR = CLASSES_DF['ClassAr'].tolist()

# Seed shared by every random number generator (see finalize_seed).
SEED = 42

# Fractions of each class assigned to training / validation / testing.
TRAIN_SPLIT, VALID_SPLIT, TEST_SPLIT = 0.7, 0.2, 0.1

# Raw images live in SOURCE_DIRECTORY; the partitioned copy is built under
# REFACTORED_DIRECTORY with one sub-folder per split.
SOURCE_DIRECTORY = './assets/ArASL_Database_54K/'
REFACTORED_DIRECTORY = './assets/refactored_data/'
TRAIN_DIRECTORY, VALID_DIRECTORY, TEST_DIRECTORY = (
    REFACTORED_DIRECTORY + split + '/' for split in ('train', 'valid', 'test')
)

# Training hyper-parameters.
EPOCHS = 2
LEARNING_RATE = 0.001

# ImageNet-pretrained EfficientNetB0 without its classification head, plus the
# input preprocessing function from the same Keras applications module.
BASE_MODEL = EfficientNetB0(weights='imagenet', include_top=False)

PREPROCESSING_METHOD = tf.keras.applications.efficientnet.preprocess_input

# Helper Methods

### Finalize Seed

In [None]:
def finalize_seed():
    """Seed Python, NumPy and TensorFlow random generators with ``SEED``.

    Also exports ``PYTHONHASHSEED`` to the process environment.
    NOTE(review): Python reads PYTHONHASHSEED at interpreter start-up, so the
    export presumably only influences child processes -- confirm if string-hash
    determinism inside this kernel is required.
    """
    os.environ['PYTHONHASHSEED'] = str(SEED)

    # Same seed, applied to each library's global generator in turn.
    for seed_fn in (random.seed, np.random.seed, tf.random.set_seed):
        seed_fn(SEED)

### Prime Dataset

In [None]:
def prime_dataset():
    """Rebuild the train/valid/test directory tree from the source images.

    Any existing ``REFACTORED_DIRECTORY`` is deleted first. For every class in
    ``CLASSES`` the files found in ``SOURCE_DIRECTORY/<class>/`` are shuffled
    and split: ``int(n * TRAIN_SPLIT)`` files go to training,
    ``int(n * VALID_SPLIT)`` to validation and the remainder to testing.

    Files are *copied*, not moved. The source folders therefore stay intact and
    the function can be re-run safely; moving the files would leave the source
    empty, and the next run's ``rmtree`` would then delete the only remaining
    copy of the dataset.
    """
    if os.path.exists(REFACTORED_DIRECTORY):
        shutil.rmtree(REFACTORED_DIRECTORY)

    for c in CLASSES:
        # glob() returns entries in filesystem order, which is not guaranteed
        # to be stable; sorting first makes the seeded shuffle reproducible.
        # Directories are skipped so only regular files are partitioned.
        image_paths = sorted(
            path for path in glob(f'{SOURCE_DIRECTORY}{c}/*') if os.path.isfile(path)
        )
        random.shuffle(image_paths)

        train_end = int(len(image_paths) * TRAIN_SPLIT)
        valid_end = train_end + int(len(image_paths) * VALID_SPLIT)

        partitions = (
            (TRAIN_DIRECTORY, image_paths[:train_end]),
            (VALID_DIRECTORY, image_paths[train_end:valid_end]),
            # Everything left over (including rounding remainders) is test data.
            (TEST_DIRECTORY, image_paths[valid_end:]),
        )

        for split_directory, split_paths in partitions:
            target_directory = f'{split_directory}{c}'
            # Created even when the split is empty so every class has a folder.
            os.makedirs(target_directory, exist_ok=True)
            for path in split_paths:
                shutil.copy(path, target_directory)

### Build Model

In [None]:
def _plot_training_curve(history, metric: str, title: str, ylabel: str) -> None:
    """Plot one per-epoch training metric alongside its validation counterpart."""
    plt.plot(history.history[metric])
    plt.plot(history.history[f'val_{metric}'])
    plt.title(title)
    plt.ylabel(ylabel)
    plt.xlabel('Epoch')
    # The second curve is computed on `validation_data`, not on the test set.
    plt.legend(['Train', 'Validation'], loc='upper left')
    plt.show()


def _plot_confusion_matrix(y_true, y_pred) -> None:
    """Draw the confusion matrix as a heatmap with 'Arabic (Latin)' tick labels."""
    labels = [f'{ar} ({en})' for ar, en in zip(CLASSES_AR, CLASSES)]
    # Reshape and reorder the Arabic text so matplotlib renders it correctly.
    labels = [get_display(arabic_reshaper.reshape(label)) for label in labels]

    # Passing every class index keeps the matrix len(CLASSES) x len(CLASSES)
    # even if a class never occurs in y_true / y_pred, so the tick labels
    # always line up with the rows and columns.
    matrix = confusion_matrix(y_true, y_pred, labels=list(range(len(CLASSES))))

    ax = sns.heatmap(matrix, annot=True, cmap='Blues', fmt='g')
    ax.set_title('Confusion Matrix')
    ax.set_xlabel('Predicted Values')
    ax.set_ylabel('Actual Values')
    ax.xaxis.set_ticklabels(labels)
    ax.yaxis.set_ticklabels(labels)
    plt.xticks(rotation=90)
    plt.yticks(rotation=0)
    plt.show()


def build_model(measure_performance: bool = True):
    """Train a classifier head on top of the frozen ``BASE_MODEL`` and evaluate it.

    Images are streamed from ``TRAIN_DIRECTORY``, ``VALID_DIRECTORY`` and
    ``TEST_DIRECTORY`` in batches of 128 after ``PREPROCESSING_METHOD``. The
    network is ``BASE_MODEL`` (set to non-trainable) followed by global average
    pooling, dropout of 0.5 and a softmax layer with one unit per class. It is
    trained for up to ``EPOCHS`` epochs with RMSprop and early stopping on the
    validation loss, then scored on the test batches.

    Args:
        measure_performance: When True, also plot the accuracy and loss curves
            and the confusion matrix of the test predictions.

    Returns:
        The object returned by ``model.fit`` (the training history), not the
        trained model itself.
    """
    batch_size = 128

    def load_batches(directory, **flow_kwargs):
        # One generator per split; all of them share the same preprocessing.
        generator = ImageDataGenerator(preprocessing_function=PREPROCESSING_METHOD)
        return generator.flow_from_directory(
            directory=directory, classes=CLASSES, batch_size=batch_size, **flow_kwargs)

    train_batches = load_batches(TRAIN_DIRECTORY)
    # Validation/test order must stay fixed so predictions align with `.classes`.
    valid_batches = load_batches(VALID_DIRECTORY, shuffle=False)
    test_batches = load_batches(TEST_DIRECTORY, shuffle=False)

    # Freeze the pretrained backbone; only the new head is trained.
    base_model = BASE_MODEL
    base_model.trainable = False

    model = Sequential()
    model.add(base_model)
    model.add(GlobalAveragePooling2D())
    model.add(Dropout(0.5))
    model.add(Dense(len(CLASSES), activation='softmax'))

    model.compile(optimizer=tf.keras.optimizers.RMSprop(learning_rate=LEARNING_RATE),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    es = EarlyStopping(monitor='val_loss', mode='auto', verbose=1, patience=10)

    fitted_model = model.fit(x=train_batches, validation_data=valid_batches, epochs=EPOCHS, callbacks=[es])
    # The generator already yields batches, so `batch_size` is not passed here.
    loss, accuracy = model.evaluate(x=test_batches)

    # Reset the colour afterwards so later output is not printed in green.
    print(Fore.GREEN + u'\n\u2713 ' + f'Accuracy ==> {accuracy}' + Style.RESET_ALL)

    plt.rcParams["figure.figsize"] = (15, 8)

    if measure_performance:
        _plot_training_curve(fitted_model, 'accuracy', 'Model accuracy', 'Accuracy')
        _plot_training_curve(fitted_model, 'loss', 'Model loss', 'Loss')

        y_pred = model.predict(test_batches)
        _plot_confusion_matrix(test_batches.classes, y_pred.argmax(axis=1))

    return fitted_model

# Methods Invocation

In [None]:
# Seed the random generators before any shuffling, splitting or training happens.
finalize_seed()

In [None]:
# Recreate the train/valid/test folders from the source images.
prime_dataset()

In [None]:
# Train, evaluate and plot. NOTE: despite its name, `model` receives the value
# returned by build_model, which is the result of `model.fit` (training history).
model = build_model(measure_performance=True)