<a href="https://www.kaggle.com/code/botaylaamin/arabic-alphabets-sign-language?scriptVersionId=262594935" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [None]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
import os 
from PIL import Image, ImageFile
import random
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import img_to_array
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from PIL import Image
from tqdm import tqdm  
import glob

In [None]:
# Root directory of the dataset; each sub-folder is named after one alphabet letter.
path = r"/kaggle/input/rgb-arabic-alphabets-sign-language-dataset/RGB ArSL dataset"

# Class names are the sub-folder names. os.listdir() returns entries in
# arbitrary order and may include stray files, so keep directories only and
# sort them to get the same ordering on every run.
categories = sorted(
    entry for entry in os.listdir(path)
    if os.path.isdir(os.path.join(path, entry))
)

# Parallel lists: images[i] is a file path and labels[i] is its class name.
images = []
labels = []

for cat in categories:
    cat_path = os.path.join(path, cat)
    # Sorted so the samples inside each class also have a stable order.
    for img_name in sorted(os.listdir(cat_path)):
        images.append(os.path.join(cat_path, img_name))
        labels.append(cat)
        

In [None]:
# Display up to 12 randomly chosen images with their labels.

# random.sample raises ValueError when asked for more items than exist,
# so cap the sample size at the number of available images.
sample_size = min(12, len(images))
random_indices = random.sample(range(len(images)), sample_size)

# Look up the paths and labels of the selected samples.
random_imges = [images[i] for i in random_indices]
random_labels = [labels[i] for i in random_indices]

# Plot the samples on a 3x4 grid.
for i, (img_file, label) in enumerate(zip(random_imges, random_labels)):
    plt.subplot(3, 4, i + 1)
    # Open inside a context manager so the file handle is released promptly;
    # the pixels are copied into a NumPy array that imshow can draw.
    with Image.open(img_file) as img:
        img_array = np.array(img)
    plt.imshow(img_array)
    plt.title(label)
    plt.axis('off')  # hide the x/y axes

plt.tight_layout()  # prevent titles and images from overlapping
plt.show()

In [None]:
# Tabulate the dataset: one row per image, holding its file path and class label.

df = pd.DataFrame(data=dict(images=images, labels=labels))
df

In [None]:
# Print a summary of the DataFrame (columns, non-null counts, dtypes, memory usage).
df.info()

In [None]:
# Horizontal bar chart of how many images each class contains.
class_counts = df["labels"].value_counts()
class_counts.plot(kind='barh', color='tan')
plt.title("Distribution of Categories")
plt.show()

In [None]:
# Load every image, resize it to a fixed size and scale its pixels to [0, 1].
x_list = []  # one image array per readable file
y_list = []  # class name of each image, parallel to x_list
img_width, img_height = 224, 224

# Walk classes and files in sorted order: directory listings and glob return
# entries in arbitrary order, which would make the seeded train/test split
# further down differ from run to run.
for cat in tqdm(sorted(categories), desc="Processing categories"):
    cat_path = os.path.join(path, cat)
    # Escape the folder part so glob metacharacters in a folder name
    # (such as "[" or "*") are matched literally.
    img_paths = sorted(glob.glob(glob.escape(cat_path) + "/**/*.*", recursive=True))

    for img_path in img_paths:
        try:
            # Release the file handle as soon as the RGB copy has been made.
            with Image.open(img_path) as raw_img:
                img = raw_img.convert("RGB")
            img = img.resize((img_width, img_height))
            img_arr = img_to_array(img) / 255.0
        except Exception as e:
            # Best effort: report unreadable or non-image files and keep going.
            print(f"Skipped {img_path}: {e}")
        else:
            # Append both lists together so they always stay aligned.
            x_list.append(img_arr)
            y_list.append(cat)

In [None]:
# Stack the per-image arrays into one float32 array.
x = np.asarray(x_list, dtype="float32")

# Map class names to integer ids, then expand the ids into one-hot rows.
le = LabelEncoder()
y = to_categorical(le.fit_transform(y_list))

In [None]:
# Report the array shapes and the classes discovered by the label encoder.
for caption, value in (
    ("X shape:", x.shape),
    ("Y shape:", y.shape),
    ("Classes:", le.classes_),
    ("Number of Classes", len(le.classes_)),
):
    print(caption, value)

In [None]:
# Hold out 20% of the samples for testing; the split is seeded and stratified on y.
split_options = dict(test_size=0.2, random_state=42, stratify=y)
X_train, X_test, y_train, y_test = train_test_split(x, y, **split_options)

print("Train shape:", X_train.shape, y_train.shape)
print("Test shape:", X_test.shape, y_test.shape)

In [None]:
# Data Augmentation

def random_brightness(image):
    """Scale the brightness of one image by a random factor in [0.8, 1.2].

    Used instead of ImageDataGenerator's ``brightness_range``: that option
    converts each image to an 8-bit PIL image, which corrupts inputs that
    were already divided by 255 (as the training images are here). Working
    directly on the float array keeps the values inside [0, 1].

    Args:
        image: one image as a rank-3 NumPy array with values in [0, 1].

    Returns:
        A NumPy array of the same shape, clipped to [0, 1].
    """
    factor = np.random.uniform(0.8, 1.2)
    return np.clip(image * factor, 0.0, 1.0)


datagen = ImageDataGenerator(
    rotation_range=20,       # random rotation of up to 20 degrees
    width_shift_range=0.2,   # horizontal shift (fraction of the width)
    height_shift_range=0.2,  # vertical shift (fraction of the height)
    zoom_range=0.2,          # zoom in/out
    shear_range=0.2,         # shear
    horizontal_flip=True,    # random left/right mirroring
    preprocessing_function=random_brightness,  # change brightness by 0.8x-1.2x
    fill_mode='nearest'      # fill the missing pixels
)

In [None]:
# Training batches: drawn through the augmenting generator, reshuffled.
train_generator = datagen.flow(x=X_train, y=y_train, batch_size=32, shuffle=True)

# Test batches: a default generator with no augmentation options, in fixed order.
test_generator = ImageDataGenerator().flow(x=X_test, y=y_test, batch_size=32, shuffle=False)