In [1]:
import os

import numpy as np
import cv2
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, concatenate, Dense, BatchNormalization
from tensorflow.keras.models import Model
import mediapipe as mp

In [2]:
# One shared MediaPipe Hands detector, reused for every image in the notebook.
# It is configured for still images, reports at most one hand, and uses a
# permissive detection threshold of 0.2.
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(
    static_image_mode=True,
    max_num_hands=1,
    min_detection_confidence=0.2,
)

In [3]:
def random_augmentation(img, max_shift=20):
    """Translate an image by a random offset along both axes.

    Args:
        img: Image array shaped ``(rows, cols)`` or ``(rows, cols, channels)``.
        max_shift: Largest absolute shift in pixels; the x and y offsets are
            drawn independently and uniformly from ``[-max_shift, max_shift)``.
            Defaults to 20.

    Returns:
        The translated image produced by ``cv2.warpAffine``, with the same
        width and height as ``img``.
    """
    # Only height and width are needed; slicing (instead of unpacking three
    # values) keeps 2-D grayscale inputs from raising a ValueError.
    rows, cols = img.shape[:2]
    tx, ty = np.random.uniform(-max_shift, max_shift, size=2)
    # 2x3 affine matrix for a pure translation by (tx, ty).
    M_trans = np.float32([[1, 0, tx], [0, 1, ty]])
    # warpAffine takes the output size as (width, height).
    return cv2.warpAffine(img, M_trans, (cols, rows))

def mediapipe_preprocessing(img, augment=True):
    """Optionally augment an image and extract MediaPipe hand landmarks.

    Args:
        img: Float image scaled to ``[0, 1]`` with shape ``(rows, cols, 3)``.
            The channels are assumed to be in RGB order, which is what the
            Keras directory iterator used in this notebook yields.
        augment: When True (the default) a random translation and Gaussian
            pixel noise are applied before landmark detection. Pass False for
            deterministic preprocessing.

    Returns:
        A tuple ``(image, landmarks)``. ``image`` is the (possibly augmented)
        image as floats rescaled to ``[0, 1]``. ``landmarks`` is a flat array
        of 63 values (x, y, z for each of the 21 hand landmarks), or all zeros
        when no hand is detected.
    """
    # Round before casting: a bare astype('uint8') truncates, so a pixel that
    # comes back from the 1/255 rescale as 254.99998 would become 254.
    img_uint8 = np.clip(np.rint(img * 255.0), 0, 255).astype('uint8')

    if augment:
        img_shifted = random_augmentation(img_uint8)
        # Standard deviation of the additive noise, on the 0-255 scale.
        noise_factor = 0.5
        noise = np.random.randn(*img_shifted.shape) * noise_factor
        img_noisy = np.clip(img_shifted + noise, 0, 255)
    else:
        img_noisy = img_uint8.astype('float64')

    # MediaPipe expects RGB input and the image is already RGB, so it is passed
    # through unchanged. The previous BGR->RGB conversion actually swapped the
    # channels to BGR before detection.
    results = hands.process(img_noisy.astype('uint8'))

    if results.multi_hand_landmarks:
        # Only the first detected hand is used.
        first_hand = results.multi_hand_landmarks[0]
        landmarks_array = np.array(
            [[point.x, point.y, point.z] for point in first_hand.landmark]
        ).flatten()
    else:
        # No detection: keep the feature vector shape with a zero placeholder.
        landmarks_array = np.zeros((21, 3)).flatten()

    img_rescaled = img_noisy / 255.0
    return (img_rescaled, landmarks_array)



In [4]:
class CustomDataGenerator(tf.keras.utils.Sequence):
    """Keras ``Sequence`` that pairs each image batch with extra per-image features.

    Images are read from ``directory`` through an ``ImageDataGenerator`` that
    rescales pixels by 1/255. Each image in a batch is then passed through
    ``preprocessing_function``, which must return ``(processed_image, features)``.

    Args:
        directory: Root folder with one sub-folder per class.
        batch_size: Number of images per batch.
        target_size: ``(height, width)`` every image is resized to.
        class_mode: Label encoding forwarded to ``flow_from_directory``.
        preprocessing_function: Callable ``img -> (img, features)``. When
            None, batches are returned exactly as the directory iterator
            produced them.
        **kwargs: Forwarded to the ``Sequence`` base-class constructor.
    """

    def __init__(self, directory, batch_size, target_size, class_mode='categorical', preprocessing_function=None, **kwargs):
        # Newer Keras versions expect Sequence subclasses to initialise the base class.
        super().__init__(**kwargs)
        self.batch_size = batch_size
        self.target_size = target_size
        self.class_mode = class_mode
        self.preprocessing_function = preprocessing_function

        self.datagen = ImageDataGenerator(rescale=1./255)
        self.generator = self.datagen.flow_from_directory(directory,
                                                          batch_size=batch_size,
                                                          target_size=target_size,
                                                          class_mode=class_mode)

    def __len__(self):
        """Return the number of batches per epoch."""
        return len(self.generator)

    def on_epoch_end(self):
        """Forward the epoch-end hook so the wrapped iterator can reshuffle."""
        self.generator.on_epoch_end()

    def __getitem__(self, idx):
        """Return batch ``idx`` as ``([images, features], labels)``.

        If no preprocessing function was supplied, the raw ``(images, labels)``
        batch from the directory iterator is returned instead.
        """
        batch_x, batch_y = self.generator[idx]

        if self.preprocessing_function is None:
            # Nothing to compute per image; avoid calling None.
            return batch_x, batch_y

        processed = [self.preprocessing_function(img) for img in batch_x]
        batch_x_processed = np.array([image for image, _ in processed])
        batch_landmarks = np.array([landmarks for _, landmarks in processed])

        return [batch_x_processed, batch_landmarks], batch_y

In [5]:
# Root folder of the ASL alphabet dataset. Set the ASL_DATA_ROOT environment
# variable to run the notebook on another machine; the default is the
# original local path.
ASL_DATA_ROOT = os.environ.get(
    'ASL_DATA_ROOT',
    'D:/My Documents/GIT Projects/IEEE-Hack-the-meta/ASL',
).rstrip('/\\')

train_data_dir = f'{ASL_DATA_ROOT}/asl_alphabet_train'
test_data_dir = f'{ASL_DATA_ROOT}/asl_alphabet_test'

In [6]:
# Image branch: 200x200 colour image -> conv -> batch norm -> max pool -> flat vector.
input_img = Input(shape=(200, 200, 3))
x = Conv2D(filters=32, kernel_size=(3, 3), activation='relu')(input_img)
x = BatchNormalization()(x)
x = MaxPooling2D(pool_size=(2, 2))(x)
x = Flatten()(x)

# Landmark branch: 63 values (21 landmarks x 3 coordinates) -> small dense layer.
input_landmarks = Input(shape=(63,))
y = Dense(units=32, activation='relu')(input_landmarks)

# Fuse both branches and classify into 29 classes.
combined = concatenate([x, y])
z = Dense(units=64, activation='relu')(combined)
output = Dense(units=29, activation='softmax')(z)

model = Model(inputs=[input_img, input_landmarks], outputs=output)

# Adam with a 1e-3 learning rate; labels are one-hot, hence categorical cross-entropy.
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
model.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [7]:
# Both splits share the same batch size, image size and per-image preprocessing.
# NOTE(review): the test generator uses the same preprocessing function as
# training, so any randomness inside it also affects validation/evaluation batches.
_generator_kwargs = dict(
    batch_size=32,
    target_size=(200, 200),
    preprocessing_function=mediapipe_preprocessing,
)
train_generator = CustomDataGenerator(train_data_dir, **_generator_kwargs)
test_generator = CustomDataGenerator(test_data_dir, **_generator_kwargs)

Found 73950 images belonging to 29 classes.
Found 13050 images belonging to 29 classes.


In [8]:
# Train for 10 epochs, validating on the test generator after each epoch.
model.fit(
    x=train_generator,
    validation_data=test_generator,
    epochs=10,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10

KeyboardInterrupt: 

In [None]:
# Evaluate silently; with the compiled metrics, score holds [loss, accuracy].
score = model.evaluate(test_generator, verbose=0)
test_loss, test_accuracy = score[:2]
print('Test loss:', test_loss)
print('Test accuracy:', test_accuracy)

In [None]:
# Persist the trained model (architecture and weights) as an HDF5 file.
model.save(filepath='asl_model_bone.h5')