In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder

class DataGenerator(tf.keras.utils.Sequence):
    def __init__(self, csv_file, batch_size=32, image_size=(400,400), shuffle=True):
        self.data = pd.read_csv(csv_file)
        self.batch_size = batch_size
        self.image_size = image_size
        self.shuffle = shuffle
        self.on_epoch_end()

        self.label_encoder = LabelEncoder()
        self.data['label'] = self.label_encoder.fit_transform(self.data['label'])

    def __len__(self):
        return len(self.data) // self.batch_size

    def __getitem__(self, index):
        batch_data = self.data.iloc[index * self.batch_size : (index + 1) * self.batch_size]
        X, y = self.__data_generation(batch_data)
        return X, y

    def on_epoch_end(self):
        if self.shuffle:
            self.data = self.data.sample(frac=1).reset_index(drop=True)

    def __data_generation(self, batch_data):
        X = np.empty((self.batch_size, *self.image_size, 1))
        y = np.empty((self.batch_size), dtype=int)

        for i, row in enumerate(batch_data.iterrows()):
            img_path, label = row[1]['image_path'], row[1]['label']
            img = tf.keras.preprocessing.image.load_img(img_path, color_mode="grayscale", target_size=self.image_size)
            X[i,] = tf.keras.preprocessing.image.img_to_array(img) / 255.0
            y[i] = label

        return X, y


In [2]:
!unzip processed_combine_asl_dataset.zip

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: processed_combine_asl_dataset/y/Y1190.jpg  
  inflating: processed_combine_asl_dataset/y/Y1191.jpg  
  inflating: processed_combine_asl_dataset/y/Y1192.jpg  
  inflating: processed_combine_asl_dataset/y/Y1193.jpg  
  inflating: processed_combine_asl_dataset/y/Y1194.jpg  
  inflating: processed_combine_asl_dataset/y/Y1195.jpg  
  inflating: processed_combine_asl_dataset/y/Y1196.jpg  
  inflating: processed_combine_asl_dataset/y/Y1197.jpg  
  inflating: processed_combine_asl_dataset/y/Y1198.jpg  
  inflating: processed_combine_asl_dataset/y/Y1199.jpg  
  inflating: processed_combine_asl_dataset/y/Y12 - Copy - Copy.jpg  
  inflating: processed_combine_asl_dataset/y/Y12.jpg  
  inflating: processed_combine_asl_dataset/y/Y120.jpg  
  inflating: processed_combine_asl_dataset/y/Y1200.jpg  
  inflating: processed_combine_asl_dataset/y/Y1201.jpg  
  inflating: processed_combine_asl_dataset/y/Y1202.jpg  
  inflating: p

In [3]:
import os
import csv

directory = "./processed_combine_asl_dataset"
ext = [".png", ".jpg", ".jpeg"]

with open('mp-to-asl.csv', newline='', mode='w+') as data:
    writer = csv.writer(data, quotechar='"', quoting=csv.QUOTE_ALL)
    writer.writerow(["image_path", "label"])
    for path, folders, files in os.walk(directory):
        for file in files:
            writer.writerow([
                os.path.join(path, file),
                path.split('/')[-1].upper()
            ])


In [4]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from sklearn.preprocessing import LabelEncoder

batch_size = 128
train_generator = DataGenerator(csv_file='mp-to-asl.csv', batch_size=batch_size)
val_generator = DataGenerator(csv_file='mp-to-asl.csv', batch_size=batch_size, shuffle=False)

model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(400, 400, 1)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2,2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D((2,2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(36, activation='softmax')
])

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

with tf.device('/gpu:0'):
  model.fit(train_generator, validation_data=val_generator, epochs=15)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/15


  self._warn_if_super_not_called()


[1m823/823[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m490s[0m 569ms/step - accuracy: 0.6348 - loss: 1.3081 - val_accuracy: 0.9684 - val_loss: 0.1320
Epoch 2/15
[1m823/823[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m462s[0m 559ms/step - accuracy: 0.9730 - loss: 0.0976 - val_accuracy: 0.9911 - val_loss: 0.0350
Epoch 3/15
[1m823/823[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m463s[0m 561ms/step - accuracy: 0.9895 - loss: 0.0341 - val_accuracy: 0.9916 - val_loss: 0.0279
Epoch 4/15
[1m823/823[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m463s[0m 561ms/step - accuracy: 0.9909 - loss: 0.0270 - val_accuracy: 0.9947 - val_loss: 0.0179
Epoch 5/15
[1m823/823[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m463s[0m 560ms/step - accuracy: 0.9939 - loss: 0.0196 - val_accuracy: 0.9935 - val_loss: 0.0208
Epoch 6/15
[1m823/823[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m461s[0m 558ms/step - accuracy: 0.9951 - loss: 0.0176 - val_accuracy: 0.9955 - val_loss: 0.0138
Epoch 7/15
[1m

In [6]:
model.save('mp-to-asl-cnn-model.keras')

In [7]:
model.save('mp-to-asl-cnn-model.h5')

