# Download Dataset

In [50]:
# Use the %pip magic so the package is installed into the environment of the running kernel.
%pip install -q kaggle

In [2]:
import kaggle
from pathlib import Path

# Kaggle dataset slug and the local directory that receives the extracted files
dataset_name = "datamunge/sign-language-mnist"
save_path = "./sl_data"

# The two CSV files that the loading cell reads from save_path
expected_files = ("sign_mnist_train.csv", "sign_mnist_test.csv")

# Download and unzip only when one of the CSVs is missing, so that re-running the
# notebook does not fetch the whole archive again.
if not all((Path(save_path) / name).exists() for name in expected_files):
    kaggle.api.dataset_download_files(dataset_name, path=save_path, unzip=True)


# Load Data into Dataframes & Exploratory Data Analysis

In [51]:
# Use the %pip magic so the package is installed into the environment of the running kernel.
%pip install -q pandas

In [71]:
import pandas as pd

# Load the train and test splits from the downloaded CSV files
df = pd.read_csv("./sl_data/sign_mnist_train.csv")
df_test = pd.read_csv("./sl_data/sign_mnist_test.csv")

# Keep the training frame's column names and dtypes, first as pandas objects
# and then as plain Python lists (nothing is printed for these)
columns_names, column_types = df.columns, df.dtypes
column_name = list(columns_names)
column_type = list(column_types)

# Largest value found in the label column. Despite the variable name, this is a
# maximum rather than a count of distinct labels.
unique_labels = df['label'].max()
print(unique_labels)

24


In [76]:
# Show the (rows, columns) shape of the training frame, then of the test frame
for frame in (df, df_test):
    print(frame.shape)

(27455, 785)
(7172, 785)


# Normalise Train and Test Dataframes

In [72]:
# Normalise train and test data

# Every column after the first is treated as a pixel column (the first one is not rescaled)
pixel_columns = df.columns[1:]

# Scale pixel values by 1/255. The frames are modified in place, so the guard
# skips a frame whose largest pixel value is already at most 1; without it,
# running this cell a second time would divide the data by 255 again.
for frame in (df, df_test):
    if frame[pixel_columns].to_numpy().max() > 1:
        frame[pixel_columns] = frame[pixel_columns] / 255

# Load into Tensorflow Dataset

In [77]:
# Wrap each dataframe in a tf.data.Dataset whose elements are (pixels, label) tuples
# rather than dictionaries. No reshaping or batching happens in this cell.
import tensorflow as tf


def _as_pixel_label_dataset(frame):
    """Return a Dataset of (pixel row, label) pairs sliced from ``frame``."""
    pixels = frame[pixel_columns].values
    labels = frame['label'].values
    return tf.data.Dataset.from_tensor_slices((pixels, labels))


train = _as_pixel_label_dataset(df)
test = _as_pixel_label_dataset(df_test)

In [65]:
# Use the %pip magic so the package is installed into the environment of the running kernel.
%pip install -q tensorflow-addons

# Create CNN Model

In [78]:
import tensorflow_addons as tfa

# Define the CNN model
num_filters = 64

# unique_labels holds the largest label value, and the sparse cross-entropy loss
# indexes the softmax output by label, so one more unit than that value is needed.
num_classes = int(unique_labels) + 1

model = tf.keras.Sequential([
    # Declare the flat 28*28 input on the model itself, not on an inner layer,
    # so the model is built as soon as it is defined.
    tf.keras.Input(shape=(28 * 28,)),
    # Turn each flat row into a single-channel 28x28 image for the convolution
    tf.keras.layers.Reshape((28, 28, 1)),
    tf.keras.layers.Conv2D(num_filters, 3, activation='relu'),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(64, activation='relu'),
    # Softmax output, matching from_logits=False in the loss used at compile time
    tf.keras.layers.Dense(num_classes, activation='softmax')
])

# Compile and train the model

In [79]:
# Training hyper-parameters
learning_rate = 1e-4
batch_size = 32
epochs = 5

optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
# The labels are integer class ids and the model ends in a softmax, hence sparse
# categorical cross-entropy with from_logits=False.
model.compile(optimizer=optimizer, loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False), metrics=['accuracy'])

# Batch into new names instead of rebinding train/test: rebinding would make a
# second run of this cell batch data that is already batched.
train_batches = train.batch(batch_size)
model.fit(train_batches, epochs=epochs)

# Evaluate the model
train_loss, accuracy = model.evaluate(train_batches)
print('Train Accuracy: ', accuracy)

test_batches = test.batch(batch_size)
test_loss, test_acc = model.evaluate(test_batches)
print('Test accuracy:', test_acc)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Train Accuracy:  0.8893826007843018
Test accuracy: 0.739403247833252


# Save Model

In [80]:
# Export the trained model with tf.saved_model into the relative directory 'model'
tf.saved_model.save(model, export_dir='model')



INFO:tensorflow:Assets written to: model\assets


INFO:tensorflow:Assets written to: model\assets


# Compress Model into .zip

In [89]:
import zipfile
import os


def zip_directory(src_dir, zip_path):
    """Write every file found under ``src_dir`` into a DEFLATE-compressed zip archive.

    Args:
        src_dir: Directory that is walked recursively.
        zip_path: Path of the archive to create; an existing file is overwritten.

    Member names are relative to the parent of ``src_dir``, so the archive holds
    one top-level folder named after ``src_dir``. Directories that contain no
    files are not stored.
    """
    parent_dir = os.path.dirname(os.path.abspath(src_dir))
    # ZipFile stores members uncompressed by default, so request DEFLATE explicitly.
    with zipfile.ZipFile(zip_path, 'w', compression=zipfile.ZIP_DEFLATED) as archive:
        for folder, _subfolders, filenames in os.walk(src_dir):
            for filename in filenames:
                file_path = os.path.join(folder, filename)
                archive.write(file_path, arcname=os.path.relpath(file_path, parent_dir))


zip_directory("model", 'sign_language_model.zip')