# Import Libraries

In [2]:
import numpy as np
import os
import matplotlib.pyplot as plt
import tensorflow as tf

In [3]:
# tqdm wraps a loop so that it shows a progress meter while it runs
from tqdm import tqdm
# We will be using opencv to resize our image
import cv2
# pickle is used to save the data
import pickle

# Exploratory Data Analysis

In [4]:
# Root folder of the dataset; it holds one sub-folder per class label
BASE_DIR = "./Datasets/Sign Language for Alphabets"
# Class labels, one per lowercase letter "a".."z"
# Each label is also the name of the matching sub-folder under BASE_DIR
CLASSES = list("abcdefghijklmnopqrstuvwxyz")

In [5]:
# Target size passed to cv2.resize for every image before it is stored
IMG_SIZE = (80, 80)

# Collects one [image array, class index] pair per successfully loaded image
data = list()
# Paths that OpenCV could not decode (e.g. stray non-image files in a class folder)
skipped = list()

# enumerate() gives each class its position in CLASSES as the integer label,
# without a repeated CLASSES.index() lookup
for label, class_name in enumerate(tqdm(CLASSES)):
    # Each class has its own folder under BASE_DIR, named after the class
    path = os.path.join(BASE_DIR, class_name)
    # Iterating through every file in this class folder
    for img in os.listdir(path):
        # Defining a path for each image
        img_path = os.path.join(path, img)
        # cv2.imread returns None instead of raising when a file cannot be read or decoded
        img_arr = cv2.imread(img_path)
        if img_arr is None:
            # Passing None on to cv2.resize would abort the whole load, so skip and report
            skipped.append(img_path)
            continue
        # Using opencv to resize image arrays to 80X80 dimensions
        img_arr = cv2.resize(img_arr, IMG_SIZE)
        # Storing the array of each image along with its class label
        data.append([img_arr, label])

# Make skipped files visible so a silently shrunken dataset does not go unnoticed
if skipped:
    print("Skipped", len(skipped), "unreadable file(s), e.g.:", skipped[0])

100%|██████████| 26/26 [01:28<00:00,  3.40s/it]


In [6]:
# Report how many [image, label] samples were loaded
print(f"The length of data is: {len(data)}")

The length of data is: 39000


# Data Preprocessing

In [7]:
import random

In [8]:
# Shuffle the samples so the classes are interleaved instead of grouped by folder
# Without this, the model would see one category after another and may learn them incorrectly
# A fixed seed on a dedicated generator makes the order identical on every run
# and leaves the global `random` state untouched
SHUFFLE_SEED = 42

random.Random(SHUFFLE_SEED).shuffle(data)

In [9]:
# Peek at the first sample after shuffling: an [image array, class label] pair
data[0]

[array([[[133, 133, 133],
         [138, 138, 138],
         [141, 141, 141],
         ...,
         [131, 131, 131],
         [128, 128, 128],
         [126, 126, 126]],
 
        [[136, 136, 136],
         [141, 141, 141],
         [143, 143, 143],
         ...,
         [132, 132, 132],
         [129, 129, 129],
         [126, 126, 126]],
 
        [[138, 138, 138],
         [142, 142, 142],
         [143, 143, 143],
         ...,
         [133, 133, 133],
         [130, 130, 130],
         [128, 128, 128]],
 
        ...,
 
        [[124, 124, 124],
         [125, 125, 125],
         [128, 128, 128],
         ...,
         [125, 125, 125],
         [122, 122, 122],
         [120, 120, 120]],
 
        [[122, 122, 122],
         [124, 124, 124],
         [126, 126, 126],
         ...,
         [123, 123, 123],
         [120, 120, 120],
         [118, 118, 118]],
 
        [[121, 121, 121],
         [123, 123, 123],
         [125, 125, 125],
         ...,
         [121, 121, 121],
  

In [11]:
# Split each [image, label] pair in data into two parallel lists
# x collects the image arrays (features), y the matching class labels
x = [sample[0] for sample in data]
y = [sample[1] for sample in data]

In [12]:
# Turn the Python lists into numpy arrays, rebinding the same names
x, y = np.array(x), np.array(y)

In [14]:
# Saving our data to pickle files
# 'wb' stands for write in binary
# The with-blocks close each file as soon as it has been written
with open('features.pkl', 'wb') as features_file:
    pickle.dump(x, features_file)
# labels.pkl must hold the class labels y (it was previously written with the features x)
with open('labels.pkl', 'wb') as labels_file:
    pickle.dump(y, labels_file)