In [None]:
import cv2 
import uuid
import os
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
import seaborn as sns

Reading data from .csv files

In [None]:
# Load the sign_mnist training and test tables from the workspace image folders.
train_csv_path = 'Tensorflow/workspace/images/train/sign_mnist_train.csv'
test_csv_path = 'Tensorflow/workspace/images/test/sign_mnist_test.csv'
train = pd.read_csv(train_csv_path)
test = pd.read_csv(test_csv_path)

In [None]:
# Preview the first five rows of the training table.
train.head(n=5)

Extracting the labels and counting the number of samples for each label

In [None]:
# Pull the class-label column out of the training table as a NumPy array.
label_column = train['label']
labels = label_column.values

In [None]:
# Copy the labels into their own array and list the distinct class ids present.
label_array = np.array(labels)
distinct_labels = np.unique(label_array)
distinct_labels

In [None]:
# Bar chart of the number of training samples per class label.
figure_size = (20, 10)
plt.figure(figsize=figure_size)
sns.countplot(x=labels)

Above we can see that each label 0-24 has around 900-1300 images

Refining the data so that only the pixel columns are left, i.e. removing the labels

In [None]:
# Remove the label column so that `train` holds pixel values only.
# A new frame is bound instead of mutating in place, and errors='ignore'
# keeps the cell re-runnable once the column has already been removed.
train = train.drop(columns='label', errors='ignore')
train.head()

Extracting Images data from each row in our .csv

In [None]:
# Build the (n_samples, 784) pixel matrix in a single vectorised step.
# The explicit row count makes reshape raise if a row does not hold exactly
# 28 * 28 values, which is the same check the per-row 28x28 reshape performed.
IMAGE_SIDE = 28
images = train.values.reshape(train.shape[0], IMAGE_SIDE * IMAGE_SIDE)

One-hot encoding the labels

In [None]:
!pip3 install scikit-learn
from sklearn.preprocessing import LabelBinarizer

label_binar = LabelBinarizer()
labels = label_binar.fit_transform(labels)
labels

View images

In [None]:
# Display sample 33 as a 28x28 picture.
sample_image = images[33].reshape(28, 28)
plt.imshow(sample_image)

In [None]:
# Display sample 2 as a 28x28 picture.
sample_image = images[2].reshape(28, 28)
plt.imshow(sample_image)

Splitting our dataset into x_train, x_test, y_train, y_test

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the samples for evaluation; the fixed random_state makes
# the shuffle reproducible between runs.
split_parts = train_test_split(images, labels, test_size=0.3, random_state=101)
x_train, x_test, y_train, y_test = split_parts

Scale our images

In [None]:
# Divide every pixel by 255 (assumes 8-bit intensities, giving values in 0-1).
# NOTE: each execution divides again, so run this cell once per kernel session.
x_train, x_test = x_train / 255, x_test / 255

Reshape images to the size required by Tensorflow and Keras

In [None]:
# Give each sample an explicit (height, width, channels) = (28, 28, 1) shape.
x_train = x_train.reshape(len(x_train), 28, 28, 1)
x_test = x_test.reshape(len(x_test), 28, 28, 1)

# Sanity check: the first training sample should still look like an image.
first_image = x_train[0].reshape(28, 28)
plt.imshow(first_image)

Building our own dataset

In [None]:
# The 24 class letters, in alphabetical order; 'J' and 'Z' are not included.
class_names = list('ABCDEFGHIKLMNOPQRSTUVWXY')

In [None]:
# Function to setup the directories

# Root folder for the collected images, joined with the platform's separator.
_images_path_parts = ('Tensorflow', 'workspace', 'images')
IMAGES_PATH = os.path.join(*_images_path_parts)
IMAGES_PATH
# Cell output: the OS family name ('posix' or 'nt') that later cells branch on.
os.name

In [None]:
# Create the image root and one sub-folder per class letter.
# os.makedirs creates missing parent folders, does nothing when the folder
# already exists, and needs no per-OS branching or shell quoting.
os.makedirs(IMAGES_PATH, exist_ok=True)

for name in class_names:
    path = os.path.join(IMAGES_PATH, name)
    os.makedirs(path, exist_ok=True)



In [None]:
# Capture up to 500 webcam frames per class and save them as
# IMAGES_PATH/<class>/<class>.<uuid>.jpg.
# The camera is opened once for the whole session (device index 0; another
# index such as 2 may be needed for a different camera) and is always
# released, even if the loop is interrupted by an error.
cap = cv2.VideoCapture(0)
try:
    if not cap.isOpened():
        print('Could not open the webcam; no images were collected')
    else:
        for name in class_names:
            print('Collecting images for {}'.format(name))
            for i in range(500):
                print('Collecting image {}'.format(i))
                ret, frame = cap.read()
                if not ret:
                    # No frame was delivered, so there is nothing to save or show.
                    print('Failed to read a frame for {}; skipping the rest of this class'.format(name))
                    break
                imageName = os.path.join(IMAGES_PATH, name, name+'.'+'{}.jpg'.format(str(uuid.uuid1())))
                cv2.imwrite(imageName, frame)
                cv2.imshow('frame', frame)

                # Pressing 'q' ends the current class and moves on to the next one.
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
finally:
    cap.release()
    cv2.destroyAllWindows()


Labelling the collected images

In [None]:
!pip3 install --upgrade pyqt5 lxml

In [None]:
# Folder that holds the LabelImg checkout.
_labeling_path_parts = ('Tensorflow', 'labelimg')
LABELING_PATH = os.path.join(*_labeling_path_parts)

In [None]:
if not os.path.exists(LABELING_PATH):
    !mkdir {LABELING_PATH}
    !git clone https://github.com/tzutalin/LabelImg {LABELING_PATH}

In [None]:
if os.name == 'posix':
    !cd {LABELING_PATH} && make qt5py3
if os.name == 'nt':
    !cd {LABELING_PATH} && pyrcc5 -o libs/resources.py resources.qrc

In [None]:
# Start LabelImg from inside its checkout folder.
# NOTE(review): `python` is resolved through the shell's PATH, which may not be
# the interpreter the kernel runs on - confirm it has pyqt5 and lxml installed.
!cd {LABELING_PATH} && python labelImg.py