## This notebook extracts a concatenated feature vector for each image and uses it instead of the raw pixel values. The hand is assumed to be in the foreground of each picture.

In [84]:
import numpy as np
import cv2
import matplotlib.pyplot as plt
import os
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from tensorflow.python.keras.utils import to_categorical
from tensorflow.python.keras import Input, Model 
from tensorflow.python.keras.layers import Dense, Dropout
import pixiedust
from tqdm import tqdm_notebook as tqdm

%config InlineBackend.figure_format = 'retina'

In [24]:
# img = cv2.imread('images/alef/1.jpg')

# ft = cv2.freetype.createFreeType2()
# ft.loadFontData(fontFileName='NewPeninimMT.ttc', id = 0)
# ft.putText(img, 'הקידב', org=(15, 70), fontHeight=60, color=(255,255,255), 
#            thickness=-1, line_type=cv2.LINE_AA, bottomLeftOrigin=True)

# cv2.imshow('א - Alef', img)
# cv2.waitKey(0)

# cv2.destroyAllWindows()
# cv2.waitKey(1)

Automatic pdb calling has been turned OFF


In [97]:
# One image folder per class; load_pictures uses a folder's position in LETTERS
# as the integer label of the pictures inside it.
_LETTER_NAMES = (
    'alef', 'bet', 'gimel', 'daled', 'hey', 'vav',
    'zayin', 'het', 'tet', 'yud', 'haf', 'lamed',
    'mem', 'nun', 'samech', 'ayin', 'pey', 'tzadik',
    'kuf', 'reish', 'shin', 'taf', 'space', 'empty',
)
LETTERS = ['images/' + name + '/' for name in _LETTER_NAMES]

def load_pictures(num_pictures = -1, test_sets = 1):
    """Load the pictures of every folder in LETTERS, split into train and test sets.

    Pictures are expected to be named ``1.jpg``, ``2.jpg``, ... inside each folder.
    The first ``test_sets * 200`` pictures of a folder form its test set; the
    pictures after that, up to the folder's picture count, form its train set.
    A picture's label is the index of its folder in LETTERS.

    Args:
        num_pictures: highest picture number to load per folder; -1 uses the
            number of ``.jpg`` files found in the folder.
        test_sets: number of 200-picture batches per folder reserved for testing.

    Returns:
        Tuple ``(train_pictures, train_labels, test_pictures, test_labels)``
        of numpy arrays.

    Raises:
        FileNotFoundError: if a folder does not exist or a picture cannot be read.
    """
    def _read_picture(folder, index):
        # cv2.imread signals failure by returning None instead of raising, which
        # would otherwise slip a None into the array and only fail much later.
        picture_path = folder + str(index) + '.jpg'
        picture = cv2.imread(picture_path)
        if picture is None:
            raise FileNotFoundError('Could not read picture: ' + picture_path)
        return picture

    test_count = test_sets * 200  # test pictures are taken in batches of 200
    train_pictures, test_pictures = [], []
    train_labels, test_labels = [], []
    for k, letter in enumerate(tqdm(LETTERS)):
        if num_pictures == -1:  # Use all pictures in the folder
            walked = next(os.walk(letter), None)
            if walked is None:
                raise FileNotFoundError('Picture folder not found: ' + letter)
            # Count only the .jpg files so stray files (e.g. OS metadata) do not
            # make the loops ask for pictures that do not exist.
            file_count = sum(1 for name in walked[2] if name.endswith('.jpg'))
        else:
            file_count = num_pictures
        # Collect test pictures
        for i in range(1, test_count + 1):
            test_labels.append(k)
            test_pictures.append(_read_picture(letter, i))
        # Collect train pictures, starting where the test set stopped
        for i in range(test_count + 1, file_count + 1):
            train_labels.append(k)
            train_pictures.append(_read_picture(letter, i))

    return np.array(train_pictures), np.array(train_labels), np.array(test_pictures), np.array(test_labels)

def get_hog():
    """Build the cv2.HOGDescriptor used to describe every picture.

    The geometry must satisfy two constraints:
      * (winSize - blockSize) % blockStride == 0
      * blockSize % cellSize == 0
    """
    window = (200, 200)
    block = (40, 40)
    stride = (40, 40)
    cell = (20, 20)
    return cv2.HOGDescriptor(
        window,   # winSize
        block,    # blockSize
        stride,   # blockStride
        cell,     # cellSize
        9,        # nbins
        1,        # derivAperture
        -1.,      # winSigma
        0,        # histogramNormType
        0.2,      # L2HysThreshold
        1,        # gammaCorrection
        64,       # nlevels
        True,     # signedGradients
    )

def calculate_hog(hog, pictures):
    """Compute one HOG descriptor per picture.

    Args:
        hog: descriptor object exposing ``compute(picture)`` (see get_hog).
        pictures: iterable of pictures to describe.

    Returns:
        numpy array with one row per picture; each row is the output of
        ``hog.compute`` with its singleton dimensions squeezed away.
    """
    # tqdm only draws the progress bar; no labels are needed for that, so the
    # former lookup of an undefined global `labels` is gone.
    hog_descriptors = [np.squeeze(hog.compute(picture)) for picture in tqdm(pictures)]
    return np.array(hog_descriptors)

In [98]:
%%time
print('Loading letter pictures ... ')
# Load data.
train_pictures, train_labels, test_pictures, test_labels = load_pictures()
print('Finished loading pictures')
print('Loaded', len(train_pictures), 'train pictures and', len(test_pictures), 'test pictures')

Loading letter pictures ... 


HBox(children=(IntProgress(value=0, max=24), HTML(value='')))


Finished loading pictures
Loaded 48000 train pictures and 4800 test pictures
CPU times: user 46.2 s, sys: 26.3 s, total: 1min 12s
Wall time: 1min 44s


In [99]:
%%time
print('Setting up HoG parameters')
hog = get_hog()
print('Calculating HoG descriptor for train set')
hog_train = calculate_hog(hog, train_pictures)
print('Calculating HoG descriptor for test set')
hog_test = calculate_hog(hog, test_pictures)
print('Finished calculating HoG descriptors.')

Setting up HoG parameters
Calculating HoG descriptor for train set


HBox(children=(IntProgress(value=0, max=48000), HTML(value='')))


Calculating HoG descriptor for test set


HBox(children=(IntProgress(value=0, max=4800), HTML(value='')))


Finished calculating HoG descriptors.
CPU times: user 43.4 s, sys: 8.02 s, total: 51.4 s
Wall time: 54.5 s


In [100]:
print('Splitting data')
# Hold out 10% of the training descriptors as a validation set; the fixed
# random_state keeps the split identical between runs.
X_train, X_val, y_train, y_val = train_test_split(hog_train, train_labels, test_size=0.1, random_state=42)

print('X_train shape:', X_train.shape)
print('y_train shape:', y_train.shape)
# These two are the validation arrays, not the test set (hog_test / test_labels),
# so they are labelled as such.
print('X_val shape:', X_val.shape)
print('y_val shape:', y_val.shape)


Splitting data
X_train shape: (43200, 900)
y_train shape: (43200,)
X_test shape: (4800, 900)
y_test shape: (4800,)


# SVM Model

In [None]:
%%time
# NEED TO DO THIS BEFORE CONVERTING Y TO CATEGORICAL OTHERWISE IT WILL NOT RUN

######     Training SVM Model     ##################

svm = cv2.ml.SVM_create()
# trainingDataMat = np.array(hog_descriptors, np.float32)
# labelsMat = np.array([y_train], np.int32)
svm.setType(cv2.ml.SVM_C_SVC)
svm.setKernel(cv2.ml.SVM_RBF) # For long feature vector use SVM_LINEAR
svm.setC(12.5)
svm.setGamma(0.50625)
svm.train(X_train, cv2.ml.ROW_SAMPLE, y_train)
svm.save('HSL_svm.dat')

In [102]:
######      Testing      ########################

# Element [1] of each predict() result holds the predicted labels that are
# compared against the true labels below.
val_result = svm.predict(X_val)
test_result = svm.predict(hog_test)

#######   Check Accuracy   ########################

# 1 marks a misclassified sample, 0 a correctly classified one.
mask_val = (np.ravel(val_result[1]) != np.asarray(y_val)).astype(int)
mask_test = (np.ravel(test_result[1]) != np.asarray(test_labels)).astype(int)
incorrect_val = np.count_nonzero(mask_val)
incorrect_test = np.count_nonzero(mask_test)

total_val = len(y_val)
correct_val = total_val - incorrect_val
print('Number of validation samples got wrong:',incorrect_val)
print('Number of validation samples got right:',correct_val)
print('Validation accuracy: {:6.2f}%'.format(correct_val/total_val*100))

total_test = len(test_labels)
correct_test = total_test - incorrect_test
print('Number of testing samples got wrong:',incorrect_test)
print('Number of testing samples got right:',correct_test)
print('Testing accuracy: {:6.2f}%'.format(correct_test/total_test*100))

Number of validation samples got wrong: 23
Number of validation samples got right: 4777
Validation accuracy:  99.52%
Number of testing samples got wrong: 3265
Number of testing samples got right: 1535
Testing accuracy:  31.98%


In [103]:
# Per-class precision / recall / F1 of the SVM on the held-out test pictures.
# (Swap in confusion_matrix(test_labels, test_result[1]) for the raw counts.)
svm_test_report = classification_report(test_labels, test_result[1])
print(svm_test_report)

             precision    recall  f1-score   support

          0       0.00      0.00      0.00       200
          1       0.25      0.20      0.22       200
          2       0.45      0.76      0.57       200
          3       0.53      0.39      0.45       200
          4       0.25      0.41      0.31       200
          5       0.47      0.94      0.63       200
          6       0.76      0.33      0.46       200
          7       0.01      0.01      0.01       200
          8       0.37      0.73      0.49       200
          9       0.06      0.11      0.08       200
         10       0.76      0.61      0.68       200
         11       0.88      0.43      0.58       200
         12       0.18      0.05      0.08       200
         13       0.08      0.01      0.01       200
         14       0.73      0.14      0.23       200
         15       0.27      0.01      0.03       200
         16       0.00      0.00      0.00       200
         17       0.89      0.32      0.46   

  'precision', 'predicted', average, warn_for)


In [None]:

# resp = model.predict(samples)
# err = (labels != resp).mean()
# print('Accuracy: %.2f %%' % ((1 - err)*100))

# confusion = np.zeros((10, 10), np.int32)
# for i, j in zip(labels, resp):
#     confusion[int(i), int(j)] += 1
# print('confusion matrix:')
# print(confusion)

# vis = []
# for img, flag in zip(digits, resp == labels):
#     img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
#     if not flag:
#         img[...,:2] = 0

#     vis.append(img)
# return mosaic(25, vis)




# ANN network

In [89]:
def get_model_generic(x_train, depth):
    """Build a fully connected classifier over feature vectors.

    The network is a chain of Dense(relu) -> Dropout stages followed by a
    24-way softmax output.

    Args:
        x_train: training samples; the length of the first sample fixes the
            input size of the network.
        depth: number of 512-unit stages inserted after the first stage.

    Returns:
        The (uncompiled) Model.
    """
    n_features = x_train[0].shape[0]
    inputs = Input(shape=(n_features,))

    # (units, dropout rate) of every hidden stage, in the order they are stacked.
    stages = [(1024, 0.5)] + [(512, 0.5)] * depth + [(256, 0.5), (64, 0.4)]

    f = inputs
    for units, rate in stages:
        f = Dense(units, activation='relu')(f)
        f = Dropout(rate = rate)(f)

    outputs = Dense(24, activation='softmax')(f)
    return Model(inputs=inputs, outputs=outputs)

# Model for high dimensional input 

# def get_model_generic(x_train, depth):
#     inputs = Input(shape=(x_train[0].shape[0],))
#     f = Dense(2024, activation='relu')(inputs)
#     f = Dropout(rate = 0.5)(f)
#     for i in range(depth):
#         f = Dense(1024, activation='relu')(f)
#         f = Dropout(rate = 0.5)(f)
#     f = Dense(256, activation='relu')(f)
#     f = Dropout(rate = 0.5)(f)
#     f = Dense(64, activation='relu')(f)
#     f = Dropout(rate = 0.4)(f)
#     outputs = Dense(24, activation='softmax')(f)
#     return Model(inputs=inputs, outputs=outputs)

In [90]:
# One-hot encode the integer labels (24 classes) for the softmax network.
y_train_cat, y_val_cat = [to_categorical(y, 24) for y in (y_train, y_val)]

In [94]:
model = get_model_generic(X_train, 1)
# summary() prints the layer table itself and returns None, so wrapping it in
# print() only added a stray "None" line to the output.
model.summary()
model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['accuracy'])
# Train on the one-hot labels and track the held-out validation split each epoch.
history = model.fit(x=X_train, y=y_train_cat, batch_size=1024, epochs=70, validation_data=(X_val, y_val_cat))

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         (None, 3600)              0         
_________________________________________________________________
dense_11 (Dense)             (None, 2024)              7288424   
_________________________________________________________________
dropout_9 (Dropout)          (None, 2024)              0         
_________________________________________________________________
dense_12 (Dense)             (None, 1024)              2073600   
_________________________________________________________________
dropout_10 (Dropout)         (None, 1024)              0         
_________________________________________________________________
dense_13 (Dense)             (None, 256)               262400    
_________________________________________________________________
dropout_11 (Dropout)         (None, 256)               0         
__________

Epoch 48/70
Epoch 49/70
Epoch 50/70
Epoch 51/70
Epoch 52/70
Epoch 53/70
Epoch 54/70
Epoch 55/70
Epoch 56/70
Epoch 57/70
Epoch 58/70
Epoch 59/70
Epoch 60/70
Epoch 61/70
Epoch 62/70
Epoch 63/70
Epoch 64/70
Epoch 65/70
Epoch 66/70
Epoch 67/70
Epoch 68/70
Epoch 69/70
Epoch 70/70


In [96]:
# Evaluate the network on the held-out test pictures (one-hot labels, 24 classes).
y_test_cat = to_categorical(test_labels, 24)
score, acc = model.evaluate(hog_test, y_test_cat)
for caption, value in (('Test score:', score), ('Test accuracy:', acc)):
    print(caption, value)

Test score: 5.551863814030342
Test accuracy: 0.20145833333333332


In [86]:
# Serialize the model architecture to JSON.
model_json = model.to_json()
with open("model.json", "w") as architecture_file:
    architecture_file.write(model_json)

# Serialize the weights to HDF5.
model.save_weights("model.h5")
print("Saved model to disk")

Saved model to disk


In [None]:
# Per-class loss weights. Start every one of the 24 classes at a neutral 1.0 and
# override only the classes to emphasise.
# NOTE(review): Keras is expected to reject a class_weight dict that omits classes
# present in the data - confirm for the installed version.
class_weight = {label: 1. for label in range(24)}
class_weight.update({1: 50.,
                     2: 2.})
# The one-hot training labels are `y_train_cat` (there is no `Y_train` in this
# notebook), and `epochs` replaces the obsolete `nb_epoch` keyword.
model.fit(X_train, y_train_cat, epochs=5, batch_size=32, class_weight=class_weight)