In [2]:
import numpy as np
from feature_extraction import OCR_raw_data, OCR_feature_data, face_raw_data, face_feature_data

Two-Layer Neural Network on digit OCR and Face Detection

In [214]:
class TwoLayerNN:
    """Fully connected network: input -> sigmoid hidden layer -> sigmoid output layer.

    State kept on the instance:
      * ``weights``   - flat vector holding the (hidden_size, input_size) first-layer
                        matrix followed by the (output_size, hidden_size) second-layer
                        matrix.
      * ``biases``    - two scalars, one per layer, added to every unit of that layer.
                        They start at 1 and are never changed by ``update_parameters``.
      * ``gradients`` - flat accumulator laid out like ``weights``.
    """

    # Pre-activations are clipped to +/- this bound before exponentiation so that
    # np.exp cannot overflow float64; inside the bound the sigmoid is unchanged.
    _MAX_LOGIT = 500.0

    def __init__(self, input_size, hidden_size, output_size, seed=None):
        """Create the network with standard-normal initial weights.

        Args:
            input_size: number of input features.
            hidden_size: number of hidden units.
            output_size: number of output units (1 selects the thresholded
                single-output mode, anything else is treated as one unit per class).
            seed: optional seed for a private ``np.random.RandomState``. When omitted
                the global NumPy RNG is used, so ``np.random.seed`` still applies.
        """
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        rng = np.random if seed is None else np.random.RandomState(seed)
        W1 = rng.randn(self.hidden_size, self.input_size)
        W2 = rng.randn(self.output_size, self.hidden_size)
        self.weights = np.concatenate((W1.flatten(), W2.flatten()))
        # Float dtype so that a future bias update is not silently truncated to int.
        self.biases = np.array([1.0, 1.0])
        self.gradients = np.zeros(self.weights.shape)

    def _split(self, flat):
        """Reshape a flat parameter vector into (layer-1 matrix, layer-2 matrix).

        For a contiguous ``flat`` NumPy returns views, so in-place changes to the
        returned matrices write through to ``flat``.
        """
        cut = self.input_size * self.hidden_size
        first = np.reshape(flat[:cut], (self.hidden_size, self.input_size))
        second = np.reshape(flat[cut:], (self.output_size, self.hidden_size))
        return first, second

    def _decode(self, A3):
        """Turn output activations into labels (threshold at 0.5 or arg-max over units)."""
        if self.output_size == 1:
            return (A3 > 0.5).astype(int)
        return np.argmax(A3, axis=0)

    def forward_propagation(self, X):
        """Run the network forward.

        Args:
            X: one sample of shape (input_size,) or a column batch of shape
                (input_size, m); the first axis is contracted with the weights.

        Returns:
            (A3, cache): the output activations and a dict with the intermediate
            values "A1", "Z2", "A2", "Z3", "A3" needed by ``backward_propagation``.
        """
        W1, W2 = self._split(self.weights)
        b1 = self.biases[0]
        b2 = self.biases[1]

        # First layer
        A1 = X
        Z2 = np.dot(W1, A1) + b1
        A2 = self.sigmoid(Z2)

        # Second layer
        Z3 = np.dot(W2, A2) + b2
        A3 = self.sigmoid(Z3)

        cache = {"Z2": Z2, "A2": A2, "Z3": Z3, "A3": A3, "A1": A1}
        return A3, cache

    def sigmoid(self, Z):
        """Logistic function 1 / (1 + exp(-Z)), safe against overflow for large |Z|."""
        clipped = np.clip(Z, -self._MAX_LOGIT, self._MAX_LOGIT)
        return 1 / (1 + np.exp(-clipped))

    def backward_propagation(self, Y, cache):
        """Add one sample's weight gradients to the running accumulator.

        Args:
            Y: the sample's label. With several output units it is used as a class
                index for a one-hot target; with a single output unit the label
                itself (0 or 1) is the target.
            cache: the dict returned by ``forward_propagation`` for this sample.

        Returns:
            ``[L1_grads, L2_grads]``: the accumulated (not per-sample) gradient
            matrices, i.e. the contents of ``self.gradients`` after the addition.
        """
        _, W2 = self._split(self.weights)
        A1 = cache["A1"]
        A2 = cache["A2"]
        A3 = cache["A3"]
        label = np.array(Y)
        if self.output_size == 1:
            # np.eye(1)[label] cannot represent label 1 and maps label 0 to target 1,
            # so the single-output case uses the label directly.
            target = label.astype(float).reshape(1)
        else:
            target = np.eye(self.output_size)[label.astype(int)]

        dZ3 = A3 - target
        dZ2 = np.dot(W2.T, dZ3) * (A2 * (1 - A2))  # A2 * (1 - A2) is the sigmoid derivative

        # These are views onto self.gradients: the += below accumulates in place.
        L1_grads, L2_grads = self._split(self.gradients)
        L1_grads += dZ2[:, np.newaxis] * A1
        L2_grads += dZ3[:, np.newaxis] * A2
        return [L1_grads, L2_grads]

    def update_parameters(self, grads, learning_rate, n):
        """Take one gradient-descent step on the weights.

        Args:
            grads: ``[L1_grads, L2_grads]`` as returned by ``backward_propagation``.
            learning_rate: step size.
            n: divisor applied to the gradients before the step.

        Only the weights move; the biases are left as they are.
        NOTE(review): bias training was commented out in the original code - confirm
        whether fixed biases are intended.
        """
        W1, W2 = self._split(self.weights)
        L1_grads = grads[0]
        L2_grads = grads[1]

        W1 -= learning_rate * (L1_grads / n)
        W2 -= learning_rate * (L2_grads / n)

        self.weights = np.concatenate((W1.flatten(), W2.flatten()))
        self.gradients = np.concatenate((L1_grads.flatten(), L2_grads.flatten()))

    def fit(self, X, Y, num_iterations, learning_rate):
        """Train on (X, Y) and print the accuracy observed during each pass.

        Args:
            X: array of shape (n_samples, input_size); rows are samples.
            Y: sequence of labels, one per row of X.
            num_iterations: number of passes over the data.
            learning_rate: step size forwarded to ``update_parameters``.

        Each pass resets the gradient accumulator and visits the samples in order.
        A sample that is already classified correctly is only counted; a
        misclassified one adds its gradient to the accumulator and the accumulated
        sum (divided by ``len(Y)``) is applied immediately.
        NOTE(review): gradients of earlier mistakes are therefore re-applied on every
        later update within the same pass - confirm this is the intended scheme.
        """
        n = len(Y)

        for i in range(num_iterations):
            correct = 0
            self.gradients = np.zeros(self.gradients.shape)
            for j in range(X.shape[0]):
                x = X[j]
                y = Y[j]
                A3, cache = self.forward_propagation(x)
                # Same decision rule as predict(), so single-output nets are scored properly.
                if self._decode(A3) == y:
                    correct += 1
                    continue

                grads = self.backward_propagation(y, cache)
                self.update_parameters(grads, learning_rate, n)
            accuracy = correct / n if n else 0.0  # avoid ZeroDivisionError on empty data
            print("Iteration: ", i, "Accuracy: ", accuracy)

    def predict(self, X):
        """Predict labels for one sample or for a batch.

        Args:
            X: a single sample of shape (input_size,), a column batch of shape
                (input_size, m), or a row batch of shape (m, input_size). A 2-D input
                is read as a row batch only when its second axis equals
                ``input_size`` and its first does not; otherwise columns are samples.

        Returns:
            With several output units: the arg-max class index (a scalar for one
            sample, an array of length m for a batch). With one output unit: an int
            array of 0/1 decisions shaped like the output activations.
        """
        X = np.asarray(X)
        row_batch = (
            X.ndim == 2
            and X.shape[0] != self.input_size
            and X.shape[1] == self.input_size
        )
        A3, _ = self.forward_propagation(X.T if row_batch else X)
        return self._decode(A3)


OCR

In [204]:
# Image/label file pairs for the digit (OCR) data, one pair per split.
OCRtrainImg = "digitdata/trainingimages"
OCRtrainLabel = "digitdata/traininglabels"

OCRvalidImg = "digitdata/validationimages"
OCRvalidLabel = "digitdata/validationlabels"

OCRtestImg = "digitdata/testimages"
OCRtestLabel = "digitdata/testlabels"

# Share of each split to keep, counted from the front (1.0 keeps everything).
# Lower it in this one place to iterate on a smaller subset.
OCR_DATA_FRACTION = 1.0

X_train, Y_train = OCR_raw_data(OCRtrainImg, OCRtrainLabel)
X_train = X_train[:int(X_train.shape[0] * OCR_DATA_FRACTION), :]
Y_train = Y_train[:int(len(Y_train) * OCR_DATA_FRACTION)]

X_valid, Y_valid = OCR_raw_data(OCRvalidImg, OCRvalidLabel)
X_valid = X_valid[:int(X_valid.shape[0] * OCR_DATA_FRACTION), :]
Y_valid = Y_valid[:int(len(Y_valid) * OCR_DATA_FRACTION)]

X_test, Y_test = OCR_raw_data(OCRtestImg, OCRtestLabel)
X_test = X_test[:int(X_test.shape[0] * OCR_DATA_FRACTION), :]
Y_test = Y_test[:int(len(Y_test) * OCR_DATA_FRACTION)]

# Alternative input representation: replace OCR_raw_data(img, labels) above with
# OCR_feature_data(img, labels, 7, 7).
# NOTE(review): the meaning of the two 7s is defined in feature_extraction - confirm
# there; the network's input_size has to match X_train.shape[1] afterwards.

# Report the size of every split and make sure samples and labels line up.
for split_X, split_Y in ((X_train, Y_train), (X_valid, Y_valid), (X_test, Y_test)):
    assert split_X.shape[0] == len(split_Y), "samples and labels are misaligned"
    print(split_X.shape)
    print(len(split_Y))

(5000, 784)
5000
(1000, 784)
1000
(1000, 784)
1000


In [210]:
# Train the two-layer network on the OCR training split and score it on the
# validation split.
# TwoLayerNN draws its initial weights from np.random.randn, so the global seed is
# fixed first to make a rerun reproduce the same numbers.
np.random.seed(0)
# input_size is read from the data so it stays correct if the feature set changes.
ocr_NN = TwoLayerNN(input_size=X_train.shape[1], hidden_size=50, output_size=10)
ocr_NN.fit(X_train, Y_train, 20, 0.01)

# Predict one validation sample at a time.
ocr_predictions = [ocr_NN.predict(sample) for sample in X_valid]
print(ocr_predictions[0:100])

# Accuracy = matching predictions / number of predictions.
CorrectPredictionCount = sum(
    1 for predicted, actual in zip(ocr_predictions, Y_valid) if predicted == actual
)
Ocr_Accuracy = CorrectPredictionCount / len(ocr_predictions)
print(f'OCR Accuracy: {Ocr_Accuracy * 100:.2f}%')

Iteration:  0 Accuracy:  0.1542
Iteration:  1 Accuracy:  0.3206
Iteration:  2 Accuracy:  0.4474
Iteration:  3 Accuracy:  0.5198
Iteration:  4 Accuracy:  0.5678
Iteration:  5 Accuracy:  0.605
Iteration:  6 Accuracy:  0.6398
Iteration:  7 Accuracy:  0.6604
Iteration:  8 Accuracy:  0.665
Iteration:  9 Accuracy:  0.6832
Iteration:  10 Accuracy:  0.683
Iteration:  11 Accuracy:  0.6946
Iteration:  12 Accuracy:  0.699
Iteration:  13 Accuracy:  0.7078
Iteration:  14 Accuracy:  0.7084
Iteration:  15 Accuracy:  0.7186
Iteration:  16 Accuracy:  0.7196
Iteration:  17 Accuracy:  0.7262
Iteration:  18 Accuracy:  0.7274
Iteration:  19 Accuracy:  0.7358
[7, 2, 1, 0, 2, 1, 4, 7, 2, 9, 0, 2, 9, 0, 1, 3, 9, 7, 3, 4, 9, 6, 2, 5, 4, 0, 7, 9, 0, 1, 3, 5, 3, 2, 7, 2, 7, 1, 2, 1, 1, 7, 9, 2, 6, 3, 3, 2, 9, 9, 6, 3, 5, 6, 4, 0, 4, 3, 4, 5, 7, 5, 9, 2, 2, 4, 6, 4, 3, 0, 7, 0, 5, 9, 1, 7, 3, 7, 9, 7, 7, 6, 2, 7, 8, 4, 7, 3, 6, 1, 3, 6, 9, 3, 1, 6, 3, 3, 6, 9]
OCR Accuracy: 62.60%


Face Detection

In [212]:
# Image/label file pairs for the face data, one pair per split.
FacetrainImg = "facedata/facedatatrain"
FacetrainLabel = "facedata/facedatatrainlabels"
FacevalidImg = "facedata/facedatavalidation"
FacevalidLabel = "facedata/facedatavalidationlabels"
FacetestImg = "facedata/facedatatest"
FacetestLabel = "facedata/facedatatestlabels"

# Share of each split to keep, counted from the front (1.0 keeps everything).
# Lower it in this one place to iterate on a smaller subset.
FACE_DATA_FRACTION = 1.0

# NOTE: these assignments reuse the X_*/Y_* names of the OCR section, so from here
# on they refer to the face data.
X_train, Y_train = face_raw_data(FacetrainImg, FacetrainLabel)
X_train = X_train[:int(X_train.shape[0] * FACE_DATA_FRACTION), :]
Y_train = Y_train[:int(len(Y_train) * FACE_DATA_FRACTION)]

X_valid, Y_valid = face_raw_data(FacevalidImg, FacevalidLabel)
X_valid = X_valid[:int(X_valid.shape[0] * FACE_DATA_FRACTION), :]
Y_valid = Y_valid[:int(len(Y_valid) * FACE_DATA_FRACTION)]

X_test, Y_test = face_raw_data(FacetestImg, FacetestLabel)
X_test = X_test[:int(X_test.shape[0] * FACE_DATA_FRACTION), :]
Y_test = Y_test[:int(len(Y_test) * FACE_DATA_FRACTION)]

# Report the size of every split and make sure samples and labels line up.
for split_X, split_Y in ((X_train, Y_train), (X_valid, Y_valid), (X_test, Y_test)):
    assert split_X.shape[0] == len(split_Y), "samples and labels are misaligned"
    print(split_X.shape)
    print(len(split_Y))

(451, 4200)
451
(301, 4200)
301
(150, 4200)
150


In [215]:
# Train the two-layer network on the face training split and score it on the
# validation split.
# TwoLayerNN draws its initial weights from np.random.randn, so the global seed is
# fixed first to make a rerun reproduce the same numbers.
np.random.seed(0)
# input_size is read from the data instead of hard-coding the image dimensions.
face_nn = TwoLayerNN(input_size=X_train.shape[1], hidden_size=50, output_size=2)
face_nn.fit(X_train, Y_train, 20, 0.01)

# Predict one validation sample at a time.
face_predictions = [face_nn.predict(sample) for sample in X_valid]
print(face_predictions[0:100])

# Accuracy = matching predictions / number of predictions.
CorrectPredictionCount = sum(
    1 for predicted, actual in zip(face_predictions, Y_valid) if predicted == actual
)
Face_Accuracy = CorrectPredictionCount / len(face_predictions)
print(f'Face Accuracy: {Face_Accuracy * 100:.2f}%')

Iteration:  0 Accuracy:  0.47671840354767187
Iteration:  1 Accuracy:  0.4678492239467849
Iteration:  2 Accuracy:  0.5033259423503326
Iteration:  3 Accuracy:  0.5033259423503326
Iteration:  4 Accuracy:  0.532150776053215
Iteration:  5 Accuracy:  0.516629711751663
Iteration:  6 Accuracy:  0.5454545454545454
Iteration:  7 Accuracy:  0.5343680709534369
Iteration:  8 Accuracy:  0.5631929046563193
Iteration:  9 Accuracy:  0.5521064301552107
Iteration:  10 Accuracy:  0.5809312638580931
Iteration:  11 Accuracy:  0.5831485587583148
Iteration:  12 Accuracy:  0.5986696230598669
Iteration:  13 Accuracy:  0.5920177383592018
Iteration:  14 Accuracy:  0.6031042128603105
Iteration:  15 Accuracy:  0.6031042128603105
Iteration:  16 Accuracy:  0.614190687361419
Iteration:  17 Accuracy:  0.6097560975609756
Iteration:  18 Accuracy:  0.6297117516629712
Iteration:  19 Accuracy:  0.6297117516629712
[1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 