# Đồ án thị giác máy tính
* Sử dụng đặc trưng HOG trong bài báo "An improved handwritten Chinese character recognition system using support vector machine" để rút trích vector đặc trưng trên tập MNIST
* Giảm số chiều vector đặc trưng bằng LDA
* Phân lớp dựa trên khoảng cách Euclidean

Thêm các thư viện cần thiết

In [1]:
import numpy as np
import pickle
from scipy import ndimage
import gzip
import cv2
from PIL import Image
import math
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

NORMAL_SAMPLE_PICTURE_LENGTH = 64
BLOCK_NUMBER_PIXEL_PER_DIMENSION = 8
PIXEL_STEP_PER_BLOCK = 8
LENGTH_OF_NORMALIZE_PICTURE = 80
SIZE_OF_FEATURE_BLOCK = 16

NUMBER_OF_BLOCK_PER_ROW = (LENGTH_OF_NORMALIZE_PICTURE - PIXEL_STEP_PER_BLOCK) / PIXEL_STEP_PER_BLOCK #equal 9 

SIZE_OF_FEATURE_VECTOR_ONE_BLOCK = 16

Hàm đọc file dữ liệu

In [2]:
def read_mnist(mnist_file):
    """
    Reads MNIST data.
    
    Parameters
    ----------
    mnist_file : string
        The name of the MNIST file (e.g., 'mnist.pkl.gz').
    
    Returns
    -------
    (train_X, train_Y, val_X, val_Y, test_X, test_Y) : tuple
        train_X : numpy array, shape (N=50000, d=784)
            Input vectors of the training set.
        train_Y: numpy array, shape (N=50000)
            Outputs of the training set.
        val_X : numpy array, shape (N=10000, d=784)
            Input vectors of the validation set.
        val_Y: numpy array, shape (N=10000)
            Outputs of the validation set.
        test_X : numpy array, shape (N=10000, d=784)
            Input vectors of the test set.
        test_Y: numpy array, shape (N=10000)
            Outputs of the test set.
    """
    f = gzip.open(mnist_file, 'rb')
    train_data, val_data, test_data = pickle.load(f, encoding='latin1')
    f.close()
    
    train_X, train_Y = train_data
    val_X, val_Y = val_data
    test_X, test_Y = test_data    

    return train_X, train_Y, val_X, val_Y, test_X, test_Y

Hàm tìm vector đặc trừng từ tập dữ liệu:

Hàm tính khu vực A: **Grad_A(rowIndex, colIndex, gradientMagnitude)**  
Hàm tính khu vực B: **Grad_B(rowIndex, colIndex, gradientMagnitude)**  
Hàm tính khu vực C: **Grad_C(rowIndex, colIndex, gradientMagnitude)**  
Hàm tính khu vực D: **Grad_D(rowIndex, colIndex, gradientMagnitude)**  
Hàm tính vector 16 chiều:  **Extract_Feature_From_A_Block**
Hàm rút trích đặc trưng HOG: **HOG_Feature_Extractor(rawData)**  
Hàm tìm giá trị tối đa trong vector đặc trưng (dùng để normalize): **Find_Feature_Max_Value(featureVector)**  
Hàm chuẩn hóa vector: **Normalize_Feature_Vector(featureVector)**

In [3]:
def Grad_A(rowIndex, colIndex, gradientMagnitude):
    result = 0
    row = (rowIndex + 6) * LENGTH_OF_NORMALIZE_PICTURE + 6 + colIndex
    result += gradientMagnitude[row] + gradientMagnitude[row + 1] + gradientMagnitude[row + 2] + gradientMagnitude[row + 3]
    for i in range(0,2):
        row += LENGTH_OF_NORMALIZE_PICTURE
        result += gradientMagnitude[row] + gradientMagnitude[row + 1] + gradientMagnitude[row + 2] + gradientMagnitude[row + 3]


    return result

In [4]:
def Grad_B(rowIndex, colIndex, gradientMagnitude):
    result = 0
    row = (rowIndex + 4) * LENGTH_OF_NORMALIZE_PICTURE + 4 + colIndex
    result += gradientMagnitude[row] + gradientMagnitude[row + 1] + gradientMagnitude[row + 2] + gradientMagnitude[row + 3] + \
           gradientMagnitude[row + 4] + gradientMagnitude[row + 5] + gradientMagnitude[row + 6] + gradientMagnitude[
               row + 7]
    row += LENGTH_OF_NORMALIZE_PICTURE
    result += gradientMagnitude[row] + gradientMagnitude[row + 1] + gradientMagnitude[row + 2] + gradientMagnitude[row + 3] + \
           gradientMagnitude[row + 4] + gradientMagnitude[row + 5] + gradientMagnitude[row + 6] + gradientMagnitude[
               row + 7]
    
    for i in range(0,3):
        row += LENGTH_OF_NORMALIZE_PICTURE
        result += gradientMagnitude[row] + gradientMagnitude[row + 1] + gradientMagnitude[row + 6] + gradientMagnitude[row + 7]


    row += LENGTH_OF_NORMALIZE_PICTURE
    result += gradientMagnitude[row] + gradientMagnitude[row + 1] + gradientMagnitude[row + 2] + gradientMagnitude[row + 3] + \
           gradientMagnitude[row + 4] + gradientMagnitude[row + 5] + gradientMagnitude[row + 6] + gradientMagnitude[
               row + 7]
    row += LENGTH_OF_NORMALIZE_PICTURE
    result += gradientMagnitude[row] + gradientMagnitude[row + 1] + gradientMagnitude[row + 2] + gradientMagnitude[row + 3] + \
           gradientMagnitude[row + 4] + gradientMagnitude[row + 5] + gradientMagnitude[row + 6] + gradientMagnitude[
               row + 7]

    return result

In [5]:
def Grad_C(rowIndex, colIndex, gradientMagnitude):
    result = 0
    row = (rowIndex + 2) * LENGTH_OF_NORMALIZE_PICTURE + 2 + colIndex
    result += gradientMagnitude[row] + gradientMagnitude[row + 1] + gradientMagnitude[row + 2] + gradientMagnitude[row + 3] + \
           gradientMagnitude[row + 4] + gradientMagnitude[row + 5] + gradientMagnitude[row + 6] + gradientMagnitude[
               row + 7] + gradientMagnitude[row + 8] + gradientMagnitude[row + 9] + gradientMagnitude[row + 10] + \
           gradientMagnitude[row + 11]
    row += LENGTH_OF_NORMALIZE_PICTURE
    result += gradientMagnitude[row] + gradientMagnitude[row + 1] + gradientMagnitude[row + 2] + gradientMagnitude[row + 3] + \
           gradientMagnitude[row + 4] + gradientMagnitude[row + 5] + gradientMagnitude[row + 6] + gradientMagnitude[
               row + 7] + gradientMagnitude[row + 8] + gradientMagnitude[row + 9] + gradientMagnitude[row + 10] + \
           gradientMagnitude[row + 11]
    
    for i in range(0,7):
        row += LENGTH_OF_NORMALIZE_PICTURE
        result += gradientMagnitude[row] + gradientMagnitude[row + 1] + gradientMagnitude[row + 10] + gradientMagnitude[row + 11]


    row += LENGTH_OF_NORMALIZE_PICTURE
    result += gradientMagnitude[row] + gradientMagnitude[row + 1] + gradientMagnitude[row + 2] + gradientMagnitude[row + 3] + \
           gradientMagnitude[row + 4] + gradientMagnitude[row + 5] + gradientMagnitude[row + 6] + gradientMagnitude[
               row + 7] + gradientMagnitude[row + 8] + gradientMagnitude[row + 9] + gradientMagnitude[row + 10] + \
           gradientMagnitude[row + 11]
    row += LENGTH_OF_NORMALIZE_PICTURE
    result += gradientMagnitude[row] + gradientMagnitude[row + 1] + gradientMagnitude[row + 2] + gradientMagnitude[row + 3] + \
           gradientMagnitude[row + 4] + gradientMagnitude[row + 5] + gradientMagnitude[row + 6] + gradientMagnitude[
               row + 7] + gradientMagnitude[row + 8] + gradientMagnitude[row + 9] + gradientMagnitude[row + 10] + \
           gradientMagnitude[row + 11]


    return result

In [6]:
def Grad_D(rowIndex, colIndex, gradientMagnitude):
    result = 0
    row = rowIndex * LENGTH_OF_NORMALIZE_PICTURE + colIndex
    result += gradientMagnitude[row] + gradientMagnitude[row + 1] + gradientMagnitude[row + 2] + gradientMagnitude[row + 3] + \
           gradientMagnitude[row + 4] + gradientMagnitude[row + 5] + gradientMagnitude[row + 6] + gradientMagnitude[
               row + 7] + gradientMagnitude[row + 8] + gradientMagnitude[row + 9] + gradientMagnitude[row + 10] + \
           gradientMagnitude[row + 11] + gradientMagnitude[row + 12] + gradientMagnitude[row + 13] + gradientMagnitude[
               row + 14] + gradientMagnitude[row + 15]
    row += LENGTH_OF_NORMALIZE_PICTURE
    result += gradientMagnitude[row] + gradientMagnitude[row + 1] + gradientMagnitude[row + 2] + gradientMagnitude[row + 3] + \
           gradientMagnitude[row + 4] + gradientMagnitude[row + 5] + gradientMagnitude[row + 6] + gradientMagnitude[
               row + 7] + gradientMagnitude[row + 8] + gradientMagnitude[row + 9] + gradientMagnitude[row + 10] + \
           gradientMagnitude[row + 11] + gradientMagnitude[row + 12] + gradientMagnitude[row + 13] + gradientMagnitude[
               row + 14] + gradientMagnitude[row + 15]
    
    for i in range(0,11):
        row += LENGTH_OF_NORMALIZE_PICTURE
        result += gradientMagnitude[row] + gradientMagnitude[row + 1] + gradientMagnitude[row + 14] + gradientMagnitude[row + 15]

    row += LENGTH_OF_NORMALIZE_PICTURE
    result += gradientMagnitude[row] + gradientMagnitude[row + 1] + gradientMagnitude[row + 2] + gradientMagnitude[row + 3] + \
           gradientMagnitude[row + 4] + gradientMagnitude[row + 5] + gradientMagnitude[row + 6] + gradientMagnitude[
               row + 7] + gradientMagnitude[row + 8] + gradientMagnitude[row + 9] + gradientMagnitude[row + 10] + \
           gradientMagnitude[row + 11] + gradientMagnitude[row + 12] + gradientMagnitude[row + 13] + gradientMagnitude[
               row + 14] + gradientMagnitude[row + 15]
    row += LENGTH_OF_NORMALIZE_PICTURE
    result += gradientMagnitude[row] + gradientMagnitude[row + 1] + gradientMagnitude[row + 2] + gradientMagnitude[row + 3] + \
           gradientMagnitude[row + 4] + gradientMagnitude[row + 5] + gradientMagnitude[row + 6] + gradientMagnitude[
               row + 7] + gradientMagnitude[row + 8] + gradientMagnitude[row + 9] + gradientMagnitude[row + 10] + \
           gradientMagnitude[row + 11] + gradientMagnitude[row + 12] + gradientMagnitude[row + 13] + gradientMagnitude[
               row + 14] + gradientMagnitude[row + 15]


    return result

In [7]:
def ExtractBlockFeatures(i , j, FeatureVector, gradientMagnitude, gradientAngles): 
    i = i * PIXEL_STEP_PER_BLOCK
    MagnitudeFromAreaA = Grad_A(i, j, gradientMagnitude)
    MagnitudeFromAreaB = Grad_B(i, j, gradientMagnitude)
    MagnitudeFromAreaC = Grad_C(i, j, gradientMagnitude)
    MagnitudeFromAreaD = Grad_D(i, j, gradientMagnitude)
    magnitude = 4 * MagnitudeFromAreaA + 3 * MagnitudeFromAreaB + 2 * MagnitudeFromAreaC + MagnitudeFromAreaD
    
    stopRow = i + 16
    stopCol = j + 16
    kArray = np.zeros(32)

    for row in range(i,stopRow-1):
        for col in range(j, stopCol-1):
            k = (gradientAngles[row * LENGTH_OF_NORMALIZE_PICTURE + col]) * 32 / 360 #converse angle to bin
            angleBin = math.floor(k)

            remain = (k - angleBin)
            if angleBin > 31:
                angleBin -= 32

            kArray[angleBin] += magnitude * (1 - remain)
            angleBin += 1
            if angleBin > 31:
                angleBin -= 32
            kArray[angleBin] += magnitude * remain   
    block = int((i * NUMBER_OF_BLOCK_PER_ROW + j) * SIZE_OF_FEATURE_VECTOR_ONE_BLOCK / PIXEL_STEP_PER_BLOCK)
    temp = 6 * kArray[0] + 4 * kArray[1] + kArray[2]
    FeatureVector[block] = float(math.pow(temp, 0.4))
    block+=1
    #print (block)
    for k in range(2,28,2):
        temp = kArray[k - 2] + 4 * kArray[k - 1] + 6 * kArray[k] + 4 * kArray[k + 1] + kArray[k + 2]
        FeatureVector[block] = float(math.pow(temp, 0.4))
        block += 1
    temp = kArray[28] + 4 * kArray[29] + 6 * kArray[30] + 4 * kArray[31]
    FeatureVector[block] = float(math.pow(temp, 0.4))

In [8]:
def HOG_Feature_Extractor(rawData):
    
    Data_HOG = np.zeros((rawData.shape[0],1296))
    for indexOfData in range(0,Data_HOG.shape[0]-1):
        img = rawData[indexOfData].reshape((28,28))
        img = cv2.resize(img,(64,64))
        
        background = np.zeros((80,80))
        
        background[8:72,8:72] = img
        
        vertical_Robert = np.array( [[ 0, 0, 0 ],
                                     [ 0, 1, 0 ],
                                     [ 0, 0,-1 ]] )

        horizontal_Robert = np.array( [[ 0, 0, 0 ],
                                       [ 0, 0, 1 ],
                                       [ 0,-1, 0 ]] )
        gx = ndimage.convolve( background, vertical_Robert )
        gy = ndimage.convolve( background, horizontal_Robert )
        #Find gradient magnitude and orientation
        mag, angle = cv2.cartToPolar(gx, gy, angleInDegrees=True)
        mag = mag.flatten()
        angle = angle.flatten()
        
        for i in range(0, 9 , 1):
            for j in range(0, LENGTH_OF_NORMALIZE_PICTURE - PIXEL_STEP_PER_BLOCK, PIXEL_STEP_PER_BLOCK):
                ExtractBlockFeatures(i, j,Data_HOG[indexOfData], mag, angle)
        Normalize_Feature_Vector(Data_HOG[indexOfData])
    return Data_HOG

In [9]:
def Find_Feature_Max_Value(featureVector):
    max = -float("inf")
    for i in range(len(featureVector)):
        if(featureVector[i]>max):
            max = featureVector[i]
    return max

In [10]:
def Normalize_Feature_Vector(featureVector):
    max = Find_Feature_Max_Value(featureVector)
    for i in range(len(featureVector)):
        featureVector[i] = featureVector[i]/max

Hàm giảm chiều dữ liệu

In [11]:
def Data_Dimension_Reduction(trainData,label):
    lda = LinearDiscriminantAnalysis(n_components = 100)
    X_train_lda = lda.fit_transform(trainData, label)
    return X_train_lda

Hàm tìm mean vector của mỗi lớp

In [12]:
def Calculate_Mean_Vectors(trainData, label):
    meanMatrix = np.zeros((10,trainData.shape[1]))
    numberVectorContribute = np.zeros(10)
    for i in range(len(trainData)):
        meanMatrix[int(label[i])] += trainData[i]
        numberVectorContribute[int(label[i])] +=1
    for i in range(10):
        meanMatrix[i] = meanMatrix[i]/numberVectorContribute[i]
    return meanMatrix

Hàm tính khoảng cách Euclidean

In [13]:
def GetSquaredEuclideanDist(x,y):
    return np.sum((x-y)**2)

Hàm phân lớp bằng Euclidean distance:

In [14]:
def Euclidean_Classifier(meanMatrix,testData):
    mindist = GetSquaredEuclideanDist(testData,meanMatrix[0])
    label = -1
    for i in range(10):
        if (GetSquaredEuclideanDist(testData,meanMatrix[i])<mindist):
            mindist = GetSquaredEuclideanDist(testData,meanMatrix[i])
            label = i
    return label

Chương trình chính

In [15]:
def main():
    train_X, train_Y, val_X, val_Y, test_X, test_Y = read_mnist('mnist.pkl.gz')
    number_train = 5000
    train_X_test = np.zeros((number_train,train_X.shape[1]))
    train_Y_test = np.zeros(number_train)
    
    test_X_test = np.zeros((number_train,test_X.shape[1]))
    test_Y_test = np.zeros(number_train)
    for i in range(0,number_train -1):
        train_Y_test[i] = train_Y[i]
        train_X_test[i] = train_X[i]
    for i in range(0,number_train -1):
        test_Y_test[i] = test_Y[i]
        test_X_test[i] = test_X[i]
    average_Error_Rate = 0
    
    HOG_train = HOG_Feature_Extractor(train_X)
    train_lda = Data_Dimension_Reduction(HOG_train,train_Y)
    HOG_test = HOG_Feature_Extractor(test_X_test)
    test_lda = Data_Dimension_Reduction(HOG_test,test_Y_test)
    test_Data_Length = len(test_lda)
    meanMatrix = Calculate_Mean_Vectors(train_lda,train_Y_test)
    for i in range(test_Data_Length):
        if(Euclidean_Classifier(meanMatrix,test_lda[i]) != test_Y_test[i]):
            average_Error_Rate += 1/test_Data_Length
    print("Average error rate: %f" %average_Error_Rate)
    

In [16]:
main()

MemoryError: 

In [None]:
def main2():
    train_X, train_Y, val_X, val_Y, test_X, test_Y = read_mnist('mnist.pkl.gz')
    number_train = 3000
    train_X_5000 = np.zeros((number_train,train_X.shape[1]))
    train_Y_5000 = np.zeros(number_train)
    k = 4
    
    test_X_5000 = np.zeros((number_train,test_X.shape[1]))
    test_Y_5000 = np.zeros(number_train)
    for i in range(0,number_train-1):
        train_Y_5000[i] = train_Y[i]
        train_X_5000[i] = train_X[i]
    for i in range(0,number_train-1):
        test_Y_5000[i] = test_Y[i]
        test_X_5000[i] = test_X[i]
    
    HOG_train = HOG_Feature_Extractor(train_X_5000)
    train_lda = Data_Dimension_Reduction(HOG_train,train_Y_5000)
    HOG_test = HOG_Feature_Extractor(test_X_5000)
    test_lda = Data_Dimension_Reduction(HOG_test,test_Y_5000)
    test_Data_Length = len(test_lda)
    meanMatrix = Calculate_Mean_Vectors(train_lda,train_Y_5000)
    mindist = GetSquaredEuclideanDist(test_lda[k],meanMatrix[0])
    label = 0
    for i in range(0,10):
        print("Khoang cach toi tap so %d la" %i, GetSquaredEuclideanDist(test_lda[k],meanMatrix[i]))
        if (GetSquaredEuclideanDist(test_lda[k],meanMatrix[i])<mindist):
            mindist = GetSquaredEuclideanDist(test_lda[k],meanMatrix[i])
            label = i
    print("Khoang cach nho nhat la %f, thuoc so %d" %(mindist,label) )
    print("So chinh xac: %d" %test_Y_5000[k])

In [None]:
main2()