In [1]:
import os
import struct
import numpy as np
import cv2
import torch
from torchvision.datasets import MNIST
from torch.utils.data import DataLoader
from sklearn.metrics import accuracy_score

# load MNIST data from PyTorch
train_data = MNIST('C:\\Users\\bkkj\\minist\\', train=True, download=True)
test_data = MNIST('C:\\Users\\bkkj\\minist\\', train=False, download=True)

# convert data to numpy arrays
X_train = train_data.data.numpy().reshape(-1, 784) / 255.0
y_train = train_data.targets.numpy()
X_test = test_data.data.numpy().reshape(-1, 784) / 255.0
y_test = test_data.targets.numpy()

In [2]:
from sklearn.model_selection import train_test_split
tra_img, val_img,tra_L, val_L=train_test_split(X_train,y_train,test_size=0.7,random_state=0)

In [3]:
from scipy import ndimage

def normalize_blocks(blocks):
    eps = 1e-5
    for i in range(len(blocks)):
        norm = np.sqrt(np.sum(blocks[i]**2) + eps**2)
        blocks[i] /= norm
    return blocks

def extract_hog_features(image, cell_size=(8, 8), block_size=(2, 2), nbins=9):
    # 计算图像梯度
    gx = ndimage.sobel(image, axis=1, mode='reflect')
    gy = ndimage.sobel(image, axis=0, mode='reflect')
    mag = np.sqrt(gx**2 + gy**2)  # 梯度幅值
    ori = np.arctan2(gy, (gx + 1e-6)) * (180 / np.pi) + 90  # 梯度方向
    
    # 将梯度方向划分到nbins个bin中
    bin = np.int32(nbins * ori / 180.0)
    
    # 根据cell_size计算图像中的cell数量
    cells_per_block_x = int(image.shape[1] // cell_size[1])
    cells_per_block_y = int(image.shape[0] // cell_size[0])
    
    # 分割图像为若干个cell
    cells = [np.zeros((cell_size[0], cell_size[1], nbins)) for i in range(cells_per_block_x * cells_per_block_y)]
    for i in range(cells_per_block_y):
        for j in range(cells_per_block_x):
            cell_m = mag[i * cell_size[0]: (i+1) * cell_size[0], j * cell_size[1]: (j+1) * cell_size[1]]
            cell_o = bin[i * cell_size[0]: (i+1) * cell_size[0], j * cell_size[1]: (j+1) * cell_size[1]]
            for k in range(nbins):
                cells[i*cells_per_block_x + j][:,:,k] = np.sum(cell_m * (cell_o == k), axis=(0,1))
    
    # 将多个cell组合成一个block，并进行归一化
    blocks = []
    for i in range(cells_per_block_y - block_size[0] + 1):
        for j in range(cells_per_block_x - block_size[1] + 1):
            block = np.zeros((block_size[0]*cell_size[0], block_size[1]*cell_size[1], nbins))
            for kx in range(block_size[0]):
                for ky in range(block_size[1]):
                    block[kx * cell_size[0]: (kx+1)*cell_size[0], ky * cell_size[1]: (ky+1)*cell_size[1], :] = cells[(i+kx)*cells_per_block_x + j+ky]
            blocks.append(block)
    
    blocks = normalize_blocks(blocks)
    
    # 将多个block串联起来，形成最终的HOG特征向量
    hog_features = np.concatenate(blocks, axis=0).ravel()
    return hog_features

In [4]:
class SVM:
    def __init__(self, lr=0.01, epochs=100):
        self.lr = lr
        self.epochs = epochs
        
    def fit(self, X, y):
        self.w = np.zeros(X.shape[1])
        self.b = 0
        for epoch in range(self.epochs):
            for i, x in enumerate(X):
                if y[i] * (np.dot(x, self.w) - self.b) >= 1:
                    self.w -= self.lr * (2 * 1/self.epochs * self.w)
                else:
                    self.w -= self.lr * (2 * 1/self.epochs * self.w - np.dot(x, y[i]))
                    self.b -= self.lr * y[i]
    def predict(self, X):
        return np.sign(np.dot(X, self.w) - self.b)

In [5]:
X_train_hog = []
for imge in val_img:
    hog_feat = extract_hog_features(imge.reshape((28,28)))
    X_train_hog.append(hog_feat)
X_train_hog = np.array(X_train_hog)

In [6]:
clf = SVM()
clf.fit(X_train_hog,val_L)

In [7]:
X_test_hog = []
for imge in X_test:
    hog_feat = extract_hog_features(imge.reshape((28,28)))
    X_test_hog.append(hog_feat)
X_test_hog = np.array(X_test_hog)

In [8]:
y_pre = clf.predict(X_test_hog)

In [9]:
acc = np.mean(y_pre==y_test)
print(acc)

0.1135
