In [31]:
# coding:utf-8
from __future__ import print_function
from random import shuffle
from past.builtins import xrange
import pickle
import numpy as np

# 文件读取
def unpickle(file):
	with open(file,'rb') as fo:
		dict = pickle.load(fo)
	return dict

def load_file(file):
	dictTrain = unpickle(file + "data_batch_1")
	dataTrain = dictTrain['data']
	labelTrain = dictTrain['labels']

	for i in range(2,6):
		dictTrain = unpickle(file + "data_batch_" + str(i))
		dataTrain = np.vstack([dataTrain,dictTrain['data']])
		labelTrain = np.hstack([labelTrain,dictTrain['labels']])

	dictTest = unpickle(file + "test_batch")
	dataTest = dictTest['data']
	labelTest = dictTest['labels']
	labelTest = np.array(labelTest)

	return dataTrain, labelTrain, dataTest, labelTest


#softmax loss 函数
def softmax_loss_naive(W, X, y, reg):
	'''
		W:权重矩阵
		X:图片训练集(矩阵)
		y:图片训练集标签(数组)
		reg:正则化强度

		return:
			loss:训练集平均loss值
			dW:梯度矩阵
	'''
	#初始化数据
	loss = 0.0
	dW = np.zeros_like(W)
	num_train = X.shape[0]	#样本数
	num_class = W.shape[1]	#样本类别数

	for i in xrange(num_train):
		score = X[i].dot(W)
		score -= np.max(score)	#提高样本稳定性

		correct_score = score[y[i]]
		exp_sum = np.sum(np.exp(score))
		loss += np.log(exp_sum) - correct_score

		for j in xrange(num_class):
			if (j == y[i]):
				dW[:, j] += np.exp(score[j]) / exp_sum * X[i] - X[i]
			else:
				dW[:, j] += np.exp(score[j]) / exp_sum * X[i]


	loss /= num_train
	loss += 0.5 * reg * np.sum(W*W)

	dW /= num_train
	dW += reg * W

	return loss, dW

#线性分类器
class LinearClassifier(object):
	def __init__(self):
		self.W = None

	def train(self, X, y, step_size = 1e-3, reg = 1e-5, num_iters = 100, batch_size = 200, verbose = True):
		'''
		X:图片训练集(矩阵)
		y:图片训练集标签(数组)
		step_size:学习步进速度
		reg:正则化强度
		num_iters:迭代次数
		batch_size:每次迭代图片样本数
		verbose:是否打印信息

		return:
			loss_history:每次训练loss值
		'''
		num_train, dim = X.shape
		num_classes = np.max(y) + 1
	
		if self.W is None:
			self.W = 0.001 * np.random.randn(dim, num_classes)

		loss_history = []

		for it in xrange(num_iters):
			#从样本中不重复随机采batch_size个样本
			sample_index = np.random.choice(num_train, batch_size, replace=False)

			X_batch = X[sample_index, :]
			y_batch = y[sample_index]

			loss, grad = self.loss(X_batch, y_batch, reg)
			loss_history.append(loss)

			self.W += -step_size * grad

			if (verbose and it %10 == 0):
				print('iteration %d / %d, samples: %d, loss: %f' % (it, num_iters, batch_size, loss))

		return loss_history

	def predict(self, X):
		'''
		X:图片训练集(矩阵)

		return:
			y_pred:标签预测值
		'''
		y_pred = np.zeros(X.shape[1])

		score = X.dot(self.W)
		y_pred = np.argmax(score, axis = 1)

		return y_pred


	def loss(self, X_batch, y_batch, reg):
		'''
		X_batch:图片训练集(矩阵)
		y_batch:图片训练集标签(数组)
		reg:正则化强度

		return:
			loss:训练集平均loss值
			dW:梯度矩阵
		'''
		return softmax_loss_naive(self.W, X_batch, y_batch, reg)

In [32]:
from skimage.feature import local_binary_pattern
from PIL import Image
#开始训练
file_path = './'

dataTrFirst, labelTrain, dataTsFirst, labelTest = load_file(file_path)

dataTr = np.zeros((dataTrFirst.shape[0],32*32))
dataTs = np.zeros((dataTsFirst.shape[0],32*32))


for i in range(dataTrFirst.shape[0] -45000):
    img = dataTrFirst[i].reshape((32,32,3))
    img = Image.fromarray(img)
    res = local_binary_pattern(img.convert('L'),8,2,method='uniform')
    dataTr[i] = res.reshape((1,32*32))
print("训练集加载完成")

for i in range(dataTsFirst.shape[0] -1):
    img = dataTsFirst[i].reshape((32,32,3))
    img = Image.fromarray(img)
    res = local_binary_pattern(img.convert('L'),8,2,method='uniform')
    dataTs[i] = res.reshape((1,32*32))
print("测试集加载完成")

dataTrain = dataTrain - np.mean(dataTrain, axis=0)


训练集加载完成
测试集加载完成


In [33]:
LC = LinearClassifier()

print('start training ...')
#train(self, X, y, step_size = 1e-3, reg = 1e-5, num_iters = 100, batch_size = 200, verbose = True)
#在dataTrain中不重复随机抽取batch_size个样本，迭代训练num_iters次
loss_all = LC.train(dataTrain, labelTrain, num_iters = 10000, batch_size = 256)


start training ...
iteration 0 / 10000, samples: 256, loss: 6.407522
iteration 10 / 10000, samples: 256, loss: 356.586537
iteration 20 / 10000, samples: 256, loss: 326.962195
iteration 30 / 10000, samples: 256, loss: 426.494104
iteration 40 / 10000, samples: 256, loss: 431.048330
iteration 50 / 10000, samples: 256, loss: 332.071075
iteration 60 / 10000, samples: 256, loss: 261.378764
iteration 70 / 10000, samples: 256, loss: 269.441075
iteration 80 / 10000, samples: 256, loss: 374.883951
iteration 90 / 10000, samples: 256, loss: 388.917750
iteration 100 / 10000, samples: 256, loss: 237.202494
iteration 110 / 10000, samples: 256, loss: 363.410592
iteration 120 / 10000, samples: 256, loss: 218.306516
iteration 130 / 10000, samples: 256, loss: 282.444498
iteration 140 / 10000, samples: 256, loss: 324.298460
iteration 150 / 10000, samples: 256, loss: 227.434371
iteration 160 / 10000, samples: 256, loss: 293.948717
iteration 170 / 10000, samples: 256, loss: 383.530329
iteration 180 / 10000,

iteration 1510 / 10000, samples: 256, loss: 217.408580
iteration 1520 / 10000, samples: 256, loss: 352.329090
iteration 1530 / 10000, samples: 256, loss: 253.091046
iteration 1540 / 10000, samples: 256, loss: 207.243529
iteration 1550 / 10000, samples: 256, loss: 331.976200
iteration 1560 / 10000, samples: 256, loss: 271.411037
iteration 1570 / 10000, samples: 256, loss: 329.784995
iteration 1580 / 10000, samples: 256, loss: 336.940116
iteration 1590 / 10000, samples: 256, loss: 179.763583
iteration 1600 / 10000, samples: 256, loss: 295.757840
iteration 1610 / 10000, samples: 256, loss: 254.373743
iteration 1620 / 10000, samples: 256, loss: 264.194423
iteration 1630 / 10000, samples: 256, loss: 226.534805
iteration 1640 / 10000, samples: 256, loss: 336.846412
iteration 1650 / 10000, samples: 256, loss: 262.482532
iteration 1660 / 10000, samples: 256, loss: 262.070352
iteration 1670 / 10000, samples: 256, loss: 405.450126
iteration 1680 / 10000, samples: 256, loss: 316.205814
iteration 

iteration 3000 / 10000, samples: 256, loss: 221.418613
iteration 3010 / 10000, samples: 256, loss: 219.631591
iteration 3020 / 10000, samples: 256, loss: 426.214376
iteration 3030 / 10000, samples: 256, loss: 181.161484
iteration 3040 / 10000, samples: 256, loss: 254.068958
iteration 3050 / 10000, samples: 256, loss: 185.482487
iteration 3060 / 10000, samples: 256, loss: 230.757724
iteration 3070 / 10000, samples: 256, loss: 200.745903
iteration 3080 / 10000, samples: 256, loss: 273.225695
iteration 3090 / 10000, samples: 256, loss: 264.586224
iteration 3100 / 10000, samples: 256, loss: 259.379901
iteration 3110 / 10000, samples: 256, loss: 234.097825
iteration 3120 / 10000, samples: 256, loss: 243.064261
iteration 3130 / 10000, samples: 256, loss: 262.251208
iteration 3140 / 10000, samples: 256, loss: 298.116732
iteration 3150 / 10000, samples: 256, loss: 237.621564
iteration 3160 / 10000, samples: 256, loss: 235.582854
iteration 3170 / 10000, samples: 256, loss: 218.403517
iteration 

iteration 4490 / 10000, samples: 256, loss: 221.871507
iteration 4500 / 10000, samples: 256, loss: 237.375326
iteration 4510 / 10000, samples: 256, loss: 302.382985
iteration 4520 / 10000, samples: 256, loss: 249.441348
iteration 4530 / 10000, samples: 256, loss: 207.299195
iteration 4540 / 10000, samples: 256, loss: 345.781957
iteration 4550 / 10000, samples: 256, loss: 188.993721
iteration 4560 / 10000, samples: 256, loss: 221.534107
iteration 4570 / 10000, samples: 256, loss: 203.777095
iteration 4580 / 10000, samples: 256, loss: 366.108027
iteration 4590 / 10000, samples: 256, loss: 403.658607
iteration 4600 / 10000, samples: 256, loss: 263.544144
iteration 4610 / 10000, samples: 256, loss: 184.565110
iteration 4620 / 10000, samples: 256, loss: 158.585723
iteration 4630 / 10000, samples: 256, loss: 329.599254
iteration 4640 / 10000, samples: 256, loss: 245.024884
iteration 4650 / 10000, samples: 256, loss: 236.015098
iteration 4660 / 10000, samples: 256, loss: 200.065944
iteration 

iteration 5980 / 10000, samples: 256, loss: 304.464527
iteration 5990 / 10000, samples: 256, loss: 302.065643
iteration 6000 / 10000, samples: 256, loss: 283.565742
iteration 6010 / 10000, samples: 256, loss: 301.713069
iteration 6020 / 10000, samples: 256, loss: 312.404497
iteration 6030 / 10000, samples: 256, loss: 221.274389
iteration 6040 / 10000, samples: 256, loss: 304.293862
iteration 6050 / 10000, samples: 256, loss: 238.971203
iteration 6060 / 10000, samples: 256, loss: 339.261861
iteration 6070 / 10000, samples: 256, loss: 369.122495
iteration 6080 / 10000, samples: 256, loss: 173.968194
iteration 6090 / 10000, samples: 256, loss: 251.134506
iteration 6100 / 10000, samples: 256, loss: 279.747576
iteration 6110 / 10000, samples: 256, loss: 317.579612
iteration 6120 / 10000, samples: 256, loss: 195.916359
iteration 6130 / 10000, samples: 256, loss: 331.411336
iteration 6140 / 10000, samples: 256, loss: 295.532579
iteration 6150 / 10000, samples: 256, loss: 267.490035
iteration 

iteration 7470 / 10000, samples: 256, loss: 228.709333
iteration 7480 / 10000, samples: 256, loss: 292.707228
iteration 7490 / 10000, samples: 256, loss: 236.966908
iteration 7500 / 10000, samples: 256, loss: 172.262304
iteration 7510 / 10000, samples: 256, loss: 241.949537
iteration 7520 / 10000, samples: 256, loss: 268.403227
iteration 7530 / 10000, samples: 256, loss: 200.336226
iteration 7540 / 10000, samples: 256, loss: 343.009413
iteration 7550 / 10000, samples: 256, loss: 239.601789
iteration 7560 / 10000, samples: 256, loss: 232.766585
iteration 7570 / 10000, samples: 256, loss: 248.773983
iteration 7580 / 10000, samples: 256, loss: 146.455192
iteration 7590 / 10000, samples: 256, loss: 340.285603
iteration 7600 / 10000, samples: 256, loss: 230.112450
iteration 7610 / 10000, samples: 256, loss: 365.723194
iteration 7620 / 10000, samples: 256, loss: 173.102839
iteration 7630 / 10000, samples: 256, loss: 293.393442
iteration 7640 / 10000, samples: 256, loss: 321.058332
iteration 

iteration 8960 / 10000, samples: 256, loss: 163.773669
iteration 8970 / 10000, samples: 256, loss: 290.033563
iteration 8980 / 10000, samples: 256, loss: 168.305381
iteration 8990 / 10000, samples: 256, loss: 277.827997
iteration 9000 / 10000, samples: 256, loss: 297.416457
iteration 9010 / 10000, samples: 256, loss: 301.631342
iteration 9020 / 10000, samples: 256, loss: 171.941602
iteration 9030 / 10000, samples: 256, loss: 261.350655
iteration 9040 / 10000, samples: 256, loss: 187.725387
iteration 9050 / 10000, samples: 256, loss: 369.324298
iteration 9060 / 10000, samples: 256, loss: 205.558509
iteration 9070 / 10000, samples: 256, loss: 280.511130
iteration 9080 / 10000, samples: 256, loss: 215.266383
iteration 9090 / 10000, samples: 256, loss: 234.034801
iteration 9100 / 10000, samples: 256, loss: 200.957194
iteration 9110 / 10000, samples: 256, loss: 164.758399
iteration 9120 / 10000, samples: 256, loss: 354.528871
iteration 9130 / 10000, samples: 256, loss: 306.407794
iteration 

In [34]:
dataTest = dataTest - np.mean(dataTest, axis=0)
print('last loss is %f' %(loss_all[-1]))
#开始预测
print('start predicting ...')
y_pred = LC.predict(dataTest)

hit = 0
for i in xrange(y_pred.size):
	if (y_pred[i] == labelTest[i]):
		hit += 1

print('the accuracy rate is %f ' % (hit/100))

last loss is 258.961734
start predicting ...
the accuracy rate is 26.000000 
