In [26]:
from keras.models import Model
from keras.layers import Input, Dense, Dropout, Flatten, Activation, Reshape
from keras.layers.convolutional import Conv2D, ZeroPadding2D
from keras.layers.pooling import MaxPooling2D, AveragePooling2D
from keras.optimizers import SGD, Adam, Adadelta
import numpy as np
import pandas as pd
from keras.utils import np_utils
import time

In [27]:
train_data=pd.read_csv('train.csv')

In [28]:
train_data.head()

Unnamed: 0,label,feature
0,0,70 80 82 72 58 58 60 63 54 58 60 48 89 115 121...
1,0,151 150 147 155 148 133 111 140 170 174 182 15...
2,2,231 212 156 164 174 138 161 173 182 200 106 38...
3,4,24 32 36 30 32 23 19 20 30 41 21 22 32 34 21 1...
4,6,4 0 0 0 0 0 0 0 0 0 0 0 3 15 23 28 48 50 58 84...


In [29]:
train_data.shape

(28709, 2)

In [30]:
pixel=train_data['feature'].values
label=train_data['label'].values

In [31]:
pixel.shape

(28709,)

In [32]:
label.shape

(28709,)

In [33]:
pixel[0]

'70 80 82 72 58 58 60 63 54 58 60 48 89 115 121 119 115 110 98 91 84 84 90 99 110 126 143 153 158 171 169 172 169 165 129 110 113 107 95 79 66 62 56 57 61 52 43 41 65 61 58 57 56 69 75 70 65 56 54 105 146 154 151 151 155 155 150 147 147 148 152 158 164 172 177 182 186 189 188 190 188 180 167 116 95 103 97 77 72 62 55 58 54 56 52 44 50 43 54 64 63 71 68 64 52 66 119 156 161 164 163 164 167 168 170 174 175 176 178 179 183 187 190 195 197 198 197 198 195 191 190 145 86 100 90 65 57 60 54 51 41 49 56 47 38 44 63 55 46 52 54 55 83 138 157 158 165 168 172 171 173 176 179 179 180 182 185 187 189 189 192 197 200 199 196 198 200 198 197 177 91 87 96 58 58 59 51 42 37 41 47 45 37 35 36 30 41 47 59 94 141 159 161 161 164 170 171 172 176 178 179 182 183 183 187 189 192 192 194 195 200 200 199 199 200 201 197 193 111 71 108 69 55 61 51 42 43 56 54 44 24 29 31 45 61 72 100 136 150 159 163 162 163 170 172 171 174 177 177 180 187 186 187 189 192 192 194 195 196 197 199 200 201 200 197 201 137 58 98 92

In [34]:
type(pixel)

numpy.ndarray

In [35]:
pixels=[ np.fromstring(x, dtype=float, sep=' ') for x in pixel ]

In [36]:
pixels[0]

array([  70.,   80.,   82., ...,  106.,  109.,   82.])

In [37]:
pixels=np.array(pixels)
pixels=pixels.reshape(pixels.shape[0],48,48,1)
pixels=pixels.astype('float32')
pixels=pixels/255

In [38]:
pixels.shape

(28709, 48, 48, 1)

In [39]:
# 将类向量转换为二进制类矩阵
label = np_utils.to_categorical(label, 7)
label.shape

(28709, 7)

In [40]:
#分成训练集和测试集
id=int(pixels.shape[0]*0.8)
X_train=pixels[:id]
X_test=pixels[id:]
y_train=label[:id]
y_test=label[id:]

In [41]:
X_train.shape

(22967, 48, 48, 1)

In [42]:
X_test.shape

(5742, 48, 48, 1)

In [43]:
y_train.shape

(22967, 7)

In [44]:
input_img = Input(shape=(48, 48, 1))

block1 = Conv2D(64, (5, 5), padding='valid', activation='relu')(input_img)
block1 = ZeroPadding2D(padding=(2, 2), data_format='channels_last')(block1)
block1 = MaxPooling2D(pool_size=(5, 5), strides=(2, 2))(block1)
block1 = ZeroPadding2D(padding=(1, 1), data_format='channels_last')(block1)

block2 = Conv2D(64, (3, 3), activation='relu')(block1)
block2 = ZeroPadding2D(padding=(1, 1), data_format='channels_last')(block2)

block3 = Conv2D(64, (3, 3), activation='relu')(block2)
block3 = AveragePooling2D(pool_size=(3, 3), strides=(2, 2))(block3)
block3 = ZeroPadding2D(padding=(1, 1), data_format='channels_last')(block3)

block4 = Conv2D(128, (3, 3), activation='relu')(block3)
block4 = ZeroPadding2D(padding=(1, 1), data_format='channels_last')(block4)

block5 = Conv2D(128, (3, 3), activation='relu')(block4)
block5 = ZeroPadding2D(padding=(1, 1), data_format='channels_last')(block5)
block5 = AveragePooling2D(pool_size=(3, 3), strides=(2, 2))(block5)
block5 = Flatten()(block5)

fc1 = Dense(1024, activation='relu')(block5)
fc1 = Dropout(0.5)(fc1)

fc2 = Dense(1024, activation='relu')(fc1)
fc2 = Dropout(0.5)(fc2)

predict = Dense(7)(fc2)
predict = Activation('softmax')(predict)
model = Model(inputs=input_img, outputs=predict)

# opt = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
# opt = Adam(lr=1e-3)
opt = Adadelta(lr=0.1, rho=0.95, epsilon=1e-08)
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         (None, 48, 48, 1)         0         
_________________________________________________________________
conv2d_11 (Conv2D)           (None, 44, 44, 64)        1664      
_________________________________________________________________
zero_padding2d_13 (ZeroPaddi (None, 48, 48, 64)        0         
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 22, 22, 64)        0         
_________________________________________________________________
zero_padding2d_14 (ZeroPaddi (None, 24, 24, 64)        0         
_________________________________________________________________
conv2d_12 (Conv2D)           (None, 22, 22, 64)        36928     
_________________________________________________________________
zero_padding2d_15 (ZeroPaddi (None, 24, 24, 64)        0         
__________

In [45]:
# 定义超参数
batch_size = 128
num_epoch = 12  # 训练轮数

In [46]:
# 计算在一个epoch中哪些位置可以停止（整数倍的batch_size的位置）
num_instances = len(y_train)
iter_per_epoch = int(num_instances / batch_size) + 1
batch_cutoff = [0]
for i in range(iter_per_epoch - 1):
    batch_cutoff.append(batch_size * (i+1))
batch_cutoff.append(num_instances)

In [47]:
batch_cutoff

[0,
 128,
 256,
 384,
 512,
 640,
 768,
 896,
 1024,
 1152,
 1280,
 1408,
 1536,
 1664,
 1792,
 1920,
 2048,
 2176,
 2304,
 2432,
 2560,
 2688,
 2816,
 2944,
 3072,
 3200,
 3328,
 3456,
 3584,
 3712,
 3840,
 3968,
 4096,
 4224,
 4352,
 4480,
 4608,
 4736,
 4864,
 4992,
 5120,
 5248,
 5376,
 5504,
 5632,
 5760,
 5888,
 6016,
 6144,
 6272,
 6400,
 6528,
 6656,
 6784,
 6912,
 7040,
 7168,
 7296,
 7424,
 7552,
 7680,
 7808,
 7936,
 8064,
 8192,
 8320,
 8448,
 8576,
 8704,
 8832,
 8960,
 9088,
 9216,
 9344,
 9472,
 9600,
 9728,
 9856,
 9984,
 10112,
 10240,
 10368,
 10496,
 10624,
 10752,
 10880,
 11008,
 11136,
 11264,
 11392,
 11520,
 11648,
 11776,
 11904,
 12032,
 12160,
 12288,
 12416,
 12544,
 12672,
 12800,
 12928,
 13056,
 13184,
 13312,
 13440,
 13568,
 13696,
 13824,
 13952,
 14080,
 14208,
 14336,
 14464,
 14592,
 14720,
 14848,
 14976,
 15104,
 15232,
 15360,
 15488,
 15616,
 15744,
 15872,
 16000,
 16128,
 16256,
 16384,
 16512,
 16640,
 16768,
 16896,
 17024,
 17152,
 17280,
 

In [48]:
total_start_t = time.time()
best_metrics = 0.0
early_stop_counter = 0
saveevery=3
PATIENCE = 5 # The parameter is used for early stopping

for e in range(num_epoch):
	#shuffle data in every epoch
	rand_idxs = np.random.permutation(num_instances)
	print ('#######')
	print ('Epoch ' + str(e+1))
	print ('#######')
	start_t = time.time()

	for i in range(iter_per_epoch):
		if i % 50 == 0:
			print ('Iteration ' + str(i+1))
		X_batch = []
		Y_batch = []
		''' fill data into each batch '''
		for n in range(batch_cutoff[i], batch_cutoff[i+1]):
			X_batch.append(X_train[rand_idxs[n]])
			Y_batch.append(np.zeros((7, ), dtype=np.float))
# 			X_batch[-1] = np.fromstring(X_batch[-1], dtype=float, sep=' ').reshape((48, 48, 1))
# 			Y_batch[-1][int(train_labels[rand_idxs[n]])] = 1.

		''' use these batch data to train your model '''
		model.train_on_batch(np.asarray(X_batch),np.asarray(Y_batch))

	'''
	The above process is one epoch, and then we can check the performance now.
	'''
	loss_and_metrics = model.evaluate(X_test, y_test, batch_size)
	print ('\nloss & metrics:')
	print (loss_and_metrics)

	'''
	early stop is a mechanism to prevent your model from overfitting
	'''
	if loss_and_metrics[1] >= best_metrics:
		best_metrics = loss_and_metrics[1]
		print ("save best score!! "+str(loss_and_metrics[1]))
		early_stop_counter = 0
	else:
		early_stop_counter += 1

	'''
	Sample code to write result :

	if e == e:
		val_proba = model.predict(val_pixels)
		val_classes = val_proba.argmax(axis=-1)


		with open('result/simple%s.csv' % str(e), 'w') as f:
			f.write('acc = %s\n' % str(loss_and_metrics[1]))
			f.write('id,label')
			for i in range(len(val_classes)):
				f.write('\n' + str(i) + ',' + str(val_classes[i]))
	'''

	print ('Elapsed time in epoch ' + str(e+1) + ': ' + str(time.time() - start_t))

	if (e+1) % saveevery == 0:
		model.save('model-%d.h5' %(e+1))
		print ('Saved model %s!' %str(e+1))

	if early_stop_counter >= PATIENCE:
		print ('Stop by early stopping')
		print ('Best score: '+str(best_metrics))
		break

print ('Elapsed time in total: ' + str(time.time() - total_start_t))


#######
Epoch 1
#######
Iteration 1
Iteration 51
Iteration 101
Iteration 151
loss & metrics:
[1.9437382364472375, 0.138801811319409]
save best score!! 0.138801811319
Elapsed time in epoch 1: 16.646674633026123
#######
Epoch 2
#######
Iteration 1
Iteration 51
Iteration 101
Iteration 151

loss & metrics:
[1.9437382391046374, 0.13880181103394615]
Elapsed time in epoch 2: 14.021574258804321
#######
Epoch 3
#######
Iteration 1
Iteration 51
Iteration 101
Iteration 151
loss & metrics:
[1.9437382337898379, 0.13880181103394615]
Elapsed time in epoch 3: 14.081128120422363
Saved model 3!
#######
Epoch 4
#######
Iteration 1
Iteration 51
Iteration 101
Iteration 151
loss & metrics:
[1.9437382444194367, 0.13880181103394615]
Elapsed time in epoch 4: 14.063432455062866
#######
Epoch 5
#######
Iteration 1
Iteration 51
Iteration 101
Iteration 151
loss & metrics:
[1.9437382444194367, 0.138801811319409]
save best score!! 0.138801811319
Elapsed time in epoch 5: 14.045183897018433
#######
Epoch 6
#######
Ite