-
Notifications
You must be signed in to change notification settings - Fork 64
/
CNN_Model.py
295 lines (276 loc) · 11.7 KB
/
CNN_Model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
# coding: utf-8
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
# NOTE(review): tensorflow.examples.tutorials was removed in TF 2.x; this
# script requires TensorFlow 1.x — confirm the pinned version.
from tensorflow.examples.tutorials.mnist import input_data
from sklearn.metrics import confusion_matrix
import time
from datetime import timedelta
import math
print(tf.__version__)
'''load mnist data and print some information'''
# downloads MNIST into ./MNIST_data on first run; labels are one-hot encoded
data = input_data.read_data_sets("MNIST_data", one_hot=True)
print("Size of")
print("\t training set:\t\t{0}".format(len(data.train.labels)))
print("\t test set:\t\t\t{0}".format(len(data.test.labels)))
print("\t validation set:\t{0}".format(len(data.validation.labels)))
print(data.test.labels[0:9])
data.test.cls = np.array([label.argmax() for label in data.test.labels]) # integer class labels decoded from the one-hot vectors
print(data.test.cls[0:9])
'''define image description'''
img_size = 28  # MNIST images are 28x28 pixels
img_flat_size = img_size * img_size  # 784 values per flattened image
img_shape = (img_size, img_size)
num_classes = 10  # digits 0-9
num_channels = 1  # grayscale
'''define cnn description'''
filter_size1 = 5 # the first conv filter size is 5x5
num_filters1 = 32 # there are 32 filters
filter_size2 = 5 # the second conv filter size
num_filters2 = 64 # there are 64 filters
fc_size = 1024 # fully-connected layer width
'''define a function to plot 9 images'''
def plot_images(images, cls_true, cls_pred=None):
    """Show a 3x3 grid of digit images labelled with their true (and optional predicted) classes.

    @param images: flat image vectors to display (exactly 9)
    @param cls_true: true class label for each image
    @param cls_pred: predicted class label for each image, or None to omit predictions
    """
    assert len(images) == len(cls_true) == 9
    fig, axes = plt.subplots(3, 3)
    for idx, axis in enumerate(axes.flat):
        axis.imshow(images[idx].reshape(img_shape), cmap="binary")
        if cls_pred is None:
            label = "True:{}".format(cls_true[idx])
        else:
            label = "True:{0},Pred:{1}".format(cls_true[idx], cls_pred[idx])
        axis.set_xlabel(label)
        # hide axis ticks -- they carry no information for image plots
        axis.set_xticks([])
        axis.set_yticks([])
    plt.show()
'''plot 9 images of test set'''
# sanity-check the data pipeline by showing the first 9 test digits
images = data.test.images[0:9]
cls_true = data.test.cls[0:9]
plot_images(images, cls_true)
'''define a function to intialize weights'''
def initialize_weights(shape):
    """Create a trainable weight tensor drawn from a truncated normal (stddev 0.1).

    @param shape: the shape of the weight tensor
    """
    initial = tf.truncated_normal(shape=shape, stddev=0.1)
    return tf.Variable(initial)
'''define a function to intialize biases'''
def initialize_biases(length):
    """Create a trainable bias vector initialized to the constant 0.1.

    @param length: number of bias elements (note: tf.constant's shape must be a list)
    """
    initial = tf.constant(0.1, shape=[length])
    return tf.Variable(initial)
'''define a function to do conv and pooling if used'''
def conv_layer(input,
               num_input_channels,
               filter_size,
               num_output_filters,
               use_pooling=True):
    """Build a conv layer (stride 1, SAME padding), optional 2x2 max-pool, then ReLU.

    @param input: output tensor of the previous layer
    @param num_input_channels: number of channels in the input
    @param filter_size: height/width of the square conv filter
    @param num_output_filters: number of filters (output channels)
    @param use_pooling: whether to apply 2x2 max-pooling with stride 2
    """
    weights = initialize_weights(
        shape=[filter_size, filter_size, num_input_channels, num_output_filters])
    biases = initialize_biases(length=num_output_filters)  # one bias per filter
    layer = tf.nn.conv2d(input=input, filter=weights,
                         strides=[1, 1, 1, 1], padding='SAME') + biases
    if use_pooling:
        # 2x2 pooling window, stride 2 -> halves the spatial dimensions
        layer = tf.nn.max_pool(value=layer,
                               ksize=[1, 2, 2, 1],
                               strides=[1, 2, 2, 1],
                               padding="SAME")
    # relu(max_pool(x)) == max_pool(relu(x)); applying ReLU after pooling is cheaper
    layer = tf.nn.relu(layer)
    return layer, weights
'''define a function to flat conv layer'''
def flatten_layer(layer):
    """Reshape a 4-D conv output [num_images, height, width, channels] to 2-D.

    @param layer: the conv layer output tensor
    @return: (flattened tensor [num_images, height*width*channels], feature count)
    """
    shape = layer.get_shape()
    # features per image = height * width * channels (everything but the batch dim)
    num_features = shape[1:4].num_elements()
    flat = tf.reshape(layer, [-1, num_features])  # -1: batch size inferred at run time
    return flat, num_features
'''define a function to do fully-connected'''
def fc_layer(input, num_inputs, num_outputs, use_relu=True):
    """Build a fully-connected layer: input @ weights + biases, with optional ReLU.

    @param input: 2-D input tensor [num_images, num_inputs]
    @param num_inputs: size of each input vector
    @param num_outputs: size of each output vector
    @param use_relu: whether to apply the ReLU activation
    """
    weights = initialize_weights(shape=[num_inputs, num_outputs])
    biases = initialize_biases(num_outputs)
    result = tf.matmul(input, weights) + biases
    return tf.nn.relu(result) if use_relu else result
'''define the placeholder'''
X = tf.placeholder(tf.float32, shape=[None, img_flat_size], name="X")
X_image = tf.reshape(X, shape=[-1, img_size, img_size, num_channels]) # reshape flat vectors to the 4-D shape conv2d expects
y_true = tf.placeholder(tf.float32, [None, num_classes], name="y_true")
y_true_cls = tf.argmax(y_true, axis=1)
keep_prob = tf.placeholder(tf.float32) # dropout keep-probability, fed per session.run
'''define the cnn model'''
# conv1: 1 -> 32 channels, 5x5 filters, 2x2 max-pool (28x28 -> 14x14)
layer_conv1, weights_conv1 = conv_layer(input=X_image, num_input_channels=num_channels,
                                        filter_size=filter_size1,
                                        num_output_filters=num_filters1,
                                        use_pooling=True)
print("conv1:",layer_conv1)
# conv2: 32 -> 64 channels, 5x5 filters, 2x2 max-pool (14x14 -> 7x7)
layer_conv2, weights_conv2 = conv_layer(input=layer_conv1, num_input_channels=num_filters1,
                                        filter_size=filter_size2,
                                        num_output_filters=num_filters2,
                                        use_pooling=True)
print("conv2:",layer_conv2)
layer_flat, num_features = flatten_layer(layer_conv2) # num_features is 7*7*64 = 3136
print("flatten layer:", layer_flat)
layer_fc1 = fc_layer(layer_flat, num_features, fc_size, use_relu=True)
print("fully-connected layer1:", layer_fc1)
layer_drop_out = tf.nn.dropout(layer_fc1, keep_prob) # dropout between the two FC layers
layer_fc2 = fc_layer(layer_drop_out, fc_size, num_classes,use_relu=False)
print("fully-connected layer2:", layer_fc2)
y_pred = tf.nn.softmax(layer_fc2)
y_pred_cls = tf.argmax(y_pred, axis=1)
# softmax_cross_entropy_with_logits applies softmax internally, so it takes the
# raw logits (layer_fc2), not y_pred
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=y_true,
                                                        logits=layer_fc2)
cost = tf.reduce_mean(cross_entropy)
optimizer = tf.train.AdamOptimizer(learning_rate=1e-3).minimize(cost) # use AdamOptimizer
'''define accuracy'''
correct_prediction = tf.equal(y_true_cls, y_pred_cls)
accuracy = tf.reduce_mean(tf.cast(correct_prediction,dtype=tf.float32))
'''run the data graph'''
session = tf.Session()
session.run(tf.global_variables_initializer())
batch_size = 100 # mini-batch size for training
'''define a function to run train the model with bgd'''
total_iterations = 0 # total training iterations run so far (updated by optimize)
def optimize(num_iterations):
    """Run num_iterations mini-batch training steps, logging training accuracy every 10 steps.

    @param num_iterations: number of additional training steps to run; the global
        total_iterations counter is advanced so repeated calls continue the count
    """
    global total_iterations
    start_time = time.time()
    for i in range(total_iterations, total_iterations + num_iterations):
        x_batch, y_batch = data.train.next_batch(batch_size)
        # train with dropout active (keep_prob=0.5)
        feed_dict = {X: x_batch, y_true: y_batch, keep_prob: 0.5}
        session.run(optimizer, feed_dict=feed_dict)
        if i % 10 == 0:
            # BUG FIX: evaluate with dropout disabled (keep_prob=1.0) so the
            # reported accuracy reflects the inference-time network instead of
            # being depressed by random unit drops
            eval_dict = {X: x_batch, y_true: y_batch, keep_prob: 1.0}
            acc = session.run(accuracy, feed_dict=eval_dict)
            # {0:>6}: fixed width 6; {1:>6.1%}: width 6, percentage with 1 decimal place
            msg = "Optimization Iteration: {0:>6}, Training Accuracy: {1:>6.1%}"
            print(msg.format(i + 1, acc))
    total_iterations += num_iterations
    end_time = time.time()
    time_dif = end_time - start_time
    print("time usage:" + str(timedelta(seconds=int(round(time_dif)))))
'''define a function to print accuracy'''
# NOTE(review): feed_test_dict is never used below (print_test_accuracy builds
# its own batched feeds), and keep_prob should be 1.0 at test time — confirm
# before reusing this dict.
feed_test_dict = {X: data.test.images,
                  y_true: data.test.labels,
                  keep_prob:0.5}
batch_size_test = 256 # batch size used when predicting over the test set
def print_test_accuracy(print_error=False,print_confusion_matrix=False):
    """Compute and print classification accuracy over the whole test set.

    Predictions are made in batches of batch_size_test to bound memory use.

    @param print_error: whether to plot 9 mis-classified images
    @param print_confusion_matrix: whether to plot the confusion matrix
    """
    num_test = len(data.test.images)
    # BUG FIX: np.int is deprecated/removed in modern NumPy; plain int is equivalent
    cls_pred = np.zeros(shape=num_test, dtype=int)
    i = 0
    # predict the test set batch by batch
    while i < num_test:
        j = min(i + batch_size_test, num_test)
        images = data.test.images[i:j, :]
        labels = data.test.labels[i:j, :]
        # BUG FIX: keep_prob=1.0 — dropout must be disabled at inference time,
        # otherwise test accuracy is randomly depressed
        feed_dict = {X: images, y_true: labels, keep_prob: 1.0}
        cls_pred[i:j] = session.run(y_pred_cls, feed_dict=feed_dict)
        i = j
    cls_true = data.test.cls
    correct = (cls_true == cls_pred)
    # accuracy computed in plain numpy; tf.reduce_mean would only build a tensor
    correct_sum = correct.sum()
    acc = float(correct_sum) / num_test
    msg = "Accuracy on Test-Set: {0:.1%} ({1} / {2})"
    print(msg.format(acc, correct_sum, num_test))
    if print_error:
        plot_error_pred(cls_pred, correct)
    if print_confusion_matrix:
        plot_confusin_martrix(cls_pred)
'''define a function to plot error prediction images'''
def plot_error_pred(cls_pred, correct):
    """Plot the first 9 test images that were mis-classified.

    @param cls_pred: predicted class for every test image
    @param correct: boolean array marking which predictions were right
    """
    wrong = np.logical_not(correct)
    bad_images = data.test.images[wrong]
    true_cls = data.test.cls[wrong]
    pred_cls = cls_pred[wrong]
    plot_images(bad_images[0:9], true_cls[0:9], cls_pred=pred_cls[0:9])
'''define a function to print confusion matrix'''
def plot_confusin_martrix(cls_pred):
    """Plot the test-set confusion matrix (true labels are known for the test set).

    @param cls_pred: predicted class for every test image
    """
    cm = confusion_matrix(data.test.cls, cls_pred)
    plt.matshow(cm)
    plt.colorbar()
    marks = np.arange(num_classes)
    plt.xticks(marks, range(num_classes))
    plt.yticks(marks, range(num_classes))
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.show()
'''define a function to plot conv weights'''
def plot_conv_weights(weights, input_channel=0):
    """Plot every filter of a conv weight tensor for one input channel on a shared color scale.

    @param weights: 4-D weight tensor [filter_size, filter_size, in_channels, out_filters]
        (e.g. weights_conv1 or weights_conv2)
    @param input_channel: which input channel's filter slices to show
    """
    w = session.run(weights)
    # shared vmin/vmax so filters are visually comparable
    lo, hi = np.min(w), np.max(w)
    num_filters = w.shape[3]
    # smallest square grid of subplots that fits every filter
    grids = math.ceil(math.sqrt(num_filters))
    fig, axes = plt.subplots(grids, grids)
    for idx, axis in enumerate(axes.flat):
        if idx < num_filters:
            img = w[:, :, input_channel, idx]
            axis.imshow(img, vmin=lo, vmax=hi, interpolation="nearest", cmap='seismic')
        axis.set_xticks([])
        axis.set_yticks([])
    plt.show()
'''define a function to plot conv output layer'''
def plot_conv_layer(layer, image):
    """Plot every output channel of a conv layer for a single input image.

    @param layer: the conv layer output tensor to evaluate
    @param image: one flat input image vector
    """
    # keep_prob is not fed: the conv layers do not depend on the dropout placeholder
    values = session.run(layer, feed_dict={X: [image]})
    num_filters = values.shape[3]
    grids = math.ceil(math.sqrt(num_filters))
    fig, axes = plt.subplots(grids, grids)
    for idx, axis in enumerate(axes.flat):
        if idx < num_filters:
            axis.imshow(values[0, :, :, idx], interpolation="nearest", cmap="binary")
        axis.set_xticks([])
        axis.set_yticks([])
    plt.show()
# train for 999 iterations, then evaluate on the test set
optimize(999)
print_test_accuracy(print_error=True,print_confusion_matrix=True) # testset accuracy
# the first conv info: learned filters and their response to one test image
plot_conv_weights(weights=weights_conv1)
image1 = data.test.images[0]
plot_conv_layer(layer=layer_conv1, image=image1)
# the second conv info
plot_conv_weights(weights=weights_conv2)
image1 = data.test.images[0]
plot_conv_layer(layer=layer_conv2, image=image1)