# Code taken from:
# https://github.com/Lasagne/Recipes/blob/master/modelzoo/vgg16.py
#
# VGG-16 16-layer model and VGG-19 19-layer model from the paper:
# "Very Deep Convolutional Networks for Large-Scale Image Recognition"
# Original source: https://gist.github.com/ksimonyan/3785162f95cd2d5fee77
# License: non-commercial use only
# Download pretrained weights from:
# http://s3.amazonaws.com/lasagne/recipes/pretrained/imagenet/vgg16.pkl
# and
# http://s3.amazonaws.com/lasagne/recipes/pretrained/imagenet/vgg19.pkl
import sys, os, pickle

import numpy as np

import lasagne
import skimage.transform, skimage.color
from lasagne.layers import InputLayer, DenseLayer, NonlinearityLayer, DropoutLayer, Pool2DLayer, Conv2DLayer
from lasagne.nonlinearities import softmax
from lasagne.utils import floatX

if sys.version_info[0] == 2:
    from urllib import urlretrieve
else:
    from urllib.request import urlretrieve

# See if we can use the `britefury-lasagne-helpers` library to get VGG; otherwise we download the weights ourselves.
# See http://github.com/Britefury/britefury-lasagne-helpers
try:
    from britefury_lasagne.pretrained import imagenet_vgg
except ImportError:
    imagenet_vgg = None


PARAMS_DIR = 'pretrained_models'

VGG16_PATH = os.path.join(PARAMS_DIR, 'vgg16.pkl')
VGG19_PATH = os.path.join(PARAMS_DIR, 'vgg19.pkl')


def download(path, source_url):
    if not os.path.exists(PARAMS_DIR):
        os.makedirs(PARAMS_DIR)
    if not os.path.exists(path):
        print('Downloading {0} to {1}'.format(source_url, path))
        urlretrieve(source_url, path)
    return path


class VGGModel (object):
    """
    VGG model base class

    Override the `build_network` class method to define the network architecture
    """
    def __init__(self, mean_value, class_names, model_name, param_values):
        self.network, self.final_layer_name = self.build_network()
        self.mean_value = mean_value
        self.class_names = class_names
        self.model_name = model_name
        lasagne.layers.set_all_param_values(self.network[self.final_layer_name], param_values)

    @classmethod
    def build_network(cls):
        raise NotImplementedError('Abstract for type {}'.format(cls))

    @property
    def final_layer(self):
        return self.network[self.final_layer_name]

    def prepare_image(self, im, image_size=224):
        """
        Prepare an image for classification with VGG: scale and crop to `image_size` x `image_size`,
        convert RGB channel order to BGR and subtract the mean value.
        (A usage sketch is given at the end of this module.)

        :param im: input RGB image as a numpy array of shape (height, width, channel)
        :param image_size: output image size, default=224. If `None`, scaling and cropping will not be done.
        :return: (raw_image, vgg_image) where `raw_image` is the scaled and cropped image with dtype=uint8 and
            `vgg_image` is the image with BGR channel order and axes (sample, channel, height, width).
        """
        # If the image is greyscale, convert it to RGB
        if len(im.shape) == 2:
            im = im[:, :, np.newaxis]
            im = np.repeat(im, 3, axis=2)

        if image_size is not None:
            # Scale the image so that its smallest dimension is the desired size
            # (use integer division so the target shape is integral under both Py2 and Py3)
            h, w, _ = im.shape
            if h < w:
                if h != image_size:
                    im = skimage.transform.resize(im, (image_size, w * image_size // h), preserve_range=True)
            else:
                if w != image_size:
                    im = skimage.transform.resize(im, (h * image_size // w, image_size), preserve_range=True)

            # Crop the central `image_size` x `image_size` region of the image
            h, w, _ = im.shape
            im = im[h // 2 - image_size // 2:h // 2 + image_size // 2, w // 2 - image_size // 2:w // 2 + image_size // 2]

        # Convert to uint8 type
        rawim = np.copy(im).astype('uint8')

        # Images come in RGB channel order, while VGG net expects BGR:
        im = im[:, :, ::-1]

        # Subtract the mean
        im = im - self.mean_value

        # Shuffle axes from (height, width, channel) to (channel, height, width)
        im = np.swapaxes(np.swapaxes(im, 1, 2), 0, 1)

        # Add the sample axis to the image; (channel, height, width) -> (sample, channel, height, width)
        im = im[np.newaxis]

        return rawim, floatX(im)

    def inv_prepare_image(self, image):
        """
        Perform the inverse of `prepare_image`; usually used to display an image prepared for classification
        with a VGG net.

        :param image: the image to process
        :return: the processed image
        """
        if len(image.shape) == 4:
            # We have a sample dimension; we can collapse it if there is only 1 sample
            if image.shape[0] == 1:
                image = image[0]
            else:
                raise ValueError('Sample dimension has > 1 samples ({})'.format(image.shape[0]))

        # Move the channel axis: (C, H, W) -> (H, W, C)
        image = np.rollaxis(image, 0, 3)
        # Add the mean
        image = image + self.mean_value
        # Clip to the [0, 255] range
        image = image.clip(0.0, 255.0)
        # Convert to uint8 type
        image = image.astype('uint8')
        # Flip the channel order from BGR back to RGB
        image = image[:, :, ::-1]
        return image

    @classmethod
    def from_loaded_params(cls, loaded_params):
        """
        Construct a model given parameters loaded from a pickled VGG model

        :param loaded_params: a dictionary resulting from loading a pickled VGG model
        :return: the model
        """
        return cls(loaded_params['mean value'], loaded_params['synset words'], loaded_params['model name'],
                   loaded_params['param values'])

    @staticmethod
    def unpickle_from_path(path):
        # Oh... the joys of Py2 vs Py3: a Py3 unpickler needs latin1 encoding to read a pickle written by Py2
        with open(path, 'rb') as f:
            if sys.version_info[0] == 2:
                return pickle.load(f)
            else:
                return pickle.load(f, encoding='latin1')
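

# A minimal sketch (not part of the original recipe) of how a prediction function can be
# compiled from one of the models defined below.  It assumes Theano is installed as
# Lasagne's backend; `model` is any loaded `VGGModel` instance.
def _make_predict_fn(model):
    import theano
    import theano.tensor as T

    x = T.tensor4('x')
    # deterministic=True switches the dropout layers off, as required at prediction time
    prob = lasagne.layers.get_output(model.final_layer, x, deterministic=True)
    return theano.function([x], prob)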


class VGG16Model (VGGModel):
    @classmethod
    def build_network(cls):
        net = {}

        # Input layer: shape is of the form (sample, channel, height, width).
        # We are using 3-channel images of size 224 x 224.
        # We leave the sample dimension with no size (`None`) so that the
        # minibatch size is whatever we need it to be when we use it
        net['input'] = InputLayer((None, 3, 224, 224))

        # First two convolutional layers: 64 filters, 3x3 convolution, 1 pixel padding
        net['conv1_1'] = Conv2DLayer(net['input'], 64, 3, pad=1, flip_filters=False)
        net['conv1_2'] = Conv2DLayer(net['conv1_1'], 64, 3, pad=1, flip_filters=False)
        # 2x2 max-pooling; will reduce size from 224x224 to 112x112
        net['pool1'] = Pool2DLayer(net['conv1_2'], 2)

        # Two convolutional layers, 128 filters
        net['conv2_1'] = Conv2DLayer(net['pool1'], 128, 3, pad=1, flip_filters=False)
        net['conv2_2'] = Conv2DLayer(net['conv2_1'], 128, 3, pad=1, flip_filters=False)
        # 2x2 max-pooling; will reduce size from 112x112 to 56x56
        net['pool2'] = Pool2DLayer(net['conv2_2'], 2)

        # Three convolutional layers, 256 filters
        net['conv3_1'] = Conv2DLayer(net['pool2'], 256, 3, pad=1, flip_filters=False)
        net['conv3_2'] = Conv2DLayer(net['conv3_1'], 256, 3, pad=1, flip_filters=False)
        net['conv3_3'] = Conv2DLayer(net['conv3_2'], 256, 3, pad=1, flip_filters=False)
        # 2x2 max-pooling; will reduce size from 56x56 to 28x28
        net['pool3'] = Pool2DLayer(net['conv3_3'], 2)

        # Three convolutional layers, 512 filters
        net['conv4_1'] = Conv2DLayer(net['pool3'], 512, 3, pad=1, flip_filters=False)
        net['conv4_2'] = Conv2DLayer(net['conv4_1'], 512, 3, pad=1, flip_filters=False)
        net['conv4_3'] = Conv2DLayer(net['conv4_2'], 512, 3, pad=1, flip_filters=False)
        # 2x2 max-pooling; will reduce size from 28x28 to 14x14
        net['pool4'] = Pool2DLayer(net['conv4_3'], 2)

        # Three convolutional layers, 512 filters
        net['conv5_1'] = Conv2DLayer(net['pool4'], 512, 3, pad=1, flip_filters=False)
        net['conv5_2'] = Conv2DLayer(net['conv5_1'], 512, 3, pad=1, flip_filters=False)
        net['conv5_3'] = Conv2DLayer(net['conv5_2'], 512, 3, pad=1, flip_filters=False)
        # 2x2 max-pooling; will reduce size from 14x14 to 7x7
        net['pool5'] = Pool2DLayer(net['conv5_3'], 2)

        # Dense layer, 4096 units
        net['fc6'] = DenseLayer(net['pool5'], num_units=4096)
        # 50% dropout (only applied during training, turned off during prediction)
        net['fc6_dropout'] = DropoutLayer(net['fc6'], p=0.5)

        # Dense layer, 4096 units
        net['fc7'] = DenseLayer(net['fc6_dropout'], num_units=4096)
        # 50% dropout (only applied during training, turned off during prediction)
        net['fc7_dropout'] = DropoutLayer(net['fc7'], p=0.5)

        # Final dense layer, 1000 units: 1 for each class
        net['fc8'] = DenseLayer(net['fc7_dropout'], num_units=1000, nonlinearity=None)
        # Softmax non-linearity that will generate probabilities
        net['prob'] = NonlinearityLayer(net['fc8'], softmax)

        return net, 'prob'

    @staticmethod
    def load_params():
        if imagenet_vgg is not None:
            return imagenet_vgg.VGG16Model.load_params()
        else:
            download(VGG16_PATH, 'http://s3.amazonaws.com/lasagne/recipes/pretrained/imagenet/vgg16.pkl')
            return VGGModel.unpickle_from_path(VGG16_PATH)

    @classmethod
    def load(cls):
        return cls.from_loaded_params(cls.load_params())


class VGG19Model (VGGModel):
    @classmethod
    def build_network(cls):
        net = {}

        # Input layer: shape is of the form (sample, channel, height, width).
        # We are using 3-channel images of size 224 x 224.
        # We leave the sample dimension with no size (`None`) so that the
        # minibatch size is whatever we need it to be when we use it
        net['input'] = InputLayer((None, 3, 224, 224))

        # First two convolutional layers: 64 filters, 3x3 convolution, 1 pixel padding
        net['conv1_1'] = Conv2DLayer(net['input'], 64, 3, pad=1, flip_filters=False)
        net['conv1_2'] = Conv2DLayer(net['conv1_1'], 64, 3, pad=1, flip_filters=False)
        # 2x2 max-pooling; will reduce size from 224x224 to 112x112
        net['pool1'] = Pool2DLayer(net['conv1_2'], 2)

        # Two convolutional layers, 128 filters
        net['conv2_1'] = Conv2DLayer(net['pool1'], 128, 3, pad=1, flip_filters=False)
        net['conv2_2'] = Conv2DLayer(net['conv2_1'], 128, 3, pad=1, flip_filters=False)
        # 2x2 max-pooling; will reduce size from 112x112 to 56x56
        net['pool2'] = Pool2DLayer(net['conv2_2'], 2)

        # Four convolutional layers, 256 filters
        net['conv3_1'] = Conv2DLayer(net['pool2'], 256, 3, pad=1, flip_filters=False)
        net['conv3_2'] = Conv2DLayer(net['conv3_1'], 256, 3, pad=1, flip_filters=False)
        net['conv3_3'] = Conv2DLayer(net['conv3_2'], 256, 3, pad=1, flip_filters=False)
        net['conv3_4'] = Conv2DLayer(net['conv3_3'], 256, 3, pad=1, flip_filters=False)
        # 2x2 max-pooling; will reduce size from 56x56 to 28x28
        net['pool3'] = Pool2DLayer(net['conv3_4'], 2)

        # Four convolutional layers, 512 filters
        net['conv4_1'] = Conv2DLayer(net['pool3'], 512, 3, pad=1, flip_filters=False)
        net['conv4_2'] = Conv2DLayer(net['conv4_1'], 512, 3, pad=1, flip_filters=False)
        net['conv4_3'] = Conv2DLayer(net['conv4_2'], 512, 3, pad=1, flip_filters=False)
        net['conv4_4'] = Conv2DLayer(net['conv4_3'], 512, 3, pad=1, flip_filters=False)
        # 2x2 max-pooling; will reduce size from 28x28 to 14x14
        net['pool4'] = Pool2DLayer(net['conv4_4'], 2)

        # Four convolutional layers, 512 filters
        net['conv5_1'] = Conv2DLayer(net['pool4'], 512, 3, pad=1, flip_filters=False)
        net['conv5_2'] = Conv2DLayer(net['conv5_1'], 512, 3, pad=1, flip_filters=False)
        net['conv5_3'] = Conv2DLayer(net['conv5_2'], 512, 3, pad=1, flip_filters=False)
        net['conv5_4'] = Conv2DLayer(net['conv5_3'], 512, 3, pad=1, flip_filters=False)
        # 2x2 max-pooling; will reduce size from 14x14 to 7x7
        net['pool5'] = Pool2DLayer(net['conv5_4'], 2)

        # Dense layer, 4096 units
        net['fc6'] = DenseLayer(net['pool5'], num_units=4096)
        # 50% dropout (only applied during training, turned off during prediction)
        net['fc6_dropout'] = DropoutLayer(net['fc6'], p=0.5)

        # Dense layer, 4096 units
        net['fc7'] = DenseLayer(net['fc6_dropout'], num_units=4096)
        # 50% dropout (only applied during training, turned off during prediction)
        net['fc7_dropout'] = DropoutLayer(net['fc7'], p=0.5)

        # Final dense layer, 1000 units: 1 for each class
        net['fc8'] = DenseLayer(net['fc7_dropout'], num_units=1000, nonlinearity=None)
        # Softmax non-linearity that will generate probabilities
        net['prob'] = NonlinearityLayer(net['fc8'], softmax)

        return net, 'prob'

    @staticmethod
    def load_params():
        if imagenet_vgg is not None:
            return imagenet_vgg.VGG19Model.load_params()
        else:
            download(VGG19_PATH, 'http://s3.amazonaws.com/lasagne/recipes/pretrained/imagenet/vgg19.pkl')
            return VGGModel.unpickle_from_path(VGG19_PATH)

    @classmethod
    def load(cls):
        return cls.from_loaded_params(cls.load_params())
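

# Usage sketch (not part of the original recipe): load the pre-trained VGG-16 network and
# classify a single image using the `_make_predict_fn` sketch above.  The image path below
# is a placeholder; substitute any RGB image on disk.  Requires Theano as the Lasagne backend.
if __name__ == '__main__':
    import skimage.io

    model = VGG16Model.load()
    predict_fn = _make_predict_fn(model)

    im = skimage.io.imread('example.jpg')       # placeholder path
    raw_im, vgg_im = model.prepare_image(im)    # scale, crop, RGB -> BGR, mean-subtract
    probs = predict_fn(vgg_im)[0]               # class probabilities, shape (1000,)

    # Report the five most probable ImageNet classes
    for i in np.argsort(probs)[::-1][:5]:
        print('{0}: {1:.2%}'.format(model.class_names[i], probs[i]))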