/
archbase.py
367 lines (282 loc) · 13.7 KB
/
archbase.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
from __future__ import absolute_import, division, print_function
from abc import ABCMeta, abstractmethod, abstractproperty
import gc
import os
import sys
import warnings
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.layers import Activation, Layer, LeakyReLU, PReLU, ThresholdedReLU
import six
from energyflow.utils import iter_or_rep
__all__ = ['ArchBase', 'NNBase']
###############################################################################
# ArchBase
###############################################################################
class ArchBase(six.with_metaclass(ABCMeta, object)):

    """Base class for all architectures contained in EnergyFlow. The mechanism of
    specifying hyperparameters for all architectures is described here. Methods
    common to all architectures are documented here. Note that this class cannot
    be instantiated directly as it is an abstract base class.
    """

    # ArchBase(*args, **kwargs)
    def __init__(self, *args, **kwargs):
        """Accepts arbitrary arguments. Positional arguments (if present) are
        dictionaries of hyperparameters, keyword arguments (if present) are
        hyperparameters directly. Keyword hyperparameters take precedence over
        positional hyperparameter dictionaries.

        **Arguments**

        - ***args** : arbitrary positional arguments
            - Each argument is a dictionary containing hyperparameter (name, value)
            pairs.
        - ***kwargs** : arbitrary keyword arguments
            - Hyperparameters as keyword arguments. Takes precedence over the
            positional arguments.
        """

        # store all options; later dictionaries override earlier ones and
        # keyword arguments override every positional dictionary
        self.hps = {}
        for d in args:
            self.hps.update(d)
        self.hps.update(kwargs)

        # process hyperparameters
        self._process_hps()

        # construct model
        self._construct_model()

    def _proc_arg(self, name, **kwargs):
        """Remove the hyperparameter `name` from `self.hps` and return its value.

        Recognized keyword arguments:

        - `default` : value returned when `name` was not supplied. Without a
        default, a missing `name` raises `KeyError`.
        - `old` : a deprecated alias for `name`. If the alias is present in
        `self.hps` a `FutureWarning` is issued and the alias value is used as
        the default, so an explicitly supplied `name` still takes precedence.
        """

        if 'old' in kwargs and kwargs['old'] in self.hps:
            old = kwargs['old']
            m = ('\'{}\' is deprecated and will be removed in the future, '
                 'use \'{}\' instead.').format(old, name)
            warnings.warn(FutureWarning(m))

            # the deprecated value is only a fallback for the new name
            kwargs['default'] = self.hps.pop(old)

        return (self.hps.pop(name, kwargs['default']) if 'default' in kwargs
                else self.hps.pop(name))

    def _verify_empty_hps(self):
        """Raise `ValueError` if any hyperparameter was left unconsumed in
        `self.hps`; otherwise delete the `hps` attribute.
        """

        # hps should be all empty now
        for k in self.hps:
            raise ValueError('unrecognized keyword argument {}'.format(k))

        del self.hps

    @abstractmethod
    def _process_hps(self):
        pass

    @abstractmethod
    def _construct_model(self):
        pass

    # fit(X_train, Y_train, **kwargs)
    @abstractmethod
    def fit(self):
        """Train the model by fitting the provided training dataset and labels.
        Transparently calls the `fit()` method of the underlying model.

        **Arguments**

        - **X_train** : _numpy.ndarray_
            - The training dataset as an array of features for each sample.
        - **Y_train** : _numpy.ndarray_
            - The labels for the training dataset. May need to be one-hot encoded
            depending on the requirements of the underlying model (typically Keras
            models will use one-hot encoding whereas the linear model does not.)
        - **kwargs** : _dict_
            - Keyword arguments passed on to the `fit` method of the underlying
            model. Most relevant for neural network models, where the [Keras model
            docs](https://keras.io/models/model/#fit) contain detailed information
            on the possible arguments.

        **Returns**

        - Whatever the underlying model's `fit()` returns.
        """

        pass

    # predict(X_test, **kwargs)
    @abstractmethod
    def predict(self):
        """Evaluate the model on a dataset. Note that for the `LinearClassifier`
        this corresponds to the `predict_proba` method of the underlying
        scikit-learn model.

        **Arguments**

        - **X_test** : _numpy.ndarray_
            - The dataset to evaluate the model on.
        - **kwargs** : _dict_
            - Keyword arguments passed on to the underlying model when
            predicting on a dataset.

        **Returns**

        - _numpy.ndarray_
            - The value of the model on the input dataset.
        """

        pass

    @abstractproperty
    def model(self):
        """The underlying model held by this architecture. Note that accessing
        an attribute that the architecture does not have will result in
        attempting to retrieve the attribute from this model. This allows for
        interrogation of the EnergyFlow architecture in the same manner as the
        underlying model.

        **Examples**

        - For neural network models:
            - `model.layers` will return a list of the layers, where
            `model` is any EnergyFlow neural network.
        - For linear models:
            - `model.coef_` will return the coefficients, where `model`
            is any EnergyFlow `LinearClassifier` instance.
        """

        pass

    # pass on unknown attribute lookups to the underlying model
    def __getattr__(self, attr):
        """Forward lookups of attributes missing on this object to `self.model`.

        Raises `AttributeError` if the model does not have the attribute either,
        or if the model itself cannot be retrieved yet.
        """

        name = self.__class__.__name__
        m = ("'{}' object has no attribute '{}', ".format(name, attr)
             + "check of underlying model failed")

        # Retrieving `self.model` may itself hit a missing attribute (e.g. a
        # `model` property reading storage that has not been assigned yet),
        # which re-enters this method. Without a guard that cycle never ends,
        # so a flag in the instance dict marks a lookup that is in progress.
        state = self.__dict__
        if state.get('_archbase_resolving_model', False):
            raise AttributeError(m)

        state['_archbase_resolving_model'] = True
        try:
            model = self.model
        except AttributeError:
            raise AttributeError(m)
        finally:
            state.pop('_archbase_resolving_model', None)

        if hasattr(model, attr):
            return getattr(model, attr)

        raise AttributeError(m)
###############################################################################
# NNBase
###############################################################################
class NNBase(ArchBase):

    """Base class for the Keras-backed architectures. Handles the
    hyperparameters shared by the neural network models and wraps the `fit` and
    `predict` methods of the underlying model, which is read from `self._model`
    (expected to be assigned by the subclass when constructing the model).
    """

    def _process_hps(self):
        """**Default NN Hyperparameters**

        Common hyperparameters that apply to all architectures except for
        [`LinearClassifier`](#linearclassifier).

        **Compilation Options**

        - **loss**=`'categorical_crossentropy'` : _str_
            - The loss function to use for the model. See the [Keras loss
            function docs](https://keras.io/losses/) for available loss
            functions.
        - **optimizer**=`'adam'` : Keras optimizer or _str_
            - A [Keras optimizer](https://keras.io/optimizers/) instance or a
            string referring to one (in which case the default arguments are
            used).
        - **metrics**=`['acc']` : _list_ of _str_
            - The [Keras metrics](https://keras.io/metrics/) to apply to the
            model.
        - **compile_opts**=`{}` : _dict_
            - Dictionary of keyword arguments to be passed on to the
            [`compile`](https://keras.io/models/model/#compile) method of the
            model. `loss`, `optimizer`, and `metrics` (see above) are included
            in this dictionary. All other values are the Keras defaults.

        **Output Options**

        - **output_dim**=`2` : _int_
            - The output dimension of the model.
        - **output_act**=`'softmax'` : _str_ or Keras activation
            - Activation function to apply to the output.

        **Callback Options**

        - **filepath**=`None` : _str_
            - The file path for where to save the model. If `None` then the
            model will not be saved.
        - **save_while_training**=`True` : _bool_
            - Whether the model is saved during training (using the
            [`ModelCheckpoint`](https://keras.io/callbacks/#modelcheckpoint)
            callback) or only once training terminates. Only relevant if
            `filepath` is set.
        - **save_weights_only**=`False` : _bool_
            - Whether only the weights of the model or the full model are
            saved. Only relevant if `filepath` is set.
        - **modelcheck_opts**=`{'save_best_only':True, 'verbose':1}` : _dict_
            - Dictionary of keyword arguments to be passed on to the
            [`ModelCheckpoint`](https://keras.io/callbacks/#modelcheckpoint)
            callback, if it is present. `save_weights_only` (see above) is
            included in this dictionary. All other arguments are the Keras
            defaults.
        - **patience**=`None` : _int_
            - The number of epochs with no improvement after which the training
            is stopped (using the
            [`EarlyStopping`](https://keras.io/callbacks/#earlystopping)
            callback). If `None` then no early stopping is used.
        - **earlystop_opts**=`{'restore_best_weights':True, 'verbose':1}` : _dict_
            - Dictionary of keyword arguments to be passed on to the
            [`EarlyStopping`](https://keras.io/callbacks/#earlystopping)
            callback, if it is present. `patience` (see above) is included in
            this dictionary. All other arguments are the Keras defaults.

        **Flags**

        - **name_layers**=`True` : _bool_
            - Whether to give the layers of the model explicit names or let
            them be named automatically. One reason to set this to `False`
            would be in order to use parts of this model in another model
            (all Keras layers in a model are required to have unique names).
        - **compile**=`True` : _bool_
            - Whether the model should be compiled or not.
        - **summary**=`True` : _bool_
            - Whether a summary should be printed or not.
        """

        # compilation; entries in `compile_opts` override the individual options
        self.compile_opts = {'loss': self._proc_arg('loss', default='categorical_crossentropy'),
                             'optimizer': self._proc_arg('optimizer', default='adam'),
                             'metrics': self._proc_arg('metrics', default=['acc'])}
        self.compile_opts.update(self._proc_arg('compile_opts', default={}))

        # add these attributes for historical reasons
        self.loss = self.compile_opts['loss']
        self.optimizer = self.compile_opts['optimizer']
        self.metrics = self.compile_opts['metrics']

        # output
        self.output_dim = self._proc_arg('output_dim', default=2)
        self.output_act = self._proc_arg('output_act', default='softmax')

        # callbacks
        self.filepath = self._proc_arg('filepath', default=None)
        if self.filepath is not None:
            self.filepath = os.path.expanduser(self.filepath)
        self.save_while_training = self._proc_arg('save_while_training', default=True)

        # entries in `modelcheck_opts` override the defaults set here
        self.modelcheck_opts = {'save_best_only': True, 'verbose': 1,
                'save_weights_only': self._proc_arg('save_weights_only', default=False)}
        self.modelcheck_opts.update(self._proc_arg('modelcheck_opts', default={}))
        self.save_weights_only = self.modelcheck_opts['save_weights_only']

        # entries in `earlystop_opts` override the defaults set here
        self.earlystop_opts = {'restore_best_weights': True, 'verbose': 1,
                               'patience': self._proc_arg('patience', default=None)}
        self.earlystop_opts.update(self._proc_arg('earlystop_opts', default={}))
        self.patience = self.earlystop_opts['patience']

        # flags
        self.name_layers = self._proc_arg('name_layers', default=True)
        self.compile = self._proc_arg('compile', default=True)
        self.summary = self._proc_arg('summary', default=True)

    def _add_act(self, act):
        """Append the activation `act` to the model as a layer.

        `act` may be a Keras layer instance, the name of a layer in `ACT_DICT`,
        or anything accepted by the Keras `Activation` layer.
        """

        # the module-level helper holds the single copy of the dispatch logic
        self.model.add(_get_act_layer(act))

    def _proc_name(self, name):
        """Return `name` if layers are to be named explicitly, else `None`."""

        return name if self.name_layers else None

    def _compile_model(self):
        """Compile the model and print its summary, as requested by the
        `compile` and `summary` flags.
        """

        # compile model if specified
        if self.compile:
            self.model.compile(**self.compile_opts)

            # print summary
            if self.summary:
                self.model.summary()

    def fit(self, *args, **kwargs):
        """Fit the underlying model, adding the checkpoint and early stopping
        callbacks that were configured through the hyperparameters.

        All arguments are passed on to the `fit` method of the underlying
        model. Callbacks given through the `callbacks` keyword argument are
        kept and placed ahead of the built-in ones. Returns whatever the
        underlying `fit` returns.
        """

        # list of callback functions
        callbacks = []

        # do model checkpointing, used mainly to save model during training instead of at end
        if self.filepath and self.save_while_training:
            callbacks.append(ModelCheckpoint(self.filepath, **self.modelcheck_opts))

        # do early stopping, which also handles loading best weights at the end
        if self.patience is not None:
            callbacks.append(EarlyStopping(**self.earlystop_opts))

        # Combine into a new list rather than extending the one passed in:
        # the caller's list must not grow on every call to fit, and this also
        # accepts `callbacks=None` or a tuple of callbacks.
        kwargs['callbacks'] = list(kwargs.get('callbacks') or ()) + callbacks

        # do the fitting
        hist = self.model.fit(*args, **kwargs)

        # handle saving at the end, if we weren't already saving throughout
        if self.filepath and not self.save_while_training:
            if self.save_weights_only:
                self.model.save_weights(self.filepath)
            else:
                self.model.save(self.filepath)

        # take out the trash
        gc.collect()

        return hist

    def predict(self, *args, **kwargs):
        """Return the result of the underlying model's `predict`, to which all
        arguments are passed on.
        """

        return self.model.predict(*args, **kwargs)

    @property
    def model(self):
        """The underlying model, stored in `self._model`."""

        return self._model
###############################################################################
# Activation Functions
###############################################################################
# Layer classes from keras.layers, keyed by the name a user may pass as a
# string; a matching string is instantiated as that layer instead of being
# wrapped in a generic `Activation` layer.
ACT_DICT = {
    'LeakyReLU': LeakyReLU,
    'PReLU': PReLU,
    'ThresholdedReLU': ThresholdedReLU,
}
def _get_act_layer(act):
    """Return a Keras layer implementing the activation `act`.

    A `Layer` instance is returned unchanged, a string naming an entry of
    `ACT_DICT` yields a new instance of that layer class, and anything else
    is wrapped in an `Activation` layer.
    """

    # an existing layer needs no conversion
    if isinstance(act, Layer):
        return act

    # only strings are looked up in ACT_DICT; the type check comes first so
    # that other objects are never used as dictionary keys
    names_layer_class = isinstance(act, six.string_types) and act in ACT_DICT

    return ACT_DICT[act]() if names_layer_class else Activation(act)