Commit
Try to fit logistic regression to the iris dataset using Keras.
Adam fails to reach the global optimum; trying scipy.optimize.minimize with (L-)BFGS...
Showing 3 changed files with 324 additions and 0 deletions.
examples/custom_optimizers.py
@@ -0,0 +1,249 @@
# Allow scipy.optimize.minimize to be used by Keras.
# https://github.com/fchollet/keras/issues/5085
# Original code by ncullen93.
# Modified by murphyk so that get_updates has the correct signature,
# get_updates(self, loss, params), instead of the old
# get_updates(self, params, constraints, loss).
#
# According to
# https://github.com/fchollet/keras/blob/master/keras/optimizers.py
# any Keras optimizer has to implement the method
#   get_updates(self, loss, params)
# which should return a list of K.update(p, new_p) objects.
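#
# For reference, a minimal sketch of that contract (a hypothetical plain-SGD
# optimizer with a fixed step size; not part of this commit):
#
#   class ToySGD(Optimizer):
#       def get_updates(self, loss, params):
#           grads = K.gradients(loss, params)
#           self.updates = [K.update(p, p - 0.01 * g)
#                           for p, g in zip(params, grads)]
#           return self.updates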

from __future__ import division
from __future__ import print_function

import numpy as np
import scipy as sp
import scipy.optimize  # 'import scipy as sp' alone does not pull in scipy.optimize

from keras import backend as K
from keras.optimizers import Optimizer

class ScipyOpt(Optimizer):
    """
    Keras optimizer that delegates training to scipy.optimize.minimize.
    """
    def __init__(self, model, x, y, nb_epoch=500, method='L-BFGS-B', verbose=1, **kwargs):
        super(ScipyOpt, self).__init__(**kwargs)
        self.model = model
        self.x = x            # input training data
        self.y = y            # target training data
        self.nb_epoch = nb_epoch
        self.method = method  # any scipy.optimize.minimize method, e.g. 'L-BFGS-B'
        self.verbose = verbose
        self.epoch_idx = 0

    def get_cost_grads(self, params, loss):
        """
        Get the loss and gradients of a Keras model.
        These are both TensorVariables.
        Arguments
        ---------
        params : list of trainable parameters (TensorVariables)
        loss : model loss function
        Returns
        -------
        loss : a TensorVariable
            The model loss
        grads : a list of TensorVariables
            Gradients of model params w.r.t. the loss
        Effects
        -------
        None
        """
        grads = K.gradients(loss, params)
        return loss, grads

    def set_model_params(self, theta):
        """
        Sets the Keras model params from a flattened numpy array theta.
        Arguments
        ---------
        theta : a flattened numpy ndarray
            The parameter values to set in the model
        Returns
        -------
        None
        Effects
        -------
        - Sets the model parameters to the values in theta
        """
        trainable_params = self.unpack_theta(theta)
        for trainable_param, layer in zip(trainable_params, self.model.layers):
            layer.set_weights(trainable_param)

    def unpack_theta(self, theta):
        """
        Converts flattened theta back to the tensor shapes of the Keras
        model params.
        Arguments
        ---------
        theta : a flattened numpy ndarray
        Returns
        -------
        weights : a list of numpy ndarrays in the shape of model params
        Effects
        -------
        None
        """
        weights = []
        idx = 0
        for layer in self.model.layers:
            layer_weights = []
            for param in layer.get_weights():
                plen = np.prod(param.shape)
                layer_weights.append(np.asarray(
                    theta[idx:(idx + plen)].reshape(param.shape),
                    dtype=np.float32))
                idx += plen
            weights.append(layer_weights)
        return weights

    def pack_theta(self, trainable_weights):
        """
        Flattens a set of shared variables (trainable_weights).
        Arguments
        ---------
        trainable_weights : a list of shared variables
        Returns
        -------
        x : a flattened numpy ndarray of trainable weight values
        Effects
        -------
        None
        """
        x = np.empty(0)
        for t in trainable_weights:
            x = np.concatenate((x, K.get_value(t).reshape(-1)))
        return x
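
    # Sanity check: for a model whose weights are all trainable, pack_theta
    # and unpack_theta are inverses, so a round trip is the identity.
    # Hypothetical usage (not part of this commit):
    #
    #   theta = opt.pack_theta(model.trainable_weights)
    #   for ws, layer in zip(opt.unpack_theta(theta), model.layers):
    #       for w, lw in zip(ws, layer.get_weights()):
    #           assert np.allclose(w, lw)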

    def flatten_grads(self, grads):
        """
        Flattens a list of evaluated gradient arrays into a single vector.
        Arguments
        ---------
        grads : a list of numpy ndarrays
            Evaluated gradients of model params
        Returns
        -------
        x : a flattened numpy ndarray of gradient values
        Effects
        -------
        None
        """
        x = np.empty(0)
        for g in grads:
            x = np.concatenate((x, g.reshape(-1)))
        return x

    def get_train_fn(self, params, loss):
        """
        Get a scipy.optimize-compatible training function.
        Arguments
        ---------
        params : a list of trainable Keras TensorVariables
        loss : a TensorVariable
        Returns
        -------
        train_fn : a callable python function
            A scipy.optimize-compatible function returning loss & grads
        Effects
        -------
        - Alters the trainable parameters of the input Keras model.
        """
        cost, grads = self.get_cost_grads(params, loss)
        outs = [cost]
        if isinstance(grads, (list, tuple)):
            outs += grads
        else:
            outs.append(grads)
        if self.verbose > 0:
            print('Compiling Training Function..')

        # The original (Theano-style) code passed the data via a 'givens' kwarg,
        # which the TensorFlow backend's K.function does not support:
        # https://github.com/fchollet/keras/blob/master/keras/backend/tensorflow_backend.py#L2277
        # Instead, declare the placeholders as explicit inputs and feed the
        # (fixed) training data on every call.
        ins = [self.model.model.inputs[0],
               self.model.model.targets[0],
               self.model.model.sample_weights[0],
               K.learning_phase()]
        fn = K.function(inputs=ins, outputs=outs)
        feed = [self.x, self.y,
                np.ones((self.x.shape[0],), dtype=np.float32),
                np.uint8(1)]

        def train_fn(theta):
            self.set_model_params(theta)
            cost_grads = fn(feed)
            cost = np.asarray(cost_grads[0], dtype=np.float64)

            if self.verbose > 0:
                try:
                    print('Epoch : %i/%i Loss : %f' % (self.epoch_idx,
                                                       self.nb_epoch, cost))
                except ValueError:
                    pass
            grads = np.asarray(self.flatten_grads(cost_grads[1:]), dtype=np.float64)
            if self.verbose > 1 and self.epoch_idx % 25 == 0:
                try:
                    print('Avg. Grad: ', grads.mean())
                except ValueError:
                    pass
            self.epoch_idx += 1
            return cost, grads

        return train_fn

    #def get_updates(self, params, constraints, loss):  # old signature
    def get_updates(self, loss, params):  # murphyk: signature expected by Keras
        theta0 = self.pack_theta(params)
        train_fn = self.get_train_fn(params, loss)

        weights = sp.optimize.minimize(train_fn, theta0,
                                       method=self.method, jac=True,
                                       options={'maxiter': self.nb_epoch, 'disp': False})

        # train_fn writes every candidate theta into the model, but the last
        # point evaluated is not necessarily the optimum, so copy the final
        # answer back into the model explicitly.
        theta_final = weights.x
        self.set_model_params(theta_final)

        # All parameter updates happen on the scipy side, so there are no
        # symbolic K.update ops to return.
        self.updates = []
        return self.updates

    def get_config(self):
        # This optimizer has no lr or epsilon; expose its own hyperparameters.
        config = {'method': self.method,
                  'nb_epoch': self.nb_epoch}
        base_config = super(ScipyOpt, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
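
# A minimal usage sketch (assumptions: `model` is an uncompiled Sequential,
# X are inputs, Y1h are one-hot targets; the whole scipy run happens inside
# get_updates, which Keras invokes when training starts):
#
#   opt = ScipyOpt(model=model, x=X, y=Y1h, nb_epoch=500, method='L-BFGS-B')
#   model.compile(loss='categorical_crossentropy', optimizer=opt)
#   model.fit(X, Y1h, epochs=1, batch_size=X.shape[0], verbose=0)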
File renamed without changes.
@@ -0,0 +1,75 @@

# MLP on the 3-class iris data

import matplotlib.pyplot as plt
import numpy as np
from sklearn import linear_model, datasets, metrics

import keras
from keras.models import Sequential
from keras.layers import Dense, Activation

# import the data
iris = datasets.load_iris()
X = iris.data[:, :2]  # we only take the first two features
Y = iris.target

ndim = X.shape[1]
nclasses = len(np.unique(Y))
one_hot_labels = keras.utils.to_categorical(Y, num_classes=nclasses)

# logreg as baseline
logreg = linear_model.LogisticRegression(C=1e5, multi_class='multinomial', solver='lbfgs')
logreg.fit(X, Y)
predicted = logreg.predict(X)
accuracy_logreg = metrics.accuracy_score(Y, predicted)
print(accuracy_logreg)  # 0.833

# MLP with 0 hidden layers - should match logreg
model = Sequential([
    Dense(nclasses, input_shape=(ndim,)),
    Activation('softmax'),
])
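
# Note: a Dense layer followed by softmax computes p(y=c|x) = softmax(Wx + b)_c,
# which is exactly multinomial logistic regression, so a fully converged fit
# should match the sklearn baseline above.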

# Fit

import tensorflow as tf
import scipy

#import custom_optimizers
import imp
# we assume we're executing from /Users/kpmurphy/github/pyprobml
imp.load_source('custom_opt', 'examples/custom_optimizers.py')
import custom_opt

opt = keras.optimizers.Adam()
# Note: categorical_crossentropy expects one-hot targets, so pass
# one_hot_labels (not Y) when enabling the scipy optimizer:
#opt = custom_opt.ScipyOpt(model=model, x=X, y=one_hot_labels, nb_epoch=10)

lossfn = keras.losses.categorical_crossentropy

# Scratch notes on other ways to plug (L-)BFGS into training:
# opt_bfgs_scipy = scipy.optimize.fmin_l_bfgs_b
# lossfn_train = lambda ypred: lossfn(Y, ypred)
# #tfopt = tf.contrib.opt.ScipyOptimizerInterface(lossfn_train, options={'maxiter': 100})
# opt_bfgs_tf = opt_bfgs_scipy
# opt_bfgs = keras.optimizers.TFOptimizer(opt_bfgs_tf)

model.compile(loss=lossfn,
              optimizer=opt,
              metrics=['accuracy'])
history = model.fit(X, one_hot_labels, epochs=500, batch_size=20, verbose=1)
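
# Compare against the sklearn baseline on the training set; with
# metrics=['accuracy'], model.evaluate returns [loss, accuracy].
score = model.evaluate(X, one_hot_labels, verbose=0)
print('Keras accuracy: %.3f, logreg baseline: %.3f' % (score[1], accuracy_logreg))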

# Plot training curves (loss and accuracy per epoch)
loss_trace = history.history['loss']
acc_trace = history.history['acc']
plt.figure()
plt.subplot(1, 2, 1)
plt.plot(loss_trace)
plt.title('loss')
plt.subplot(1, 2, 2)
plt.plot(acc_trace)
plt.title('accuracy')
plt.show()