Merge pull request #54 from markroxor/batch_descent
[MRG] using mini batch for gd
markroxor committed Jul 13, 2018
2 parents 611bb32 + de0ed44 commit d1d993b
Showing 7 changed files with 172 additions and 199 deletions.
193 changes: 101 additions & 92 deletions docs/notebooks/neural_network.ipynb

Large diffs are not rendered by default.

11 changes: 8 additions & 3 deletions fromscratchtoml/neural_network/activations.py
@@ -141,10 +141,15 @@ def softmax(x, return_deriv=False):
     -------
     numpy.ndarray : softmax of x
     """
+    # shifting for numerical stability
+    # refer https://eli.thegreenplace.net/2016/the-softmax-function-and-its-derivative
+
+    x -= np.max(x, axis=-1, keepdims=True)
     n = np.exp(x)
-    d = np.sum(np.exp(x))
+    d = np.sum(n, axis=-1, keepdims=True)
+    _softmax = n / d
 
     if return_deriv:
-        return n / d, 1 - (n / d)
+        return _softmax, _softmax * (1 - _softmax)
 
-    return n / d
+    return _softmax
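
For reference, a minimal standalone sketch of the numerically stable softmax introduced above (illustrative code, not part of the patch; stable_softmax is a made-up name). Subtracting the row-wise maximum leaves the result unchanged, because exp(x - c) / sum(exp(x - c)) = exp(x) / sum(exp(x)), while keeping exp() from overflowing on large logits:

import numpy as np

def stable_softmax(x):
    # subtract the row-wise max; the softmax value is unchanged,
    # but exp() can no longer overflow for large inputs
    x = x - np.max(x, axis=-1, keepdims=True)
    n = np.exp(x)
    return n / np.sum(n, axis=-1, keepdims=True)

logits = np.array([[1000.0, 1001.0], [0.5, -0.5]])
print(stable_softmax(logits))  # rows are finite and sum to 1
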
11 changes: 5 additions & 6 deletions fromscratchtoml/neural_network/layers/activation.py
@@ -76,20 +76,19 @@ def forward(self, X, return_deriv=False):
 
         return self.output
 
-    def back_propogate(self, delta):
+    def back_propogate(self, dEdO):
         """
         Backpropogate the error, this function adds the share of activation layer to the accumulated delta.
         Parameters
         ----------
-        delta : numpy.ndarray
+        dEdO : numpy.ndarray
             The accumulated delta used for calculating error gradient with respect to parameters.
         Returns
         -------
         numpy.array : The accumulated delta.
-        numpy.array : Current updated derivative of error with respect to bias.
-        numpy.array : Current updated derivative of error with respect to weight.
         """
-        delta = delta * self.output_deriv
-        return delta, 0, 0
+
+        dEdO = dEdO * self.output_deriv
+        return dEdO
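
To illustrate what this backward step computes (a rough sketch under the same interface, not the library's code): an activation layer holds no parameters, so during backpropagation it only scales the incoming error gradient by the derivative of the activation evaluated at its cached forward output, e.g. for tanh:

import numpy as np

# forward: cache the activation output and its derivative
x = np.array([[0.5, -1.0], [2.0, 0.1]])
output = np.tanh(x)
output_deriv = 1.0 - output ** 2      # d tanh(x) / dx

# backward: chain rule -- scale the gradient coming from the next layer
dEdO_from_next_layer = np.ones_like(output)
dEdO_to_previous_layer = dEdO_from_next_layer * output_deriv
print(dEdO_to_previous_layer)
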
10 changes: 2 additions & 8 deletions fromscratchtoml/neural_network/layers/base_layer.py
@@ -33,20 +33,14 @@ class Layer(object):
     >>> model.fit(X1, y1, batch_size=4, epochs=100)
     >>> model.predict(X1, one_hot=True)
     """
-    def optimize(self, optimizer, der_cost_bias, der_cost_weight):
+    def optimize(self, optimizer):
         """
         Optimize the weights corresponding to the optimizer function supplied.
         Parameters
         ----------
         optimizer : fromscratchtoml.neural_network.optimizers
             The optimizing procedure followed for updating the weights.
-        der_cost_bias : numpy.ndarray
-            The derivative of error with respect to bias.
-        der_cost_weights : numpy.ndarray
-            The derivative of error with respect to weights.
         """
-        if self.trainable:
-            self.weights = optimizer.update_weights(self.weights, der_cost_weight)
-            self.biases = optimizer.update_weights(self.biases, der_cost_bias)
+        pass
+        return
21 changes: 12 additions & 9 deletions fromscratchtoml/neural_network/layers/dense.py
@@ -86,17 +86,17 @@ def forward(self, X, return_deriv=False):
 
         if self.weights is None:
             self.biases = np.random.randn(1, self.units)
-            self.weights = np.random.randn(X.shape[0], self.units)
+            self.weights = np.random.randn(X.shape[1], self.units)
 
         self.input = X
-        self.output = (np.dot(X.T, self.weights) + self.biases).T
+        self.output = np.dot(X, self.weights) + self.biases
 
         if return_deriv:
             return self.output, 0
 
         return self.output
 
-    def back_propogate(self, delta):
+    def back_propogate(self, dEdO):
         """
         Backpropogate the error, this function adds the share of dense layer to the accumulated delta.
@@ -108,10 +108,13 @@ def back_propogate(self, delta):
         Returns
         -------
         numpy.array : The accumulated delta.
-        numpy.array : Current updated derivative of error with respect to bias.
-        numpy.array : Current updated derivative of error with respect to weight.
         """
-        der_error_bias = delta.T
-        der_error_weight = np.dot(delta, self.input.T).T
-        delta = np.dot(self.weights, delta)
-        return delta, der_error_bias, der_error_weight
+
+        self.dEdB = np.sum(dEdO)
+        self.dEdW = np.dot(self.input.T, dEdO)
+        dEdO = np.dot(dEdO, self.weights.T)
+        return dEdO
+
+    def optimize(self, optimizer):
+        self.weights = optimizer.update_weights(self.weights, self.dEdW)
+        self.biases = optimizer.update_weights(self.biases, self.dEdB)
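
A small sketch of the batched gradients the new back_propogate caches, assuming an input batch X of shape (batch, n_in) and an upstream gradient dEdO of shape (batch, n_out); variable names are illustrative. Note that the patch itself reduces the bias gradient with a plain np.sum, whereas the per-unit axis=0 reduction is shown here for clarity:

import numpy as np

batch, n_in, n_out = 4, 3, 2
X = np.random.randn(batch, n_in)          # layer input, cached on the forward pass
W = np.random.randn(n_in, n_out)
b = np.random.randn(1, n_out)
out = X @ W + b                            # forward pass, shape (batch, n_out)

dEdO = np.random.randn(batch, n_out)       # gradient flowing in from the next layer
dEdW = X.T @ dEdO                          # shape (n_in, n_out), matches W
dEdB = dEdO.sum(axis=0, keepdims=True)     # shape (1, n_out), matches b
dEdX = dEdO @ W.T                          # shape (batch, n_in), passed backwards
print(dEdW.shape, dEdB.shape, dEdX.shape)
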
105 changes: 35 additions & 70 deletions fromscratchtoml/neural_network/models/sequential.py
@@ -39,7 +39,7 @@ class Sequential(BaseModel):
     >>> sgd = StochasticGradientDescent(learning_rate=0.1)
     >>> model.compile(optimizer=sgd, loss="mean_squared_error")
     >>> model.fit(X1, y1, batch_size=4, epochs=100)
-    >>> model.predict(X1, one_hot=True)
+    >>> model.predict(X1)
     """
     def __init__(self, verbose=False, vis_each_epoch=False):
         """
@@ -86,7 +86,11 @@ def accuracy(self, X, y):
         -------
         float : The accuracy in percentage.
         """
-        y_pred = self.predict(X, one_hot=True)
+
+        if len(y.shape) > 1:
+            y = np.argmax(y, axis=1)
+        y_pred = self.predict(X)
+
         diff_arr = y - y_pred
         total_samples = y.shape[0]
@@ -118,7 +122,7 @@ def fit(self, X, y, epochs, batch_size=None):
                 self.__update_batch(batch_X, batch_y)
 
             if self.verbose or epoch == epochs - 1:
-                y_pred = self.predict(X, one_hot=True)
+                y_pred = self.predict(X, prob=True)
                 loss = self.loss(y_pred, y)
                 acc = self.accuracy(X, y)
                 print("\nepoch: {}/{} ".format(epoch + 1, epochs), end="")
@@ -138,66 +142,35 @@ def __update_batch(self, X, Y):
         Y : numpy.ndarray
             The corresponding label to the input.
         """
-        # TODO write mini batch SGD
-        # der_error_bias = None
-        # der_error_weight = None
-
-        for x, y in zip(X, Y):
-            delta_der_error_bias, delta_der_error_weight = self.back_propogation(x, y)
-            # if der_error_bias is None:
-            #     der_error_bias, der_error_weight = delta_der_error_bias, delta_der_error_weight
-            # else:
-            #     der_error_bias += delta_der_error_bias
-            #     der_error_weight += delta_der_error_weight
-
-            # updates weights in each layer
-            # for layer, db, dw in zip(self.layers, der_error_bias, der_error_weight):
-            #     layer.optimize(self.optimizer, db, dw)
-
-    def back_propogation(self, x, y):
+        y_pred = self.forwardpass(X)
+
+        _, dEdO = self.loss(y_pred, Y, return_deriv=True)
+
+        self.back_propogate_and_update(dEdO, self.optimizer)
+
+    def back_propogate_and_update(self, dEdO, optimizer):
         """
         Backpropogate the error from the last layer to the first and then optimize the weights.
         Parameters
         ----------
-        x : numpy.ndarray
-            The input to the model.
-        y : numpy.ndarray
-            The corresponding label to the input.
-        Returns
-        -------
-        numpy.array : The derivative of error with respect to biases.
-        numpy.array : The derivative of error with respect to weights.
+        dEdO : numpy.ndarray
+            The accumulated delta used for calculating error gradient with respect to parameters.
+        optimizer : fromscratchtoml.neural_network.optimizers
+            The optimizing procedure followed for updating the weights.
         """
-        y_pred, y_pred_deriv = self.forwardpass(x, return_deriv=True)
-
-        loss, loss_grad = self.loss(y_pred, y, return_deriv=True)
-
-        delta = loss_grad
-
-        der_error_biases = []
-        der_error_weights = []
-
         for layer in reversed(self.layers):
-            # updates delta
-            delta, der_error_bias, der_error_weight = layer.back_propogate(delta)
-
-            if hasattr(layer, 'weights'):
-                layer.optimize(self.optimizer, der_error_bias, der_error_weight)
+            dEdO = layer.back_propogate(dEdO)
+            layer.optimize(optimizer)
 
-            der_error_biases.append(der_error_bias)
-            der_error_weights.append(der_error_weight)
-
-        return np.array(der_error_biases[::-1]), np.array(der_error_weights[::-1])
-
-    def forwardpass(self, x, return_deriv=False):
+    def forwardpass(self, X, return_deriv=False):
         """
         Forward pass the input through all the layers in the current model.
         Parameters
         ----------
-        x : numpy.ndarray
+        X : numpy.ndarray
             The input to the model.
         return_deriv : bool, optional
             If set to true, the function returns derivative of the output along with the output.
@@ -206,45 +179,37 @@ def forwardpass(self, x, return_deriv=False):
         -------
         numpy.array : The output of the model.
         """
-        z = x
+        Z = X
 
         for layer in self.layers:
-            z, z_deriv = layer.forward(z, return_deriv=True)
+            Z, Z_deriv = layer.forward(Z, return_deriv=True)
 
         if return_deriv:
-            return z, z_deriv
+            return Z, Z_deriv
 
-        return z
+        return Z
 
-    def predict(self, X, one_hot=False):
+    def predict(self, X, prob=False):
         """
         Predicts the ouput of the model based on the trained parameters.
         Parameters
         ----------
         X : numpy.ndarray
             The input to be predicted.
-        one_hot : bool, optional
-            If set to true, it returns output in one hot fashion instead of a single label.
+        prob : bool, optional
+            If set to true, it returns output probabilities of each class.
         Returns
        -------
         numpy.array : The prediction.
         """
-        Z = []
-        for x in X:
-            z = self.forwardpass(x)
-
-            t = np.zeros_like(z)
-            if one_hot:
-                # returns one hot
-                t[np.argmax(z)] = 1
-                Z.append(t.flatten())
-            else:
-                # returns class
-                Z.append(np.argmax(z))
-
-        return np.array(Z)
+        predictions = self.forwardpass(X)
+
+        if prob is False:
+            predictions = np.argmax(predictions, axis=1)
+
+        return predictions
 
     def add(self, layer):
         """
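
To see the whole mini-batch flow this PR moves to in one place, here is a toy, self-contained sketch (illustrative class and variable names, not the library's code): the batch is forwarded as a single matrix, the loss derivative is taken once per batch, and each layer consumes the accumulated gradient and immediately applies its own update:

import numpy as np

class TinyDense:
    def __init__(self, n_in, n_out):
        self.W = np.random.randn(n_in, n_out) * 0.1
        self.b = np.zeros((1, n_out))

    def forward(self, X):
        self.X = X                                 # cache the batch for backprop
        return X @ self.W + self.b

    def back_propogate(self, dEdO):
        self.dEdW = self.X.T @ dEdO                # gradients stored on the layer
        self.dEdB = dEdO.sum(axis=0, keepdims=True)
        return dEdO @ self.W.T                     # gradient for the previous layer

    def optimize(self, lr):
        self.W -= lr * self.dEdW
        self.b -= lr * self.dEdB

def mse_deriv(y_pred, y):
    return 2.0 * (y_pred - y) / y.shape[0]

X = np.random.randn(8, 3)
Y = np.random.randn(8, 2)
layers = [TinyDense(3, 4), TinyDense(4, 2)]

for epoch in range(100):
    Z = X
    for layer in layers:                 # one forward pass for the whole batch
        Z = layer.forward(Z)
    dEdO = mse_deriv(Z, Y)               # loss derivative taken once per batch
    for layer in reversed(layers):       # backprop and update layer by layer
        dEdO = layer.back_propogate(dEdO)
        layer.optimize(lr=0.1)

Z = X
for layer in layers:
    Z = layer.forward(Z)
print("final mse:", ((Z - Y) ** 2).mean())
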
20 changes: 9 additions & 11 deletions fromscratchtoml/test/neural_network/test_neural_network.py
@@ -31,7 +31,7 @@ def setUp(self):
         X22 = Distribution.radial_binary(pts=300,
                                          mean=[0, 0],
                                          st=4,
-                                         ed=5, seed=20)
+                                         ed=5, seed=10)
 
         Y11 = np.ones(X11.shape[0])
         Y22 = np.zeros(X11.shape[0])
@@ -52,9 +52,6 @@ def test_dense_acts_sgd(self):
         model.add(Dense(2, seed=7))
         model.add(Activation('tanh'))
 
-        model.add(Dense(2, seed=5))
-        model.add(Activation('softmax'))
-
         model.add(Dense(2, seed=2))
         model.add(Activation('relu'))
@@ -65,18 +62,19 @@
         model.add(Activation('linear'))
 
         model.add(Dense(2, seed=6))
+        model.add(Activation('softmax'))
 
         sgd = StochasticGradientDescent(learning_rate=0.05)
         model.compile(optimizer=sgd, loss="mean_squared_error")
 
-        model.fit(self.X_train, self.y_train, epochs=14)
+        model.fit(self.X_train, self.y_train, epochs=14, batch_size=4)
 
-        expected_biases = np.array([[0.08650937, 1.00013189]], dtype=np.float128)
-        self.assertTrue(np.allclose(expected_biases, model.layers[-1].biases))
+        expected_biases = np.array([[1.38503523, -0.51962709]], dtype=np.float128)
+        self.assertTrue(np.allclose(expected_biases, model.layers[-2].biases))
 
-        expected_weights = np.array([[-0.49908263, -0.17316507], [-0.42623203, 0.48448988]], dtype=np.float128)
-        self.assertTrue(np.allclose(expected_weights, model.layers[-1].weights))
+        expected_weights = np.array([[-1.31788536, 1.49334281], [-0.10027775, -1.39507145]], dtype=np.float128)
+        self.assertTrue(np.allclose(expected_weights, model.layers[-2].weights))
 
-        predictions = model.predict(self.X_test, one_hot=1)
+        predictions = model.predict(self.X_test)
 
-        self.assertTrue(np.allclose(predictions, self.y_test))
+        self.assertTrue(np.allclose((predictions), np.argmax(self.y_test, axis=1)))
