[MRG] using mini batch for gd #54

Merged 1 commit on Jul 13, 2018
193 changes: 101 additions & 92 deletions docs/notebooks/neural_network.ipynb

Large diffs are not rendered by default.

11 changes: 8 additions & 3 deletions fromscratchtoml/neural_network/activations.py
@@ -141,10 +141,15 @@ def softmax(x, return_deriv=False):
-------
numpy.ndarray : softmax of x
"""
# shifting for numerical stability
# refer https://eli.thegreenplace.net/2016/the-softmax-function-and-its-derivative

x -= np.max(x, axis=-1, keepdims=True)
n = np.exp(x)
d = np.sum(np.exp(x))
d = np.sum(n, axis=-1, keepdims=True)
_softmax = n / d

if return_deriv:
return n / d, 1 - (n / d)
return _softmax, _softmax * (1 - _softmax)

return n / d
return _softmax
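
For reference, a minimal standalone sketch of the shift-for-stability trick this hunk introduces (see the linked post above). `stable_softmax` is a hypothetical helper used only for illustration, not part of the package:

```python
import numpy as np

def stable_softmax(x):
    # Subtracting the row-wise max before exponentiating keeps np.exp from
    # overflowing; the shift cancels between numerator and denominator, so
    # the resulting probabilities are unchanged.
    x = x - np.max(x, axis=-1, keepdims=True)
    n = np.exp(x)
    return n / np.sum(n, axis=-1, keepdims=True)

logits = np.array([[1000.0, 1001.0, 1002.0],   # would overflow without the shift
                   [-5.0, 0.0, 5.0]])
probs = stable_softmax(logits)
print(probs.sum(axis=-1))                      # [1. 1.]
```

The `axis=-1, keepdims=True` sum also makes the denominator broadcast correctly when `x` is a whole batch of rows rather than a single vector, which the mini-batch changes elsewhere in this PR rely on.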
11 changes: 5 additions & 6 deletions fromscratchtoml/neural_network/layers/activation.py
@@ -76,20 +76,19 @@ def forward(self, X, return_deriv=False):

return self.output

def back_propogate(self, delta):
def back_propogate(self, dEdO):
"""
Backpropogate the error, this function adds the share of activation layer to the accumulated delta.

Parameters
----------
delta : numpy.ndarray
dEdO : numpy.ndarray
The accumulated delta used for calculating error gradient with respect to parameters.

Returns
-------
numpy.array : The accumulated delta.
numpy.array : Current updated derivative of error with respect to bias.
numpy.array : Current updated derivative of error with respect to weight.
"""
delta = delta * self.output_deriv
return delta, 0, 0

dEdO = dEdO * self.output_deriv
return dEdO
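
The activation layer's backward step is the element-wise chain rule: if O = f(Z), then dE/dZ = dE/dO * f'(Z), with f'(Z) cached as `output_deriv` during the forward pass. A small sketch under that assumption; the helper names below are illustrative, not the package API:

```python
import numpy as np

def tanh_forward(Z):
    out = np.tanh(Z)
    deriv = 1.0 - out ** 2             # f'(Z) for tanh, cached for the backward pass
    return out, deriv

def activation_backward(dEdO, cached_deriv):
    # Element-wise chain rule: the gradient keeps the same shape as the input batch.
    return dEdO * cached_deriv

Z = np.random.randn(4, 3)              # a batch of 4 pre-activations
out, deriv = tanh_forward(Z)
dEdZ = activation_backward(np.ones_like(out), deriv)
print(dEdZ.shape)                      # (4, 3)
```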
10 changes: 2 additions & 8 deletions fromscratchtoml/neural_network/layers/base_layer.py
@@ -33,20 +33,14 @@ class Layer(object):
>>> model.fit(X1, y1, batch_size=4, epochs=100)
>>> model.predict(X1, one_hot=True)
"""
def optimize(self, optimizer, der_cost_bias, der_cost_weight):
def optimize(self, optimizer):
"""
Optimize the weights corresponding to the optimizer function supplied.

Parameters
----------
optimizer : fromscratchtoml.neural_network.optimizers
The optimizing procedure followed for updating the weights.
der_cost_bias : numpy.ndarray
The derivative of error with respect to bias.
der_cost_weights : numpy.ndarray
The derivative of error with respect to weights.
"""
if self.trainable:
self.weights = optimizer.update_weights(self.weights, der_cost_weight)
self.biases = optimizer.update_weights(self.biases, der_cost_bias)
pass
return
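
With gradients now cached on each layer during `back_propogate`, the base class can make `optimize` a no-op and leave the actual update to layers that own parameters. A rough sketch of that split with a stand-in optimizer; all class names here are illustrative, not the package's:

```python
class LayerSketch:
    def optimize(self, optimizer):
        return                           # layers without parameters have nothing to update

class DenseLike(LayerSketch):
    def __init__(self, weights, biases):
        self.weights, self.biases = weights, biases
        self.dEdW = self.dEdB = None     # filled in by back_propogate()

    def optimize(self, optimizer):
        self.weights = optimizer.update_weights(self.weights, self.dEdW)
        self.biases = optimizer.update_weights(self.biases, self.dEdB)

class PlainSGD:
    def __init__(self, learning_rate=0.1):
        self.learning_rate = learning_rate

    def update_weights(self, w, dw):
        # Plain gradient-descent step.
        return w - self.learning_rate * dw
```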
21 changes: 12 additions & 9 deletions fromscratchtoml/neural_network/layers/dense.py
@@ -86,17 +86,17 @@ def forward(self, X, return_deriv=False):

if self.weights is None:
self.biases = np.random.randn(1, self.units)
self.weights = np.random.randn(X.shape[0], self.units)
self.weights = np.random.randn(X.shape[1], self.units)

self.input = X
self.output = (np.dot(X.T, self.weights) + self.biases).T
self.output = np.dot(X, self.weights) + self.biases

if return_deriv:
return self.output, 0

return self.output

def back_propogate(self, delta):
def back_propogate(self, dEdO):
"""
Backpropogate the error, this function adds the share of dense layer to the accumulated delta.

@@ -108,10 +108,13 @@ def back_propogate(self, delta):
Returns
-------
numpy.array : The accumulated delta.
numpy.array : Current updated derivative of error with respect to bias.
numpy.array : Current updated derivative of error with respect to weight.
"""
der_error_bias = delta.T
der_error_weight = np.dot(delta, self.input.T).T
delta = np.dot(self.weights, delta)
return delta, der_error_bias, der_error_weight

self.dEdB = np.sum(dEdO)
self.dEdW = np.dot(self.input.T, dEdO)
dEdO = np.dot(dEdO, self.weights.T)
return dEdO

def optimize(self, optimizer):
self.weights = optimizer.update_weights(self.weights, self.dEdW)
self.biases = optimizer.update_weights(self.biases, self.dEdB)
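
The batched forward/backward math the Dense layer moves to can be checked in isolation. The sketch below uses the textbook per-unit bias gradient (a sum over the batch axis), whereas the hunk above sums over all entries of `dEdO`; variable names are illustrative:

```python
import numpy as np

rng = np.random.RandomState(0)
batch, in_features, units = 4, 3, 2

X = rng.randn(batch, in_features)
W = rng.randn(in_features, units)
b = rng.randn(1, units)

out = X @ W + b                           # forward: (batch, units), bias broadcasts over rows

dEdO = rng.randn(batch, units)            # upstream gradient from the next layer
dEdW = X.T @ dEdO                         # (in_features, units), same shape as W
dEdB = dEdO.sum(axis=0, keepdims=True)    # (1, units), same shape as b
dEdX = dEdO @ W.T                         # (batch, in_features), handed to the previous layer

assert dEdW.shape == W.shape and dEdB.shape == b.shape and dEdX.shape == X.shape
```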
105 changes: 35 additions & 70 deletions fromscratchtoml/neural_network/models/sequential.py
@@ -39,7 +39,7 @@ class Sequential(BaseModel):
>>> sgd = StochasticGradientDescent(learning_rate=0.1)
>>> model.compile(optimizer=sgd, loss="mean_squared_error")
>>> model.fit(X1, y1, batch_size=4, epochs=100)
>>> model.predict(X1, one_hot=True)
>>> model.predict(X1)
"""
def __init__(self, verbose=False, vis_each_epoch=False):
"""
@@ -86,7 +86,11 @@ def accuracy(self, X, y):
-------
float : The accuracy in percentage.
"""
y_pred = self.predict(X, one_hot=True)

if len(y.shape) > 1:
y = np.argmax(y, axis=1)
y_pred = self.predict(X)

diff_arr = y - y_pred
total_samples = y.shape[0]

@@ -118,7 +122,7 @@ def fit(self, X, y, epochs, batch_size=None):
self.__update_batch(batch_X, batch_y)

if self.verbose or epoch == epochs - 1:
y_pred = self.predict(X, one_hot=True)
y_pred = self.predict(X, prob=True)
loss = self.loss(y_pred, y)
acc = self.accuracy(X, y)
print("\nepoch: {}/{} ".format(epoch + 1, epochs), end="")
@@ -138,66 +142,35 @@ def __update_batch(self, X, Y):
Y : numpy.ndarray
The corresponding label to the input.
"""
# TODO write mini batch SGD
# der_error_bias = None
# der_error_weight = None

for x, y in zip(X, Y):
delta_der_error_bias, delta_der_error_weight = self.back_propogation(x, y)
# if der_error_bias is None:
# der_error_bias, der_error_weight = delta_der_error_bias, delta_der_error_weight
# else:
# der_error_bias += delta_der_error_bias
# der_error_weight += delta_der_error_weight

# updates weights in each layer
# for layer, db, dw in zip(self.layers, der_error_bias, der_error_weight):
# layer.optimize(self.optimizer, db, dw)

def back_propogation(self, x, y):
y_pred = self.forwardpass(X)

_, dEdO = self.loss(y_pred, Y, return_deriv=True)

self.back_propogate_and_update(dEdO, self.optimizer)

def back_propogate_and_update(self, dEdO, optimizer):
"""
Backpropogate the error from the last layer to the first and then optimize the weights.

Parameters
----------
x : numpy.ndarray
The input to the model.
y : numpy.ndarray
The corresponding label to the input.

Returns
-------
numpy.array : The derivative of error with respect to biases.
numpy.array : The derivative of error with respect to weights.
dEdO : numpy.ndarray
The accumulated delta used for calculating error gradient with respect to parameters.
optimizer : fromscratchtoml.neural_network.optimizers
The optimizing procedure followed for updating the weights.
"""
y_pred, y_pred_deriv = self.forwardpass(x, return_deriv=True)

loss, loss_grad = self.loss(y_pred, y, return_deriv=True)

delta = loss_grad

der_error_biases = []
der_error_weights = []

for layer in reversed(self.layers):
# updates delta
delta, der_error_bias, der_error_weight = layer.back_propogate(delta)

if hasattr(layer, 'weights'):
layer.optimize(self.optimizer, der_error_bias, der_error_weight)
dEdO = layer.back_propogate(dEdO)
layer.optimize(optimizer)

der_error_biases.append(der_error_bias)
der_error_weights.append(der_error_weight)

return np.array(der_error_biases[::-1]), np.array(der_error_weights[::-1])

def forwardpass(self, x, return_deriv=False):
def forwardpass(self, X, return_deriv=False):
"""
Forward pass the input through all the layers in the current model.

Parameters
----------
x : numpy.ndarray
X : numpy.ndarray
The input to the model.
return_deriv : bool, optional
If set to true, the function returns derivative of the output along with the output.
@@ -206,45 +179,37 @@ def forwardpass(self, x, return_deriv=False):
-------
numpy.array : The output of the model.
"""
z = x
Z = X

for layer in self.layers:
z, z_deriv = layer.forward(z, return_deriv=True)
Z, Z_deriv = layer.forward(Z, return_deriv=True)

if return_deriv:
return z, z_deriv
return Z, Z_deriv

return z
return Z

def predict(self, X, one_hot=False):
def predict(self, X, prob=False):
"""
Predicts the ouput of the model based on the trained parameters.

Parameters
----------
X : numpy.ndarray
The input to be predicted.
one_hot : bool, optional
If set to true, it returns output in one hot fashion instead of a single label.
prob : bool, optional
If set to true, it returns output probabilities of each class.

Returns
-------
numpy.array : The prediction.
"""
Z = []
for x in X:
z = self.forwardpass(x)

t = np.zeros_like(z)
if one_hot:
# returns one hot
t[np.argmax(z)] = 1
Z.append(t.flatten())
else:
# returns class
Z.append(np.argmax(z))

return np.array(Z)
predictions = self.forwardpass(X)

if prob is False:
predictions = np.argmax(predictions, axis=1)

return predictions

def add(self, layer):
"""
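
Putting the pieces together, `__update_batch` now does one vectorised forward pass over the whole mini-batch, takes the loss gradient once, and walks the layers in reverse, letting each layer update itself. A condensed sketch of that flow, assuming the layer/loss/optimizer interfaces shown in the hunks above:

```python
def update_batch(layers, loss, optimizer, batch_X, batch_Y):
    y_pred = batch_X
    for layer in layers:                        # forward pass over the whole batch
        y_pred, _ = layer.forward(y_pred, return_deriv=True)

    _, dEdO = loss(y_pred, batch_Y, return_deriv=True)

    for layer in reversed(layers):              # backward pass
        dEdO = layer.back_propogate(dEdO)       # each layer returns dE/d(its input)
        layer.optimize(optimizer)               # parameterised layers update, others no-op

def fit(layers, loss, optimizer, X, Y, epochs, batch_size):
    for _ in range(epochs):
        for start in range(0, X.shape[0], batch_size):
            update_batch(layers, loss, optimizer,
                         X[start:start + batch_size],
                         Y[start:start + batch_size])
```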
20 changes: 9 additions & 11 deletions fromscratchtoml/test/neural_network/test_neural_network.py
@@ -31,7 +31,7 @@ def setUp(self):
X22 = Distribution.radial_binary(pts=300,
mean=[0, 0],
st=4,
ed=5, seed=20)
ed=5, seed=10)

Y11 = np.ones(X11.shape[0])
Y22 = np.zeros(X11.shape[0])
@@ -52,9 +52,6 @@ def test_dense_acts_sgd(self):
model.add(Dense(2, seed=7))
model.add(Activation('tanh'))

model.add(Dense(2, seed=5))
model.add(Activation('softmax'))

model.add(Dense(2, seed=2))
model.add(Activation('relu'))

@@ -65,18 +62,19 @@
model.add(Activation('linear'))

model.add(Dense(2, seed=6))
model.add(Activation('softmax'))

sgd = StochasticGradientDescent(learning_rate=0.05)
model.compile(optimizer=sgd, loss="mean_squared_error")

model.fit(self.X_train, self.y_train, epochs=14)
model.fit(self.X_train, self.y_train, epochs=14, batch_size=4)

expected_biases = np.array([[0.08650937, 1.00013189]], dtype=np.float128)
self.assertTrue(np.allclose(expected_biases, model.layers[-1].biases))
expected_biases = np.array([[1.38503523, -0.51962709]], dtype=np.float128)
self.assertTrue(np.allclose(expected_biases, model.layers[-2].biases))

expected_weights = np.array([[-0.49908263, -0.17316507], [-0.42623203, 0.48448988]], dtype=np.float128)
self.assertTrue(np.allclose(expected_weights, model.layers[-1].weights))
expected_weights = np.array([[-1.31788536, 1.49334281], [-0.10027775, -1.39507145]], dtype=np.float128)
self.assertTrue(np.allclose(expected_weights, model.layers[-2].weights))

predictions = model.predict(self.X_test, one_hot=1)
predictions = model.predict(self.X_test)

self.assertTrue(np.allclose(predictions, self.y_test))
self.assertTrue(np.allclose((predictions), np.argmax(self.y_test, axis=1)))
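
Finally, a short usage sketch of the `predict` contract this test now exercises: class indices by default, per-class probabilities with `prob=True`. The import paths are inferred from the file names in this PR and the layer/optimizer arguments are copied from the test above, so treat them as assumptions:

```python
import numpy as np
from fromscratchtoml.neural_network.models import Sequential
from fromscratchtoml.neural_network.layers import Dense, Activation
from fromscratchtoml.neural_network.optimizers import StochasticGradientDescent

X = np.random.randn(20, 2)
y = np.eye(2)[np.random.randint(0, 2, size=20)]   # one-hot labels

model = Sequential()
model.add(Dense(2, seed=6))
model.add(Activation('softmax'))

model.compile(optimizer=StochasticGradientDescent(learning_rate=0.05),
              loss="mean_squared_error")
model.fit(X, y, epochs=5, batch_size=4)

probs = model.predict(X, prob=True)               # (20, 2) class probabilities
labels = model.predict(X)                         # (20,) argmax class indices
print((labels == np.argmax(y, axis=1)).mean())    # training accuracy as a fraction
```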