Commit a382f2e
Simon Kozlov authored and committed on Mar 12, 2019
1 parent: 2166e63
Showing 11 changed files with 1,645 additions and 0 deletions.
Large diffs are not rendered by default.
@@ -0,0 +1,38 @@
import os

import numpy as np
import scipy.io as io


def load_data_mat(filename, max_samples):
    raw = io.loadmat(filename)
    X = raw['X']  # Array of [32, 32, 3, n_samples]
    y = raw['y']  # Array of [n_samples, 1]
    X = np.moveaxis(X, [3], [0])
    y = y.flatten()
    # SVHN stores the digit 0 as class 10; remap it to class 0
    y[y == 10] = 0
    return X[:max_samples], y[:max_samples]


def load_svhn(folder, max_train, max_test):
    train_X, train_y = load_data_mat(os.path.join(folder, "train_32x32.mat"), max_train)
    test_X, test_y = load_data_mat(os.path.join(folder, "test_32x32.mat"), max_test)
    return train_X, train_y, test_X, test_y


def random_split_train_val(X, y, num_val, seed=42):
    np.random.seed(seed)

    indices = np.arange(X.shape[0])
    np.random.shuffle(indices)

    train_indices = indices[:-num_val]
    train_X = X[train_indices]
    train_y = y[train_indices]

    val_indices = indices[-num_val:]
    val_X = X[val_indices]
    val_y = y[val_indices]

    return train_X, train_y, val_X, val_y
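For reference, a minimal usage sketch of the loader module above. The module name `dataset`, the sample limits, and the validation size are assumptions for illustration (the filename is not shown in this view); the .mat files are expected under a local data/ folder created by the download script below.

# Usage sketch: assumes this module is importable as `dataset` and that
# data/train_32x32.mat and data/test_32x32.mat have already been downloaded.
from dataset import load_svhn, random_split_train_val

# Sample limits are illustrative; they just keep experiments fast
train_X, train_y, test_X, test_y = load_svhn("data", max_train=10000, max_test=1000)

# Hold out 1,000 shuffled samples for validation
train_X, train_y, val_X, val_y = random_split_train_val(train_X, train_y, num_val=1000)

print(train_X.shape, val_X.shape, test_X.shape)
# (9000, 32, 32, 3) (1000, 32, 32, 3) (1000, 32, 32, 3)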
@@ -0,0 +1,3 @@
mkdir data
cd data
wget -c http://ufldl.stanford.edu/housenumbers/train_32x32.mat http://ufldl.stanford.edu/housenumbers/test_32x32.mat
@@ -0,0 +1,136 @@
import numpy as np


def check_gradient(f, x, delta=1e-5, tol=1e-4):
    """
    Checks the implementation of analytical gradient by comparing
    it to numerical gradient using two-point formula

    Arguments:
      f: function that receives x and computes value and gradient
      x: np array, initial point where gradient is checked
      delta: step to compute numerical gradient
      tol: tolerance for comparing numerical and analytical gradient

    Returns:
      bool indicating whether gradients match or not
    """
    assert isinstance(x, np.ndarray)
    assert x.dtype == np.float64

    fx, analytic_grad = f(x)

    assert analytic_grad.shape == x.shape

    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        ix = it.multi_index
        analytic_grad_at_ix = analytic_grad[ix]
        numeric_grad_at_ix = 0

        # TODO Copy from previous assignment
        raise Exception("Not implemented!")

        if not np.isclose(numeric_grad_at_ix, analytic_grad_at_ix, tol):
            print("Gradients are different at %s. Analytic: %2.5f, Numeric: %2.5f" % (
                ix, analytic_grad_at_ix, numeric_grad_at_ix))
            return False

        it.iternext()

    print("Gradient check passed!")
    return True


def check_layer_gradient(layer, x, delta=1e-5, tol=1e-4):
    """
    Checks gradient correctness for the input and output of a layer

    Arguments:
      layer: neural network layer, with forward and backward functions
      x: starting point for layer input
      delta: step to compute numerical gradient
      tol: tolerance for comparing numerical and analytical gradient

    Returns:
      bool indicating whether gradients match or not
    """
    output = layer.forward(x)
    output_weight = np.random.randn(*output.shape)

    def helper_func(x):
        output = layer.forward(x)
        loss = np.sum(output * output_weight)
        d_out = np.ones_like(output) * output_weight
        grad = layer.backward(d_out)
        return loss, grad

    return check_gradient(helper_func, x, delta, tol)


def check_layer_param_gradient(layer, x,
                               param_name,
                               delta=1e-5, tol=1e-4):
    """
    Checks gradient correctness for the parameter of the layer

    Arguments:
      layer: neural network layer, with forward and backward functions
      x: starting point for layer input
      param_name: name of the parameter
      delta: step to compute numerical gradient
      tol: tolerance for comparing numerical and analytical gradient

    Returns:
      bool indicating whether gradients match or not
    """
    param = layer.params()[param_name]
    initial_w = param.value

    output = layer.forward(x)
    output_weight = np.random.randn(*output.shape)

    def helper_func(w):
        param.value = w
        output = layer.forward(x)
        loss = np.sum(output * output_weight)
        d_out = np.ones_like(output) * output_weight
        layer.backward(d_out)
        grad = param.grad
        return loss, grad

    return check_gradient(helper_func, initial_w, delta, tol)


def check_model_gradient(model, X, y,
                         delta=1e-5, tol=1e-4):
    """
    Checks gradient correctness for all model parameters

    Arguments:
      model: neural network model with compute_loss_and_gradients
      X: batch of input data
      y: batch of labels
      delta: step to compute numerical gradient
      tol: tolerance for comparing numerical and analytical gradient

    Returns:
      bool indicating whether gradients match or not
    """
    params = model.params()

    for param_key in params:
        print("Checking gradient for %s" % param_key)
        param = params[param_key]
        initial_w = param.value

        def helper_func(w):
            param.value = w
            loss = model.compute_loss_and_gradients(X, y)
            grad = param.grad
            return loss, grad

        if not check_gradient(helper_func, initial_w, delta, tol):
            return False

    return True
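The TODO inside check_gradient above asks for the two-point (central difference) estimate of the gradient. A standalone sketch of that computation follows; numeric_gradient is a hypothetical helper introduced here for illustration, not part of this commit, and its inner loop body is the piece the TODO expects (note that x[ix] is restored after each probe so later coordinates see the original point).

import numpy as np


def numeric_gradient(f, x, delta=1e-5):
    # Hypothetical helper, not part of this commit: central-difference estimate
    # of the gradient of f at x, where f returns (value, gradient) and only the
    # value is used here.
    grad = np.zeros_like(x)
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        ix = it.multi_index
        orig = x[ix]

        x[ix] = orig + delta          # f(x + delta * e_ix)
        fx_plus, _ = f(x)

        x[ix] = orig - delta          # f(x - delta * e_ix)
        fx_minus, _ = f(x)

        x[ix] = orig                  # restore before the next coordinate
        grad[ix] = (fx_plus - fx_minus) / (2 * delta)
        it.iternext()
    return grad


# Example: the gradient of f(v) = sum(v**2) is 2*v
g = numeric_gradient(lambda v: (np.sum(v ** 2), 2 * v), np.array([1.0, 2.0, 3.0]))
print(g)  # approximately [2. 4. 6.]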
@@ -0,0 +1,121 @@
import numpy as np


def l2_regularization(W, reg_strength):
    """
    Computes L2 regularization loss on weights and its gradient

    Arguments:
      W, np array - weights
      reg_strength - float value

    Returns:
      loss, single value - l2 regularization loss
      gradient, np.array same shape as W - gradient of weight by l2 loss
    """
    # TODO: Copy from the previous assignment
    raise Exception("Not implemented!")
    return loss, grad


def softmax_with_cross_entropy(preds, target_index):
    """
    Computes softmax and cross-entropy loss for model predictions,
    including the gradient

    Arguments:
      preds, np array, shape is either (N) or (batch_size, N) -
        classifier output
      target_index: np array of int, shape is (1) or (batch_size) -
        index of the true class for given sample(s)

    Returns:
      loss, single value - cross-entropy loss
      d_preds, np array same shape as preds - gradient of predictions by loss value
    """
    # TODO: Copy from the previous assignment
    raise Exception("Not implemented!")

    return loss, d_preds


class Param:
    """
    Trainable parameter of the model
    Captures both parameter value and the gradient
    """

    def __init__(self, value):
        self.value = value
        self.grad = np.zeros_like(value)


class ReLULayer:
    def __init__(self):
        pass

    def forward(self, X):
        # TODO: Implement forward pass
        # Hint: you'll need to save some information about X
        # to use it later in the backward pass
        raise Exception("Not implemented!")

    def backward(self, d_out):
        """
        Backward pass

        Arguments:
          d_out, np array (batch_size, num_features) - gradient
            of loss function with respect to output

        Returns:
          d_result: np array (batch_size, num_features) - gradient
            with respect to input
        """
        # TODO: Implement backward pass
        raise Exception("Not implemented!")
        return d_result

    def params(self):
        # ReLU doesn't have any parameters
        return {}


class FullyConnectedLayer:
    def __init__(self, n_input, n_output):
        self.W = Param(0.001 * np.random.randn(n_input, n_output))
        self.B = Param(0.001 * np.random.randn(1, n_output))
        self.X = None

    def forward(self, X):
        # TODO: Implement forward pass
        raise Exception("Not implemented!")

    def backward(self, d_out):
        """
        Backward pass
        Computes gradient with respect to input and
        accumulates gradients within self.W and self.B

        Arguments:
          d_out, np array (batch_size, n_output) - gradient
            of loss function with respect to output

        Returns:
          d_result: np array (batch_size, n_input) - gradient
            with respect to input
        """
        # TODO: Implement backward pass
        # Compute both gradient with respect to input
        # and gradients with respect to W and B
        # Add gradients of W and B to their `grad` attribute

        # It should be pretty similar to linear classifier from
        # the previous assignment

        raise Exception("Not implemented!")

        return d_input

    def params(self):
        return {'W': self.W, 'B': self.B}
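The loss stubs above leave the math to the reader. One possible sketch follows, written as separate illustrative functions rather than drop-in solutions; the (batch_size, N) shape convention for preds and averaging the cross-entropy over the batch are assumptions here, not something this commit specifies.

import numpy as np


def l2_regularization_sketch(W, reg_strength):
    # L2 loss = reg_strength * sum(W_ij ** 2); its gradient is 2 * reg_strength * W
    loss = reg_strength * np.sum(W * W)
    grad = 2 * reg_strength * W
    return loss, grad


def softmax_with_cross_entropy_sketch(preds, target_index):
    # Work on a 2D copy so single samples and batches share one code path
    preds_2d = np.atleast_2d(preds).astype(float)
    targets = np.atleast_1d(target_index).reshape(-1)
    batch_size = preds_2d.shape[0]

    # Numerically stable softmax: subtract the row-wise max before exponentiating
    shifted = preds_2d - np.max(preds_2d, axis=1, keepdims=True)
    exp = np.exp(shifted)
    probs = exp / np.sum(exp, axis=1, keepdims=True)

    # Cross-entropy averaged over the batch (averaging is an assumption)
    loss = -np.mean(np.log(probs[np.arange(batch_size), targets]))

    # Gradient w.r.t. the logits: (softmax - one_hot) / batch_size
    d_preds = probs.copy()
    d_preds[np.arange(batch_size), targets] -= 1
    d_preds /= batch_size
    return loss, d_preds.reshape(np.shape(preds))

Once the TODO in check_gradient is filled in, these sketches can be verified with it, for example check_gradient(lambda w: l2_regularization_sketch(w, 0.1), np.random.randn(3, 4)).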
@@ -0,0 +1,16 @@
def multiclass_accuracy(prediction, ground_truth):
    """
    Computes the accuracy metric for multiclass classification

    Arguments:
      prediction, np array of int (num_samples) - model predictions
      ground_truth, np array of int (num_samples) - true labels

    Returns:
      accuracy - ratio of accurate predictions to total samples
    """

    # TODO: Implement computing accuracy
    raise Exception("Not implemented!")

    return 0
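A possible one-liner for the stub above, under the docstring's assumption that prediction and ground_truth are equal-length integer arrays; the function name is hypothetical and only used for this example.

import numpy as np


def multiclass_accuracy_sketch(prediction, ground_truth):
    # Fraction of positions where the predicted class equals the true label
    return np.mean(prediction == ground_truth)


print(multiclass_accuracy_sketch(np.array([1, 2, 0]), np.array([1, 0, 0])))  # ~0.667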
@@ -0,0 +1,64 @@
import numpy as np

from layers import FullyConnectedLayer, ReLULayer, softmax_with_cross_entropy, l2_regularization


class TwoLayerNet:
    """ Neural network with two fully connected layers """

    def __init__(self, hidden_layer_size, reg):
        """
        Initializes the neural network

        Arguments:
          hidden_layer_size, int - number of neurons in the hidden layer
          reg, float - L2 regularization strength
        """
        self.reg = reg
        # TODO Create necessary layers
        raise Exception("Not implemented!")

    def compute_loss_and_gradients(self, X, y):
        """
        Computes total loss and updates parameter gradients
        on a batch of training examples

        Arguments:
          X, np array (batch_size, input_features) - input data
          y, np array of int (batch_size) - classes

        Returns:
          loss, single value - total loss on the batch
        """
        # TODO Compute loss and fill param gradients
        # by running forward and backward passes through the model

        # After that, implement l2 regularization on all params
        # Hint: use self.params()
        raise Exception("Not implemented!")

        return loss

    def predict(self, X):
        """
        Produces classifier predictions on the set

        Arguments:
          X, np array (test_samples, num_features)

        Returns:
          y_pred, np.array of int (test_samples)
        """
        # TODO: Implement predict
        # Hint: some of the code from compute_loss_and_gradients
        # can be reused
        pred = np.zeros(X.shape[0], int)

        raise Exception("Not implemented!")
        return pred

    def params(self):
        result = {}

        # TODO Implement aggregating all of the params

        raise Exception("Not implemented!")

        return result
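To show how the pieces could fit together once the stubs are filled in, here is a compact, self-contained sketch of a two-layer network. The layer classes are minimal stand-ins written for this example, the loss functions are the sketches given after the layers module above, and n_input/n_output are passed explicitly as an assumption (the stub constructor above only receives hidden_layer_size and reg). This is a sketch, not the assignment's reference solution.

import numpy as np


class ParamSketch:
    def __init__(self, value):
        self.value = value
        self.grad = np.zeros_like(value)


class DenseSketch:
    # Minimal fully connected layer stand-in for this sketch
    def __init__(self, n_input, n_output):
        self.W = ParamSketch(0.001 * np.random.randn(n_input, n_output))
        self.B = ParamSketch(0.001 * np.random.randn(1, n_output))

    def forward(self, X):
        self.X = X
        return X @ self.W.value + self.B.value

    def backward(self, d_out):
        # Accumulate parameter gradients, return gradient w.r.t. the input
        self.W.grad += self.X.T @ d_out
        self.B.grad += np.sum(d_out, axis=0, keepdims=True)
        return d_out @ self.W.value.T

    def params(self):
        return {'W': self.W, 'B': self.B}


class ReluSketch:
    def forward(self, X):
        self.mask = X > 0
        return X * self.mask

    def backward(self, d_out):
        return d_out * self.mask

    def params(self):
        return {}


class TwoLayerNetSketch:
    def __init__(self, n_input, n_output, hidden_layer_size, reg):
        self.reg = reg
        self.layers = [DenseSketch(n_input, hidden_layer_size),
                       ReluSketch(),
                       DenseSketch(hidden_layer_size, n_output)]

    def params(self):
        # Aggregate every layer's params under a unique key
        return {"layer%d_%s" % (i, name): p
                for i, layer in enumerate(self.layers)
                for name, p in layer.params().items()}

    def compute_loss_and_gradients(self, X, y):
        # Zero accumulated gradients before the new forward/backward pass
        for p in self.params().values():
            p.grad = np.zeros_like(p.value)

        out = X
        for layer in self.layers:
            out = layer.forward(out)

        # Loss sketches from the layers section above
        loss, d_out = softmax_with_cross_entropy_sketch(out, y)
        for layer in reversed(self.layers):
            d_out = layer.backward(d_out)

        # L2 regularization on every parameter
        for p in self.params().values():
            reg_loss, reg_grad = l2_regularization_sketch(p.value, self.reg)
            loss += reg_loss
            p.grad += reg_grad
        return loss

    def predict(self, X):
        out = X
        for layer in self.layers:
            out = layer.forward(out)
        return np.argmax(out, axis=1)

With flattened 32x32x3 SVHN images and 10 classes this might be instantiated as TwoLayerNetSketch(n_input=32 * 32 * 3, n_output=10, hidden_layer_size=100, reg=1e-1) and, once check_gradient's TODO is implemented, verified with check_model_gradient.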