Commit a382f2e
Simon Kozlov authored and committed on Mar 12, 2019
1 parent: 2166e63
Showing 11 changed files with 1,645 additions and 0 deletions.
Large diffs are not rendered by default.
@@ -0,0 +1,38 @@
import os

import numpy as np
import scipy.io as io


def load_data_mat(filename, max_samples):
    raw = io.loadmat(filename)
    X = raw['X']  # Array of [32, 32, 3, n_samples]
    y = raw['y']  # Array of [n_samples, 1]
    X = np.moveaxis(X, [3], [0])
    y = y.flatten()
    # SVHN stores the digit 0 as class 10; remap it to class 0
    y[y == 10] = 0
    return X[:max_samples], y[:max_samples]


def load_svhn(folder, max_train, max_test):
    train_X, train_y = load_data_mat(os.path.join(folder, "train_32x32.mat"), max_train)
    test_X, test_y = load_data_mat(os.path.join(folder, "test_32x32.mat"), max_test)
    return train_X, train_y, test_X, test_y


def random_split_train_val(X, y, num_val, seed=42):
    np.random.seed(seed)

    indices = np.arange(X.shape[0])
    np.random.shuffle(indices)

    train_indices = indices[:-num_val]
    train_X = X[train_indices]
    train_y = y[train_indices]

    val_indices = indices[-num_val:]
    val_X = X[val_indices]
    val_y = y[val_indices]

    return train_X, train_y, val_X, val_y
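For reference, a minimal usage sketch of the loader module above. The module name `dataset`, the sample limits, and the validation size are assumptions for illustration (the filename is not shown in this view); the .mat files are expected under a local data/ folder created by the download script below.

# Usage sketch: assumes this module is importable as `dataset` and that
# data/train_32x32.mat and data/test_32x32.mat have already been downloaded.
from dataset import load_svhn, random_split_train_val

# Sample limits are illustrative; they just keep experiments fast
train_X, train_y, test_X, test_y = load_svhn("data", max_train=10000, max_test=1000)

# Hold out 1,000 shuffled samples for validation
train_X, train_y, val_X, val_y = random_split_train_val(train_X, train_y, num_val=1000)

print(train_X.shape, val_X.shape, test_X.shape)
# (9000, 32, 32, 3) (1000, 32, 32, 3) (1000, 32, 32, 3)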
@@ -0,0 +1,3 @@
mkdir data
cd data
wget -c http://ufldl.stanford.edu/housenumbers/train_32x32.mat http://ufldl.stanford.edu/housenumbers/test_32x32.mat
@@ -0,0 +1,136 @@
import numpy as np


def check_gradient(f, x, delta=1e-5, tol=1e-4):
    """
    Checks the implementation of analytical gradient by comparing
    it to numerical gradient using two-point formula

    Arguments:
      f: function that receives x and computes value and gradient
      x: np array, initial point where gradient is checked
      delta: step to compute numerical gradient
      tol: tolerance for comparing numerical and analytical gradient

    Returns:
      bool indicating whether gradients match or not
    """
    assert isinstance(x, np.ndarray)
    assert x.dtype == np.float64

    fx, analytic_grad = f(x)

    assert analytic_grad.shape == x.shape

    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        ix = it.multi_index
        analytic_grad_at_ix = analytic_grad[ix]
        numeric_grad_at_ix = 0

        # TODO Copy from previous assignment
        raise Exception("Not implemented!")

        if not np.isclose(numeric_grad_at_ix, analytic_grad_at_ix, tol):
            print("Gradients are different at %s. Analytic: %2.5f, Numeric: %2.5f" % (
                ix, analytic_grad_at_ix, numeric_grad_at_ix))
            return False

        it.iternext()

    print("Gradient check passed!")
    return True


def check_layer_gradient(layer, x, delta=1e-5, tol=1e-4):
    """
    Checks gradient correctness for the input and output of a layer

    Arguments:
      layer: neural network layer, with forward and backward functions
      x: starting point for layer input
      delta: step to compute numerical gradient
      tol: tolerance for comparing numerical and analytical gradient

    Returns:
      bool indicating whether gradients match or not
    """
    output = layer.forward(x)
    output_weight = np.random.randn(*output.shape)

    def helper_func(x):
        output = layer.forward(x)
        loss = np.sum(output * output_weight)
        d_out = np.ones_like(output) * output_weight
        grad = layer.backward(d_out)
        return loss, grad

    return check_gradient(helper_func, x, delta, tol)


def check_layer_param_gradient(layer, x,
                               param_name,
                               delta=1e-5, tol=1e-4):
    """
    Checks gradient correctness for the parameter of the layer

    Arguments:
      layer: neural network layer, with forward and backward functions
      x: starting point for layer input
      param_name: name of the parameter
      delta: step to compute numerical gradient
      tol: tolerance for comparing numerical and analytical gradient

    Returns:
      bool indicating whether gradients match or not
    """
    param = layer.params()[param_name]
    initial_w = param.value

    output = layer.forward(x)
    output_weight = np.random.randn(*output.shape)

    def helper_func(w):
        param.value = w
        output = layer.forward(x)
        loss = np.sum(output * output_weight)
        d_out = np.ones_like(output) * output_weight
        layer.backward(d_out)
        grad = param.grad
        return loss, grad

    return check_gradient(helper_func, initial_w, delta, tol)


def check_model_gradient(model, X, y,
                         delta=1e-5, tol=1e-4):
    """
    Checks gradient correctness for all model parameters

    Arguments:
      model: neural network model with compute_loss_and_gradients
      X: batch of input data
      y: batch of labels
      delta: step to compute numerical gradient
      tol: tolerance for comparing numerical and analytical gradient

    Returns:
      bool indicating whether gradients match or not
    """
    params = model.params()

    for param_key in params:
        print("Checking gradient for %s" % param_key)
        param = params[param_key]
        initial_w = param.value

        def helper_func(w):
            param.value = w
            loss = model.compute_loss_and_gradients(X, y)
            grad = param.grad
            return loss, grad

        if not check_gradient(helper_func, initial_w, delta, tol):
            return False

    return True
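The TODO inside check_gradient above asks for the two-point (central difference) estimate of the gradient. A standalone sketch of that computation follows; numeric_gradient is a hypothetical helper introduced here for illustration, not part of this commit, and its inner loop body is the piece the TODO expects (note that x[ix] is restored after each probe so later coordinates see the original point).

import numpy as np


def numeric_gradient(f, x, delta=1e-5):
    # Hypothetical helper, not part of this commit: central-difference estimate
    # of the gradient of f at x, where f returns (value, gradient) and only the
    # value is used here.
    grad = np.zeros_like(x)
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        ix = it.multi_index
        orig = x[ix]

        x[ix] = orig + delta          # f(x + delta * e_ix)
        fx_plus, _ = f(x)

        x[ix] = orig - delta          # f(x - delta * e_ix)
        fx_minus, _ = f(x)

        x[ix] = orig                  # restore before the next coordinate
        grad[ix] = (fx_plus - fx_minus) / (2 * delta)
        it.iternext()
    return grad


# Example: the gradient of f(v) = sum(v**2) is 2*v
g = numeric_gradient(lambda v: (np.sum(v ** 2), 2 * v), np.array([1.0, 2.0, 3.0]))
print(g)  # approximately [2. 4. 6.]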
@@ -0,0 +1,121 @@
import numpy as np


def l2_regularization(W, reg_strength):
    """
    Computes L2 regularization loss on weights and its gradient

    Arguments:
      W, np array - weights
      reg_strength - float value

    Returns:
      loss, single value - l2 regularization loss
      gradient, np.array same shape as W - gradient of weight by l2 loss
    """
    # TODO: Copy from the previous assignment
    raise Exception("Not implemented!")
    return loss, grad


def softmax_with_cross_entropy(preds, target_index):
    """
    Computes softmax and cross-entropy loss for model predictions,
    including the gradient

    Arguments:
      preds, np array, shape is either (N) or (batch_size, N) -
        classifier output
      target_index: np array of int, shape is (1) or (batch_size) -
        index of the true class for given sample(s)

    Returns:
      loss, single value - cross-entropy loss
      d_preds, np array same shape as preds - gradient of predictions by loss value
    """
    # TODO: Copy from the previous assignment
    raise Exception("Not implemented!")

    return loss, d_preds


class Param:
    """
    Trainable parameter of the model
    Captures both parameter value and the gradient
    """

    def __init__(self, value):
        self.value = value
        self.grad = np.zeros_like(value)


class ReLULayer:
    def __init__(self):
        pass

    def forward(self, X):
        # TODO: Implement forward pass
        # Hint: you'll need to save some information about X
        # to use it later in the backward pass
        raise Exception("Not implemented!")

    def backward(self, d_out):
        """
        Backward pass

        Arguments:
          d_out, np array (batch_size, num_features) - gradient
            of loss function with respect to output

        Returns:
          d_result: np array (batch_size, num_features) - gradient
            with respect to input
        """
        # TODO: Implement backward pass
        raise Exception("Not implemented!")
        return d_result

    def params(self):
        # ReLU doesn't have any parameters
        return {}


class FullyConnectedLayer:
    def __init__(self, n_input, n_output):
        self.W = Param(0.001 * np.random.randn(n_input, n_output))
        self.B = Param(0.001 * np.random.randn(1, n_output))
        self.X = None

    def forward(self, X):
        # TODO: Implement forward pass
        raise Exception("Not implemented!")

    def backward(self, d_out):
        """
        Backward pass
        Computes gradient with respect to input and
        accumulates gradients within self.W and self.B

        Arguments:
          d_out, np array (batch_size, n_output) - gradient
            of loss function with respect to output

        Returns:
          d_result: np array (batch_size, n_input) - gradient
            with respect to input
        """
        # TODO: Implement backward pass
        # Compute both gradient with respect to input
        # and gradients with respect to W and B
        # Add gradients of W and B to their `grad` attribute

        # It should be pretty similar to linear classifier from
        # the previous assignment

        raise Exception("Not implemented!")

        return d_input

    def params(self):
        return {'W': self.W, 'B': self.B}
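The loss stubs above leave the math to the reader. One possible sketch follows, written as separate illustrative functions rather than drop-in solutions; the (batch_size, N) shape convention for preds and averaging the cross-entropy over the batch are assumptions here, not something this commit specifies.

import numpy as np


def l2_regularization_sketch(W, reg_strength):
    # L2 loss = reg_strength * sum(W_ij ** 2); its gradient is 2 * reg_strength * W
    loss = reg_strength * np.sum(W * W)
    grad = 2 * reg_strength * W
    return loss, grad


def softmax_with_cross_entropy_sketch(preds, target_index):
    # Work on a 2D copy so single samples and batches share one code path
    preds_2d = np.atleast_2d(preds).astype(float)
    targets = np.atleast_1d(target_index).reshape(-1)
    batch_size = preds_2d.shape[0]

    # Numerically stable softmax: subtract the row-wise max before exponentiating
    shifted = preds_2d - np.max(preds_2d, axis=1, keepdims=True)
    exp = np.exp(shifted)
    probs = exp / np.sum(exp, axis=1, keepdims=True)

    # Cross-entropy averaged over the batch (averaging is an assumption)
    loss = -np.mean(np.log(probs[np.arange(batch_size), targets]))

    # Gradient w.r.t. the logits: (softmax - one_hot) / batch_size
    d_preds = probs.copy()
    d_preds[np.arange(batch_size), targets] -= 1
    d_preds /= batch_size
    return loss, d_preds.reshape(np.shape(preds))

Once the TODO in check_gradient is filled in, these sketches can be verified with it, for example check_gradient(lambda w: l2_regularization_sketch(w, 0.1), np.random.randn(3, 4)).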
@@ -0,0 +1,16 @@
def multiclass_accuracy(prediction, ground_truth):
    """
    Computes the accuracy metric for multiclass classification

    Arguments:
      prediction, np array of int (num_samples) - model predictions
      ground_truth, np array of int (num_samples) - true labels

    Returns:
      accuracy - ratio of accurate predictions to total samples
    """

    # TODO: Implement computing accuracy
    raise Exception("Not implemented!")

    return 0
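A possible one-liner for the stub above, under the docstring's assumption that prediction and ground_truth are equal-length integer arrays; the function name is hypothetical and only used for this example.

import numpy as np


def multiclass_accuracy_sketch(prediction, ground_truth):
    # Fraction of positions where the predicted class equals the true label
    return np.mean(prediction == ground_truth)


print(multiclass_accuracy_sketch(np.array([1, 2, 0]), np.array([1, 0, 0])))  # ~0.667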
@@ -0,0 +1,64 @@
import numpy as np

from layers import FullyConnectedLayer, ReLULayer, softmax_with_cross_entropy, l2_regularization


class TwoLayerNet:
    """ Neural network with two fully connected layers """

    def __init__(self, hidden_layer_size, reg):
        """
        Initializes the neural network

        Arguments:
          hidden_layer_size, int - number of neurons in the hidden layer
          reg, float - L2 regularization strength
        """
        self.reg = reg
        # TODO Create necessary layers
        raise Exception("Not implemented!")

    def compute_loss_and_gradients(self, X, y):
        """
        Computes total loss and updates parameter gradients
        on a batch of training examples

        Arguments:
          X, np array (batch_size, input_features) - input data
          y, np array of int (batch_size) - classes

        Returns:
          loss, single value - total loss on the batch
        """
        # TODO Compute loss and fill param gradients
        # by running forward and backward passes through the model

        # After that, implement l2 regularization on all params
        # Hint: use self.params()
        raise Exception("Not implemented!")

        return loss

    def predict(self, X):
        """
        Produces classifier predictions on the set

        Arguments:
          X, np array (test_samples, num_features)

        Returns:
          y_pred, np.array of int (test_samples)
        """
        # TODO: Implement predict
        # Hint: some of the code from compute_loss_and_gradients
        # can be reused
        pred = np.zeros(X.shape[0], int)

        raise Exception("Not implemented!")
        return pred

    def params(self):
        result = {}

        # TODO Implement aggregating all of the params

        raise Exception("Not implemented!")

        return result
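To show how the pieces could fit together once the stubs are filled in, here is a compact, self-contained sketch of a two-layer network. The layer classes are minimal stand-ins written for this example, the loss functions are the sketches given after the layers module above, and n_input/n_output are passed explicitly as an assumption (the stub constructor above only receives hidden_layer_size and reg). This is a sketch, not the assignment's reference solution.

import numpy as np


class ParamSketch:
    def __init__(self, value):
        self.value = value
        self.grad = np.zeros_like(value)


class DenseSketch:
    # Minimal fully connected layer stand-in for this sketch
    def __init__(self, n_input, n_output):
        self.W = ParamSketch(0.001 * np.random.randn(n_input, n_output))
        self.B = ParamSketch(0.001 * np.random.randn(1, n_output))

    def forward(self, X):
        self.X = X
        return X @ self.W.value + self.B.value

    def backward(self, d_out):
        # Accumulate parameter gradients, return gradient w.r.t. the input
        self.W.grad += self.X.T @ d_out
        self.B.grad += np.sum(d_out, axis=0, keepdims=True)
        return d_out @ self.W.value.T

    def params(self):
        return {'W': self.W, 'B': self.B}


class ReluSketch:
    def forward(self, X):
        self.mask = X > 0
        return X * self.mask

    def backward(self, d_out):
        return d_out * self.mask

    def params(self):
        return {}


class TwoLayerNetSketch:
    def __init__(self, n_input, n_output, hidden_layer_size, reg):
        self.reg = reg
        self.layers = [DenseSketch(n_input, hidden_layer_size),
                       ReluSketch(),
                       DenseSketch(hidden_layer_size, n_output)]

    def params(self):
        # Aggregate every layer's params under a unique key
        return {"layer%d_%s" % (i, name): p
                for i, layer in enumerate(self.layers)
                for name, p in layer.params().items()}

    def compute_loss_and_gradients(self, X, y):
        # Zero accumulated gradients before the new forward/backward pass
        for p in self.params().values():
            p.grad = np.zeros_like(p.value)

        out = X
        for layer in self.layers:
            out = layer.forward(out)

        # Loss sketches from the layers section above
        loss, d_out = softmax_with_cross_entropy_sketch(out, y)
        for layer in reversed(self.layers):
            d_out = layer.backward(d_out)

        # L2 regularization on every parameter
        for p in self.params().values():
            reg_loss, reg_grad = l2_regularization_sketch(p.value, self.reg)
            loss += reg_loss
            p.grad += reg_grad
        return loss

    def predict(self, X):
        out = X
        for layer in self.layers:
            out = layer.forward(out)
        return np.argmax(out, axis=1)

With flattened 32x32x3 SVHN images and 10 classes this might be instantiated as TwoLayerNetSketch(n_input=32 * 32 * 3, n_output=10, hidden_layer_size=100, reg=1e-1) and, once check_gradient's TODO is implemented, verified with check_model_gradient.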