In [1]:
from google.colab import drive
# Mount Google Drive at /content/drive so this runtime can reach files stored there.
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# %load nn.py
"""nn

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1PzWci5sVAxAE9ZAeDPLhatxxUCrnUfiG
"""

# -*- coding: utf-8 -*-
"""nn.ipynb

Automatically generated by Colaboratory.

"""

import math
from typing import List

import numpy as np
from scipy.special import expit  # Sigmoid function

class SimpleNetwork:
    """A simple feedforward network where all units have sigmoid activation.

    The network is described entirely by ``self.weights``: a list with one
    weight matrix per pair of adjacent layers. The matrix at index ``k`` has
    shape ``(units in layer k, units in layer k + 1)``. No bias terms are
    applied; each layer is ``sigmoid(previous_layer @ weights)``.
    """

    @classmethod
    def random(cls, *layer_units: int):
        """Creates a feedforward neural network with the given number of units
        for each layer.

        Weights are drawn uniformly from ``[-epsilon, +epsilon]`` where
        ``epsilon = sqrt(6) / sqrt(n_in + n_out)`` for each pair of adjacent
        layers. Values come from NumPy's global random generator.

        :param layer_units: Number of units for each layer
        :return: the neural network
        """

        def uniform(n_in, n_out):
            # The range shrinks as the layers get wider.
            epsilon = math.sqrt(6) / math.sqrt(n_in + n_out)
            return np.random.uniform(-epsilon, +epsilon, size=(n_in, n_out))

        # Pair each layer size with the next one: (l0, l1), (l1, l2), ...
        pairs = zip(layer_units, layer_units[1:])
        return cls(*[uniform(i, o) for i, o in pairs])

    def __init__(self, *layer_weights: np.ndarray):
        """Creates a neural network from a list of weight matrices.
        The weights correspond to transformations from one layer to the next, so
        the number of layers is equal to one more than the number of weight
        matrices.

        :param layer_weights: A list of weight matrices
        """
        self.weights = list(layer_weights)

    def predict(self, input_matrix: np.ndarray) -> np.ndarray:
        """Performs forward propagation over the neural network starting with
        the given input matrix.

        Each unit's output is calculated by taking a weighted sum of its
        inputs (using the appropriate weight matrix) and passing the result of
        that sum through a logistic sigmoid activation function.

        :param input_matrix: The matrix of inputs to the network, where each
        row in the matrix represents an instance for which the neural network
        should make a prediction
        :return: A matrix of predictions, where each row is the predicted
        outputs - each in the range (0, 1) - for the corresponding row in the
        input matrix.
        """
        h = input_matrix
        for weight in self.weights:
            h = expit(np.dot(h, weight))
        return h

    def predict_zero_one(self, input_matrix: np.ndarray) -> np.ndarray:
        """Performs forward propagation over the neural network starting with
        the given input matrix, and converts the outputs to binary (0 or 1).

        Outputs will be converted to 0 if they are less than 0.5, and converted
        to 1 otherwise.

        :param input_matrix: The matrix of inputs to the network, where each
        row in the matrix represents an instance for which the neural network
        should make a prediction
        :return: A matrix of predictions, where each row is the predicted
        outputs - each either 0 or 1 - for the corresponding row in the input
        matrix.
        """
        predictions = self.predict(input_matrix)
        return np.where(predictions < 0.5, 0, 1)

    def gradients(self,
                  input_matrix: np.ndarray,
                  output_matrix: np.ndarray) -> List[np.ndarray]:
        """Performs back-propagation to calculate the gradients for each of
        the weight matrices.

        The error at the output layer is ``prediction - expected``. Each
        gradient is averaged over the rows (instances) of the input matrix.

        :param input_matrix: The matrix of inputs to the network, where each
        row in the matrix represents an instance for which the neural network
        should make a prediction
        :param output_matrix: A matrix of expected outputs, where each row is
        the expected outputs - each either 0 or 1 - for the corresponding row in
        the input matrix.
        :return: the gradient matrix for each weight matrix, in the same order
        as ``self.weights``
        """
        # Forward pass: remember the activation of every layer, input included,
        # because the backward pass needs both sides of each weight matrix.
        activations = [input_matrix]
        for weight in self.weights:
            activations.append(expit(np.dot(activations[-1], weight)))

        n_instances = input_matrix.shape[0]

        # Backward pass: walk from the last weight matrix to the first.
        gradients = []
        error = activations[-1] - output_matrix
        for layer in range(len(self.weights) - 1, -1, -1):
            layer_output = activations[layer + 1]
            # sigmoid'(a) expressed through the activation: h * (1 - h).
            delta = error * layer_output * (1 - layer_output)
            gradients.append(np.dot(activations[layer].T, delta) / n_instances)
            if layer > 0:
                # Push the error back through this layer's weights.
                error = np.dot(delta, self.weights[layer].T)

        # Gradients were collected last-to-first; return them first-to-last.
        gradients.reverse()
        return gradients

    def train(self,
              input_matrix: np.ndarray,
              output_matrix: np.ndarray,
              iterations: int = 10,
              learning_rate: float = 0.1) -> None:
        """Trains the neural network on an input matrix and an expected output
        matrix.

        Each step replaces every weight matrix with a new array instead of
        updating it in place. The arrays originally handed to the constructor
        are therefore never modified, and integer-typed weight arrays can be
        trained (an in-place ``-=`` with a float gradient would be rejected by
        NumPy's casting rules).

        :param input_matrix: The matrix of inputs to the network, where each
        row in the matrix represents an instance for which the neural network
        should make a prediction
        :param output_matrix: A matrix of expected outputs, where each row is
        the expected outputs - each either 0 or 1 - for the corresponding row in
        the input matrix.
        :param iterations: The number of gradient descent steps to take.
        :param learning_rate: The size of gradient descent steps to take, a
        number that the gradients should be multiplied by before updating the
        model weights.
        """
        for _ in range(iterations):
            gradients = self.gradients(input_matrix, output_matrix)
            for i, gradient in enumerate(gradients):
                # Rebind rather than mutate: see the docstring above.
                self.weights[i] = self.weights[i] - learning_rate * gradient

In [3]:
# Read the module source back as text. The encoding is given explicitly
# (nn.py declares utf-8 in its header) so the result does not depend on the
# runtime's locale default.
with open('nn.py', 'r', encoding='utf-8') as file:
    code = file.read()

In [4]:
# Print the text that the previous cell read from nn.py into `code`.
print(code)

# -*- coding: utf-8 -*-
"""nn

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1PzWci5sVAxAE9ZAeDPLhatxxUCrnUfiG
"""

# -*- coding: utf-8 -*-
"""nn.ipynb

Automatically generated by Colaboratory.

"""

import math
from typing import List

import numpy as np
from scipy.special import expit  # Sigmoid function

class SimpleNetwork:
    """A simple feedforward network where all units have sigmoid activation.
    """

    @classmethod
    def random(cls, *layer_units: int):
        """Creates a feedforward neural network with the given number of units
        for each layer.

        :param layer_units: Number of units for each layer
        :return: the neural network
        """

        def uniform(n_in, n_out):
            epsilon = math.sqrt(6) / math.sqrt(n_in + n_out)
            return np.random.uniform(-epsilon, +epsilon, size=(n_in, n_out))

        pairs = zip(layer_units, layer_units[1:])
        return cls(*[uniform(i,

In [5]:
# %load test_nn.py
"""test_nn

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1bbYMedBgr6lw0butpwFJ2FSKYJWQ3d1L
"""

# -*- coding: utf-8 -*-
"""test_nn.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1mGHNhilHEMqcWpR53TUAvZyVt6TpgfkS
"""

import math

import numpy as np
import pytest

import nn



def set_seed():
    """Reset NumPy's global random generator to the fixed seed 42."""
    # NOTE(review): nothing in the visible file calls this helper or registers
    # it as a fixture - presumably it was meant to run before each test; confirm.
    fixed_seed = 42
    np.random.seed(fixed_seed)



def test_predict():
    """Check ``predict`` and ``predict_zero_one`` on a fixed 3-2-3 network
    against values worked out by hand with the scalar sigmoid ``s``."""
    input_to_hidden = np.array([[.1, -.2],
                                [-.3, .4],
                                [.5, -.6]])
    hidden_to_output = np.array([[-.7, .8, -.9],
                                 [.10, -.11, .12]])
    net = nn.SimpleNetwork(input_to_hidden, hidden_to_output)

    input_matrix = np.array([[13, -14, 15],
                             [-16, 17, -18]])
    predictions = net.predict(input_matrix)

    # Hidden activations, one line per (instance, hidden unit).
    h11 = s(13 * .1 + -14 * -.3 + 15 * .5)
    h12 = s(13 * -.2 + -14 * .4 + 15 * -.6)
    h21 = s(-16 * .1 + 17 * -.3 - 18 * .5)
    h22 = s(-16 * -.2 + 17 * .4 + -18 * -.6)

    # Output activations: the same three formulas applied to each instance.
    expected = np.array([
        [s(first * -.7 + second * .10),
         s(first * .8 + second * -.11),
         s(first * -.9 + second * .12)]
        for first, second in ((h11, h12), (h21, h22))])
    np.testing.assert_allclose(predictions, expected)

    # Thresholding at 0.5 must give exactly this 0/1 pattern.
    expected_binary = np.array([[0, 1, 0],
                                [1, 0, 1]])
    binary_predictions = net.predict_zero_one(input_matrix)
    np.testing.assert_array_equal(binary_predictions, expected_binary)



def test_gradients():
    """Check ``SimpleNetwork.gradients`` on a fixed 2-3-2 network.

    Every intermediate quantity of back-propagation is written out by hand
    as scalar arithmetic (using the module-level helpers ``s`` and ``sg``)
    and the resulting matrices are compared with what the network returns.

    Naming: the two digits in each name are (instance row, unit column).
    ``hi``/``h`` are the hidden weighted sums / activations, ``oi``/``o`` the
    output weighted sums / activations, ``do``/``dh`` the error terms at the
    output / hidden layer.
    """
    net = nn.SimpleNetwork(np.array([[.1, .3, .5],
                                     [-.5, -.3, -.1]]),
                           np.array([[.2, -.2],
                                     [.4, -.4],
                                     [.6, -.6]]))
    # Four instances with two features each.
    input_matrix = np.array([[0, 0],
                             [1, 0],
                             [0, 1],
                             [1, 1]])
    # Expected outputs, one row per instance.
    output_matrix = np.array([[1, 1],
                              [0, 1],
                              [1, 0],
                              [0, 0]])
    # Hidden-layer weighted sums: each input row times the first weight matrix.
    [[hi11, hi12, hi13],
     [hi21, hi22, hi23],
     [hi31, hi32, hi33],
     [hi41, hi42, hi43]] = hi = [
        [.1 * 0 + -.5 * 0, .3 * 0 + -.3 * 0, .5 * 0 + -.1 * 0],
        [.1 * 1 + -.5 * 0, .3 * 1 + -.3 * 0, .5 * 1 + -.1 * 0],
        [.1 * 0 + -.5 * 1, .3 * 0 + -.3 * 1, .5 * 0 + -.1 * 1],
        [.1 * 1 + -.5 * 1, .3 * 1 + -.3 * 1, .5 * 1 + -.1 * 1]]
    # Hidden-layer activations: s applied element-wise to the sums above.
    [[h11, h12, h13],
     [h21, h22, h23],
     [h31, h32, h33],
     [h41, h42, h43]] = [[s(x) for x in row] for row in hi]
    # Output-layer weighted sums: hidden activations times the second matrix.
    [[oi11, oi12],
     [oi21, oi22],
     [oi31, oi32],
     [oi41, oi42]] = oi = [
        [h11 * .2 + h12 * .4 + h13 * .6, h11 * -.2 + h12 * -.4 + h13 * -.6],
        [h21 * .2 + h22 * .4 + h23 * .6, h21 * -.2 + h22 * -.4 + h23 * -.6],
        [h31 * .2 + h32 * .4 + h33 * .6, h31 * -.2 + h32 * -.4 + h33 * -.6],
        [h41 * .2 + h42 * .4 + h43 * .6, h41 * -.2 + h42 * -.4 + h43 * -.6]]
    # Output-layer activations.
    [[o11, o12],
     [o21, o22],
     [o31, o32],
     [o41, o42]] = [[s(x) for x in row] for row in oi]
    # Output error terms: (activation - expected output) * sg(weighted sum).
    [[do11, do12],
     [do21, do22],
     [do31, do32],
     [do41, do42]] = [[(o11 - 1) * sg(oi11), (o12 - 1) * sg(oi12)],
                      [(o21 - 0) * sg(oi21), (o22 - 1) * sg(oi22)],
                      [(o31 - 1) * sg(oi31), (o32 - 0) * sg(oi32)],
                      [(o41 - 0) * sg(oi41), (o42 - 0) * sg(oi42)]]
    # Hidden error terms: output error terms weighted by the second matrix,
    # then multiplied by sg of the hidden weighted sum.
    [[dh11, dh12, dh13],
     [dh21, dh22, dh23],
     [dh31, dh32, dh33],
     [dh41, dh42, dh43]] = [[(.2 * do11 + -.2 * do12) * sg(hi11),
                             (.4 * do11 + -.4 * do12) * sg(hi12),
                             (.6 * do11 + -.6 * do12) * sg(hi13)],
                            [(.2 * do21 + -.2 * do22) * sg(hi21),
                             (.4 * do21 + -.4 * do22) * sg(hi22),
                             (.6 * do21 + -.6 * do22) * sg(hi23)],
                            [(.2 * do31 + -.2 * do32) * sg(hi31),
                             (.4 * do31 + -.4 * do32) * sg(hi32),
                             (.6 * do31 + -.6 * do32) * sg(hi33)],
                            [(.2 * do41 + -.2 * do42) * sg(hi41),
                             (.4 * do41 + -.4 * do42) * sg(hi42),
                             (.6 * do41 + -.6 * do42) * sg(hi43)]]

    # The network returns one gradient per weight matrix, first matrix first.
    [input_to_hidden_gradient,
     hidden_to_output_gradient] = net.gradients(input_matrix, output_matrix)

    # Expected: hidden activations times output error terms, averaged over
    # the 4 instances.
    np.testing.assert_allclose(hidden_to_output_gradient, np.array(
        [[(do11 * h11 + do21 * h21 + do31 * h31 + do41 * h41) / 4,
          (do12 * h11 + do22 * h21 + do32 * h31 + do42 * h41) / 4],
         [(do11 * h12 + do21 * h22 + do31 * h32 + do41 * h42) / 4,
          (do12 * h12 + do22 * h22 + do32 * h32 + do42 * h42) / 4],
         [(do11 * h13 + do21 * h23 + do31 * h33 + do41 * h43) / 4,
          (do12 * h13 + do22 * h23 + do32 * h33 + do42 * h43) / 4]]))

    # Expected: inputs (the 0/1 literals) times hidden error terms, averaged
    # over the 4 instances.
    np.testing.assert_allclose(input_to_hidden_gradient, np.array(
        [[(0 * dh11 + 1 * dh21 + 0 * dh31 + 1 * dh41) / 4,
          (0 * dh12 + 1 * dh22 + 0 * dh32 + 1 * dh42) / 4,
          (0 * dh13 + 1 * dh23 + 0 * dh33 + 1 * dh43) / 4],
         [(0 * dh11 + 0 * dh21 + 1 * dh31 + 1 * dh41) / 4,
          (0 * dh12 + 0 * dh22 + 1 * dh32 + 1 * dh42) / 4,
          (0 * dh13 + 0 * dh23 + 1 * dh33 + 1 * dh43) / 4]]))



def test_train_greater_than_half():
    """Train a 1-5-5-1 network to output 1 when its single input exceeds 0.5
    and require at least 9 of 10 held-out points to be classified correctly."""
    # Both the training data and the initial weights come from NumPy's global
    # random generator. Seed it here so the test gives the same result on
    # every run; set_seed() is not invoked automatically in this file.
    set_seed()

    inputs = np.random.uniform(size=(100, 1))
    outputs = (inputs > 0.5).astype(int)

    net = nn.SimpleNetwork.random(1, 5, 5, 1)
    # Three weight matrices connect four layers, so three gradients.
    assert len(net.gradients(inputs, outputs)) == 3
    net.train(inputs, outputs, iterations=1000, learning_rate=1)

    # Evenly spaced probes on both sides of the 0.5 boundary (0.5 excluded).
    test_inputs = np.array([[0.0], [0.1], [0.2], [0.3], [0.4],
                            [0.6], [0.7], [0.8], [0.9], [1.0]])
    test_outputs = np.array([[0], [0], [0], [0], [0],
                             [1], [1], [1], [1], [1]])
    assert (net.predict_zero_one(test_inputs) == test_outputs).sum() >= 9



def test_train_xor():
    """Train a 3-3-1 network on XOR of the first two input columns and require
    every training instance to be classified correctly."""
    # The initial weights are random. Seed the global generator so that
    # convergence does not depend on the run; set_seed() is not invoked
    # automatically in this file.
    set_seed()

    # The third input column is the constant 1 on every row.
    inputs = np.array([[0, 0, 1], [0, 1, 1], [1, 0, 1], [1, 1, 1]])
    outputs = np.array([[0], [1], [1], [0]])

    net = nn.SimpleNetwork.random(3, 3, 1)
    # Two weight matrices connect three layers, so two gradients.
    assert len(net.gradients(inputs, outputs)) == 2
    net.train(inputs, outputs, iterations=5000, learning_rate=0.5)

    assert np.all(net.predict_zero_one(inputs) == outputs)



def test_train_learning_rate():
    """Check that the learning rate is actually applied: with the same number
    of iterations, a tiny rate must leave at least one training instance
    misclassified while a large rate must classify all of them correctly."""
    # The initial weights are random. Seed the global generator so the outcome
    # of both training runs is repeatable; set_seed() is not invoked
    # automatically in this file.
    set_seed()

    # Target is the logical OR of the first two columns; the third column is
    # the constant 1 on every row.
    inputs = np.array([[0, 0, 1], [0, 1, 1], [1, 0, 1], [1, 1, 1]])
    outputs = np.array([[0], [1], [1], [1]])

    net = nn.SimpleNetwork.random(3, 3, 1)
    net.train(inputs, outputs, iterations=5000, learning_rate=0.01)

    # np.any replaces np.sometrue, which was removed in NumPy 2.0.
    assert np.any(net.predict_zero_one(inputs) != outputs)

    net = nn.SimpleNetwork.random(3, 3, 1)
    net.train(inputs, outputs, iterations=5000, learning_rate=1)

    assert np.all(net.predict_zero_one(inputs) == outputs)


def s(x):
    """Logistic sigmoid ``1 / (1 + e**-x)`` of a scalar.

    The two branches are algebraically identical. They are split so that
    ``math.exp`` is only ever called with a non-positive argument, which
    cannot raise ``OverflowError`` however large ``abs(x)`` is.

    :param x: a real number
    :return: the sigmoid of ``x``, between 0 and 1
    """
    if x >= 0:
        return 1 / (1 + math.exp(-x))
    # For negative x, exp(-x) could overflow; exp(x) only underflows to 0.0.
    exp_x = math.exp(x)
    return exp_x / (1 + exp_x)


def sg(x):
    """Sigmoid gradient at ``x``: ``s(x) * (1 - s(x))``."""
    activation = s(x)
    return activation * (1 - activation)

In [6]:
!pytest

platform linux -- Python 3.11.11, pytest-8.3.4, pluggy-1.5.0
rootdir: /content
plugins: typeguard-4.4.1, anyio-3.7.1, langsmith-0.3.5
collected 5 items                                                                                  [0m

test_nn.py [32m.[0m[32m.[0m[32m.[0m[32m.[0m[32m.[0m[32m                                                                             [100%][0m

