In [None]:
import numpy as np

class Sigmoid:
    """Logistic sigmoid activation and its derivative."""

    def f(self, x):
        """Return ``1 / (1 + exp(-x))`` for a scalar or NumPy array ``x``."""
        denominator = 1 + np.exp(-x)
        return 1 / denominator

    def d_f(self, x):
        """Return the derivative of the sigmoid evaluated at ``x``.

        Uses the identity ``f'(x) = f(x) * (1 - f(x))``.
        """
        activated = self.f(x)
        complement = 1 - activated
        return activated * complement

def hyptan(x):
    """Return the hyperbolic tangent of ``x`` (scalar or NumPy array).

    Delegates to ``np.tanh`` rather than evaluating
    ``(exp(2x) - 1) / (exp(2x) + 1)`` directly: that formula overflows to
    ``inf / inf = nan`` for large positive ``x``, whereas ``np.tanh``
    saturates cleanly at +/-1.
    """
    return np.tanh(x)

def silu(x):
    """Return ``x / (1 + exp(-x))`` for a scalar or NumPy array ``x``."""
    gate_denominator = 1 + np.exp(-x)
    return x / gate_denominator

def relu(x):
    """Return the element-wise maximum of 0 and ``x``."""
    floor = 0
    rectified = np.maximum(floor, x)
    return rectified

class MeanSquared:
    """Squared-error cost and its derivative.

    Both methods work element-wise; no averaging is performed here.
    """

    def f(self, true_output, expected_output):
        """Return the squared difference between the two outputs."""
        residual = true_output - expected_output
        return residual ** 2

    def d_f(self, true_output, expected_output):
        """Return the derivative of the squared difference w.r.t. ``true_output``."""
        residual = true_output - expected_output
        return 2 * residual

In [None]:
class Layer:
    """A fully connected layer with weights, biases and accumulated gradients.

    The layer caches its most recent inputs, pre-activation values and
    activation values during the forward pass so the backward-pass helpers
    can reuse them.

    The activation object must expose ``f(z)`` and ``d_f(z)``; the error
    object must expose ``f(output, expected)`` and ``d_f(output, expected)``.
    Both are called with whole NumPy arrays.
    """

    def __init__(self, nodes_in, nodes_out, activation=None, error=None):
        """Create a layer mapping ``nodes_in`` inputs to ``nodes_out`` outputs.

        Args:
            nodes_in: Number of input nodes.
            nodes_out: Number of output nodes.
            activation: Activation object; a new ``Sigmoid()`` when omitted.
            error: Error object; a new ``MeanSquared()`` when omitted.
        """
        self._nodes_in = nodes_in
        self._nodes_out = nodes_out

        self._inputs = np.zeros(shape=nodes_in)

        # Defaults are created per instance; a default written in the
        # signature would be evaluated once and shared by every layer.
        self._activation = Sigmoid() if activation is None else activation
        self._error = MeanSquared() if error is None else error

        # NOTE(review): weights this large will likely saturate a sigmoid
        # activation -- confirm that the +/-25 range is intended.
        self._weights = np.random.uniform(-25., 25., size=(nodes_in, nodes_out))
        self._biases = np.zeros(shape=nodes_out)

        self._gradient_w = np.zeros(shape=(nodes_in, nodes_out))
        self._gradient_b = np.zeros(shape=nodes_out)

        self._z_vals = np.zeros(shape=nodes_out)
        self._activation_vals = np.zeros(shape=nodes_out)

    def __call__(self, inputs):
        """Run the forward pass for one input vector.

        Args:
            inputs: Sequence or array of ``nodes_in`` values.

        Returns:
            A new array holding the ``nodes_out`` activation values. It is
            independent of the layer's cached state, so later forward passes
            do not overwrite results the caller still holds.
        """
        self._inputs = np.asarray(inputs, dtype=float)
        # z[j] = bias[j] + sum_i inputs[i] * weights[i, j]
        self._z_vals = self._biases + self._inputs @ self._weights
        self._activation_vals = self._activation.f(self._z_vals)
        return self._activation_vals.copy()

    def copy(self, weights, biases):
        """Replace this layer's parameters with copies of the given arrays.

        Copies are stored (as floats) because ``_apply_gradients`` updates
        the parameters in place; keeping a reference would silently modify
        the caller's arrays as well.
        """
        self._weights = np.array(weights, dtype=float)
        self._biases = np.array(biases, dtype=float)

    def _calc_output_node_vals(self, expected_output):
        """Return the per-node error terms when this is the output layer.

        Each term is the cost derivative at the cached activation value
        multiplied by the activation derivative at the cached ``z`` value.
        """
        expected = np.asarray(expected_output, dtype=float)
        cost_derivative = self._error.d_f(self._activation_vals, expected)
        activation_derivative = self._activation.d_f(self._z_vals)
        return cost_derivative * activation_derivative

    def _calc_hidden_node_vals(self, old_layer, old_node_vals):
        """Return the per-node error terms when this is a hidden layer.

        Args:
            old_layer: The layer that consumes this layer's output (it was
                processed just before this one during the backward pass).
            old_node_vals: Error terms already computed for ``old_layer``.
        """
        # Row i of old_layer._weights holds the weights leaving this layer's
        # node i, so the product sums the downstream terms for each node.
        propagated = old_layer._weights @ np.asarray(old_node_vals, dtype=float)
        return propagated * self._activation.d_f(self._z_vals)

    def _update_gradients(self, node_values):
        """Accumulate weight and bias gradients for the cached inputs."""
        node_values = np.asarray(node_values, dtype=float)
        # gradient_w[i, j] += inputs[i] * node_values[j]
        self._gradient_w += np.outer(self._inputs, node_values)
        self._gradient_b += node_values

    def _apply_gradients(self, learning_rate):
        """Step weights and biases against the accumulated gradients."""
        self._biases -= (learning_rate * self._gradient_b)
        self._weights -= (learning_rate * self._gradient_w)

    def _clear_gradients(self):
        """Reset the accumulated gradients to zero."""
        self._gradient_w = np.zeros(shape=(self._nodes_in, self._nodes_out))
        self._gradient_b = np.zeros(shape=self._nodes_out)


In [None]:
class NeuralNet:
    """A feed-forward network of ``Layer`` objects trained by backpropagation.

    Training data points are dicts with an ``'input'`` entry and an
    ``'expected_output'`` entry.
    """

    def __init__(self, layer_sizes, batch_size = 1000, epoch = 1):
        """Build one layer per consecutive pair of sizes.

        Args:
            layer_sizes: Node counts, input layer first, output layer last.
            batch_size: Number of data points per gradient step.
            epoch: Number of passes over the training data in ``learn``.
        """
        self._BATCH_SIZE = batch_size
        self._EPOCH = epoch

        self._layers = [
            Layer(nodes_in, nodes_out)
            for nodes_in, nodes_out in zip(layer_sizes[:-1], layer_sizes[1:])
        ]

        self._outputs = np.zeros(shape=layer_sizes[-1])

    def _calc_outputs(self, inputs):
        """Feed ``inputs`` through every layer and cache the final outputs."""
        for layer in self._layers:
            inputs = layer(inputs)

        self._outputs = inputs

    def __call__(self, inputs):
        """Return the network's outputs for ``inputs``.

        A copy is returned so that a later forward pass cannot overwrite a
        result the caller is still holding.
        """
        self._calc_outputs(inputs)
        return np.array(self._outputs)

    def _cost(self, data_point):
        """Return the summed per-output error for one data point."""
        self._calc_outputs(data_point['input'])
        error = self._layers[-1]._error
        expected = data_point['expected_output']
        cost = 0.

        for out_node, output in enumerate(self._outputs):
            cost += error.f(output, expected[out_node])

        return cost

    def _avg_cost(self, data_points):
        """Return the mean of ``_cost`` over ``data_points``."""
        total_cost = 0.

        for data_point in data_points:
            total_cost += self._cost(data_point)

        return total_cost / len(data_points)

    def _apply_gradients(self, learning_rate):
        """Apply the accumulated gradients of every layer."""
        for layer in self._layers:
            layer._apply_gradients(learning_rate)

    def _clear_gradients(self):
        """Reset the accumulated gradients of every layer."""
        for layer in self._layers:
            layer._clear_gradients()

    def _back_prop(self, data_point):
        """Accumulate gradients for one data point in every layer."""
        self._calc_outputs(data_point['input'])

        output_layer = self._layers[-1]
        node_values = output_layer._calc_output_node_vals(data_point['expected_output'])
        output_layer._update_gradients(node_values)

        # Walk backwards from the last hidden layer; each layer needs the
        # error terms of the layer that follows it.
        for index in range(len(self._layers) - 2, -1, -1):
            hidden_layer = self._layers[index]
            node_values = hidden_layer._calc_hidden_node_vals(self._layers[index + 1], node_values)
            hidden_layer._update_gradients(node_values)

    def learn(self, batch, learning_rate):
        """Train on ``batch`` with mini-batch gradient descent.

        Args:
            batch: Sequence of data-point dicts.
            learning_rate: Step size; it is divided by the size of each
                mini-batch so the summed gradients act as an average.

        Returns:
            A list with the average cost over ``batch`` after each epoch.
        """
        avg_vals = []

        for _ in range(self._EPOCH):
            for pos in range(0, len(batch), self._BATCH_SIZE):
                # Slicing clamps at the end, so the last mini-batch may be short.
                mini_batch = batch[pos : pos + self._BATCH_SIZE]

                for data_point in mini_batch:
                    self._back_prop(data_point)

                self._apply_gradients(learning_rate / len(mini_batch))
                self._clear_gradients()

            avg_vals.append(self._avg_cost(batch))

        return avg_vals


In [None]:
# Demo: train a small network to classify a fruit from two features.
# A fruit is labelled poisonous ([0, 1]) when either feature is at least
# POISON_THRESHOLD, and good ([1, 0]) otherwise.
NUM_SAMPLES = 10000000
POISON_THRESHOLD = 5.

fruit_bot = NeuralNet((2, 3, 2))

fruit_data = []
for _ in range(NUM_SAMPLES):
    # Sample across the whole 0-10 range so both classes occur in the data.
    rand = np.random.uniform(0, 10, size = 2)

    if rand[0] >= POISON_THRESHOLD or rand[1] >= POISON_THRESHOLD:
        output = np.array([0, 1])  # poison
    else:
        output = np.array([1, 0])  # good

    fruit_data.append({
        'input': rand,
        'expected_output': output
    })

fruit_bot.learn(fruit_data, .01)

print(fruit_bot(np.array([10., 10.]))) # poison [0, 1]
print(fruit_bot(np.array([7., 9.]))) # poison [0, 1]
print(fruit_bot(np.array([5., 6.]))) # poison [0, 1]
print(fruit_bot(np.array([5., 1.]))) # poison [0, 1]
print(fruit_bot(np.array([2., 4.]))) # good [1, 0]
print(fruit_bot(np.array([4., 3.]))) # good [1, 0]
print(fruit_bot(np.array([1., 1.]))) # good [1, 0]