# Neural Networks from Scratch

Following a YouTube tutorial

## Hardcoding inputs, weights and biases

In [1]:
# One sample made of four input values.
inputs = [1, 2, 3, 2.5]

# One row of four weights per neuron; three rows, so three neurons.
weights = [
    [0.2, 0.8, -0.5, 1.0],
    [0.5, -0.91, 0.26, -0.5],
    [-0.26, -0.27, 0.17, 0.87],
]

# One bias per neuron.
biases = [2, 3, 0.5]

## Testing out zip() and for loops

In [2]:
# zip() pairs the i-th weight row with the i-th bias; print each pair.
a = zip(weights, biases)
for pair in a:
    neuron_weights, neuron_bias = pair
    print(neuron_weights, neuron_bias)

[0.2, 0.8, -0.5, 1.0] 2
[0.5, -0.91, 0.26, -0.5] 3
[-0.26, -0.27, 0.17, 0.87] 0.5


## Hardcoding the dot product of inputs and weights, plus biases

In [3]:
# Compute one layer's forward pass by hand: for every neuron, a weighted sum of
# the inputs plus that neuron's bias.
layer_outputs = [] # Output of the current layer
for neuron_weights, neuron_bias in zip(weights, biases):
    neuron_output = 0 # Output of a given neuron
    # Pair each input with its weight and accumulate the products.
    for n_input, weight in zip(inputs, neuron_weights):
        neuron_output += n_input*weight
    # The bias is added once per neuron, after the weighted sum.
    neuron_output += neuron_bias
    layer_outputs.append(neuron_output)

print(layer_outputs)

[4.8, 1.21, 2.385]


## Looking at numpy arrays and matrices

In [4]:
import numpy as np
# A 1-D array (a single list) ...
l = np.array([1, 5, 6, 2])
# ... and a 2-D array (a list of lists) to compare their shapes.
lol = np.array([
    [1, 5, 6, 2],
    [3, 2, 1, 3],
])
print(l.shape, lol.shape)

(4,) (2, 4)


## Checking out numpy.dot

In [5]:
a = [1, 2, 3]
b = [2, 3, 4]

# Dot product by hand: multiply matching elements, then add up the products.
dot_product = sum(x * y for x, y in zip(a, b))
print(dot_product)
# np.dot on the same two lists, for comparison.
print(np.dot(a, b))

20
20


## Analyzing shape for dot product

In [6]:
# The whole layer in one call: with weights and inputs as defined in the first
# cell, np.dot combines every weight row with inputs; biases is then added
# element-wise.
outputs = np.dot(weights, inputs) + biases
# Operand order swapped; kept disabled for comparison with the line above.
# wrong_order = np.dot(inputs, weights) + biases
print(outputs)
# print(wrong_order)

[4.8   1.21  2.385]


## Using a batch of inputs instead of a single set of inputs

In [7]:
# Three samples with four values each.
# (The last value of the second sample was written `2-0`, a typo for `2.0`;
# the numeric value is unchanged.)
input_batch = [[1, 2, 3, 2.5],
               [2.0, 5.0, -1.0, 2.0],
               [-1.5, 2.7, 3.3, -0.8]]
# The previous operand order doesn't work anymore, because the input shape changed:
# outputs = np.dot(weights, input_batch) + biases
# Put the batch first and transpose weights so that each row of the result holds
# one sample's neuron outputs; biases is added to every row.
outputs = np.dot(input_batch, np.transpose(weights)) + biases
print(outputs)

[[ 4.8    1.21   2.385]
 [ 8.9   -1.81   0.2  ]
 [ 1.41   1.051  0.026]]


## Adding a second layer by hand and passing data through

In [8]:
# Parameters of the second layer: one weight row and one bias per neuron.
weights2 = [
    [0.1, -0.14, 0.5],
    [-0.5, 0.12, -0.33],
    [-0.44, 0.73, -0.13],
]
biases2 = [-1, 2, -0.5]

# Layer 1 consumes the input batch; layer 2 consumes layer 1's outputs.
layer1_outputs = np.dot(input_batch, np.array(weights).T) + biases
layer2_outputs = np.dot(layer1_outputs, np.array(weights2).T) + biases2

print(layer2_outputs)

[[ 0.5031  -1.04185 -2.03875]
 [ 0.2434  -2.7332  -5.7633 ]
 [-0.99314  1.41254 -0.35655]]


## Defining a DenseLayer class with forward method

In [9]:
# Batch of three samples with four values each.
# (The last value of the second sample was written `2-0`, a typo for `2.0`;
# the numeric value is unchanged.)
X = [[1, 2, 3, 2.5],
     [2.0, 5.0, -1.0, 2.0],
     [-1.5, 2.7, 3.3, -0.8]]

# Seed NumPy's global RNG so the random draws that follow are repeatable.
np.random.seed(0)

class DenseLayer:
    """Fully connected layer: output = inputs . weights + biases.

    Attributes:
        weights: array of shape (n_inputs, n_neurons), drawn from NumPy's
            global RNG (standard normal scaled by 0.1).
        biases: array of zeros with shape (1, n_neurons).
        output: result of the most recent forward() call (unset before that).
    """
    def __init__(self, n_inputs, n_neurons):
        weight_shape = (n_inputs, n_neurons)
        self.weights = 0.1 * np.random.randn(*weight_shape)
        self.biases = np.zeros((1, n_neurons))

    def forward(self, inputs):
        """Store inputs . weights + biases in self.output; returns nothing."""
        weighted_sum = np.dot(inputs, self.weights)
        self.output = weighted_sum + self.biases
        
# print(0.1*np.random.randn(4, 3))

# Two stacked layers: layer1 maps 4 values to 5, layer2 maps those 5 to 2.
# Weights are drawn from NumPy's global RNG at construction, so the order of
# these two lines determines which random numbers each layer receives.
layer1 = DenseLayer(4, 5)
layer2 = DenseLayer(5, 2)

# forward() stores its result on the layer's .output attribute.
layer1.forward(X)
# print(layer1.output)
# Feed the first layer's output into the second layer.
layer2.forward(layer1.output)
print(layer2.output)

[[ 0.148296   -0.08397602]
 [ 0.14100315 -0.01340469]
 [ 0.20124979 -0.07290616]]


## Defining ReLU

In [10]:
inputs = [0, 2, -1, 3.3, -2.7, 1.1, 2.2, -100]

# ReLU by hand: keep each value if it is above 0, otherwise use 0.
output = [max(0, i) for i in inputs]

print(output)

[0, 2, 0, 3.3, 0, 1.1, 2.2, 0]


In [11]:
class ReLU:
    """Rectified linear activation: element-wise maximum of 0 and the input."""
    def forward(self, inputs):
        """Store the rectified inputs in self.output; returns nothing."""
        rectified = np.maximum(0, inputs)
        self.output = rectified

In [15]:
def createData(points, classes):
    """Generate a labelled 2-D dataset with `points` samples per class.

    For each class the radius grows linearly from 0 to 1 and the angle covers
    its own range with Gaussian noise (from NumPy's global RNG) added.

    Returns:
        X: float array of shape (points*classes, 2) with the coordinates.
        y: uint8 array of shape (points*classes,) with the class numbers.
    """
    total = points * classes
    X = np.zeros((total, 2))
    y = np.zeros(total, dtype='uint8')
    for class_number in range(classes):
        # Rows belonging to this class form one contiguous run.
        first_row = points * class_number
        rows = slice(first_row, first_row + points)

        radius = np.linspace(0.0, 1, points)
        noise = np.random.randn(points) * 0.2
        angle = np.linspace(class_number*4, (class_number+1)*4, points) + noise
        scaled_angle = angle * 2.5

        X[rows] = np.c_[radius*np.sin(scaled_angle), radius*np.cos(scaled_angle)]
        y[rows] = class_number
    return X, y

In [16]:
import matplotlib.pyplot as plt

def plotData(X, y):
    """Show two scatter plots of X's first two columns: plain, then coloured by y."""
    xs, ys = X[:, 0], X[:, 1]

    # First figure: every point in the default colour.
    plt.scatter(xs, ys)
    plt.show()

    # Second figure: colour each point by its value in y using the "brg" colormap.
    plt.scatter(xs, ys, c=y, cmap="brg")
    plt.show()

In [20]:
# 100 points for each of 3 classes; X has two columns, y holds the class numbers.
# createData and DenseLayer both draw from NumPy's global RNG, so the printed
# values depend on the RNG state when this cell runs.
X, y = createData(100, 3)
# plotData(X, y)

# A 2-input, 5-neuron dense layer followed by a ReLU activation.
layer1 = DenseLayer(2, 5)
activation1 = ReLU()

layer1.forward(X)

# ReLU reads the dense layer's stored output and zeroes its negative entries.
activation1.forward(layer1.output)
print(activation1.output)

[[0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00]
 [0.00000000e+00 5.11957148e-04 0.00000000e+00 1.14906097e-04
  1.56964612e-03]
 [0.00000000e+00 1.03266675e-03 0.00000000e+00 2.22147887e-04
  3.15134128e-03]
 ...
 [8.80520411e-02 3.87443779e-03 0.00000000e+00 0.00000000e+00
  0.00000000e+00]
 [7.45596547e-02 1.64846014e-02 0.00000000e+00 0.00000000e+00
  0.00000000e+00]
 [0.00000000e+00 7.29942983e-02 0.00000000e+00 0.00000000e+00
  1.33861702e-01]]


## Softmax Activation

### Exponentiation to get rid of negative values

In [21]:
import math

layer_outputs = [4.8, 1.21, 2.385]
E = math.e

# Raise e to each output; the results are always positive.
exp_values = [E**value for value in layer_outputs]

print(exp_values)

[121.51041751873483, 3.353484652549023, 10.859062664920513]


### Normalization

In [22]:
# Divide every exponentiated value by their total so the results add up to ~1.
norm_base = sum(exp_values)
norm_values = [value / norm_base for value in exp_values]

print(norm_values)
print(sum(norm_values))

[0.8952826639572619, 0.024708306782099374, 0.0800090292606387]
0.9999999999999999


### Using numpy

In [25]:
# Same two steps with NumPy: exponentiate, then divide by the total.
exp_values = np.exp(layer_outputs)
norm_values = exp_values / exp_values.sum()

print(norm_values)

[0.89528266 0.02470831 0.08000903]


### Turning input into batch

In [34]:
# One row of layer outputs per sample.
layer_outputs = [
    [4.8, 1.21, 2.385],
    [8.9, -1.81, 0.2],
    [1.41, 1.051, 0.026],
]

exp_values = np.exp(layer_outputs)
# axis=1 sums within each row; keepdims=True keeps a column shape so the
# division lines up row by row.
row_totals = exp_values.sum(axis=1, keepdims=True)
norm_values = exp_values / row_totals

print(norm_values)
# The first row should add up to (approximately) 1.
print(norm_values[0, 0] + norm_values[0, 1] + norm_values[0, 2])

[[8.95282664e-01 2.47083068e-02 8.00090293e-02]
 [9.99811129e-01 2.23163963e-05 1.66554348e-04]
 [5.13097164e-01 3.58333899e-01 1.28568936e-01]]
0.9999999999999999


In [37]:
class Softmax:
    """Row-wise softmax activation for a 2-D batch of scores."""
    def forward(self, inputs):
        """Store the per-row softmax of inputs in self.output; returns nothing."""
        # Subtract each row's maximum before exponentiating so the largest
        # exponent is 0; the normalised result is unaffected by the shift.
        shifted = inputs - np.max(inputs, axis=1, keepdims=True)
        exp_values = np.exp(shifted)
        row_totals = np.sum(exp_values, axis=1, keepdims=True)
        self.output = exp_values / row_totals

# New dataset: 100 points for each of 3 classes (drawn from NumPy's global RNG).
X, y = createData(100, 3)

# Network: 2 inputs -> 3 neurons -> ReLU -> 3 neurons -> softmax.
# The dense layers draw their weights from the same global RNG at construction.
dense1 = DenseLayer(2, 3)
activation1 = ReLU()
dense2 = DenseLayer(3, 3)
activation2 = Softmax()

# Forward pass: each step reads the .output stored by the previous step.
dense1.forward(X)
activation1.forward(dense1.output)
dense2.forward(activation1.output)
activation2.forward(dense2.output)

# Softmax rows for the first five samples.
print(activation2.output[:5])

[[0.33333333 0.33333333 0.33333333]
 [0.33328305 0.33337131 0.33334564]
 [0.33319169 0.3333926  0.3334157 ]
 [0.33325769 0.33339378 0.33334853]
 [0.33333333 0.33333333 0.33333333]]


## Loss function

### Categorical Cross-Entropy

In [40]:
softmax_output = [0.7, 0.1, 0.2]
target_output = [1, 0, 0]  # one-hot encoding

# Full formula: negative sum over classes of log(prediction) * target.
weighted_log_sum = 0
for confidence, target in zip(softmax_output, target_output):
    weighted_log_sum = weighted_log_sum + np.log(confidence) * target
loss = -weighted_log_sum

print(loss)

# With a one-hot target only the term at the target index is non-zero,
# so the formula reduces to the negative log of that single prediction.
loss = -(np.log(softmax_output[0]))
print(loss)

0.35667494393873245
0.35667494393873245


In [49]:
softmax_outputs = np.array([
    [0.7, 0.1, 0.2],
    [0.1, 0.5, 0.4],
    [0.02, 0.9, 0.08],
])

# Class index per sample. For example 0 = cat, 1 = dog, 2 = pigeon:
# the first sample is a cat and the other two are dogs.
class_targets = [0, 1, 1]

# Indexing with a list of rows and a list of columns picks one element per
# (row, column) pair: each sample's predicted value for its target class.
print(softmax_outputs[[0, 1, 2], class_targets])

# Same selection without hard-coding the number of rows.
row_indices = range(len(softmax_outputs))
correct_confidences = softmax_outputs[row_indices, class_targets]
print(correct_confidences)

neg_loss = -np.log(correct_confidences)
avg_loss = neg_loss.mean()
print(avg_loss)

[0.7 0.5 0.9]
[0.7 0.5 0.9]
0.38506088005216804


**Problem:** the negative log of 0 is infinity, which results in errors

In [50]:
class Loss:
    """Base class for losses; subclasses provide forward(output, y)."""
    def calculate(self, output, y):
        """Return the mean of the per-sample losses from self.forward(output, y)."""
        sample_losses = self.forward(output, y)
        data_loss = np.mean(sample_losses)
        return data_loss

class CategoricalCrossentropy(Loss):
    """Categorical cross-entropy: negative log of the value predicted for the true class."""
    def forward(self, y_pred, y_true):
        """Return one loss value per sample.

        Args:
            y_pred: 2-D array-like of predictions, one row per sample.
            y_true: either a 1-D array-like of class indices (e.g. [0, 1]) or a
                2-D one-hot array-like (e.g. [[1, 0], [0, 1]]). Plain Python
                lists are accepted as well as NumPy arrays.

        Returns:
            1-D array of negative log-likelihoods.
        """
        # Accept plain lists too: the checks and indexing below need arrays
        # (a list has no .shape/.ndim and cannot be multiplied element-wise).
        y_pred = np.asarray(y_pred)
        y_true = np.asarray(y_true)

        samples = len(y_pred)
        # Clip away exact 0 and 1 so that log() never receives 0 (to resolve our problem).
        y_pred_clipped = np.clip(y_pred, 1e-7, 1-1e-7)

        # check if scalar values or one hot encoded values have been passed
        # scalar: [0, 1]
        # one-hot encoded: [[1,0],[0,1]]
        if y_true.ndim == 1:
            # Pick, for every row, the prediction at that row's class index.
            correct_confidences = y_pred_clipped[range(samples), y_true]
        else:
            # The one-hot mask zeroes every column except the true class.
            correct_confidences = np.sum(y_pred_clipped*y_true, axis=1)

        negative_log_likelihoods = -np.log(correct_confidences)
        return negative_log_likelihoods
    
# Mean categorical cross-entropy of the softmax output against the labels y.
# Both activation2 and y are created in the Softmax cell, so that cell has to
# be run before this one.
loss_function = CategoricalCrossentropy()
loss = loss_function.calculate(activation2.output, y)

print("Loss: ", loss)

Loss:  1.098118544117172
