## Convolutional Neural Networks

### Coding a Neuron

In [None]:
# A single neuron: three inputs, one weight per input, and one bias.
inputs = [1, 2, 3]
weights = [0.2, 0.8, -0.5]
bias = 2

# Accumulate input*weight from left to right, then add the bias last.
output = 0
for n_input, weight in zip(inputs, weights):
    output += n_input * weight
output += bias
print(output)

### Coding a Layer

In [None]:
# A layer of three neurons that all read the same four inputs.
inputs = [1, 2, 3, 2.5]
weights = [
    [0.2, 0.8, -0.5, 1],
    [0.5, -0.91, 0.26, -0.5],
    [-0.26, -0.27, 0.17, 0.87],
]
biases = [2, 3, 0.5]

# One weight row per neuron: (w11..w14), (w21..w24), (w31..w34)
weights1, weights2, weights3 = weights
# One bias per neuron: b1, b2, b3
bias1, bias2, bias3 = biases


def _neuron_output(neuron_inputs, neuron_weights, neuron_bias):
    """Return sum(input * weight) + bias for one neuron."""
    total = 0
    for value, weight in zip(neuron_inputs, neuron_weights):
        total += value * weight
    return total + neuron_bias


outputs = [
    _neuron_output(inputs, weights1, bias1),  # Neuron 1
    _neuron_output(inputs, weights2, bias2),  # Neuron 2
    _neuron_output(inputs, weights3, bias3),  # Neuron 3
]

print(outputs)

In [None]:
# Same layer computation as a loop: one pass per neuron over its
# (weight row, bias) pair, using the inputs/weights/biases defined earlier.
layer_outputs = []

for neuron_weights, neuron_bias in zip(weights, biases):
    # Weighted sum of the inputs for this neuron
    weighted_sum = 0
    for n_input, weight in zip(inputs, neuron_weights):
        weighted_sum = weighted_sum + n_input * weight
    # The bias is added after the weighted sum is complete
    layer_outputs.append(weighted_sum + neuron_bias)

print(layer_outputs)

In [None]:
# Neuron through numpy

import numpy as np

# One neuron with four inputs, computed as a dot product plus a bias.
inputs = np.array([1.0, 2.0, 3.0, 2.5])
weights = np.array([0.2, 0.8, -0.5, 1.0])
bias = 2.0

weighted_sum = np.dot(inputs, weights)
outputs = weighted_sum + bias
print(outputs)

In [None]:
# Layer through numpy
# Three neurons (one weight row each) applied to a single 4-value sample.

inputs = np.array([1, 2, 3, 2.5])
weights = np.array([
    [0.2, 0.8, -0.5, 1],
    [0.5, -0.91, 0.26, -0.5],
    [-0.26, -0.27, 0.17, 0.87],
])
biases = [2, 3, 0.5]

# (3, 4) matrix times (4,) vector gives one weighted sum per neuron
weighted_sums = np.dot(weights, inputs)
outputs = weighted_sums + biases
print(outputs)

In [None]:
# Batch through numpy
# Three samples (rows of `inputs`) pushed through the same three neurons.

inputs = np.array([
    [1, 2, 3, 2.5],
    [2, 5, -1, 2.0],
    [-1.5, 2.7, 3.3, -0.8],
])
weights = [
    [0.2, 0.8, -0.5, 1],
    [0.5, -0.91, 0.26, -0.5],
    [-0.26, -0.27, 0.17, 0.87],
]
biases = [2, 3, 0.5]

# Transposing the (3, 4) weights to (4, 3) lets the (3, 4) batch be
# multiplied by them, giving a (3, 3) result: one row per sample.
weights_transposed = np.array(weights).T
weighted_sums = np.dot(inputs, weights_transposed)
# Broadcasting: the 3 bias values are added to every row of the (3, 3) result
outputs = weighted_sums + biases
print(outputs)

### Coding Multiple Layers

In [None]:
# Input Layer: a batch of three samples with four features each
inputs = np.array([
    [1, 2, 3, 2.5],
    [2, 5, -1, 2.0],
    [-1.5, 2.7, 3.3, -0.8],
])

# Hidden Layer 1: three neurons, four weights each
weights_1 = [
    [0.2, 0.8, -0.5, 1],
    [0.5, -0.91, 0.26, -0.5],
    [-0.26, -0.27, 0.17, 0.87],
]
bias_1 = [2, 3, 0.5]

# Hidden Layer 2: three neurons, fed by the three outputs of layer 1
weights_2 = [
    [0.1, -0.14, 0.5],
    [-0.5, 0.12, -0.33],
    [-0.44, 0.73, -0.13],
]
bias_2 = [-1, 2, -0.5]

# Layer 1 consumes the raw inputs
layer_1_weights_t = np.array(weights_1).T
output_1 = np.dot(inputs, layer_1_weights_t) + bias_1
print(f"Output of 1st hidden layer:\n{output_1}\n")

# Layer 2 consumes the output of layer 1
layer_2_weights_t = np.array(weights_2).T
output_2 = np.dot(output_1, layer_2_weights_t) + bias_2
print(f"Output of 2nd hidden layer:\n{output_2}")

### Best Practice

In [None]:
# Input Layer: three samples, four features each
inputs = [
    [1, 2, 3, 2.5],
    [2, 5, -1, 2.0],
    [-1.5, 2.7, 3.3, -0.8],
]

# Hidden Layer 1
weights_1 = [
    [0.2, 0.8, -0.5, 1],
    [0.5, -0.91, 0.26, -0.5],
    [-0.26, -0.27, 0.17, 0.87],
]
bias_1 = [2, 3, 0.5]

# Hidden Layer 2
weights_2 = [
    [0.1, -0.14, 0.5],
    [-0.5, 0.12, -0.33],
    [-0.44, 0.73, -0.13],
]
bias_2 = [-1, 2, -0.5]

# Convert every plain list to an array once, before any arithmetic
input_array = np.array(inputs)
weight1_array, bias1_array = np.array(weights_1), np.array(bias_1)
weight2_array, bias2_array = np.array(weights_2), np.array(bias_2)

# Chain the two layers: inputs -> layer 1 -> layer 2
layer_1_output = np.dot(input_array, weight1_array.T) + bias1_array
layer_2_output = np.dot(layer_1_output, weight2_array.T) + bias2_array
print(layer_2_output)

### Generating Non-Linear Training Data

In [None]:
from nnfs.datasets import spiral_data
import nnfs
import matplotlib.pyplot as plt

# Run the nnfs package initialisation before generating data
nnfs.init()

# Spiral dataset: 3 classes, 100 samples per class
x, y = spiral_data(classes=3, samples=100)
for array in (x, y):
    print(array.shape)

In [None]:
# Plot every sample, using column 0 and column 1 of x as the coordinates
first_feature, second_feature = x[:, 0], x[:, 1]
plt.scatter(first_feature, second_feature)
plt.show()

In [None]:
# Same scatter plot, with each point coloured by its label in y
first_feature, second_feature = x[:, 0], x[:, 1]
plt.scatter(first_feature, second_feature, c=y, cmap="brg")
plt.show()

### Coding a Dense Layer

In [None]:
class DenseLayer:
    """Fully connected layer with weights stored as (n_neurons, n_inputs)."""

    def __init__(self, n_inputs, n_neurons):
        """Create small random weights and zero biases.

        Args:
            n_inputs: number of values each neuron receives.
            n_neurons: number of neurons in the layer.
        """
        # Standard-normal draws scaled down by 0.01
        random_weights = np.random.randn(n_neurons, n_inputs)
        self.weights = 0.01 * random_weights
        # A single row of biases, one per neuron
        self.biases = np.zeros((1, n_neurons))

    def forward(self, inputs):
        """Store ``inputs . weights^T + biases`` in ``self.output``."""
        weighted_sums = np.dot(inputs, self.weights.T)
        self.output = weighted_sums + self.biases

# Fix the NumPy seed so the layer's random weights are repeatable
np.random.seed(123)

# Dense layer with 2 input features and 3 neurons
dense_1 = DenseLayer(n_inputs=2, n_neurons=3)
# Forward pass of the training data through this layer
dense_1.forward(x)

first_layer_output = dense_1.output
# Shape of the output
print(first_layer_output.shape)
# Outputs for the first five samples
print(first_layer_output[:5])

### Array Summation in NumPy

In [None]:
# 3x3 matrix holding the integers 1..9 in row-major order
a = np.arange(1, 10).reshape(3, 3)

In [None]:
# Sum over every element of the array
total_sum = a.sum()
print(total_sum)

In [None]:
# axis=0 collapses the rows, leaving one total per column
column_sum = a.sum(axis=0)
print(column_sum)

In [None]:
# axis=1 collapses the columns, leaving one total per row
row_sum = a.sum(axis=1)
print(row_sum)

In [None]:
# Show the shapes of the two reduced arrays
print(np.shape(row_sum), np.shape(column_sum))

In [None]:
# keepdims=True keeps the reduced axis with length 1, so the results stay 2-D
# instead of collapsing to shape (3,)
column_sum = a.sum(axis=0, keepdims=True)
row_sum = a.sum(axis=1, keepdims=True)

for reduced in (column_sum, row_sum):
    print(reduced)

In [None]:
# Show the shapes again, now that the sums were taken with keepdims=True
print(np.shape(row_sum), np.shape(column_sum))

### Broadcasting Operations

In [None]:
# Largest value of each row, kept as a column (one entry per row)
b_1 = a.max(axis=1, keepdims=True)
print(b_1)

In [None]:
# Largest value of each column, kept as a single row (one entry per column)
b_2 = a.max(axis=0, keepdims=True)
print(b_2)

In [None]:
# Broadcasting: b_1 is stretched across the columns of a before subtracting
final_ans_1 = np.subtract(a, b_1)
print(final_ans_1)

In [None]:
# Broadcasting: b_2 is stretched across the rows of a before subtracting
final_ans_2 = np.subtract(a, b_2)
print(final_ans_2)

In [None]:
# A 1-D array of three values is broadcast against a when added to it
arr = np.array([1, 2, 3])

ans = np.add(a, arr)
print(ans)

### Activation Functions

#### ReLU Activation Function

In [None]:
inputs = [0, 2, -1, 3.3, -2.7, 1.1, 2.2, -100]
# ReLU: every negative value becomes 0, everything else passes through
input_values = np.array(inputs)
output = np.maximum(0, input_values)
print(output)

In [None]:
class ActivationReLU:
    """Rectified linear unit activation."""

    def forward(self, inputs):
        """Store the element-wise maximum of 0 and ``inputs`` in ``self.output``."""
        rectified = np.maximum(0, inputs)
        self.output = rectified

#### Softmax Activation Function

In [None]:
inputs = [
    [1, 2, 3, 2.5],
    [2, 5, -1, 2.0],
    [-1.5, 2.7, 3.3, -0.8],
]

# Unnormalized probabilities: subtracting each row's maximum first keeps
# every exponent argument <= 0
row_max = np.max(inputs, axis=1, keepdims=True)
exp_values = np.exp(inputs - row_max)
# Normalize each sample by the sum of its own row
row_totals = np.sum(exp_values, axis=1, keepdims=True)
probabilities = exp_values / row_totals

print(probabilities)
print(np.sum(probabilities, axis=1, keepdims=True))

In [None]:
class ActivationSoftmax:
    """Row-wise softmax activation."""

    def forward(self, inputs):
        """Store the softmax of each row of ``inputs`` in ``self.output``."""
        # Shift every row by its maximum so the largest exponent argument is 0
        row_max = np.max(inputs, axis=1, keepdims=True)
        exp_values = np.exp(inputs - row_max)
        # Divide each row by its own sum
        row_totals = np.sum(exp_values, axis=1, keepdims=True)
        self.output = exp_values / row_totals

### Coding a Forward Pass - Without Loss

In [None]:
print(f"Inputs: {x.shape}")

# Two dense layers (2 -> 3 -> 3), created in this order
dense_1 = DenseLayer(n_inputs=2, n_neurons=3)
dense_2 = DenseLayer(n_inputs=3, n_neurons=3)
# ReLU follows the first layer, softmax follows the second
activation_1 = ActivationReLU()
activation_2 = ActivationSoftmax()

# Feed the data through each stage, passing every output to the next stage
signal = x
for stage in (dense_1, activation_1, dense_2, activation_2):
    stage.forward(signal)
    signal = stage.output

print(f"Outputs: {activation_2.output.shape}")

### Categorical Cross Entropy Loss

`Case 1`: Class targets are just numbers

`Red`: 0, `Green`: 1, `Blue`: 2

In [None]:
# One row of class confidences per sample
softmax_outputs = np.array([
    [0.7, 0.1, 0.2],
    [0.1, 0.5, 0.4],
    [0.02, 0.9, 0.08],
])
# Index of the correct class for each sample
class_targets = [0, 1, 1]

# For every row, pick the confidence stored at that row's target index
sample_rows = [0, 1, 2]
print(softmax_outputs[sample_rows, class_targets])

In [None]:
# Confidence assigned to the correct class of each sample
sample_rows = np.arange(len(softmax_outputs))
target_confidences = softmax_outputs[sample_rows, class_targets]
# Negative log of those confidences, averaged over the batch
neg_log = -np.log(target_confidences)
average_loss = np.mean(neg_log)
print(f"Average Loss: {average_loss}")

`Case 2`: Class targets are one-hot encoded

In [None]:
# One-hot targets: a single 1 per row marks the correct class
y_true_check = np.array([
    [1, 0, 0],
    [0, 1, 0],
    [0, 1, 0],
])

# Predicted class confidences for the same three samples
y_pred_check = np.array([
    [0.7, 0.1, 0.2],
    [0.1, 0.5, 0.4],
    [0.02, 0.9, 0.08],
])

# Multiplying by the one-hot mask zeroes every confidence except the target's
y_probs = y_true_check * y_pred_check
# Summing each row therefore leaves only the target confidence
y_req_probs = np.sum(y_probs, axis=1)
# Negative log per sample, then the batch mean
req_outputs = -np.log(y_req_probs)
average_loss = np.mean(req_outputs)

print(f"Average Loss: {average_loss}")

In [None]:
class Loss:
    """Base class for loss functions.

    Subclasses implement ``forward(output, y)`` and return one loss value per
    sample; ``calculate`` reduces those values to their mean.
    """

    def forward(self, output, y):
        """Return per-sample losses; must be overridden by subclasses.

        Raises:
            NotImplementedError: always, when called on the base class.
        """
        raise NotImplementedError(
            f"{type(self).__name__} must implement forward(output, y)"
        )

    # Calculate the mean data loss from the model outputs and ground truth values
    def calculate(self, output, y):
        """Return the mean of the per-sample losses produced by ``forward``.

        Args:
            output: model outputs, passed unchanged to ``forward``.
            y: ground-truth values, passed unchanged to ``forward``.
        """
        # Calculate sample losses
        sample_losses = self.forward(output, y)
        # Calculate mean loss
        data_loss = np.mean(sample_losses)
        return data_loss

In [None]:
class CategoricalCrossEntropyLoss(Loss):
    """Categorical cross-entropy for class-index or one-hot targets."""

    def forward(self, y_pred, y_true):
        """Return the negative log-likelihood of the correct class per sample.

        Args:
            y_pred: predicted confidences, one row per sample.
            y_true: either a 1-D sequence of class indices or a 2-D one-hot
                array with the same shape as ``y_pred``.

        Returns:
            1-D array with one loss value per sample.

        Raises:
            ValueError: if ``y_true`` is neither 1-D nor 2-D.
        """
        # Accept plain lists as well as arrays for the targets
        y_true = np.asarray(y_true)

        # Number of samples in a batch
        samples = len(y_pred)

        # Clip away exact 0 so the log below never receives 0.
        # Clip both sides to not drag mean towards any value
        y_pred_clipped = np.clip(y_pred, 1e-7, 1 - 1e-7)

        if y_true.ndim == 1:
            # Class-index targets: pick one confidence per row
            correct_confidences = y_pred_clipped[np.arange(samples), y_true]
        elif y_true.ndim == 2:
            # One-hot targets: mask out every confidence except the target's
            correct_confidences = np.sum(y_pred_clipped * y_true, axis=1)
        else:
            # Previously this fell through to an UnboundLocalError
            raise ValueError(
                "y_true must be 1-D (class indices) or 2-D (one-hot), "
                f"got {y_true.ndim} dimension(s)"
            )

        neg_log_likelihood = -np.log(correct_confidences)
        return neg_log_likelihood

In [None]:
# Predicted class confidences, one row per sample
softmax_outputs = np.array([
    [0.7, 0.1, 0.2],
    [0.1, 0.5, 0.4],
    [0.02, 0.9, 0.08],
])

# One-hot encoded ground truth for the same three samples
class_targets = np.array([
    [1, 0, 0],
    [0, 1, 0],
    [0, 1, 0],
])

loss_function = CategoricalCrossEntropyLoss()

# Mean loss over the batch
loss = loss_function.calculate(output=softmax_outputs, y=class_targets)
print(f"Categorical Loss: {loss}")

### Coding a Forward Pass - With Loss

In [None]:
# Two dense layers (2 -> 3 -> 3), created in this order
dense_1 = DenseLayer(2, 3)
dense_2 = DenseLayer(3, 3)
# ReLU follows the first layer, softmax follows the second
activation_1 = ActivationReLU()
activation_2 = ActivationSoftmax()

loss_func = CategoricalCrossEntropyLoss()

# Feed the data through each stage, passing every output to the next stage
signal = x
for stage in (dense_1, activation_1, dense_2, activation_2):
    stage.forward(signal)
    signal = stage.output

# Mean loss of the softmax output against the labels
loss = loss_func.calculate(activation_2.output, y)
print(f"Loss: {loss}")

In [None]:
# Accuracy: fraction of samples whose highest-confidence class equals the label
predictions = np.argmax(activation_2.output, axis=1)

# Two-dimensional targets are reduced to class indices first
if y.ndim == 2:
    y = np.argmax(y, axis=1)

is_correct = predictions == y
accuracy = np.mean(is_correct)
print(f"Accuracy: {accuracy}")

### Optimization

#### Randomly choosing weights and biases - Doesn't work for any kind of data

- Pick weights and biases randomly.

- Calculate the loss.

- Iterate.

- Choose the weights and biases that give the lowest loss.

In [None]:
# Network: dense(2 -> 3) -> ReLU -> dense(3 -> 3) -> softmax
dense_1 = DenseLayer(2, 3)
dense_2 = DenseLayer(3, 3)
activation_1 = ActivationReLU()
activation_2 = ActivationSoftmax()

loss_func = CategoricalCrossEntropyLoss()

# Best parameters seen so far, starting from the freshly created layers
lowest_loss = 9999999
best_dense1_weights, best_dense1_biases = dense_1.weights.copy(), dense_1.biases.copy()
best_dense2_weights, best_dense2_biases = dense_2.weights.copy(), dense_2.biases.copy()

pipeline = (dense_1, activation_1, dense_2, activation_2)

for iteration in range(10000):
    # Draw a completely new parameter set: weights then biases, layer by layer
    for layer in (dense_1, dense_2):
        layer.weights = 0.05 * np.random.randn(*layer.weights.shape)
        layer.biases = 0.05 * np.random.randn(*layer.biases.shape)

    # Forward pass of the training data through every stage
    signal = x
    for stage in pipeline:
        stage.forward(signal)
        signal = stage.output

    loss = loss_func.calculate(activation_2.output, y)

    predictions = np.argmax(activation_2.output, axis=1)
    accuracy = np.mean(predictions == y)

    # Remember this parameter set only when it beats the best loss so far
    if loss < lowest_loss:
        print(f"New set of weights found, Iteration {iteration}, Loss: {loss}, Accuracy: {accuracy}")
        best_dense1_weights, best_dense1_biases = dense_1.weights.copy(), dense_1.biases.copy()
        best_dense2_weights, best_dense2_biases = dense_2.weights.copy(), dense_2.biases.copy()
        lowest_loss = loss

#### Randomly adjusting weights and biases - Works better but fails for complex data

- If loss decreases for some `w` and `b`; choose the next value of weights and biases close to w, b.

- If the loss increases, don't update the weights and biases.

In [None]:
# Network: dense(2 -> 3) -> ReLU -> dense(3 -> 3) -> softmax
dense_1 = DenseLayer(2, 3)
dense_2 = DenseLayer(3, 3)
activation_1 = ActivationReLU()
activation_2 = ActivationSoftmax()

loss_func = CategoricalCrossEntropyLoss()

# Best parameters seen so far, starting from the freshly created layers
lowest_loss = 9999999
best_dense1_weights, best_dense1_biases = dense_1.weights.copy(), dense_1.biases.copy()
best_dense2_weights, best_dense2_biases = dense_2.weights.copy(), dense_2.biases.copy()

pipeline = (dense_1, activation_1, dense_2, activation_2)

for iteration in range(10000):
    # Nudge the current parameters in place: weights then biases, layer by layer
    for layer in (dense_1, dense_2):
        layer.weights += 0.05 * np.random.randn(*layer.weights.shape)
        layer.biases += 0.05 * np.random.randn(*layer.biases.shape)

    # Forward pass of the training data through every stage
    signal = x
    for stage in pipeline:
        stage.forward(signal)
        signal = stage.output

    loss = loss_func.calculate(activation_2.output, y)

    predictions = np.argmax(activation_2.output, axis=1)
    accuracy = np.mean(predictions == y)

    if loss < lowest_loss:
        # Improvement: keep the nudged parameters as the new best
        print(f"New set of weights found, Iteration {iteration}, Loss: {loss}, Accuracy: {accuracy}")
        best_dense1_weights, best_dense1_biases = dense_1.weights.copy(), dense_1.biases.copy()
        best_dense2_weights, best_dense2_biases = dense_2.weights.copy(), dense_2.biases.copy()
        lowest_loss = loss
    else:
        # No improvement: restore copies of the best parameters
        dense_1.weights, dense_1.biases = best_dense1_weights.copy(), best_dense1_biases.copy()
        dense_2.weights, dense_2.biases = best_dense2_weights.copy(), best_dense2_biases.copy()

### Back-Propagation

In [None]:
# Initial parameters of a single ReLU neuron trained towards an output of 0
weights = np.array([-3.0, -1.0, 2.0])
bias = 1.0
inputs = np.array([1.0, -2.0, 3.0])
target_output = 0.0
learning_rate = 0.001


def relu(x):
    """Return the element-wise maximum of x and 0."""
    return np.maximum(x, 0)


def relu_derivative(x):
    """Return 1.0 where x > 0 and 0.0 elsewhere."""
    return np.where(x > 0, 1.0, 0.0)


for iteration in range(200):
    # Forward pass: linear combination -> ReLU -> squared error
    linear_output = np.dot(weights, inputs) + bias
    output = relu(linear_output)
    error = output - target_output
    loss = error ** 2

    # Backward pass: local derivative of every step
    dloss_doutput = 2 * error
    doutput_dlinear = relu_derivative(linear_output)
    dlinear_dweights = inputs
    dlinear_dbias = 1.0

    # Chain rule from the loss back to each parameter
    dloss_dlinear = dloss_doutput * doutput_dlinear
    dloss_dweights = dloss_dlinear * dlinear_dweights
    dloss_dbias = dloss_dlinear * dlinear_dbias

    # Gradient-descent step on the weights and the bias
    weights -= learning_rate * dloss_dweights
    bias -= learning_rate * dloss_dbias

    print(f'Iteration {iteration+1}, Loss: {loss}')

print(f'\nFinal weights: {weights}')
print(f'Final bias: {bias}')

In [None]:
# With multiple neurons
# Three ReLU neurons share one 4-value input; the loss is the square of the
# summed activations, minimised by plain gradient descent.
#
# The scratch variables are named `activations` and `layer_sum` so this cell
# does not rebind the notebook-wide `a` (summation demo array) or `y`
# (dataset labels) that other cells read.

inputs = np.array([1, 2, 3, 4])
weights = np.array([
    [0.1, 0.2, 0.3, 0.4],
    [0.5, 0.6, 0.7, 0.8],
    [0.9, 1.0, 1.1, 1.2]
])
biases = np.array([0.1, 0.2, 0.3])
learning_rate = 0.001


def relu(x):
    """Return the element-wise maximum of x and 0."""
    return np.maximum(x, 0)


def relu_derivative(x):
    """Return 1.0 where x > 0 and 0.0 elsewhere."""
    return np.where(x > 0, 1.0, 0.0)


for iteration in range(200):
    # Forward pass
    z = np.dot(weights, inputs) + biases
    activations = relu(z)
    layer_sum = np.sum(activations)
    loss = layer_sum ** 2

    # Backward pass

    # Gradient of loss w.r.t the summed output
    dL_dy = 2 * layer_sum
    # Gradient of the summed output w.r.t each activation
    dy_da = np.ones_like(activations)
    # Gradient of loss w.r.t the activations
    dL_da = dL_dy * dy_da
    # Gradient of the activations w.r.t z
    da_dz = relu_derivative(z)
    # Gradient of loss w.r.t z
    dL_dz = dL_da * da_dz

    # Gradient of loss w.r.t weights (one row per neuron) and biases
    dL_dw = np.outer(dL_dz, inputs)
    dL_db = dL_dz

    # Gradient-descent step
    weights -= learning_rate * dL_dw
    biases -= learning_rate * dL_db

    if iteration % 20 == 0:
        print(f'Iteration {iteration}, Loss: {loss}')

print(f'\nFinal weights: {weights}')
print(f'Final biases: {biases}')

### Back-Propagation through Matrices

#### Gradients of Loss w.r.t Weights

In [None]:
# Gradient arriving from the next layer: one row per sample, one column per neuron
dl_dz = np.array([
    [1.0, 1.0, 1.0],
    [2.0, 2.0, 2.0],
    [3.0, 3.0, 3.0],
])

# Batch of three samples with four features each
inputs = np.array([
    [1, 2, 3, 2.5],
    [2, 5, -1, 2],
    [-1.5, 2.7, 3.3, -0.8],
])

# (4, 3) transposed inputs times (3, 3) gradient gives a (4, 3) result:
# one row per input feature, one column per neuron
inputs_transposed = inputs.T
dl_dweights = np.dot(inputs_transposed, dl_dz)
print(dl_dweights)

#### Gradients of Loss w.r.t Biases

In [None]:
# Gradient arriving from the next layer: one row per sample, one column per neuron
dl_dz = np.array([
    [1.0, 1.0, 1.0],
    [2.0, 2.0, 2.0],
    [3.0, 3.0, 3.0],
])

# One bias per neuron, stored as a single row
biases = np.array([[2, 3, 0.5]])

# Summing over the samples (axis 0) gives one gradient per bias;
# keepdims=True keeps the result as a single row
bias_gradient = np.sum(dl_dz, axis=0, keepdims=True)
dl_dbiases = bias_gradient
print(dl_dbiases)

#### Gradients of Loss w.r.t Inputs

In [None]:
# Gradient arriving from the next layer: one row per sample, one column per neuron
dl_dz = np.array([
    [1.0, 1.0, 1.0],
    [2.0, 2.0, 2.0],
    [3.0, 3.0, 3.0],
])

# Written one row per neuron, then transposed so `weights` is (4, 3):
# one row per input feature, one column per neuron
weights_per_neuron = np.array([
    [0.2, 0.8, -0.5, 1],
    [0.5, -0.91, 0.26, -0.5],
    [-0.26, -0.27, 0.17, 0.87],
])
weights = weights_per_neuron.T

# (3, 3) gradient times (3, 4) transposed weights gives one row per sample
# and one column per input feature
dl_dinputs = np.dot(dl_dz, weights.T)
print(dl_dinputs)

In [None]:
class DenseLayer:
    """Fully connected layer with a forward and a backward pass.

    Weights are stored as (n_neurons, n_inputs) and biases as (1, n_neurons).
    """

    def __init__(self, n_inputs, n_neurons):
        """Create small random weights and zero biases.

        Args:
            n_inputs: number of values each neuron receives.
            n_neurons: number of neurons in the layer.
        """
        self.weights = 0.01 * np.random.randn(n_neurons, n_inputs)
        self.biases = np.zeros((1, n_neurons))

    def forward(self, inputs):
        """Compute ``inputs . weights^T + biases`` and store it in ``self.output``.

        The inputs are remembered because ``backward`` needs them.
        """
        self.inputs = inputs
        self.output = np.dot(inputs, self.weights.T) + self.biases

    def backward(self, dl_dz):
        """Compute gradients of the loss w.r.t. weights, biases and inputs.

        Args:
            dl_dz: gradient of the loss w.r.t. this layer's output, with the
                same shape as ``self.output`` (samples, n_neurons).

        Sets:
            dweights: same shape as ``self.weights`` (n_neurons, n_inputs).
            dbiases: same shape as ``self.biases`` (1, n_neurons).
            dinputs: same shape as the inputs given to ``forward``.
        """
        # forward uses weights.T, so the weight gradient is dl_dz^T . inputs;
        # this keeps dweights aligned element-for-element with self.weights.
        self.dweights = np.dot(dl_dz.T, self.inputs)
        # Each bias contributes to every sample, so sum over the sample axis
        self.dbiases = np.sum(dl_dz, axis=0, keepdims=True)
        # (samples, n_neurons) . (n_neurons, n_inputs) -> (samples, n_inputs)
        self.dinputs = np.dot(dl_dz, self.weights)