### Example for 1 neuron

In [7]:
#example of neuron. 4 neurons. every input has unique weight. input could come from input layer or output from other layer of neurons

inputs = [1, 2, 3, 2.5]
weights = [.2, .8, -.5, 1.0]
bias = 2

output = inputs[0]*weights[0] + inputs[1]*weights[1] + inputs[2]*weights[2] + inputs[3]*weights[3] + bias
output

4.8

### Example for 3 neurons

In [8]:
#modeling output layer: 3 output neurons, 4 inputs

inputs = [1, 2, 3, 2.5]
weights = [[.2, .8, -.5, 1.0], [.5, -.91, .26, -.5], [-.26, -.27, .17, .87]]
biases = [2, 3, .5]

layer_outputs = [] #output of current layer

for neuron_weights, neuron_bias in zip(weights, biases):
    neuron_output = 0 #output of given neuron
    
    for n_input, weight in zip (inputs, neuron_weights):
        neuron_output += n_input * weight
    
    neuron_output += neuron_bias
    layer_outputs.append(neuron_output)
    
layer_outputs

[4.8, 1.21, 2.385]

### Now with Numpy

In [9]:
#n.b. shape: row, col in 2D: number of elements in given dimension. tensor is object that can be represented as array.
import numpy as np

inputs = [1, 2, 3, 2.5]
weights = [[.2, .8, -.5, 1.0], [.5, -.91, .26, -.5], [-.26, -.27, .17, .87]]
biases = [2, 3, .5]

output = np.dot(weights, inputs) + bias
print(output)

[4.8   0.21  3.885]


### Batches

In [10]:
#do parallel calculations using batches -- show multiple samples at a time to optimize efficiency
#but beware of overfitment -- trying to fit to all samples at once. batch size of 32 is most common.

inputs = [[1, 2, 3, 2.5],
          [2, 5, -1, 2],
          [-1.5, 2.7, 3.3, -.8]]

weights = [[.2, .8, -.5, 1.0], [.5, -.91, .26, -.5], [-.26, -.27, .17, .87]]
biases = [2, 3, .5]

#need to transpose the weight matrix so dimension aligns
output = np.dot(inputs, np.array(weights).T) + biases
print(output)

[[ 4.8    1.21   2.385]
 [ 8.9   -1.81   0.2  ]
 [ 1.41   1.051  0.026]]


In [11]:
#now with multiple layers

inputs = [[1, 2, 3, 2.5],
          [2, 5, -1, 2],
          [-1.5, 2.7, 3.3, -.8]]

weights = [[.2, .8, -.5, 1.0], [.5, -.91, .26, -.5], [-.26, -.27, .17, .87]]
biases = [2, 3, .5]

weights2 = [[.1, -.14, .5], [-.5, .12, -.33], [-.44, .73, -.13]]
biases2 = [2, 3, .5]

layer1_outputs = np.dot(inputs, np.array(weights).T) + biases
layer2_outputs = np.dot(layer1_outputs, np.array(weights2).T) + biases

#need to transpose the weight matrix so dimension aligns
output = np.dot(inputs, np.array(weights).T) + biases
print(layer2_outputs)

[[ 3.5031  -0.04185 -1.03875]
 [ 3.2434  -1.7332  -4.7633 ]
 [ 2.00686  2.41254  0.64345]]


### Creating the Layer_Dense class

In [73]:
#captial X denotes input

np.random.seed(0) #seed the random number generator; don't need this with nnfs

'''
X = [[1, 2, 3, 2.5],
     [2, 5, -1, 2],
     [-1.5, 2.7, 3.3, -.8]]
     
'''

#we want the values of the neurons to not get too large or the program will diverge
#consider normalizing X; choose weights between -.1 and .1, bias != 0

#now 2 'hidden' layers
class Layer_Dense:
    def __init__(self, n_inputs, n_neurons): #inputs: how many features in each sample; can make neurons any number you want
        self.weights = 0.01 * np.random.randn(n_inputs, n_neurons) #randomly initialize weights
        self.biases = np.zeros((1, n_neurons)) #here we're initlizaing biases to all 0
    def forward(self, inputs): #function to pass data through the next layer
        self.output = np.dot(inputs, self.weights) + self.biases
    
#print(0.1*np.random.randn(4,3))

layer1 = Layer_Dense(4, 5)
layer2 = Layer_Dense(5, 2) #input of layer2 must match input of layer 2; output can be whatever

layer1.forward(X)
print(layer1.output)

layer2.forward(layer1.output)
print(layer2.output)

ValueError: shapes (300,2) and (4,5) not aligned: 2 (dim 1) != 4 (dim 0)

### Activation function

In [42]:
#ex1: Heaviside step function
#ex2: sigmoid -- easier to train than on heaviside due to granularity
#ex3: rectified linear ReLU: sigmoid has "vanishing gradient" problem. still granular. fast: very simple calculation.

#what is point of using activation? need nonlinear activation function to fit nonlinear data.

#pip install nnfs

import nnfs
from nnfs.datasets import spiral_data

nnfs.init() #sets random seed, default datatype

inputs = [0, 2, -1, 3.3, -2.7, 1.1, 2.2, -100]
output = []

#ReLU function!
for i in inputs:
    '''
    if i > 0:
        output.append(i)
    elif i <= 0:
        output.append(0)
    '''   
    output.append(max(0,i)) #appends max of 0 or i

#print(output)

class Activation_ReLU:
    def forward(self, inputs):
        self.output = np.maximum(0, inputs)

#function to generate data
'''        
def create_data(points, classes): 
    X = np.zeros((points*classes, 2))
    y = np.zeros(points*classes, dtype='uint8')
    for class_number in range(classes):
        ix = range(points*class_number, points*(class_number + 1))
        r = np.linspace(0.1, 1, points) #radius
        t = np.linspace(class_number*4, (class_number + 1)*4, points) + np.random.randn(points)*.2
        X[ix] = np.c_[r*np.sin(t*2.5), r*np.cos(t*2.5)]
        y[ix] = class_number
    return X, y

import matplotlib.pyplot as plt

print('here')
X,y = create_data(100,3)

plt.scatter(X[:,0], X[:,1])
plt.show()

plt.scatter(X[:,0], X[:,1], c=y, cmap='brg')
plt.show()

      

X,y = spiral_data(100,3) #number of points, classes. gives sample data

layer1 = Layer_Dense(2, 5) #number of features per sample: 2 unique features
activation1 = Activation_ReLU()

layer1.forward(X)

activation1.forward(layer1.output)
print(activation1.output)
'''

[[0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00]
 [0.00000000e+00 0.00000000e+00 0.00000000e+00 4.65504505e-04
  4.56846210e-05]
 [0.00000000e+00 5.93469958e-05 0.00000000e+00 2.03573116e-04
  6.10024377e-04]
 ...
 [1.13291524e-01 0.00000000e+00 0.00000000e+00 8.11079666e-02
  0.00000000e+00]
 [1.34588361e-01 0.00000000e+00 3.09493970e-02 5.66337556e-02
  0.00000000e+00]
 [1.07817926e-01 0.00000000e+00 0.00000000e+00 8.72561932e-02
  0.00000000e+00]]


### Softmax activation function

In [76]:
layer_outputs = [[4.8, 1.21, 2.385],
                 [8.9, -1.81, 0.2],
                 [1.41, 1.051, 0.026]]
#need a way of relating neurons to each other; or what if output neurons are negative?
#--> exponential function

#Softmax: exponentiation and normalization

exp_vals = np.exp(layer_outputs)
#print(np.sum(layer_outputs, axis=1, keepdims=True)) 

#axis 1 gives sum of rows; keepdims keeps orientation/dimension the same
norm_vals = exp_vals / np.sum(exp_vals, axis=1, keepdims=True) 
    
#print(norm_vals)

#how to prevent overflow: subtract max value: output restricted from 0 to 1

class Activation_Softmax:
    def forward(self, inputs):
        exp_vals = np.exp(inputs - np.max(inputs, axis=1, keepdims=True)) #subtract max per row
        probabilites = exp_vals / np.sum(exp_vals, axis=1, keepdims=True)
        self.output = probabilites
        

X, y = spiral_data(samples=100, classes=3)

dense1 = Layer_Dense(2,3)
activation1 = Activation_ReLU()

dense2 = Layer_Dense(3,3) #output layer
activation2 = Activation_Softmax()

dense1.forward(X)
activation1.forward(dense1.output)
dense2.forward(activation1.output)
activation2.forward(dense2.output)

print(activation2.output[:5]) #[:5] indicates first 5

[[0.33333334 0.33333334 0.33333334]
 [0.33333337 0.33333322 0.3333334 ]
 [0.33333337 0.33333322 0.3333334 ]
 [0.33333334 0.33333334 0.33333334]
 [0.33333343 0.3333329  0.3333337 ]]


### Loss function

In [80]:
#categorical cross entropy: -log of predicted value at label index
#one-hot encoding: length is number of classes, 0 everywher except for a 1 at the label index
#we compare one hot vector to prediction value

#n.b. np.log(b) gives the natural log of b.

softmax_output = [0.7, 0.1, 0.2] #example output from output layer
target_output = [1,0,0]

loss = -(np.log(softmax_output[0])*target_output[0] + np.log(softmax_output[1])*target_output[1] + np.log(softmax_output[2])*target_output[2])
#print(loss)

softmax_outputs = np.array([[.7, .1, .2],
                            [.5, .1, .4],
                            [.02, .9, .08]])

class_targets = [0, 1, 1] #which class is correct for each output
neg_log = -np.log(
    softmax_outputs[range(len(softmax_outputs)), class_targets
                             ])
    
average_loss = np.mean(neg_log)
#print(average_loss)

class Loss:
    def calculate(self, output, y):
        sample_losses = self.forward(output, y)
        data_loss = np.mean(sample_losses)
        return data_loss

class Loss_CategoricalCrossEntropy(Loss):
    def forward(self, y_pred, y_true):
        samples = len(y_pred)
        y_pred_clipped = np.clip(y_pred, 1e-7, 1-1e-7) #need to clip close to 0
        
        if len(y_true.shape) == 1: #scalar
            correct_confidences = y_pred_clipped[range(samples), y_true]
        elif len(y_true.shape) ==2: #one hot encoded
            correct_confidences = np.sum(y_pred_clipped*y_true, axis=1)
        
        negative_log_likelihoods = -np.log(correct_confidences)
        return negative_log_likelihoods
    
loss_function = Loss_CategoricalCrossEntropy()
loss = loss_function.calculate(activation2.output, y)

predictions = np.argmax(softmax_outputs, axis=1) #argmax returns index of largest value in array
accuracy = np.mean(predictions == class_targets)

print(accuracy)

0.6666666666666666


### Optimization: backpropogation