<a href="https://colab.research.google.com/github/kylemccullough1/MachineLearning/blob/main/Assignment_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Question 1

# The update rule is the gradients derived from the backpropagation through
# the MSE loss, the hidden layer, and then the activation of the hidden layer
# through the sigmoid activation function. You train this neural network by
# going through the forward pass from the inputs into the sigmoid activation
# function for the hidden layer, and then through the activation linear for
# the output layer, and then through the MSE loss. And then using 
# backpropagation to go back through the layers over and over until convergence.
# This is different from the update rule for binary classification using log loss
# because it goes through a different loss function causing a different
# backpropagation gradient.


In [105]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
# Question 2

# Dense layer
class Layer_Dense:
  # Layer initialization
  def __init__(self, n_inputs, n_neurons,
    weight_regularizer_l1=0, weight_regularizer_l2=0,
    bias_regularizer_l1=0, bias_regularizer_l2=0):
    # Initialize weights and biases
    self.weights = 0.01 * np.random.randn(n_inputs, n_neurons)
    self.biases = np.zeros((1, n_neurons))
  # Forward pass
  def forward(self, inputs):
    # Remember input values
    self.inputs = inputs
    # Calculate output values from inputs, weights and biases
    self.output = np.dot(inputs, self.weights) + self.biases
  # Backward pass
  def backward(self, dvalues):
    # Gradients on parameters
    self.dweights = np.dot(self.inputs.T, dvalues.T)
    self.dbiases = np.sum(dvalues, axis=0, keepdims=True)
    # Gradient on values
    self.dinputs = np.dot(dvalues, self.weights)

# Sigmoid activation
class Activation_Sigmoid:
  # Forward pass
  def forward(self, inputs):
    # Save input and calculate/save output
    # of the sigmoid function
    self.inputs = inputs
    self.output = 1 / (1 + np.exp(-inputs))
  # Backward pass
  def backward(self, dvalues):
    # Derivative - calculates from output of the sigmoid function
    print('dvalues', dvalues)
    print('output', self.output)
    self.dinputs = dvalues * (1 - self.output) * self.output

class Activation_ReLU:
  # Forward pass
  def forward(self, inputs):
    # Remember input values
    self.inputs = inputs
    # Calculate output values from inputs
    self.output = np.maximum(0, inputs)
  # Backward pass
  def backward(self, dvalues):
    # Since we need to modify original variable,
    # let's make a copy of values first
    self.dinputs = dvalues.copy()
    # Zero gradient where input values were negative
    self.dinputs[self.inputs <= 0] = 0

# Linear activation
class Activation_Linear:
  # Forward pass
  def forward(self, inputs):
    # Just remember values
    self.inputs = inputs
    self.output = inputs
  # Backward pass
  def backward(self, dvalues):
    # derivative is 1, 1 * dvalues = dvalues - the chain rule
    self.dinputs = dvalues.copy()

# Common loss class
class Loss:
  # Calculates the data and regularization losses
  # given model output and ground truth values
  def calculate(self, output, y):
    # Calculate sample losses
    sample_losses = self.forward(output, y)
    # Calculate mean loss
    data_loss = np.mean(sample_losses)
    # Return loss
    return data_loss

# Mean Squared Error loss
class Loss_MeanSquaredError(Loss): 
  # Forward pass
  def forward(self, y_pred, y_true):
    # Calculate loss
    sample_losses = np.mean((y_true - y_pred.T)**2, axis=-1)
    # Return losses
    return sample_losses
  # Backward pass
  def backward(self, dvalues, y_true):
    # Number of samples
    samples = len(dvalues)
    # Number of outputs in every sample
    # We'll use the first sample to count them
    outputs = len(dvalues[0])
    # Gradient on values
    self.dinputs = -2 * (y_true - dvalues.T) / outputs
    # Normalize gradient
    self.dinputs = self.dinputs / samples

X_train = np.loadtxt("X_train.csv")
# X_train = X_train.T
Y_train = np.loadtxt("Y_train.csv") 
#X_test = np.loadtxt("X_test.csv") 
# Y_test = np.loadtxt("Y_test.csv") 
# Create Dense layer with 2 input feature and 100 output values
dense1 = Layer_Dense(2, 100)
# Create ReLU activation (to be used with Dense layer):
activation1 = Activation_Sigmoid()
# Create second Dense layer with 64 input features (as we take output
# of previous layer here) and 64 output values
dense2 = Layer_Dense(100, 100)
# Create ReLU activation (to be used with Dense layer):
activation2 = Activation_Sigmoid()
# Create third Dense layer with 64 input features (as we take output
# of previous layer here) and 1 output value
dense3 = Layer_Dense(100, 2)
# Create Linear activation:
# Create loss function
activation3 = Activation_Linear()
# Create loss function
loss_function = Loss_MeanSquaredError()
# Accuracy precision for accuracy calculation
# There are no really accuracy factor for regression problem,
# but we can simulate/approximate it. We'll calculate it by checking
# how many values have a difference to their ground truth equivalent
# less than given precision
# We'll calculate this precision as a fraction of standard deviation
# of all the ground truth values
accuracy_precision = np.std(Y_train) / 250
# Train in loop
for epoch in range(10001):
  # Perform a forward pass of our training data through this layer
  dense1.forward(X_train)
  # Perform a forward pass through activation function
  # takes the output of first dense layer here
  activation1.forward(dense1.output)
  # Perform a forward pass through second Dense layer
  # takes outputs of activation function
  # of first layer as inputs
  dense2.forward(activation1.output)
  # Perform a forward pass through activation function
  # takes the output of second dense layer here
  activation2.forward(dense2.output)
  # Perform a forward pass through activation function
  # takes the output of third dense layer here
  activation2.forward(dense2.output)
  # Calculate the data loss
  dense3.forward(activation2.output)
  # Perform a forward pass through activation function
  # takes the output of third dense layer here
  activation3.forward(dense3.output)
  # Calculate the data loss
  data_loss = loss_function.calculate(activation2.output, Y_train)
  # Calculate overall loss
  loss = data_loss
  # Calculate accuracy from output of activation2 and targets
  # To calculate it we're taking absolute difference between
  # predictions and ground truth values and compare if differences
  # are lower than given precision value
  predictions = activation2.output
  accuracy = np.mean(np.absolute(predictions.T - Y_train) < accuracy_precision)
  if not epoch % 100:
    print(f'epoch: {epoch}, ' +
    f'acc: {accuracy:.3f}, ' +
    f'loss: {loss:.3f} (' +
    f'data_loss: {data_loss:.3f}, ' )
  # Backward pass
  loss_function.backward(activation3.output, Y_train)
  activation3.backward(loss_function.dinputs)
  dense3.backward(activation3.dinputs)
  activation2.backward(dense3.dinputs)
  dense2.backward(activation2.dinputs)
  activation1.backward(dense2.dinputs)
  dense1.backward(activation1.dinputs)

epoch: 0, acc: 0.000, loss: 5709.013 (data_loss: 5709.013, 
dvalues [[ 0.00685757 -0.02550638]
 [ 0.00685413 -0.02550956]]
output [[0.48507692 0.50230109 0.48095163 ... 0.49126084 0.48845818 0.48431609]
 [0.48509608 0.50239722 0.48106166 ... 0.4914056  0.48861568 0.48463112]
 [0.48508615 0.50239475 0.4811726  ... 0.49148588 0.48873255 0.48468525]
 ...
 [0.48503332 0.50222321 0.48120139 ... 0.49139381 0.48869082 0.48424628]
 [0.48506439 0.50229272 0.4810731  ... 0.491345   0.48858376 0.48436036]
 [0.48510228 0.50232525 0.48073137 ... 0.49111415 0.48823422 0.48425961]]


ValueError: ignored

1. The activation function I chose was just a linear regression activation function that doesn't change the input to output value. This is because the network is for a regression problem, and didn't require any special activation to see the correct results.
2. 2 neurons should be in the output layer, because there were 2 input features.
# At this point I got stuck on how to fix my program