In [None]:
import pandas as pd
import numpy as np

# https://www.kaggle.com/datasets/uciml/iris
# This dataset is in the local directory

In [None]:
class NeuralNetwork:
  """Feed-forward network with one hidden layer, trained one sample at a time.

  The same activation function is applied to the hidden layer and to the
  output layer. Typical usage mirrors the order of the methods below:
  load the data, preprocess it, split it, size the layers, create the
  weights, then call ``train`` and ``test``.
  """

  # Activation names understood by the forward and backward passes.
  _ACTIVATIONS = ('sigmoid', 'tanh', 'relu')

  # Gradient-descent settings declared on the class. Nothing in this class
  # reads them; they are kept so code that references them keeps working.
  decayFact = 0
  maxIter = 1000

  # Constructor
  def __init__(self, activation="sigmoid"):
    """Create an empty network.

    Args:
      activation: name of the activation function ('sigmoid', 'tanh' or
        'relu'). The name is stored as given and only checked when a
        forward or backward pass runs.
    """
    self.df = None
    self.input_layer_size = None
    self.hidden_layer_size = None
    self.output_layer_size = None
    self.learning_rate = 0.1

    self.X_train = None
    self.X_test = None
    self.y_train = None
    self.y_test = None

    self.weights_between_input_hidden = None
    self.weights_between_hidden_output = None
    self.bias_from_hidden = None
    self.bias_from_output = None

    self.output_out = None
    self.hidden_out = None

    self.activation = activation

  # ACTIVATION FUNCTIONS ----------------------------------------
  # Sigmoid Activation Function
  def sigmoid(self, x):
    """Return the logistic sigmoid of ``x``."""
    return 1 / (1 + np.exp(-x))

  # Derivative of Sigmoid Function
  def sigmoid_derivative(self, x):
    """Return the sigmoid slope, where ``x`` is already a sigmoid OUTPUT."""
    return x * (1 - x)

  # Tanh Activation Function
  def tanh(self, x):
    """Return the hyperbolic tangent of ``x``."""
    return np.tanh(x)

  # Derivative of Tanh Function
  def tanh_derivative(self, x):
    """Return the tanh slope at the PRE-activation value ``x``."""
    return 1 - np.tanh(x)**2

  # ReLu Activation Function
  def relu(self, x):
    """Return ``max(0, x)`` element-wise."""
    return np.maximum(0,x)

  # Derivative of ReLu Function
  def relu_derivative(self, x):
    """Return 1 where ``x`` is positive and 0 elsewhere."""
    return (x > 0) * 1

  # END OF ACTIVATION FUNCTIONS ----------------------------------------

  def _pick_for_activation(self, table):
    """Return the entry of ``table`` for the current activation.

    Raises:
      ValueError: if ``self.activation`` is not a supported name.
    """
    if self.activation not in self._ACTIVATIONS:
      raise ValueError(
        f"Unsupported activation {self.activation!r}; "
        f"expected one of {self._ACTIVATIONS}"
      )
    return table[self.activation]

  def _activate(self, net):
    """Apply the configured activation function to ``net``."""
    function = self._pick_for_activation({
      'sigmoid': self.sigmoid,
      'tanh': self.tanh,
      'relu': self.relu,
    })
    return function(net)

  def _slope_from_output(self, out):
    """Return the activation slope given the activation OUTPUT ``out``.

    Back-propagation only has the layer outputs at hand, so every slope
    is expressed in terms of the output rather than the pre-activation.
    """
    function = self._pick_for_activation({
      'sigmoid': self.sigmoid_derivative,   # already defined on the output
      'tanh': lambda value: 1 - value ** 2,  # d/dz tanh(z) = 1 - tanh(z)^2
      'relu': self.relu_derivative,         # output > 0 exactly when input > 0
    })
    return function(out)

  # Getting the dataframe
  def load_iris_data(self, path="Iris.csv"):
    """Read the dataset CSV at ``path`` into ``self.df``."""
    self.df = pd.read_csv(path)

  # Manually encode the 'Species' column into numeric values
  def encode_species(self):
    """Replace the species names in ``self.df['Species']`` with 0, 1, 2."""
    species_mapping = {'Iris-setosa': 0, 'Iris-versicolor': 1, 'Iris-virginica': 2}
    self.df['Species'] = self.df['Species'].map(species_mapping)

  # Drop the 'Id' column if it exists
  def drop_id_column(self):
    """Remove the 'Id' column from ``self.df``; a missing column is not an error."""
    self.df.drop(columns=['Id'], inplace=True, errors='ignore')

  def preprocess_iris_dataset(self):
    """Encode the species labels, then drop the 'Id' column."""
    self.encode_species()
    self.drop_id_column()

  # Split the df into training and testing datasets
  def split_dataset(self, test_size=0.2, random_state=69):
    """Shuffle ``self.df`` and split it into train and test parts.

    Args:
      test_size: fraction of the rows assigned to the test set.
      random_state: seed passed to ``np.random.seed``. This seeds NumPy's
        global generator, so later ``np.random`` calls are affected too.
    """
    np.random.seed(random_state)
    shuffled_indices = np.random.permutation(len(self.df))

    test_set_size = int(len(self.df) * test_size)
    test_indices = shuffled_indices[:test_set_size]
    train_indices = shuffled_indices[test_set_size:]

    train_rows = self.df.iloc[train_indices]
    test_rows = self.df.iloc[test_indices]

    self.X_train = train_rows.drop(columns=['Species']).reset_index(drop=True)
    self.X_test = test_rows.drop(columns=['Species']).reset_index(drop=True)
    self.y_train = train_rows['Species'].reset_index(drop=True)
    self.y_test = test_rows['Species'].reset_index(drop=True)

  # Should be 4, since the number of attributes of the dataset is 4
  def set_input_size(self):
    """Set the input layer size to the number of columns in ``X_train``."""
    self.input_layer_size = self.X_train.shape[1]

  # Should be 3, since the number of categories in the dataset is 3
  def set_output_size(self):
    """Set the output layer size to the number of distinct labels in ``y_train``."""
    self.output_layer_size = self.y_train.nunique()

  # Will only have one hidden layer of size n (default of 6 nodes)
  def set_attributes(self, hidden_layer_size = 6):
    """Derive the input/output sizes from the training data and set the hidden size."""
    self.set_input_size()
    self.set_output_size()
    self.hidden_layer_size = hidden_layer_size

  def show_attributes(self):
    """Print the three layer sizes."""
    print("Input Size: " , self.input_layer_size)
    print("Hidden Size: " , self.hidden_layer_size)
    print("Output Size: " , self.output_layer_size)

  # Creating the weights between each Layer as well as the Biases
  def create_weights_bias(self):
    """Initialise all weights and biases from a standard normal distribution."""
    self.weights_between_input_hidden = np.random.randn(self.input_layer_size, self.hidden_layer_size)
    self.weights_between_hidden_output = np.random.randn(self.hidden_layer_size, self.output_layer_size)
    self.bias_from_hidden = np.random.randn(self.hidden_layer_size)
    self.bias_from_output = np.random.randn(self.output_layer_size)

  # Forward Propagation
  def forward_propagation(self, X):
    """Run one sample through the network.

    Stores the hidden activations in ``self.hidden_out`` and the output
    activations in ``self.output_out``.

    Raises:
      ValueError: if the configured activation name is not supported.
    """
    hidden_net = np.dot(X, self.weights_between_input_hidden) + self.bias_from_hidden
    self.hidden_out = self._activate(hidden_net)

    output_net = np.dot(self.hidden_out, self.weights_between_hidden_output) + self.bias_from_output
    self.output_out = self._activate(output_net)

  # Backward Propagation
  def backward_propogation(self, X, y):
    """Update weights and biases from the most recent forward pass.

    Handles a single sample: ``X`` is one feature vector and ``y`` its
    one-hot target. ``forward_propagation(X)`` must have been called first
    because the stored layer outputs are reused here.

    Raises:
      ValueError: if the configured activation name is not supported.
    """
    error = y - self.output_out

    # Both deltas are computed before any weight changes so the hidden
    # delta is based on the weights that produced the current output.
    delta_output = self._slope_from_output(self.output_out) * error
    delta_hidden = self.weights_between_hidden_output.dot(delta_output) * self._slope_from_output(self.hidden_out)

    # Update Weights
    self.weights_between_hidden_output += np.outer(self.hidden_out, delta_output) * self.learning_rate
    self.weights_between_input_hidden += np.outer(X, delta_hidden) * self.learning_rate

    # Update Bias: each unit moves by its own delta (the deltas are 1-D,
    # so summing them would give every unit the same scalar update).
    self.bias_from_output += delta_output * self.learning_rate
    self.bias_from_hidden += delta_hidden * self.learning_rate

  # Correctly spelled alias; the original name is kept for existing callers.
  backward_propagation = backward_propogation

  # Used to encode the y values
  def one_hot_encode(self, index, size):
    """Return a length-``size`` vector of zeros with a 1 at ``index``."""
    encoded = np.zeros(size)
    encoded[index] = 1
    return encoded

  def _print_counts(self, correct, incorrect):
    """Print the prediction counts and the accuracy, followed by a blank line."""
    total = correct + incorrect
    # Accuracy is undefined when there were no samples; report NaN
    # instead of failing with a division by zero.
    accuracy = correct / total if total else float('nan')
    print("Correct Predictions: ", correct)
    print("Incorrect Predictions: ", incorrect)
    print("Accuracy: ", accuracy)
    print()

  def train(self, activation = 'sigmoid', learning_rate = 0.1, epochs = 10):
    """Train on the training split and print per-epoch accuracy.

    Args:
      activation: 'tanh' or 'relu' switch the network to that activation.
        Any other value, including the default 'sigmoid', leaves the
        current activation untouched; use ``set_activation`` to switch
        back to sigmoid.
      learning_rate: step size stored on the instance and used for every update.
      epochs: number of passes over the training split.
    """
    # Changing the activation
    if activation in ('tanh', 'relu'):
      self.activation = activation

    self.learning_rate = learning_rate

    # Convert once; iterating the arrays avoids building a Series per row.
    samples = self.X_train.to_numpy()
    labels = self.y_train.to_numpy()

    for epoch in range(epochs):
      correct_prediction = 0
      incorrect_prediction = 0

      for X_row, y_value in zip(samples, labels):
        y_encode = self.one_hot_encode(y_value, self.output_layer_size)

        self.forward_propagation(X_row)
        self.backward_propogation(X_row, y_encode)

        # The prediction is judged on the output computed before the update.
        max_index = np.argmax(self.output_out)
        if y_encode[max_index] != 0:
          correct_prediction += 1
        else:
          incorrect_prediction += 1

      print("Epoch: ", epoch + 1)
      self._print_counts(correct_prediction, incorrect_prediction)

  def test(self):
    """Evaluate on the test split and print the counts and accuracy."""
    correct_prediction = 0
    incorrect_prediction = 0

    samples = self.X_test.to_numpy()
    labels = self.y_test.to_numpy()

    for X_row, y_value in zip(samples, labels):
      y_encode = self.one_hot_encode(y_value, self.output_layer_size)
      self.forward_propagation(X_row)
      max_index = np.argmax(self.output_out)
      if y_encode[max_index] != 0:
        correct_prediction += 1
      else:
        incorrect_prediction += 1

    print("Testing Dataset")
    self._print_counts(correct_prediction, incorrect_prediction)

  # Setters (used for testing)
  def set_learning_rate(self, learning_rate):
    """Store a new learning rate."""
    self.learning_rate = learning_rate

  def set_activation(self, activation):
    """Store a new activation name (checked on the next forward/backward pass)."""
    self.activation = activation

  # Getters
  def get_X_train(self):
    """Return the training features."""
    return self.X_train

  def get_X_test(self):
    """Return the test features."""
    return self.X_test

  def get_y_train(self):
    """Return the training labels."""
    return self.y_train

  def get_y_test(self):
    """Return the test labels."""
    return self.y_test

  def get_activation(self):
    """Return the current activation name."""
    return self.activation

  # Print Statements
  # Used for debugging/testing

  # Print the head of our dataset
  def print_head(self):
    """Print the first rows of ``self.df``."""
    print(self.df.head())

  # Shows each bias/weight array
  def show_weight_bias(self):
    """Print every weight matrix and bias vector with its attribute name."""
    labelled = (
      ("weights_between_input_hidden", self.weights_between_input_hidden),
      ("weights_between_hidden_output", self.weights_between_hidden_output),
      ("bias_from_hidden", self.bias_from_hidden),
      ("bias_from_output", self.bias_from_output),
    )
    for label, values in labelled:
      print(label)
      print(values)
      print("\n\n")

In [None]:
# Build the network (the activation chosen here is replaced by train() below)
nn = NeuralNetwork(activation="sigmoid")

# Read the CSV, encode the labels, drop the Id column, then split train/test
nn.load_iris_data()
nn.preprocess_iris_dataset()
nn.split_dataset(random_state=2)

# Size the input, hidden, and output layers.
# The hidden layer defaults to 6 neurons; pass hidden_layer_size to change it,
# e.g. nn.set_attributes(hidden_layer_size=10)
nn.set_attributes()
nn.show_attributes()
print("\n")

# Random initial weights and biases
nn.create_weights_bias()

# Train with tanh for 100 epochs, then evaluate on the held-out split
nn.train(activation="tanh", learning_rate=0.01, epochs=100)
nn.test()


Input Size:  4
Hidden Size:  6
Output Size:  3


Epoch:  1
Correct Predictions:  49
Incorrect Predictions:  71
Accuracy:  0.4083333333333333

Epoch:  2
Correct Predictions:  87
Incorrect Predictions:  33
Accuracy:  0.725

Epoch:  3
Correct Predictions:  93
Incorrect Predictions:  27
Accuracy:  0.775

Epoch:  4
Correct Predictions:  95
Incorrect Predictions:  25
Accuracy:  0.7916666666666666

Epoch:  5
Correct Predictions:  102
Incorrect Predictions:  18
Accuracy:  0.85

Epoch:  6
Correct Predictions:  103
Incorrect Predictions:  17
Accuracy:  0.8583333333333333

Epoch:  7
Correct Predictions:  104
Incorrect Predictions:  16
Accuracy:  0.8666666666666667

Epoch:  8
Correct Predictions:  104
Incorrect Predictions:  16
Accuracy:  0.8666666666666667

Epoch:  9
Correct Predictions:  107
Incorrect Predictions:  13
Accuracy:  0.8916666666666667

Epoch:  10
Correct Predictions:  108
Incorrect Predictions:  12
Accuracy:  0.9

Epoch:  11
Correct Predictions:  108
Incorrect Predictions:  12
Accur

In [None]:
import numpy as np
import copy
from numpy.random import permutation

# Attempting another approach:
# a class modelling a line that is fitted with gradient descent.

# This is what will be used as the model.
# It has 2 weights: w0 (the intercept) and w1 (the slope).
class Line():
  """Straight-line model ``y = w0 + w1 * x`` with its squared-error partial derivatives.

  ``weights[0]`` is the intercept (w0) and ``weights[1]`` is the slope (w1).
  """

  def __init__(self):
    """Draw both weights uniformly from [0, 1) as one-element arrays."""
    intercept = np.random.uniform(0, 1, 1)
    slope = np.random.uniform(0, 1, 1)
    self.weights = [intercept, slope]
    self.derivateFuncs = [self.dx_w0, self.dx_w1]

  def evaluate(self, x):
    """Return the line's prediction (yHat) at ``x``."""
    intercept = self.weights[0]
    slope = self.weights[1]
    return intercept + slope * x

  def derivate(self, x, y):
    """Return the partial derivatives ``[d/dw0, d/dw1]`` at the point (x, y).

    ``x`` is a point on the plane and ``y`` is its observed output.
    """
    prediction = self.evaluate(x)
    return [
      self.dx_w0(x, y, prediction),
      self.dx_w1(x, y, prediction),
    ]

  def dx_w0(self, x, y, yHat):
    """Partial derivative with respect to w0 for one (x, y) pair."""
    residual = yHat - y
    return 2 * residual

  def dx_w1(self, x, y, yHat):
    """Partial derivative with respect to w1 for one (x, y) pair."""
    residual = yHat - y
    return 2 * x * residual

  def __str__(self):
    intercept, slope = self.weights[0], self.weights[1]
    return f"y = {intercept} + {slope} * x"


#helper functions


#returns one randomly selected x and y
#xs all points on plane
#y all outputs on plane
def stochastic(xs, ys):
   """Return one randomly chosen ``(x, y)`` pair.

   ``xs`` holds all points and ``ys`` the matching outputs; the same
   random index is used for both.
   """
   chosen = permutation(len(xs))[0]
   return xs[chosen], ys[chosen]

#estimates the mean gradient over all points for the partial derivative dx
#returns the mean of dx over all (x, y) pairs
def gradient(dx, evaluate, xs, ys):
   """Return the mean of the partial derivative ``dx`` over all points.

   ``dx(x, y, yHat)`` is evaluated for every ``(x, y)`` pair, with
   ``yHat = evaluate(x)``; the sum is divided by ``len(ys)``.
   """
   summed = sum(dx(x, y, evaluate(x)) for x, y in zip(xs, ys))
   return summed / len(ys)



# Gradient descent optimization,
# built on the momentum model.
# model: a Line instance
# xs: the feature values of the dataset
# ys: the continuous target values
# learningRate: the learning rate
# decayFact: the decay factor
# maxIter: the maximum number of iterations
def nesterov(model, xs, ys, learningRate, decayFact, maxIter):
  """Fit ``model`` with stochastic gradient descent using Nesterov momentum.

  Each iteration samples one ``(x, y)`` pair, evaluates the gradient at the
  look-ahead position ``w - decayFact * v`` and then applies

      v <- decayFact * v + learningRate * gradient
      w <- w - v

  The model is updated in place; progress is printed every 100 iterations.

  Args:
    model: object exposing a ``weights`` list and ``derivate(x, y)`` that
      returns one partial derivative per weight.
    xs: feature values of the dataset.
    ys: target values, aligned with ``xs``.
    learningRate: step size applied to each gradient.
    decayFact: momentum decay factor applied to the previous velocity.
    maxIter: number of iterations to run.
  """
  # Velocity (running update) for every weight, carried across iterations.
  velocity = [0 for _ in range(len(model.weights))]

  for i in range(maxIter):

    # choosing a random x and y
    x, y = stochastic(xs, ys)

    # Temporarily move the weights to the look-ahead position so the
    # gradient is evaluated where the momentum step is about to land.
    current = list(model.weights)
    for idx, weight in enumerate(current):
      model.weights[idx] = weight - decayFact * velocity[idx]
    try:
      lookahead_grads = model.derivate(x, y)
    finally:
      # Always restore the real weights, even if derivate() fails.
      for idx, weight in enumerate(current):
        model.weights[idx] = weight

    # updating the velocity and the model parameters
    for idx, grad in enumerate(lookahead_grads):
      velocity[idx] = decayFact * velocity[idx] + learningRate * grad
      model.weights[idx] = model.weights[idx] - velocity[idx]

    if i % 100 == 0:
      print(f"iteration is {i}")
      print(model)

# Disabled sketch of an Optimizers class, kept inside a bare string literal so
# it is never executed.
# NOTE(review): the sketch calls `updated_params.apppend`, a misspelling of
# `append`; fix that before re-enabling the code.
"""
class Optimizers:
  def __init__(self, learning_rate):
    self.learning_rate = learning_rate

  def SGD(self,  params, grads):
    #Stochastic gradient descent
    updated_params = []

    for param, grad  in zip(params, grads):
        delta = self.learning_rate * grad
        param -= delta

        updated_params.apppend(
          param
        )

    return updated_params
"""


'\nclass Optimizers:\n  def __init__(self, learning_rate):\n    self.learning_rate = learning_rate\n\n  def SGD(self,  params, grads):\n    #Stochastic gradient descent\n    updated_params = []\n\n    for param, grad  in zip(params, grads):\n        delta = self.learning_rate * grad\n        param -= delta\n\n        updated_params.apppend(\n          param\n        )\n\n    return updated_params\n'