In [281]:
from sklearn import datasets
import pandas as pd
import numpy as np

# load iris dataset
iris = datasets.load_iris()

# since this is a bunch, create a dataframe

iris_df=pd.DataFrame(iris.data)
iris_df['class']=iris.target
iris_df.columns=['sepal_len', 'sepal_wid', 'petal_len', 'petal_wid', 'class']
iris_df.dropna(how="all", inplace=True) # remove any empty lines
iris = iris_df.sample(frac=1).reset_index(drop=True)

print(iris)


     sepal_len  sepal_wid  petal_len  petal_wid  class
0          5.7        2.8        4.1        1.3      1
1          7.3        2.9        6.3        1.8      2
2          5.0        3.3        1.4        0.2      0
3          6.9        3.1        4.9        1.5      1
4          5.6        3.0        4.5        1.5      1
..         ...        ...        ...        ...    ...
145        5.2        3.5        1.5        0.2      0
146        5.0        3.5        1.3        0.3      0
147        5.0        3.6        1.4        0.2      0
148        7.7        2.8        6.7        2.0      2
149        5.2        4.1        1.5        0.1      0

[150 rows x 5 columns]


In [462]:
class Model:
    numberOfClasses = 0
    numberOfFeatures = 0
    numberOfHiddenLayerUnits = 0
    X = None
    Y = None
    X_forTraining = None
    X_forTesting = None
    Y_forTraining = None
    Y_forTesting = None
    Y_train_encoded = None
    Y_test_encoded = None
    W1 = None
    b1 = None
    W2 = None
    b2 = None
    splitRange = 75
    
    # One-hot encode the labels for categorical cross-entropy
    def one_hot_encode(self, labels, num_classes):
        return np.eye(num_classes)[labels]
        
    def prepareInputData(self, inputDataFrame: pd.DataFrame, numberOfClasses: int, numberOfFeatures: int, numberOfHiddenLayerUnits: int, splitRange):
        self.numberOfClasses = numberOfClasses
        self.numberOfFeatures = numberOfFeatures
        self.numberOfHiddenLayerUnits = numberOfHiddenLayerUnits
        self.splitRange = splitRange
        self.X = inputDataFrame.iloc[:, 0:numberOfFeatures]
        self.Y = inputDataFrame.iloc[:, -1]
        
        
        self.X_forTraining, self.X_forTesting = self.X.iloc[:self.splitRange], self.X.iloc[self.splitRange:]
        self.Y_forTraining,self.Y_forTesting = self.Y.iloc[:self.splitRange], self.Y.iloc[self.splitRange:]
        
        print(self.Y_forTraining)
        print(self.Y_forTesting)

        self.Y_train_encoded = self.one_hot_encode(self.Y_forTraining, self.numberOfClasses)
        self.Y_test_encoded = self.one_hot_encode(self.Y_forTesting, self.numberOfClasses)
    
    def initializeStartParameters(self):
        np.random.seed(42)
        self.W1 = np.random.randn(self.numberOfFeatures, self.numberOfHiddenLayerUnits)
        self.b1 = np.zeros((1, self.numberOfHiddenLayerUnits))
        self.W2 = np.random.randn(self.numberOfHiddenLayerUnits, self.numberOfClasses)
        self.b2 = np.zeros((1, self.numberOfClasses))
    
    def relu(self, Z):
        return np.maximum(0, Z)

    def relu_derivative(self,Z):
        return Z > 0

    def softmax(self,Z):
        Z = Z.to_numpy()
        expZ = np.exp(Z - np.max(Z, axis=1, keepdims=True))
        return expZ / np.sum(expZ, axis=1, keepdims=True)

    def compute_loss(self,Y, Y_hat):
        m = Y.shape[0]
        return -np.sum(Y * np.log(Y_hat + 1e-9)) / m
    
    def forwardPropagation(self, X):
        Z1 = X.dot(self.W1) + self.b1
        A1 = self.relu(Z1)
        Z2 = A1.dot(self.W2) + self.b2
        A2 = self.softmax(Z2)
        return {"Z1": Z1, "A1": A1, "Z2": Z2, "A2": A2}
    
    def backwardPropagation(self, X, Y, cache):
        m = X.shape[0]

        dZ2 = cache["A2"] - Y
        dW2 = (1 / m) * np.dot(cache["A1"].T, dZ2)
        
        db2 = (1 / m) * np.sum(dZ2, axis=0, keepdims=True)
        dZ1 = np.dot(dZ2, self.W2.T) * self.relu_derivative(cache["Z1"])

        dW1 = (1 / m) * np.dot(X.T, dZ1)
        
        db1 = (1 / m) * np.sum(dZ1.to_numpy(), axis=0, keepdims=True)

        return {"dW1": dW1, "db1": db1, "dW2": dW2, "db2": db2}

    def updateParameters(self, grads, learningRate):
        self.W1 -= learningRate * grads["dW1"]
        self.b1 -= learningRate * grads["db1"]
        self.W2 -= learningRate * grads["dW2"]
        self.b2 -= learningRate * grads["db2"]
        return params

    def evaluate( self):
        # Perform forward propagation to get predictions
        cache = self.forwardPropagation(self.X_forTesting)
        predictions = np.argmax(cache["A2"], axis=1)  # Choose the class with the highest probability

        print(predictions)
        print(self.Y_forTesting.to_numpy())
    
        # Calculate accuracy
        accuracy = np.mean(predictions == self.Y_forTesting) * 100  # Compare predictions to true labels and compute percentage
        return accuracy

In [465]:
Iris = Model()
Iris.prepareInputData(iris, 3, 4, 5, 140)
Iris.initializeStartParameters()


0      1
1      2
2      0
3      1
4      1
      ..
135    0
136    2
137    1
138    0
139    1
Name: class, Length: 140, dtype: int64
140    0
141    2
142    0
143    1
144    2
145    0
146    0
147    0
148    2
149    0
Name: class, dtype: int64


In [466]:

learning_rate=0.01
epochs=1000

for epoch in range(epochs):
    # Forward pass on training data
    train_cache = Iris.forwardPropagation(Iris.X_forTraining)
    
    loss = Iris.compute_loss(Iris.Y_train_encoded, train_cache["A2"])
    
    # Backward pass (compute gradients)
    grads = Iris.backwardPropagation(Iris.X_forTraining, Iris.Y_train_encoded, train_cache)
    
    # Update parameters
    params = Iris.updateParameters(grads, learning_rate)
    
    # Print the loss every 100 epochs
    if epoch % 100 == 0:
        test_cache = Iris.forwardPropagation(Iris.X_forTesting)
        test_loss = Iris.compute_loss(Iris.Y_test_encoded, test_cache["A2"])
        
        print(f"Epoch {epoch}, Training loss: {loss:.4f}, Test loss: {test_loss:.4f}")
accuracy = Iris.evaluate()

print(f"Accuracy: {accuracy :.2f}%")

Iris.initializeStartParameters()

Epoch 0, Training loss: 7.3519, Test loss: 10.2846
Epoch 100, Training loss: 0.6116, Test loss: 0.4361
Epoch 200, Training loss: 0.5246, Test loss: 0.3252
Epoch 300, Training loss: 0.4684, Test loss: 0.2696
Epoch 400, Training loss: 0.4092, Test loss: 0.2265
Epoch 500, Training loss: 0.3581, Test loss: 0.1898
Epoch 600, Training loss: 0.3139, Test loss: 0.1577
Epoch 700, Training loss: 0.2767, Test loss: 0.1304
Epoch 800, Training loss: 0.2459, Test loss: 0.1082
Epoch 900, Training loss: 0.2206, Test loss: 0.0905
[0 2 0 1 2 0 0 0 2 0]
[0 2 0 1 2 0 0 0 2 0]
Accuracy: 100.00%
