In [3]:
from sklearn import datasets

# load iris dataset
iris = datasets.load_iris()

In [4]:
import pandas as pd

# since this is a bunch, create a dataframe
iris_df=pd.DataFrame(iris.data)
iris_df['class']=iris.target
iris_df.columns=['sepal_len', 'sepal_wid', 'petal_len', 'petal_wid', 'class']
iris_df.dropna(how="all", inplace=True) # remove any empty lines

#view the iris dataframe
print(type(iris_df))

shuffled_df = iris_df.sample(frac=1).reset_index(drop=True)

print(shuffled_df)

<class 'pandas.core.frame.DataFrame'>
     sepal_len  sepal_wid  petal_len  petal_wid  class
0          4.6        3.2        1.4        0.2      0
1          6.7        3.1        5.6        2.4      2
2          5.1        3.3        1.7        0.5      0
3          5.7        4.4        1.5        0.4      0
4          6.7        3.3        5.7        2.5      2
..         ...        ...        ...        ...    ...
145        5.0        2.3        3.3        1.0      1
146        7.7        3.8        6.7        2.2      2
147        6.5        3.2        5.1        2.0      2
148        6.5        3.0        5.2        2.0      2
149        5.6        3.0        4.1        1.3      1

[150 rows x 5 columns]


In [5]:
import numpy as np

# Define our neural network class
class NeuralNetwork:
    def __init__(self, input_size, hidden_size, output_size):
        # Hidden layer initialization
        np.random.seed(42)
        self.W1 = np.random.rand(input_size, hidden_size)
        self.b1 = np.zeros((1, hidden_size))

        # Output layer initialization
        self.W2 = np.random.rand(hidden_size, output_size)
        self.b2 = np.zeros((1, output_size))

    def sigmoid(self, z):
        return 1 / (1 + np.exp(-z))

    def relu(self, x):
        return np.maximum(0, x)

    def softmax(self, x):
        """Compute softmax values for each sets of scores in x."""
        # Subtract the max value from each score to prevent overflow in exp
        e_x = np.exp(x - np.max(x, axis=-1, keepdims=True))
        return e_x / e_x.sum(axis=-1, keepdims=True)

    def forward(self, x):
        # calculate the activations in the hidden layer
        self.Z1 = np.dot(x, self.W1) + self.b1
        self.A1 = self.relu(self.Z1)

        # calculate activations for the output layer
        self.Z2 = np.dot(self.A1, self.W2) + self.b2
        y_hat = self.softmax(self.Z2)
        return y_hat

    def backprop(self, X, y, learning_rate):
        m = X.shape[0]
        # Forward pass, Returns A2
        predictions = self.forward(X)
        
        # Calculate the error of the output layer vs ground truth
        loss = predictions - y
        # Calculate the dW2, i.e. the derivatives of the Loss function wiht regards to the Loss function
        dW2 = (1/m) * np.dot(self.A1.T, loss)

        # Calculate the 
        dZ1 = np.dot(loss, self.W2.T) * (self.Z1 > 0)
        dW1 = (1/m) * np.dot(X.T, dZ1)

        # Update weights and biases
        self.W2 -= learning_rate * dW2
        self.b2 -= learning_rate * (1/m) * np.sum(loss, axis=0, keepdims=True)
        self.W1 -= learning_rate * dW1
        self.b1 -= learning_rate * (1/m) * np.sum(dZ1, axis=0, keepdims=True)

    def compute_loss(self, predictions, targets):
        return -np.mean(targets * np.log(predictions + 1e-9))

    def train(self, X_train, Y_train, X_test, Y_test, epochs, learning_rate):
        for epoch in range(epochs):
            predictions = self.forward(X_train)
            loss = self.compute_loss(predictions, Y_train)

            self.backprop(X_train, Y_train, learning_rate)
            #print(f"W2: {self.W2}")

            # Compute accuracy on testing data
            test_predictions = np.argmax(self.forward(X_test), axis=1)
            test_accuracy = np.mean(test_predictions == np.argmax(Y_test, axis=1))

            if epoch % 100 == 0:
                print(f"Epoch {epoch}, Loss: {loss:.4f}, Test Accuracy: {test_accuracy*100:.4f}%")

        

In [6]:
def fit_one_hot_encoder(labels):
    return np.unique(labels)

def one_hot_encode(labels, num_classes):
    return np.eye(num_classes)[labels]   
# def transform_one_hot_encoder(labels, categories):
#     # Create a dictionary to map each category to a unique index
#     category_index = {category: idx for idx, category in enumerate(categories)}
#     print(type(category_index))
    
#     # Initialize the one-hot encoded matrix with zeros
#     one_hot_encoded = np.zeros((len(labels), len(categories)))
    
#     # Encode each label as a one-hot vector
#     for i, label in enumerate(labels):
#         scalar_label = label[0]
#         index = category_index[scalar_label]
#         one_hot_encoded[i, index] = 1
        
#     return one_hot_encoded

In [69]:
# create a new instance of a neural network
nn = NeuralNetwork(input_size = 4, hidden_size = 1, output_size = 3)

X = shuffled_df[['sepal_len', 'sepal_wid', 'petal_len', 'petal_wid']].values

X_train, X_val = np.split(X, [120])

y = shuffled_df['class'].values

#y_onehot = transform_one_hot_encoder(y, fit_one_hot_encoder(y))
y_onehot = one_hot_encode(y, 3)
Y_train_one_hot, Y_val_one_hot = np.split(y_onehot, [120])

# ----------------------
# Paul test

# from sklearn.preprocessing import OneHotEncoder
# from sklearn.model_selection import train_test_split

# Y = shuffled_df['class'].values.reshape(-1, 1)

# X_train, X_val, Y_train, Y_val = train_test_split(X, Y, test_size=0.2, random_state=42)

# encoder = OneHotEncoder()
# Y_train_one_hot = encoder.fit_transform(Y_train).toarray()  # Fit and transform Y_train
# Y_val_one_hot = encoder.transform(Y_val).toarray()  # Transform Y_val based on the same encoding

#-----------------------
# Try with new order
# X_train, X_test = np.split(X, [120])

# y = shuffled_df['class'].values

# y_train, y_test = np.split(y, [120])

# Y_train_one_hot = one_hot_encode(y_train, 3)
# Y_val_one_hot = one_hot_encode(y_test, 3)

# Print our values
# print(f"X_train: {X_train}")
# print(f"Y_train_one_hot: {Y_train_one_hot}")
# print(f"X_val: {X_val}")
# print(f"Y_val_one_hot: {Y_val_one_hot}")

nn.train(X_train, Y_train_one_hot, X_val, Y_val_one_hot, epochs=1000, learning_rate=0.125)

result = nn.forward(X)
test_predictions = np.argmax(nn.forward(X), axis=1).reshape(-1, 1)
print(test_predictions)
print(Y)
test_accuracy = np.mean(test_predictions == Y)

print(f"test_accuracy: {test_accuracy}")

for index in range(test_predictions.size):
    if test_predictions[index] != Y[index]:
        print(f"index: {index}, feature: {X[index]}, Y: {result[index]}")



Epoch 0, Loss: 0.4015, Test Accuracy: 46.6667%
Epoch 100, Loss: 0.1644, Test Accuracy: 76.6667%
Epoch 200, Loss: 0.1237, Test Accuracy: 90.0000%
Epoch 300, Loss: 0.0983, Test Accuracy: 96.6667%
Epoch 400, Loss: 0.0805, Test Accuracy: 93.3333%
Epoch 500, Loss: 0.0686, Test Accuracy: 93.3333%
Epoch 600, Loss: 0.0604, Test Accuracy: 93.3333%
Epoch 700, Loss: 0.0545, Test Accuracy: 93.3333%
Epoch 800, Loss: 0.0499, Test Accuracy: 93.3333%
Epoch 900, Loss: 0.0464, Test Accuracy: 93.3333%
[[0]
 [2]
 [0]
 [0]
 [2]
 [0]
 [1]
 [2]
 [2]
 [1]
 [2]
 [0]
 [0]
 [1]
 [0]
 [2]
 [0]
 [2]
 [2]
 [0]
 [0]
 [1]
 [1]
 [2]
 [0]
 [2]
 [1]
 [1]
 [0]
 [1]
 [1]
 [1]
 [1]
 [0]
 [0]
 [0]
 [0]
 [1]
 [1]
 [2]
 [2]
 [1]
 [2]
 [0]
 [2]
 [1]
 [1]
 [0]
 [0]
 [1]
 [2]
 [1]
 [2]
 [2]
 [1]
 [0]
 [0]
 [0]
 [0]
 [1]
 [1]
 [0]
 [1]
 [1]
 [0]
 [0]
 [2]
 [2]
 [0]
 [0]
 [0]
 [0]
 [0]
 [2]
 [0]
 [0]
 [1]
 [2]
 [0]
 [0]
 [0]
 [0]
 [0]
 [2]
 [2]
 [0]
 [2]
 [1]
 [1]
 [2]
 [2]
 [2]
 [2]
 [2]
 [2]
 [1]
 [0]
 [1]
 [2]
 [1]
 [0]
 [2]
 [