## PyTorch vs Manual Model for Classification
In this section I compare the performance of a classification task on the Iris dataset using PyTorch models and an instance of the hand-written NNetwork class (the same class that was recently used to train on the MNIST dataset).

Import the required libraries

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.metrics import classification_report

In [8]:
# Seed NumPy's global random number generator so that runs are reproducible.
# NNetwork draws its initial weights and biases with np.random.randn and
# shuffles the training data with np.random.shuffle, both of which use it.
np.random.seed(42)

# Define the sigmoid function and its derivative
def sigmoid(z):
    """Return the logistic sigmoid ``1 / (1 + exp(-z))`` of *z*, element-wise.

    The value is built from ``exp(-|z|)``, which never exceeds 1, so large
    negative inputs do not overflow ``np.exp`` (and emit a RuntimeWarning)
    the way the direct formula does.

    Args:
        z: A scalar or NumPy array of pre-activations.

    Returns:
        The sigmoid of ``z`` with the same shape as ``z``; a scalar input
        yields a NumPy scalar.
    """
    z = np.asarray(z)
    decay = np.exp(-np.abs(z))  # lies in [0, 1], so it cannot overflow
    # z >= 0: 1 / (1 + e^-z);  z < 0: e^z / (1 + e^z) -- algebraically equal.
    out = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    return out[()]  # unwrap a 0-d result back to a scalar

def sigmoid_prime(z):
    """Return the derivative of the sigmoid function evaluated at *z*.

    Uses the identity ``sigmoid'(z) = sigmoid(z) * (1 - sigmoid(z))``.
    """
    activation = sigmoid(z)
    return activation * (1 - activation)

In [9]:
# Neural Network Class
class NNetwork:
    """Fully connected feed-forward neural network with sigmoid activations.

    The network is trained with mini-batch stochastic gradient descent, using
    backpropagation to obtain the gradient of a quadratic cost.

    ``weights[i]`` has shape ``(sizes[i + 1], sizes[i])`` and ``biases[i]``
    has shape ``(sizes[i + 1], 1)``; inputs and targets are column vectors.
    """

    def __init__(self, sizes):
        """Create a network with standard-normal random weights and biases.

        Args:
            sizes: Number of units per layer, input layer first, e.g.
                ``[4, 30, 3]``. The input layer gets no biases.
        """
        self.num_layers = len(sizes)
        self.sizes = sizes
        # Biases are drawn before weights; keep this order so that a fixed
        # NumPy seed keeps producing the same initial parameters.
        self.biases = [np.random.randn(y, 1) for y in sizes[1:]]
        self.weights = [np.random.randn(y, x) for x, y in zip(sizes[:-1], sizes[1:])]

    def feedforward(self, a):
        """Return the network output for the input column vector ``a``."""
        for b, w in zip(self.biases, self.weights):
            a = sigmoid(np.dot(w, a) + b)
        return a

    def evaluate(self, test_data):
        """Return how many ``(x, y)`` pairs in ``test_data`` are classified correctly.

        A sample counts as correct when the index of the largest output
        activation equals the index of the largest entry of the target ``y``.
        """
        test_results = [(np.argmax(self.feedforward(x)), np.argmax(y)) for (x, y) in test_data]
        return sum(int(x == y) for (x, y) in test_results)

    def cost_derivative(self, output_activations, y):
        """Return the derivative of the quadratic cost w.r.t. the output activations."""
        return (output_activations - y)

    def backprop(self, x, y):
        """Return the gradient of the cost for a single training sample.

        Args:
            x: Input column vector.
            y: Target column vector.

        Returns:
            A tuple ``(nabla_b, nabla_w)`` of per-layer lists of arrays with
            the same shapes as ``self.biases`` and ``self.weights``.
        """
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]

        # Forward pass: remember every weighted input z and every activation.
        activation = x
        activations = [x]
        zs = []
        for b, w in zip(self.biases, self.weights):
            z = np.dot(w, activation) + b
            zs.append(z)
            activation = sigmoid(z)
            activations.append(activation)

        # Backward pass: error of the output layer first ...
        delta = self.cost_derivative(activations[-1], y) * sigmoid_prime(zs[-1])
        nabla_b[-1] = delta
        nabla_w[-1] = np.dot(delta, activations[-2].transpose())
        # ... then propagate it towards the input. ``layer`` counts from the
        # end: layer == 2 is the second-to-last layer, and so on.
        for layer in range(2, self.num_layers):
            sp = sigmoid_prime(zs[-layer])
            delta = np.dot(self.weights[-layer + 1].transpose(), delta) * sp
            nabla_b[-layer] = delta
            nabla_w[-layer] = np.dot(delta, activations[-layer - 1].transpose())
        return (nabla_b, nabla_w)

    def update_mini_batch(self, mini_batch, eta_learning_rate):
        """Apply one gradient-descent step computed from ``mini_batch``.

        Args:
            mini_batch: List of ``(x, y)`` training pairs.
            eta_learning_rate: Learning rate; the summed gradient is scaled by
                ``eta_learning_rate / len(mini_batch)``.
        """
        if not mini_batch:
            # Nothing to learn from; also avoids dividing by zero below.
            return
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        for x, y in mini_batch:
            delta_nabla_b, delta_nabla_w = self.backprop(x, y)
            nabla_b = [nb + dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
            nabla_w = [nw + dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
        step = eta_learning_rate / len(mini_batch)
        self.weights = [w - step * nw for w, nw in zip(self.weights, nabla_w)]
        self.biases = [b - step * nb for b, nb in zip(self.biases, nabla_b)]

    def sgd(self, training_data, epochs, mini_batch_size, eta_learning_rate, test_data=None):
        """Train the network with mini-batch stochastic gradient descent.

        Args:
            training_data: Iterable of ``(x, y)`` pairs; it is copied into a
                list, so the caller's sequence is not reordered.
            epochs: Number of passes over the training data.
            mini_batch_size: Number of samples per gradient step (>= 1).
            eta_learning_rate: Learning rate.
            test_data: Optional iterable of ``(x, y)`` pairs. When it contains
                samples, the number of correct predictions is printed after
                every epoch; otherwise only a completion message is printed.

        Raises:
            ValueError: If ``mini_batch_size`` is smaller than 1.
        """
        if mini_batch_size < 1:
            # A negative size would otherwise yield no batches at all and the
            # network would silently not be trained.
            raise ValueError(f'mini_batch_size must be >= 1, got {mini_batch_size}')
        training_data = list(training_data)
        n = len(training_data)
        if test_data is not None:
            # Materialise before any truth test: generators are always truthy
            # and NumPy arrays have no unambiguous truth value.
            test_data = list(test_data)
        for j in range(epochs):
            np.random.shuffle(training_data)
            mini_batches = [training_data[k:k + mini_batch_size] for k in range(0, n, mini_batch_size)]
            for mini_batch in mini_batches:
                self.update_mini_batch(mini_batch, eta_learning_rate)
            if test_data:
                print(f'Epoch {j}: {self.evaluate(test_data)} / {len(test_data)}')
            else:
                print(f'Epoch {j} complete')

In [10]:
# Load and preprocess the IRIS dataset
def load_iris_data(path='data/iris.csv', test_size=0.2, random_state=42):
    """Load the Iris CSV and return standardized, one-hot encoded splits.

    Args:
        path: Location of the CSV file. It must provide the columns
            ``sepal_l``, ``sepal_w``, ``petal_l``, ``petal_w`` and ``species``.
        test_size: Fraction of the samples held out for testing.
        random_state: Seed passed to ``train_test_split``.

    Returns:
        A tuple ``(x_train, x_test, y_train, y_test)``. The ``x`` arrays hold
        the standardized features and the ``y`` arrays the one-hot encoded
        species as float32.
    """
    df = pd.read_csv(path, na_values=["NA", "?"])
    X = df[['sepal_l', 'sepal_w', 'petal_l', 'petal_w']].values.astype(np.float32)
    y = df['species'].values

    # One-hot encode the species labels (one column per distinct species).
    encoder = OneHotEncoder(sparse_output=False)
    y_encoded = encoder.fit_transform(y.reshape(-1, 1)).astype(np.float32)

    # Split into training and test data.
    x_train, x_test, y_train, y_test = train_test_split(
        X, y_encoded, test_size=test_size, random_state=random_state
    )

    # Fit the scaler on the training split only and reuse its statistics for
    # the test split, so no information about the test data leaks in.
    scaler = StandardScaler()
    x_train = scaler.fit_transform(x_train)
    x_test = scaler.transform(x_test)

    return x_train, x_test, y_train, y_test

def iris_data_wrapper(X, y):
    """Pair each sample with its target, both reshaped to column vectors.

    NNetwork multiplies weight matrices with column vectors, so every row of
    ``X`` and ``y`` becomes an array of shape ``(n, 1)``. The length ``n`` is
    taken from the data, so any number of features or classes is accepted
    (``(4, 1)`` and ``(3, 1)`` for Iris).

    Args:
        X: 2-D array with one sample per row.
        y: 2-D array with one (one-hot) target per row.

    Returns:
        A list of ``(input, target)`` tuples, one per sample.
    """
    inputs = [np.reshape(x, (-1, 1)) for x in X]
    results = [np.reshape(y_i, (-1, 1)) for y_i in y]
    return list(zip(inputs, results))

In [12]:
# Load the Iris data: standardized features and one-hot encoded labels,
# already split into a training and a test set
x_train, x_test, y_train, y_test = load_iris_data()

# Prepare data for NNetwork: lists of (input, target) pairs of column vectors
train_data = iris_data_wrapper(x_train, y_train)
test_data = iris_data_wrapper(x_test, y_test)
# Show the first test pair as a quick check of shapes and dtypes
print(test_data[0])

(array([[ 0.3545167 ],
       [-0.5850599 ],
       [ 0.55777514],
       [ 0.02224758]], dtype=float32), array([[0.],
       [1.],
       [0.]], dtype=float32))


In [6]:
# Build a 4-30-3 network (4 input features, 30 hidden units, 3 classes) and
# train it with mini-batch SGD, printing the test score after every epoch
net = NNetwork(sizes=[4, 30, 3])
net.sgd(
    train_data,
    epochs=30,
    mini_batch_size=10,
    eta_learning_rate=3.0,
    test_data=test_data,
)

Epoch 0: 27 / 30
Epoch 1: 27 / 30
Epoch 2: 27 / 30
Epoch 3: 29 / 30
Epoch 4: 30 / 30
Epoch 5: 28 / 30
Epoch 6: 30 / 30
Epoch 7: 29 / 30
Epoch 8: 30 / 30
Epoch 9: 30 / 30
Epoch 10: 27 / 30
Epoch 11: 30 / 30
Epoch 12: 30 / 30
Epoch 13: 30 / 30
Epoch 14: 30 / 30
Epoch 15: 30 / 30
Epoch 16: 30 / 30
Epoch 17: 30 / 30
Epoch 18: 29 / 30
Epoch 19: 30 / 30
Epoch 20: 30 / 30
Epoch 21: 29 / 30
Epoch 22: 29 / 30
Epoch 23: 30 / 30
Epoch 24: 30 / 30
Epoch 25: 30 / 30
Epoch 26: 30 / 30
Epoch 27: 30 / 30
Epoch 28: 30 / 30
Epoch 29: 30 / 30


In [7]:
# Function to get the model's predictions and compare with the actual labels
def compare_outputs(net, test_data):
    """Print and collect the predicted and true class index of every sample.

    Args:
        net: Object whose ``feedforward(x)`` returns the output activations.
        test_data: Iterable of ``(x, y)`` pairs with ``y`` one-hot encoded.

    Returns:
        A tuple ``(predictions, actuals)`` of two lists of class indices, in
        the order of ``test_data``.
    """
    pairs = []
    for features, target in test_data:
        # The class index is the position of the largest entry.
        guess = np.argmax(net.feedforward(features))
        truth = np.argmax(target)
        print(f'Predicted: {guess}, Actual: {truth}')
        pairs.append((guess, truth))

    predictions = [guess for guess, _ in pairs]
    actuals = [truth for _, truth in pairs]
    return predictions, actuals

# Compare outputs on the test data
predictions, actuals = compare_outputs(net, test_data)

# Calculate and print the accuracy
correct_predictions = sum(int(p == a) for p, a in zip(predictions, actuals))
total_samples = len(test_data)
accuracy = correct_predictions / total_samples
print(f'Accuracy: {accuracy * 100:.2f}%')

# The class indices 0, 1, 2 are the arg-max positions of the one-hot labels.
# Passing `labels` explicitly keeps them aligned with `target_names` even if
# a class happens to be absent from this test split; without it
# classification_report raises when fewer than three classes are present.
# NOTE(review): the name order assumes the encoder sorted the species
# alphabetically -- confirm against the values in the CSV.
print(classification_report(
    actuals,
    predictions,
    labels=[0, 1, 2],
    target_names=['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'],
))

Predicted: 1, Actual: 1
Predicted: 0, Actual: 0
Predicted: 2, Actual: 2
Predicted: 1, Actual: 1
Predicted: 1, Actual: 1
Predicted: 0, Actual: 0
Predicted: 1, Actual: 1
Predicted: 2, Actual: 2
Predicted: 1, Actual: 1
Predicted: 1, Actual: 1
Predicted: 2, Actual: 2
Predicted: 0, Actual: 0
Predicted: 0, Actual: 0
Predicted: 0, Actual: 0
Predicted: 0, Actual: 0
Predicted: 1, Actual: 1
Predicted: 2, Actual: 2
Predicted: 1, Actual: 1
Predicted: 1, Actual: 1
Predicted: 2, Actual: 2
Predicted: 0, Actual: 0
Predicted: 2, Actual: 2
Predicted: 0, Actual: 0
Predicted: 2, Actual: 2
Predicted: 2, Actual: 2
Predicted: 2, Actual: 2
Predicted: 2, Actual: 2
Predicted: 2, Actual: 2
Predicted: 0, Actual: 0
Predicted: 0, Actual: 0
Accuracy: 100.00%
                 precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        10
Iris-versicolor       1.00      1.00      1.00         9
 Iris-virginica       1.00      1.00      1.00        11

       accuracy                 