## PyTorch vs Manual Model for Classification
In this section I would compare the performance of a classification task using PyTorch instances and an instance of the used NNetwork class with the IRIS dataset (lately used to train MNIST dataset). 
First I will use Stochastic Gradient Descend as optimizer and Sigmod activation function since these are used in the original Manual Model. The output of the output layer was evaluated with a np.argmax as equivalent to softmax activation function. The architecture was [4, 30, 3]
The same configuration is used to train the same preprocesed dataset but with PyTorch using Secuential.

Notes:

- LabelEncoder is used with CrossEntropyLoss and LogSoftMax
- OneHotEncoder is used with most optimizers and SoftMax

### Manual model
Source: http://neuralnetworksanddeeplearning.com/chap1.html

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.metrics import classification_report

In [None]:
# Seed for reproducibility
np.random.seed(42)

# Define the sigmoid function and its derivative
def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def sigmoid_prime(z):
    return sigmoid(z) * (1 - sigmoid(z))

In [None]:
# Neural Network Class
class NNetwork(object):
    def __init__(self, sizes):
        self.num_layers = len(sizes)
        self.sizes = sizes
        self.biases = [np.random.randn(y, 1) for y in sizes[1:]]
        self.weights = [np.random.randn(y, x) for x, y in zip(sizes[:-1], sizes[1:])]

    def feedforward(self, a):
        for b, w in zip(self.biases, self.weights):
            a = sigmoid(np.dot(w, a) + b)
        return a

    def evaluate(self, test_data):
        test_results = [(np.argmax(self.feedforward(x)), np.argmax(y)) for (x, y) in test_data]
        return sum(int(x == y) for (x, y) in test_results)

    def cost_derivative(self, output_activations, y):
        return (output_activations - y)

    def backprop(self, x, y):
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        activation = x
        activations = [x]
        zs = []
        for b, w in zip(self.biases, self.weights):
            z = np.dot(w, activation) + b
            zs.append(z)
            activation = sigmoid(z)
            activations.append(activation)
        delta = self.cost_derivative(activations[-1], y) * sigmoid_prime(zs[-1])
        nabla_b[-1] = delta
        nabla_w[-1] = np.dot(delta, activations[-2].transpose())
        for l in range(2, self.num_layers):
            z = zs[-l]
            sp = sigmoid_prime(z)
            delta = np.dot(self.weights[-l + 1].transpose(), delta) * sp
            nabla_b[-l] = delta
            nabla_w[-l] = np.dot(delta, activations[-l - 1].transpose())
        return (nabla_b, nabla_w)

    def update_mini_batch(self, mini_batch, eta_learning_rate):
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        for x, y in mini_batch:
            delta_nabla_b, delta_nabla_w = self.backprop(x, y)
            nabla_b = [nb + dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
            nabla_w = [nw + dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
        self.weights = [w - (eta_learning_rate / len(mini_batch)) * nw for w, nw in zip(self.weights, nabla_w)]
        self.biases = [b - (eta_learning_rate / len(mini_batch)) * nb for b, nb in zip(self.biases, nabla_b)]

    def sgd(self, training_data, epochs, mini_batch_size, eta_learning_rate, test_data=None):
        training_data = list(training_data)
        n = len(training_data)
        if test_data:
            test_data = list(test_data)
            n_test = len(test_data)
        for j in range(epochs):
            np.random.shuffle(training_data)
            mini_batches = [training_data[k:k + mini_batch_size] for k in range(0, n, mini_batch_size)]
            for mini_batch in mini_batches:
                self.update_mini_batch(mini_batch, eta_learning_rate)
            if test_data:
                print(f'Epoch {j}: {self.evaluate(test_data)} / {n_test}')
            else:
                print(f'Epoch {j} complete')

In [None]:
# Load and preprocess the IRIS dataset
def load_iris_data():
    df = pd.read_csv('data/iris.csv', na_values=["NA", "?"])
    X = df[['sepal_l', 'sepal_w', 'petal_l', 'petal_w']].values.astype(np.float32)
    y = df['species'].values

    # Encoding the labels in y
    encoder = OneHotEncoder(sparse_output=False)
    y_encoded = encoder.fit_transform(y.reshape(-1, 1)).astype(np.float32)

    # Split training and validation data
    x_train, x_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

    # Normalize x set
    scaler = StandardScaler()
    x_train = scaler.fit_transform(x_train)
    x_test = scaler.transform(x_test)

    return x_train, x_test, y_train, y_test

def iris_data_wrapper(X, y):
    inputs = [np.reshape(x, (4, 1)) for x in X]
    results = [np.reshape(y_i, (3, 1)) for y_i in y]
    data = list(zip(inputs, results))
    print(data[0])
    return data

In [None]:
# Load normalized and encoded data
x_train, x_test, y_train, y_test = load_iris_data()

# Prepare data for NNetwork
train_data = iris_data_wrapper(x_train, y_train)
test_data = iris_data_wrapper(x_test, y_test)

In [None]:
# Initialize and train the network
net = NNetwork([4, 30, 3])
net.sgd(train_data, 30, 10, 3.0, test_data=test_data)

In [None]:
# Function to get the model's predictions and compare with the actual labels
def compare_outputs(net, test_data):
    predictions = []
    actuals = []
    for x, y in test_data:
        predicted = np.argmax(net.feedforward(x))
        actual = np.argmax(y)
        predictions.append(predicted)
        actuals.append(actual)
        print(f'Predicted: {predicted}, Actual: {actual}')

    return predictions, actuals

# Compare outputs on the test data
predictions, actuals = compare_outputs(net, test_data)

# Calculate and print the accuracy
correct_predictions = sum(int(p == a) for p, a in zip(predictions, actuals))
total_samples = len(test_data)
accuracy = correct_predictions / total_samples
print(f'Accuracy: {accuracy * 100:.2f}%')

# Since the labels are integers (0, 1, 2), they directly map to the class indices
print(classification_report(actuals, predictions, target_names=['Iris-setosa', 'Iris-versicolor', 'Iris-virginica']))

## PyTorch model

In [1]:
import numpy as np
import torch
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from torch import nn
from torch.utils.data import DataLoader, TensorDataset

In [2]:
# Load and preprocess the IRIS dataset
def load_iris_data():
    df = pd.read_csv('data/iris.csv', na_values=["NA", "?"])
    X = df[['sepal_l', 'sepal_w', 'petal_l', 'petal_w']].values.astype(np.float32)

    # Encoding the labels in y
    encoder = OneHotEncoder(sparse_output=False)
    y_encoded = encoder.fit_transform(df["species"].values.reshape(-1, 1))
    print(f"Y encoded: {y_encoded}")
    # Split training and validation data
    x_train, x_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)
    species = df['species'].unique()

    # Normalize x set
    scaler = StandardScaler()
    x_train = scaler.fit_transform(x_train)
    x_test = scaler.transform(x_test)
    
    return x_train, x_test, y_train, y_test, species

In [3]:
# select the training device
device = "cpu"

# load and normalize data
x_train, x_test, y_train, y_test, species = load_iris_data()

# numpy to tensor
x_train = torch.tensor(x_train, device=device, dtype=torch.float32)
y_train = torch.tensor(y_train, device=device, dtype=torch.float32)

x_test = torch.tensor(x_test, device=device, dtype=torch.float32)
y_test = torch.tensor(y_test, device=device, dtype=torch.float32)

# create torch datasets
batch_size = 16

dataset_train = TensorDataset(x_train, y_train)
dataloader_train = DataLoader(dataset_train, batch_size=batch_size, shuffle=True)

dataset_test = TensorDataset(x_test, y_test)
dataloader_test = DataLoader(dataset_test, batch_size=batch_size, shuffle=True)

Y encoded: [[1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 

In [4]:
print(len(species))

3


In [5]:
# Pytorch model using Secuential
model = nn.Sequential(
    nn.Linear(x_train.shape[1], 30),
    nn.Sigmoid(),
    nn.Linear(30, len(species)),
    nn.Softmax(dim=1)
)

In [6]:
loss_fn = nn.MSELoss()

optimizer = torch.optim.SGD(model.parameters(), lr=3)

In [7]:
# Training loop
num_epochs = 30
for epoch in range(num_epochs):
    model.train()
    for batch_x, batch_y in dataloader_train:
        optimizer.zero_grad()
        outputs = model(batch_x)
        loss = loss_fn(outputs, batch_y)
        loss.backward()
        optimizer.step()
    
    # Print loss for every epoch
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {loss.item()}")


Epoch 1/30, Loss: 0.14309106767177582
Epoch 2/30, Loss: 0.1917189210653305
Epoch 3/30, Loss: 0.03181104362010956
Epoch 4/30, Loss: 0.08225158601999283
Epoch 5/30, Loss: 0.019021781161427498
Epoch 6/30, Loss: 0.0162745900452137
Epoch 7/30, Loss: 0.10030723363161087
Epoch 8/30, Loss: 0.0672171488404274
Epoch 9/30, Loss: 0.052516818046569824
Epoch 10/30, Loss: 0.02624068595468998
Epoch 11/30, Loss: 0.016457010060548782
Epoch 12/30, Loss: 0.030427031219005585
Epoch 13/30, Loss: 0.009953872300684452
Epoch 14/30, Loss: 0.00733126001432538
Epoch 15/30, Loss: 0.05865040794014931
Epoch 16/30, Loss: 0.02733949013054371
Epoch 17/30, Loss: 0.035035815089941025
Epoch 18/30, Loss: 0.034509118646383286
Epoch 19/30, Loss: 0.004441320896148682
Epoch 20/30, Loss: 0.002584404544904828
Epoch 21/30, Loss: 0.01274974923580885
Epoch 22/30, Loss: 0.05057146027684212
Epoch 23/30, Loss: 0.01773739792406559
Epoch 24/30, Loss: 0.026749292388558388
Epoch 25/30, Loss: 0.025101276114583015
Epoch 26/30, Loss: 0.00086

In [8]:
# Evaluation
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Set model to evaluation mode
model.eval()

# Initialize lists to hold true and predicted values
true_labels = []
predicted_labels = []

# Disable gradient calculation for evaluation
with torch.no_grad():
    for batch_x, batch_y in dataloader_test:
        outputs = model(batch_x)
        _, predicted = torch.max(outputs, 1)
        true_labels.extend(batch_y.argmax(dim=1).cpu().numpy())
        predicted_labels.extend(predicted.cpu().numpy())

# Convert lists to numpy arrays
true_labels = np.array(true_labels)
predicted_labels = np.array(predicted_labels)

# Calculate evaluation metrics
accuracy = accuracy_score(true_labels, predicted_labels)
precision = precision_score(true_labels, predicted_labels, average='weighted')
recall = recall_score(true_labels, predicted_labels, average='weighted')
f1 = f1_score(true_labels, predicted_labels, average='weighted')

# Print evaluation metrics
print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")

Accuracy: 1.0
Precision: 1.0
Recall: 1.0
F1 Score: 1.0
