## Multiclass Classification - Iris [numpy]

- created: 2024.11.17

### Data

In [1]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load data
x, y = load_iris(return_X_y=True)
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled = scaler.transform(x_test)

print(x_train_scaled.shape, y_train.shape)
print(x_test_scaled.shape, y_test.shape)

(120, 4) (120,)
(30, 4) (30,)


### [scikit-learn] Modeling and Training

In [2]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# sklearn model
model = SVC()
model.fit(x_train_scaled, y_train)

# Training
y_pred = model.predict(x_train_scaled)
acc = accuracy_score(y_train, y_pred)
print(f'Train Accuracy: {acc:.4f}')

# Evaluation
y_pred = model.predict(x_test_scaled)
acc = accuracy_score(y_test, y_pred)
print(f'Test  Accuracy: {acc:.4f}')

Train Accuracy: 0.9667
Test  Accuracy: 1.0000


### [numpy] Modeling and Training

- Manual backward propagation
- Manual update of weights and biases

In [3]:
import numpy as np
from scipy.special import expit as sigmoid

def one_hot(y, n_classes):
    return np.eye(n_classes)[y]

def softmax(x):
    e_x = np.exp(x - np.max(x, axis=1, keepdims=True))
    return e_x / np.sum(e_x, axis=1, keepdims=True)

def cross_entropy(y_pred, y_true):
    batch_size = y_pred.shape[0] if y_pred.ndim == 2 else 1
    return -np.sum(y_true*np.log(y_pred + 1.0E-8))/batch_size

def accuracy(y_pred, y_true):
    return (y_pred.argmax(1) == y_true.argmax(1)).mean()

## Data
x, y = x_train_scaled, one_hot(y_train, n_classes=3)
print(x.shape, y.shape)

(120, 4) (120, 3)


In [4]:
## Model: 2-layer MLP
np.random.seed(42)
input_size, hidden_size, output_size = 4, 100, 3

w1 = np.random.randn(input_size, hidden_size)   # weight of 1st layer
b1 = np.zeros(hidden_size)                      # bias of 1st layer
w2 = np.random.randn(hidden_size, output_size)  # weight of 2nd layer
b2 = np.zeros(output_size)                      # bias of 2nd layer

## Train
n_epochs = 10000
learning_rate = 0.01

for epoch in range(1, n_epochs + 1):
    # Forward propagation
    z1 = np.dot(x, w1) + b1
    a1 = sigmoid(z1)
    z2 = np.dot(a1, w2) + b2
    out = softmax(z2)

    loss = cross_entropy(out, y)
    score = accuracy(out, y)

    # Backward progapation
    grad_z2 = (z2 - y) / y.shape[0]
    grad_w2 = np.dot(a1.T, grad_z2)
    grad_b2 = np.sum(grad_z2, axis=0)

    grad_a1 = np.dot(grad_z2, w2.T)
    grad_z1 = a1 * (1 - a1) * grad_a1
    grad_w1 = np.dot(x.T, grad_z1)
    grad_b1 = np.sum(grad_z1, axis=0)

    # Update weights and biases
    w1 -= learning_rate * grad_w1
    b1 -= learning_rate * grad_b1
    w2 -= learning_rate * grad_w2
    b2 -= learning_rate * grad_b2

    if epoch % (n_epochs // 10) == 0:
        print(f"[{epoch}/{n_epochs}] loss: {loss.item():.2f} score: {score:.4f}")

[1000/10000] loss: 0.65 score: 0.9333
[2000/10000] loss: 0.63 score: 0.9667
[3000/10000] loss: 0.62 score: 0.9833
[4000/10000] loss: 0.61 score: 0.9833
[5000/10000] loss: 0.60 score: 0.9833
[6000/10000] loss: 0.60 score: 0.9833
[7000/10000] loss: 0.60 score: 0.9833
[8000/10000] loss: 0.60 score: 0.9833
[9000/10000] loss: 0.60 score: 0.9833
[10000/10000] loss: 0.60 score: 0.9833
