## Multiclass Classification - Iris [pytorch]

- created: 2024.11.17

### Data

In [1]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load data
x, y = load_iris(return_X_y=True)
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled = scaler.transform(x_test)

print(x_train_scaled.shape, y_train.shape)
print(x_test_scaled.shape, y_test.shape)

(120, 4) (120,)
(30, 4) (30,)


### [scikit-learn] Modeling and Training

In [2]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# sklearn model
model = SVC()
model.fit(x_train_scaled, y_train)

# Training
y_pred = model.predict(x_train_scaled)
acc = accuracy_score(y_train, y_pred)
print(f'Train Accuracy: {acc:.4f}')

# Evaluation
y_pred = model.predict(x_test_scaled)
acc = accuracy_score(y_test, y_pred)
print(f'Test  Accuracy: {acc:.4f}')

Train Accuracy: 0.9667
Test  Accuracy: 1.0000


### [pytorch-1] Modeling and Training

- Manual backward propagation
- Manual update of weights and biases

In [35]:
import torch
import torch.nn as nn
import torch.optim as optim

def accuracy(y_pred, y_true):
    y_pred = y_pred.argmax(dim=1)
    return torch.eq(y_pred, y_true).float().mean()

## Data
x = torch.tensor(x_train_scaled).float()
y = torch.tensor(y_train).long()
y = nn.functional.one_hot(y).float()
print(x.shape, y.shape)

torch.Size([120, 4]) torch.Size([120, 3])


In [None]:
## Model: 2-layer MLP
torch.manual_seed(42)
input_size, hidden_size, output_size = 4, 100, 3

w1 = torch.randn(input_size, hidden_size)
b1 = torch.zeros(hidden_size)
w2 = torch.randn(hidden_size, output_size)
b2 = torch.zeros(output_size)

## Train
n_epochs = 10000
learning_rate = 0.01

for epoch in range(1, n_epochs + 1):
    # Forward propagation
    z1 = torch.mm(x, w1) + b1
    a1 = torch.sigmoid(z1)
    z2 = torch.mm(a1, w2) + b2
    out = torch.softmax(z2, dim=1)

    loss = nn.functional.cross_entropy(out, y)
    score = accuracy(out, y.argmax(dim=1))

    # Backward progapation
    grad_z2 = (z2 - y) / y.shape[0]
    grad_w2 = torch.mm(a1.T, grad_z2)
    grad_b2 = torch.sum(grad_z2, dim=0)

    grad_a1 = torch.mm(grad_z2, w2.T)
    grad_z1 = a1 * (1 - a1) * grad_a1
    grad_w1 = torch.mm(x.T, grad_z1)
    grad_b1 = torch.sum(grad_z1, dim=0)

    # Update weights and biases
    w1 -= learning_rate * grad_w1
    b1 -= learning_rate * grad_b1
    w2 -= learning_rate * grad_w2
    b2 -= learning_rate * grad_b2

    if epoch % (n_epochs // 10) == 0:
        print(f"[{epoch}/{n_epochs}] loss: {loss.item():.2f} score: {score.item():.4f}")

[1000/10000] loss: 0.92 score: 0.9583
[2000/10000] loss: 0.91 score: 0.9667
[3000/10000] loss: 0.90 score: 0.9667
[4000/10000] loss: 0.90 score: 0.9667
[5000/10000] loss: 0.89 score: 0.9750
[6000/10000] loss: 0.89 score: 0.9750
[7000/10000] loss: 0.89 score: 0.9750
[8000/10000] loss: 0.89 score: 0.9750
[9000/10000] loss: 0.89 score: 0.9750
[10000/10000] loss: 0.89 score: 0.9750


### [pytorch-2] Modeling and Training

- Automatic backward propagation: torch.autograd.grad()
- Manual update of weights and biases

In [63]:
## Model: 2-layer MLP
torch.manual_seed(42)
input_size, hidden_size, output_size = 4, 100, 3

w1 = torch.randn(input_size, hidden_size).requires_grad_()
b1 = torch.zeros(hidden_size).requires_grad_()
w2 = torch.randn(hidden_size, output_size).requires_grad_()
b2 = torch.zeros(output_size).requires_grad_()

## Train
n_epochs = 10000
learning_rate = 0.01

for epoch in range(1, n_epochs + 1):
    # Forward propagation
    z1 = torch.mm(x, w1) + b1
    a1 = torch.sigmoid(z1)
    z2 = torch.mm(a1, w2) + b2
    out = torch.softmax(z2, dim=1)

    loss = nn.functional.cross_entropy(out, y)
    score = accuracy(out, y.argmax(dim=1))

    # # Backward progapation
    grads = torch.autograd.grad(loss, [w1, b1, w2, b2], create_graph=True)

    # Update weights and biases
    with torch.no_grad():
        w1 -= learning_rate * grads[0]
        b1 -= learning_rate * grads[1]
        w2 -= learning_rate * grads[2]
        b2 -= learning_rate * grads[3]

    if epoch % (n_epochs // 10) == 0:
        print(f"[{epoch}/{n_epochs}] loss: {loss.item():.2f} score: {score.item():.4f}")

[1000/10000] loss: 0.89 score: 0.6583
[2000/10000] loss: 0.70 score: 0.8917
[3000/10000] loss: 0.64 score: 0.9667
[4000/10000] loss: 0.62 score: 0.9583
[5000/10000] loss: 0.61 score: 0.9583
[6000/10000] loss: 0.61 score: 0.9667
[7000/10000] loss: 0.60 score: 0.9667
[8000/10000] loss: 0.60 score: 0.9667
[9000/10000] loss: 0.60 score: 0.9667
[10000/10000] loss: 0.59 score: 0.9750


### [pytorch-3] Modeling and Training

- Automatic backward propagation: loss.backward()
- Manual update of weights and biases

In [64]:
## Model: 2-layer MLP
torch.manual_seed(42)
input_size, hidden_size, output_size = 4, 100, 3

w1 = torch.randn(input_size, hidden_size).requires_grad_()
b1 = torch.zeros(hidden_size).requires_grad_()
w2 = torch.randn(hidden_size, output_size).requires_grad_()
b2 = torch.zeros(output_size).requires_grad_()

## Train
n_epochs = 10000
learning_rate = 0.01

for epoch in range(1, n_epochs + 1):
    # Forward propagation
    z1 = torch.mm(x, w1) + b1
    a1 = torch.sigmoid(z1)
    z2 = torch.mm(a1, w2) + b2
    out = torch.softmax(z2, dim=1)

    loss = nn.functional.cross_entropy(out, y)
    score = accuracy(out, y.argmax(dim=1))

    # # Backward progapation
    loss.backward()

    # Update weights and biases
    with torch.no_grad():
        w1 -= learning_rate * w1.grad
        b1 -= learning_rate * b1.grad
        w2 -= learning_rate * w2.grad
        b2 -= learning_rate * b2.grad

        w1.grad.zero_()
        b1.grad.zero_()
        w2.grad.zero_()
        b2.grad.zero_()

    if epoch % (n_epochs // 10) == 0:
        print(f"[{epoch}/{n_epochs}] loss: {loss.item():.2f} score: {score.item():.4f}")

[1000/10000] loss: 0.89 score: 0.6583
[2000/10000] loss: 0.70 score: 0.8917
[3000/10000] loss: 0.64 score: 0.9667
[4000/10000] loss: 0.62 score: 0.9583
[5000/10000] loss: 0.61 score: 0.9583
[6000/10000] loss: 0.61 score: 0.9667
[7000/10000] loss: 0.60 score: 0.9667
[8000/10000] loss: 0.60 score: 0.9667
[9000/10000] loss: 0.60 score: 0.9667
[10000/10000] loss: 0.59 score: 0.9750


### [pytorch-4] Modeling and Training

- Automatic backward propagation: loss.backward()
- Automatic update of weights and biases: optimizer

In [None]:
## Model: 2-layer MLP
torch.manual_seed(42)
input_size, hidden_size, output_size = 4, 100, 3

w1 = torch.randn(input_size, hidden_size).requires_grad_()
b1 = torch.zeros(hidden_size).requires_grad_()
w2 = torch.randn(hidden_size, output_size).requires_grad_()
b2 = torch.zeros(output_size).requires_grad_()

## Train
n_epochs = 10000
learning_rate = 0.01

optimizer = optim.SGD([w1, b1, w2, b2], lr=learning_rate)

for epoch in range(1, n_epochs + 1):
    # Forward propagation
    z1 = torch.mm(x, w1) + b1
    a1 = torch.sigmoid(z1)
    z2 = torch.mm(a1, w2) + b2
    out = torch.softmax(z2, dim=1)

    loss = nn.functional.cross_entropy(out, y)
    score = accuracy(out, y.argmax(dim=1))

    # # Backward progapation
    loss.backward()

    # Update weights and biases
    optimizer.step()
    optimizer.zero_grad()

    if epoch % (n_epochs // 10) == 0:
        print(f"[{epoch}/{n_epochs}] loss: {loss.item():.2f} score: {score.item():.4f}")

[1000/10000] loss: 0.89 score: 0.6583
[2000/10000] loss: 0.70 score: 0.8917
[3000/10000] loss: 0.64 score: 0.9667
[4000/10000] loss: 0.62 score: 0.9583
[5000/10000] loss: 0.61 score: 0.9583
[6000/10000] loss: 0.61 score: 0.9667
[7000/10000] loss: 0.60 score: 0.9667
[8000/10000] loss: 0.60 score: 0.9667
[9000/10000] loss: 0.60 score: 0.9667
[10000/10000] loss: 0.59 score: 0.9750


### [pytorch-5] Modeling and Training

- MLP model: torch.nn.Linear()
- Automatic backward propagation: loss.backward()
- Automatic update of weights and biases: optimizer

In [68]:
## Model: 2-layer MLP
torch.manual_seed(42)
input_size, hidden_size, output_size = 4, 100, 3

model = nn.Sequential(
    nn.Linear(input_size, hidden_size),
    nn.Sigmoid(),
    nn.Linear(hidden_size, output_size),
    nn.Softmax(dim=1),
)

## Train
n_epochs = 10000
learning_rate = 0.01

optimizer = optim.SGD(model.parameters(), lr=learning_rate)

for epoch in range(1, n_epochs + 1):
    # Forward propagation
    out = model(x)

    loss = nn.functional.cross_entropy(out, y)
    score = accuracy(out, y.argmax(dim=1))

    # # Backward progapation
    loss.backward()

    # Update weights and biases
    optimizer.step()
    optimizer.zero_grad()

    if epoch % (n_epochs // 10) == 0:
        print(f"[{epoch}/{n_epochs}] loss: {loss.item():.2f} score: {score.item():.4f}")

[1000/10000] loss: 0.84 score: 0.8250
[2000/10000] loss: 0.77 score: 0.8917
[3000/10000] loss: 0.73 score: 0.9000
[4000/10000] loss: 0.71 score: 0.9167
[5000/10000] loss: 0.69 score: 0.9417
[6000/10000] loss: 0.67 score: 0.9417
[7000/10000] loss: 0.66 score: 0.9417
[8000/10000] loss: 0.65 score: 0.9500
[9000/10000] loss: 0.64 score: 0.9500
[10000/10000] loss: 0.63 score: 0.9583


### [pytorch-6] Modeling and Training

- MLP model: torch.nn.Linear() with initialization
- Automatic backward propagation: loss.backward()
- Automatic update of weights and biases: optimizer

In [None]:
## Model: 2-layer MLP
torch.manual_seed(42)
input_size, hidden_size, output_size = 4, 100, 3

class MLP(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.linear1 = nn.Linear(input_dim, hidden_dim)
        self.linear2 = nn.Linear(hidden_dim, output_dim)
        
        ## initialization
        torch.nn.init.normal_(self.linear1.weight)
        torch.nn.init.normal_(self.linear2.weight)

    def forward(self, x):
        x = self.linear1(x)
        x = torch.sigmoid(x)
        x = self.linear2(x)  
        x = torch.softmax(x, dim=1)
        return x

model = MLP(input_size, hidden_size, output_size)

## Train
n_epochs = 10000
learning_rate = 0.01

optimizer = optim.SGD(model.parameters(), lr=learning_rate)

for epoch in range(1, n_epochs + 1):
    # Forward propagation
    out = model(x)

    loss = nn.functional.cross_entropy(out, y)
    score = accuracy(out, y.argmax(dim=1))

    # # Backward progapation
    loss.backward()

    # Update weights and biases
    optimizer.step()
    optimizer.zero_grad()

    if epoch % (n_epochs // 10) == 0:
        print(f"[{epoch}/{n_epochs}] loss: {loss.item():.2f} score: {score.item():.4f}")

[1000/10000] loss: 0.68 score: 0.9000
[2000/10000] loss: 0.65 score: 0.9250
[3000/10000] loss: 0.63 score: 0.9583
[4000/10000] loss: 0.62 score: 0.9500
[5000/10000] loss: 0.61 score: 0.9500
[6000/10000] loss: 0.60 score: 0.9500
[7000/10000] loss: 0.60 score: 0.9667
[8000/10000] loss: 0.60 score: 0.9750
[9000/10000] loss: 0.59 score: 0.9750
[10000/10000] loss: 0.59 score: 0.9750


### [pytorch-7] Modeling and Training

- Model output: logit without activation function
- Loss Function: nn.CrossEntropyLoss() (No one-hot encoding, long type)
- MLP model: torch.nn.Linear() with initialization
- Automatic backward propagation: loss.backward()
- Automatic update of weights and biases: optimizer

In [None]:
## Model: 2-layer MLP
torch.manual_seed(42)
input_size, hidden_size, output_size = 4, 100, 3

class MLP(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.linear1 = nn.Linear(input_dim, hidden_dim)
        self.linear2 = nn.Linear(hidden_dim, output_dim)
        
        ## initialization
        torch.nn.init.normal_(self.linear1.weight)
        torch.nn.init.normal_(self.linear2.weight)

    def forward(self, x):
        x = self.linear1(x)
        x = torch.sigmoid(x)
        x = self.linear2(x)  
        return x

model = MLP(input_size, hidden_size, output_size)

## Train
n_epochs = 10000
learning_rate = 0.01

loss_fn = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=learning_rate)

for epoch in range(1, n_epochs + 1):
    # Forward propagation
    out = model(x)

    loss = loss_fn(out, y.argmax(dim=1))
    score = accuracy(out, y.argmax(dim=1))

    # # Backward progapation
    loss.backward()

    # Update weights and biases
    optimizer.step()
    optimizer.zero_grad()

    if epoch % (n_epochs // 10) == 0:
        print(f"[{epoch}/{n_epochs}] loss: {loss.item():.2f} score: {score.item():.4f}")

[1000/10000] loss: 0.14 score: 0.9500
[2000/10000] loss: 0.10 score: 0.9583
[3000/10000] loss: 0.09 score: 0.9750
[4000/10000] loss: 0.08 score: 0.9750
[5000/10000] loss: 0.07 score: 0.9750
[6000/10000] loss: 0.07 score: 0.9750
[7000/10000] loss: 0.07 score: 0.9750
[8000/10000] loss: 0.06 score: 0.9750
[9000/10000] loss: 0.06 score: 0.9750
[10000/10000] loss: 0.06 score: 0.9750


### [pytorch-8] Modeling and Training

- Model output: log softmax of logit
- Loss functin: nn.NLLLoss() negative log likelihood loss (No one-hot encoding)
- MLP model: torch.nn.Linear() with initialization
- Automatic backward propagation: loss.backward()
- Automatic update of weights and biases: optimizer

In [74]:
## Model: 2-layer MLP
torch.manual_seed(42)
input_size, hidden_size, output_size = 4, 100, 3

class MLP(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.linear1 = nn.Linear(input_dim, hidden_dim)
        self.linear2 = nn.Linear(hidden_dim, output_dim)

        ## initialization
        torch.nn.init.normal_(self.linear1.weight)
        torch.nn.init.normal_(self.linear2.weight)

    def forward(self, x):
        x = self.linear1(x)
        x = torch.sigmoid(x)
        x = self.linear2(x)
        x = nn.functional.log_softmax(x, dim=1) # x = nn.LogSoftmax(dim=1)(x)
        return x

model = MLP(input_size, hidden_size, output_size)

## Train
n_epochs = 10000
learning_rate = 0.01

loss_fn = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=learning_rate)

for epoch in range(1, n_epochs + 1):
    # Forward propagation
    out = model(x)

    loss = loss_fn(out, y.argmax(dim=1))
    score = accuracy(out, y.argmax(dim=1))

    # # Backward progapation
    loss.backward()

    # Update weights and biases
    optimizer.step()
    optimizer.zero_grad()

    if epoch % (n_epochs // 10) == 0:
        print(f"[{epoch}/{n_epochs}] loss: {loss.item():.2f} score: {score.item():.4f}")

[1000/10000] loss: 0.14 score: 0.9500
[2000/10000] loss: 0.10 score: 0.9583
[3000/10000] loss: 0.09 score: 0.9750
[4000/10000] loss: 0.08 score: 0.9750
[5000/10000] loss: 0.07 score: 0.9750
[6000/10000] loss: 0.07 score: 0.9750
[7000/10000] loss: 0.07 score: 0.9750
[8000/10000] loss: 0.06 score: 0.9750
[9000/10000] loss: 0.06 score: 0.9750
[10000/10000] loss: 0.06 score: 0.9750
