In [1]:
# Multi-Class Classification on Iris Dataset using PyTorch

import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, confusion_matrix

In [2]:
# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cpu


In [3]:

# Load Iris dataset from URL
data = pd.read_csv("https://gist.githubusercontent.com/netj/8836201/raw/6f9306ad21398ea43cba4f7d537619d0e07d5ae3/iris.csv")


In [4]:
# Encode target labels as integers
le = LabelEncoder()
data['variety'] = le.fit_transform(data['variety'])  # setosa=0, versicolor=1, virginica=2

In [5]:
# Features and labels
X = data.drop('variety', axis=1).values
y = data['variety'].values


In [6]:
# Scale features
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [None]:
# Convert to PyTorch tensors
X = torch.tensor(X, dtype=torch.float32)
y = torch.tensor(y, dtype=torch.long)  # ALWAYS use long for multi-class classification

In [8]:
# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

In [9]:
# Send data to device
X_train, X_test = X_train.to(device), X_test.to(device)
y_train, y_test = y_train.to(device), y_test.to(device)

In [None]:
# Define a simple feedforward neural network
class IrisNet(nn.Module):
    def __init__(self):
        super(IrisNet, self).__init__()
        self.network = nn.Sequential(
            nn.Linear(4, 10),  # input layer
            nn.ReLU(),
            nn.Linear(10, 6),  # hidden layer
            nn.ReLU(),
            nn.Linear(6, 3)    # output layer for 3 classes
            
                    # We don't have Softmax here because CrossEntropyLoss() includes this in the loss evaluation function
        )

    def forward(self, x):
        return self.network(x)

In [None]:
# Instantiate model, loss function and optimizer
model = IrisNet().to(device)
criterion = nn.CrossEntropyLoss()  # for multi-class classification, this already includes Softmax output function
optimizer = optim.Adam(model.parameters(), lr=0.01)

In [12]:
# Training loop
n_epochs = 100
for epoch in range(n_epochs):
    model.train()
    optimizer.zero_grad()
    outputs = model(X_train)  # raw logits
    loss = criterion(outputs, y_train)
    loss.backward()
    optimizer.step()

    # Compute training accuracy
    _, predicted_train = torch.max(outputs, 1)
    train_acc = accuracy_score(y_train.cpu(), predicted_train.cpu())

    # Validation accuracy and loss
    model.eval()
    with torch.no_grad():
        val_outputs = model(X_test)
        val_loss = criterion(val_outputs, y_test)
        _, predicted_val = torch.max(val_outputs, 1)
        val_acc = accuracy_score(y_test.cpu(), predicted_val.cpu())

    if (epoch + 1) % 10 == 0:
        print(f"Epoch [{epoch+1}/{n_epochs}], Loss: {loss.item():.4f}, Val Loss: {val_loss.item():.4f}, Train Acc: {train_acc:.4f}, Val Acc: {val_acc:.4f}")


Epoch [10/100], Loss: 0.9282, Val Loss: 0.8584, Train Acc: 0.5000, Val Acc: 0.6842
Epoch [20/100], Loss: 0.6374, Val Loss: 0.5350, Train Acc: 0.8482, Val Acc: 0.8421
Epoch [30/100], Loss: 0.4343, Val Loss: 0.3503, Train Acc: 0.8661, Val Acc: 0.8684
Epoch [40/100], Loss: 0.2930, Val Loss: 0.2202, Train Acc: 0.9196, Val Acc: 0.9474
Epoch [50/100], Loss: 0.1909, Val Loss: 0.1317, Train Acc: 0.9286, Val Acc: 0.9737
Epoch [60/100], Loss: 0.1222, Val Loss: 0.0738, Train Acc: 0.9643, Val Acc: 1.0000
Epoch [70/100], Loss: 0.0872, Val Loss: 0.0477, Train Acc: 0.9643, Val Acc: 1.0000
Epoch [80/100], Loss: 0.0704, Val Loss: 0.0347, Train Acc: 0.9732, Val Acc: 1.0000
Epoch [90/100], Loss: 0.0622, Val Loss: 0.0276, Train Acc: 0.9732, Val Acc: 1.0000
Epoch [100/100], Loss: 0.0571, Val Loss: 0.0217, Train Acc: 0.9821, Val Acc: 1.0000


In [13]:
# Final evaluation
model.eval()
with torch.no_grad():
    final_outputs = model(X_test)
    _, final_pred = torch.max(final_outputs, 1)
    acc = accuracy_score(y_test.cpu(), final_pred.cpu())
    cm = confusion_matrix(y_test.cpu(), final_pred.cpu())


In [14]:

print("\nTest Accuracy:", acc)
print("Confusion Matrix:\n", cm)


Test Accuracy: 1.0
Confusion Matrix:
 [[15  0  0]
 [ 0 11  0]
 [ 0  0 12]]
