In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
from torch.nn import functional as F
from torch.utils.data import DataLoader, TensorDataset
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split

In [None]:
class MLP(nn.Module):
    """
    Two-layer feed-forward block: expand to a wider hidden size, apply GELU,
    then project back to the original size. Dropout follows each linear layer.

    :param dim: Size of the last dimension of both the input and the output.
    :param multiplier: Factor by which ``dim`` is scaled to get the hidden size.
    :param bias: If True, both linear layers learn an additive bias.
    :param dropout: Dropout probability used after each linear layer.
    """
    def __init__(
        self,
        dim: int,
        multiplier: int,
        bias: bool = True,
        dropout: float = 0.1,
    ):
        super().__init__()
        width = multiplier * dim

        # Expansion stage: dim -> width, non-linearity, then dropout.
        expand = [
            nn.Linear(dim, width, bias=bias),
            nn.GELU(),
            nn.Dropout(dropout),
        ]
        # Projection stage: width -> dim, then dropout.
        project = [
            nn.Linear(width, dim, bias=bias),
            nn.Dropout(dropout),
        ]

        self.fc1 = nn.Sequential(*expand)
        self.fc2 = nn.Sequential(*project)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Apply the expansion stage followed by the projection stage.

        :param x: Input tensor of shape (*, dim).
        :return: Output tensor of shape (*, dim).
        """
        return self.fc2(self.fc1(x))

In [None]:
def load_and_preprocess_data(test_size: float = 0.2) -> tuple[TensorDataset, TensorDataset]:
    """
    Load the Iris dataset, split it into training and testing splits, and standardize
    the features.

    The standardization statistics (per-feature mean and standard deviation) are computed
    on the training split only and then applied to both splits, so no information from the
    test split leaks into preprocessing. As a result the training features have a mean of 0
    and a standard deviation of 1; the test features are only approximately standardized.

    As a side effect, a scatter plot of the raw features projected onto their first two
    principal components (colored by class label) is shown.

    :param test_size: Proportion of the dataset to use as the test split.
    :return: A tuple of (train_dataset, test_dataset) as PyTorch TensorDatasets, with
        float32 features and int64 labels.
    """
    iris = load_iris()
    X = iris.data
    y = iris.target

    # Quick visual check of the raw (unscaled) data in 2D.
    pca = PCA(n_components=2)
    X_pca = pca.fit_transform(X)
    plt.scatter(X_pca[:, 0], X_pca[:, 1], c=y)
    plt.show()

    # Split first so the scaling statistics below never see the test samples.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=42)

    mean = X_train.mean(axis=0, keepdims=True)
    std = X_train.std(axis=0, keepdims=True)
    # A constant feature would have std == 0; leave it centered instead of dividing by zero.
    std[std == 0] = 1.0
    X_train = (X_train - mean) / std
    X_test = (X_test - mean) / std

    X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
    y_train_tensor = torch.tensor(y_train, dtype=torch.long)
    X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
    y_test_tensor = torch.tensor(y_test, dtype=torch.long)

    train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
    test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

    return train_dataset, test_dataset

In [None]:
def train_model(
    model: nn.Module,
    train_loader: DataLoader,
    optimizer: optim.Optimizer,
    num_epochs: int = 100
) -> None:
    """
    Train the model with a cross-entropy objective.

    The model is switched to training mode and left in that mode on return. Every 10th
    epoch the mean loss per sample for that epoch is printed. Batch losses are weighted
    by batch size, so a smaller final batch does not skew the reported value.

    :param model: The neural network model to be trained; must output class logits.
    :param train_loader: DataLoader for the training dataset, yielding (inputs, labels).
    :param optimizer: Optimizer for model parameters (e.g., SGD or Adam).
    :param num_epochs: Number of epochs to train the model for.
    """
    model.train()
    for epoch in range(1, num_epochs + 1):
        loss_sum = 0.0
        num_samples = 0
        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = F.cross_entropy(outputs, labels)
            loss.backward()
            optimizer.step()

            # `loss` is a per-batch mean; scale it back to a sum so batches of
            # different sizes contribute proportionally to the epoch average.
            batch_size = labels.size(0)
            loss_sum += loss.item() * batch_size
            num_samples += batch_size

        if epoch % 10 == 0:
            # An empty loader yields no samples; report NaN instead of dividing by zero.
            epoch_loss = loss_sum / num_samples if num_samples else float("nan")
            print(f"Epoch [{epoch}/{num_epochs}], Loss: {epoch_loss:.4f}")

In [None]:
@torch.no_grad()
def test_model(
    model: nn.Module,
    test_loader: DataLoader,
) -> None:
    """
    Evaluate the trained model on the test dataset.

    :param model: The trained neural network model.
    :param test_loader: DataLoader for the test dataset.
    """
    model.eval()
    correct = 0
    total = 0
    test_loss = 0.0
    for inputs, labels in test_loader:
        outputs = model(inputs)
        loss = F.cross_entropy(outputs, labels)
        test_loss += loss.item()
        _, predicted = torch.max(outputs, dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    print(f"Test Loss: {test_loss/len(test_loader):.4f}, Test Accuracy: {accuracy:.2f}%")

In [None]:
# Build the standardized train/test splits and wrap them in mini-batch loaders.
train_dataset, test_dataset = load_and_preprocess_data()

# Only the training loader reshuffles; the test loader keeps a fixed order.
train_loader = DataLoader(dataset=train_dataset, batch_size=16, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=16, shuffle=False)

print(f"Training on {len(train_dataset)} samples")
print(f"Testing on {len(test_dataset)} samples")

In [None]:
# Seed PyTorch's global RNG so that weight initialization, and the shuffling and
# dropout that draw from it during training, repeat on a Restart & Run All.
torch.manual_seed(42)

# Classifier: 4 input features -> 5-dim embedding -> MLP block -> 3 class logits.
model = nn.Sequential(
    nn.Linear(4, 5),
    MLP(5, 4),
    nn.Linear(5, 3),
)
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [None]:
# Fit the classifier on the training split; a loss line is printed every 10 epochs.
train_model(
    model=model,
    train_loader=train_loader,
    optimizer=optimizer,
    num_epochs=150,
)

In [None]:
# Report loss and accuracy on the held-out test split.
test_model(model=model, test_loader=test_loader)

In [None]:
# Visualize what the network learned: project the activations of every layer except
# the final classification layer onto their first two principal components.

# Switch to evaluation mode explicitly so dropout is disabled here regardless of
# which cell ran last; otherwise the extracted features would be randomly masked.
model.eval()
with torch.no_grad():
    # Train samples first, then test samples, matching the order of the labels below.
    x = torch.cat([train_dataset.tensors[0], test_dataset.tensors[0]])
    y = torch.cat([train_dataset.tensors[1], test_dataset.tensors[1]])

    # model[:-1] drops the last layer, leaving the penultimate representation.
    features = model[:-1](x)
    pca = PCA(n_components=2)
    # Hand scikit-learn / matplotlib NumPy arrays rather than relying on implicit
    # tensor-to-array conversion.
    features_pca = pca.fit_transform(features.numpy())
    plt.scatter(features_pca[:, 0], features_pca[:, 1], c=y.numpy())
    plt.title("PCA of learned features")
    plt.xlabel("PC 1")
    plt.ylabel("PC 2")
    plt.show()