[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/rslab-ntua/MSc_GBDA/blob/master/2025/GBDA_2025_Lab1.ipynb)

In [None]:
# # Introduction to Neural Networks: Perceptrons and MLPs
#
# This notebook covers the fundamentals of neural networks, starting with the Perceptron
# and moving to Multi-Layer Perceptrons (MLPs) using PyTorch.

import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import seaborn as sns

# Seed NumPy and PyTorch with one shared value so that data generation and
# weight initialization are repeatable from run to run
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# Render every figure with matplotlib's "ggplot" style sheet
plt.style.use("ggplot")

In [None]:
# ## Part 1: Creating and Visualizing Dummy Data
#
# First, let's create some linearly separable data to demonstrate the Perceptron


# Generate linearly separable data
def generate_linearly_separable_data(n_samples=100):
    """
    Generate two Gaussian clusters for binary classification.

    Class 0 is drawn around (-2, -2) and class 1 around (2, 2), each with unit
    standard deviation in both features. The clusters are far apart, so a
    straight line separates them well (a rare outlier may still cross over).

    Args:
        n_samples: Total number of samples to generate. The first
            ``n_samples // 2`` rows are class 0 and the remaining rows are
            class 1, so odd values work too (class 1 gets the extra sample).

    Returns:
        X: Features, float array of shape (n_samples, 2)
        y: Labels (0 or 1), float array of shape (n_samples,)
    """
    # Size each cluster from the slice it fills; using n_samples // 2 for both
    # would leave the second slice one row too long when n_samples is odd.
    n_class0 = n_samples // 2
    n_class1 = n_samples - n_class0

    X = np.zeros((n_samples, 2))
    y = np.zeros(n_samples)  # class 0 rows keep their zero label

    # Draw order (class 0 feature 1, class 0 feature 2, class 1 feature 1,
    # class 1 feature 2) is kept so a seeded run reproduces the same points.
    X[:n_class0, 0] = np.random.normal(loc=-2, scale=1, size=n_class0)
    X[:n_class0, 1] = np.random.normal(loc=-2, scale=1, size=n_class0)

    X[n_class0:, 0] = np.random.normal(loc=2, scale=1, size=n_class1)
    X[n_class0:, 1] = np.random.normal(loc=2, scale=1, size=n_class1)
    y[n_class0:] = 1

    return X, y


# Draw 200 samples from the two-cluster generator
X_linear, y_linear = generate_linearly_separable_data(200)

# One scatter call per class so each class gets its own colour and legend entry
plt.figure(figsize=(10, 6))
for class_id, class_color, class_label in (
    (0, "blue", "Class 0"),
    (1, "red", "Class 1"),
):
    class_mask = y_linear == class_id
    plt.scatter(
        X_linear[class_mask, 0],
        X_linear[class_mask, 1],
        color=class_color,
        label=class_label,
        alpha=0.7,
    )
plt.title("Linearly Separable Data", fontsize=15)
plt.xlabel("Feature 1", fontsize=12)
plt.ylabel("Feature 2", fontsize=12)
plt.legend(fontsize=12)
plt.grid(True, alpha=0.3)
plt.show()


# Now let's create some non-linearly separable data for the MLP
def generate_nonlinear_data(n_samples=100):
    """
    Generate non-linearly separable data (XOR-like pattern).

    Points are sampled from a standard normal distribution scaled by 2 and
    labelled by quadrant, so no single straight line can separate the classes.

    Args:
        n_samples: Number of samples to generate

    Returns:
        X: Features, float array of shape (n_samples, 2)
        y: Labels (0 or 1), float array of shape (n_samples,)
    """
    X = np.random.randn(n_samples, 2) * 2

    # XOR-like pattern: the product of the two coordinates is positive exactly
    # when they share a sign, i.e. in quadrants 1 and 3 (class 1). Quadrants 2
    # and 4, and any point lying exactly on an axis, are class 0.
    y = (X[:, 0] * X[:, 1] > 0).astype(float)

    return X, y


# Draw 200 samples following the XOR-like quadrant rule
X_nonlinear, y_nonlinear = generate_nonlinear_data(200)

# One scatter call per class so each class gets its own colour and legend entry
plt.figure(figsize=(10, 6))
for class_id, class_color, class_label in (
    (0, "blue", "Class 0"),
    (1, "red", "Class 1"),
):
    class_mask = y_nonlinear == class_id
    plt.scatter(
        X_nonlinear[class_mask, 0],
        X_nonlinear[class_mask, 1],
        color=class_color,
        label=class_label,
        alpha=0.7,
    )
plt.title("Non-Linearly Separable Data (XOR-like Pattern)", fontsize=15)
plt.xlabel("Feature 1", fontsize=12)
plt.ylabel("Feature 2", fontsize=12)
plt.legend(fontsize=12)
plt.grid(True, alpha=0.3)

# Dashed lines through the origin mark the quadrant borders that define the classes
quadrant_line_style = dict(color="k", linestyle="--", alpha=0.3)
plt.axhline(y=0, **quadrant_line_style)
plt.axvline(x=0, **quadrant_line_style)
plt.show()

In [None]:
# ## Part 2: Simple Perceptron Implementation in PyTorch
#
# Now let's implement a simple Perceptron model using PyTorch


class Perceptron(nn.Module):
    """
    Single-neuron binary classifier written with PyTorch.

    Structure:
    - The input features feed one fully connected unit (weights plus bias)
    - A sigmoid squashes the unit's output into (0, 1) so it can be read as a
      probability (the original Perceptron used a hard step function instead)
    """

    def __init__(self, input_dim):
        """
        Create the single linear unit.

        Args:
            input_dim: Number of input features
        """
        super().__init__()
        # One output value per sample is enough for binary classification
        self.linear = nn.Linear(input_dim, 1)

    def forward(self, x):
        """
        Compute the model output for a batch of samples.

        Args:
            x: Input tensor of shape (batch_size, input_dim)

        Returns:
            Tensor of shape (batch_size, 1) with values in (0, 1)
        """
        # Weighted sum (wx + b) followed by the sigmoid squashing
        logits = self.linear(x)
        return logits.sigmoid()


# Wrap the NumPy data as float32 tensors; the labels become a column vector
# so they line up with the (N, 1) output of the model
X_linear_tensor = torch.FloatTensor(X_linear)
y_linear_tensor = torch.FloatTensor(y_linear).unsqueeze(1)

# Build the model and show its layers
perceptron = Perceptron(input_dim=2)
print(perceptron)

# Binary cross-entropy on the sigmoid output, minimized with plain SGD
criterion = nn.BCELoss()
optimizer = optim.SGD(perceptron.parameters(), lr=0.1)

# Full-batch gradient descent: every epoch uses the whole dataset once
num_epochs = 1000
losses = []

for epoch in range(num_epochs):
    # Clear gradients left over from the previous step
    optimizer.zero_grad()

    # Forward pass and loss
    outputs = perceptron(X_linear_tensor)
    loss = criterion(outputs, y_linear_tensor)
    losses.append(loss.item())

    # Backpropagate and update the weights
    loss.backward()
    optimizer.step()

    # Report every 100th epoch
    report_due = (epoch + 1) % 100 == 0
    if report_due:
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")

# Loss recorded at each epoch
plt.figure(figsize=(10, 6))
plt.plot(losses)
plt.xlabel("Epoch", fontsize=12)
plt.ylabel("Loss", fontsize=12)
plt.title("Perceptron Training Loss", fontsize=15)
plt.grid(True, alpha=0.3)
plt.show()


# Visualize the decision boundary
def plot_decision_boundary(model, X, y, title=None):
    """
    Plot the decision boundary of a model over two input features.

    The model is evaluated on a dense grid covering the data range (plus a
    margin of 1 on every side) and its output is drawn as filled contours
    with the data points on top.

    Args:
        model: Trained PyTorch model taking a float tensor with two columns
            and returning one value per row
        X: Input features, NumPy array; columns 0 and 1 are plotted
        y: Target labels (0 or 1)
        title: Optional plot title. When omitted, the title is built from the
            model's class name (e.g. "Perceptron Decision Boundary"), so plots
            of other models are not mislabelled as a Perceptron.
    """
    if title is None:
        title = f"{type(model).__name__} Decision Boundary"

    # Set min and max values and give it some padding
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    h = 0.01

    # Generate a grid of points with distance h between them
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))

    # Predict the function value for the whole grid. Gradients are never
    # needed for plotting, so autograd is switched off to avoid recording a
    # graph for every grid point.
    grid_points = torch.FloatTensor(np.c_[xx.ravel(), yy.ravel()])
    with torch.no_grad():
        Z = model(grid_points)
    Z = Z.numpy().reshape(xx.shape)

    # Plot the contour and the data points
    plt.figure(figsize=(10, 6))
    contour = plt.contourf(xx, yy, Z, cmap=plt.cm.Spectral, alpha=0.8)
    plt.colorbar(contour, label='Probability of Class (+)')
    plt.scatter(X[y == 0, 0], X[y == 0, 1], color="blue", label="Class 0", alpha=0.7)
    plt.scatter(X[y == 1, 0], X[y == 1, 1], color="red", label="Class 1", alpha=0.7)
    plt.title(title, fontsize=15)
    plt.xlabel("Feature 1", fontsize=12)
    plt.ylabel("Feature 2", fontsize=12)
    plt.legend(fontsize=12)
    plt.grid(True, alpha=0.3)
    plt.show()


# Show what the Perceptron trained on the linear data has learned
plot_decision_boundary(perceptron, X_linear, y_linear)

# Next experiment: the same model type on the XOR-like data.
# Labels become a column vector to line up with the (N, 1) model output.
X_nonlinear_tensor = torch.FloatTensor(X_nonlinear)
y_nonlinear_tensor = torch.FloatTensor(y_nonlinear).unsqueeze(1)

# Fresh, untrained Perceptron with its own SGD optimizer
perceptron_nonlinear = Perceptron(input_dim=2)
optimizer = optim.SGD(perceptron_nonlinear.parameters(), lr=0.1)
losses_nonlinear = []

# Full-batch training, reusing the loss function and epoch count defined earlier
for epoch in range(num_epochs):
    optimizer.zero_grad()

    outputs = perceptron_nonlinear(X_nonlinear_tensor)
    loss = criterion(outputs, y_nonlinear_tensor)
    losses_nonlinear.append(loss.item())

    loss.backward()
    optimizer.step()

    report_due = (epoch + 1) % 100 == 0
    if report_due:
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")

# Loss recorded at each epoch on the non-linear data
plt.figure(figsize=(10, 6))
plt.plot(losses_nonlinear)
plt.xlabel("Epoch", fontsize=12)
plt.ylabel("Loss", fontsize=12)
plt.title("Perceptron Training Loss on Non-Linear Data", fontsize=15)
plt.grid(True, alpha=0.3)
plt.show()

# Decision surface of the Perceptron on the non-linear data
plot_decision_boundary(perceptron_nonlinear, X_nonlinear, y_nonlinear)
print("Notice how the Perceptron fails to separate the non-linear data correctly!")

In [None]:
# ## Part 3: Multi-Layer Perceptron (MLP) in PyTorch
#
# Now let's implement an MLP to handle the non-linear data

class MLP(nn.Module):
    """
    Feed-forward network with two hidden layers, written with PyTorch.

    Data flow:
    - Input features
    - Two fully connected hidden layers of equal width, each followed by ReLU
    - A fully connected output layer followed by a sigmoid
    """
    def __init__(self, input_dim, hidden_dim, output_dim=1):
        """
        Create the layers of the network.

        Args:
            input_dim: Number of input features
            hidden_dim: Number of neurons in each hidden layer
            output_dim: Number of output neurons (1 for binary classification)
        """
        super().__init__()

        # Fully connected layers: input -> hidden -> hidden -> output
        self.layer1 = nn.Linear(input_dim, hidden_dim)
        self.layer2 = nn.Linear(hidden_dim, hidden_dim)
        self.layer3 = nn.Linear(hidden_dim, output_dim)

        # Non-linearities shared by the forward pass
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """
        Compute the network output for a batch of samples.

        Args:
            x: Input tensor of shape (batch_size, input_dim)

        Returns:
            Tensor of shape (batch_size, output_dim) with values in (0, 1)
        """
        # Both hidden layers apply the same pattern: linear map, then ReLU
        hidden = x
        for hidden_layer in (self.layer1, self.layer2):
            hidden = self.relu(hidden_layer(hidden))

        # Output layer squashed to (0, 1)
        return self.sigmoid(self.layer3(hidden))

# Build an MLP with 16 neurons per hidden layer and show its layers
mlp = MLP(input_dim=2, hidden_dim=16)
print(mlp)

# Binary cross-entropy again, this time minimized with Adam
criterion = nn.BCELoss()
optimizer = optim.Adam(mlp.parameters(), lr=0.01)

# Full-batch training on the XOR-like data
num_epochs = 1000
mlp_losses = []

for epoch in range(num_epochs):
    # Clear gradients left over from the previous step
    optimizer.zero_grad()

    # Forward pass and loss
    outputs = mlp(X_nonlinear_tensor)
    loss = criterion(outputs, y_nonlinear_tensor)
    mlp_losses.append(loss.item())

    # Backpropagate and update the weights
    loss.backward()
    optimizer.step()

    # Report every 100th epoch
    report_due = (epoch + 1) % 100 == 0
    if report_due:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

# Loss recorded at each epoch of MLP training
plt.figure(figsize=(10, 6))
plt.plot(mlp_losses)
plt.xlabel('Epoch', fontsize=12)
plt.ylabel('Loss', fontsize=12)
plt.title('MLP Training Loss on Non-Linear Data', fontsize=15)
plt.grid(True, alpha=0.3)
plt.show()

# Decision surface learned by the MLP
plot_decision_boundary(mlp, X_nonlinear, y_nonlinear)
print("Notice how the MLP can learn the non-linear decision boundary!")

# Both models' loss histories on the same axes for a direct comparison
plt.figure(figsize=(10, 6))
for loss_history, model_label in (
    (losses_nonlinear, 'Perceptron'),
    (mlp_losses, 'MLP'),
):
    plt.plot(loss_history, label=model_label)
plt.title('Perceptron vs MLP Training Loss on Non-Linear Data', fontsize=15)
plt.xlabel('Epoch', fontsize=12)
plt.ylabel('Loss', fontsize=12)
plt.legend(fontsize=12)
plt.grid(True, alpha=0.3)
plt.show()

In [None]:
# ## Part 4: Measuring Loss Demo for Iris Dataset Classification
#
# Now let's apply our MLP to a real-world dataset: the Iris dataset

# Fetch the Iris measurements and their species labels
iris = datasets.load_iris()
X, y = iris.data, iris.target

# Reduce the task to two classes: setosa (original class 0) becomes 0,
# every other species (original classes 1 and 2) becomes 1
y_binary = (y > 0).astype(int)

# Hold out 20% of the samples for testing
X_train, X_test, y_train, y_test = train_test_split(
    X, y_binary, test_size=0.2, random_state=42
)

# Standardize using statistics from the training split only, then apply the
# same transformation to the test split
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Convert to float32 tensors; labels become column vectors to line up with
# the (N, 1) model output
X_train_tensor, X_test_tensor = (
    torch.FloatTensor(features) for features in (X_train, X_test)
)
y_train_tensor, y_test_tensor = (
    torch.FloatTensor(labels).reshape(-1, 1) for labels in (y_train, y_test)
)

# Scatter the training split using only its first two (standardized) features,
# one scatter call per class
plt.figure(figsize=(10, 6))
for class_id, class_color, class_label in (
    (0, 'blue', 'Setosa'),
    (1, 'red', 'Non-Setosa'),
):
    class_mask = y_train == class_id
    plt.scatter(X_train[class_mask, 0], X_train[class_mask, 1],
                color=class_color, label=class_label, alpha=0.7)
plt.title('Iris Dataset: Setosa vs Non-Setosa (First Two Features)', fontsize=15)
plt.xlabel('Sepal Length (Standardized)', fontsize=12)
plt.ylabel('Sepal Width (Standardized)', fontsize=12)
plt.legend(fontsize=12)
plt.grid(True, alpha=0.3)
plt.show()

# MLP taking the four Iris features, with 10 neurons per hidden layer
iris_mlp = MLP(input_dim=4, hidden_dim=10)
print(iris_mlp)

# Binary cross-entropy minimized with Adam
criterion = nn.BCELoss()
optimizer = optim.Adam(iris_mlp.parameters(), lr=0.01)

# Per-epoch history of loss and accuracy on the training and held-out splits
num_epochs = 500
train_losses, val_losses = [], []
train_accuracies, val_accuracies = [], []

for epoch in range(num_epochs):
    # --- Training step on the full training split ---
    iris_mlp.train()
    optimizer.zero_grad()
    train_outputs = iris_mlp(X_train_tensor)
    train_loss = criterion(train_outputs, y_train_tensor)

    # Threshold the probabilities at 0.5 to get class predictions
    train_preds = (train_outputs > 0.5).float()
    train_acc = (train_preds == y_train_tensor).float().mean()
    train_losses.append(train_loss.item())
    train_accuracies.append(train_acc.item())

    train_loss.backward()
    optimizer.step()

    # --- Evaluation on the held-out split, without gradient tracking ---
    iris_mlp.eval()
    with torch.no_grad():
        val_outputs = iris_mlp(X_test_tensor)
        val_loss = criterion(val_outputs, y_test_tensor)
        val_preds = (val_outputs > 0.5).float()
        val_acc = (val_preds == y_test_tensor).float().mean()
    val_losses.append(val_loss.item())
    val_accuracies.append(val_acc.item())

    # Report every 50th epoch
    report_due = (epoch + 1) % 50 == 0
    if report_due:
        print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss.item():.4f}, '
              f'Val Loss: {val_loss.item():.4f}, Train Acc: {train_acc.item():.4f}, '
              f'Val Acc: {val_acc.item():.4f}')

# Two side-by-side panels: loss on the left, accuracy on the right. Each panel
# overlays the training history and the held-out history.
plt.figure(figsize=(12, 5))
curve_panels = (
    ('Loss Curves', 'Loss',
     train_losses, 'Training Loss', val_losses, 'Validation Loss'),
    ('Accuracy Curves', 'Accuracy',
     train_accuracies, 'Training Accuracy', val_accuracies, 'Validation Accuracy'),
)
for panel_index, panel in enumerate(curve_panels, start=1):
    panel_title, panel_ylabel, train_curve, train_label, val_curve, val_label = panel
    plt.subplot(1, 2, panel_index)
    plt.plot(train_curve, label=train_label)
    plt.plot(val_curve, label=val_label)
    plt.title(panel_title, fontsize=15)
    plt.xlabel('Epoch', fontsize=12)
    plt.ylabel(panel_ylabel, fontsize=12)
    plt.legend(fontsize=12)
    plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

# Final accuracy on the test split: run the model in eval mode without
# gradient tracking, then threshold the probabilities at 0.5
iris_mlp.eval()
with torch.no_grad():
    test_outputs = iris_mlp(X_test_tensor)
test_preds = (test_outputs > 0.5).float()
test_acc = (test_preds == y_test_tensor).float().mean()
print(f'Test Accuracy: {test_acc.item():.4f}')

# Confusion matrix and per-class metrics for the test predictions
from sklearn.metrics import confusion_matrix, classification_report

# scikit-learn expects flat NumPy arrays rather than (N, 1) tensors
test_preds_np = test_preds.numpy().flatten()
y_test_np = y_test_tensor.numpy().flatten()

class_names = ['Setosa', 'Non-Setosa']
# Heatmap cells are one unit wide, so cell centres sit at 0.5 and 1.5
tick_positions = [0.5, 1.5]

cm = confusion_matrix(y_true=y_test_np, y_pred=test_preds_np)
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', cbar=False)
plt.title('Confusion Matrix', fontsize=15)
plt.xlabel('Predicted Label', fontsize=12)
plt.ylabel('True Label', fontsize=12)
plt.xticks(tick_positions, class_names)
plt.yticks(tick_positions, class_names)
plt.show()

# Precision, recall and F1 per class
print("Classification Report:")
report_text = classification_report(y_test_np, test_preds_np, target_names=class_names)
print(report_text)

# Visualize the decision boundary in 2D (using the first two features)
def plot_iris_decision_boundary(model, X, y):
    """
    Plot a 2D slice of the model's decision surface over the first two features.

    The model is evaluated on a dense grid spanning the first two columns of X
    (plus a margin of 0.5 on every side) while every remaining feature is held
    at its mean over X. The output is drawn as filled contours with the data
    points on top.

    Args:
        model: Trained PyTorch model accepting X.shape[1] input features and
            returning one value per row
        X: Input features with ALL feature columns, NumPy array of shape
            (n_samples, n_features); only the first two columns are plotted
        y: Target labels (0 or 1)
    """
    # Extract the first two features
    X_2d = X[:, :2]

    # Set min and max values and give it some padding
    x_min, x_max = X_2d[:, 0].min() - 0.5, X_2d[:, 0].max() + 0.5
    y_min, y_max = X_2d[:, 1].min() - 0.5, X_2d[:, 1].max() + 0.5
    h = 0.01

    # Generate a grid of points with distance h between them
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))

    # Start every grid row at the per-feature mean, then overwrite the first
    # two columns with the grid coordinates
    grid = np.tile(X.mean(axis=0), (xx.size, 1))
    grid[:, 0] = xx.ravel()
    grid[:, 1] = yy.ravel()

    # Predict the function value for the whole grid. Gradients are never
    # needed for plotting, so autograd is switched off to avoid recording a
    # graph for every grid point.
    with torch.no_grad():
        Z = model(torch.FloatTensor(grid))
    Z = Z.numpy().reshape(xx.shape)

    # Plot the contour and the data points
    plt.figure(figsize=(10, 6))
    contour = plt.contourf(xx, yy, Z, cmap=plt.cm.RdBu, alpha=0.8)
    plt.colorbar(contour, label='Probability of Class 1')
    plt.scatter(X_2d[y == 0, 0], X_2d[y == 0, 1], color='blue', label='Setosa', alpha=0.7)
    plt.scatter(X_2d[y == 1, 0], X_2d[y == 1, 1], color='red', label='Non-Setosa', alpha=0.7)
    plt.title('Decision Boundary (First Two Features)', fontsize=15)
    plt.xlabel('Sepal Length (Standardized)', fontsize=12)
    plt.ylabel('Sepal Width (Standardized)', fontsize=12)
    plt.legend(fontsize=12)
    plt.grid(True, alpha=0.3)
    plt.show()

# Draw the Iris model's decision surface together with the standardized test samples
plot_iris_decision_boundary(model=iris_mlp, X=X_test, y=y_test)

# ## Conclusion
#
# In this notebook, we've covered:
# 1. Creating and visualizing dummy data for classification
# 2. Implementing a simple Perceptron in PyTorch
# 3. Building a Multi-Layer Perceptron (MLP) for non-linear data
# 4. Applying an MLP to the Iris dataset and measuring performance
#
# Key takeaways:
# - Perceptrons can only learn linear decision boundaries
# - MLPs with hidden layers can learn complex, non-linear decision boundaries
# - PyTorch provides a flexible framework for building and training neural networks
# - Proper evaluation using train/test splits and metrics like accuracy and confusion matrices is essential