In [None]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import make_moons

# Build the two-moons toy dataset (fixed random_state => same points on every run)
data = make_moons(n_samples=1000, noise=0.1, random_state=42)
y = data[1]
# Shift the features by (0.5, 0.25), then stretch them by a factor of 4
X = (data[0] - np.array([0.5, 0.25])) * 4.0

# Scatter plot of the dataset, one colour per class
colors = np.array(["#3057D3", "#D33030"])
# (y + 1) // 2 maps 0 -> 0 and 1 -> 1, i.e. it is the identity for the 0/1
# labels that scikit-learn documents for make_moons
point_colors = colors[(y + 1) // 2]
fig, ax = plt.subplots(figsize=(5, 3))
ax.scatter(X[:, 0], X[:, 1], s=10, color=point_colors)
ax.set_xlabel("Feature 1")
ax.set_ylabel("Feature 2")
ax.set_title("Data")
fig.tight_layout()
fig.savefig("../assets/classification_data.svg", format="svg")

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Hold out 30% of the samples for testing (seeded so the split is repeatable)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Standardise the features: the scaler is fitted on the training split only
# and the same fitted transform is then applied to both splits
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Report the shapes of the (unscaled) splits
print(f"X_train.shape: {X_train.shape}")
print(f"y_train.shape: {y_train.shape}")
print(f" X_test.shape: {X_test.shape}")
print(f" y_test.shape: {y_test.shape}")

In [None]:
from tinytorch import Tensor

# Wrap the scaled features and the labels of each split in tinytorch tensors
Xt_train, yt_train = Tensor(X_train_scaled), Tensor(y_train)
Xt_test, yt_test = Tensor(X_test_scaled), Tensor(y_test)

In [None]:
from tinytorch import MLP, Activation, Tensor

# Train a small MLP binary classifier with full-batch gradient descent and
# plot the train/test loss and accuracy curves.

# The dataset and the train/test split are seeded with 42; seed the model
# initialisation too so the saved figures are reproducible.
# NOTE(review): assumes tinytorch draws its initial weights from NumPy's
# global RNG -- confirm; if it does not, this call has no effect.
np.random.seed(42)

# Define a simple mlp for classification
mlp = MLP(
    n_input=2,
    layers=[
        (24, Activation.RELU),  # hidden layer 1
        (12, Activation.RELU),  # hidden layer 2
        (1, Activation.SIGMOID),  # output layer: 1 neuron with Sigmoid activation
    ],
)
display(mlp)

# Hyper-parameters
epochs = 200
lr = 5e-1
log_every = 10  # print a progress line every N epochs (metrics are stored every epoch)

# Track both loss and accuracy
metrics = {"epoch": [], "train_loss": [], "test_loss": [], "train_acc": [], "test_acc": []}

# Training loop (epochs are numbered from 1)
for epoch in range(1, epochs + 1):
    # Forward pass to get probabilities
    y_train_probs = mlp(Xt_train)
    y_test_probs = mlp(Xt_test)

    # Calculate accuracy (probabilities > 0.5 for binary classification)
    y_train_pred = (y_train_probs.data > 0.5).astype(np.float32)
    y_test_pred = (y_test_probs.data > 0.5).astype(np.float32)

    train_acc = np.mean(y_train_pred == yt_train.data)
    test_acc = np.mean(y_test_pred == yt_test.data)

    # Zero gradients before the backward pass of this epoch
    mlp.flush_grads()

    # Binary cross-entropy loss, averaged over the samples of each split.
    # NOTE(review): if the sigmoid output saturates to exactly 0 or 1, log()
    # would produce -inf/nan; consider clamping the probabilities if tinytorch
    # offers such an operation.
    neg_logl_train = -(
        yt_train * y_train_probs.log() + (1 - yt_train) * (1 - y_train_probs).log()
    ).sum() / len(yt_train)
    neg_logl_test = -(
        yt_test * y_test_probs.log() + (1 - yt_test) * (1 - y_test_probs).log()
    ).sum() / len(yt_test)

    # Bookkeeping
    train_loss = neg_logl_train.data.item()
    test_loss = neg_logl_test.data.item()

    # Log only the first epoch, every `log_every`-th epoch and the last one,
    # so the cell output is not flooded with one line per epoch
    if epoch == 1 or epoch % log_every == 0 or epoch == epochs:
        print(
            f"epoch {epoch:03d}: "
            f"loss[train]={train_loss:.3f}, loss[test]={test_loss:.3f} | "
            f"acc[train]={train_acc:.3f}, acc[test]={test_acc:.3f}"
        )

    # Store metrics (every epoch, independent of logging)
    metrics["epoch"].append(epoch)
    metrics["train_loss"].append(train_loss)
    metrics["test_loss"].append(test_loss)
    metrics["train_acc"].append(train_acc)
    metrics["test_acc"].append(test_acc)

    # Backward pass (only the training loss drives the update)
    neg_logl_train.backward()

    # Gradient descent update to parameters
    for param in mlp.parameters:
        param.data -= lr * param.grad

# Plot training curves: loss on the left, accuracy on the right
fig, (ax_loss, ax_acc) = plt.subplots(1, 2, figsize=(8, 3))

# Plot loss
ax_loss.plot(metrics["epoch"], metrics["train_loss"], label="Train")
ax_loss.plot(metrics["epoch"], metrics["test_loss"], label="Test")
ax_loss.set_xlabel("Epoch")
ax_loss.set_ylabel("Loss")
ax_loss.legend()
ax_loss.grid()
ax_loss.set_title("Loss vs Epoch")

# Plot accuracy
ax_acc.plot(metrics["epoch"], metrics["train_acc"], label="Train")
ax_acc.plot(metrics["epoch"], metrics["test_acc"], label="Test")
ax_acc.set_xlabel("Epoch")
ax_acc.set_ylabel("Accuracy")
ax_acc.legend()
ax_acc.set_title("Accuracy vs Epoch")
ax_acc.grid()

fig.tight_layout()
fig.savefig("../assets/classification_training.svg")

In [None]:
from matplotlib.colors import Normalize

# Evaluate the model on a 150 x 150 grid covering [-7, 7] in both raw features
grid_axis = np.linspace(-7, 7, 150)
xx, yy = np.meshgrid(grid_axis, grid_axis)
X_mesh = np.column_stack([xx.ravel(), yy.ravel()])  # (150*150, 2)

# The grid lives in raw feature space, so push it through the scaler that was
# fitted on the training data before handing it to the network
X_mesh_scaled = scaler.transform(X_mesh)
X_mesh_tensor = Tensor(X_mesh_scaled)

# Predicted probabilities, reshaped back onto the grid
mesh_probs = mlp(X_mesh_tensor).data.reshape(xx.shape)

# Figure: probability field, training points and the 0.5 decision contour
fig, ax = plt.subplots(figsize=(5, 3))

# Filled contours of the probability with a red-blue gradient (10 bands on [0, 1])
norm = Normalize(vmin=0, vmax=1)
levels = np.linspace(0, 1, 11)
contour = ax.contourf(xx, yy, mesh_probs, levels=levels, cmap="RdBu_r", norm=norm, alpha=0.3)
fig.colorbar(contour, ax=ax, label="Probability")

# Training points (raw coordinates) coloured by class with matching hues
class_colors = ["#3057D3", "#D33030"]  # Blue, Red
point_colors = [class_colors[int(label)] for label in y_train]
ax.scatter(X_train[:, 0], X_train[:, 1], c=point_colors, s=10, alpha=0.8)

# Dashed black line where the predicted probability equals 0.5
ax.contour(xx, yy, mesh_probs, levels=[0.5], colors="black", linestyles="--", linewidths=2)
ax.set_xlabel("Feature 1")
ax.set_ylabel("Feature 2")
ax.set_title("Decision Boundary")
fig.tight_layout()
fig.savefig("../assets/classification_results.svg")