In [1]:
import torch
import torch.nn.functional as F
from sklearn.datasets import load_digits
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
import numpy as np

In [2]:
# Load the digits dataset and split it into a feature matrix and a label vector.
digits = load_digits()
y = digits.target
# Cast to float32 first, then divide by 16.0 so the features land in [0, 1].
X = digits.data.astype(np.float32) / 16.0
X, y

(array([[0.    , 0.    , 0.3125, ..., 0.    , 0.    , 0.    ],
        [0.    , 0.    , 0.    , ..., 0.625 , 0.    , 0.    ],
        [0.    , 0.    , 0.    , ..., 1.    , 0.5625, 0.    ],
        ...,
        [0.    , 0.    , 0.0625, ..., 0.375 , 0.    , 0.    ],
        [0.    , 0.    , 0.125 , ..., 0.75  , 0.    , 0.    ],
        [0.    , 0.    , 0.625 , ..., 0.75  , 0.0625, 0.    ]],
       dtype=float32),
 array([0, 1, 2, ..., 8, 9, 8]))

In [3]:
# Hold out 20% of the samples for evaluation; the fixed random_state keeps
# the split the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
(X_train.shape, y_train.shape, X_train.shape[0])

((1437, 64), (1437,), 1437)

In [4]:
# One-hot encode the labels. The encoder is fitted on the training labels
# only and then reused, unchanged, for the test labels.
enc = OneHotEncoder(sparse_output=False)
enc.fit(y_train[:, np.newaxis])  # the encoder expects a 2-D column of labels
y_train_oh = enc.transform(y_train[:, np.newaxis]).astype(np.float32)
y_test_oh = enc.transform(y_test[:, np.newaxis]).astype(np.float32)

In [5]:
# Use the GPU when one is available, otherwise stay on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# This cell rebinds the NumPy arrays from the previous cells to tensors under
# the same names. torch.as_tensor accepts either an array or an existing
# tensor, so running the cell a second time is a no-op instead of triggering
# torch.tensor's "copy construct from a tensor" UserWarning and a redundant copy.
# Note: on CPU the resulting tensors share memory with the source arrays.
X_train = torch.as_tensor(X_train, device=device)
y_train_oh = torch.as_tensor(y_train_oh, device=device)
X_test = torch.as_tensor(X_test, device=device)
y_test = torch.as_tensor(y_test, device=device)  # integer labels, used for accuracy
y_test_oh = torch.as_tensor(y_test_oh, device=device)

  return torch._C._cuda_getDeviceCount() > 0


In [None]:
class MLP:
    """Two-layer perceptron with sigmoid activations and hand-written backprop.

    The network computes ``sigmoid(sigmoid(X @ W1 + b1) @ W2 + b2)`` and is
    trained with plain mini-batch gradient descent. Gradients are derived
    manually in :meth:`backward`; autograd is not used.

    NOTE(review): the weights are initialised at scale 0.01 and the output is
    a sigmoid trained against a squared-error objective. A different init or
    a softmax/cross-entropy head may converge faster, but both are left
    untouched here so existing results do not change.
    """

    def __init__(self, input_size, hidden_size, output_size, lr=0.5, device=None, seed=42):
        """Create the parameters.

        Args:
            input_size: number of input features (rows of ``W1``).
            hidden_size: number of hidden units.
            output_size: number of output units (columns of ``W2``).
            lr: learning rate used by :meth:`update`.
            device: where to allocate the parameters. ``None`` selects CUDA
                when available and the CPU otherwise, so the class no longer
                depends on a notebook-level ``device`` variable.
            seed: value passed to ``torch.manual_seed`` before initialisation.
        """
        if device is None:
            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        elif not isinstance(device, torch.device):
            device = torch.device(device)
        self.device = device

        # Seeds the *global* torch RNG: this fixes the initial weights and,
        # as a side effect, the shuffling order later drawn in train().
        torch.manual_seed(seed)
        self.W1 = torch.randn(input_size, hidden_size, device=device) * 0.01
        self.b1 = torch.zeros(1, hidden_size, device=device)
        self.W2 = torch.randn(hidden_size, output_size, device=device) * 0.01
        self.b2 = torch.zeros(1, output_size, device=device)
        self.lr = lr

    def forward(self, X):
        """Run a forward pass and cache Z1, A1, Z2, A2 for :meth:`backward`.

        Returns:
            The output activations ``A2``.
        """
        self.Z1 = X @ self.W1 + self.b1
        self.A1 = torch.sigmoid(self.Z1)
        self.Z2 = self.A1 @ self.W2 + self.b2
        self.A2 = torch.sigmoid(self.Z2)
        return self.A2

    def backward(self, X, Y):
        """Compute parameter gradients for the batch last passed to forward().

        The gradients correspond to the objective
        ``0.5 * sum((A2 - Y) ** 2) / m`` with ``m = X.shape[0]``; this differs
        from the mean-squared error printed by :meth:`train` only by a
        constant factor. Results are stored in ``dW1, db1, dW2, db2``.
        """
        m = X.shape[0]
        # Output layer
        dA2 = (self.A2 - Y)
        dZ2 = dA2 * self.A2 * (1 - self.A2)  # sigmoid derivative
        dW2 = (self.A1.T @ dZ2) / m
        db2 = torch.sum(dZ2, dim=0, keepdim=True) / m

        # Hidden layer
        dA1 = dZ2 @ self.W2.T
        dZ1 = dA1 * self.A1 * (1 - self.A1)
        dW1 = (X.T @ dZ1) / m
        db1 = torch.sum(dZ1, dim=0, keepdim=True) / m

        self.dW1, self.db1 = dW1, db1
        self.dW2, self.db2 = dW2, db2

    def update(self):
        """Apply one in-place gradient-descent step using the stored gradients."""
        self.W1 -= self.lr * self.dW1
        self.b1 -= self.lr * self.db1
        self.W2 -= self.lr * self.dW2
        self.b2 -= self.lr * self.db2

    def train(self, X, Y, epochs=50, batch_size=32):
        """Train with shuffled mini-batches and print the loss after each epoch.

        Args:
            X: input samples, one per row.
            Y: targets with one row per sample and the same width as the output.
            epochs: number of passes over the data.
            batch_size: samples per gradient step (the last batch may be smaller).

        Returns:
            A list with the mean-squared error over the full data after each epoch.

        Raises:
            ValueError: if ``batch_size`` is smaller than 1.
        """
        if batch_size < 1:
            raise ValueError(f"batch_size must be >= 1, got {batch_size}")

        history = []
        n_samples = X.shape[0]
        for epoch in range(epochs):
            # Shuffle. The permutation is created on the data's own device so
            # indexing works regardless of where the caller placed X.
            idx = torch.randperm(n_samples, device=X.device)
            X, Y = X[idx], Y[idx]

            for i in range(0, n_samples, batch_size):
                X_batch = X[i:i+batch_size]
                Y_batch = Y[i:i+batch_size]

                self.forward(X_batch)
                self.backward(X_batch, Y_batch)
                self.update()

            # Loss for epoch, evaluated on the whole (shuffled) data set
            with torch.no_grad():
                y_pred = self.forward(X)
                loss = torch.mean((y_pred - Y) ** 2).item()
            history.append(loss)
            print(f"Epoch {epoch+1}/{epochs} - Loss: {loss:.4f}")
        return history

    def predict(self, X):
        """Return the index of the largest output activation for each row of X."""
        with torch.no_grad():
            y_pred = self.forward(X)
            return torch.argmax(y_pred, dim=1)

In [7]:
# Fit the network on the one-hot training targets.
mlp = MLP(input_size=64, hidden_size=32, output_size=10, lr=0.5)
mlp.train(X_train, y_train_oh, epochs=50, batch_size=32)

# Accuracy: the fraction of test samples whose predicted class index
# equals the integer label.
y_pred_test = mlp.predict(X_test)
acc = (y_pred_test == y_test).float().mean().item()
print(f"Test Accuracy: {acc * 100:.2f}%")

Epoch 1/50 - Loss: 0.0900
Epoch 2/50 - Loss: 0.0900
Epoch 3/50 - Loss: 0.0900
Epoch 4/50 - Loss: 0.0900
Epoch 5/50 - Loss: 0.0900
Epoch 6/50 - Loss: 0.0900
Epoch 7/50 - Loss: 0.0900
Epoch 8/50 - Loss: 0.0900
Epoch 9/50 - Loss: 0.0900
Epoch 10/50 - Loss: 0.0900
Epoch 11/50 - Loss: 0.0899
Epoch 12/50 - Loss: 0.0899
Epoch 13/50 - Loss: 0.0899
Epoch 14/50 - Loss: 0.0899
Epoch 15/50 - Loss: 0.0899
Epoch 16/50 - Loss: 0.0898
Epoch 17/50 - Loss: 0.0898
Epoch 18/50 - Loss: 0.0897
Epoch 19/50 - Loss: 0.0897
Epoch 20/50 - Loss: 0.0896
Epoch 21/50 - Loss: 0.0895
Epoch 22/50 - Loss: 0.0894
Epoch 23/50 - Loss: 0.0892
Epoch 24/50 - Loss: 0.0890
Epoch 25/50 - Loss: 0.0888
Epoch 26/50 - Loss: 0.0884
Epoch 27/50 - Loss: 0.0879
Epoch 28/50 - Loss: 0.0873
Epoch 29/50 - Loss: 0.0865
Epoch 30/50 - Loss: 0.0855
Epoch 31/50 - Loss: 0.0843
Epoch 32/50 - Loss: 0.0825
Epoch 33/50 - Loss: 0.0805
Epoch 34/50 - Loss: 0.0781
Epoch 35/50 - Loss: 0.0755
Epoch 36/50 - Loss: 0.0730
Epoch 37/50 - Loss: 0.0699
Epoch 38/5