In [79]:
import numpy as np
import torch
from torchviz import make_dot

In [86]:
class MLP(torch.nn.Module):
    def __init__(self, X_train , y_train, loss_function = 'cross_entropy'):
        super(MLP, self).__init__()
        self.loss_function = loss_function
        self.layers = []
        self.Weights = []
        self.Biases = []
        self.X_train = X_train
        self.y_train = y_train
        # convert to torch tensor
        self.X_train = torch.tensor(X_train, dtype=torch.float32)
        self.y_train = torch.tensor(y_train, dtype=torch.float32)
        self.y_train = self.one_hot(self.y_train)

    def one_hot(self, y):
        maxi = int(torch.max(y).item())
        y_onehot = torch.zeros(y.size(0), maxi + 1)
        y_onehot.scatter_(1, y.view(-1, 1).long(), 1)
        return y_onehot


    def add_layer(self, in_features, out_features, activation = 'relu'):
        size = (in_features, out_features)
        self.Weights.append(torch.nn.Parameter(torch.randn(size, requires_grad=True)))
        self.Biases.append(torch.nn.Parameter(torch.randn(out_features, requires_grad=True)))
        layer = {'Weight': self.Weights[-1], 'Bias': self.Biases[-1], 'Activation': activation}
        self.layers.append(layer)

    def train(self, epochs = 100, learning_rate = 0.01):
        overall_loss = 0
        for epoch in range(epochs):
            # Shuffle the data
            indices = torch.randperm(self.X_train.size(0))
            X_train = self.X_train[indices]
            y_train = self.y_train[indices]

            for i in range(X_train.size(0)):
                x = X_train[i]  
                for layer in self.layers:
                    Z = torch.matmul(x, layer['Weight']) + layer['Bias']
                    if layer['Activation'] == 'relu':
                        Z = torch.relu(Z)
                    elif layer['Activation'] == 'sigmoid':
                        Z= torch.sigmoid(Z)
                    elif layer['Activation'] == 'tanh':
                        Z= torch.tanh(Z)
                    elif layer['Activation'] == 'softmax':
                        Z = torch.softmax(Z, dim=0)
                    x = Z
                final_activation = x

                # Compute loss
                if self.loss_function == 'cross_entropy':
                    loss = self.loss_cross_entropy(final_activation, y_train[i])
                elif self.loss_function == 'mse':
                    loss = self.loss_mse(final_activation, y_train[i])

                overall_loss += loss.item()

                # Backward pass
                loss.backward()

                # Update weights
                with torch.no_grad():
                    for layer in self.layers:
                        layer['Weight'] -= learning_rate * layer['Weight'].grad
                        layer['Bias'] -= learning_rate * layer['Bias'].grad

                        # Zero the gradients
                        layer['Weight'].grad.zero_()
                        layer['Bias'].grad.zero_()
                make_dot(loss).render("attached", format="png")
            print(f'Epoch {epoch}, Loss: {overall_loss}, Accuracy: {self.accuracy(final_activation, y_train[i])}')
            overall_loss = 0
            
    def accuracy(self, activation, y_batch):
        m = y_batch.size(0)
        correct = 0
        for i in range(m):
            y_pred = torch.argmax(activation[i])
            y_true = torch.argmax(y_batch[i])
            if y_pred == y_true:
                correct += 1
        return correct / m

    def loss_cross_entropy(self, activation, y):
        m = y.size(0)
        # print(activation)
        # print(y)
        loss = 0
        for i in range(m):
            loss = -torch.sum(y[i] * torch.log(activation[i]))
        return loss
        
    def loss_mse(self, activation, y_batch):
        m = y_batch.size(0)
        loss = torch.sum((activation - y_batch) ** 2) / (2 * m)
        return loss

    def test(self,X_test,Y_test):
        X_test = torch.tensor(X_test, dtype=torch.float32)
        Y_test = torch.tensor(Y_test, dtype=torch.float32)
        Y_test = self.one_hot(Y_test)
        correct = 0
        total = 0
        with torch.no_grad():
            for i in range(X_test.size(0)):
                x = X_test[i]
                for layer in self.layers:
                    x = torch.matmul(x, layer['Weight']) + layer['Bias']
                    if layer['Activation'] == 'relu':
                        x = torch.relu(x)
                    elif layer['Activation'] == 'sigmoid':
                        x = torch.sigmoid(x)
                    elif layer['Activation'] == 'tanh':
                        x = torch.tanh(x)
                    elif layer['Activation'] == 'softmax':
                        x = torch.softmax(x, dim=0)
                y_pred = x
                y_pred = torch.argmax(y_pred)
                y_true = torch.argmax(Y_test[i])
                if y_pred == y_true:
                    correct += 1
                total += 1
        print(f'Accuracy: {correct / total}')    

In [87]:
import numpy as np
from keras.datasets import mnist

# Load MNIST data
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

train_images = train_images.reshape((60000, 28 * 28))
train_images = train_images.astype('float32') / 255

test_images = test_images.reshape((10000, 28 * 28))
test_images = test_images.astype('float32') / 255

# Create MLP
mlp = MLP(train_images, train_labels, loss_function='cross_entropy')
mlp.add_layer(784, 128, activation='relu')
mlp.add_layer(128, 64, activation='relu')
mlp.add_layer(64, 10, activation='softmax')

# Train MLP
mlp.train(epochs=10, learning_rate=0.01)

# Test MLP
mlp.test(test_images, test_labels)
print(f'Test accuracy: {mlp.test(test_images, test_labels)}')


Epoch 0, Loss: nan, Accuracy: 1.0


KeyboardInterrupt: 

In [None]:
# training it on cifar100 dataset
from keras.datasets import cifar100
(train_images, train_labels), (test_images, test_labels) = cifar100.load_data()

train_images = train_images.reshape((50000, 32 * 32 * 3))
train_images = train_images.astype('float32') / 255

test_images = test_images.reshape((10000, 32 * 32 * 3))
test_images = test_images.astype('float32') / 255

# Create MLP
mlp = MLP(train_images, train_labels, loss_function='cross_entropy')
mlp.add_layer(3072, 1024, activation='relu')
mlp.add_layer(1024, 512, activation='relu')
mlp.add_layer(512, 256, activation='relu')
mlp.add_layer(256, 100, activation='softmax')


# Train MLP
mlp.train(epochs=10, learning_rate=0.01)

# Test MLP
mlp.test(test_images, test_labels)

Epoch 0, Loss: nan, Accuracy: 1.0
Epoch 1, Loss: nan, Accuracy: 1.0
Epoch 2, Loss: nan, Accuracy: 1.0
Epoch 3, Loss: nan, Accuracy: 1.0


KeyboardInterrupt: 