In [None]:
import minima as mi
from functools import partial

In [None]:
import minima as mi
import numpy as np
import minima.nn as nn
import minima.optim as optim
from minima.data import *

In [None]:
def accuracy(out, yb):
    """Return the fraction of entries in `out` that equal the matching entry in `yb`.

    Args:
        out: Indexable sequence of predicted labels; every element must
            expose `.item()`.
        yb: Indexable sequence of target labels, at least as long as `out`;
            every element must expose `.item()`.

    Returns:
        A float in [0, 1]. An empty `out` yields 0.0 rather than raising
        ZeroDivisionError.
    """
    n = len(out)
    if n == 0:
        # Nothing to score: report 0.0 instead of dividing by zero.
        return 0.0
    correct = sum(1 for i in range(n) if out[i].item() == yb[i].item())
    return correct / n

In [None]:
def load_mnist(path, kind='train'):
    """Load a gzip-compressed MNIST-format image/label pair from `path`.

    Args:
        path: Directory holding `<kind>-labels-idx1-ubyte.gz` and
            `<kind>-images-idx3-ubyte.gz`.
        kind: File-name prefix that selects the split (e.g. 'train', 't10k').

    Returns:
        (images, labels): `images` is a uint8 array of shape
        (len(labels), 784); `labels` is a 1-D uint8 array.
    """
    # Kept function-local so the loader does not depend on other cells for
    # these two stdlib modules; `np` comes from the notebook's import cell.
    import gzip
    import os

    labels_path = os.path.join(path,
                               '%s-labels-idx1-ubyte.gz'
                               % kind)
    images_path = os.path.join(path,
                               '%s-images-idx3-ubyte.gz'
                               % kind)

    with gzip.open(labels_path, 'rb') as lbpath:
        # The first 8 bytes are header; the remainder is one byte per label.
        labels = np.frombuffer(lbpath.read(), dtype=np.uint8,
                               offset=8)

    with gzip.open(images_path, 'rb') as imgpath:
        # The first 16 bytes are header; each image is flattened to 784 bytes.
        images = np.frombuffer(imgpath.read(), dtype=np.uint8,
                               offset=16).reshape(len(labels), 784)

    return images, labels


def normalize(data):
    """Standardize `data` with its own global mean and standard deviation.

    Both statistics are computed over every element (no axis argument), so a
    single scalar shift and a single scalar scale are applied to the whole array.
    """
    centered = data - np.mean(data)
    return centered / np.std(data)


# Load both splits as raw uint8 pixel arrays plus their integer labels.
X_train_raw, y_train = load_mnist('datasets/fashion', kind='train')
X_test_raw, y_test = load_mnist('datasets/fashion', kind='t10k')

# Scale pixels to [0, 1]. The division already allocates a fresh float64
# array, so no explicit np.copy of the loaded buffers is needed.
X_train_scaled = X_train_raw / np.array(255.0, dtype=np.float64)
X_test_scaled = X_test_raw / np.array(255.0, dtype=np.float64)

# Standardize BOTH splits with the training-set statistics. Using the test
# set's own mean/std would leak information from the held-out data and put
# the two splits on slightly different scales.
train_mean = np.mean(X_train_scaled)
train_std = np.std(X_train_scaled)
X_train = (X_train_scaled - train_mean) / train_std
X_test = (X_test_scaled - train_mean) / train_std

In [None]:
# Sanity check: the feature array and the label array must share their first dimension.
X_train.shape, y_train.shape

((60000, 784), (60000,))

In [None]:
# Same check for the held-out split.
X_test.shape, y_test.shape

((10000, 784), (10000,))

In [None]:
# Still a plain NumPy array at this point; conversion to mi.Tensor happens in the next cell.
type(X_train)

numpy.ndarray

In [None]:
# Wrap each NumPy array in a minima Tensor. The test split is used as the
# validation set from here on, hence the *_val names.
X_tr, y_tr = mi.Tensor(X_train), mi.Tensor(y_train)
X_val, y_val = mi.Tensor(X_test), mi.Tensor(y_test)

In [None]:
# Confirm the conversion and peek at the first ten training labels.
type(X_tr), y_tr[:10]

(minima.autograd.Tensor, mi.Tensor([9 0 0 3 0 2 7 2 5 5]))

In [None]:
class NeuralNetwork(nn.Module):
    """Fully connected classifier: three ReLU hidden layers plus a linear output layer.

    `forward` returns the raw output of the last linear layer; no softmax is
    applied there.
    """

    def __init__(self, input_shape, output_shape, hidden_features=24):
        """Build the layers.

        Args:
            input_shape: Number of input features per sample.
            output_shape: Number of output units.
            hidden_features: Width of each of the three hidden layers.
                Defaults to 24, the value that was previously hard-coded.
        """
        super().__init__()
        self.dense1 = nn.Linear(in_features=input_shape, out_features=hidden_features)
        self.dense2 = nn.Linear(hidden_features, hidden_features)
        self.dense3 = nn.Linear(hidden_features, hidden_features)
        self.dense4 = nn.Linear(hidden_features, output_shape)
        self.relu = nn.ReLU()
        # Not used by forward(); retained so the `softmax` attribute and the
        # module repr stay the same for existing code.
        self.softmax = nn.Softmax()

    def forward(self, x):
        """Run `x` through the three hidden layers and return the final linear output."""
        for hidden in (self.dense1, self.dense2, self.dense3):
            x = self.relu(hidden(x))
        return self.dense4(x)

# Create the neural network
input_shape = X_tr.shape[1]  # features per sample, taken from the training tensor's second dimension
output_shape = 10  # number of output units; the labels displayed above are digits 0-9

network = NeuralNetwork(input_shape, output_shape)
# Bare expression so the notebook displays the module structure.
network

NeuralNetwork(
  (dense1): Linear(in_features=784, out_features=24, bias=True)
  (dense2): Linear(in_features=24, out_features=24, bias=True)
  (dense3): Linear(in_features=24, out_features=24, bias=True)
  (dense4): Linear(in_features=24, out_features=10, bias=True)
  (relu): ReLU()
  (softmax): Softmax()
)

In [None]:
# Custom Dataset class
class MyDataset(Dataset):
    """Dataset that pairs a feature tensor `X` with a label tensor `y`, indexed together."""

    def __init__(self, X, y):
        # Both inputs are passed through mi.Tensor. This notebook already hands
        # in Tensors here. NOTE(review): confirm that wrapping an existing
        # Tensor again is harmless and does not copy unnecessarily.
        self.X = mi.Tensor(X)
        self.y = mi.Tensor(y)
    
    def __len__(self):
        # The dataset size is defined by the length of the feature tensor.
        return len(self.X)
    
    def __getitem__(self, index):
        # Return the (features, label) pair stored at `index`.
        return self.X[index], self.y[index]

# Training and validation datasets (the validation tensors come from the test split).
tr_ds = MyDataset(X_tr, y_tr)
val_ds = MyDataset(X_val, y_val)

# Create the data loaders
batch_size = 10  # mini-batch size used for training
tr_dl = DataLoader(tr_ds, batch_size=batch_size, shuffle=True)
# Validation uses a hard-coded batch size of 64 and keeps the original order.
val_dl = DataLoader(val_ds, batch_size=64, shuffle=False)

In [None]:
# Pull a single mini-batch from the training loader to inspect its shape and labels.
xb, yb = next(iter(tr_dl))
xb.shape, yb

((10, 784), mi.Tensor([6 6 7 6 9 9 3 4 6 4]))

In [None]:
# Sanity check on the standardization: one batch should have mean near 0 and std near 1.
xb.numpy().mean(), xb.numpy().std()

(0.010457627275600019, 1.0153364259848787)

In [None]:
def train(num_epochs=10, lr=0.01):
    """Train a freshly constructed `NeuralNetwork` on `tr_dl` with SGD and cross-entropy loss.

    Relies on the notebook globals `input_shape`, `output_shape` and `tr_dl`.
    Prints the per-sample average loss and accuracy after every epoch.

    Args:
        num_epochs: Number of passes over `tr_dl` (default 10).
        lr: Learning rate handed to `optim.SGD` (default 0.01).

    Returns:
        The trained network, so the caller can evaluate or reuse it.
    """
    network = NeuralNetwork(input_shape, output_shape)
    opt = optim.SGD(network.parameters(), lr=lr)
    loss_fn = nn.CrossEntropyLoss()

    for epoch in range(num_epochs):
        # Training phase
        network.train()
        tot_loss, tot_acc, count = 0., 0., 0
        for xb, yb in tr_dl:
            preds = network(xb)
            loss = loss_fn(preds, yb)
            loss.backward()
            opt.step()
            opt.zero_grad()

            # Weight each batch metric by the batch size so the epoch figure
            # is a true per-sample average even if the last batch is smaller.
            predicted_labels = preds.argmax(axis=1)
            n = len(xb)
            count += n
            tot_loss += loss.item() * n
            tot_acc += mi.Tensor.accuracy(predicted_labels, yb).item() * n

        # Guard against an empty loader so the report never divides by zero.
        denom = max(count, 1)
        print(f"epoch {epoch + 1:02d}/{num_epochs:02d} - loss: {tot_loss/denom:.4f} - acc: {tot_acc/denom:.4f}")

    return network


In [None]:
# Run training with the settings defined inside train(); progress is printed once per epoch.
train()

epoch 01/10 - loss: 0.5462 - acc: 0.8011
epoch 02/10 - loss: 0.4102 - acc: 0.8499
epoch 03/10 - loss: 0.3750 - acc: 0.8635
epoch 04/10 - loss: 0.3546 - acc: 0.8710
epoch 05/10 - loss: 0.3382 - acc: 0.8776
epoch 06/10 - loss: 0.3279 - acc: 0.8794
epoch 07/10 - loss: 0.3175 - acc: 0.8825
epoch 08/10 - loss: 0.3086 - acc: 0.8864
epoch 09/10 - loss: 0.3024 - acc: 0.8880
epoch 10/10 - loss: 0.2961 - acc: 0.8917


In [None]:
# train() constructs a new NeuralNetwork on every call, so this is an
# independent second run rather than a continuation of the run above.
train()

In [None]:
def fit(epochs, lr, model, loss_func, opt_fn, train_dl, valid_dl):
    """Train `model` for `epochs` epochs and report train/validation metrics each epoch.

    Args:
        epochs: Number of passes over `train_dl`.
        lr: Learning rate passed to `opt_fn`.
        model: Module to optimise; must provide `parameters()`, `train()`,
            `eval()` and be callable on a batch.
        loss_func: Callable `(preds, targets) -> loss`; the loss must support
            `.backward()` and `.item()`.
        opt_fn: Optimizer factory called as `opt_fn(model.parameters(), lr=lr)`.
        train_dl: Iterable of `(xb, yb)` training batches.
        valid_dl: Iterable of `(xb, yb)` validation batches.

    Returns:
        None. One summary line is printed per epoch.
    """
    opt = opt_fn(model.parameters(), lr=lr)
    for epoch in range(epochs):
        # ---- training pass ----
        model.train()
        train_tot_loss, train_tot_acc, t_count = 0., 0., 0
        for xb, yb in train_dl:
            preds = model(xb)
            loss = loss_func(preds, yb)
            loss.backward()
            opt.step()
            opt.zero_grad()

            # Weight batch metrics by batch size to get per-sample averages.
            predicted_labels = preds.argmax(axis=1)
            n = len(xb)
            t_count += n
            train_tot_loss += loss.item() * n
            train_tot_acc += mi.Tensor.accuracy(predicted_labels, yb).item() * n

        # ---- validation pass ----
        # No backward() or opt.step() is called here, so the parameters are
        # not updated. NOTE(review): if minima provides a no-grad context,
        # wrapping this loop in it would avoid building the autograd graph.
        model.eval()
        val_tot_loss, val_tot_acc, v_count = 0., 0., 0
        for xb, yb in valid_dl:
            preds = model(xb)

            pred_labels = preds.argmax(axis=1)
            n = len(xb)
            v_count += n
            val_tot_acc += mi.Tensor.accuracy(pred_labels, yb).item() * n
            val_tot_loss += loss_func(preds, yb).item() * n

        # Guard against empty loaders so the report never divides by zero.
        t_denom = max(t_count, 1)
        v_denom = max(v_count, 1)
        print(f"epoch {epoch + 1:02d}/{epochs:02d} - loss: {train_tot_loss/t_denom:.4f} - acc: {train_tot_acc/t_denom:.4f} - val_loss: {val_tot_loss/v_denom:.4f} - val_acc: {val_tot_acc/v_denom:.4f}")

In [None]:
# Train the module-level `network` for 5 epochs with SGD at lr=0.01, validating on val_dl.
# NOTE(review): the recorded first-epoch loss is already low, which suggests this cell was
# last run on a network that had been trained before; a fresh kernel will likely differ.
fit(5, 0.01, network, nn.CrossEntropyLoss(), optim.SGD, tr_dl, val_dl)

epoch 01/05 - loss: 0.3559 - acc: 0.8699 - val_loss: 0.4342 - val_acc: 0.8355
epoch 02/05 - loss: 0.3415 - acc: 0.8731 - val_loss: 0.3908 - val_acc: 0.8585
epoch 03/05 - loss: 0.3293 - acc: 0.8779 - val_loss: 0.4098 - val_acc: 0.8539
epoch 04/05 - loss: 0.3203 - acc: 0.8815 - val_loss: 0.3903 - val_acc: 0.8627
epoch 05/05 - loss: 0.3114 - acc: 0.8839 - val_loss: 0.3966 - val_acc: 0.8600
