# Import libraries

In [27]:
from autodiff import Value
from autodiff.nn import *

import numpy as np
import math
import pickle

# Load and preprocess data

In [2]:
from datasets import load_dataset

# Load the MNIST dataset
mnist_dataset = load_dataset('mnist')

# Access the training and testing datasets
train_dataset = mnist_dataset['train']
test_dataset = mnist_dataset['test']

# Print the shapes of the datasets
print(f'Training dataset shape: {train_dataset.shape}')
print(f'Testing dataset shape: {test_dataset.shape}')

def preprocess_image(img):
    array = np.array(img)
    flat_array = array.reshape(-1)
    flat_array = flat_array / 255
    flat_array = list(flat_array)
    return flat_array

X_train = list(map(lambda x: preprocess_image(x['image']), train_dataset))[:1000]
y_train = [x['label'] for x in train_dataset][:1000]


X_test = list(map(lambda x: preprocess_image(x['image']), test_dataset))[:1000]
y_test = [x['label'] for x in test_dataset][:1000]

#Shuffle the training data
combined = list(zip(X_train, y_train))
random.shuffle(combined)
X_train, y_train = zip(*combined)

Found cached dataset mnist (C:/Users/shahz/.cache/huggingface/datasets/mnist/mnist/1.0.0/9d494b7f466d6931c64fb39d58bb1249a4d85c9eb9865d9bc20960b999e2a332)


  0%|          | 0/2 [00:00<?, ?it/s]

Training dataset shape: (60000, 2)
Testing dataset shape: (10000, 2)


# Defining model and performing training

### Define model

In [28]:
model = MLP(28*28, [32,32,10])

### Define softmax and loss

In [29]:
def softmax(logits):
    exp_logits = [value.exp() for value in logits]
    total = sum(exp_logits)
    return [(value/total) for value in exp_logits]

    
def cross_entropy_loss(predictions, label):
    return -1 *softmax(predictions)[label].log()


### Perform training

In [30]:
batch_size = 16
no_of_steps = math.ceil(len(X_train)/batch_size)
epochs = 10

for _ in range(epochs):
    print(f'Epoch:{_+1}')

    combined = list(zip(X_train, y_train))
    random.shuffle(combined)
    X_train, y_train = zip(*combined)
    
    for i in range(no_of_steps):
        X_batch = X_train[(i*batch_size):((i+1)*batch_size)]
        y_batch = y_train[(i*batch_size):((i+1)*batch_size)]

        input = [list(map(Value, X_list)) for X_list in X_batch]

        forward = list(map(model, input))
        loss_lst = [cross_entropy_loss(prediction, ground_truth) for prediction, ground_truth in zip(forward, y_batch)]
        average_loss = sum(loss_lst) / len(loss_lst)

        forward = [[num.data for num in sublist] for sublist in forward]
        accuracy = [my_list.index(max(my_list)) for my_list in forward]
        accuracy = [1 if acc == y else 0 for acc,y in zip(accuracy,y_batch)]
        accuracy = sum(accuracy) / len(accuracy) 
        

        print(f'Step {i+1}')
        print('Loss for this step:', average_loss)
        print('Accuracy:', accuracy)
    
        
        model.zero_grad()
        average_loss.backward()

        for p in model.parameters():
            p.data -= 0.001 * p.grad


Epoch:1
Step 1
Loss for this step: Value(data=84.71208311638756, grad=0)
Accuracy: 0.0625
Step 2
Loss for this step: Value(data=40.986077433061695, grad=0)
Accuracy: 0.0
Step 3
Loss for this step: Value(data=27.75498087343429, grad=0)
Accuracy: 0.125
Step 4
Loss for this step: Value(data=19.618400661264513, grad=0)
Accuracy: 0.25
Step 5
Loss for this step: Value(data=23.156014555738558, grad=0)
Accuracy: 0.125
Step 6
Loss for this step: Value(data=25.18481063545945, grad=0)
Accuracy: 0.0625
Step 7
Loss for this step: Value(data=19.129943130482598, grad=0)
Accuracy: 0.1875
Step 8
Loss for this step: Value(data=15.581019968023845, grad=0)
Accuracy: 0.25
Step 9
Loss for this step: Value(data=22.50501469644824, grad=0)
Accuracy: 0.0
Step 10
Loss for this step: Value(data=12.351369004797835, grad=0)
Accuracy: 0.125
Step 11
Loss for this step: Value(data=16.627393790000674, grad=0)
Accuracy: 0.0625
Step 12
Loss for this step: Value(data=12.735803251335003, grad=0)
Accuracy: 0.25
Step 13
Loss

# Save weights

In [1]:
weights = [n.data for n in model.parameters()]
with open("weights/mnist_model_weights_32.pkl", "wb") as file:
    # Use the dump method of the pickle module to save the list to the file
    pickle.dump(weights, file)

NameError: name 'model' is not defined

# Load weights

In [8]:
with open("weights/mnist_model_weights_32.pkl", "rb") as file:
    # Use the dump method of the pickle module to save the list to the file
    weights = pickle.load(file)

model.load_weights(weights)

# Perfrom validation

In [None]:
batch_size = 16
no_of_steps = math.ceil(len(X_train)/batch_size)
epochs = 10
total_loss = 0
    
for i in range(no_of_steps):
    X_batch = X_test[(i*batch_size):((i+1)*batch_size)]
    y_batch = y_test[(i*batch_size):((i+1)*batch_size)]

    input = [list(map(Value, X_list)) for X_list in X_batch]

    forward = list(map(model, input))
    
    loss_lst = [cross_entropy_loss(prediction, ground_truth) for prediction, ground_truth in zip(forward, y_batch)]
    average_loss = sum(loss_lst) / len(loss_lst)
    total_loss += average_loss

    forward = [[num.data for num in sublist] for sublist in forward]
    accuracy = [my_list.index(max(my_list)) for my_list in forward]
    accuracy = [1 if acc == y else 0 for acc,y in zip(accuracy,y_batch)]
    accuracy = sum(accuracy) / len(accuracy)

    print(f'Step {i+1}')
    print('Loss for this step:', average_loss)
    print('Accuracy:', accuracy)
    
   

Step 1
Loss for this step: Value(data=3.816225268411391, grad=0)
Accuracy: 0.375
Step 2
Loss for this step: Value(data=4.171489769272255, grad=0)
Accuracy: 0.25
Step 3
Loss for this step: Value(data=3.274247063940989, grad=0)
Accuracy: 0.125


KeyboardInterrupt: 

In [None]:
forward

[[Value(data=0.09424117365933707, grad=0),
  Value(data=0.4184609605078055, grad=0),
  Value(data=-0.664599063941032, grad=0),
  Value(data=-1.583431277894505, grad=0),
  Value(data=-0.017398324827849143, grad=0),
  Value(data=-0.7167853363564802, grad=0),
  Value(data=-0.4549307072219745, grad=0),
  Value(data=1.2038332274594987, grad=0),
  Value(data=0.373443543342243, grad=0),
  Value(data=0.8276204631562917, grad=0)],
 [Value(data=-2.1411602271465524, grad=0),
  Value(data=9.342420766679076, grad=0),
  Value(data=1.426822689023521, grad=0),
  Value(data=1.0807890493819232, grad=0),
  Value(data=1.525374580104971, grad=0),
  Value(data=-2.7366710983190337, grad=0),
  Value(data=-3.1013386643815863, grad=0),
  Value(data=-7.525030972078762, grad=0),
  Value(data=-4.048029728097313, grad=0),
  Value(data=-5.154371912577857, grad=0)],
 [Value(data=-0.09159542030479202, grad=0),
  Value(data=6.057653340343903, grad=0),
  Value(data=-0.8909368096575161, grad=0),
  Value(data=3.5007844887

In [None]:
[[num.data for num in sublist] for sublist in forward]

[[0.09424117365933707,
  0.4184609605078055,
  -0.664599063941032,
  -1.583431277894505,
  -0.017398324827849143,
  -0.7167853363564802,
  -0.4549307072219745,
  1.2038332274594987,
  0.373443543342243,
  0.8276204631562917],
 [-2.1411602271465524,
  9.342420766679076,
  1.426822689023521,
  1.0807890493819232,
  1.525374580104971,
  -2.7366710983190337,
  -3.1013386643815863,
  -7.525030972078762,
  -4.048029728097313,
  -5.154371912577857],
 [-0.09159542030479202,
  6.057653340343903,
  -0.8909368096575161,
  3.5007844887938986,
  3.7670957157016156,
  2.785540893347297,
  -1.4174161423345775,
  -2.1144482987143767,
  -1.701174886678726,
  -2.7923579206408284],
 [0.022787071091541974,
  -0.06794451262116072,
  -0.02291231521074643,
  0.025861598215127055,
  -0.03349018423849103,
  0.03171775816031195,
  -0.029382200323463108,
  0.026757418764064983,
  0.0002677626151923731,
  0.04518976147224434],
 [-7.190850118776851,
  7.904008371580309,
  3.5942784904048968,
  4.057915906987867,
 