<a href="https://colab.research.google.com/github/shukurullo2004/Machine-learnings/blob/main/mnist_scratch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
from torchvision import datasets, transforms
import numpy as np
from torch.utils.data import DataLoader
# Seed torch's global RNG so the shuffled batch order is repeatable.
torch.manual_seed(42)

# ToTensor converts each image to a float tensor; Normalize then applies
# (value - 0.5) / 0.5 to the single channel.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,)),
    ]
)

# MNIST training and test splits, downloaded into ./data when missing.
train_dataset = datasets.MNIST(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)
test_dataset = datasets.MNIST(
    root='./data',
    train=False,
    download=True,
    transform=transform,
)

# Batches of 64; only the training loader shuffles.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=64,
    shuffle=True,
)
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=64,
    shuffle=False,
)


In [None]:
# Walk each loader exactly ONCE and keep every image batch paired with its
# label batch. A shuffling DataLoader yields a different sample order on each
# pass, so collecting images and labels in two separate passes would leave
# X_train[i] and y_train[i] referring to different samples.
train_batches = [
    (x.numpy().reshape(-1, 28*28), y.numpy()) for x, y in train_loader
]
X_train = np.concatenate([images for images, _ in train_batches])
y_train = np.concatenate([labels for _, labels in train_batches])

# The test loader does not shuffle, but a single pass also avoids decoding
# the whole dataset twice.
test_batches = [
    (x.numpy().reshape(-1, 28*28), y.numpy()) for x, y in test_loader
]
X_test = np.concatenate([images for images, _ in test_batches])
y_test = np.concatenate([labels for _, labels in test_batches])


In [None]:
import numpy as np
# Module-level store for the network's weights and biases. init_params()
# fills it in place; the training code reads and updates it.
parameters = {}


def init_params(seed=None):
    """(Re)initialise the weights and biases of the 784-128-128-10 network.

    Weights use He initialisation: a standard normal draw scaled by
    ``sqrt(2 / fan_in)``. Unscaled standard-normal weights make the
    pre-activations grow with the layer width (784 and 128 inputs here),
    which saturates the softmax and stalls gradient descent. Biases start
    at zero.

    Args:
        seed: Optional integer seed. When given, weights are drawn from a
            private ``np.random.RandomState(seed)`` so the result is
            reproducible without touching NumPy's global RNG. When ``None``
            (the default) the global ``np.random`` state is used.

    Returns:
        The module-level ``parameters`` dict, updated in place, with keys
        ``W1, b1, W2, b2, W3, b3``.
    """
    input_size = 28 * 28
    hidden_size = 128
    output_size = 10

    rng = np.random if seed is None else np.random.RandomState(seed)

    # (fan_in, fan_out) of each dense layer, in forward order.
    layer_shapes = [
        (input_size, hidden_size),
        (hidden_size, hidden_size),
        (hidden_size, output_size),
    ]
    # The dict is mutated, never rebound, so no ``global`` statement is needed.
    for layer, (fan_in, fan_out) in enumerate(layer_shapes, start=1):
        he_scale = np.sqrt(2.0 / fan_in)
        parameters[f'W{layer}'] = rng.randn(fan_in, fan_out) * he_scale
        parameters[f'b{layer}'] = np.zeros((1, fan_out))

    return parameters


init_params()
parameters.keys()

In [None]:
def relu(x):
    """Apply the rectified linear unit element-wise.

    Entries below zero are replaced by 0; all other entries pass through.
    """
    rectified = np.maximum(0, x)
    return rectified

def linear(x, w, b):
    """Affine layer: the dot product of ``x`` and ``w`` plus the bias ``b``.

    ``b`` is added with NumPy broadcasting, so a ``(1, n)`` bias row is
    applied to every row of the product.
    """
    projected = np.dot(x, w)
    return projected + b

def softmax(x):
    """Row-wise softmax of a 2-D array of scores.

    Each row's maximum is subtracted before exponentiating, which leaves the
    result unchanged mathematically but keeps ``exp`` from overflowing.
    """
    row_max = np.max(x, axis=1, keepdims=True)
    unnormalised = np.exp(x - row_max)
    row_total = np.sum(unnormalised, axis=1, keepdims=True)
    return unnormalised / row_total


In [None]:
def forward_propagation(X, parameters):
    """Run a batch through the network: two ReLU layers, then softmax.

    Args:
        X: Input batch; it is multiplied by ``parameters['W1']``.
        parameters: Dict providing ``W1, b1, W2, b2, W3, b3``.

    Returns:
        A pair ``(y_pred, cache)``. ``y_pred`` is the softmax output and
        ``cache`` maps ``'a1'`` and ``'a2'`` to the hidden activations and
        ``'y_pred'`` to the same softmax output.
    """
    hidden_1 = relu(linear(X, parameters['W1'], parameters['b1']))
    hidden_2 = relu(linear(hidden_1, parameters['W2'], parameters['b2']))
    probabilities = softmax(linear(hidden_2, parameters['W3'], parameters['b3']))

    cache = {'a1': hidden_1, 'a2': hidden_2, 'y_pred': probabilities}
    return probabilities, cache

def backward_propagation(X, y_true, parameters, cache):
    """Back-propagate through the softmax output and the two ReLU layers.

    The output error is ``y_pred - y_true``, i.e. the gradient of softmax
    followed by cross-entropy with respect to the logits. All gradients are
    averaged over the ``m = X.shape[0]`` samples of the batch.

    Args:
        X: Input batch that produced ``cache``.
        y_true: One-hot targets with the same shape as ``cache['y_pred']``.
        parameters: Dict providing at least ``W2`` and ``W3``.
        cache: Dict with the activations ``a1``, ``a2`` and ``y_pred`` from
            the forward pass.

    Returns:
        Dict with keys ``W1, b1, W2, b2, W3, b3`` holding the gradient for
        the parameter of the same name.

    Raises:
        ValueError: If ``y_true`` and ``cache['y_pred']`` differ in shape.
    """
    a1, a2, y_pred = cache['a1'], cache['a2'], cache['y_pred']

    # Without this check, 1-D integer labels whose length happens to equal
    # the number of classes would broadcast silently and corrupt gradients.
    if np.shape(y_true) != np.shape(y_pred):
        raise ValueError(
            f'y_true must be one-hot with shape {np.shape(y_pred)}, '
            f'got {np.shape(y_true)}'
        )

    m = X.shape[0]
    # Only the weights the error flows back through are needed here.
    W2, W3 = parameters['W2'], parameters['W3']

    dz3 = y_pred - y_true
    dW3 = np.dot(a2.T, dz3) / m
    db3 = np.sum(dz3, axis=0, keepdims=True) / m

    # (a > 0) is the ReLU derivative: the error passes only through units
    # that were active in the forward pass.
    dz2 = np.dot(dz3, W3.T) * (a2 > 0)
    dW2 = np.dot(a1.T, dz2) / m
    db2 = np.sum(dz2, axis=0, keepdims=True) / m

    dz1 = np.dot(dz2, W2.T) * (a1 > 0)
    dW1 = np.dot(X.T, dz1) / m
    db1 = np.sum(dz1, axis=0, keepdims=True) / m

    return {'W1': dW1, 'b1': db1, 'W2': dW2, 'b2': db2, 'W3': dW3, 'b3': db3}

def sparse_categorical_crossentropy(probabilities, targets, epsilon=1e-10):
    """Mean negative log-probability assigned to the target classes.

    Args:
        probabilities: 2-D array; row ``i`` holds the class probabilities
            predicted for sample ``i``.
        targets: Integer class index for each row.
        epsilon: Probabilities are clipped to ``[epsilon, 1 - epsilon]``
            before the logarithm is taken.

    Returns:
        The negative log-probabilities summed over the rows and divided by
        the number of rows.
    """
    row_count = probabilities.shape[0]

    # Clip rather than add: keeps log() away from 0 without shifting values
    # that are already inside the safe range.
    safe = np.clip(probabilities, epsilon, 1 - epsilon)

    # Pair each row index with its target to pick one probability per row.
    picked = safe[np.arange(row_count), targets]

    return np.sum(-np.log(picked)) / row_count


def update_parameters(parameters, gradients, learning_rate):
    """Take one gradient-descent step on every entry of ``parameters``.

    Each value is updated with ``-=`` under its own key, so ``parameters``
    itself is modified and nothing is returned. ``gradients`` must provide
    an entry for every key of ``parameters``.
    """
    for name in parameters:
        step = learning_rate * gradients[name]
        parameters[name] -= step

In [None]:
def accuracy_fn(y_true, y_pred):
    """Percentage of rows whose highest-scoring class equals the true label.

    Args:
        y_true: Integer class label per sample.
        y_pred: 2-D array of per-class scores; the arg-max along axis 1 is
            taken as the predicted class.

    Returns:
        Accuracy on a 0-100 scale.
    """
    predicted_classes = np.argmax(y_pred, axis=1)
    hits = np.sum(y_true == predicted_classes)
    return (hits / len(y_true)) * 100


In [None]:
def one_hot_encode(labels, num_classes):
    """Convert integer class labels into a one-hot float matrix.

    Args:
        labels: 1-D sequence or array of integer class indices.
        num_classes: Number of columns in the result.

    Returns:
        Array of shape ``(len(labels), num_classes)`` filled with zeros,
        except for a 1 at ``[i, labels[i]]`` in every row.
    """
    labels = np.asarray(labels)
    encoded_labels = np.zeros((len(labels), num_classes))
    # One vectorised assignment replaces a Python loop over every label.
    # The guard keeps empty input working: an empty list becomes a float
    # array, which NumPy would reject as an index.
    if labels.size:
        encoded_labels[np.arange(len(labels)), labels] = 1
    return encoded_labels

# One-hot targets for the training labels (10 classes, one per MNIST digit).
y_train_one_hot = one_hot_encode(y_train, num_classes=10)

In [None]:
# Start from freshly initialised weights so re-running this cell restarts
# training instead of continuing from the previous run.
init_params()
epochs = 300
learning_rate = 0.01

train_loss = []
train_acc = []

# The labels never change between epochs, so build their one-hot encoding
# once here instead of rebuilding it on every iteration.
y_train_one_hot = one_hot_encode(y_train, 10)

# Full-batch gradient descent: every epoch is one pass over all of X_train
# followed by a single parameter update.
for epoch in range(epochs):
    y_pred, cache = forward_propagation(X_train, parameters)

    # Loss and accuracy describe the parameters *before* this epoch's update.
    loss = sparse_categorical_crossentropy(y_pred, y_train)
    acc = accuracy_fn(y_train, y_pred)

    gradients = backward_propagation(X_train, y_train_one_hot, parameters, cache)
    update_parameters(parameters, gradients, learning_rate=learning_rate)

    train_loss.append(loss)
    train_acc.append(acc)
    if epoch % 10 == 0:
        print(f'Epoch {epoch}, Loss: {loss}, Acc: {acc}')

In [None]:
import matplotlib.pyplot as plt
epoch_range = range(epochs)
plt.figure(figsize=(12, 5))

# Left panel: training loss per epoch.
plt.subplot(1, 2, 1)
plt.plot(epoch_range, train_loss, label='Training Loss')
plt.title('Training Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()

# Right panel: accuracy gets its own axes. Drawn on the loss axes it would
# sit under the "Training Loss" title, share a y-scale with a quantity in
# different units, and be missing from the legend.
plt.subplot(1, 2, 2)
plt.plot(epoch_range, train_acc, label='Training Accuracy', color='orange')
plt.title('Training Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy (%)')
plt.legend()

plt.tight_layout()
plt.show()

In [None]:
import matplotlib.pyplot as plt
import random

def plot_images(images, true_labels, pred_labels):
    """Show ``images`` side by side, each titled with its two labels.

    Args:
        images: Sequence of arrays, each reshapeable to 28x28.
        true_labels: Label shown after "True:" for the image at that index.
        pred_labels: Label shown after "Pred:" for the image at that index.
    """
    column_count = len(images)
    plt.figure(figsize=(10, 5))
    for idx, image in enumerate(images):
        # Subplot positions are 1-based; all images share a single row.
        plt.subplot(1, column_count, idx + 1)
        plt.imshow(image.reshape(28, 28), cmap='gray')
        plt.title(f'True: {true_labels[idx]}\nPred: {pred_labels[idx]}')
        plt.axis('off')
    plt.show()

# Pick distinct random rows of the test set to inspect by eye.
num_samples_to_plot = 10
random_indices = random.sample(range(len(X_test)), k=num_samples_to_plot)
sample_images, true_labels = X_test[random_indices], y_test[random_indices]

# The predicted class is the index of the largest softmax output; the
# activation cache returned alongside it is not needed here.
y_pred, _ = forward_propagation(sample_images, parameters)
pred_labels = y_pred.argmax(axis=1)

plot_images(sample_images, true_labels, pred_labels)
