In [None]:
import numpy as np
!pip install idx2numpy
import idx2numpy
import numpy as np
# Directory holding the four IDX-format files (images and labels for the
# training and test splits).
data_path = '/content/drive/MyDrive/Colab Notebooks/archive/'
train_images_path = f'{data_path}train-images.idx3-ubyte'
train_labels_path = f'{data_path}train-labels.idx1-ubyte'
test_images_path = f'{data_path}t10k-images.idx3-ubyte'
test_labels_path = f'{data_path}t10k-labels.idx1-ubyte'

# Decode each IDX file into a NumPy array: training split first, then test.
X_train, y_train = (
    idx2numpy.convert_from_file(path)
    for path in (train_images_path, train_labels_path)
)
X_test, y_test = (
    idx2numpy.convert_from_file(path)
    for path in (test_images_path, test_labels_path)
)

# Flatten everything after the first axis, transpose so that the first axis
# (one entry per sample) becomes the column axis, and divide by 255.
# NOTE(review): the division presumably maps 8-bit pixel values into [0, 1];
# confirm the dtype of the loaded images.
X_train = X_train.reshape(len(X_train), -1).T / 255.0
X_test = X_test.reshape(len(X_test), -1).T / 255.0


def one_hot_encode(y, num_classes=10):
    """Return a one-hot matrix with one column per entry of ``y``.

    Rows of a ``num_classes`` x ``num_classes`` identity matrix are selected
    by the integer labels in ``y`` and the selection is transposed, so a 1-D
    label vector of length ``m`` yields an array of shape
    ``(num_classes, m)``.

    Args:
        y: Integer class labels used to index the identity matrix.
        num_classes: Size of the identity matrix (number of classes).

    Returns:
        The transposed selection of identity rows.
    """
    identity = np.eye(num_classes)
    selected_rows = identity[y]
    return selected_rows.T

# One-hot encode the integer labels of both splits (classes along rows).
Y_train, Y_test = one_hot_encode(y_train), one_hot_encode(y_test)

def softmax(z):
    """Apply the softmax function along axis 0 of ``z``.

    Args:
        z: Array of scores; normalisation is performed over axis 0, so each
            column of a 2-D input is normalised independently.

    Returns:
        Array of the same shape as ``z`` whose entries along axis 0 sum to 1.
    """
    # Subtracting the maximum along axis 0 does not change the result
    # mathematically but keeps np.exp from overflowing on large scores.
    shifted = z - np.max(z, axis=0, keepdims=True)
    exponentials = np.exp(shifted)
    totals = np.sum(exponentials, axis=0, keepdims=True)
    return exponentials / totals

def sigmoid(z):
    """Element-wise logistic sigmoid ``1 / (1 + exp(-z))``.

    The value is computed as ``exp(-log(1 + exp(-z)))`` with
    ``np.logaddexp`` evaluating the logarithm, so large negative inputs do
    not overflow ``np.exp`` (the direct formula evaluates ``exp(-z)``, which
    overflows to ``inf`` and emits a RuntimeWarning for such inputs).

    Args:
        z: Scalar or array of real values.

    Returns:
        The sigmoid of ``z``, element-wise.
    """
    # logaddexp(0, -z) == log(exp(0) + exp(-z)) == log(1 + exp(-z)).
    return np.exp(-np.logaddexp(0, np.negative(z)))

def relu(z):
    """Rectified linear unit: the element-wise maximum of 0 and ``z``.

    Args:
        z: Scalar or array of values.

    Returns:
        ``z`` with every negative entry replaced by 0.
    """
    floor = 0
    return np.maximum(floor, z)

def layer_size(X, Y, n_h=4):
    """Return the layer sizes ``(n_x, n_h, n_y)`` for a two-layer network.

    Args:
        X: Input array; its first dimension is taken as the input size.
        Y: Target array; its first dimension is taken as the output size.
        n_h: Number of hidden units. Defaults to 4, the value that was
            previously fixed inside the function.

    Returns:
        Tuple ``(n_x, n_h, n_y)``.
    """
    n_x = X.shape[0]
    n_y = Y.shape[0]
    return (n_x, n_h, n_y)

def initialize_parameters(n_x, n_h, n_y, seed=None):
    """Create the initial weights and biases of the two-layer network.

    Weights are drawn from a standard normal distribution and scaled by 0.01;
    biases start at zero.

    Args:
        n_x: Input size (number of columns of ``W1``).
        n_h: Hidden size (rows of ``W1``, columns of ``W2``).
        n_y: Output size (rows of ``W2``).
        seed: Optional seed. When ``None`` (the default) the weights are drawn
            from NumPy's global ``np.random`` state. When given, a private
            ``np.random.RandomState(seed)`` is used, so the draw is
            reproducible and the global state is left untouched.

    Returns:
        Dict with keys ``"W1"`` of shape ``(n_h, n_x)``, ``"b1"`` of shape
        ``(n_h, 1)``, ``"W2"`` of shape ``(n_y, n_h)`` and ``"b2"`` of shape
        ``(n_y, 1)``.
    """
    # Both the np.random module and a RandomState instance expose randn().
    rng = np.random if seed is None else np.random.RandomState(seed)

    W1 = rng.randn(n_h, n_x) * 0.01
    b1 = np.zeros((n_h, 1))
    W2 = rng.randn(n_y, n_h) * 0.01
    b2 = np.zeros((n_y, 1))

    parameters = {"W1": W1, "b1": b1, "W2": W2, "b2": b2}
    return parameters

def forward_propagation(X, parameters):
    """Run the forward pass of the two-layer network.

    Args:
        X: Input array, multiplied from the left by ``W1``.
        parameters: Dict holding ``"W1"``, ``"b1"``, ``"W2"`` and ``"b2"``.

    Returns:
        Tuple ``(A2, cache)`` where ``A2`` is the softmax output and ``cache``
        is a dict with the intermediate values ``"Z1"``, ``"A1"``, ``"Z2"``
        and ``"A2"``.
    """
    W1, b1, W2, b2 = (parameters[key] for key in ("W1", "b1", "W2", "b2"))

    # Hidden layer: affine transform followed by tanh.
    Z1 = np.dot(W1, X) + b1
    A1 = np.tanh(Z1)

    # Output layer: affine transform followed by softmax.
    Z2 = np.dot(W2, A1) + b2
    A2 = softmax(Z2)

    cache = dict(Z1=Z1, A1=A1, Z2=Z2, A2=A2)
    return A2, cache

def compute_cost(A2, Y):
    """Cross-entropy cost between predictions ``A2`` and targets ``Y``.

    Args:
        A2: Predicted values, same shape as ``Y``.
        Y: Target array; its second dimension is used as the number of
            examples to average over.

    Returns:
        ``-sum(Y * log(A2 + 1e-8))`` divided by ``Y.shape[1]``.
    """
    num_examples = Y.shape[1]
    # The small constant keeps the logarithm finite when a prediction is 0.
    log_predictions = np.log(A2 + 1e-8)
    weighted_total = np.sum(Y * log_predictions)
    return -weighted_total / num_examples

def backward_propagation(parameters, cache, X, Y):
    """Compute the gradients of the cost for the two-layer network.

    Only ``W2`` is read from ``parameters`` and only ``A1`` and ``A2`` from
    ``cache``; ``W1`` is not needed for the gradients and is no longer
    looked up.

    Args:
        parameters: Dict containing at least ``"W2"``.
        cache: Dict containing the activations ``"A1"`` and ``"A2"``.
        X: Input array; its second dimension is the number of examples.
        Y: Target array with the same shape as ``A2``.

    Returns:
        Dict with the gradients ``"dW1"``, ``"db1"``, ``"dW2"`` and ``"db2"``,
        each averaged over the examples.
    """
    m = X.shape[1]
    W2 = parameters['W2']
    A1 = cache['A1']
    A2 = cache['A2']

    # Output layer: error term is the difference between prediction and target.
    dZ2 = A2 - Y
    dW2 = np.dot(dZ2, A1.T) / m
    db2 = np.sum(dZ2, axis=1, keepdims=True) / m

    # Hidden layer: (1 - A1**2) is the derivative of tanh expressed via A1.
    dZ1 = np.dot(W2.T, dZ2) * (1 - np.power(A1, 2))
    dW1 = np.dot(dZ1, X.T) / m
    db1 = np.sum(dZ1, axis=1, keepdims=True) / m

    grads = {"dW1": dW1, "db1": db1, "dW2": dW2, "db2": db2}
    return grads

def update_parameters(parameters, grads, learning_rate):
    """Apply one gradient-descent step and return the new parameters.

    Args:
        parameters: Dict holding ``"W1"``, ``"b1"``, ``"W2"`` and ``"b2"``.
        grads: Dict holding the matching gradients under the same names
            prefixed with ``"d"``.
        learning_rate: Step size multiplying each gradient.

    Returns:
        A new dict with the same four keys; the input dict is not modified.
    """
    return {
        name: parameters[name] - learning_rate * grads["d" + name]
        for name in ("W1", "b1", "W2", "b2")
    }

def predict(X, parameters):
    """Return the predicted class index for each column of ``X``.

    Runs the forward pass and picks, along axis 0, the index of the largest
    output value.
    """
    probabilities = forward_propagation(X, parameters)[0]
    return np.argmax(probabilities, axis=0)

def neural_network(X, Y, num_iterations, learning_rate, n_h=64, print_every=100):
    """Train the two-layer network with full-batch gradient descent.

    Args:
        X: Input array; its first dimension is the input size.
        Y: Target array; its first dimension is used as the size of the
            output layer (previously fixed at 10, which only matched targets
            with exactly 10 rows).
        num_iterations: Number of gradient-descent steps.
        learning_rate: Step size passed to ``update_parameters``.
        n_h: Number of hidden units. Defaults to 64.
        print_every: The cost is printed and recorded on every iteration
            whose index is a multiple of this value. Defaults to 100. A
            falsy value (``0`` or ``None``) disables printing and recording.

    Returns:
        Tuple ``(parameters, costs)``: the trained parameter dict and the
        list of recorded cost values.
    """
    n_x = X.shape[0]
    # Derive the output size from the targets so the softmax layer always
    # matches Y instead of relying on a hard-coded class count.
    n_y = Y.shape[0]

    parameters = initialize_parameters(n_x, n_h, n_y)
    costs = []

    for i in range(num_iterations):
        A2, cache = forward_propagation(X, parameters)
        # The cost is evaluated before this iteration's update is applied.
        cost = compute_cost(A2, Y)
        grads = backward_propagation(parameters, cache, X, Y)
        parameters = update_parameters(parameters, grads, learning_rate)

        if print_every and i % print_every == 0:
            print(f"Cost after iteration {i}: {cost}")
            costs.append(cost)

    return parameters, costs


In [None]:

# Training hyper-parameters.
num_iterations = 2500
learning_rate = 0.05

# The weights are drawn from NumPy's global random state, so seed it right
# before training; otherwise every run starts from different weights and
# reports a different cost curve and accuracy.
random_seed = 42
np.random.seed(random_seed)

trained_parameters, training_costs = neural_network(X_train, Y_train, num_iterations, learning_rate)



In [None]:
import matplotlib.pyplot as plt

# Share of test samples whose predicted class equals the true label.
test_predictions = predict(X_test, trained_parameters)
accuracy = (test_predictions == y_test).mean()
print(f"Test accuracy: {accuracy * 100:.2f}%")

# The x values step by 100 to line up with the cost values recorded during
# training (one value every 100 iterations).
logged_iterations = range(0, num_iterations, 100)

fig, ax = plt.subplots()
ax.plot(logged_iterations, training_costs)
ax.set_xlabel('Iterations')
ax.set_ylabel('Cost')
ax.set_title('Training Cost')
plt.show()