In [146]:
import numpy as np
import kagglehub
import os
from PIL import Image
from IPython.display import display

# Download MNIST CSV dataset
path = kagglehub.dataset_download("oddrationale/mnist-in-csv")
print("Path to dataset files:", path)


Path to dataset files: C:\Users\ma\.cache\kagglehub\datasets\oddrationale\mnist-in-csv\versions\2


In [178]:

# Identify CSV files
csv_files = [f for f in os.listdir(path) if f.endswith('.csv')]
if not csv_files:
    raise FileNotFoundError(f"No CSV files found in {path}")

csv_path = os.path.join(path, csv_files[0])

# Load data
data = np.genfromtxt(csv_path, delimiter=',', skip_header=1)
labels = data[:, 0].astype(int)
pixels = data[:, 1:].astype(np.uint8)
scaled_pixels = pixels / 255.0


class Layer:
    def __init__(self, number, size, inputs=1):
        self.number = number
        self.size = size
        self.data = np.zeros((inputs, size), dtype=np.float64)

class Weight:
    def __init__(self, prev, next):
        self.value = (np.random.randn(prev.size, next.size).astype(np.float64))

class Bias:
    def __init__(self, next):
        self.value = np.random.randn(1, next.size).astype(np.float64)

class Neural:
    def __init__(self, activation):
        self.activation = activation
        self.layers = []
        self.weights = []
        self.biases = []

    def add_layer(self, layer):
        self.layers.append(layer)
        if len(self.layers) > 1:
            self.weights.append(Weight(self.layers[len(self.layers)-2], self.layers[len(self.layers)-1]))
            self.biases.append(Bias(self.layers[len(self.layers)-1]))

    def forward(self):
        self.z_values = []
        for i in range(1, len(self.layers)):
            prev_data = self.layers[i - 1].data     
            w = self.weights[i - 1].value            
            b = self.biases[i - 1].value            
            z = np.dot(prev_data, w) + b
            self.z_values.append(z)
            self.layers[i].data = self.activation(z)

    @property
    def output(self):
        return self.layers[len(self.layers)-1].data

def sigmoid(x):
    x = np.clip(x, -500, 500)
    return 1 / (1 + np.exp(-x))

# cost function = (predicted-real)^2 
def costFunction(predicted, real):
    return np.sum((predicted - real) ** 2)



In [179]:
input_taken = 5
indices = list(range(input_taken))

input_data = scaled_pixels[indices] 

actual = np.zeros((input_taken, 10), dtype=np.float64)
for i in range(len(indices)):
    idx = indices[i]
    label = labels[idx]
    actual[i, label] = 1
    # display(Image.fromarray(pixels[idx].reshape(28,28), mode = 'L'))


# print(actual)


In [180]:

model = Neural(sigmoid)

input_layer = Layer(1, 784, inputs=input_taken)
input_layer.data = input_data

model.add_layer(input_layer)
model.add_layer(Layer(2, 107, inputs=input_taken))
model.add_layer(Layer(3, 26, inputs=input_taken))
model.add_layer(Layer(4, 10, inputs=input_taken))

model.forward()

result = np.argmax(model.output, axis=1)
true_labels = labels[indices]

print("Predicted Labels:", result)
print("True Labels     :", true_labels)

cost = costFunction(model.output,actual)
print("Cost:", cost)

print((model.output).shape)
print(model.output)


Predicted Labels: [3 1 3 3 3]
True Labels     : [7 2 1 0 4]
Cost: 21.595649892891544
(5, 10)
[[2.08568811e-03 9.31446173e-01 9.94029030e-01 9.99994690e-01
  9.54012386e-01 7.37855520e-03 6.09315716e-01 4.38983423e-01
  9.77917770e-03 9.38870341e-01]
 [2.15044685e-03 9.99834592e-01 9.62779608e-01 9.99742803e-01
  5.91627130e-01 2.28578053e-01 8.63682947e-01 7.56609880e-01
  9.69827028e-04 9.38732486e-01]
 [1.06186249e-02 9.92856864e-01 9.97624487e-01 9.99963580e-01
  2.64365074e-01 1.21391458e-01 6.92822763e-01 1.17827710e-01
  5.92633555e-04 6.52975061e-01]
 [8.17665515e-04 8.49720277e-01 9.94997966e-01 9.99992398e-01
  2.91080419e-01 1.62087894e-01 7.69915844e-01 1.09852141e-01
  9.85096515e-04 9.17577452e-01]
 [7.39161300e-02 9.76175713e-01 8.72409155e-01 9.99417776e-01
  7.86616711e-01 1.06159349e-02 3.50713804e-01 3.85249027e-01
  2.69064871e-02 5.96017479e-01]]


In [181]:
# print(actual) actual is one hot encoded matrix
#real is parameter of various functions

In [182]:
def sigmoid_prime(x):
    s = sigmoid(x)
    return s * (1 - s)

In [183]:
print(true_labels)
print(result)
# print(scaled_pixels)
# print(model.output)


[7 2 1 0 4]
[3 1 3 3 3]


In [184]:
arra = np.array(true_labels)
print(arra)
# print(actual)

[7 2 1 0 4]


In [202]:
def delta_lastlayer(real, predicted, deactivated_matrix):
    number = input_taken
    difference = real - predicted
    # a = np.array(difference)   
    # a = a.reshape(-1, 1)           
    # repeated = np.repeat(a, 10, axis=1) 
    # print(f"{real} - {predicted} = {difference}")
    ans = (difference) * sigmoid_prime(deactivated_matrix)
    return ans



In [203]:
sigmoid_prime(model.z_values[-1]).shape

(5, 10)

In [204]:
# arrrr = delta_lastlayer(arra, result, model.z_values[-1])
arrrr = delta_lastlayer(actual, model.output, model.z_values[-1])

In [188]:
def delta_calc(current_layer_number ):   #give current layer in backward terms like -1-2-3 and so on
    if (current_layer_number == -1):
        return arrrr
    else:
        answer = np.dot(delta_calc(current_layer_number + 1), np.transpose(model.weights[current_layer_number + 1].value)) * sigmoid_prime(model.layers[current_layer_number].data)
        # print(answer)
        return answer

In [189]:
numberOfWeightOrBiasMatrices = 3
delta_list = []     #deltas will be stored in order -3 , -2, -1 that is (for layer 1, layer 2, layer 3(output layer))
for i in (range(numberOfWeightOrBiasMatrices)):
    temp = delta_calc(i - numberOfWeightOrBiasMatrices)
    delta_list.append(temp)
    print("Shape of delta matrix of Layer",i-numberOfWeightOrBiasMatrices,temp.shape)

print(len(delta_list))      #no. of delta matrices added


Shape of delta matrix of Layer -3 (5, 107)
Shape of delta matrix of Layer -2 (5, 26)
Shape of delta matrix of Layer -1 (5, 10)
3


In [205]:
def dJdW(current_layernumber):         #input layer number in negatives
    input_of_current_layer = np.transpose(model.layers[current_layernumber-1].data)
    ans = np.dot(input_of_current_layer, delta_calc(current_layernumber) )
    return ans


In [206]:
def dJdB(currentlayernumber):       #input layer number in negatives
    ans = np.sum(delta_calc(currentlayernumber), axis = 0)
    return ans

In [207]:
learning_rate = 1


In [None]:
# def update_parameters(learning_rate):
#     for i in range(len(model.weights)):
#         w_grad = dJdW(-(len(model.weights)-i))
#         b_grad = dJdB(-(len(model.biases)-i))
#         model.weights[i].value -= learning_rate * w_grad
#         model.biases[i].value  -= learning_rate * b_grad


In [None]:
epochs = 1000

# for epoch in range(epochs):
#     model.forward()  # forward pass
#     cost = costFunction(model.output, actual)
    
#     # Calculate deltas again
#     delta_list.clear()
#     for i in range(numberOfWeightOrBiasMatrices):
#         temp = delta_calc(i - numberOfWeightOrBiasMatrices)
#         delta_list.append(temp)

#     update_parameters(learning_rate)
    
#     if epoch % 100 == 0:
#         print(f"Epoch {epoch}, Cost: {cost}")


Epoch 0, Cost: 21.595649892891544
Epoch 100, Cost: 39.39911291183949
Epoch 200, Cost: 39.99982730815431
Epoch 300, Cost: 40.001661402984155
Epoch 400, Cost: 40.02653982887857
Epoch 500, Cost: 40.31136431378264
Epoch 600, Cost: 41.7085899975422
Epoch 700, Cost: 43.66544935281328
Epoch 800, Cost: 44.62556131780675
Epoch 900, Cost: 44.90751555815546


In [232]:
new_weights = np.array((model.weights[-1].value) - (dJdW(-1)))
# model.weights[-1].value.shape
print(new_weights.shape)

(26, 10)


In [None]:
new_biases = np.array((model.biases[-1].value) - (dJdB(-1)))

print(new_biases.shape)

(1, 10)
