### Reading the data


In [1]:
#### Splitting the data into test and train datasets

import pandas as pd
from sklearn.model_selection import train_test_split

# Load the MNIST dataset
mnist_data = pd.read_csv("mnist_train.csv")

# Assuming the labels are in the first column and pixel values start from the second column
X = mnist_data.iloc[:, 1:]
Y = mnist_data.iloc[:, 0]
# print(X)
# print(X.shape)
# print(Y)
# print(Y.shape)

# Split the data into training and testing sets
# By default, the test_size is set to 0.25, meaning 25% of the data will be used for testing
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.25, random_state=42)

# Print the shapes of the resulting sets
print("X_train shape:", X_train.shape)
print("X_test shape:", X_test.shape)
print("y_train shape:", Y_train.shape)
print("y_test shape:", Y_test.shape)
# print("X_test is : " ,X_test)
# print(X_test.shape)
# print("Y_test is : " , Y_test)
print(Y_test.shape)

X_train shape: (45000, 784)
X_test shape: (15000, 784)
y_train shape: (45000,)
y_test shape: (15000,)
(15000,)


### Updating the weights and bias

In [42]:
import numpy as np

np.random.seed(230499)
no_of_rows, no_of_cols = X_train.shape

# Initialize weights and biases
weights = np.zeros([no_of_cols, 10])  # Assuming 10 classes
bias = np.random.rand(1, 10)

learning_rate = 0.00001
num_epochs = 1000

# Convert target labels to one-hot encoding
target_one_hot = np.eye(10)[Y_train]

# Forward Pass
inputs = np.array(X_train)

for epoch in range(num_epochs):
    # Forward Pass
    logits = np.dot(inputs, weights) + bias
    exp_logits = np.exp(logits)
    softmax_output = exp_logits / np.sum(exp_logits, axis=1, keepdims=True)

    # Cross-entropy loss
    loss = -np.sum(target_one_hot * np.log(softmax_output + 1e-8)) / no_of_rows
    
    # Optional: Print the loss for monitoring convergence
    if epoch % 100 == 0:
        print(f"Epoch {epoch}/{num_epochs}, Cross-Entropy Loss: {loss}")

    # Backward Pass
    d_logits = softmax_output - target_one_hot
    d_weights = np.dot(inputs.T, d_logits) / no_of_rows
    d_bias = np.sum(d_logits, axis=0, keepdims=True) / no_of_rows

    # Update weights and bias
    weights -= learning_rate * d_weights
    bias -= learning_rate * d_bias

# After training, 'weights' and 'bias' will be adjusted for classification.
print("Trained Weights shape:", weights.shape)
print("Trained Bias shape:", bias.shape)


Epoch 0/1000, Cross-Entropy Loss: 2.3115208110030423
Epoch 100/1000, Cross-Entropy Loss: 0.3771888817315826
Epoch 200/1000, Cross-Entropy Loss: 0.33784919031490723
Epoch 300/1000, Cross-Entropy Loss: 0.32015421176649206
Epoch 400/1000, Cross-Entropy Loss: 0.30941356213108134
Epoch 500/1000, Cross-Entropy Loss: 0.3019431035421183
Epoch 600/1000, Cross-Entropy Loss: 0.29631796140716743
Epoch 700/1000, Cross-Entropy Loss: 0.2918562423514657
Epoch 800/1000, Cross-Entropy Loss: 0.28818574308202044


KeyboardInterrupt: 

In [7]:
import numpy as np

np.random.seed(230499)
no_of_rows, no_of_cols = X_train.shape

# Initialize weights and biases
weights = np.zeros([no_of_cols, 10])  # Assuming 10 classes
bias = np.random.rand(1, 10)

learning_rate = 0.00001
num_epochs = 5000

# Convert target labels to one-hot encoding
target_one_hot = np.eye(10)[Y_train]

# Forward Pass
inputs = np.array(X_train)

logits = np.dot(inputs, weights) + bias

exp_logits = np.exp(-logits)
y = 1/(1 + exp_logits)

[[0.58692288 0.68806999 0.67215694 ... 0.57511545 0.59606365 0.62362052]
 [0.58692288 0.68806999 0.67215694 ... 0.57511545 0.59606365 0.62362052]
 [0.58692288 0.68806999 0.67215694 ... 0.57511545 0.59606365 0.62362052]
 ...
 [0.58692288 0.68806999 0.67215694 ... 0.57511545 0.59606365 0.62362052]
 [0.58692288 0.68806999 0.67215694 ... 0.57511545 0.59606365 0.62362052]
 [0.58692288 0.68806999 0.67215694 ... 0.57511545 0.59606365 0.62362052]]
(45000, 10)
45000


### Testing the accuracy.

In [14]:
# Forward Pass on Test Set
inputs_test = np.array(X_test)
logits_test = np.dot(inputs_test, weights) + bias
softmax_output_test = np.exp(logits_test) / np.sum(np.exp(logits_test), axis=1, keepdims=True)

# Predicted labels (argmax to get the class with the highest probability)
predicted_labels = np.argmax(softmax_output_test, axis=1)
predicted_labels = predicted_labels.reshape(-1, 1)
print(predicted_labels.shape)
Y_test = np.array(Y_test).reshape(-1, 1)
print(Y_test.shape)
# Accuracy calculation
accuracy = np.mean(predicted_labels == Y_test)
print(f"Accuracy on Test Set: {accuracy * 100:.2f}%")

(15000, 1)
(15000, 1)
Accuracy on Test Set: 92.24%


### Predicting the digits.

In [32]:
# Assuming 'weights' and 'bias' are the trained parameters

def predict_class(input_vector):
    input_vector = np.array(input_vector).reshape(1, -1)
    logits = np.dot(input_vector, weights) + bias
    softmax_output = np.exp(logits) / np.sum(np.exp(logits), axis=1, keepdims=True)
    predicted_class = np.argmax(softmax_output, axis=1)[0]
    return predicted_class
X_test = np.array(X_test)
# # Example usage
input_example = X_test[1005]  # Replace with the input vector you want to predict
predicted_class_example = predict_class(input_example)
print(f"Predicted Class: {predicted_class_example}")
print(Y_test[1005])


Predicted Class: 9
[9]


### One Hidden Layer.

In [7]:
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np

# Load the MNIST dataset
mnist_data = pd.read_csv("mnist_train.csv")

# Assuming the labels are in the first column and pixel values start from the second column
X = mnist_data.iloc[:, 1:]
Y = mnist_data.iloc[:, 0]

# Split the data into training and testing sets
# By default, the test_size is set to 0.25, meaning 25% of the data will be used for testing
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.25, random_state=42)
X_train = np.array(X_train)
X_test = np.array(X_test)
Y_train = np.array(Y_train)
Y_test = np.array(Y_test)

In [5]:


np.random.seed(123)
no_of_rows, no_of_cols = X_train.shape

# Initialize weights and biases for the first layer (hidden layer)
hidden_layer_neurons = 10
weights_hidden = np.random.randn(no_of_cols, hidden_layer_neurons)
bias_hidden = np.random.rand(1, hidden_layer_neurons)

# Initialize weights and biases for the output layer
weights_output = np.zeros([hidden_layer_neurons, 10])  # Assuming 10 classes
bias_output = np.random.rand(1, 10)

learning_rate = 1
num_epochs = 1000

# Convert target labels to one-hot encoding
target_one_hot = np.eye(10)[Y_train]

# Forward Pass
inputs = np.array(X_train)

for epoch in range(num_epochs):
    # Forward Pass - Hidden Layer
    hidden_layer_output = 1 / (1 + np.exp(-(np.dot(inputs, weights_hidden) + bias_hidden)))

    # Forward Pass - Output Layer
    logits = np.dot(hidden_layer_output, weights_output) + bias_output
    exp_logits = np.exp(logits)
    softmax_output = exp_logits / np.sum(exp_logits, axis=1, keepdims=True)

    # Cross-entropy loss
    loss = -np.sum(target_one_hot * np.log(softmax_output + 1e-8)) / no_of_rows

    # Optional: Print the loss for monitoring convergence
    if epoch % 100 == 0:
        print(f"Epoch {epoch}/{num_epochs}, Cross-Entropy Loss: {loss}")

    # Backward Pass
    d_logits = softmax_output - target_one_hot

    # Backward Pass - Output Layer
    d_weights_output = np.dot(hidden_layer_output.T, d_logits) / no_of_rows
    d_bias_output = np.sum(d_logits, axis=0, keepdims=True) / no_of_rows

    # Backward Pass - Hidden Layer
    d_hidden_layer = np.dot(d_logits, weights_output.T) * hidden_layer_output * (1 - hidden_layer_output)
    d_weights_hidden = np.dot(inputs.T, d_hidden_layer) / no_of_rows
    d_bias_hidden = np.sum(d_hidden_layer, axis=0, keepdims=True) / no_of_rows

    # Update weights and biases
    weights_output -= learning_rate * d_weights_output
    bias_output -= learning_rate * d_bias_output
    weights_hidden -= learning_rate * d_weights_hidden
    bias_hidden -= learning_rate * d_bias_hidden

# After training, 'weights_hidden', 'bias_hidden', 'weights_output', and 'bias_output' will be adjusted for classification.
print("Trained Weights (Hidden Layer) shape:", weights_hidden.shape)
print("Trained Bias (Hidden Layer) shape:", bias_hidden.shape)
print("Trained Weights (Output Layer) shape:", weights_output.shape)
print("Trained Bias (Output Layer) shape:", bias_output.shape)


  hidden_layer_output = 1 / (1 + np.exp(-(np.dot(inputs, weights_hidden) + bias_hidden)))


Epoch 0/1000, Cross-Entropy Loss: 2.3372976045009226
Epoch 100/1000, Cross-Entropy Loss: 1.305240538308685
Epoch 200/1000, Cross-Entropy Loss: 0.9851093945331099
Epoch 300/1000, Cross-Entropy Loss: 0.855250681962822
Epoch 400/1000, Cross-Entropy Loss: 0.7754462981821066
Epoch 500/1000, Cross-Entropy Loss: 0.7168516966258359
Epoch 600/1000, Cross-Entropy Loss: 0.6772684746085585
Epoch 700/1000, Cross-Entropy Loss: 0.6476989241909253
Epoch 800/1000, Cross-Entropy Loss: 0.627892051697263
Epoch 900/1000, Cross-Entropy Loss: 0.608518167367975
Trained Weights (Hidden Layer) shape: (784, 10)
Trained Bias (Hidden Layer) shape: (1, 10)
Trained Weights (Output Layer) shape: (10, 10)
Trained Bias (Output Layer) shape: (1, 10)


In [3]:
# Forward Pass for Testing
def predict(X, weights_hidden, bias_hidden, weights_output, bias_output):
    hidden_layer_output = 1 / (1 + np.exp(-(np.dot(X, weights_hidden) + bias_hidden)))
    logits = np.dot(hidden_layer_output, weights_output) + bias_output
    exp_logits = np.exp(logits)
    softmax_output = exp_logits / np.sum(exp_logits, axis=1, keepdims=True)
    return np.argmax(softmax_output, axis=1)

# Convert target labels to one-hot encoding for test set
target_one_hot_test = np.eye(10)[Y_test]

# Use the trained weights and biases for prediction
predictions = predict(X_test, weights_hidden, bias_hidden, weights_output, bias_output)

# Calculate accuracy
accuracy = np.mean(predictions == Y_test)
print(f"Accuracy: {accuracy * 100:.2f}%")


Accuracy: 80.56%


  hidden_layer_output = 1 / (1 + np.exp(-(np.dot(X, weights_hidden) + bias_hidden)))


In [11]:
def predict_class_with_hidden(input_vector, weights_hidden, bias_hidden, weights_output, bias_output):
    # Forward Pass - Hidden Layer
    hidden_layer_output = 1 / (1 + np.exp(-(np.dot(input_vector, weights_hidden) + bias_hidden)))

    # Forward Pass - Output Layer
    logits = np.dot(hidden_layer_output, weights_output) + bias_output
    softmax_output = np.exp(logits) / np.sum(np.exp(logits), axis=1, keepdims=True)

    # Predicted Class
    predicted_class = np.argmax(softmax_output, axis=1)[0]
    return predicted_class

# Example usage
input_example = X_test[1]  # Replace with the input vector you want to predict
predicted_class_example = predict_class_with_hidden(input_example, weights_hidden, bias_hidden, weights_output, bias_output)

print(f"Predicted Class: {predicted_class_example}")
print(f"True Class: {Y_test[1]}")


Predicted Class: 3
True Class: 3


  hidden_layer_output = 1 / (1 + np.exp(-(np.dot(input_vector, weights_hidden) + bias_hidden)))
