In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Read the Fashion-MNIST train/test splits from CSV files in the working directory
train_data = pd.read_csv('fashion-mnist_train.csv')
test_data = pd.read_csv('fashion-mnist_test.csv')

# Pull the 'label' column out of each frame as a NumPy array of class ids
train_labels = train_data['label'].values
test_labels = test_data['label'].values

# Every column except 'label' is treated as pixel data: divide by 255.0
# (assumes 8-bit pixel intensities -- TODO confirm) and reshape each row
# into a 28x28 image with a single trailing channel axis.
train_images = (train_data.drop(columns='label').values / 255.0).reshape(-1, 28, 28, 1)
test_images = (test_data.drop(columns='label').values / 255.0).reshape(-1, 28, 28, 1)

# Convert labels to one-hot encoding
def one_hot_encode(labels, num_classes):
    """Return one-hot vectors for integer class labels.

    Rows of the ``num_classes`` x ``num_classes`` identity matrix are
    selected by ``labels``, so label ``k`` becomes a float vector holding
    1.0 at index ``k`` and 0.0 elsewhere.

    Args:
        labels: Integer index array; each entry picks one identity row.
        num_classes: Number of classes (length of each one-hot vector).

    Returns:
        Float array with the shape of ``labels`` plus a trailing axis of
        length ``num_classes``.
    """
    identity_rows = np.eye(num_classes)
    return identity_rows[labels]

# One-hot encode both label sets with 10 classes, then display the encoded
# training labels.
train_labels_encoded, test_labels_encoded = (
    one_hot_encode(split_labels, 10) for split_labels in (train_labels, test_labels)
)
print(train_labels_encoded)

# Helper functions: activations, loss, accuracy metric, and layer operations
def relu(x):
    """Apply the rectified linear unit element-wise, i.e. ``max(0, x)``."""
    floor = 0
    return np.maximum(floor, x)

def sigmoid(x):
    """Compute the logistic sigmoid ``1 / (1 + exp(-x))`` element-wise.

    The naive formula overflows in ``exp(-x)`` for large negative ``x``
    (emitting a RuntimeWarning, or raising under a strict ``np.errstate``).
    This version only ever exponentiates a non-positive number, so it
    cannot overflow.

    Args:
        x: Scalar or array-like of real numbers.

    Returns:
        A NumPy scalar for scalar input, otherwise an array shaped like
        ``x``, with values in [0, 1].
    """
    x = np.asarray(x)
    # exp(-|x|) lies in (0, 1], so it never overflows regardless of x.
    z = np.exp(-np.abs(x))
    # For x >= 0 use 1 / (1 + exp(-x)); for x < 0 use the algebraically
    # identical exp(x) / (1 + exp(x)). Both are expressed through z.
    result = np.where(x >= 0, 1 / (1 + z), z / (1 + z))
    # [()] unwraps a 0-d result into a NumPy scalar and is a no-op for arrays.
    return result[()]

def relu_derivative(x):
    """Return the ReLU gradient mask: 1.0 where ``x > 0``, 0.0 elsewhere.

    ``x`` must support ``.astype`` on its comparison result (e.g. a NumPy
    array).
    """
    is_positive = x > 0
    return is_positive.astype(float)

def cross_entropy(predictions, targets):
    """Return the mean cross-entropy loss over a batch.

    Args:
        predictions: 2-D array of predicted probabilities, one row per sample.
        targets: Array of the same shape holding the target distribution
            (e.g. one-hot rows).

    Returns:
        The negative mean over rows of ``sum(targets * log(predictions))``.
    """
    # A tiny offset keeps log() finite when a predicted probability is exactly 0.
    epsilon = 1e-15
    log_probs = np.log(predictions + epsilon)
    per_sample = np.sum(targets * log_probs, axis=1)
    return -np.mean(per_sample)

def accuracy(predictions, labels):
    """Return the fraction of rows whose arg-max class matches the label's.

    Both arguments are 2-D (one row per sample); the class of a row is the
    index of its largest entry along axis 1.
    """
    predicted_classes = np.argmax(predictions, axis=1)
    true_classes = np.argmax(labels, axis=1)
    return np.mean(predicted_classes == true_classes)

def convolve2d(image, kernel, stride=1, padding=0):
    """Slide ``kernel`` over ``image`` and return the windowed dot products.

    This is the "convolution" used in CNNs, i.e. a cross-correlation: the
    kernel is NOT flipped before being applied.

    Supported shapes (channels-last, matching ``max_pooling2d``):
        * image (H, W),    kernel (kh, kw)    -> output (out_h, out_w)
        * image (H, W, C), kernel (kh, kw)    -> output (out_h, out_w, C);
          the same 2-D kernel is applied to every channel independently.
        * image (H, W, C), kernel (kh, kw, C) -> output (out_h, out_w);
          products are summed over the channels as well.

    Args:
        image: 2-D or 3-D array-like input.
        kernel: 2-D or 3-D array-like filter.
        stride: Step between consecutive windows along both spatial axes.
        padding: Number of zero rows/columns added on every spatial border.

    Returns:
        Float array with ``out_h = (H + 2*padding - kh) // stride + 1`` and
        ``out_w`` defined analogously.

    Raises:
        ValueError: On unsupported ranks, mismatched channel counts,
            ``stride < 1``, ``padding < 0``, or a kernel larger than the
            padded image.
    """
    image = np.asarray(image, dtype=float)
    kernel = np.asarray(kernel, dtype=float)

    if image.ndim not in (2, 3):
        raise ValueError("image must be 2-D (H, W) or 3-D (H, W, C)")
    if kernel.ndim not in (2, 3) or kernel.ndim > image.ndim:
        raise ValueError("kernel must be 2-D, or 3-D when image is 3-D")
    if kernel.ndim == 3 and kernel.shape[2] != image.shape[2]:
        raise ValueError("kernel and image must have the same number of channels")
    if stride < 1:
        raise ValueError("stride must be >= 1")
    if padding < 0:
        raise ValueError("padding must be >= 0")

    if padding:
        # Pad only the two spatial axes; a channel axis (if any) is untouched.
        pad_width = [(padding, padding), (padding, padding)]
        pad_width += [(0, 0)] * (image.ndim - 2)
        image = np.pad(image, pad_width, mode="constant")

    kernel_h, kernel_w = kernel.shape[:2]
    out_h = (image.shape[0] - kernel_h) // stride + 1
    out_w = (image.shape[1] - kernel_w) // stride + 1
    if out_h < 1 or out_w < 1:
        raise ValueError("kernel is larger than the (padded) image")

    # A 2-D kernel on a multi-channel image is broadcast across channels and
    # yields one output map per channel.
    per_channel = image.ndim == 3 and kernel.ndim == 2
    if per_channel:
        kernel = kernel[:, :, np.newaxis]
        output = np.zeros((out_h, out_w, image.shape[2]))
    else:
        output = np.zeros((out_h, out_w))

    for i in range(out_h):
        top = i * stride
        for j in range(out_w):
            left = j * stride
            window = image[top:top + kernel_h, left:left + kernel_w]
            if per_channel:
                output[i, j] = np.sum(window * kernel, axis=(0, 1))
            else:
                output[i, j] = np.sum(window * kernel)

    return output

def max_pooling2d(image, size=2, stride=2):
    """Max-pool a channels-last image over square spatial windows.

    Args:
        image: Array of shape (height, width, channels).
        size: Side length of the pooling window.
        stride: Step between consecutive windows along both spatial axes.

    Returns:
        Float array of shape (pooled_h, pooled_w, channels) where each entry
        is the per-channel maximum of the corresponding window.
    """
    in_h, in_w, channels = image.shape[0], image.shape[1], image.shape[2]
    pooled_h = (in_h - size) // stride + 1
    pooled_w = (in_w - size) // stride + 1
    pooled = np.zeros((pooled_h, pooled_w, channels))

    # Pre-compute the top-left corner of every window along each axis.
    row_starts = range(0, pooled_h * stride, stride)
    col_starts = range(0, pooled_w * stride, stride)

    for row, top in enumerate(row_starts):
        for col, left in enumerate(col_starts):
            window = image[top:top + size, left:left + size, :]
            # Reduce over both spatial axes, keeping one maximum per channel.
            pooled[row, col, :] = np.max(window, axis=(0, 1))

    return pooled

class SimpleNeuralNetwork:
    """Two-layer fully connected network: ReLU hidden layer, sigmoid output.

    The network has no bias terms and no convolutional layers. ``forward``
    caches the activations that ``backward`` needs, so ``forward`` must be
    called on a batch before ``backward`` is called for that batch.
    """

    def __init__(self, input_size, hidden_size, output_size):
        """Create the two weight matrices with scaled Gaussian initial values.

        Args:
            input_size: Number of input features per sample.
            hidden_size: Number of hidden units.
            output_size: Number of output units.
        """
        # Standard-normal draws scaled by sqrt(1 / fan_in) of each layer.
        self.l1_weights = np.random.randn(input_size, hidden_size) * np.sqrt(1. / input_size)
        self.l2_weights = np.random.randn(hidden_size, output_size) * np.sqrt(1. / hidden_size)

        # Activations cached by forward() for use in backward().
        self.flat = None
        self.l1_output = None
        self.l2_output = None

    def forward(self, x):
        """Run the forward pass and cache the intermediate activations.

        Args:
            x: Batch of inputs. Inputs with more than two dimensions (e.g.
                images of shape (N, H, W, C)) are flattened to
                (N, features) before the first dense layer; 1-D and 2-D
                inputs are used as given.

        Returns:
            The sigmoid activations of the output layer.
        """
        x = np.asarray(x)
        # Dense layers need one feature vector per sample.
        self.flat = x.reshape(x.shape[0], -1) if x.ndim > 2 else x

        self.l1_output = relu(np.dot(self.flat, self.l1_weights))
        self.l2_output = sigmoid(np.dot(self.l1_output, self.l2_weights))
        return self.l2_output

    def backward(self, y, learning_rate):
        """Back-propagate through both dense layers and update their weights.

        Args:
            y: Target array with the same shape as the last ``forward`` output.
            learning_rate: Step size for the gradient-descent update.

        Raises:
            RuntimeError: If ``forward`` has not been called yet.
        """
        if self.l2_output is None:
            raise RuntimeError("forward() must be called before backward()")

        batch_size = len(y)

        # Output-layer error. (output - target) is used directly as the
        # gradient w.r.t. the pre-activation output, which is exact for a
        # sigmoid paired with per-class binary cross-entropy.
        l2_error = self.l2_output - y
        l2_grad = np.dot(self.l1_output.T, l2_error) / batch_size

        # Hidden-layer error: push the output error back through l2_weights
        # (before they are updated) and gate it by the ReLU derivative.
        # relu_derivative of the post-activation is the same mask as of the
        # pre-activation because ReLU output is positive iff its input is.
        l1_error = np.dot(l2_error, self.l2_weights.T) * relu_derivative(self.l1_output)
        l1_grad = np.dot(self.flat.T, l1_error) / batch_size

        # Gradient-descent update, applied only after all gradients are known.
        self.l2_weights -= learning_rate * l2_grad
        self.l1_weights -= learning_rate * l1_grad
        


[[0. 0. 1. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 1.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 1. 0.]
 [0. 0. 0. ... 0. 1. 0.]
 [0. 0. 0. ... 1. 0. 0.]]
