In [12]:
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import pickle


In [13]:
# Transform and load dataset
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])
train_data = datasets.FashionMNIST(root="data", train=True, download=True, transform=transform)
train_loader = DataLoader(train_data, batch_size=64, shuffle=True)


In [14]:
class DenseLayer:
    def __init__(self, input_size, output_size):
        self.weights = np.random.randn(input_size, output_size) * 0.01
        self.bias = np.zeros((1, output_size))
        
    def forward(self, X):
        self.input = X
        # print("YEs in Dense Layer")
        return np.dot(X, self.weights) + self.bias

    def backward(self, grad_output, learning_rate):
        grad_input = np.dot(grad_output, self.weights.T)
        grad_weights = np.dot(self.input.T, grad_output)
        grad_bias = np.sum(grad_output, axis=0, keepdims=True)
        
        # Update weights and biases
        self.weights -= learning_rate * grad_weights
        self.bias -= learning_rate * grad_bias
        
        return grad_input


In [15]:
class BatchNormalization:
    def __init__(self, size, momentum=0.9):
        self.gamma = np.ones((1, size))
        self.beta = np.zeros((1, size))
        self.momentum = momentum
        self.running_mean = np.zeros((1, size))
        self.running_var = np.ones((1, size))

    def forward(self, X, training=True):
        if training:
            mean = np.mean(X, axis=0, keepdims=True)
            var = np.var(X, axis=0, keepdims=True)
            X_norm = (X - mean) / np.sqrt(var + 1e-8)
            self.running_mean = self.momentum * self.running_mean + (1 - self.momentum) * mean
            self.running_var = self.momentum * self.running_var + (1 - self.momentum) * var
        else:
            X_norm = (X - self.running_mean) / np.sqrt(self.running_var + 1e-8)
        # print("YEs in BN")
        return self.gamma * X_norm + self.beta

    def backward(self, grad_output):
        return grad_output  # Stub, we can refine later


In [16]:
class ReLU:
    def forward(self, X):
        self.input = X
        # print("YEs in ReLU")
        return np.maximum(0, X)
    
    def backward(self, grad_output):
        return grad_output * (self.input > 0)


In [17]:
def softmax(logits):
    exp_logits = np.exp(logits - np.max(logits, axis=1, keepdims=True))  # stability trick
    return exp_logits / np.sum(exp_logits, axis=1, keepdims=True)

def cross_entropy_loss(predictions, labels):
    one_hot_labels = np.zeros_like(predictions)
    one_hot_labels[np.arange(len(labels)), labels] = 1
    probs = softmax(predictions)
    loss = -np.mean(np.sum(one_hot_labels * np.log(probs + 1e-8), axis=1))  # Add epsilon to avoid log(0)
    return loss

def cross_entropy_gradient(predictions, labels):
    one_hot_labels = np.zeros_like(predictions)
    one_hot_labels[np.arange(len(labels)), labels] = 1
    probs = softmax(predictions)
    return (probs - one_hot_labels) / len(labels)


In [18]:
class NeuralNetwork:
    def __init__(self):
        self.layers = [
            DenseLayer(28*28, 128),
            BatchNormalization(128),
            ReLU(),
            DenseLayer(128, 10)  # Output layer with 10 classes for FashionMNIST
        ]

    def forward(self, X):
        for layer in self.layers:
            X = layer.forward(X)
        return X

    def predict(self, X):
        logits = self.forward(X)
        probabilities = softmax(logits)
        return np.argmax(probabilities, axis=1)


In [19]:
def train(network, train_loader, epochs=10, learning_rate=0.001):
    for epoch in range(epochs):
        losses = []
        for images, labels in tqdm(train_loader):
            images = images.view(-1, 28*28).numpy()
            labels = labels.numpy()

            predictions = network.forward(images)
            loss = cross_entropy_loss(predictions, labels)
            losses.append(loss)

            grad_output = cross_entropy_gradient(predictions, labels)
            for layer in reversed(network.layers):
                if isinstance(layer, DenseLayer):
                    grad_output = layer.backward(grad_output, learning_rate)
                else:
                    grad_output = layer.backward(grad_output)

        print(f"Epoch {epoch+1}, Loss: {np.mean(losses)}")


In [20]:
# Initialize and train the network
network = NeuralNetwork()
train(network, train_loader, epochs=10, learning_rate=0.001)

# Save the model
with open("1905105.pickle", "wb") as f:
    pickle.dump(network, f)


  0%|          | 0/938 [00:00<?, ?it/s]

100%|██████████| 938/938 [00:24<00:00, 38.22it/s]


Epoch 1, Loss: 1.7526639840706704


100%|██████████| 938/938 [00:19<00:00, 49.06it/s]


Epoch 2, Loss: 1.2691464567588226


100%|██████████| 938/938 [00:20<00:00, 46.88it/s]


Epoch 3, Loss: 1.1020526005439706


100%|██████████| 938/938 [00:19<00:00, 48.90it/s]


Epoch 4, Loss: 0.9979376054811355


100%|██████████| 938/938 [00:18<00:00, 49.92it/s]


Epoch 5, Loss: 0.9313366874504296


100%|██████████| 938/938 [00:21<00:00, 43.07it/s]


Epoch 6, Loss: 0.8820911275757519


100%|██████████| 938/938 [00:21<00:00, 43.64it/s]


Epoch 7, Loss: 0.844588215601471


100%|██████████| 938/938 [00:19<00:00, 48.30it/s]


Epoch 8, Loss: 0.8171254780712017


100%|██████████| 938/938 [00:18<00:00, 50.37it/s]


Epoch 9, Loss: 0.7930072273094022


100%|██████████| 938/938 [00:18<00:00, 51.80it/s]

Epoch 10, Loss: 0.7728278619664192





In [21]:
def evaluate(network, data_loader):
    correct_predictions = 0
    total_predictions = 0

    for images, labels in data_loader:
        images = images.view(-1, 28*28).numpy()
        labels = labels.numpy()

        # Get predictions
        predicted_labels = network.predict(images)
        correct_predictions += np.sum(predicted_labels == labels)
        total_predictions += len(labels)

    accuracy = correct_predictions / total_predictions
    print(f"Accuracy: {accuracy:.4f}")


In [22]:
# After training, evaluate on a test set
test_data = datasets.FashionMNIST(root="data", train=False, download=True, transform=transform)
test_loader = DataLoader(test_data, batch_size=64, shuffle=False)
evaluate(network, test_loader)

Accuracy: 0.7356
