In [1]:
import numpy as np
import pickle
from tqdm import tqdm
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, f1_score


In [2]:
class DenseLayer:
    def __init__(self, input_dim, output_dim):
        self.weights = np.random.randn(input_dim, output_dim) * 0.01
        self.bias = np.zeros((1, output_dim))
    
    def forward(self, X):
        self.input = X
        self.output = np.dot(X, self.weights) + self.bias
        return self.output
    
    def backward(self, d_output, learning_rate):
        d_input = np.dot(d_output, self.weights.T)
        d_weights = np.dot(self.input.T, d_output)
        d_bias = np.sum(d_output, axis=0, keepdims=True)
        
        # Update weights and biases
        self.weights -= learning_rate * d_weights
        self.bias -= learning_rate * d_bias
        
        return d_input


In [3]:
class BatchNormalization:
    def __init__(self, input_dim, epsilon=1e-8):
        self.epsilon = epsilon
        self.gamma = np.ones((1, input_dim))
        self.beta = np.zeros((1, input_dim))
    
    def forward(self, X):
        self.mean = np.mean(X, axis=0)
        self.variance = np.var(X, axis=0)
        self.X_normalized = (X - self.mean) / np.sqrt(self.variance + self.epsilon)
        self.output = self.gamma * self.X_normalized + self.beta
        return self.output
    
    def backward(self, d_output, learning_rate):
        N = d_output.shape[0]
        
        dX_normalized = d_output * self.gamma
        d_variance = np.sum(dX_normalized * (self.input - self.mean) * -0.5 * np.power(self.variance + self.epsilon, -1.5), axis=0)
        d_mean = np.sum(dX_normalized * -1 / np.sqrt(self.variance + self.epsilon), axis=0) + d_variance * np.mean(-2 * (self.input - self.mean), axis=0)
        
        d_input = (dX_normalized / np.sqrt(self.variance + self.epsilon)) + (d_variance * 2 * (self.input - self.mean) / N) + (d_mean / N)
        
        d_gamma = np.sum(d_output * self.X_normalized, axis=0)
        d_beta = np.sum(d_output, axis=0)
        
        # Update gamma and beta
        self.gamma -= learning_rate * d_gamma
        self.beta -= learning_rate * d_beta
        
        return d_input


In [4]:
class ReLU:
    def forward(self, X):
        self.input = X
        return np.maximum(0, X)
    
    def backward(self, d_output):
        return d_output * (self.input > 0)


In [5]:
class Dropout:
    def __init__(self, rate):
        self.rate = rate
    
    def forward(self, X, is_training=True):
        if is_training:
            self.mask = (np.random.rand(*X.shape) > self.rate)
            return X * self.mask
        return X
    
    def backward(self, d_output):
        return d_output * self.mask


In [18]:
class SoftmaxCrossEntropy:
    def forward(self, logits, labels):
        # Apply softmax, then compute cross-entropy loss
        self.logits = logits
        self.labels = labels
        
        # Calculate softmax
        exp_values = np.exp(logits - np.max(logits, axis=1, keepdims=True))
        probabilities = exp_values / np.sum(exp_values, axis=1, keepdims=True)
        self.probs = probabilities
        
        # Calculate cross-entropy loss
        batch_size = logits.shape[0]
        correct_logprobs = -np.log(probabilities[range(batch_size), labels])
        loss = np.sum(correct_logprobs) / batch_size
        return loss
    
    def backward(self, logits, labels):
        # Compute gradient of the loss w.r.t logits
        batch_size = logits.shape[0]
        d_logits = self.probs
        d_logits[range(batch_size), labels] -= 1
        d_logits /= batch_size
        
        return d_logits


In [7]:
class AdamOptimizer:
    def __init__(self, params, learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1e-8):
        self.params = params
        self.lr = learning_rate
        self.beta1 = beta1
        self.beta2 = beta2
        self.epsilon = epsilon
        self.m = [np.zeros_like(p) for p in self.params]
        self.v = [np.zeros_like(p) for p in self.params]
        self.t = 0

    def update(self, grads):
        self.t += 1
        for i in range(len(self.params)):
            self.m[i] = self.beta1 * self.m[i] + (1 - self.beta1) * grads[i]
            self.v[i] = self.beta2 * self.v[i] + (1 - self.beta2) * (grads[i] ** 2)
            
            m_hat = self.m[i] / (1 - self.beta1 ** self.t)
            v_hat = self.v[i] / (1 - self.beta2 ** self.t)
            
            self.params[i] -= self.lr * m_hat / (np.sqrt(v_hat) + self.epsilon)


In [13]:
class FeedForwardNN:
    def __init__(self, layers):
        self.layers = layers
        self.loss_fn = SoftmaxCrossEntropy()
    
    def forward(self, X):
        for layer in self.layers:
            X = layer.forward(X)
        return X
    
    def backward(self, d_output, learning_rate):
        for layer in reversed(self.layers):
            d_output = layer.backward(d_output, learning_rate)
    
    def train(self, X, Y, epochs, batch_size, learning_rate):
            for epoch in range(epochs):
                loss = 0
                for i in range(0, len(X), batch_size):
                    X_batch = X[i:i + batch_size]
                    Y_batch = Y[i:i + batch_size]

                    # Forward pass to get logits
                    logits = self.forward(X_batch)
                    
                    # Calculate loss
                    loss += self.loss_fn.forward(logits, Y_batch)
                    
                    # Backward pass
                    d_output = self.loss_fn.backward(logits, Y_batch)
                    self.backward(d_output, learning_rate)
                
                print(f"Epoch {epoch + 1}, Loss: {loss / len(X)}")

    def save(self, filename):
        with open(filename, 'wb') as f:
            pickle.dump(self, f)
    
    @staticmethod
    def load(filename):
        with open(filename, 'rb') as f:
            return pickle.load(f)


In [14]:
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Define transformations to convert images to tensors and normalize
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))  # Normalize to have mean 0.5 and std 0.5
])

# Download and load the FashionMNIST dataset
train_dataset = datasets.FashionMNIST(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.FashionMNIST(root='./data', train=False, download=True, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Convert data to numpy arrays
def data_to_numpy(data_loader):
    X, y = [], []
    for images, labels in data_loader:
        X.append(images.view(images.size(0), -1).numpy())  # Flatten 28x28 images
        y.append(np.eye(10)[labels.numpy()])  # One-hot encode the labels
    return np.vstack(X), np.vstack(y)

train_data, train_labels = data_to_numpy(train_loader)
test_data, test_labels = data_to_numpy(test_loader)


In [19]:
# Define the network architecture
layers = [
    DenseLayer(784, 256),
    BatchNormalization(256),
    ReLU(),
    Dropout(0.2),
    DenseLayer(256, 128),
    BatchNormalization(128),
    ReLU(),
    Dropout(0.2),
    DenseLayer(128, 10)
]

# Initialize and train the model
model = FeedForwardNN(layers)
model.train(train_data, train_labels, epochs=10, batch_size=64, learning_rate=0.001)
model.save("model.pkl")


IndexError: arrays used as indices must be of integer (or boolean) type