In [None]:
import torch
from torchvision import datasets, transforms
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Convert images to tensors and normalise the single grey channel with
# mean 0.5 / std 0.5. Both statistics are written as 1-tuples so they are
# consistently "one value per channel".
transform_ = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

# download=False: the MNIST files are expected to already exist under data/.
train_dataset = datasets.MNIST(root="data/", download=False, train=True, transform=transform_)
test_dataset = datasets.MNIST(root="data/", download=False, train=False, transform=transform_)

batch_size = 32
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
# Evaluation does not benefit from shuffling; a fixed order keeps the
# per-batch averages computed later identical from run to run.
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [None]:
# Network dimensions.
d = 28 * 28      # input size: one flattened 28x28 image
d1 = 300         # hidden-layer width
k = 10           # number of output classes

# Optimisation settings shared by the NumPy and PyTorch experiments.
num_epochs = 100
lr = 0.001

# Seed both libraries so weight initialisation and batch shuffling are
# repeatable after "Restart Kernel -> Run All".
SEED = 0
np.random.seed(SEED)
torch.manual_seed(SEED)

In [None]:
def initialize_weights(d, d1, k, random, rng=None):
    """Create the two weight matrices of the two-layer network.

    Parameters
    ----------
    d : int
        Number of input features.
    d1 : int
        Number of hidden units.
    k : int
        Number of output classes.
    random : bool
        If true, draw every weight uniformly from [-1, 1); otherwise
        return all-zero matrices.
    rng : numpy.random.Generator, optional
        Source of randomness for the random branch. When omitted, the
        legacy global NumPy RNG is used, so ``np.random.seed`` still
        controls the result.

    Returns
    -------
    (W1, W2) : tuple of numpy.ndarray
        ``W1`` has shape ``(d1, d)`` and ``W2`` has shape ``(k, d1)``.
    """
    if not random:
        return np.zeros((d1, d)), np.zeros((k, d1))

    # np.random (module) and Generator both expose uniform(low, high, size).
    sampler = np.random if rng is None else rng
    W1 = sampler.uniform(-1, 1, (d1, d))
    W2 = sampler.uniform(-1, 1, (k, d1))
    return W1, W2

def sigmoid(x):
    """Element-wise logistic function 1 / (1 + exp(-x)).

    Evaluated in a numerically stable way: only exp(-|x|) is ever
    computed, which lies in [0, 1], so large-magnitude inputs cannot
    overflow.

    Parameters
    ----------
    x : scalar or array_like

    Returns
    -------
    numpy scalar or numpy.ndarray with the same shape as ``x``.
    """
    x = np.asarray(x)
    e = np.exp(-np.abs(x))
    # x >= 0: 1 / (1 + exp(-x));  x < 0: exp(x) / (1 + exp(x)) -- the same value.
    out = np.where(x >= 0, 1.0 / (1.0 + e), e / (1.0 + e))
    # Indexing with () turns a 0-d result back into a scalar and leaves
    # higher-dimensional arrays unchanged.
    return out[()]

def softmax(x):
    """Softmax along the last axis.

    The maximum of each slice is subtracted before exponentiating, which
    leaves the result unchanged but keeps the exponentials bounded by 1.
    """
    row_max = np.max(x, axis=-1, keepdims=True)
    exps = np.exp(x - row_max)
    totals = exps.sum(axis=-1, keepdims=True)
    return exps / totals

In [None]:
# Start the NumPy network from uniformly random weights.
W1, W2 = initialize_weights(d, d1, k, random=True)

In [None]:
# Mini-batch gradient descent for the NumPy two-layer network
# (sigmoid hidden layer, softmax output, cross-entropy loss).
train_losses = []       # mean per-batch loss, one entry per epoch
train_accuracies = []   # fraction of correctly classified samples, one entry per epoch
log_eps = 1e-12         # lower bound on probabilities so np.log never receives an exact 0

for epoch in range(num_epochs):
    loss_epoch = 0.0
    correct = 0
    seen = 0
    for image_b, label_b in train_loader:
        labels = label_b.numpy()
        X_train = image_b.view(-1, d).numpy()   # (batch, d)
        Y_train = np.eye(k)[labels]             # one-hot targets, (batch, k)
        n_batch = X_train.shape[0]

        # Forward pass. Hidden quantities are stored as (units, batch).
        Z1 = np.dot(W1, X_train.T)
        A1 = sigmoid(Z1)
        Z2 = np.dot(W2, A1)
        Y_out = softmax(Z2.T)                   # (batch, k)

        # Backward pass. For softmax + cross-entropy the gradient with
        # respect to the logits is simply (prediction - target).
        dZ2 = Y_out - Y_train
        dW2 = np.dot(dZ2.T, A1.T) / n_batch
        dA1 = np.dot(dZ2, W2)
        dZ1 = dA1 * A1.T * (1 - A1.T)           # derivative of the sigmoid
        dW1 = np.dot(dZ1.T, X_train) / n_batch

        # In-place gradient step on the notebook-level weights.
        W1 -= lr * dW1
        W2 -= lr * dW2

        # Loss/accuracy use the predictions made before this update.
        # Clipping avoids 0 * log(0) = NaN when a probability underflows.
        batch_loss = -np.sum(Y_train * np.log(np.clip(Y_out, log_eps, None)), axis=1).mean()
        loss_epoch += batch_loss
        correct += int((Y_out.argmax(axis=1) == labels).sum())
        seen += n_batch

    train_losses.append(loss_epoch / len(train_loader))
    train_accuracies.append(correct / seen)
    print(f"Epoch {epoch}: Loss {train_losses[-1]}")

In [None]:
# Accumulate the cross-entropy of the NumPy network over the test batches.
# test_error holds the SUM of per-batch mean losses; it still has to be
# divided by the number of batches to obtain an average.
test_error = 0
for image_b, label_b in test_loader:
    X_test = image_b.view(-1, d).numpy()
    Y_test = np.eye(k)[label_b.numpy()]   # one-hot targets

    # Forward pass only; the weights are not modified here.
    A1 = sigmoid(np.dot(W1, X_test.T))
    Y_out = softmax(np.dot(W2, A1).T)

    # Clip before the log so an underflowed probability yields a large
    # finite loss instead of inf / NaN (0 * log(0)).
    test_error += -np.sum(Y_test * np.log(np.clip(Y_out, 1e-12, None)), axis=1).mean()

In [None]:
# Average of the per-batch mean cross-entropy losses accumulated above
# (sum over batches divided by the number of test batches).
test_error/len(test_loader)

In [None]:
# Per-epoch training loss of the NumPy network.
fig, ax = plt.subplots()
ax.plot(train_losses)
ax.set_xlabel("Epochs")
ax.set_ylabel("Train Loss")

In [None]:
def init_weights_torch(layer, random=True):
    """Re-initialise the weight matrix of a linear layer in place.

    Intended for ``Module.apply``, which calls it once per sub-module;
    anything that is not a ``torch.nn.Linear`` (or subclass) is ignored.
    Biases are left untouched.

    Parameters
    ----------
    layer : torch.nn.Module
        Module visited by ``apply``.
    random : bool, default True
        If true, draw weights uniformly from [-1, 1]; otherwise set
        them all to zero. For zero initialisation use e.g.
        ``model.apply(lambda m: init_weights_torch(m, random=False))``.
    """
    if not isinstance(layer, torch.nn.Linear):
        return
    if random:
        torch.nn.init.uniform_(layer.weight, a=-1, b=1)
    else:
        torch.nn.init.zeros_(layer.weight)

In [None]:
# PyTorch counterpart of the NumPy network: linear -> sigmoid -> linear.
# The final layer outputs raw logits.
torch_model = torch.nn.Sequential(
    torch.nn.Linear(d, d1),
    torch.nn.Sigmoid(),
    torch.nn.Linear(d1, k),
)
torch_model.apply(init_weights_torch)

In [None]:
# Display the weight matrix of the last module in the Sequential
# (index 2 is the second Linear layer).
torch_model[2].weight

In [None]:
# Train the PyTorch model with plain SGD. The step size comes from the
# shared `lr` hyperparameter instead of a second hard-coded literal, so
# both experiments always use the same learning rate.
optimizer = torch.optim.SGD(torch_model.parameters(), lr=lr)
loss_func = torch.nn.CrossEntropyLoss()   # takes raw logits and integer class labels

training_torch_losses = []   # mean per-batch loss, one entry per epoch
for epoch in range(num_epochs):
    epoch_loss = 0
    for image_b, label_b in train_loader:
        # Flatten each image to a vector: (batch, features).
        X_train = image_b.view(image_b.shape[0], -1)

        optimizer.zero_grad()            # clear gradients from the previous step
        Y_out = torch_model(X_train)
        loss_cal = loss_func(Y_out, label_b)
        loss_cal.backward()
        optimizer.step()
        epoch_loss += loss_cal.item()    # .item() detaches the scalar from the graph
    training_torch_losses.append(epoch_loss / len(train_loader))
    print(training_torch_losses[-1])

In [None]:
# Per-epoch training loss of the PyTorch model.
fig, ax = plt.subplots()
ax.plot(training_torch_losses)
ax.set_xlabel("Epochs")
ax.set_ylabel("Training Loss")

In [None]:
# Accumulate the cross-entropy of the PyTorch model over the test batches.
# test_error holds the SUM of per-batch losses; it still has to be divided
# by the number of batches to obtain an average.
test_error = 0
# No gradients are needed for evaluation; disabling autograd avoids
# building a computation graph for every test batch.
with torch.no_grad():
    for image_b, label_b in test_loader:
        X_test = image_b.view(image_b.shape[0], -1)   # flatten to (batch, features)
        Y_test_out = torch_model(X_test)
        test_error += loss_func(Y_test_out, label_b).item()

In [None]:
# Average of the per-batch losses accumulated above
# (sum over batches divided by the number of test batches).
test_error/len(test_loader)