In [1]:
%%javascript
IPython.notebook.clear_all_output();

<IPython.core.display.Javascript object>

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms


import numpy as np
from sklearn.datasets import load_digits
from sklearn.metrics import accuracy_score
import torch
import torchvision
import torchvision.transforms as transforms

transform = transforms.Compose([transforms.ToTensor(), 
                                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=2000,
                                          shuffle=True, num_workers=2)
testloader = torch.utils.data.DataLoader(testset, batch_size=2000,
                                         shuffle=False, num_workers=2)

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(32 * 32 * 3, 512) 
        self.fc2 = nn.Linear(512, 10)

    def forward(self, x):
        x = x.view(-1, 32 * 32 * 3)
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x
    
net = Net()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)

for epoch in range(10): 
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        x_train, y_train = data[0], data[1]
        optimizer.zero_grad()
        outputs = net(x_train)
        loss = criterion(outputs, y_train)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 200 == 199: 
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 200))
            running_loss = 0.0
        

print('Finished Training')

correct =0
total =0

with torch.no_grad():
    for data in testloader:
        x_test,y_test = data[0], data[1]
        outputs = net(x_test)
        _, y_pred = torch.max(outputs.data, 1)
        total += y_test.size(0)
        correct += (y_pred == y_test).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))
    
    



Files already downloaded and verified
Files already downloaded and verified
Finished Training
Accuracy of the network on the 10000 test images: 53 %


In [7]:
def to_numpy(dataset):
    data_loader = torch.utils.data.DataLoader(dataset, batch_size=len(dataset), shuffle=False)
    data = next(iter(data_loader))
    images, labels = data
    images = images.numpy()
    labels = labels.numpy()
    images = images.reshape(images.shape[0], -1)
    return images, labels
x_train, y_train = to_numpy(trainset)
x_test, y_test = to_numpy(testset)


def encoding(y):
    en = np.zeros((len(y), 10))
    for i in range(len(y)):
        en[i, y[i]] = 1
    return en

y_train_encoded = encoding(y_train)
y_test_encoded = encoding(y_test)

def relu(x):
    return np.maximum(0, x)
def softmax(x):
    exp_vals = np.exp(x - np.max(x, axis=1, keepdims=True))
    return exp_vals / np.sum(exp_vals, axis=1, keepdims=True)
def forward_pass(X, W1, b1, W2, b2):
    h1 = relu(np.dot(X, W1) + b1)
    scores = np.dot(h1, W2) + b2
    probs = softmax(scores)
    return h1, probs
def backward_pass(X, h1, probs, W1, W2, Y):
    dist = probs - Y
    dW2 = np.dot(h1.T, dist)
    db2 = np.sum(dist, axis=0, keepdims=True)
    dh1 = np.dot(dist, W2.T)
    dh1[h1 <= 0] = 0
    dW1 = np.dot(X.T, dh1)
    db1 = np.sum(dh1, axis=0, keepdims=True)
    return dW1, db1, dW2, db2


np.random.seed(0)
learning_rate = 1e-4
itr = 100
batch = 2000
hidden_layer = 512

W1 = 0.01 * np.random.randn(3072, hidden_layer)
b1 = np.zeros((1, hidden_layer))
W2 = 0.01 * np.random.randn(hidden_layer, 10)
b2 = np.zeros((1, 10))


for i in range(itr):
    idx = np.random.choice(x_train.shape[0], batch, replace=False)
    x_batch = x_train[idx]
    y_batch = y_train_encoded[idx]
    hidden_layer, train_probs = forward_pass(x_batch, W1, b1, W2, b2)
    loss = np.mean(-np.log(train_probs[range(batch), y_batch.argmax(axis=1)]))
    dW1, db1, dW2, db2 = backward_pass(x_batch, hidden_layer, train_probs, W1, W2, y_batch)
    W1 -= learning_rate * dW1
    b1 -= learning_rate * db1
    W2 -= learning_rate * dW2
    b2 -= learning_rate * db2
    if i % 10 == 0:
        print(f'Iteration {(i/10)+1}: Loss {loss}')
    

_, test_probs = forward_pass(x_test, W1, b1, W2, b2)
y_pred = np.argmax(test_probs, axis=1)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy*100)

Iteration 1.0: Loss 2.298655034545292
Iteration 2.0: Loss 2.0464697210666705
Iteration 3.0: Loss 1.9314129759715715
Iteration 4.0: Loss 1.8309072343523494
Iteration 5.0: Loss 1.796154947078394
Iteration 6.0: Loss 1.7577056495982877
Iteration 7.0: Loss 1.7202386985853362
Iteration 8.0: Loss 1.6884538591710443
Iteration 9.0: Loss 1.6869641304936165
Iteration 10.0: Loss 1.6513777917897
Accuracy: 43.919999999999995


In [8]:
#backward propagation with regularization of loss
def regbackward_pass(X, h1, probs, W1, W2, Y, reg_strength):
    dist = probs - Y
    dW2 = np.dot(h1.T, dist)
    db2 = np.sum(dist, axis=0, keepdims=True)
    dh1 = np.dot(dist, W2.T)
    dh1[h1 <= 0] = 0
    dW1 = np.dot(X.T, dh1)
    db1 = np.sum(dh1, axis=0, keepdims=True)
    dW2 += reg_strength * W2
    dW1 += reg_strength * W1
    return dW1, db1, dW2, db2


for reg_strength in range(10):
    print(f'Loss for lambda = {reg_strength/10}')
    for i in range(itr):
        idx = np.random.choice(x_train.shape[0], batch, replace=False)
        x_batch = x_train[idx]
        y_batch = y_train_encoded[idx]
        hidden_layer, train_probs = forward_pass(x_batch, W1, b1, W2, b2)
        loss = np.mean(-np.log(train_probs[range(batch), y_batch.argmax(axis=1)])) + (reg_strength/10) * np.sqrt((np.sum(W1 * W1) + np.sum(W2 * W2)))
        dW1, db1, dW2, db2 = regbackward_pass(x_batch, hidden_layer, train_probs, W1, W2, y_batch,reg_strength/10)
        W1 -= learning_rate * dW1
        b1 -= learning_rate * db1
        W2 -= learning_rate * dW2
        b2 -= learning_rate * db2
        if i % 10 == 0:
            print(f'Iteration {(i/10)+1}: Loss {loss}')
    

_, test_probs = forward_pass(x_test, W1, b1, W2, b2)
y_pred = np.argmax(test_probs, axis=1)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy*100)

Loss for lambda = 0.0
Iteration 1.0: Loss 1.652690598052167
Iteration 2.0: Loss 1.5859499977318872
Iteration 3.0: Loss 1.5777135878769477
Iteration 4.0: Loss 1.596567333497947
Iteration 5.0: Loss 1.5500282197759534
Iteration 6.0: Loss 1.578628496971099
Iteration 7.0: Loss 1.5386313962173317
Iteration 8.0: Loss 1.5332602526332497
Iteration 9.0: Loss 1.5071013837999507
Iteration 10.0: Loss 1.5221264510599026
Loss for lambda = 0.1
Iteration 1.0: Loss 2.7896923204449413
Iteration 2.0: Loss 2.830667741322787
Iteration 3.0: Loss 2.771223094808242
Iteration 4.0: Loss 2.782680979705247
Iteration 5.0: Loss 2.7893493285668294
Iteration 6.0: Loss 2.834108202013612
Iteration 7.0: Loss 2.793645708923129
Iteration 8.0: Loss 2.774742742842367
Iteration 9.0: Loss 2.7884143316079983
Iteration 10.0: Loss 2.8093388608111676
Loss for lambda = 0.2
Iteration 1.0: Loss 4.1644025743717705
Iteration 2.0: Loss 4.167835309530966
Iteration 3.0: Loss 4.18585826314491
Iteration 4.0: Loss 4.166820723742145
Iteration