# NN for MNIST
* more layers than softmax + ReLU

In [18]:
import torch
from torch import nn, optim
from torch.autograd import Variable
import numpy as np
import torchvision
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import torch.utils.data as Data

In [53]:
class MNIST_NN(nn.Module):
    def __init__(self):
        super(MNIST_NN, self).__init__()
        self.relu = nn.ReLU()
        self.linear1 = nn.Linear(784, 256, bias = True)
        self.linear2 = nn.Linear(256, 256, bias = True)
        self.linear3 = nn.Linear(256, 10, bias = True)
        self.model = nn.Sequential(self.linear1, self.relu,
                                   self.linear2, self.relu,
                                   self.linear3)
        
    def forward(self, data):
        outputs = self.model(data)
        return outputs

In [54]:
#MNIST data
mnist_trainset = datasets.MNIST(root = './data',train =True,transform=transforms.ToTensor(),download=True)
mnist_testset = datasets.MNIST(root = './data', train = False, transform=transforms.ToTensor(),download=True)

In [55]:
model = MNIST_NN()

#parameters
lr = 0.01
epochs = 15
batch_size = 100

#data
traindata = Data.DataLoader(dataset=mnist_trainset, batch_size=batch_size, shuffle=True)

#Optimizer / cost
optimizer = optim.Adam(model.parameters(), lr = lr)
criterion = nn.CrossEntropyLoss()  #Sotfmax is internally computed

In [56]:
for epoch in range(1, epochs +1):
    avg_cost = 0
    total_batch = len(mnist_trainset) // batch_size
    
    for i, (batch_xs, batch_ys) in enumerate(traindata):
        X = Variable(batch_xs.view(-1, 28*28)) #from image to numeric data
        Y = Variable(batch_ys)  #not ont-hot
        
        optimizer.zero_grad()
        outputs = model.forward(X)
        cost = criterion(outputs, Y)
        cost.backward()
        optimizer.step()
        
        avg_cost += cost / total_batch
        
    print('epoch  {},  loss {}'.format(epoch, avg_cost))

epoch  1,  loss 0.23607602715492249
epoch  2,  loss 0.1395534723997116
epoch  3,  loss 0.11366860568523407
epoch  4,  loss 0.10406602174043655
epoch  5,  loss 0.08936727046966553
epoch  6,  loss 0.09181848913431168
epoch  7,  loss 0.08038211613893509
epoch  8,  loss 0.07555294036865234
epoch  9,  loss 0.07662954926490784
epoch  10,  loss 0.06711594760417938
epoch  11,  loss 0.07028888165950775
epoch  12,  loss 0.0611613467335701
epoch  13,  loss 0.0657026544213295
epoch  14,  loss 0.06251231580972672
epoch  15,  loss 0.05607284978032112


In [57]:
#Test the model and check accuracy
X_test = Variable(mnist_testset.test_data.view(-1, 28 * 28).float())
Y_test = Variable(mnist_testset.test_labels)

prediction = model(X_test)
correct_prediction = (prediction.argmax(1) == Y_test)
accuracy = correct_prediction.float().mean()
print('Accuracy:', accuracy.item())

Accuracy: 0.9690999984741211


# Xavier Initialization

In [58]:
class Xavier_NN(nn.Module):
    def __init__(self):
        super(Xavier_NN, self).__init__()
        self.relu = nn.ReLU()
        self.linear1 = nn.Linear(784, 256, bias = True)
        self.linear2 = nn.Linear(256, 256, bias = True)
        self.linear3 = nn.Linear(256, 10, bias = True)
        
        #Xavier initializer
        nn.init.xavier_uniform_(self.linear1.weight)
        nn.init.xavier_uniform_(self.linear2.weight)
        nn.init.xavier_uniform_(self.linear3.weight)
        
        self.model = nn.Sequential(self.linear1, self.relu,
                                   self.linear2, self.relu,
                                   self.linear3)
        
    def forward(self, data):
        outputs = self.model(data)
        return outputs

In [59]:
model_X = Xavier_NN()


optimizer = optim.Adam(model_X.parameters(), lr = lr) #Optimizer
criterion = nn.CrossEntropyLoss()  #Sotfmax is internally computed

In [60]:
for epoch in range(1, epochs +1):
    avg_cost = 0
    total_batch = len(mnist_trainset) // batch_size
    
    for i, (batch_xs, batch_ys) in enumerate(traindata):
        X = Variable(batch_xs.view(-1, 28*28)) #from image to numeric data
        Y = Variable(batch_ys)  #not ont-hot
        
        optimizer.zero_grad()
        outputs = model_X.forward(X)
        cost = criterion(outputs, Y)
        cost.backward()
        optimizer.step()
        
        avg_cost += cost / total_batch
        
    print('epoch  {},  loss {}'.format(epoch, avg_cost))

epoch  1,  loss 0.23538334667682648
epoch  2,  loss 0.14147844910621643
epoch  3,  loss 0.11603114008903503
epoch  4,  loss 0.10626893490552902
epoch  5,  loss 0.10493173450231552
epoch  6,  loss 0.0902472659945488
epoch  7,  loss 0.08121168613433838
epoch  8,  loss 0.08487104624509811
epoch  9,  loss 0.07300370186567307
epoch  10,  loss 0.06213495507836342
epoch  11,  loss 0.06859105825424194
epoch  12,  loss 0.06452157348394394
epoch  13,  loss 0.06314702332019806
epoch  14,  loss 0.06860172748565674
epoch  15,  loss 0.05719377472996712


In [61]:
#Test the model and check accuracy
X_test = Variable(mnist_testset.test_data.view(-1, 28 * 28).float())
Y_test = Variable(mnist_testset.test_labels)

prediction = model_X(X_test)
correct_prediction = (prediction.argmax(1) == Y_test)
accuracy = correct_prediction.float().mean()
print('Accuracy:', accuracy.item())

Accuracy: 0.9731000065803528


# Deep NN for MNIST
* add 2 layers and increase # of nodes!

In [62]:
class Deep_NN(nn.Module):
    def __init__(self):
        super(Deep_NN, self).__init__()
        self.relu = nn.ReLU()
        self.linear1 = nn.Linear(784, 512, bias = True)
        self.linear2 = nn.Linear(512, 512, bias = True)
        self.linear3 = nn.Linear(512, 512, bias = True)
        self.linear4 = nn.Linear(512, 512, bias = True)
        self.linear5 = nn.Linear(512, 10, bias = True)
        
        #Xavier initializer
        nn.init.xavier_uniform_(self.linear1.weight)
        nn.init.xavier_uniform_(self.linear2.weight)
        nn.init.xavier_uniform_(self.linear3.weight)
        nn.init.xavier_uniform_(self.linear4.weight)
        nn.init.xavier_uniform_(self.linear5.weight)
        
        self.model = nn.Sequential(self.linear1, self.relu,
                                   self.linear2, self.relu,
                                   self.linear3, self.relu,
                                   self.linear4, self.relu,
                                   self.linear5)
        
    def forward(self, data):
        outputs = self.model(data)
        return outputs

In [63]:
model_D = Deep_NN()

lr = 0.01
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model_D.parameters(), lr = lr)

In [64]:
for epoch in range(1, epochs +1):
    avg_cost = 0
    total_batch = len(mnist_trainset) // batch_size
    
    for i, (batch_xs, batch_ys) in enumerate(traindata):
        X = Variable(batch_xs.view(-1, 28*28)) #from image to numeric data
        Y = Variable(batch_ys)  #not ont-hot
        
        optimizer.zero_grad()
        outputs = model_D.forward(X)
        cost = criterion(outputs, Y)
        cost.backward()
        optimizer.step()
        
        avg_cost += cost / total_batch
        
    print('epoch  {},  loss {}'.format(epoch, avg_cost))

epoch  1,  loss 0.3862922787666321
epoch  2,  loss 0.21096588671207428
epoch  3,  loss 0.1832371950149536
epoch  4,  loss 0.1688367873430252
epoch  5,  loss 0.15225964784622192
epoch  6,  loss 0.16192306578159332
epoch  7,  loss 0.1538003832101822
epoch  8,  loss 0.13886702060699463
epoch  9,  loss 0.11756253242492676
epoch  10,  loss 0.12512308359146118
epoch  11,  loss 0.11533062160015106
epoch  12,  loss 0.12324825674295425
epoch  13,  loss 0.11090489476919174
epoch  14,  loss 0.11754362285137177
epoch  15,  loss 0.10756530612707138


In [65]:
#Test the model and check accuracy
X_test = Variable(mnist_testset.test_data.view(-1, 28 * 28).float())
Y_test = Variable(mnist_testset.test_labels)

prediction = model_D(X_test)
correct_prediction = (prediction.argmax(1) == Y_test)
accuracy = correct_prediction.float().mean()
print('Accuracy:', accuracy.item())

Accuracy: 0.9498000144958496


Even though this model is deeper than before, the accuracy is lower.
This is because of overfitting.
We can solve this problem by dropout.

# Dropout for MNIST
* In general, p is in range(0.3, 0.5)
* When test the model, sholud use p = 1

In [67]:
class Dropout_NN(nn.Module):
    def __init__(self):
        super(Dropout_NN, self).__init__()
        self.relu = nn.ReLU()
        self.drop = nn.Dropout(1-keep_prob)
        self.linear1 = nn.Linear(784, 512, bias = True)
        self.linear2 = nn.Linear(512, 512, bias = True)
        self.linear3 = nn.Linear(512, 512, bias = True)
        self.linear4 = nn.Linear(512, 512, bias = True)
        self.linear5 = nn.Linear(512, 10, bias = True)
        
        #Xavier initializer
        nn.init.xavier_uniform_(self.linear1.weight)
        nn.init.xavier_uniform_(self.linear2.weight)
        nn.init.xavier_uniform_(self.linear3.weight)
        nn.init.xavier_uniform_(self.linear4.weight)
        nn.init.xavier_uniform_(self.linear5.weight)
        
        self.model = nn.Sequential(self.linear1, self.relu, self.drop,
                                   self.linear2, self.relu, self.drop,
                                   self.linear3, self.relu, self.drop,
                                   self.linear4, self.relu, self.drop,
                                   self.linear5)
        
    def forward(self, data):
        outputs = self.model(data)
        return outputs

In [72]:
keep_prob = 0.7
model_Drop = Dropout_NN()

lr = 0.001
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model_Drop.parameters(), lr = lr)

In [73]:
for epoch in range(1, epochs +1):
    avg_cost = 0
    total_batch = len(mnist_trainset) // batch_size
    
    for i, (batch_xs, batch_ys) in enumerate(traindata):
        X = Variable(batch_xs.view(-1, 28*28)) #from image to numeric data
        Y = Variable(batch_ys)  #not ont-hot
        
        optimizer.zero_grad()
        outputs = model_Drop.forward(X)
        cost = criterion(outputs, Y)
        cost.backward()
        optimizer.step()
        
        avg_cost += cost / total_batch
        
    print('epoch  {},  loss {}'.format(epoch, avg_cost))

epoch  1,  loss 0.31357690691947937
epoch  2,  loss 0.14212769269943237
epoch  3,  loss 0.11370948702096939
epoch  4,  loss 0.09662743657827377
epoch  5,  loss 0.08155152946710587
epoch  6,  loss 0.07803860306739807
epoch  7,  loss 0.06681666523218155
epoch  8,  loss 0.06269488483667374
epoch  9,  loss 0.05967650189995766
epoch  10,  loss 0.054092053323984146
epoch  11,  loss 0.053618740290403366
epoch  12,  loss 0.049541570246219635
epoch  13,  loss 0.04811105877161026
epoch  14,  loss 0.045855194330215454
epoch  15,  loss 0.04552885517477989


In [74]:
#Test the model and check accuracy
X_test = Variable(mnist_testset.test_data.view(-1, 28 * 28).float())
Y_test = Variable(mnist_testset.test_labels)

keep_prob = 1
prediction = model_Drop(X_test)
correct_prediction = (prediction.argmax(1) == Y_test)
accuracy = correct_prediction.float().mean()
print('Accuracy:', accuracy.item())

Accuracy: 0.9763000011444092
