In [0]:
import torch
from torch import nn, optim
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision import datasets
import time

# Hyper-parameters shared by the experiments in this notebook.
batch_size = 32         # samples per mini-batch for both data loaders
learning_rate = 0.001   # step size handed to the optimizers
num_epoches = 10        # full passes over the training set (name kept as-is: other cells read it)

In [0]:
# MNIST train/test splits stored under ./data, images converted to tensors.
# Only the training split is created with download=True.
train_dataset = datasets.MNIST(
    root='./data',
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)
test_dataset = datasets.MNIST(
    root='./data',
    train=False,
    transform=transforms.ToTensor(),
)

# Training batches are shuffled; test batches keep the dataset order.
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)

In [0]:
def train(model):
    """Train ``model`` on flattened image batches and print test metrics per epoch.

    The function reads these names from the notebook namespace, so they must
    exist before it is called: ``train_loader``, ``test_loader``,
    ``test_dataset``, ``criterion``, ``optimizer``, ``device`` and
    ``num_epoches``. ``optimizer`` has to be built from this model's
    parameters, otherwise ``optimizer.step()`` will not update it.

    Args:
        model: an ``nn.Module`` already moved to ``device``. It is called with
            a 2-D ``(batch, features)`` tensor and must return a 2-D tensor of
            per-class scores (the arg-max over dim 1 is compared with the
            labels).

    Returns:
        None. One summary line (test loss and accuracy) is printed per epoch.
        The model is left in evaluation mode when the function returns.
    """
    for epoch in range(num_epoches):
        # ---- optimisation pass over the training set ----
        # Make sure train-time behaviour (dropout, batch-norm statistics, ...)
        # is active again after the previous epoch's evaluation.
        model.train()
        for img, label in train_loader:
            # Collapse every image to one feature vector per sample.
            img = img.view(img.size(0), -1).to(device)
            label = label.to(device)

            out = model(img)
            loss = criterion(out, label)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # ---- evaluation pass over the test set ----
        # eval() switches layers to inference behaviour; no_grad() avoids
        # building autograd graphs that are never back-propagated.
        model.eval()
        eval_loss = 0.
        eval_acc = 0.
        with torch.no_grad():
            for img, label in test_loader:
                img = img.view(img.size(0), -1).to(device)
                label = label.to(device)
                out = model(img)
                loss = criterion(out, label)
                # Re-weight by batch size so the final division by the dataset
                # size is a per-sample average (assumes ``criterion`` returns
                # a batch mean -- confirm if the criterion is changed).
                eval_loss += loss.item() * label.size(0)
                _, pred = torch.max(out, 1)
                eval_acc += (pred == label).sum().item()

        print('epoch {}/{}, Test Loss: {:.6f}, Acc: {:.6f}'.format(epoch + 1, num_epoches, eval_loss / (len(test_dataset)), eval_acc / (len(test_dataset))))


In [0]:
class FCNet(nn.Module):
    """Three fully connected layers applied back to back, with no activation.

    Since nothing non-linear separates the layers, the whole network is an
    affine function of its flattened input.

    Args:
        in_dim: number of features per sample after flattening.
        n_hidden_1: width of the first hidden layer.
        n_hidden_2: width of the second hidden layer.
        out_dim: number of output scores per sample.
    """

    def __init__(self, in_dim, n_hidden_1, n_hidden_2, out_dim):
        super(FCNet, self).__init__()
        self.layer1 = nn.Linear(in_dim, n_hidden_1)
        self.layer2 = nn.Linear(n_hidden_1, n_hidden_2)
        self.layer3 = nn.Linear(n_hidden_2, out_dim)

    def forward(self, x):
        """Flatten ``x`` to ``(batch, -1)`` and run it through the three layers."""
        hidden = x.view(x.size(0), -1)
        for layer in (self.layer1, self.layer2, self.layer3):
            hidden = layer(hidden)
        return hidden

In [0]:
class FCNet2(nn.Module):
    """Fully connected classifier: Linear-ReLU-Linear-ReLU-Linear-Softmax.

    The output rows are probabilities (each row sums to 1), not raw logits.

    NOTE(review): ``nn.CrossEntropyLoss`` applies log-softmax itself, so
    training this model with that loss normalises the scores twice. The
    softmax layer is kept because it is what this model is meant to
    demonstrate; drop it (or switch the loss) if plain logits are wanted.

    Args:
        in_dim: number of features per sample after flattening.
        n_hidden_1: width of the first hidden layer.
        n_hidden_2: width of the second hidden layer.
        out_dim: number of classes.
    """

    def __init__(self, in_dim, n_hidden_1, n_hidden_2, out_dim):
        super(FCNet2, self).__init__()
        self.fcnet = nn.Sequential(
            nn.Linear(in_dim, n_hidden_1),
            nn.ReLU(),
            nn.Linear(n_hidden_1, n_hidden_2),
            nn.ReLU(),
            nn.Linear(n_hidden_2, out_dim),
            # Explicit class dimension: the implicit choice is deprecated and
            # warns on every forward pass. For the 2-D input produced in
            # ``forward`` the implicit choice was dim 1 as well, so the
            # numerical result is unchanged.
            nn.Softmax(dim=1),
        )

    def forward(self, x):
        """Flatten ``x`` to ``(batch, -1)`` and return per-class probabilities."""
        x = x.view(x.size(0), -1)
        out = self.fcnet(x)
        return out

In [22]:
# Use the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Experiment: purely linear three-layer network on flattened 28x28 images.
model_linear = FCNet(28 * 28, 300, 100, 10).to(device)

# `train` picks up `criterion` and `optimizer` from the notebook namespace,
# so both are rebound here for this particular model before calling it.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(
    model_linear.parameters(),
    lr=learning_rate,
    weight_decay=1e-5,
)
train(model_linear)

epoch 1/10, Test Loss: 0.322297, Acc: 0.905900
epoch 2/10, Test Loss: 0.312928, Acc: 0.913800
epoch 3/10, Test Loss: 0.319871, Acc: 0.907400
epoch 4/10, Test Loss: 0.301230, Acc: 0.914500
epoch 5/10, Test Loss: 0.295956, Acc: 0.918300
epoch 6/10, Test Loss: 0.303271, Acc: 0.911000
epoch 7/10, Test Loss: 0.302588, Acc: 0.918700
epoch 8/10, Test Loss: 0.306661, Acc: 0.912300
epoch 9/10, Test Loss: 0.298172, Acc: 0.917900
epoch 10/10, Test Loss: 0.285057, Acc: 0.921200


In [24]:
# Experiment: ReLU hidden layers with a softmax output.
model_relu_softmax = FCNet2(28 * 28, 300, 100, 10).to(device)

# `train` picks up `criterion` and `optimizer` from the notebook namespace,
# so both are rebound here for this particular model before calling it.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(
    model_relu_softmax.parameters(),
    lr=learning_rate,
    weight_decay=1e-5,
)
train(model_relu_softmax)

  input = module(input)


epoch 1/10, Test Loss: 1.519300, Acc: 0.944200
epoch 2/10, Test Loss: 1.502804, Acc: 0.960200
epoch 3/10, Test Loss: 1.496745, Acc: 0.965800
epoch 4/10, Test Loss: 1.497404, Acc: 0.964400
epoch 5/10, Test Loss: 1.488862, Acc: 0.972300
epoch 6/10, Test Loss: 1.488474, Acc: 0.972800
epoch 7/10, Test Loss: 1.487576, Acc: 0.973700
epoch 8/10, Test Loss: 1.489184, Acc: 0.972200
epoch 9/10, Test Loss: 1.497089, Acc: 0.964400
epoch 10/10, Test Loss: 1.486019, Acc: 0.975300


In [0]:
class FCNet_sig_softmax(nn.Module):
    """Fully connected classifier: Linear-Sigmoid-Linear-Sigmoid-Linear-Softmax.

    The final softmax runs over dim 1, so every output row is a probability
    distribution over the classes.

    Args:
        in_dim: number of features per sample after flattening.
        n_hidden_1: width of the first hidden layer.
        n_hidden_2: width of the second hidden layer.
        out_dim: number of classes.
    """

    def __init__(self, in_dim, n_hidden_1, n_hidden_2, out_dim):
        super(FCNet_sig_softmax, self).__init__()
        # Listed in execution order; positions inside the Sequential are kept
        # so parameter names (fcnet.0, fcnet.2, fcnet.4) do not change.
        stages = [
            nn.Linear(in_dim, n_hidden_1),
            nn.Sigmoid(),
            nn.Linear(n_hidden_1, n_hidden_2),
            nn.Sigmoid(),
            nn.Linear(n_hidden_2, out_dim),
            nn.Softmax(dim=1),
        ]
        self.fcnet = nn.Sequential(*stages)

    def forward(self, x):
        """Flatten ``x`` to ``(batch, -1)`` and return per-class probabilities."""
        flat = x.view(x.size(0), -1)
        return self.fcnet(flat)

In [29]:
# Experiment: sigmoid hidden layers with a softmax output.
model_sig_softmax = FCNet_sig_softmax(28 * 28, 300, 100, 10).to(device)

# `train` picks up `criterion` and `optimizer` from the notebook namespace,
# so both are rebound here for this particular model before calling it.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(
    model_sig_softmax.parameters(),
    lr=learning_rate,
    weight_decay=1e-5,
)
train(model_sig_softmax)

epoch 1/10, Test Loss: 1.536332, Acc: 0.931500
epoch 2/10, Test Loss: 1.515496, Acc: 0.949000
epoch 3/10, Test Loss: 1.508236, Acc: 0.956000
epoch 4/10, Test Loss: 1.502240, Acc: 0.962800
epoch 5/10, Test Loss: 1.497970, Acc: 0.966400
epoch 6/10, Test Loss: 1.497321, Acc: 0.966700
epoch 7/10, Test Loss: 1.491599, Acc: 0.971600
epoch 8/10, Test Loss: 1.490493, Acc: 0.972200
epoch 9/10, Test Loss: 1.490858, Acc: 0.972000
epoch 10/10, Test Loss: 1.489943, Acc: 0.973500


In [0]:
def train_cnn(model):
    """Train ``model`` on un-flattened image batches and print test metrics per epoch.

    Unlike a loop that flattens its inputs, the batches are handed to the
    model in exactly the shape the loaders yield them.

    The function reads these names from the notebook namespace, so they must
    exist before it is called: ``train_loader``, ``test_loader``,
    ``test_dataset``, ``criterion``, ``optimizer``, ``device`` and
    ``num_epoches``. ``optimizer`` has to be built from this model's
    parameters, otherwise ``optimizer.step()`` will not update it.

    Args:
        model: an ``nn.Module`` already moved to ``device``. It must return a
            2-D tensor of per-class scores (the arg-max over dim 1 is compared
            with the labels).

    Returns:
        None. One summary line (test loss and accuracy) is printed per epoch.
        The model is left in evaluation mode when the function returns.
    """
    for epoch in range(num_epoches):
        # ---- optimisation pass over the training set ----
        # Re-enable train-time behaviour after the previous epoch's evaluation.
        model.train()
        for img, label in train_loader:
            img = img.to(device)
            label = label.to(device)

            out = model(img)
            loss = criterion(out, label)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # ---- evaluation pass over the test set ----
        # eval() matters for models with batch normalisation: in training mode
        # those layers normalise with the statistics of the current (test)
        # batch and fold them into their running averages. no_grad() avoids
        # building autograd graphs that are never back-propagated.
        model.eval()
        eval_loss = 0.
        eval_acc = 0.
        with torch.no_grad():
            for img, label in test_loader:
                img = img.to(device)
                label = label.to(device)
                out = model(img)
                loss = criterion(out, label)
                # Re-weight by batch size so the final division by the dataset
                # size is a per-sample average (assumes ``criterion`` returns
                # a batch mean -- confirm if the criterion is changed).
                eval_loss += loss.item() * label.size(0)
                _, pred = torch.max(out, 1)
                eval_acc += (pred == label).sum().item()

        print('epoch {}/{}, Test Loss: {:.6f}, Acc: {:.6f}'.format(epoch + 1, num_epoches, eval_loss / (len(test_dataset)), eval_acc / (len(test_dataset))))

In [0]:
class cnn(nn.Module):
    """Two Conv-ReLU-MaxPool stages followed by a three-layer linear head.

    The head's first layer expects 576 features, which is what the two stages
    produce for 28x28 inputs: 28 -> (3x3 conv, padding 1) 28 -> pool 14
    -> (3x3 conv, no padding) 12 -> pool 6, with 16 channels (16 * 6 * 6).
    The head has no activation between its linear layers.

    Args:
        in_dim: number of input channels.
        nclass: number of output scores per sample.
    """

    def __init__(self, in_dim, nclass):
        super(cnn, self).__init__()

        # Stage 1: padded 3x3 convolution keeps the spatial size, pooling halves it.
        stage1 = [
            nn.Conv2d(in_dim, 6, 3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
        ]
        self.conv1 = nn.Sequential(*stage1)

        # Stage 2: unpadded 3x3 convolution trims the border, pooling halves again.
        stage2 = [
            nn.Conv2d(6, 16, 3, stride=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
        ]
        self.conv2 = nn.Sequential(*stage2)

        # Linear head on the flattened feature maps.
        head = [
            nn.Linear(576, 200),
            nn.Linear(200, 100),
            nn.Linear(100, nclass),
        ]
        self.fc = nn.Sequential(*head)

    def forward(self, x):
        """Run both convolution stages, flatten per sample, apply the head."""
        features = self.conv2(self.conv1(x))
        flat = features.view(features.size(0), -1)
        return self.fc(flat)

In [67]:
# Experiment: small CNN with a purely linear head, single input channel, 10 classes.
model_cnn_lin = cnn(1, 10).to(device)

# `train_cnn` picks up `criterion` and `optimizer` from the notebook namespace,
# so both are rebound here for this particular model before calling it.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(
    model_cnn_lin.parameters(),
    lr=learning_rate,
    weight_decay=1e-5,
)
train_cnn(model_cnn_lin)

epoch 1/10, Test Loss: 0.075236, Acc: 0.976200
epoch 2/10, Test Loss: 0.062623, Acc: 0.980200
epoch 3/10, Test Loss: 0.062769, Acc: 0.980300
epoch 4/10, Test Loss: 0.054678, Acc: 0.981100
epoch 5/10, Test Loss: 0.060837, Acc: 0.981100
epoch 6/10, Test Loss: 0.042072, Acc: 0.987100
epoch 7/10, Test Loss: 0.044130, Acc: 0.984900
epoch 8/10, Test Loss: 0.045207, Acc: 0.985700
epoch 9/10, Test Loss: 0.043120, Acc: 0.986400
epoch 10/10, Test Loss: 0.052145, Acc: 0.984400


In [0]:
class cnn_bn(nn.Module):
    """Two Conv-MaxPool-ReLU-BatchNorm stages followed by a three-layer linear head.

    In each stage the pooling comes before the ReLU and the batch
    normalisation is applied last. The head's first layer expects 576
    features, which is what the stages produce for 28x28 inputs
    (16 channels of 6x6). The head has no activation between its linear
    layers and returns raw scores.

    Args:
        in_dim: number of input channels.
        nclass: number of output scores per sample.
    """

    def __init__(self, in_dim, nclass):
        super(cnn_bn, self).__init__()

        # Stage 1: padded 3x3 convolution, 2x2 pooling, ReLU, then batch norm over 6 channels.
        stage1 = [
            nn.Conv2d(in_dim, 6, 3, stride=1, padding=1),
            nn.MaxPool2d(2),
            nn.ReLU(),
            nn.BatchNorm2d(6),
        ]
        self.conv1 = nn.Sequential(*stage1)

        # Stage 2: unpadded 3x3 convolution, 2x2 pooling, ReLU, then batch norm over 16 channels.
        stage2 = [
            nn.Conv2d(6, 16, 3, stride=1),
            nn.MaxPool2d(2),
            nn.ReLU(),
            nn.BatchNorm2d(16),
        ]
        self.conv2 = nn.Sequential(*stage2)

        # Linear head on the flattened feature maps.
        head = [
            nn.Linear(576, 200),
            nn.Linear(200, 100),
            nn.Linear(100, nclass),
        ]
        self.fc = nn.Sequential(*head)

    def forward(self, x):
        """Run both convolution stages, flatten per sample, apply the head."""
        features = self.conv2(self.conv1(x))
        flat = features.view(features.size(0), -1)
        return self.fc(flat)

In [77]:
# Experiment: the CNN variant with batch normalisation, single input channel, 10 classes.
model_cnn_bn = cnn_bn(1, 10).to(device)

# `train_cnn` picks up `criterion` and `optimizer` from the notebook namespace,
# so both are rebound here for this particular model before calling it.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(
    model_cnn_bn.parameters(),
    lr=learning_rate,
    weight_decay=1e-5,
)
train_cnn(model_cnn_bn)

epoch 1/10, Test Loss: 0.071877, Acc: 0.978400
epoch 2/10, Test Loss: 0.050684, Acc: 0.984200
epoch 3/10, Test Loss: 0.048558, Acc: 0.983900
epoch 4/10, Test Loss: 0.055176, Acc: 0.983200
epoch 5/10, Test Loss: 0.046137, Acc: 0.984300
epoch 6/10, Test Loss: 0.053588, Acc: 0.983600
epoch 7/10, Test Loss: 0.048516, Acc: 0.986200
epoch 8/10, Test Loss: 0.040939, Acc: 0.987600
epoch 9/10, Test Loss: 0.049530, Acc: 0.986000
epoch 10/10, Test Loss: 0.046109, Acc: 0.985400


In [0]:
# Scratch inspection of the current batch shape.
# NOTE(review): `img` is not assigned at notebook level in any cell shown above
# this one, so this appears to rely on leftover kernel state -- confirm.
img.shape

In [0]:
# Scratch cell: move the current batch to the selected device.
# The deprecated `Variable` wrapper is dropped; tensors are moved directly.
# NOTE(review): `img` is not assigned at notebook level in any cell shown above
# this one, so this appears to rely on leftover kernel state -- confirm.
img = img.to(device)

In [0]:
# Scratch inspection of the batch shape after the device move.
img.shape

In [0]:
# Scratch check: print the flattened shape of every training batch.
# This walks the whole loader and prints one line per batch.
for i, data in enumerate(train_loader, 1):
    img, label = data
    # Tensors are moved to the device directly; the deprecated `Variable`
    # wrapper is not needed.
    img = img.view(img.size(0), -1).to(device)
    label = label.to(device)
    print(img.size())