#SGD with lr = 0.05

In [15]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision 
import torchvision.transforms as transforms
import time

# Preparing for Data
print('==> Preparing data..')

# Training Data augmentation
transform_train = transforms.Compose([
    transforms.Resize((32,32)),
    transforms.ToTensor(),
    transforms.Normalize(mean = (0.1307,), std = (0.3081,)),
])
# Testing Data preparation
transform_test = transforms.Compose([
    transforms.Resize((32,32)),
    transforms.ToTensor(),
    transforms.Normalize(mean = (0.1307,), std = (0.3081,)),
])




class LeNet(nn.Module):
    def __init__(self):
        super(LeNet, self).__init__()
        self.c1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.c2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        self.c3 = nn.Conv2d(in_channels=16, out_channels=120, kernel_size=5)
        self.max_pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.relu = nn.ReLU()
        self.fc1 = nn.Linear(in_features=120, out_features=84)
        self.fc2 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        x = self.c1(x)
        x = self.max_pool(self.relu(x))
        x = self.c2(x)
        x = self.max_pool(self.relu(x))
        x = self.relu(self.c3(x))
        x = torch.flatten(x, 1)
        x = self.relu(self.fc1(x))
        x = self.fc2(x)
        return x



def train(model, device, train_loader, optimizer, epoch):
    model.train()
    count = 0
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
  
        optimizer.zero_grad()
        output = model(data)
        criterion = nn.CrossEntropyLoss()
            
        loss = criterion(output,target)
        loss.backward()
        optimizer.step()
        
    
        if batch_idx % 10 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))

def test( model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item() # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()
    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

def main():
    time0 = time.time()
    # Training settings
    epochs = 50
    lr = 0.05
    no_cuda = True
    save_model = False
    use_cuda = not no_cuda and torch.cuda.is_available()
    #torch.manual_seed(100)
    device = torch.device("cuda" if use_cuda else "cpu")
    
    trainset = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform_train)
    train_loader = torch.utils.data.DataLoader(trainset, batch_size=128, shuffle=True)
    testset = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=transform_test)
    test_loader = torch.utils.data.DataLoader(testset, batch_size=100, shuffle=False)

    model = LeNet().to(device)
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=5e-4)

    for epoch in range(1, epochs + 1):
        train( model, device, train_loader, optimizer, epoch)
        test( model, device, test_loader)

    if (save_model):
        torch.save(model.state_dict(),"cifar_lenet.pt")
    time1 = time.time() 
    print ('Traning and Testing total excution time is: %s seconds ' % (time1-time0))   
if __name__ == '__main__':
    main()

==> Preparing data..

Test set: Average loss: -10.4425, Accuracy: 9674/10000 (97%)


Test set: Average loss: -12.4357, Accuracy: 9814/10000 (98%)


Test set: Average loss: -12.8658, Accuracy: 9880/10000 (99%)


Test set: Average loss: -13.1380, Accuracy: 9839/10000 (98%)


Test set: Average loss: -12.2274, Accuracy: 9841/10000 (98%)


Test set: Average loss: -13.1919, Accuracy: 9880/10000 (99%)


Test set: Average loss: -13.3203, Accuracy: 9857/10000 (99%)


Test set: Average loss: -14.2476, Accuracy: 9881/10000 (99%)


Test set: Average loss: -11.9467, Accuracy: 9860/10000 (99%)


Test set: Average loss: -13.1896, Accuracy: 9880/10000 (99%)


Test set: Average loss: -14.6759, Accuracy: 9868/10000 (99%)


Test set: Average loss: -12.9225, Accuracy: 9876/10000 (99%)


Test set: Average loss: -12.4296, Accuracy: 9897/10000 (99%)


Test set: Average loss: -13.5874, Accuracy: 9845/10000 (98%)


Test set: Average loss: -12.9814, Accuracy: 9855/10000 (99%)


Test set: Average loss: -13.9996,

#SGD with lr = 0.0001

In [3]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision 
import torchvision.transforms as transforms
import time

# Preparing for Data
print('==> Preparing data..')

# Training Data augmentation
transform_train = transforms.Compose([
    transforms.Resize((32,32)),
    transforms.ToTensor(),
    transforms.Normalize(mean = (0.1307,), std = (0.3081,)),
])
# Testing Data preparation
transform_test = transforms.Compose([
    transforms.Resize((32,32)),
    transforms.ToTensor(),
    transforms.Normalize(mean = (0.1307,), std = (0.3081,)),
])




class LeNet(nn.Module):
    def __init__(self):
        super(LeNet, self).__init__()
        self.c1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.c2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        self.c3 = nn.Conv2d(in_channels=16, out_channels=120, kernel_size=5)
        self.max_pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.relu = nn.ReLU()
        self.fc1 = nn.Linear(in_features=120, out_features=84)
        self.fc2 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        x = self.c1(x)
        x = self.max_pool(self.relu(x))
        x = self.c2(x)
        x = self.max_pool(self.relu(x))
        x = self.relu(self.c3(x))
        x = torch.flatten(x, 1)
        x = self.relu(self.fc1(x))
        x = self.fc2(x)
        return x



def train(model, device, train_loader, optimizer, epoch):
    model.train()
    count = 0
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
       
        optimizer.zero_grad()
        output = model(data)
        criterion = nn.CrossEntropyLoss()
            
        loss = criterion(output,target)
        loss.backward()
        optimizer.step()
        
        
        if batch_idx % 10 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))

def test( model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item() # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()
    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

def main():
    time0 = time.time()
    # Training settings
    epochs = 50
    lr = 0.0001
    no_cuda = True
    save_model = False
    use_cuda = not no_cuda and torch.cuda.is_available()
    #torch.manual_seed(100)
    device = torch.device("cuda" if use_cuda else "cpu")
    
    trainset = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform_train)
    train_loader = torch.utils.data.DataLoader(trainset, batch_size=128, shuffle=True)
    testset = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=transform_test)
    test_loader = torch.utils.data.DataLoader(testset, batch_size=100, shuffle=False)

    model = LeNet().to(device)
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=5e-4)

    for epoch in range(1, epochs + 1):
        train( model, device, train_loader, optimizer, epoch)
        test( model, device, test_loader)

    if (save_model):
        torch.save(model.state_dict(),"cifar_lenet.pt")
    time1 = time.time() 
    print ('Traning and Testing total excution time is: %s seconds ' % (time1-time0))   
if __name__ == '__main__':
    main()

==> Preparing data..

Test set: Average loss: -0.0013, Accuracy: 1559/10000 (16%)


Test set: Average loss: -0.0170, Accuracy: 2200/10000 (22%)


Test set: Average loss: -0.0481, Accuracy: 3033/10000 (30%)


Test set: Average loss: -0.1248, Accuracy: 3343/10000 (33%)


Test set: Average loss: -0.3954, Accuracy: 4720/10000 (47%)


Test set: Average loss: -1.7625, Accuracy: 7088/10000 (71%)


Test set: Average loss: -4.2810, Accuracy: 7937/10000 (79%)


Test set: Average loss: -5.9574, Accuracy: 8384/10000 (84%)


Test set: Average loss: -6.9823, Accuracy: 8680/10000 (87%)


Test set: Average loss: -7.6629, Accuracy: 8817/10000 (88%)


Test set: Average loss: -8.0992, Accuracy: 8911/10000 (89%)


Test set: Average loss: -8.4041, Accuracy: 8956/10000 (90%)


Test set: Average loss: -8.6586, Accuracy: 9028/10000 (90%)


Test set: Average loss: -8.8513, Accuracy: 9070/10000 (91%)


Test set: Average loss: -8.9096, Accuracy: 9122/10000 (91%)


Test set: Average loss: -9.0137, Accuracy: 9187/

#Adagrad with lr = 0.05

In [1]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision 
import torchvision.transforms as transforms
import time

# Preparing for Data
print('==> Preparing data..')

# Training Data augmentation
transform_train = transforms.Compose([
    transforms.Resize((32,32)),
    transforms.ToTensor(),
    transforms.Normalize(mean = (0.1307,), std = (0.3081,)),
])
# Testing Data preparation
transform_test = transforms.Compose([
    transforms.Resize((32,32)),
    transforms.ToTensor(),
    transforms.Normalize(mean = (0.1307,), std = (0.3081,)),
])




class LeNet(nn.Module):
    def __init__(self):
        super(LeNet, self).__init__()
        self.c1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.c2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        self.c3 = nn.Conv2d(in_channels=16, out_channels=120, kernel_size=5)
        self.max_pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.relu = nn.ReLU()
        self.fc1 = nn.Linear(in_features=120, out_features=84)
        self.fc2 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        x = self.c1(x)
        x = self.max_pool(self.relu(x))
        x = self.c2(x)
        x = self.max_pool(self.relu(x))
        x = self.relu(self.c3(x))
        x = torch.flatten(x, 1)
        x = self.relu(self.fc1(x))
        x = self.fc2(x)
        return x



def train(model, device, train_loader, optimizer, epoch):
    model.train()
    count = 0
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        
        optimizer.zero_grad()
        output = model(data)
        criterion = nn.CrossEntropyLoss()
            
        loss = criterion(output,target)
        loss.backward()
        optimizer.step()
        
       
        if batch_idx % 10 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))

def test( model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item() # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()
    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

def main():
    time0 = time.time()
    # Training settings
    epochs = 50
    lr = 0.05
    no_cuda = True
    save_model = False
    use_cuda = not no_cuda and torch.cuda.is_available()
    #torch.manual_seed(100)
    device = torch.device("cuda" if use_cuda else "cpu")
    
    trainset = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform_train)
    train_loader = torch.utils.data.DataLoader(trainset, batch_size=128, shuffle=True)
    testset = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=transform_test)
    test_loader = torch.utils.data.DataLoader(testset, batch_size=100, shuffle=False)

    model = LeNet().to(device)
    optimizer = optim.Adagrad(model.parameters(), lr=lr,  weight_decay=5e-4)

    for epoch in range(1, epochs + 1):
        train( model, device, train_loader, optimizer, epoch)
        test( model, device, test_loader)

    if (save_model):
        torch.save(model.state_dict(),"cifar_lenet.pt")
    time1 = time.time() 
    print ('Traning and Testing total excution time is: %s seconds ' % (time1-time0))   
if __name__ == '__main__':
    main()

==> Preparing data..
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 43855264.01it/s]


Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 69939777.03it/s]


Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 27282591.48it/s]


Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 13415865.33it/s]

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw







Test set: Average loss: -11.2949, Accuracy: 9108/10000 (91%)


Test set: Average loss: -12.0639, Accuracy: 9603/10000 (96%)


Test set: Average loss: -12.9060, Accuracy: 9686/10000 (97%)


Test set: Average loss: -12.9887, Accuracy: 9754/10000 (98%)


Test set: Average loss: -12.7172, Accuracy: 9735/10000 (97%)


Test set: Average loss: -13.0686, Accuracy: 9757/10000 (98%)


Test set: Average loss: -13.0980, Accuracy: 9777/10000 (98%)


Test set: Average loss: -13.8551, Accuracy: 9782/10000 (98%)


Test set: Average loss: -13.8282, Accuracy: 9806/10000 (98%)


Test set: Average loss: -14.1750, Accuracy: 9809/10000 (98%)


Test set: Average loss: -14.3974, Accuracy: 9806/10000 (98%)


Test set: Average loss: -14.5533, Accuracy: 9822/10000 (98%)


Test set: Average loss: -14.8121, Accuracy: 9826/10000 (98%)


Test set: Average loss: -14.7555, Accuracy: 9821/10000 (98%)


Test set: Average loss: -14.7577, Accuracy: 9829/10000 (98%)


Test set: Average loss: -15.3990, Accuracy: 9817/10000

#Adagrad with lr = 0.0001

In [4]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision 
import torchvision.transforms as transforms
import time

# Preparing for Data
print('==> Preparing data..')

# Training Data augmentation
transform_train = transforms.Compose([
    transforms.Resize((32,32)),
    transforms.ToTensor(),
    transforms.Normalize(mean = (0.1307,), std = (0.3081,)),
])
# Testing Data preparation
transform_test = transforms.Compose([
    transforms.Resize((32,32)),
    transforms.ToTensor(),
    transforms.Normalize(mean = (0.1307,), std = (0.3081,)),
])




class LeNet(nn.Module):
    def __init__(self):
        super(LeNet, self).__init__()
        self.c1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.c2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        self.c3 = nn.Conv2d(in_channels=16, out_channels=120, kernel_size=5)
        self.max_pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.relu = nn.ReLU()
        self.fc1 = nn.Linear(in_features=120, out_features=84)
        self.fc2 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        x = self.c1(x)
        x = self.max_pool(self.relu(x))
        x = self.c2(x)
        x = self.max_pool(self.relu(x))
        x = self.relu(self.c3(x))
        x = torch.flatten(x, 1)
        x = self.relu(self.fc1(x))
        x = self.fc2(x)
        return x



def train(model, device, train_loader, optimizer, epoch):
    model.train()
    count = 0
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
      
        optimizer.zero_grad()
        output = model(data)
        criterion = nn.CrossEntropyLoss()
            
        loss = criterion(output,target)
        loss.backward()
        optimizer.step()
        
       
        if batch_idx % 10 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))

def test( model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item() # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()
    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

def main():
    time0 = time.time()
    # Training settings
    epochs = 50
    lr = 0.0001
    no_cuda = True
    save_model = False
    use_cuda = not no_cuda and torch.cuda.is_available()
    #torch.manual_seed(100)
    device = torch.device("cuda" if use_cuda else "cpu")
    
    trainset = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform_train)
    train_loader = torch.utils.data.DataLoader(trainset, batch_size=128, shuffle=True)
    testset = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=transform_test)
    test_loader = torch.utils.data.DataLoader(testset, batch_size=100, shuffle=False)

    model = LeNet().to(device)
    optimizer = optim.Adagrad(model.parameters(), lr=lr,  weight_decay=5e-4)

    for epoch in range(1, epochs + 1):
        train( model, device, train_loader, optimizer, epoch)
        test( model, device, test_loader)

    if (save_model):
        torch.save(model.state_dict(),"cifar_lenet.pt")
    time1 = time.time() 
    print ('Traning and Testing total excution time is: %s seconds ' % (time1-time0))   
if __name__ == '__main__':
    main()

==> Preparing data..

Test set: Average loss: -0.0737, Accuracy: 3658/10000 (37%)


Test set: Average loss: -0.1654, Accuracy: 4546/10000 (45%)


Test set: Average loss: -0.2745, Accuracy: 5165/10000 (52%)


Test set: Average loss: -0.3971, Accuracy: 5769/10000 (58%)


Test set: Average loss: -0.5309, Accuracy: 6318/10000 (63%)


Test set: Average loss: -0.6787, Accuracy: 6822/10000 (68%)


Test set: Average loss: -0.8315, Accuracy: 7224/10000 (72%)


Test set: Average loss: -0.9962, Accuracy: 7507/10000 (75%)


Test set: Average loss: -1.1671, Accuracy: 7787/10000 (78%)


Test set: Average loss: -1.3341, Accuracy: 7968/10000 (80%)


Test set: Average loss: -1.4955, Accuracy: 8106/10000 (81%)


Test set: Average loss: -1.6566, Accuracy: 8200/10000 (82%)


Test set: Average loss: -1.8160, Accuracy: 8284/10000 (83%)


Test set: Average loss: -1.9657, Accuracy: 8342/10000 (83%)


Test set: Average loss: -2.1124, Accuracy: 8388/10000 (84%)


Test set: Average loss: -2.2532, Accuracy: 8403/

#RMSprop with lr = 0.05

In [2]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision 
import torchvision.transforms as transforms
import time

# Preparing for Data
print('==> Preparing data..')

# Training Data augmentation
transform_train = transforms.Compose([
    transforms.Resize((32,32)),
    transforms.ToTensor(),
    transforms.Normalize(mean = (0.1307,), std = (0.3081,)),
])
# Testing Data preparation
transform_test = transforms.Compose([
    transforms.Resize((32,32)),
    transforms.ToTensor(),
    transforms.Normalize(mean = (0.1307,), std = (0.3081,)),
])




class LeNet(nn.Module):
    def __init__(self):
        super(LeNet, self).__init__()
        self.c1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.c2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        self.c3 = nn.Conv2d(in_channels=16, out_channels=120, kernel_size=5)
        self.max_pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.relu = nn.ReLU()
        self.fc1 = nn.Linear(in_features=120, out_features=84)
        self.fc2 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        x = self.c1(x)
        x = self.max_pool(self.relu(x))
        x = self.c2(x)
        x = self.max_pool(self.relu(x))
        x = self.relu(self.c3(x))
        x = torch.flatten(x, 1)
        x = self.relu(self.fc1(x))
        x = self.fc2(x)
        return x



def train(model, device, train_loader, optimizer, epoch):
    model.train()
    count = 0
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        
        optimizer.zero_grad()
        output = model(data)
        criterion = nn.CrossEntropyLoss()
            
        loss = criterion(output,target)
        loss.backward()
        optimizer.step()
        
        
        if batch_idx % 10 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))

def test( model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item() # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()
    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

def main():
    time0 = time.time()
    # Training settings
    epochs = 50
    lr = 0.05
    no_cuda = True
    save_model = False
    use_cuda = not no_cuda and torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    
    trainset = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform_train)
    train_loader = torch.utils.data.DataLoader(trainset, batch_size=128, shuffle=True)
    testset = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=transform_test)
    test_loader = torch.utils.data.DataLoader(testset, batch_size=100, shuffle=False)

    model = LeNet().to(device)
    optimizer = optim.RMSprop(model.parameters(), lr=lr, alpha=0.99, eps=1e-08, weight_decay=0, momentum=0, centered=False)

    for epoch in range(1, epochs + 1):
        train( model, device, train_loader, optimizer, epoch)
        test( model, device, test_loader)

    if (save_model):
        torch.save(model.state_dict(),"cifar_lenet.pt")
    time1 = time.time() 
    print ('Traning and Testing total excution time is: %s seconds ' % (time1-time0))   
if __name__ == '__main__':
    main()

==> Preparing data..

Test set: Average loss: -0.6408, Accuracy: 1010/10000 (10%)


Test set: Average loss: -0.6056, Accuracy: 1135/10000 (11%)


Test set: Average loss: -0.6045, Accuracy: 1028/10000 (10%)


Test set: Average loss: -0.6021, Accuracy: 1028/10000 (10%)


Test set: Average loss: -0.5986, Accuracy: 1135/10000 (11%)


Test set: Average loss: -0.6019, Accuracy: 1135/10000 (11%)


Test set: Average loss: -0.6011, Accuracy: 1028/10000 (10%)


Test set: Average loss: -0.6044, Accuracy: 1028/10000 (10%)


Test set: Average loss: -0.6035, Accuracy: 980/10000 (10%)


Test set: Average loss: -0.6098, Accuracy: 1135/10000 (11%)


Test set: Average loss: -0.6078, Accuracy: 1009/10000 (10%)


Test set: Average loss: -0.6164, Accuracy: 1135/10000 (11%)


Test set: Average loss: -0.6199, Accuracy: 1028/10000 (10%)


Test set: Average loss: -0.6249, Accuracy: 982/10000 (10%)


Test set: Average loss: -0.6267, Accuracy: 1135/10000 (11%)


Test set: Average loss: -0.6243, Accuracy: 974/100

#RMSprop with lr = 0.0001


In [5]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision 
import torchvision.transforms as transforms
import time

# Preparing for Data
print('==> Preparing data..')

# Training Data augmentation
transform_train = transforms.Compose([
    transforms.Resize((32,32)),
    transforms.ToTensor(),
    transforms.Normalize(mean = (0.1307,), std = (0.3081,)),
])
# Testing Data preparation
transform_test = transforms.Compose([
    transforms.Resize((32,32)),
    transforms.ToTensor(),
    transforms.Normalize(mean = (0.1307,), std = (0.3081,)),
])




class LeNet(nn.Module):
    def __init__(self):
        super(LeNet, self).__init__()
        self.c1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.c2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        self.c3 = nn.Conv2d(in_channels=16, out_channels=120, kernel_size=5)
        self.max_pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.relu = nn.ReLU()
        self.fc1 = nn.Linear(in_features=120, out_features=84)
        self.fc2 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        x = self.c1(x)
        x = self.max_pool(self.relu(x))
        x = self.c2(x)
        x = self.max_pool(self.relu(x))
        x = self.relu(self.c3(x))
        x = torch.flatten(x, 1)
        x = self.relu(self.fc1(x))
        x = self.fc2(x)
        return x



def train(model, device, train_loader, optimizer, epoch):
    model.train()
    count = 0
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
       
        optimizer.zero_grad()
        output = model(data)
        criterion = nn.CrossEntropyLoss()
            
        loss = criterion(output,target)
        loss.backward()
        optimizer.step()
        
     
        if batch_idx % 10 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))

def test( model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item() # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()
    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

def main():
    time0 = time.time()
    # Training settings
    epochs = 50
    lr = 0.0001
    no_cuda = True
    save_model = False
    use_cuda = not no_cuda and torch.cuda.is_available()
    #torch.manual_seed(100)
    device = torch.device("cuda" if use_cuda else "cpu")
    
    trainset = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform_train)
    train_loader = torch.utils.data.DataLoader(trainset, batch_size=128, shuffle=True)
    testset = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=transform_test)
    test_loader = torch.utils.data.DataLoader(testset, batch_size=100, shuffle=False)

    model = LeNet().to(device)
    optimizer = optim.RMSprop(model.parameters(), lr=lr, alpha=0.99, eps=1e-08, weight_decay=0, momentum=0, centered=False)

    for epoch in range(1, epochs + 1):
        train( model, device, train_loader, optimizer, epoch)
        test( model, device, test_loader)

    if (save_model):
        torch.save(model.state_dict(),"cifar_lenet.pt")
    time1 = time.time() 
    print ('Traning and Testing total excution time is: %s seconds ' % (time1-time0))   
if __name__ == '__main__':
    main()

==> Preparing data..

Test set: Average loss: -6.2775, Accuracy: 9093/10000 (91%)


Test set: Average loss: -7.2237, Accuracy: 9347/10000 (93%)


Test set: Average loss: -7.7215, Accuracy: 9516/10000 (95%)


Test set: Average loss: -8.2855, Accuracy: 9622/10000 (96%)


Test set: Average loss: -8.7285, Accuracy: 9695/10000 (97%)


Test set: Average loss: -9.1655, Accuracy: 9723/10000 (97%)


Test set: Average loss: -9.4737, Accuracy: 9761/10000 (98%)


Test set: Average loss: -9.6328, Accuracy: 9770/10000 (98%)


Test set: Average loss: -10.1460, Accuracy: 9787/10000 (98%)


Test set: Average loss: -10.0874, Accuracy: 9793/10000 (98%)


Test set: Average loss: -10.2890, Accuracy: 9819/10000 (98%)


Test set: Average loss: -10.6459, Accuracy: 9799/10000 (98%)


Test set: Average loss: -10.5271, Accuracy: 9831/10000 (98%)


Test set: Average loss: -10.7200, Accuracy: 9845/10000 (98%)


Test set: Average loss: -10.9232, Accuracy: 9841/10000 (98%)


Test set: Average loss: -11.0050, Accurac