In [0]:
import torch
import time
from torchvision import datasets, transforms
import torch.nn as nn        
import torch.nn.functional as F
import torch.optim as optim
from IPython.display import Image, display
import torch.multiprocessing as mp
from torch.multiprocessing import Pool
import Augmentor


In [0]:
# Registry mapping a short model name (e.g. 'Conv') to its model class.
# Each model cell below adds its own entry; grid_search/main look models up here.
MODELS = {}

In [0]:
class Hyperparameters():
    """Plain container for the training settings attached to each model class.

    Attributes:
        train_bs: batch size used when batching the training set.
        test_bs: batch size used when batching the test set.
        lr: learning rate handed to the optimizer.
        momentum: momentum handed to the optimizer.
        epochs: number of passes over the training set.
    """

    def __init__(self, train_bs, test_bs, lr, momentum, epochs):
        super().__init__()
        # Batch sizes.
        self.train_bs, self.test_bs = train_bs, test_bs
        # Optimizer settings.
        self.lr, self.momentum = lr, momentum
        # Training length.
        self.epochs = epochs

In [0]:
# defines linear classifier model
# in_features (default 400) -> num_classes (default 10) Linear Function -> log_softmax
class LinearModel(nn.Module):
    """Single-layer linear classifier that outputs per-class log-probabilities.

    Args:
        in_features: length of each flattened input vector. Defaults to 400,
            the value that was previously hard-coded.
        num_classes: number of output classes. Defaults to 10.

    NOTE(review): the MNIST loaders in this notebook produce 28x28 images
    (784 values per image), which cannot be reshaped to 400 columns. Build the
    model with ``LinearModel(in_features=784)`` for that data, or confirm which
    dataset the default of 400 (20x20) was intended for.
    """
    hyperparameters = Hyperparameters(100, 100, 0.01, 0.5, 5)

    def __init__(self, in_features=400, num_classes=10):
        # define layers of net
        super(LinearModel, self).__init__()
        self.fc1 = nn.Linear(in_features, num_classes)

    def forward(self, x):
        """Map a [batch, in_features] tensor to [batch, num_classes] log-probabilities."""
        x = self.fc1(x)
        return F.log_softmax(x, dim=1)

    def preprocess(self, data, bs):
        """Flatten a batch of `bs` inputs to shape [bs, in_features]."""
        # Read the width from the layer so it can never drift from __init__.
        return data.view(bs, self.fc1.in_features)

MODELS['Linear'] = LinearModel

In [0]:
class ConvModel(nn.Module):
    """Two conv + max-pool stages followed by two linear layers and log_softmax.

    Shape walk-through for a [N, 1, 28, 28] batch (the input size that fc1's
    300 input features imply):
        conv1 (3x3) -> [N, 6, 26, 26]   pool -> [N, 6, 13, 13]
        conv2 (3x3) -> [N, 12, 11, 11]  pool -> [N, 12, 5, 5]
        flatten     -> [N, 300] -> fc1 -> [N, 500] -> fc2 -> [N, 10]
    """
    # earlier setting: Hyperparameters(100, 100, 0.02, 0.9, 7) - 99
    # (the trailing "99" is presumably the accuracy it reached -- TODO confirm)
    hyperparameters = Hyperparameters(150, 150, 0.01, 0.8, 14)

    def __init__(self):
        super(ConvModel, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, 3)   # 1 input channel, 6 output channels, 3x3 conv kernel
        self.conv2 = nn.Conv2d(6, 12, 3)  # 6 input channels, 12 output channels, 3x3 conv kernel
        self.fc1 = nn.Linear(300, 500)    # 300 = 12 channels * 5 * 5 after the second pool
        self.fc2 = nn.Linear(500, 10)

    def forward(self, x):
        """Return [N, 10] log-probabilities for a [N, 1, H, W] image batch."""
        x = F.relu(self.conv1(x))  # output -> [N, 6, 26, 26]
        x = F.max_pool2d(x, 2)     # output -> [N, 6, 13, 13]

        x = F.relu(self.conv2(x))  # output -> [N, 12, 11, 11]
        x = F.max_pool2d(x, 2)     # output -> [N, 12, 5, 5]

        x = x.view(-1, self.num_flat_features(x))  # output -> [N, 300]
        x = F.relu(self.fc1(x))                    # output -> [N, 500]
        # No ReLU on the last layer: log_softmax needs unconstrained logits.
        # Clamping them at zero blocks the gradient for any class whose logit
        # goes negative and limits the distributions the model can express.
        x = self.fc2(x)                            # output -> [N, 10]
        return F.log_softmax(x, dim=1)

    def preprocess(self, data, bs):
        """Return `data` unchanged; the conv layers consume image batches directly."""
        return data

    def num_flat_features(self, x):
        """Return the number of values per sample (product of all non-batch dims)."""
        size = x.size()[1:]  # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features

MODELS['Conv'] = ConvModel

In [0]:
class HiddenModel(nn.Module):
    """Fully connected classifier: 784 -> 800 (ReLU) -> 10 -> log_softmax."""
    hyperparameters = Hyperparameters(100, 100, 0.01, 0.6, 10)

    def __init__(self):
        super(HiddenModel, self).__init__()
        self.fc1 = nn.Linear(784, 800)
        self.fc2 = nn.Linear(800, 10)

    def forward(self, x):
        """Map a [batch, 784] tensor to [batch, 10] log-probabilities."""
        x = F.relu(self.fc1(x))
        # No ReLU on the output layer: log_softmax needs unconstrained logits.
        # Clamping them at zero blocks the gradient for any class whose logit
        # goes negative.
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)

    def preprocess(self, data, bs):
        """Flatten a batch of `bs` images to shape [bs, 784] (784 = 28 * 28)."""
        return data.view(bs, 784)

MODELS['Hidden'] = HiddenModel

In [0]:
# returns training data as a dataset of tuples where
# tuple[0] = X = [1,28,28] tensor (28x28 MNIST image, distorted then normalized)
# tuple[1] = Y = class label (scalar)
def get_train_data():
    """Return the MNIST training set with random elastic distortion applied per image.

    The dataset is downloaded into ./data on first use. Every time an item is
    read, the image is distorted by the Augmentor pipeline, converted to a
    tensor and normalized with mean 0.1307 / std 0.3081.
    """
    # The pipeline is only used as a per-image transform, so it needs no source
    # directory. Pointing it at './data' and calling p.sample() wrote augmented
    # files to disk on every call and failed when the folder was missing or
    # held no image files.
    p = Augmentor.Pipeline()
    p.random_distortion(probability=1, grid_width=4, grid_height=4, magnitude=8)
    transform = transforms.Compose([
        p.torch_transform(),  # operates on the PIL image, so it must precede ToTensor
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ])
    return datasets.MNIST('./data',
                          train=True,
                          transform=transform,
                          target_transform=None,
                          download=True)

In [0]:
# returns data batched according to specified batch size
def batch_data(data, batch_size, shuffle=True):
    """Wrap `data` in a DataLoader that yields batches of `batch_size` items.

    Args:
        data: a map-style dataset (anything with __len__ and __getitem__).
        batch_size: number of samples per batch; the last batch may be smaller.
        shuffle: reshuffle the data every epoch. Defaults to True, matching the
            previous hard-coded behaviour; pass False for a fixed order.

    Returns:
        torch.utils.data.DataLoader over `data`.
    """
    return torch.utils.data.DataLoader(
        data,
        batch_size=batch_size,
        shuffle=shuffle,
        # Pinned host memory only speeds up host-to-GPU copies; requesting it
        # on a CPU-only machine just produces a warning.
        pin_memory=torch.cuda.is_available())
    

In [0]:
# returns testing data as a dataset of tuples where
# tuple[0] = X = [1,28,28] tensor (28x28 MNIST image, normalized)
# tuple[1] = Y = class label (scalar)
def get_test_data():
    """Return the MNIST test split, downloaded into ./data if it is missing.

    Images are converted to tensors and normalized with mean 0.1307 and
    std 0.3081; no augmentation is applied.
    """
    mean, std = (0.1307,), (0.3081,)
    steps = [transforms.ToTensor(), transforms.Normalize(mean, std)]
    return datasets.MNIST(
        './data',
        train=False,
        transform=transforms.Compose(steps),
        target_transform=None,
        download=True,
    )

In [0]:
def get_one_example():
    """Return the image tensor of the first test-set item; its label is discarded."""
    first_image, _label = get_test_data()[0]
    return first_image

In [0]:
def train(model, device, train_batches, optimizer, epoch):
    """Run one training epoch of `model` over `train_batches`.

    Args:
        model: an nn.Module that also provides ``preprocess(data, batch_size)``
            and whose forward pass returns log-probabilities.
        device: torch.device the batches are moved to (the model must already
            be on it).
        train_batches: DataLoader yielding ``(data, target)`` batches.
        optimizer: optimizer over the model's parameters.
        epoch: epoch number, used only in the progress output.

    Prints a progress line every 10th batch.
    """
    model.train()  # put model in training mode

    dataset_size = len(train_batches.dataset)
    num_batches = len(train_batches)
    # Running count of samples consumed before the current batch. Multiplying
    # batch_idx by the current batch's size is wrong when the logged batch is
    # a short final batch.
    samples_seen = 0

    for batch_idx, (data, target) in enumerate(train_batches):
        batch_size = len(data)  # the last batch can be smaller than the rest
        data = model.preprocess(data, batch_size)  # reshape to what the model expects
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)  # negative log likelihood loss
        loss.backward()
        optimizer.step()

        if batch_idx % 10 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, samples_seen, dataset_size,
                100. * batch_idx / num_batches, loss.item()))

        samples_seen += batch_size

In [0]:
def test(model, device, test_batches):
    #print ("--- Testing ---")
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for batch_idx, (data, target) in enumerate(test_batches):
            batch_size = len(data)
            data = model.preprocess(data, batch_size)
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item() # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_batches.dataset)

    print('\nAverage loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_batches.dataset),
        100. * correct / len(test_batches.dataset)))

In [0]:
def run_epoch(model, device, train_batches, test_batches, optimizer, epoch):
    """Train `model` for one epoch, then evaluate it on the test batches.

    Args:
        model: model passed through to train() and test().
        device: torch.device passed through to train() and test().
        train_batches: batched training data for train().
        test_batches: batched test data for test().
        optimizer: optimizer used by train().
        epoch: epoch number forwarded to train() for its progress output.
    """
    train(model, device, train_batches, optimizer, epoch)
    test(model, device, test_batches)
    
  

In [0]:
def get_hypers(bs_range, lr_range):
    """Build every (batch size, learning rate) combination as a list of dicts.

    Batch size is the outer loop and learning rate the inner one, so all
    learning rates for the first batch size come first. Each entry has the
    form ``{'lr': <learning rate>, 'bs': <batch size>}``.
    """
    return [
        {'lr': rate, 'bs': size}
        for size in bs_range
        for rate in lr_range
    ]
  

In [0]:
def grid_search():
    """Train a fresh 'Conv' model for each batch-size / learning-rate pair and report test results.

    Momentum and epoch count come from the model class's `hyperparameters`;
    batch size (used for both training and test loaders) and learning rate
    come from the grid. Results are printed, not returned.
    """
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda") if use_cuda else torch.device("cpu")
    # get_device_name() raises when no CUDA device is available, so only query
    # it on GPU runs; otherwise the CPU fallback above could never be reached.
    print("device: " + (torch.cuda.get_device_name() if use_cuda else "cpu"))
    model_name = 'Conv'

    train_data = get_train_data()
    test_data = get_test_data()

    bs_range = [100, 110, 120, 130, 140, 150]
    lr_range = [0.01]
    pairs = get_hypers(bs_range, lr_range)

    # These settings are class-level and do not vary across the grid.
    hyperparams = MODELS[model_name].hyperparameters
    momentum = hyperparams.momentum
    epochs = hyperparams.epochs

    for pair in pairs:
        train_bs = pair['bs']  # training batch size
        test_bs = pair['bs']   # test batch size
        lr = pair['lr']        # learning rate

        train_batches = batch_data(train_data, train_bs)
        test_batches = batch_data(test_data, test_bs)

        # New model per pair so every configuration starts from fresh weights.
        model = MODELS[model_name]().to(device)  # move model to the selected device
        optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)

        for epoch in range(1, epochs + 1):
            train(model, device, train_batches, optimizer, epoch)

        print("Testing bs: %s\tlr: %s" % (pair['bs'], pair['lr']))
        test(model, device, test_batches)

    print('Done!')

In [0]:
# Run the batch-size / learning-rate sweep when this cell executes (trains one model per grid entry).
grid_search()

In [0]:
def main():
    """Train the 'Conv' model with its class-level hyperparameters, testing after every epoch.

    Prints per-epoch training progress and test metrics, then the total
    wall-clock time of the train/test loop in seconds.
    """
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda") if use_cuda else torch.device("cpu")
    # get_device_name() raises when no CUDA device is available, so only query
    # it on GPU runs; otherwise the CPU fallback above could never be reached.
    print("device: " + (torch.cuda.get_device_name() if use_cuda else "cpu"))
    model_name = 'Conv'
    model = MODELS[model_name]()

    hyperparams = model.hyperparameters
    train_bs = hyperparams.train_bs  # training batch size
    test_bs = hyperparams.test_bs    # test batch size
    lr = hyperparams.lr              # learning rate
    momentum = hyperparams.momentum  # SGD momentum
    epochs = hyperparams.epochs      # number of epochs

    train_data = get_train_data()
    train_batches = batch_data(train_data, train_bs)

    test_data = get_test_data()
    test_batches = batch_data(test_data, test_bs)

    model = model.to(device)  # move model to the selected device
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)

    start = time.perf_counter()
    for epoch in range(1, epochs + 1):
        # One training pass followed by one evaluation pass.
        run_epoch(model, device, train_batches, test_batches, optimizer, epoch)
    end = time.perf_counter()
    print('Done!: ' + str(end-start))

    #torch.save(model, './linearclassifier.pth')
    