In [1]:
!pip install -q torch==1.0.0 torchvision
import torch
print(torch.__version__)

1.0.0


In [0]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import argparse
import numpy as np

## Data Preparation

In [3]:
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)
trainset, valset = torch.utils.data.random_split(trainset, [40000, 10000])
testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)


trainloader = torch.utils.data.DataLoader(trainset, batch_size=4,
                                          shuffle=True, num_workers=2)
valloader = torch.utils.data.DataLoader(valset, batch_size=4, 
                                        shuffle=False)
testloader = torch.utils.data.DataLoader(testset, batch_size=4,
                                         shuffle=False, num_workers=2)

classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz
Files already downloaded and verified


## Model Architecture

In [0]:
class MLP(nn.Module):
    def __init__(self, in_dim, out_dim, hid_dim, n_layer, act):
        super(MLP, self).__init__()
        self.in_dim = in_dim
        self.out_dim = out_dim
        self.hid_dim = hid_dim
        self.n_layer = n_layer
        self.act = act
        
        self.fc = nn.Linear(self.in_dim, self.hid_dim)
        self.linears = nn.ModuleList()
        
        for i in range(self.n_layer-1):
            self.linears.append(nn.Linear(self.hid_dim, self.hid_dim))
        self.fc2 = nn.Linear(self.hid_dim, self.out_dim)
        
        if self.act == 'relu':
            self.act = nn.ReLU()
          
    def forward(self, x):
        x = self.act(self.fc(x))
        for fc in self.linears:
            x = self.act(fc(x))
        x = self.fc2(x)
        return x
      
net = MLP(3072, 10, 100, 4, 'relu')

## Define Experiment

In [0]:
def experiment(args):
  
    net = MLP(args.in_dim, args.out_dim, args.hid_dim, args.n_layer, args.act)
    net.cuda()
    #print(net)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=args.mm)
    
    for epoch in range(args.epoch):  # loop over the dataset multiple times

        # ==== Train ===== #
        net.train()
        optimizer.zero_grad()
        
        running_loss = 0.0
        train_loss = 0.0
        for i, data in enumerate(trainloader, 0):
            # get the inputs
            inputs, labels = data
            inputs = inputs.view(-1, 3072)
            
            inputs = inputs.cuda()
            labels = labels.cuda()
            

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            train_loss += loss.item()
            if i % 2000 == 1999:    # print every 2000 mini-batches
                #print('[%d, %5d] loss: %.3f' %
                #      (epoch + 1, i + 1, running_loss / 2000))
                running_loss = 0.0
                

        # ==== Validation ====== #
        net.eval()
        optimizer.zero_grad()
        
        correct = 0
        total = 0
        val_loss = 0 ########
        with torch.no_grad():
            for data in valloader:
                images, labels = data
                images = images.view(-1, 3072)
                
                ################################
                images = images.cuda()
                labels = labels.cuda()
                
                outputs = net(images)

                loss = criterion(outputs, labels)
                val_loss += loss.item()

                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

            val_loss = val_loss / len(valloader)
            val_acc = 100 * correct / total
            
        print('Epoch {}, Train Loss: {}, Val Loss: {}, Val Acc: {}'.format(epoch, train_loss, val_loss, val_acc ))


    # ===== Evaluation ===== #
    net.eval()
    optimizer.zero_grad()
    
    correct = 0
    total = 0
    with torch.no_grad():
        for data in testloader:
            images, labels = data
            images = images.view(-1, 3072)
            images = images.cuda()
            labels = labels.cuda()

            outputs = net(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        test_acc = 100 * correct / total
            
    return train_loss, val_loss, val_acc , test_acc #: test_acc shoudn't be trained (either by machine, tester)
    

## Experiment

In [7]:
# ====== Grid Test ====== #
seed = 123
np.random.seed(seed)
torch.manual_seed(seed)

parser = argparse.ArgumentParser()
args = parser.parse_args("")


args.n_layer = 5
args.in_dim = 3072
args.out_dim = 10
args.hid_dim = 100
args.act = 'relu'

args.lr = 0.001
args.mm = 0.9
args.epoch = 3


list_n_layer = [3, 4, 5, 6, 7]
list_hid_dim = [100, 200, 400, 800]
list_lr = [.1, .01, .001, .0001]

results = {"n_layer":[], "hid_dim":[], "lr":[], "train_loss":[], "val_loss":[], "val_acc":[], "test_acc":[]}


for var1 in list_n_layer:
    for var2 in list_hid_dim:
        for var3 in list_lr:
            args.n_layer = var1
            args.hid_dim = var2
            args.lr = var3
            result = experiment(args)
            print(args.n_layer, args.hid_dim, args.lr, result[0:2])
            results["n_layer"].append(args.n_layer)
            results["hid_dim"].append(args.hid_dim)
            results["lr"].append(args.lr)
            results["train_loss"].append(result[0])
            results["val_loss"].append(result[1])
            results["val_acc"].append(result[2])
            results["test_acc"].append(result[3])


Epoch 0, Train Loss: nan, Val Loss: nan, Val Acc: 10.69
Epoch 1, Train Loss: nan, Val Loss: nan, Val Acc: 10.69
Epoch 2, Train Loss: nan, Val Loss: nan, Val Acc: 10.69
3 100 0.1 (nan, nan)
Epoch 0, Train Loss: 21298.81070739031, Val Loss: 2.162953118801117, Val Acc: 16.74
Epoch 1, Train Loss: 22707.860365509987, Val Loss: 2.271721113014221, Val Acc: 15.74
Epoch 2, Train Loss: 22499.64016544819, Val Loss: 2.260210109233856, Val Acc: 14.29
3 100 0.01 (22499.64016544819, 2.260210109233856)
Epoch 0, Train Loss: 17729.019091129303, Val Loss: 1.5849751967787742, Val Acc: 43.4
Epoch 1, Train Loss: 15226.626621723175, Val Loss: 1.5125966014266015, Val Acc: 46.02
Epoch 2, Train Loss: 14188.57342851162, Val Loss: 1.4608655064344407, Val Acc: 48.43
3 100 0.001 (14188.57342851162, 1.4608655064344407)
Epoch 0, Train Loss: 21637.185064554214, Val Loss: 1.979131968808174, Val Acc: 28.53
Epoch 1, Train Loss: 18558.14709532261, Val Loss: 1.7608481732845307, Val Acc: 36.38
Epoch 2, Train Loss: 16931.224

In [0]:
# ====== Random Test ====== #
args.n_layer = 5
args.in_dim = 3072
args.out_dim = 10
args.hid_dim = 100
args.act = 'relu'

args.lr = 0.001
args.mm = 0.9
args.epoch = 5

for _ in range(10):
    var1 = np.random.randint(2, 10)
    var2 = 2 ** np.random.randint(3, 10)
    var3 = .1 ** np.random.randint(1, 5)
    args.n_layer = var1
    args.hid_dim = var2
    args.lr = var3
    result = experiment(args)
    print(var1, var2, var3, result)
    list_result.append((var1, var2, var3, result))
    results["n_layer"].append(args.n_layer)
    results["hid_dim"].append(args.hid_dim)
    results["lr"].append(args.lr)
    results["train_loss"].append(result[0])
    results["val_loss"].append(result[1])
    results["val_acc"].append(result[2])
    results["test_acc"].append(result[3])


Epoch 0, Train Loss: 23034.317021131516, Val Loss: 2.3025724511146546, Val Acc: 10.13
Epoch 1, Train Loss: 21641.941073656082, Val Loss: 1.9992116573810577, Val Acc: 20.43
Epoch 2, Train Loss: 19351.123461008072, Val Loss: 1.8401921659469604, Val Acc: 29.48
Epoch 3, Train Loss: 16993.30404508114, Val Loss: 1.6050382831573486, Val Acc: 40.91
Epoch 4, Train Loss: 15336.870354741812, Val Loss: 1.5104494742512702, Val Acc: 45.05
8 256 0.0010000000000000002 (15336.870354741812, 1.5104494742512702, 45.05, 46.37)
Epoch 0, Train Loss: 18340.46972501278, Val Loss: 1.6119767166733743, Val Acc: 42.07
Epoch 1, Train Loss: 15420.32874301076, Val Loss: 1.5116655302286148, Val Acc: 46.28
Epoch 2, Train Loss: 14201.861329227686, Val Loss: 1.4737717979073524, Val Acc: 48.42
Epoch 3, Train Loss: 13419.708039939404, Val Loss: 1.4292302593588828, Val Acc: 49.9
Epoch 4, Train Loss: 12732.226737588644, Val Loss: 1.3948483634114266, Val Acc: 50.85
4 128 0.0010000000000000002 (12732.226737588644, 1.3948483634

In [0]:
# ====== Grid & Random Visualization ====== #
import matplotlib.pyplot as plt

plt.figure(figsize=(10,10), projection="3d")
plt.scatter(results["n_layer"], results["hid_dim"], results["lr"], c=results["val_acc"])

plt.set_xlabel('n_layer')
plt.set_ylabel('hid_dim')
plt.set_zlabel('lr')
plt.set_title('Hyperparameter Train Set Distribution')
plt.set_zlim(-10, 6)
plt.view_init(40, -60)
plt.invert_xaxis()

plt.show()

In [0]:
# ====== Hand Tuning Test ====== #

args.n_layer = 5
args.in_dim = 3072
args.out_dim = 10
args.hid_dim = 100
args.act = 'relu'

args.lr = 0.001
args.mm = 0.9
args.epoch = 5

for _ in range(10):
    var1 = int(input())
    var2 = int(input())
    args.n_layer = var1
    args.hid_dim = var2
    result = experiment(args)
    print(var1, var2, result)