In [1]:
from __future__ import print_function
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable
import pandas as pd
from numpy import random

In [2]:
# Prepare data for training, validation and testing.
# The paths are raw string literals so every backslash stays literal: sequences
# such as "\S" or "\d" in a normal literal are invalid escapes that newer Python
# versions warn about. The resulting path values are unchanged.
TRAIN_FILE = r'D:\Study\Ostfold\MachineLearning\git\data\ohenc_data_colNames.train'
VAL_FILE = r'D:\Study\Ostfold\MachineLearning\git\data\ohenc_data_colNames.val'
TEST_FILE = r'D:\Study\Ostfold\MachineLearning\git\data\ohenc_data_colNames.test'

# Keep one of the two outcome columns as the label and drop the other.
redundant_label = 'outcome<50K'
label_name = 'outcome>50K'

# NOTE(review): the frame displayed below contains an 'Unnamed: 0' column, which
# looks like a saved row index being fed to the network as a feature - confirm
# whether that is intended before changing the input width of the models.

# training data: DataFrame.pop removes the column in place, so after both pops
# `train` holds only the feature columns.
train = pd.read_table(TRAIN_FILE, sep=' ')
train.pop(redundant_label)
train_y = train.pop(label_name)
train_x = train

# validation data
val = pd.read_table(VAL_FILE, sep=' ')
val.pop(redundant_label)
val_y = val.pop(label_name)
val_x = val

# testing data
test = pd.read_table(TEST_FILE, sep=' ')
test.pop(redundant_label)
test_y = test.pop(label_name)
test_x = test

display(train_x.head())
display(train_y.head())

Unnamed: 0,age,workclassMissing,workclassFederal-gov,workclassLocal-gov,workclassNever-worked,workclassPrivate,workclassSelf-emp-inc,workclassSelf-emp-not-inc,workclassState-gov,workclassWithout-pay,...,native-countryPortugal,native-countryPuerto-Rico,native-countryScotland,native-countrySouth,native-countryTaiwan,native-countryThailand,native-countryTrinadad-Tobago,native-countryUnited-States,native-countryVietnam,native-countryYugoslavia
0,-1.286609,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
1,0.395073,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,1,0,0
2,0.02949,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
3,-1.286609,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
4,0.833773,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,1,0,0


0    0
1    0
2    0
3    0
4    1
Name: outcome>50K, dtype: int64

In [3]:
# Wrap each split in a TensorDataset and build the DataLoaders used below.
# Features are converted to FloatTensor; labels keep the dtype of the pandas
# column they come from.
# NOTE: `train`, `val` and `test` are rebound here from DataFrames to datasets.
train = torch.utils.data.TensorDataset(
    torch.from_numpy(train_x.values).type(torch.FloatTensor),
    torch.from_numpy(train_y.values),
)
# Mini-batches of 128 for optimisation ...
train_loader = torch.utils.data.DataLoader(dataset=train, shuffle=True, batch_size=128)
# ... and large batches over the same data for measuring training-set metrics.
train_loader_val = torch.utils.data.DataLoader(dataset=train, shuffle=True, batch_size=10000)

val = torch.utils.data.TensorDataset(
    torch.from_numpy(val_x.values).type(torch.FloatTensor),
    torch.from_numpy(val_y.values),
)
val_loader = torch.utils.data.DataLoader(dataset=val, shuffle=True, batch_size=10000)

test = torch.utils.data.TensorDataset(
    torch.from_numpy(test_x.values).type(torch.FloatTensor),
    torch.from_numpy(test_y.values),
)
test_loader = torch.utils.data.DataLoader(dataset=test, shuffle=True, batch_size=10000)

In [4]:
class Net4HiddenLayers(nn.Module):
    """Fully connected classifier with four hidden ReLU layers and dropout.

    Args:
        nodes1: width of the first hidden layer.
        nodes2: width of the second hidden layer.
        nodes3: width of the third hidden layer.
        nodes4: width of the fourth hidden layer.
        dropout: probability handed to ``F.dropout`` after every hidden layer.
        in_features: number of input features; defaults to 108, the value
            that used to be hard-coded.

    ``forward`` returns log-probabilities over the 2 output classes
    (``log_softmax`` along dim 1), which is what ``F.nll_loss`` expects.
    """

    def __init__(self, nodes1, nodes2, nodes3, nodes4, dropout, in_features=108):
        super(Net4HiddenLayers, self).__init__()
        # Store the rate on the instance so forward() does not depend on a
        # module-level variable that happens to be called `dropout`.
        self.dropout = dropout
        self.fc1 = nn.Linear(in_features, nodes1)
        self.fc2 = nn.Linear(nodes1, nodes2)
        self.fc3 = nn.Linear(nodes2, nodes3)
        self.fc4 = nn.Linear(nodes3, nodes4)
        self.fc5 = nn.Linear(nodes4, 2)

    def forward(self, x):
        # Each hidden layer: affine -> ReLU -> dropout (active only in train mode).
        for hidden in (self.fc1, self.fc2, self.fc3, self.fc4):
            x = F.relu(hidden(x))
            x = F.dropout(x, p=self.dropout, training=self.training)

        x = self.fc5(x)
        return F.log_softmax(x, dim=1)

class Net3HiddenLayers(nn.Module):
    """Fully connected classifier with three hidden ReLU layers and dropout.

    Args:
        nodes1: width of the first hidden layer.
        nodes2: width of the second hidden layer.
        nodes3: width of the third hidden layer.
        dropout: probability handed to ``F.dropout`` after every hidden layer.
        in_features: number of input features; defaults to 108, the value
            that used to be hard-coded.

    ``forward`` returns log-probabilities over the 2 output classes
    (``log_softmax`` along dim 1).
    """

    def __init__(self, nodes1, nodes2, nodes3, dropout, in_features=108):
        super(Net3HiddenLayers, self).__init__()
        # Store the rate on the instance so forward() does not depend on a
        # module-level variable that happens to be called `dropout`.
        self.dropout = dropout
        self.fc1 = nn.Linear(in_features, nodes1)
        self.fc2 = nn.Linear(nodes1, nodes2)
        self.fc3 = nn.Linear(nodes2, nodes3)
        self.fc4 = nn.Linear(nodes3, 2)

    def forward(self, x):
        # Each hidden layer: affine -> ReLU -> dropout (active only in train mode).
        for hidden in (self.fc1, self.fc2, self.fc3):
            x = F.relu(hidden(x))
            x = F.dropout(x, p=self.dropout, training=self.training)

        x = self.fc4(x)
        return F.log_softmax(x, dim=1)
    
class Net2HiddenLayers(nn.Module):
    """Fully connected classifier with two hidden ReLU layers and dropout.

    Args:
        nodes1: width of the first hidden layer.
        nodes2: width of the second hidden layer.
        dropout: probability handed to ``F.dropout`` after every hidden layer.
        in_features: number of input features; defaults to 108, the value
            that used to be hard-coded.

    ``forward`` returns log-probabilities over the 2 output classes
    (``log_softmax`` along dim 1).
    """

    def __init__(self, nodes1, nodes2, dropout, in_features=108):
        super(Net2HiddenLayers, self).__init__()
        # Store the rate on the instance so forward() does not depend on a
        # module-level variable that happens to be called `dropout`.
        self.dropout = dropout
        self.fc1 = nn.Linear(in_features, nodes1)
        self.fc2 = nn.Linear(nodes1, nodes2)
        self.fc3 = nn.Linear(nodes2, 2)

    def forward(self, x):
        # Each hidden layer: affine -> ReLU -> dropout (active only in train mode).
        for hidden in (self.fc1, self.fc2):
            x = F.relu(hidden(x))
            x = F.dropout(x, p=self.dropout, training=self.training)

        x = self.fc3(x)
        return F.log_softmax(x, dim=1)

class Net1HiddenLayer(nn.Module):
    """Fully connected classifier with a single hidden ReLU layer and dropout.

    Args:
        nodes: width of the hidden layer.
        dropout: probability handed to ``F.dropout`` after the hidden layer.
        in_features: number of input features; defaults to 108, the value
            that used to be hard-coded.

    ``forward`` returns log-probabilities over the 2 output classes
    (``log_softmax`` along dim 1).
    """

    def __init__(self, nodes, dropout, in_features=108):
        super(Net1HiddenLayer, self).__init__()
        # Store the rate on the instance so forward() does not depend on a
        # module-level variable that happens to be called `dropout`.
        self.dropout = dropout
        self.fc1 = nn.Linear(in_features, nodes)
        self.fc2 = nn.Linear(nodes, 2)

    def forward(self, x):
        # affine -> ReLU -> dropout (active only in train mode) -> output layer
        x = F.relu(self.fc1(x))
        x = F.dropout(x, p=self.dropout, training=self.training)

        x = self.fc2(x)
        return F.log_softmax(x, dim=1)

def train(epoch, optimizer, model, log_enable = False):
    """Run one optimisation pass over the module-level ``train_loader``.

    Args:
        epoch: epoch number; only used in the progress message.
        optimizer: optimizer stepped once per mini-batch.
        model: network to train; it is switched to training mode first.
        log_enable: when true, print the batch loss every ``log_interval``
            batches (``log_interval`` is read from module scope).

    The loss is ``F.nll_loss``, so ``model`` is expected to return
    log-probabilities.
    """
    model.train()
    total_samples = len(train_loader.dataset)
    total_batches = len(train_loader)
    for batch_idx, (data, target) in enumerate(train_loader):
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        if log_enable and (batch_idx % log_interval == 0):
            # loss is a 0-dim tensor: .item() extracts the Python float
            # (indexing it with [0] raises on current PyTorch).
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), total_samples,
                100. * batch_idx / total_batches, loss.item()))

def evaluate(data_loader, data_set="validation", model=None):
    """Print the average NLL loss and the accuracy of a model on ``data_loader``.

    Args:
        data_loader: iterable of ``(data, target)`` batches exposing
            ``.dataset`` (its length is used as the sample count).
        data_set: label placed at the start of the printed line.
        model: network to score. When omitted, the module-level ``model`` is
            used, which keeps existing ``evaluate(loader, name)`` calls working.

    Returns:
        None; the metrics are only printed.
    """
    if model is None:
        # Backward-compatible fallback to the notebook-global network.
        if "model" not in globals():
            raise NameError("name 'model' is not defined")
        model = globals()["model"]

    model.eval()
    test_loss = 0
    correct = 0
    # no_grad() is what actually switches autograd off during scoring.
    with torch.no_grad():
        for data, target in data_loader:
            output = model(data)
            # sum up batch loss; .item() turns the 0-dim tensor into a float
            test_loss += F.nll_loss(output, target, reduction='sum').item()
            # index of the max log-probability
            pred = output.max(1, keepdim=True)[1]
            correct += pred.eq(target.view_as(pred)).long().cpu().sum().item()

    n_samples = len(data_loader.dataset)
    test_loss /= n_samples
    print('{} set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)'.format(
        data_set, test_loss, correct, n_samples,
        100. * correct / n_samples))

def train_and_eval(optimizer, model, epochs, log_enable=False):
    """Train ``model`` for ``epochs`` epochs, then report train/validation metrics.

    Args:
        optimizer: optimizer forwarded to ``train``.
        model: network that is trained and then evaluated.
        epochs: number of passes; epochs are numbered from 1.
        log_enable: when true, metrics are also printed after every epoch
            (followed by a blank separator) and batch logging in ``train``
            is enabled.

    The training-set metrics use the module-level ``train_loader_val`` and the
    validation metrics use ``val_loader``. The final report is printed
    regardless of ``log_enable``.
    """
    for epoch in range(1, epochs + 1):
        train(epoch, optimizer, model, log_enable)
        if log_enable:
            # Pass the model explicitly so the network being trained is scored.
            evaluate(train_loader_val, "training", model)
            evaluate(val_loader, model=model)
            print("\n")

    evaluate(train_loader_val, "training", model)
    evaluate(val_loader, model=model)


In [6]:
# Random search over architecture depth/width and SGD hyper-parameters.
# `dropout`, `model` and `log_interval` are kept as module-level names with
# these exact spellings because code defined in earlier cells resolves some of
# them as globals at call time.
log_interval, epochs, max_count = 1000, 100, 50
print("Using SGD optimizer & momentum")

hidden_set = [2048, 1024, 512, 256, 128, 64, 32, 16]
# Network class to instantiate for each sampled depth.
net_by_depth = {
    1: Net1HiddenLayer,
    2: Net2HiddenLayers,
    3: Net3HiddenLayers,
    4: Net4HiddenLayers,
}
for count in range(max_count):
    # Log-uniform learning rate and L2 strength, uniform dropout and momentum.
    lr = 10 ** random.uniform(-2, -4)
    dropout = random.uniform(0.1, 0.7)
    momentum = random.uniform(0.5, 0.99)
    # numpy's randint excludes the upper bound, so the depth is 1, 2 or 3.
    layers = random.randint(1, 4)
    # Integer array of length `layers`; overwritten with real widths below.
    hidden_units = random.randint(1, size=layers)
    l2_reg = 10 ** random.uniform(-4, 0)
    hidden_units[:] = [hidden_set[random.randint(0, 8)] for _ in range(layers)]

    # Widest layer first.
    hidden_units = sorted(hidden_units, reverse=True)

    print("{}, hidden units{}, lr {}, dropout {}, momentum {}, l2_reg {}".format(
        count, hidden_units, lr, dropout, momentum, l2_reg))
    # Same torch seed for every trial, set before the model is constructed.
    torch.manual_seed(1234)

    # Constructor arguments: the layer widths as plain Python ints, then dropout.
    model = net_by_depth[layers](*([width.item() for width in hidden_units] + [dropout]))

    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum,
                          weight_decay=l2_reg)
    train_and_eval(optimizer, model, epochs)
    

Using SGD optimizer & momentum
0, hidden units[1024, 256], lr 0.008827112463346805, dropout 0.6073167193450211, momentum 0.9239248414127018, l2_reg 0.09638354254769173
training set: Average loss: 0.4046, Accuracy: 20982/26048 (80.55%)
validation set: Average loss: 0.4125, Accuracy: 5210/6513 (79.99%)
1, hidden units[16], lr 0.006417496038647365, dropout 0.5456198718041897, momentum 0.9305830330635156, l2_reg 0.005450201790650044
training set: Average loss: 0.3117, Accuracy: 22309/26048 (85.65%)
validation set: Average loss: 0.3310, Accuracy: 5489/6513 (84.28%)
2, hidden units[1024, 128], lr 0.0014738534848893503, dropout 0.5409336996838958, momentum 0.7309706702714478, l2_reg 0.0001903414762627394
training set: Average loss: 0.3009, Accuracy: 22392/26048 (85.96%)
validation set: Average loss: 0.3240, Accuracy: 5516/6513 (84.69%)
3, hidden units[128, 64], lr 0.0019703302257526823, dropout 0.19106647772062346, momentum 0.8001118690052376, l2_reg 0.0011979491262683647
training set: Averag

training set: Average loss: 0.3322, Accuracy: 22037/26048 (84.60%)
validation set: Average loss: 0.3457, Accuracy: 5429/6513 (83.36%)
31, hidden units[2048], lr 0.0024574759430160344, dropout 0.12965423856933816, momentum 0.6457663192103558, l2_reg 0.029568684674751197
training set: Average loss: 0.3309, Accuracy: 22134/26048 (84.97%)
validation set: Average loss: 0.3448, Accuracy: 5456/6513 (83.77%)
32, hidden units[128, 64, 32], lr 0.003289739734889431, dropout 0.20890950106787187, momentum 0.6818179170967857, l2_reg 0.013816730387139304
training set: Average loss: 0.3112, Accuracy: 22355/26048 (85.82%)
validation set: Average loss: 0.3284, Accuracy: 5505/6513 (84.52%)
33, hidden units[128, 64, 16], lr 0.000913412794324198, dropout 0.659079752195815, momentum 0.5276459458635601, l2_reg 0.09071675925358247
training set: Average loss: 0.5558, Accuracy: 19792/26048 (75.98%)
validation set: Average loss: 0.5588, Accuracy: 4928/6513 (75.66%)
34, hidden units[2048], lr 0.000121666500148903

In [9]:
# Random search over SGD hyper-parameters for a fixed 1024-128-32 network.
# `dropout`, `model` and `log_interval` are kept as module-level names with
# these exact spellings because code defined in earlier cells resolves some of
# them as globals at call time.
log_interval, epochs, max_count = 1000, 100, 30
print("Using SGD optimizer")

hidden_set = [2048, 1024, 512, 256, 128, 64, 32, 16]
for count in range(max_count):
    # Log-uniform learning rate and L2 strength, uniform dropout and momentum.
    lr = 10 ** random.uniform(-2, -4)
    dropout = random.uniform(0.1, 0.7)
    momentum = random.uniform(0.5, 0.99)
    hidden_units = [1024, 128, 32]
    l2_reg = 10 ** random.uniform(-4, 0)

    print("{}, hidden units{}, lr {}, dropout {}, momentum {}, l2_reg {}".format(
        count, hidden_units, lr, dropout, momentum, l2_reg))

    # Same torch seed for every trial, set before the model is constructed.
    torch.manual_seed(1234)
    model = Net3HiddenLayers(*(hidden_units + [dropout]))

    optimizer = optim.SGD(
        model.parameters(),
        lr=lr,
        momentum=momentum,
        weight_decay=l2_reg,
    )
    train_and_eval(optimizer, model, epochs)

Using SGD optimizer
0, hidden units[1024, 128, 32], lr 0.0001754435507228605, dropout 0.26347802100997375, momentum 0.7832330856962784, l2_reg 0.17954244340643058
training set: Average loss: 0.5636, Accuracy: 19792/26048 (75.98%)
validation set: Average loss: 0.5661, Accuracy: 4928/6513 (75.66%)
1, hidden units[1024, 128, 32], lr 0.0007749401914548419, dropout 0.3143914644620083, momentum 0.9548543427405453, l2_reg 0.07088313447756668
training set: Average loss: 0.3853, Accuracy: 21632/26048 (83.05%)
validation set: Average loss: 0.3939, Accuracy: 5351/6513 (82.16%)
2, hidden units[1024, 128, 32], lr 0.00012674168305450902, dropout 0.288040928260869, momentum 0.8719601396392199, l2_reg 0.07240515375367737
training set: Average loss: 0.4783, Accuracy: 19792/26048 (75.98%)
validation set: Average loss: 0.4834, Accuracy: 4928/6513 (75.66%)
3, hidden units[1024, 128, 32], lr 0.0009710414438215947, dropout 0.6517744139386837, momentum 0.857910439395579, l2_reg 0.24069856102754036
training s

training set: Average loss: 0.5842, Accuracy: 19792/26048 (75.98%)
validation set: Average loss: 0.5860, Accuracy: 4928/6513 (75.66%)


In [13]:
# Long run (up to 1000 epochs) of one fixed configuration with metrics
# printed after every epoch.
# `dropout`, `model` and `log_interval` are kept as module-level names with
# these exact spellings because code defined in earlier cells resolves some of
# them as globals at call time.
log_interval, epochs, max_count = 1000, 1000, 1
print("Using SGD optimizer & momentum")

for count in range(max_count):
    hidden_units = [1024, 128, 32]

    # Fixed hyper-parameters for this run.
    lr = 0.00430441753776839
    dropout = 0.33790385618345053
    momentum = 0.5544240157986617
    l2_reg = 0.0004893150674674339

    print("{}, hidden units{}, lr {}, dropout {}, reg {}, momentum {}".format(
        count, hidden_units, lr, dropout, l2_reg, momentum))

    # Seed torch before the model is constructed.
    torch.manual_seed(1234)
    model = Net3HiddenLayers(*(hidden_units + [dropout]))

    optimizer = optim.SGD(
        model.parameters(),
        lr=lr,
        momentum=momentum,
        weight_decay=l2_reg,
    )
    train_and_eval(optimizer, model, epochs, True)

Using SGD optimizer & momentum
0, hidden units[1024, 128, 32], lr 0.00430441753776839, dropout 0.33790385618345053, reg 0.0004893150674674339, momentum 0.5544240157986617
training set: Average loss: 0.5433, Accuracy: 19792/26048 (75.98%)
validation set: Average loss: 0.5465, Accuracy: 4928/6513 (75.66%)


training set: Average loss: 0.4941, Accuracy: 19792/26048 (75.98%)
validation set: Average loss: 0.4992, Accuracy: 4928/6513 (75.66%)


training set: Average loss: 0.4271, Accuracy: 19987/26048 (76.73%)
validation set: Average loss: 0.4343, Accuracy: 4964/6513 (76.22%)


training set: Average loss: 0.3765, Accuracy: 21615/26048 (82.98%)
validation set: Average loss: 0.3866, Accuracy: 5364/6513 (82.36%)


training set: Average loss: 0.3543, Accuracy: 21790/26048 (83.65%)
validation set: Average loss: 0.3665, Accuracy: 5379/6513 (82.59%)


training set: Average loss: 0.3455, Accuracy: 21851/26048 (83.89%)
validation set: Average loss: 0.3587, Accuracy: 5384/6513 (82.67%)


training set:

training set: Average loss: 0.3005, Accuracy: 22399/26048 (85.99%)
validation set: Average loss: 0.3235, Accuracy: 5513/6513 (84.65%)


training set: Average loss: 0.3004, Accuracy: 22423/26048 (86.08%)
validation set: Average loss: 0.3235, Accuracy: 5513/6513 (84.65%)


training set: Average loss: 0.3000, Accuracy: 22407/26048 (86.02%)
validation set: Average loss: 0.3237, Accuracy: 5513/6513 (84.65%)


training set: Average loss: 0.2999, Accuracy: 22397/26048 (85.98%)
validation set: Average loss: 0.3239, Accuracy: 5512/6513 (84.63%)


training set: Average loss: 0.2995, Accuracy: 22422/26048 (86.08%)
validation set: Average loss: 0.3234, Accuracy: 5526/6513 (84.85%)


training set: Average loss: 0.2993, Accuracy: 22418/26048 (86.06%)
validation set: Average loss: 0.3233, Accuracy: 5523/6513 (84.80%)


training set: Average loss: 0.2995, Accuracy: 22409/26048 (86.03%)
validation set: Average loss: 0.3236, Accuracy: 5518/6513 (84.72%)


training set: Average loss: 0.2991, Accuracy: 22

training set: Average loss: 0.2931, Accuracy: 22523/26048 (86.47%)
validation set: Average loss: 0.3228, Accuracy: 5535/6513 (84.98%)


training set: Average loss: 0.2933, Accuracy: 22527/26048 (86.48%)
validation set: Average loss: 0.3227, Accuracy: 5552/6513 (85.24%)


training set: Average loss: 0.2929, Accuracy: 22533/26048 (86.51%)
validation set: Average loss: 0.3226, Accuracy: 5548/6513 (85.18%)


training set: Average loss: 0.2928, Accuracy: 22523/26048 (86.47%)
validation set: Average loss: 0.3223, Accuracy: 5534/6513 (84.97%)


training set: Average loss: 0.2926, Accuracy: 22543/26048 (86.54%)
validation set: Average loss: 0.3224, Accuracy: 5539/6513 (85.05%)


training set: Average loss: 0.2925, Accuracy: 22539/26048 (86.53%)
validation set: Average loss: 0.3226, Accuracy: 5539/6513 (85.05%)


training set: Average loss: 0.2924, Accuracy: 22545/26048 (86.55%)
validation set: Average loss: 0.3228, Accuracy: 5539/6513 (85.05%)


training set: Average loss: 0.2923, Accuracy: 22

training set: Average loss: 0.2884, Accuracy: 22595/26048 (86.74%)
validation set: Average loss: 0.3238, Accuracy: 5532/6513 (84.94%)


training set: Average loss: 0.2879, Accuracy: 22609/26048 (86.80%)
validation set: Average loss: 0.3227, Accuracy: 5535/6513 (84.98%)


training set: Average loss: 0.2878, Accuracy: 22606/26048 (86.79%)
validation set: Average loss: 0.3228, Accuracy: 5536/6513 (85.00%)


training set: Average loss: 0.2878, Accuracy: 22618/26048 (86.83%)
validation set: Average loss: 0.3224, Accuracy: 5542/6513 (85.09%)


training set: Average loss: 0.2877, Accuracy: 22600/26048 (86.76%)
validation set: Average loss: 0.3228, Accuracy: 5538/6513 (85.03%)


training set: Average loss: 0.2875, Accuracy: 22616/26048 (86.82%)
validation set: Average loss: 0.3227, Accuracy: 5540/6513 (85.06%)


training set: Average loss: 0.2875, Accuracy: 22609/26048 (86.80%)
validation set: Average loss: 0.3222, Accuracy: 5535/6513 (84.98%)


training set: Average loss: 0.2874, Accuracy: 22

training set: Average loss: 0.2835, Accuracy: 22667/26048 (87.02%)
validation set: Average loss: 0.3232, Accuracy: 5537/6513 (85.01%)


training set: Average loss: 0.2834, Accuracy: 22667/26048 (87.02%)
validation set: Average loss: 0.3230, Accuracy: 5529/6513 (84.89%)


training set: Average loss: 0.2837, Accuracy: 22670/26048 (87.03%)
validation set: Average loss: 0.3232, Accuracy: 5530/6513 (84.91%)


training set: Average loss: 0.2832, Accuracy: 22690/26048 (87.11%)
validation set: Average loss: 0.3235, Accuracy: 5537/6513 (85.01%)


training set: Average loss: 0.2831, Accuracy: 22685/26048 (87.09%)
validation set: Average loss: 0.3230, Accuracy: 5534/6513 (84.97%)


training set: Average loss: 0.2828, Accuracy: 22692/26048 (87.12%)
validation set: Average loss: 0.3233, Accuracy: 5533/6513 (84.95%)


training set: Average loss: 0.2828, Accuracy: 22700/26048 (87.15%)
validation set: Average loss: 0.3232, Accuracy: 5536/6513 (85.00%)


training set: Average loss: 0.2829, Accuracy: 22

training set: Average loss: 0.2788, Accuracy: 22758/26048 (87.37%)
validation set: Average loss: 0.3251, Accuracy: 5529/6513 (84.89%)


training set: Average loss: 0.2789, Accuracy: 22763/26048 (87.39%)
validation set: Average loss: 0.3248, Accuracy: 5526/6513 (84.85%)


training set: Average loss: 0.2789, Accuracy: 22748/26048 (87.33%)
validation set: Average loss: 0.3250, Accuracy: 5528/6513 (84.88%)


training set: Average loss: 0.2786, Accuracy: 22776/26048 (87.44%)
validation set: Average loss: 0.3239, Accuracy: 5529/6513 (84.89%)


training set: Average loss: 0.2782, Accuracy: 22773/26048 (87.43%)
validation set: Average loss: 0.3243, Accuracy: 5520/6513 (84.75%)


training set: Average loss: 0.2780, Accuracy: 22762/26048 (87.38%)
validation set: Average loss: 0.3248, Accuracy: 5524/6513 (84.81%)


training set: Average loss: 0.2781, Accuracy: 22772/26048 (87.42%)
validation set: Average loss: 0.3245, Accuracy: 5522/6513 (84.78%)


training set: Average loss: 0.2780, Accuracy: 22

training set: Average loss: 0.2738, Accuracy: 22823/26048 (87.62%)
validation set: Average loss: 0.3263, Accuracy: 5533/6513 (84.95%)


training set: Average loss: 0.2735, Accuracy: 22839/26048 (87.68%)
validation set: Average loss: 0.3251, Accuracy: 5525/6513 (84.83%)


training set: Average loss: 0.2735, Accuracy: 22833/26048 (87.66%)
validation set: Average loss: 0.3254, Accuracy: 5519/6513 (84.74%)


training set: Average loss: 0.2734, Accuracy: 22819/26048 (87.60%)
validation set: Average loss: 0.3254, Accuracy: 5529/6513 (84.89%)


training set: Average loss: 0.2731, Accuracy: 22836/26048 (87.67%)
validation set: Average loss: 0.3251, Accuracy: 5524/6513 (84.81%)


training set: Average loss: 0.2731, Accuracy: 22831/26048 (87.65%)
validation set: Average loss: 0.3251, Accuracy: 5527/6513 (84.86%)


training set: Average loss: 0.2729, Accuracy: 22853/26048 (87.73%)
validation set: Average loss: 0.3248, Accuracy: 5527/6513 (84.86%)


training set: Average loss: 0.2725, Accuracy: 22

training set: Average loss: 0.2681, Accuracy: 22922/26048 (88.00%)
validation set: Average loss: 0.3256, Accuracy: 5531/6513 (84.92%)


training set: Average loss: 0.2676, Accuracy: 22916/26048 (87.98%)
validation set: Average loss: 0.3271, Accuracy: 5528/6513 (84.88%)


training set: Average loss: 0.2679, Accuracy: 22926/26048 (88.01%)
validation set: Average loss: 0.3277, Accuracy: 5537/6513 (85.01%)


training set: Average loss: 0.2674, Accuracy: 22923/26048 (88.00%)
validation set: Average loss: 0.3268, Accuracy: 5537/6513 (85.01%)


training set: Average loss: 0.2673, Accuracy: 22913/26048 (87.96%)
validation set: Average loss: 0.3268, Accuracy: 5524/6513 (84.81%)


training set: Average loss: 0.2670, Accuracy: 22919/26048 (87.99%)
validation set: Average loss: 0.3283, Accuracy: 5532/6513 (84.94%)


training set: Average loss: 0.2669, Accuracy: 22930/26048 (88.03%)
validation set: Average loss: 0.3272, Accuracy: 5534/6513 (84.97%)


training set: Average loss: 0.2672, Accuracy: 22

training set: Average loss: 0.2615, Accuracy: 22995/26048 (88.28%)
validation set: Average loss: 0.3292, Accuracy: 5540/6513 (85.06%)


training set: Average loss: 0.2618, Accuracy: 23013/26048 (88.35%)
validation set: Average loss: 0.3294, Accuracy: 5529/6513 (84.89%)


training set: Average loss: 0.2612, Accuracy: 22997/26048 (88.29%)
validation set: Average loss: 0.3294, Accuracy: 5534/6513 (84.97%)


training set: Average loss: 0.2611, Accuracy: 22992/26048 (88.27%)
validation set: Average loss: 0.3294, Accuracy: 5532/6513 (84.94%)


training set: Average loss: 0.2609, Accuracy: 22998/26048 (88.29%)
validation set: Average loss: 0.3292, Accuracy: 5534/6513 (84.97%)


training set: Average loss: 0.2612, Accuracy: 22985/26048 (88.24%)
validation set: Average loss: 0.3287, Accuracy: 5525/6513 (84.83%)


training set: Average loss: 0.2606, Accuracy: 23012/26048 (88.34%)
validation set: Average loss: 0.3299, Accuracy: 5538/6513 (85.03%)


training set: Average loss: 0.2603, Accuracy: 23

training set: Average loss: 0.2545, Accuracy: 23072/26048 (88.57%)
validation set: Average loss: 0.3306, Accuracy: 5522/6513 (84.78%)


training set: Average loss: 0.2548, Accuracy: 23041/26048 (88.46%)
validation set: Average loss: 0.3318, Accuracy: 5511/6513 (84.62%)


training set: Average loss: 0.2543, Accuracy: 23086/26048 (88.63%)
validation set: Average loss: 0.3313, Accuracy: 5527/6513 (84.86%)


training set: Average loss: 0.2542, Accuracy: 23082/26048 (88.61%)
validation set: Average loss: 0.3305, Accuracy: 5525/6513 (84.83%)


training set: Average loss: 0.2537, Accuracy: 23096/26048 (88.67%)
validation set: Average loss: 0.3326, Accuracy: 5523/6513 (84.80%)


training set: Average loss: 0.2536, Accuracy: 23098/26048 (88.67%)
validation set: Average loss: 0.3320, Accuracy: 5523/6513 (84.80%)


training set: Average loss: 0.2535, Accuracy: 23094/26048 (88.66%)
validation set: Average loss: 0.3311, Accuracy: 5527/6513 (84.86%)


training set: Average loss: 0.2532, Accuracy: 23

training set: Average loss: 0.2466, Accuracy: 23161/26048 (88.92%)
validation set: Average loss: 0.3340, Accuracy: 5516/6513 (84.69%)


training set: Average loss: 0.2461, Accuracy: 23173/26048 (88.96%)
validation set: Average loss: 0.3364, Accuracy: 5514/6513 (84.66%)


training set: Average loss: 0.2460, Accuracy: 23184/26048 (89.00%)
validation set: Average loss: 0.3357, Accuracy: 5519/6513 (84.74%)


training set: Average loss: 0.2462, Accuracy: 23177/26048 (88.98%)
validation set: Average loss: 0.3358, Accuracy: 5514/6513 (84.66%)


training set: Average loss: 0.2463, Accuracy: 23179/26048 (88.99%)
validation set: Average loss: 0.3337, Accuracy: 5512/6513 (84.63%)


training set: Average loss: 0.2460, Accuracy: 23205/26048 (89.09%)
validation set: Average loss: 0.3361, Accuracy: 5505/6513 (84.52%)


training set: Average loss: 0.2455, Accuracy: 23189/26048 (89.02%)
validation set: Average loss: 0.3350, Accuracy: 5523/6513 (84.80%)


training set: Average loss: 0.2459, Accuracy: 23

training set: Average loss: 0.2387, Accuracy: 23268/26048 (89.33%)
validation set: Average loss: 0.3402, Accuracy: 5513/6513 (84.65%)


training set: Average loss: 0.2390, Accuracy: 23285/26048 (89.39%)
validation set: Average loss: 0.3373, Accuracy: 5516/6513 (84.69%)


training set: Average loss: 0.2385, Accuracy: 23264/26048 (89.31%)
validation set: Average loss: 0.3380, Accuracy: 5506/6513 (84.54%)


training set: Average loss: 0.2378, Accuracy: 23246/26048 (89.24%)
validation set: Average loss: 0.3400, Accuracy: 5518/6513 (84.72%)


training set: Average loss: 0.2378, Accuracy: 23258/26048 (89.29%)
validation set: Average loss: 0.3395, Accuracy: 5514/6513 (84.66%)


training set: Average loss: 0.2377, Accuracy: 23298/26048 (89.44%)


KeyboardInterrupt: 

In [14]:
# Re-train the same fixed configuration for 92 epochs; the resulting `model`
# stays at module level after this cell.
# `dropout`, `model` and `log_interval` are kept as module-level names with
# these exact spellings because code defined in earlier cells resolves some of
# them as globals at call time.
log_interval, epochs, max_count = 1000, 92, 1
print("Using SGD optimizer & momentum")

for count in range(max_count):
    hidden_units = [1024, 128, 32]

    # Fixed hyper-parameters for this run.
    lr = 0.00430441753776839
    dropout = 0.33790385618345053
    momentum = 0.5544240157986617
    l2_reg = 0.0004893150674674339

    print("{}, hidden units{}, lr {}, dropout {}, reg {}, momentum {}".format(
        count, hidden_units, lr, dropout, l2_reg, momentum))

    # Seed torch before the model is constructed.
    torch.manual_seed(1234)
    model = Net3HiddenLayers(*(hidden_units + [dropout]))

    optimizer = optim.SGD(
        model.parameters(),
        lr=lr,
        momentum=momentum,
        weight_decay=l2_reg,
    )
    train_and_eval(optimizer, model, epochs, True)

Using SGD optimizer & momentum
0, hidden units[1024, 128, 32], lr 0.00430441753776839, dropout 0.33790385618345053, reg 0.0004893150674674339, momentum 0.5544240157986617
training set: Average loss: 0.5433, Accuracy: 19792/26048 (75.98%)
validation set: Average loss: 0.5465, Accuracy: 4928/6513 (75.66%)


training set: Average loss: 0.4941, Accuracy: 19792/26048 (75.98%)
validation set: Average loss: 0.4992, Accuracy: 4928/6513 (75.66%)


training set: Average loss: 0.4271, Accuracy: 19987/26048 (76.73%)
validation set: Average loss: 0.4343, Accuracy: 4964/6513 (76.22%)


training set: Average loss: 0.3765, Accuracy: 21615/26048 (82.98%)
validation set: Average loss: 0.3866, Accuracy: 5364/6513 (82.36%)


training set: Average loss: 0.3543, Accuracy: 21790/26048 (83.65%)
validation set: Average loss: 0.3665, Accuracy: 5379/6513 (82.59%)


training set: Average loss: 0.3455, Accuracy: 21851/26048 (83.89%)
validation set: Average loss: 0.3587, Accuracy: 5384/6513 (82.67%)


training set:

training set: Average loss: 0.3005, Accuracy: 22399/26048 (85.99%)
validation set: Average loss: 0.3235, Accuracy: 5513/6513 (84.65%)


training set: Average loss: 0.3004, Accuracy: 22423/26048 (86.08%)
validation set: Average loss: 0.3235, Accuracy: 5513/6513 (84.65%)


training set: Average loss: 0.3000, Accuracy: 22407/26048 (86.02%)
validation set: Average loss: 0.3237, Accuracy: 5513/6513 (84.65%)


training set: Average loss: 0.2999, Accuracy: 22397/26048 (85.98%)
validation set: Average loss: 0.3239, Accuracy: 5512/6513 (84.63%)


training set: Average loss: 0.2995, Accuracy: 22422/26048 (86.08%)
validation set: Average loss: 0.3234, Accuracy: 5526/6513 (84.85%)


training set: Average loss: 0.2993, Accuracy: 22418/26048 (86.06%)
validation set: Average loss: 0.3233, Accuracy: 5523/6513 (84.80%)


training set: Average loss: 0.2995, Accuracy: 22409/26048 (86.03%)
validation set: Average loss: 0.3236, Accuracy: 5518/6513 (84.72%)


training set: Average loss: 0.2991, Accuracy: 22

validation set: Average loss: 0.3227, Accuracy: 5545/6513 (85.14%)


training set: Average loss: 0.2931, Accuracy: 22523/26048 (86.47%)
validation set: Average loss: 0.3228, Accuracy: 5535/6513 (84.98%)


training set: Average loss: 0.2933, Accuracy: 22527/26048 (86.48%)
validation set: Average loss: 0.3227, Accuracy: 5552/6513 (85.24%)


training set: Average loss: 0.2933, Accuracy: 22527/26048 (86.48%)
validation set: Average loss: 0.3227, Accuracy: 5552/6513 (85.24%)


In [15]:
# Score the held-out test split. No model is passed, so evaluate() uses the
# module-level `model`, i.e. whatever the most recently run training cell left.
evaluate(test_loader, data_set="test")

test set: Average loss: 0.3092, Accuracy: 13971/16281 (85.81%)
