In [1]:
from __future__ import print_function
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable
import pandas as pd
from numpy import random

In [2]:
# Prepare data for training, validation and testing
TRAIN_FILE = 'D:\Study\Ostfold\MachineLearning\git\data\ohenc_data_colNames.train';
VAL_FILE = 'D:\Study\Ostfold\MachineLearning\git\data\ohenc_data_colNames.val';
TEST_FILE = 'D:\Study\Ostfold\MachineLearning\git\data\ohenc_data_colNames.test';

# use one of 2 labels
redundant_label = 'outcome<50K'
label_name = 'outcome>50K'

# training data
train = pd.read_table(TRAIN_FILE, sep=' ')
train.pop(redundant_label)
train_x, train_y = train, train.pop(label_name)

# validation data
val = pd.read_table(VAL_FILE, sep=' ')
val.pop(redundant_label)
val_x, val_y = val, val.pop(label_name)

# testing data
test = pd.read_table(TEST_FILE, sep=' ')
test.pop(redundant_label)
test_x, test_y = test, test.pop(label_name)

display(train_x.head())
display(train_y.head())

Unnamed: 0,age,workclassMissing,workclassFederal-gov,workclassLocal-gov,workclassNever-worked,workclassPrivate,workclassSelf-emp-inc,workclassSelf-emp-not-inc,workclassState-gov,workclassWithout-pay,...,native-countryPortugal,native-countryPuerto-Rico,native-countryScotland,native-countrySouth,native-countryTaiwan,native-countryThailand,native-countryTrinadad-Tobago,native-countryUnited-States,native-countryVietnam,native-countryYugoslavia
0,-1.286609,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
1,0.395073,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,1,0,0
2,0.02949,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
3,-1.286609,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
4,0.833773,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,1,0,0


0    0
1    0
2    0
3    0
4    1
Name: outcome>50K, dtype: int64

In [3]:
#prepare loaders
train = torch.utils.data.TensorDataset(torch.from_numpy(train_x.values).type(torch.FloatTensor), torch.from_numpy(train_y.values))
train_loader = torch.utils.data.DataLoader(train, batch_size=128, shuffle=True)
train_loader_val = torch.utils.data.DataLoader(train, batch_size=10000, shuffle=True)

val = torch.utils.data.TensorDataset(torch.from_numpy(val_x.values).type(torch.FloatTensor), torch.from_numpy(val_y.values))
val_loader = torch.utils.data.DataLoader(val, batch_size=10000, shuffle=True)

test = torch.utils.data.TensorDataset(torch.from_numpy(test_x.values).type(torch.FloatTensor), torch.from_numpy(test_y.values))
test_loader = torch.utils.data.DataLoader(test, batch_size=10000, shuffle=True)

In [6]:
class Net4HiddenLayers(nn.Module):
    def __init__(self, nodes1, nodes2, nodes4, dropout):
        super(Net4HiddenLayers, self).__init__()
        self.fc1 = nn.Linear(108, nodes1)
        self.fc1_bn = nn.BatchNorm1d(nodes1)
        self.fc2 = nn.Linear(nodes1, nodes2)
        self.fc2_bn = nn.BatchNorm1d(nodes2)
        self.fc3 = nn.Linear(nodes2, nodes3)
        self.fc3_bn = nn.BatchNorm1d(nodes3)
        self.fc4 = nn.Linear(nodes3, nodes4)
        self.fc4_bn = nn.BatchNorm1d(nodes4)
        self.fc5 = nn.Linear(nodes4, 2)

    def forward(self, x):
        x = F.relu(self.fc1_bn(self.fc1(x)))
        x = F.dropout(x, p=dropout, training=self.training)
        x = F.relu(self.fc2_bn(self.fc2(x)))
        x = F.dropout(x, p=dropout, training=self.training)
        x = F.relu(self.fc3_bn(self.fc3(x)))
        x = F.dropout(x, p=dropout, training=self.training)
        x = F.relu(self.fc4_bn(self.fc4(x)))
        x = F.dropout(x, p=dropout, training=self.training)
        
        x = self.fc5(x)
        return F.log_softmax(x, dim=1)

class Net3HiddenLayers(nn.Module):
    def __init__(self, nodes1, nodes2, nodes3, dropout):
        super(Net3HiddenLayers, self).__init__()
        self.fc1 = nn.Linear(108, nodes1)
        self.fc1_bn = nn.BatchNorm1d(nodes1)
        self.fc2 = nn.Linear(nodes1, nodes2)
        self.fc2_bn = nn.BatchNorm1d(nodes2)
        self.fc3 = nn.Linear(nodes2, nodes3)
        self.fc3_bn = nn.BatchNorm1d(nodes3)
        self.fc4 = nn.Linear(nodes3, 2)

    def forward(self, x):
        x = F.relu(self.fc1_bn(self.fc1(x)))
        x = F.dropout(x, p=dropout, training=self.training)
        x = F.relu(self.fc2_bn(self.fc2(x)))
        x = F.dropout(x, p=dropout, training=self.training)
        x = F.relu(self.fc3_bn(self.fc3(x)))
        x = F.dropout(x, p=dropout, training=self.training)
        
        x = self.fc4(x)
        return F.log_softmax(x, dim=1)
    
class Net2HiddenLayers(nn.Module):
    def __init__(self, nodes1, nodes2, dropout):
        super(Net2HiddenLayers, self).__init__()
        self.fc1 = nn.Linear(108, nodes1)
        self.fc1_bn = nn.BatchNorm1d(nodes1)
        self.fc2 = nn.Linear(nodes1, nodes2)
        self.fc2_bn = nn.BatchNorm1d(nodes2)
        self.fc3 = nn.Linear(nodes2, 2)

    def forward(self, x):
        x = F.relu(self.fc1_bn(self.fc1(x)))
        x = F.dropout(x, p=dropout, training=self.training)
        x = F.relu(self.fc2_bn(self.fc2(x)))
        x = F.dropout(x, p=dropout, training=self.training)
        
        x = self.fc3(x)
        return F.log_softmax(x, dim=1)

class Net1HiddenLayer(nn.Module):
    def __init__(self, nodes, dropout):
        super(Net1HiddenLayer, self).__init__()
        self.fc1 = nn.Linear(108, nodes)
        self.fc1_bn = nn.BatchNorm1d(nodes)
        self.fc2 = nn.Linear(nodes, 2)

    def forward(self, x):
        x = F.relu(self.fc1_bn(self.fc1(x)))
        x = F.dropout(x, p=dropout, training=self.training)
        
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)

def train(epoch, optimizer, model, log_enable = False):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
#         if args.cuda:
#             data, target = data.cuda(), target.cuda()
        data, target = Variable(data), Variable(target)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        if log_enable and (batch_idx % log_interval == 0):
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.data[0]))

def evaluate(data_loader, data_set="validation"):
    model.eval()
    test_loss = 0
    correct = 0
    for data, target in data_loader:
#         if args.cuda:
#             data, target = data.cuda(), target.cuda()
        data, target = Variable(data, volatile=True), Variable(target)
        output = model(data)
        test_loss += F.nll_loss(output, target, size_average=False).data[0] # sum up batch loss
        pred = output.data.max(1, keepdim=True)[1] # get the index of the max log-probability
        correct += pred.eq(target.data.view_as(pred)).long().cpu().sum()

    test_loss /= len(data_loader.dataset)
    print('{} set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)'.format(
        data_set, test_loss, correct, len(data_loader.dataset),
        100. * correct / len(data_loader.dataset)))

def train_and_eval(optimizer, model, epochs, log_enable=False):
    for epoch in range(1, epochs + 1):
        train(epoch, optimizer, model, log_enable)
        if (log_enable):
            evaluate(train_loader_val, "training")
            evaluate(val_loader)
            print("\n")
    
    evaluate(train_loader_val, "training")
    evaluate(val_loader)


In [7]:
log_interval = 1000
epochs = 100
max_count = 50
print("Using Adam optimizer") 
      
hidden_set = [2048, 1024, 512, 256, 128, 64, 32, 16] 
for count in range(max_count):
    lr = 10**random.uniform(-2, -4)
    dropout = random.uniform(0.1,0.7)
    layers = random.randint(1, 4)
    hidden_units = random.randint(1, size=layers)
    l2_reg = 10**random.uniform(-4,0)
    for i in range(layers):
        hidden_units[i] = hidden_set[random.randint(0,8)]
        
    hidden_units = sorted(hidden_units, reverse=True)
    
    torch.manual_seed(1234)
    print("{}, hidden units{}, lr {}, dropout {}, l2_reg {}".format(
        count, hidden_units, lr, dropout, l2_reg))
    
    if layers == 1:
        model = Net1HiddenLayer(hidden_units[0].item(), dropout)
    elif layers == 2:
        model = Net2HiddenLayers(hidden_units[0].item(), hidden_units[1].item(), 
                                 dropout)
    elif layers == 3:
        model = Net3HiddenLayers(hidden_units[0].item(), hidden_units[1].item(), 
                                 hidden_units[2].item(), dropout)
    elif layers == 4:
        model = Net4HiddenLayers(hidden_units[0].item(), hidden_units[1].item(), 
                                 hidden_units[2].item(), hidden_units[3].item(), 
                                 dropout)

    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=l2_reg)
    train_and_eval(optimizer, model, epochs)
    

Using Adam optimizer
0, hidden units[512, 128, 16], lr 0.008817740148838987, dropout 0.19292064211820775, l2_reg 0.06526312845736566
training set: Average loss: 0.3870, Accuracy: 21584/26048 (82.86%)
validation set: Average loss: 0.3935, Accuracy: 5348/6513 (82.11%)
1, hidden units[512, 32], lr 0.0002197650292535493, dropout 0.12708883969802606, l2_reg 0.00019603703649661065
training set: Average loss: 0.1949, Accuracy: 23778/26048 (91.29%)
validation set: Average loss: 0.3864, Accuracy: 5465/6513 (83.91%)
2, hidden units[512, 512], lr 0.0009722409087384057, dropout 0.6595924273002486, l2_reg 0.00018757377382423246
training set: Average loss: 0.2443, Accuracy: 23106/26048 (88.71%)
validation set: Average loss: 0.3414, Accuracy: 5529/6513 (84.89%)
3, hidden units[512, 256, 128], lr 0.0003445814894676429, dropout 0.6628926041214146, l2_reg 0.00039927980952424554
training set: Average loss: 0.2374, Accuracy: 23268/26048 (89.33%)
validation set: Average loss: 0.3435, Accuracy: 5506/6513 (8

training set: Average loss: 0.5916, Accuracy: 19792/26048 (75.98%)
validation set: Average loss: 0.5932, Accuracy: 4928/6513 (75.66%)
35, hidden units[2048], lr 0.0003249533170129262, dropout 0.381941374409544, l2_reg 0.9372571544652458
training set: Average loss: 0.6152, Accuracy: 19792/26048 (75.98%)
validation set: Average loss: 0.6164, Accuracy: 4928/6513 (75.66%)
36, hidden units[256, 16], lr 0.0005717946142694649, dropout 0.46523891276893126, l2_reg 0.001821698869940675
training set: Average loss: 0.2891, Accuracy: 22689/26048 (87.10%)
validation set: Average loss: 0.3248, Accuracy: 5553/6513 (85.26%)
37, hidden units[1024, 256], lr 0.00525813800072652, dropout 0.6744389032918569, l2_reg 0.7035084210643078
training set: Average loss: 0.6037, Accuracy: 19792/26048 (75.98%)
validation set: Average loss: 0.6051, Accuracy: 4928/6513 (75.66%)
38, hidden units[512, 128, 32], lr 0.006296566518349741, dropout 0.24877006707539231, l2_reg 0.1588067475169318
training set: Average loss: 0.56

In [13]:
log_interval = 100
epochs = 17
max_count = 1
print("Using Adam optimizer Finer search") 
      
hidden_set = [2048, 1024, 512, 256, 128, 64, 32, 16] 
for count in range(max_count):
    lr = 0.001
    dropout = 0.5
    hidden_units = [1024, 256]
    l2_reg = 0.001
    
    print("{}, hidden units{}, lr {}, dropout {}, l2_reg {}".format(count, hidden_units, lr, dropout, l2_reg))
    
    torch.manual_seed(1234)
    model = Net2HiddenLayers(hidden_units[0], hidden_units[1], dropout)

    optimizer = optim.Adam(model.parameters(), lr=lr)
    train_and_eval(optimizer, model, epochs, True)

Using Adam optimizer Finer search
0, hidden units[1024, 256], lr 0.001, dropout 0.5, l2_reg 0.001
training set: Average loss: 0.3031, Accuracy: 22401/26048 (86.00%)
validation set: Average loss: 0.3213, Accuracy: 5547/6513 (85.17%)


training set: Average loss: 0.2976, Accuracy: 22434/26048 (86.13%)
validation set: Average loss: 0.3231, Accuracy: 5527/6513 (84.86%)


training set: Average loss: 0.2934, Accuracy: 22515/26048 (86.44%)
validation set: Average loss: 0.3234, Accuracy: 5512/6513 (84.63%)


training set: Average loss: 0.2905, Accuracy: 22544/26048 (86.55%)
validation set: Average loss: 0.3203, Accuracy: 5541/6513 (85.08%)


training set: Average loss: 0.2883, Accuracy: 22556/26048 (86.59%)
validation set: Average loss: 0.3223, Accuracy: 5532/6513 (84.94%)


training set: Average loss: 0.2846, Accuracy: 22625/26048 (86.86%)
validation set: Average loss: 0.3197, Accuracy: 5542/6513 (85.09%)


training set: Average loss: 0.2845, Accuracy: 22598/26048 (86.76%)
validation set: Ave

In [15]:
 evaluate(test_loader, "test")

test set: Average loss: 0.3748, Accuracy: 13769/16281 (84.57%)
