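Hyperparameter search with BOHB (HpBandSter) for a small PyTorch MLP trained on the two-spirals dataset. The notebook defines the dataset, the model, the training loop, and a BOHB worker with its configuration space, and then runs the optimizer.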

In [0]:
!pip install hpbandster

In [0]:
import numpy as np
import matplotlib.pyplot as plt
np.random.seed(100)

# for pytorch
import torch
import torch.utils.data
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
from typing import NamedTuple
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR

import copy
import logging
logging.basicConfig(level=logging.DEBUG)
import os
import pickle
import argparse
import time

# for hyperparameter optimization
import ConfigSpace as CS
import ConfigSpace.hyperparameters as CSH
from hpbandster.core.worker import Worker
import hpbandster.core.nameserver as hpns
import hpbandster.core.result as hpres
from hpbandster.optimizers import BOHB

In [0]:
def twospirals(n_points, difficulty, noise=1.):
    """
    Generate the two-spirals toy classification dataset.

    One spiral arm is sampled and perturbed with uniform noise; the second arm
    is the point reflection of the first one through the origin.

    :param n_points: number of samples drawn per spiral arm
    :param difficulty: maximum winding angle of the arms, in degrees
    :param noise: scale of the uniform [0, noise) offset added to each coordinate
    :return: tuple ``(points, labels)``; ``points`` has shape
        ``(2 * n_points, 2)`` and ``labels`` has shape ``(2 * n_points,)``,
        holding 0 for the first arm and 1 for the mirrored arm
    """
    # The three np.random.rand draws stay in this order (angle, x offset,
    # y offset) so a fixed global seed keeps producing the same dataset.
    angle = np.sqrt(np.random.rand(n_points, 1)) * difficulty * (2 * np.pi) / 360
    offset_x = np.random.rand(n_points, 1) * noise
    arm_x = -np.cos(angle) * angle + offset_x
    offset_y = np.random.rand(n_points, 1) * noise
    arm_y = np.sin(angle) * angle + offset_y

    first_arm = np.hstack((arm_x, arm_y))
    mirrored_arm = np.hstack((-arm_x, -arm_y))
    points = np.vstack((first_arm, mirrored_arm))
    labels = np.hstack((np.zeros(n_points), np.ones(n_points)))
    return points, labels

In [0]:
class SprialDataSet(Dataset):
    """
    Torch ``Dataset`` over a freshly sampled two-spirals point cloud.

    The samples are generated once, at construction time, through
    ``twospirals`` and are kept in memory as float32 tensors.
    """
    def __init__(self, n_points, difficulty):
        """
        :param n_points: forwarded to ``twospirals`` (samples per spiral arm)
        :param difficulty: forwarded to ``twospirals``
        """
        super().__init__()
        points, targets = twospirals(n_points, difficulty)
        self.set_len = len(points)
        self.input = torch.from_numpy(points).type(torch.float32)
        # Labels are stored as a column so that every item is a 1-element tensor.
        self.label = torch.from_numpy(targets).unsqueeze(1).type(torch.float32)

    def __len__(self):
        """Number of samples held by the dataset."""
        return self.set_len

    def __getitem__(self, idx):
        """Return the ``(input, label)`` pair stored at position ``idx``."""
        sample = self.input[idx]
        target = self.label[idx]
        return sample, target

In [0]:
class MLP(nn.Module):
    """
    Fully connected network for 2-D inputs with one to three hidden layers
    and a single output unit.

    The following attributes are read from ``args``:

    * ``dropout``    - dropout probability
    * ``num_layers`` - number of hidden layers (1, 2 or 3)
    * ``fc1``        - width of the first hidden layer
    * ``fc2``        - width of the second hidden layer (read when ``num_layers >= 2``)
    * ``fc3``        - width of the third hidden layer (read when ``num_layers >= 3``)
    """
    def __init__(self, args):
        super(MLP, self).__init__()
        self.dropout = nn.Dropout(p=args.dropout)
        self.args = args

        self.fc1 = nn.Linear(2, args.fc1)
        out = args.fc1

        if args.num_layers >= 2:
            self.fc2 = nn.Linear(args.fc1, args.fc2)
            out = args.fc2

        # Independent `if`, not `elif`: a 3-layer network needs fc2 AND fc3.
        # With `elif` this branch could never run once `num_layers >= 2` matched.
        if args.num_layers >= 3:
            self.fc3 = nn.Linear(args.fc2, args.fc3)
            out = args.fc3

        # `out` is the width of the last hidden layer that was actually built.
        self.fc_out = nn.Linear(out, 1)

    def forward(self, x):
        """
        Run the network on ``x`` and return one raw (un-squashed) output per sample.

        Dropout is applied after the second and third hidden layers only.
        """
        x = torch.relu(self.fc1(x))

        if self.args.num_layers >= 2:
            x = self.dropout(torch.relu(self.fc2(x)))

        if self.args.num_layers >= 3:
            x = self.dropout(torch.relu(self.fc3(x)))

        out = self.fc_out(x)
        return out

In [0]:
def count_parameters(model):
    """Return the total number of elements over all trainable parameters of ``model``."""
    trainable_total = 0
    for param in model.parameters():
        # Frozen parameters (requires_grad == False) are not counted.
        if not param.requires_grad:
            continue
        trainable_total += param.numel()
    return trainable_total


class Optimization():
    """
    Bundles an ``MLP`` with its optimizer, learning-rate scheduler, loss
    function and data loaders, and exposes one-epoch train / evaluation passes.
    """
    def __init__(self, args, loss,  train_loader, val_loader, test_loader):
        """
        :param args: configuration object; reads ``device``, ``optimizer``
            ('Adam' or 'SGD'), ``rate``, ``sgd_momentum`` (SGD only) and
            ``lr_decay_step``; it is also forwarded to ``MLP``
        :param loss: callable ``loss(outputs, labels)`` used for training and validation
        :param train_loader: iterable of ``(inputs, labels)`` training batches
        :param val_loader: iterable of ``(inputs, labels)`` validation batches
        :param test_loader: iterable of ``(inputs, labels)`` test batches
        :raises ValueError: if ``args.optimizer`` is not a supported name
        """
        self.train_loader = train_loader
        self.val_loader = val_loader
        self.test_loader = test_loader
        device = args.device

        self.model = MLP(args).to(device)

        print("number of trainable parameter = ", count_parameters(self.model))

        if args.optimizer == 'Adam':
            self.optimizer = optim.Adam(self.model.parameters(), lr=args.rate)
        elif args.optimizer == 'SGD':
            self.optimizer = torch.optim.SGD(self.model.parameters(), lr=args.rate, momentum=args.sgd_momentum)
        else:
            # Fail here with a clear message instead of an AttributeError on
            # `self.optimizer` a few lines further down.
            raise ValueError("unsupported optimizer %r (expected 'Adam' or 'SGD')" % (args.optimizer,))

        self.scheduler = StepLR(self.optimizer, step_size=args.lr_decay_step)

        self.loss = loss
        self.device = device

    def train(self):
        """
        Run one optimisation pass over ``train_loader``.

        :return: mean of the per-batch training losses (float)
        """
        batch_counter = 0
        total_loss = 0.0
        self.model.train()
        for inputs, labels in self.train_loader:
            inputs = inputs.to(self.device)
            labels = labels.to(self.device)

            self.optimizer.zero_grad()
            outputs = self.model(inputs)
            loss = self.loss(outputs, labels)
            loss.backward()

            self.optimizer.step()

            batch_counter += 1
            total_loss += loss.item()

        return total_loss / batch_counter

    def val_eval(self):
        """
        Evaluate the loss function on ``val_loader`` without updating the model.

        :return: mean of the per-batch validation losses (float)
        """
        batch_counter = 0
        total_loss = 0.0
        self.model.eval()
        for inputs, labels in self.val_loader:
            inputs = inputs.to(self.device)
            labels = labels.to(self.device)

            # Gradients are not needed for evaluation.
            with torch.no_grad():
                outputs = self.model(inputs)
                loss = self.loss(outputs, labels)

            batch_counter += 1
            total_loss += loss.item()

        return total_loss / batch_counter

    def test_eval(self, graph=False):
        """
        Measure classification accuracy on ``test_loader``.

        A sample counts as correct when the sign of the model output agrees
        with the sign of ``label - 0.5``; an output of exactly 0 counts as wrong.

        :param graph: unused; kept for interface compatibility
        :return: mean of the per-batch accuracies as a Python float, so the
            value has the same type as the one returned by ``val_eval``
        """
        batch_counter = 0
        total_accuracy = 0.0
        self.model.eval()

        for inputs, labels in self.test_loader:
            inputs = inputs.to(self.device)
            labels = labels.to(self.device)

            # Gradients are not needed for evaluation.
            with torch.no_grad():
                outputs = self.model(inputs)
                correct = torch.sign(labels - 0.5) * torch.sign(outputs) > 0
                batch_accuracy = torch.mean(correct.type(torch.float32))

            batch_counter += 1
            # .item() detaches the value from the device and the tensor type.
            total_accuracy += batch_accuracy.item()

        return total_accuracy / batch_counter

In [0]:
def main(args, train_loader, val_loader, test_loader):
    """
    Train one model for up to ``args.epochs`` epochs and report its final metrics.

    After every epoch the scheduler is stepped and the model is evaluated on the
    validation and test loaders. Training stops early once the validation loss
    has increased from one epoch to the next ``args.tol`` times in total.

    :param args: configuration object; reads ``loss`` ('BCE' or 'NLL'),
        ``epochs`` and ``tol`` here and is forwarded to ``Optimization``
    :param train_loader: iterable of training batches
    :param val_loader: iterable of validation batches
    :param test_loader: iterable of test batches
    :return: ``(test_loss, val_loss)`` of the LAST epoch that was run, where
        ``test_loss`` is the value returned by ``Optimization.test_eval``
    :raises ValueError: if ``args.loss`` is not supported or ``args.epochs < 1``
    """
    if args.epochs < 1:
        # Without at least one epoch there is nothing to return.
        raise ValueError("epochs must be >= 1, got %r" % (args.epochs,))

    best_val_error = np.inf

    if args.loss == 'BCE':
        loss_function = nn.BCEWithLogitsLoss(reduction='mean')
    elif args.loss == 'NLL':
        # NOTE(review): NLLLoss expects log-probabilities and class-index
        # targets; the single-output MLP and float labels used in this notebook
        # look incompatible with it - confirm before selecting 'NLL'.
        loss_function = nn.NLLLoss(reduction='mean')
    else:
        raise ValueError("unsupported loss %r (expected 'BCE' or 'NLL')" % (args.loss,))

    optimization = Optimization(args, loss_function, train_loader, val_loader, test_loader)

    train_loss_records = []
    val_loss_records = []
    test_loss_records = []

    print("loading training, val and test set completed!")
    mistake_counter = 0  # number of epochs in which the validation loss went up

    for epoch in range(args.epochs):
        train_loss = optimization.train()
        train_loss_records.append(train_loss)
        optimization.scheduler.step()

        val_loss = optimization.val_eval()
        val_loss_records.append(val_loss)

        test_loss = optimization.test_eval()
        test_loss_records.append(test_loss)

        # Compare as soon as two records exist (i.e. from the second epoch on).
        if len(val_loss_records) >= 2 and val_loss_records[-1] > val_loss_records[-2]:
            mistake_counter += 1

        if val_loss < best_val_error:
            # Snapshot of the best epoch so far. It is not returned at the
            # moment; only the last epoch's metrics are.
            best_results = {
                'epoch': epoch + 1,
                'state_dict': copy.deepcopy(optimization.model.state_dict()),
                'model': optimization.model,
                'best_val_error': val_loss,
                'best_test_error': test_loss,
                'optimizer': copy.deepcopy(optimization.optimizer),
                'args': args
            }
            best_val_error = val_loss

        # Read the learning rate straight from the optimizer: scheduler.get_lr()
        # is not meant to be called outside of scheduler.step().
        current_lr = optimization.optimizer.param_groups[0]['lr']
        print(
            '[Epoch: %3d/%3d] LR: %0.8f  Train loss: %.4f,  Val loss: %.4f,  Test Acc: %.4f'
            % (epoch + 1, args.epochs, current_lr, train_loss_records[epoch], val_loss_records[epoch],
               test_loss_records[epoch]))

        if mistake_counter >= args.tol or epoch == args.epochs - 1:
            print('Training is terminated. final epoch or validation loss has increased')
            break
    return test_loss, val_loss

In [0]:
class Args(NamedTuple):
    """
    Immutable bundle of training and model settings passed to ``main``,
    ``Optimization`` and ``MLP``.

    Annotations are not enforced at runtime. ``PyTorchWorker.compute`` passes a
    ``torch.device`` for ``device`` and ``None`` for ``sgd_momentum``, ``fc2``
    and ``fc3`` when the sampled configuration does not contain them.
    """
    rate: float  # learning rate handed to the optimizer
    lr_decay_step: int  # step_size of the StepLR scheduler (epochs between decays)
    batch_size: int  # minibatch size; not read by the training code in this notebook (loaders are built by the worker)
    epochs: int  # maximum training epochs
    tol: int  # number of validation-loss increases after which training stops
    device: str  # cuda or cpu (the worker passes a torch.device object)

    loss: str  # loss function: 'BCE' or 'NLL'
    optimizer: str # optimizer method: 'Adam' or 'SGD'
    sgd_momentum: float # momentum, used only with the 'SGD' optimizer; may be None otherwise

    dropout: float  # the probability for dropout
    fc1: int # 1st hidden layer's units
    fc2: int # 2nd hidden layer's units; read when num_layers >= 2, may be None otherwise
    fc3: int # 3rd hidden layer's units; may be None when the configuration has no fc3
    num_layers: int # number of hidden layers
    # TODO1: add more layers or parameters if necessary

In [0]:
class PyTorchWorker(Worker):
    """
    HpBandSter worker that trains the MLP on the two-spirals data for each
    configuration proposed by the optimizer.

    The three datasets are sampled once per worker and reused for every
    configuration that the worker evaluates.
    """
    def __init__(self, **kwargs):
        """
        :param kwargs: forwarded unchanged to ``Worker.__init__``
        """
        super().__init__(**kwargs)

        # initialize the arguments for loading the data
        difficulty = 1000
        batch_size = 32

        training_set = SprialDataSet(1024, difficulty)
        val_set = SprialDataSet(64, difficulty)
        test_set = SprialDataSet(128, difficulty)

        self.train_loader = DataLoader(training_set, batch_size=batch_size, num_workers=8, shuffle=True, drop_last=True)
        self.val_loader = DataLoader(val_set, batch_size=batch_size, num_workers=8, shuffle=False, drop_last=True)
        self.test_loader = DataLoader(test_set, batch_size=batch_size, num_workers=8, shuffle=False, drop_last=True)

    def compute(self, config, budget, working_directory, *args, **kwargs):
        """
        Train and evaluate one configuration.

        :param config: sampled hyperparameters; conditional entries
            (``sgd_momentum``, ``fc2``, ``fc3``) may be missing
        :param budget: training budget, used as the number of epochs
        :param working_directory: unused here
        :return: dict with ``'loss'`` (final validation loss, the quantity that
            is minimised) and ``'info'`` (holding the final test accuracy)
        """

        # TODO: Decide which hyperparameters are searched by BOHB
        new_args = Args(
                        rate=config['lr'],
                        lr_decay_step=30,
                        batch_size=32,
                        epochs=int(budget),
                        tol=5,
                        loss='BCE',
                        device=torch.device("cuda" if torch.cuda.is_available() else "cpu"),

                        optimizer=config['optimizer'],
                        # Conditional hyperparameters are absent from `config`
                        # when their condition is not met.
                        sgd_momentum=config['sgd_momentum'] if 'sgd_momentum' in config else None,
                        dropout=config['dropout'],
                        fc1=config['fc1'],
                        fc2=config['fc2'] if 'fc2' in config else None,
                        fc3=config['fc3'] if 'fc3' in config else None,
                        num_layers=config['num_layers']
                        # TODO2 add the additional parameters from Args class here
                        )

        test_loss, val_loss = main(new_args, self.train_loader, self.val_loader, self.test_loader)
        time.sleep(0.1)
        # Cast to plain floats so that no tensor object is handed back to
        # HpBandSter in the result dictionary.
        return ({
            'loss': float(val_loss),  # remember: HpBandSter always minimizes!
            'info': {'test accuracy': float(test_loss),
                     }
        })


    @staticmethod
    def get_configspace():
        """
        It builds the configuration space with the needed hyperparameters

        :return: ConfigurationsSpace-Object
        """
        cs = CS.ConfigurationSpace()

        # TODO3: add a proper condition here
        # Type 1 condition: float
        # default_value must be a number, not a string.
        lr = CSH.UniformFloatHyperparameter('lr', lower=1e-4, upper=1e-1, default_value=1e-2, log=True)

        # Type 2 condition: categorical
        # For demonstration purposes, we add different optimizers as categorical hyperparameters.
        # To show how to use conditional hyperparameters with ConfigSpace, we'll add the optimizers 'Adam' and 'SGD'.
        # SGD has a different parameter 'momentum'.
        optimizer = CSH.CategoricalHyperparameter('optimizer', ['Adam', 'SGD'])

        sgd_momentum = CSH.UniformFloatHyperparameter('sgd_momentum', lower=0.0, upper=0.99, default_value=0.9, log=False)

        cs.add_hyperparameters([lr, optimizer, sgd_momentum])

        # Type 3 condition: conditional
        # The hyperparameter sgd_momentum will be used, if the configuration
        # contains 'SGD' as optimizer.
        cond = CS.EqualsCondition(sgd_momentum, optimizer, 'SGD')
        cs.add_condition(cond)

        # Type 4 condition: Integer
        fc1 = CSH.UniformIntegerHyperparameter('fc1', lower=2, upper=20, default_value=10, log=False)
        fc2 = CSH.UniformIntegerHyperparameter('fc2', lower=2, upper=20, default_value=10, log=False)
        fc3 = CSH.UniformIntegerHyperparameter('fc3', lower=2, upper=20, default_value=10, log=False)
        num_layers = CSH.UniformIntegerHyperparameter('num_layers', lower=1, upper=3, default_value=2, log=False)

        cs.add_hyperparameters([fc1, fc2, fc3, num_layers])

        # fc2 is only active for num_layers > 1, fc3 only for num_layers > 2.
        cond_fc2 = CS.GreaterThanCondition(fc2, num_layers, 1)
        cond_fc3 = CS.GreaterThanCondition(fc3, num_layers, 2)
        cs.add_condition(cond_fc2)
        cs.add_condition(cond_fc3)

        dropout = CSH.UniformFloatHyperparameter('dropout', lower=0.0, upper=0.9, default_value=0.0, log=False)
        cs.add_hyperparameters([dropout])
        return cs

In [0]:
# Keep a backup of the JSON files written by previous hyperparameter searches
def backup_jsons(curDir):
    """
    Rename ``results.json`` and ``configs.json`` in ``curDir`` out of the way.

    Each existing file is renamed to ``<name>_<k>.json`` where ``k`` is the
    smallest positive integer for which that target does not exist yet.
    Files that are not present are skipped.

    :param curDir: directory that holds the JSON files
    """
    def json_path(stem):
        # Same string concatenation as the plain path building it replaces.
        return curDir + os.path.sep + stem + '.json'

    for fname in ('results', 'configs'):
        current = json_path(fname)
        if not os.path.exists(current):
            continue

        counter = 1
        backup = json_path(fname + '_' + str(counter))
        while os.path.exists(backup):
            counter += 1
            backup = json_path(fname + '_' + str(counter))
        os.rename(current, backup)


# Settings of the BOHB run.
args_min_budget = 1 # Minimum number of epochs for training.
args_max_budget = 9 # Maximum number of epochs for training.
args_n_iterations = 4 # Number of iterations performed by the optimizer
args_worker = False # Flag to turn this into a worker process
args_run_id = '' # A unique run id for this optimization run. An easy option is to use the job id of the clusters scheduler.
args_nic_name = 'lo' # Which network interface to use for communication.
args_shared_directory = '.' # A directory that is accessible for all processes, e.g. a NFS share.
args_eta = 3 # eta

# Every process has to lookup the hostname
host = hpns.nic_name_to_host(args_nic_name)


if args_worker:
    import time
    time.sleep(1)   # short artificial delay to make sure the nameserver is already running
    w = PyTorchWorker(run_id=args_run_id, host=host, timeout=120)
    w.load_nameserver_credentials(working_directory=args_shared_directory)
    w.run(background=False)
    exit(0)


# This example shows how to log live results. This is most useful
# for really long runs, where intermediate results could already be
# interesting. The core.result submodule contains the functionality to
# read the two generated files (results.json and configs.json) and
# create a Result object.
#backup_jsons(args_shared_directory)
result_logger = hpres.json_result_logger(directory=args_shared_directory, overwrite=True)


# Start a nameserver:
NS = hpns.NameServer(run_id=args_run_id, host=host, port=0, working_directory=args_shared_directory)
ns_host, ns_port = NS.start()

# From here on the nameserver is running; the try/finally blocks make sure it
# (and the optimizer with its workers) is shut down even when the run fails,
# so that a failed cell does not leave background services behind.
try:
    # Start local worker
    w = PyTorchWorker(run_id=args_run_id, host=host, nameserver=ns_host, nameserver_port=ns_port, timeout=120)
    w.run(background=True)

    # Run an optimizer
    bohb = BOHB(  configspace = PyTorchWorker.get_configspace(),
                          run_id = args_run_id,
                          eta = args_eta,
                          host=host,
                          nameserver=ns_host,
                          nameserver_port=ns_port,
                          result_logger=result_logger,
                          min_budget=args_min_budget,
                          max_budget=args_max_budget,
                   )
    try:
        res = bohb.run(n_iterations=args_n_iterations)

        # store results
        with open(os.path.join(args_shared_directory, 'results.pkl'), 'wb') as fh:
            pickle.dump(res, fh)
    finally:
        # shutdown
        bohb.shutdown(shutdown_workers=True)
finally:
    NS.shutdown()

DEBUG:hpbandster:wait_for_workers trying to get the condition
INFO:hpbandster:DISPATCHER: started the 'discover_worker' thread
INFO:hpbandster:DISPATCHER: started the 'job_runner' thread
DEBUG:hpbandster.run_.worker.e1190fe56a0f.121:WORKER: Connected to nameserver <Pyro4.core.Proxy at 0x7f1c75bf6da0; connected IPv4; for PYRO:Pyro.NameServer@127.0.0.1:36101>
DEBUG:hpbandster.run_.worker.e1190fe56a0f.121:WORKER: No dispatcher found. Waiting for one to initiate contact.
INFO:hpbandster.run_.worker.e1190fe56a0f.121:WORKER: start listening for jobs
INFO:hpbandster:DISPATCHER: Pyro daemon running on 127.0.0.1:43013
DEBUG:hpbandster:DISPATCHER: Starting worker discovery
DEBUG:hpbandster:DISPATCHER: Found 1 potential workers, 0 currently in the pool.
INFO:hpbandster:DISPATCHER: discovered new worker, hpbandster.run_.worker.e1190fe56a0f.121139762526115712
DEBUG:hpbandster:HBMASTER: number of workers changed to 1
DEBUG:hpbandster:Enough workers to start this run!
INFO:hpbandster:HBMASTER: starti

number of trainable parameter =  69
loading training, val and test set completed!


DEBUG:hpbandster.run_.worker.e1190fe56a0f.121:WORKER: done with job (0, 0, 0), trying to register it.
INFO:hpbandster.run_.worker.e1190fe56a0f.121:WORKER: registered result for job (0, 0, 0) with dispatcher


[Epoch:   1/  1] LR: 0.02622788  Train loss: 0.6912,  Val loss: 0.6911,  Test Acc: 0.6797
Training is terminated. final epoch or validation loss has increased
