# A notebook for working on my neural architecture search (NAS) problem with HyperMapper

In [1]:
import math
import torch
import torch.nn as nn
import sys
from torch.autograd import Variable
import time


# Translates a HyperMapper JSON configuration into a PyTorch module

In [2]:
class json2pheno(nn.Module):
    """Feed-forward regression network built from a HyperMapper configuration dict.

    Keys read from the configuration:
      * ``n_nodes``    - width shared by every hidden layer,
      * ``active_<i>`` - hidden layer ``i`` is used when the value equals 1,
      * ``skip_<i>``   - optional; how many layers further ahead layer ``i``
        additionally sends its output (0 means no skip connection).

    Active layers are renumbered consecutively and stored as attributes named
    '0', '1', ...; the final linear layer is stored as 'out'.
    """

    def __init__(self, json, nin, nout):
        """Create the layers and the forwarding scheme.

        :param json: configuration dict (see class docstring).
        :param nin: number of input features.
        :param nout: number of output features.
        """
        super().__init__()

        # Indices of the active layers in the original encoding. The whole
        # numeric suffix is parsed and the result is sorted numerically, so the
        # layer order depends neither on the iteration order of the dict nor on
        # the indices being single digits.
        prefix = 'active_'
        active_list = sorted(
            int(param[len(prefix):])
            for param in json
            if param.startswith(prefix)
            and param[len(prefix):].isdecimal()
            and json[param] == 1
        )
        n_layers = len(active_list)

        n_in = nin
        fw_map = {}

        # Add the active layers in the encoding to the model
        for new_i, old_i in enumerate(active_list):
            key = str(new_i)
            # int() guards against the value arriving as a float
            width = int(json['n_nodes'])
            setattr(self, key, nn.Linear(n_in, width))
            # every layer after the first one takes n_nodes inputs
            n_in = width

            # The last hidden layer only feeds the output layer: no skips here
            if new_i == n_layers - 1:
                fw_map[key] = ['out']
                continue

            fw_map[key] = [str(new_i + 1)]

            # Add the skip connection. The skip length is relative to the
            # direct successor; a skip that reaches past the last hidden layer
            # is routed to the output layer, a skip of 0 adds nothing.
            skip_key = 'skip_' + str(old_i)
            if skip_key in json:
                target = int(json[skip_key]) + new_i + 1
                if target >= n_layers:
                    fw_map[key].append('out')
                elif target > new_i + 1:
                    fw_map[key].append(str(target))

        setattr(self, 'out', nn.Linear(n_in, nout))

        # fw_scheme maps each hidden layer name to the names of the layers that
        # receive its output. Only forward connections are supported.
        self.fw_scheme = fw_map
        print(self.fw_scheme)

    def forward(self, x):
        """Propagate ``x`` through the hidden layers in order, then the output layer.

        Inputs arriving at the same layer (direct and skip connections) are
        summed; hidden layers use tanh, the output layer is linear (regression).
        """
        # inbox[name] collects every tensor sent to the layer called name
        inbox = {'0': [x]}

        k = 0
        while hasattr(self, str(k)):
            key = str(k)
            # Summing keeps the layer input size fixed; concatenating instead
            # would require changing the layer sizes in __init__.
            activation = torch.tanh(getattr(self, key)(sum(inbox[key])))
            for target in self.fw_scheme[key]:
                inbox.setdefault(target, []).append(activation)
            k += 1

        # k == 0 means no active hidden layers, i.e. a plain linear model
        out_input = sum(inbox['out']) if k else x

        # Identity output function since we do regression
        return getattr(self, 'out')(out_input)


# Takes a network and trains it for a given number of epochs on a given objective function

In [3]:
# Most of this was taken from an online example and should be improved upon.
# Maybe leave it as is and write a proper function when doing the MNIST problem.
def trainer(net, epochs, noise, objective='x2'):
    """Train ``net`` with full-batch SGD on a noisy 1-D regression problem.

    :param net: module mapping a (20, 1) tensor to a (20, 1) tensor.
    :param epochs: number of full-batch gradient steps.
    :param noise: scale of the uniform [0, 1) noise added to the targets.
    :param objective: 'x2' for y = x^2 or 'sinx' for y = sin(3 * 3.14 * x).
    :return: ``(loss, tr_time)``. ``loss`` is a 0-d NumPy array holding the MSE
        computed in the last epoch (before that epoch's parameter update), or
        the MSE of the untrained network when ``epochs`` is 0. ``tr_time`` is
        the wall-clock training time in seconds.
    :raises ValueError: if ``objective`` is not one of the supported names.
    """
    torch.manual_seed(1)  # reproducible noise

    # x data: 20 evenly spaced points in [-1, 1], shape=(20, 1)
    x = torch.unsqueeze(torch.linspace(-1, 1, 20), dim=1)

    if objective == 'x2':
        clean_y = x.pow(2)
    elif objective == 'sinx':
        clean_y = torch.sin(3 * 3.14 * x)
    else:
        raise ValueError(
            "unknown objective %r, expected 'x2' or 'sinx'" % (objective,))

    # noisy y data, shape=(20, 1)
    y = clean_y + noise * torch.rand(x.size())

    # NOTE(review): anomaly detection is switched on globally here and is
    # never switched off again; it slows down backward(), which is included in
    # the measured training time. Consider removing it once debugging is done.
    torch.autograd.set_detect_anomaly(True)

    print(net)  # net architecture

    optimizer = torch.optim.SGD(net.parameters(), lr=0.2)
    loss_func = torch.nn.MSELoss()  # mean squared error for regression

    # train the network
    t0 = time.perf_counter()

    loss = None
    for _ in range(epochs):
        prediction = net(x)              # forward pass on the full batch
        loss = loss_func(prediction, y)  # arguments: (1) prediction, (2) target

        optimizer.zero_grad()  # clear gradients from the previous step
        loss.backward()        # backpropagation, compute gradients
        optimizer.step()       # apply gradients

    if loss is None:
        # No training step was run: report the loss of the untrained network
        # instead of failing on an unbound variable.
        with torch.no_grad():
            loss = loss_func(net(x), y)

    loss = loss.detach().numpy()
    tr_time = time.perf_counter() - t0

    return loss, tr_time

# Used in the HyperMapper solver. Takes a configuration (a dictionary of hyperparameter values) and returns a score based on those hyperparameters

In [4]:
def NAS_function(X):
    """
    Objective function handed to HyperMapper: build, train and score one network.

    :param X: dictionary with the hyperparameter values of one configuration;
        it is passed unchanged to json2pheno.
    :return: 100 times the training loss reported by trainer.
    """
    nin = 1
    nout = 1

    eps = 2000    # number of training epochs (alternative noted by the author: 5000)
    noise = 0.3
    my_net = json2pheno(X, nin, nout)

    # trainer also returns the training time; it is not part of the score for now
    loss, _ = trainer(my_net, eps, noise, objective='sinx')

    value = 100 * loss
    print(value)
    return value


# Makes the json scenario for the problem

In [6]:
import json
scenario = {}
scenario["application_name"] = "nas"
scenario["optimization_objectives"] = ["Value"]
scenario["optimization_iterations"] = 5
scenario["input_parameters"] = {}

n_nodes = {}
n_nodes["parameter_type"] = "ordinal"
n_nodes["values"] = [2**i for i in range(2, int(math.log(16, 2)) + 1)]

#n_lay = {}
#n_lay["parameter_type"] = "integer"
#n_lay["values"] = [1, 5]

active_0 = {}
active_0["parameter_type"] = "ordinal"
active_0["values"] = [0, 1]

skip_0 = {}
skip_0["parameter_type"] = "integer"
skip_0["values"] = [0, 4]

active_1 = {}
active_1["parameter_type"] = "ordinal"
active_1["values"] = [0, 1]

skip_1 = {}
skip_1["parameter_type"] = "integer"
skip_1["values"] = [0, 4]

active_2 = {}
active_2["parameter_type"] = "ordinal"
active_2["values"] = [0, 1]

skip_2 = {}
skip_2["parameter_type"] = "integer"
skip_2["values"] = [0, 4]

active_3 = {}
active_3["parameter_type"] = "integer"
active_3["values"] = [0, 1]

skip_3 = {}
skip_3["parameter_type"] = "integer"
skip_3["values"] = [0, 4]


scenario["input_parameters"]["n_nodes"] = n_nodes
#scenario["input_parameters"]["n_lay"] = n_lay
scenario["input_parameters"]["active_0"] = active_0
scenario["input_parameters"]["skip_0"] = skip_0
scenario["input_parameters"]["active_1"] = active_1
#scenario["input_parameters"]["skip_1"] = skip_1
scenario["input_parameters"]["active_2"] = active_2
#scenario["input_parameters"]["skip_2"] = skip_2
#scenario["input_parameters"]["active_3"] = active_3
#scenario["input_parameters"]["skip_3"] = skip_3
%cd
%cd "PycharmProjects/hypermapper/example_scenarios/quick_start"
with open("example_nas_scenario.json", "w") as scenario_file:
    json.dump(scenario, scenario_file, indent=4)

/home/samuel
/home/samuel/PycharmProjects/hypermapper/example_scenarios/quick_start


# Basically the main method. Optimizes the given function based on the given scenario. 

## Stores the results in nas_output_samples.csv

In [7]:
# Run the optimisation: import HyperMapper from its scripts folder and
# optimise NAS_function over the scenario written by the previous cell.

# `%cd` without an argument changes to the home directory, so the relative
# path on the next line does not depend on the previous working directory.
%cd
# The import below is resolved from the current working directory, hence the
# change into the scripts folder first.
%cd "PycharmProjects/hypermapper/scripts"
# Alternative that was tried instead of changing directory: extend sys.path.
#import sys
# sys.path.append('../../scripts')
#sys.path.append('scripts')
import hypermapper
# Back to the repository root: parameters_file below is relative to it.
%cd ".."

parameters_file = "example_scenarios/quick_start/example_nas_scenario.json"
# parameters_file = "example_branin_scenario.json"
# NAS_function is called with one configuration dict per evaluation and
# returns the value to optimise.
hypermapper.optimize(parameters_file, NAS_function)

/home/samuel
/home/samuel/PycharmProjects/hypermapper/scripts
/home/samuel/PycharmProjects/hypermapper
Design of experiment phase, number of doe samples = 10 .......
{'0': ['1', 'out'], '1': ['2'], '2': ['out']}
json2pheno(
  (0): Linear(in_features=1, out_features=8, bias=True)
  (1): Linear(in_features=8, out_features=8, bias=True)
  (2): Linear(in_features=8, out_features=8, bias=True)
  (out): Linear(in_features=8, out_features=1, bias=True)
)
8.850663900375366
{'0': ['1', 'out'], '1': ['out']}
json2pheno(
  (0): Linear(in_features=1, out_features=4, bias=True)
  (1): Linear(in_features=4, out_features=4, bias=True)
  (out): Linear(in_features=4, out_features=1, bias=True)
)
36.808255314826965
{'0': ['out']}
json2pheno(
  (0): Linear(in_features=1, out_features=4, bias=True)
  (out): Linear(in_features=4, out_features=1, bias=True)
)
47.031956911087036
{}
json2pheno(
  (out): Linear(in_features=1, out_features=1, bias=True)
)
47.1607506275177
{'0': ['out']}
json2pheno(
  (0): Linea

In [31]:
# List the current working directory; the listing below includes
# nas_output_samples.csv (presumably the results file of the run above).
%ls


branin_output_samples.csv  hypermapper_logfile.log  README.md  visualize.py
example_outputs/           LICENSE                  scripts/
example_scenarios/         nas_output_samples.csv   tests/
