In [1]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import json
import pickle as pkl

from absl import app
from nasbench import api






In [2]:
MAX_NODES = 7
MAX_EDGES = 9
NASBENCH_TFRECORD = './data/nasbench_full.tfrecord'

INPUT = 'input'
OUTPUT = 'output'
CONV1X1 = 'conv1x1-bn-relu'
CONV3X3 = 'conv3x3-bn-relu'
MAXPOOL3X3 = 'maxpool3x3'

CODING = [INPUT]
CODING = CODING + [CONV1X1 + "_" + str(i) for i in range(0, (MAX_NODES - 2))]
CODING = CODING + [CONV3X3 + "_" + str(i) for i in range(0, (MAX_NODES - 2))]
CODING = CODING + [MAXPOOL3X3 + "_" + str(i) for i in range(0, (MAX_NODES - 2))]
CODING = CODING + [OUTPUT]

In [3]:
def rename_ops(ops):
    c1x1 = 0
    c3x3 = 0
    mp3x3 = 0
    new_ops = []
    for op in ops:
        if op == CONV1X1:
            new_ops = new_ops + [op + "_" + str(c1x1)]
            c1x1 = c1x1 + 1
        elif op == CONV3X3:
            new_ops = new_ops + [op + "_" + str(c3x3)]
            c3x3 = c3x3 + 1
        elif op == MAXPOOL3X3:
            new_ops = new_ops + [op + "_" + str(mp3x3)]
            mp3x3 = mp3x3 + 1
        else:
            new_ops = new_ops + [op]
    return new_ops


def encode_matrix(adj_matrix, ops):
    enc_matrix = np.zeros((len(CODING), len(CODING)))
    pos = [CODING.index(op) for op in ops]
    trans = dict()
    for i, ix in enumerate(pos):
        trans[i] = ix
    i, j = np.nonzero(adj_matrix)
    ix = [trans.get(n) for n in i]
    jy = [trans.get(n) for n in j]
    for p in zip(ix, jy):
        enc_matrix[p] = 1
    encoded = enc_matrix[np.triu_indices(len(CODING), k=1)]
    return encoded.astype(int)


def encode_solution(solution):
    adj_matrix = solution['module_adjacency']
    ops = rename_ops(solution['module_operations'])
    encoded = encode_matrix(adj_matrix, ops)    
    return encoded, solution['trainable_parameters'], ops


def summarize_fitness(computed_metrics, epochs=[108]):
    fitness = dict()
    for ep in epochs:
        training_time = 0
        train_acc = 0
        validation_acc = 0
        test_acc = 0
        for metrics in computed_metrics[ep]:
            training_time = metrics['final_training_time']
            train_acc = train_acc + metrics['final_train_accuracy']
            validation_acc = validation_acc + metrics['final_validation_accuracy']
            test_acc = test_acc + metrics['final_test_accuracy']
        training_time = training_time / len(computed_metrics[ep])
        train_acc = train_acc / len(computed_metrics[ep])
        validation_acc = validation_acc / len(computed_metrics[ep])
        test_acc = test_acc / len(computed_metrics[ep])
        fitness[ep] = {
            'training_time': training_time,
            'train_acc': train_acc,
            'validation_acc': validation_acc,
            'test_acc': test_acc}
    return fitness




def get_fitnesses():
    nasbench = api.NASBench(NASBENCH_TFRECORD)
    prev = ""
    print('{\n\t"coding": ', json.dumps(CODING), ",")
    print('\t"solutions": [\n')
    for unique_hash in nasbench.hash_iterator():
        fixed_metrics, computed_metrics = nasbench.get_metrics_from_hash(unique_hash)
        encoded, params, ops = encode_solution(fixed_metrics)
        fitness = summarize_fitness(computed_metrics, epochs=[4, 12, 36, 108])
        print('\t\t', prev, '{"encoded": ', encoded.tolist(),
              ', "tr_params": ', params, 
              ', "ops": ', json.dumps(ops), 
              ', "fitness": ', json.dumps(fitness), '}')
        if not prev:
            prev = ","
    print('\t]\n}')

In [11]:
def get_fitnesses_lcz42(path_to_eval_file):
    
    with open(path_to_eval_file, 'rb') as filehandle:
        # read the data as bina|ry data stream
        read_data_lcz_evals = pkl.load(filehandle)
    print('{\n\t"coding": ', json.dumps(CODING), ",")
    print('\t"solutions": [\n')
    prev = ""
    for m, ops, data in read_data_lcz_evals:
        ops = rename_ops(ops)
        encoded = encode_matrix(m, ops)
        params = data['trainable_params']
        print(data)
        print('\t\t', prev, '{"encoded": ', encoded.tolist(),
              ', "tr_params": ', params, 
              ', "ops": ', json.dumps(ops), 
              ', "fitness": ', json.dumps(str(data)), '}')
        if not prev:
            prev = ","
    print('\t]\n}')

In [12]:
PATH_TO_LCZ_EVALUATIONS = '/local_home/trao_ka/projects/results_from_server/eval_on_lcz42_sen2_d2_36e/final_meta.data'
str_to_json = get_fitnesses_lcz42(PATH_TO_LCZ_EVALUATIONS)

{
	"coding":  ["input", "conv1x1-bn-relu_0", "conv1x1-bn-relu_1", "conv1x1-bn-relu_2", "conv1x1-bn-relu_3", "conv1x1-bn-relu_4", "conv3x3-bn-relu_0", "conv3x3-bn-relu_1", "conv3x3-bn-relu_2", "conv3x3-bn-relu_3", "conv3x3-bn-relu_4", "maxpool3x3_0", "maxpool3x3_1", "maxpool3x3_2", "maxpool3x3_3", "maxpool3x3_4", "output"] ,
	"solutions": [

{'training_time': 1126.669, 'trainable_params': 1767953, 'train_accuracy': 0.7897423, 'average_train_accuracy': 0.8636363636363636, 'kappaCohen_train': 0.7671563747664638, 'test_accuracy': 0.7926363, 'average_test_accuracy': 0.8776595744680851, 'kappaCohen_test': 0.7704467968369495}
		  {"encoded":  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0