In [1]:
import os
from concurrent.futures import ProcessPoolExecutor
from concurrent.futures import ThreadPoolExecutor
from functools import partial
import json

import neat
import multiprocess as mp

from features import extract_features_map
from metrics import do_opt_step, do_begin, do_end, score
from utils import run_command

In [2]:
nr_opt_map = {
    0: 'stop',
    1: 'f',
    2: 'l',
    3: 'c',
    4: 'C',
    5: 'U',
    6: 'n',
    7: 'D',
    8: 'E',
    9: 'v',
    10: 'e',
    11: 'j',
    12: 's',
    13: 'x',
    14: 'I',
    15: 'O',
    16: 'o',
    17: 'stop', # 'i',
    18: 'g',
    19: 'h',
    20: 'F',
    21: 'T',
    22: 'L',
    23: 'M',
    24: 'm',
    25: 'V',
    26: 'a',
    27: 't',
    28: 'r',
    29: 'p',
    30: 'S',
    31: 'u',
    32: 'd',
    
}

opt_nr_map = {
    'stop': 0,
    'f': 1,
    'l': 2,
    'c': 3,
    'C': 4,
    'U': 5,
    'n': 6,
    'D': 7,
    'E': 8,
    'v': 9,
    'e': 10,
    'j': 11,
    's': 12,
    'x': 13,
    'I': 14,
    'O': 15,
    'o': 16,
    'i': 17,
    'g': 18,
    'h': 19,
    'F': 20,
    'T': 21,
    'L': 22,
    'M': 23,
    'm': 24,
    'V': 25,
    'a': 26,
    't': 27,
    'r': 28,
    'p': 29,
    'S': 30,
    'u': 31,
    'd': 32,
}

In [3]:
def _score(yul_file, opt_steps, init_steps='', binary=None) -> float:
    if binary is None:
        out = run_command(f"solc --strict-assembly --bin --optimize --yul-optimizations '{opt_steps}:{init_steps}' {yul_file}")
        start_idx = out.index('Binary representation:') + 23
        res1 = out[start_idx:-1]
    else:
        res1 = binary

    out = run_command(f"solc --strict-assembly --bin --optimize {yul_file}")
    start_idx = out.index('Binary representation:') + 23
    res2 = out[start_idx:-1]
    
    return len(res2) / len(res1)

In [4]:
samples_dir = "./samples_real"
max_n = 50
max_rep = 5
max_input_repetition = 5
stop_stats = {
    "features_repetition": 0,
    "steps_repetition": 0,
    "max_steps_repetition": 0,
    "stop_condition": 0,
    "grow_too_much": 0,
}

features_stats = {}
outputs_stats = {}

def eval_genome(net, yul_file):
    # print("eval_genome " + yul_file)
    tmp_yul_file = yul_file
    print(yul_file)
    orig_size = 0
    last_size = 0
    rep = 0
    rep_index = 0
    input_rep = 0
    last_step = None
    opt_steps = []
    last_input_vec = None
    last_binary = None
    cached_input_output = {}
    
    # stats_features_repetition = 0
    # stats_steps_repetition = 0
    # stats_max_steps_repetition = 0
    # stats_stop_condition = 0
    
    # begin section
    #print("Begin on " + tmp_yul_file)
    opt_yul_code, _ = do_begin(tmp_yul_file)
    orig_size = len(opt_yul_code)
    tmp_yul_file = os.path.join('./cache', f"{hash(opt_yul_code)}_{yul_file.split('/')[-1]}")
    if not os.path.exists(tmp_yul_file):
        with open(tmp_yul_file, "w") as f:
            f.write(opt_yul_code)
    
    for i in range(max_n):
        #print("Features from " + tmp_yul_file + " steps: " + "".join(opt_steps))
        input_vec = extract_features_map(tmp_yul_file)
        #input_vec = input_vec + [opt_nr_map.get(last_step, 0)]
        last_output_vec = [0 for _ in range(33)]
        if last_step:
            last_output_vec[opt_nr_map[last_step]] = 1
        input_vec = input_vec + last_output_vec
        input_vec_hash = hash(tuple(input_vec))
        # features_stats[input_vec_hash] = input_vec
        # print(f"Process id: {os.getpid()}, feature vec: {input_vec}")
        if input_vec_hash in cached_input_output:
            output = list(cached_input_output[input_vec_hash])
        else:
            output = net.activate(input_vec)
            cached_input_output[input_vec_hash] = output
        print(f"Input vec: {input_vec_hash}")
        # outputs_stats[hash(tuple(output))] = output
        # last_output_vec = output
        best_output_value = max(output)
        potential_steps = [nr_opt_map[idx] for idx, value in enumerate(output) if best_output_value == value]
        
        if last_input_vec != input_vec:
            rep_index = 0
        else:
            input_rep += 1
            if input_rep >= max_input_repetition:
                # print("Break due to features repetition")
                #stats_features_repetition += 1
                stop_stats["features_repetition"] += 1
                break
        step = potential_steps[rep_index]
        
        
        if last_step == step:
            if last_input_vec != input_vec:
                rep += 1
            else:
                if len(potential_steps) > rep_index + 1:
                    rep_index += 1
                    rep = 0
                    step = potential_steps[rep_index]
                else:
                    # print("Break due to steps repetition")
                    # stats_steps_repetition += 1
                    stop_stats["steps_repetition"] += 1
                    break
        else:
            rep = 0
            rep_index = 0
        
        if rep == max_rep:
            # print("Break due to max step repetition")
            # stats_max_steps_repetition += 1
            stop_stats["max_steps_repetition"] += 1
            break
                
        if step == 'stop':
            # print("Break due to stop")
            # stats_stop_condition += 1
            stop_stats["stop_condition"] += 1
            break
        
        last_step = step
        last_input_vec = input_vec
        opt_yul_code, last_binary = do_opt_step(tmp_yul_file, step)
        last_size = len(opt_yul_code)
        opt_steps.append(step)
        tmp_yul_file = os.path.join('./cache', f"{hash(opt_yul_code)}_{yul_file.split('/')[-1]}")
        if not os.path.exists(tmp_yul_file):
            with open(tmp_yul_file, "w") as f:
                f.write(opt_yul_code)
                
        if last_size > orig_size * 3:
            # print("Break due to stop")
            # stats_stop_condition += 1
            stop_stats["grow_too_much"] += 1
            print("Blowing file " + tmp_yul_file + " due to steps " + "".join(opt_steps))
            return 0 # big penalty for this
                
    opt_steps.append('g')
        
    # return score(yul_file, "".join(opt_steps))
    result = score(yul_file, "".join(opt_steps), binary=last_binary, init_steps="")
    if result < 0.6:
       print("Fatal score (" + str(result) + ") for " + tmp_yul_file + " with steps " + "".join(opt_steps))
    
    # if result > 1.05 or len(opt_steps) > 40:
    #     print(f"opt steps for result {result}: " + "".join(opt_steps))
    #init_steps="fDnTOcmuO"
    
    # if stats_stop_condition > 0 and len(opt_steps) == 0:
    #     print("Stop immediately")
    #print(json.dumps(features_stats))
    #print(json.dumps(outputs_stats))
    #print(f"opt steps for result {result}: " + "".join(opt_steps))
    
    return result
    

# def create_eval_genomes(samples_dir, aggregate_fn):
#     def eval_genomes(genomes, config):
#         def eval_single_genome(genome_tuple):
#             genome_id, genome = genome_tuple
#             genome.fitness = 0.0
#             net = neat.nn.FeedForwardNetwork.create(genome, config)
#             genome.fitness = []
#             for file in os.listdir(samples_dir):
#                 genome.fitness.append(eval_genome(net, os.path.join(samples_dir, file)))
#             print("Fitness genome " + str(genome_id) + " max: " + str(max(genome.fitness)) + " min: " + str(min(genome.fitness)))
#             return aggregate_fn(genome.fitness)
#         with mp.Pool(8) as executor:
#             for genome_id, genome in genomes:
#                 genome.fitness.append(executor.)
#                 
#             
#             
#     return eval_genomes

#executor = mp.Pool(10)
executor = ThreadPoolExecutor(1)

# def create_eval_genomes(samples_dir, aggregate_fn):
#     def eval_genomes(genomes, config):
#         print("Number of genomes: " + str(len(genomes)))
#         for genome_id, genome in genomes:
#             genome.fitness = 0.0
#             net = neat.nn.FeedForwardNetwork.create(genome, config)
#             genome.fitness = []
#             # for file in os.listdir(samples_dir):
#             #     genome.fitness.append(eval_genome(net, os.path.join(samples_dir, file)))
#             
#             for result in executor.map(partial(eval_genome, net), [os.path.join(samples_dir, file) for file in os.listdir(samples_dir)]):
#                 # fitness, stats_features_repetition, stats_steps_repetition, stats_max_steps_repetition, stats_stop_condition = result
#                 genome.fitness.append(result)
#                 # stop_stats["features_repetition"] += stats_features_repetition
#                 # stop_stats["steps_repetition"] += stats_steps_repetition
#                 # stop_stats["max_steps_repetition"] += stats_max_steps_repetition
#                 # stop_stats["stop_condition"] += stats_stop_condition
#             aggregated_fitness = aggregate_fn(genome.fitness)
#             #print("Fitness genome " + str(genome_id) + " max: " + str(max(genome.fitness)) + " min: " + str(min(genome.fitness)) + ", fitness: " + str(aggregated_fitness))
#             genome.fitness = aggregated_fitness
#         
#         print(json.dumps(stop_stats))
#         print(run_command("ls -1 ./cache | wc -l"))
#         
#         # with open("stats.txt", "a") as f:
#         #     print(json.dumps(features_stats), file=f)
#         #     print(json.dumps(outputs_stats), file=f)
#         
# 
#     return eval_genomes

def create_eval_genomes(samples_dir, aggregate_fn):
    def eval_genomes(genomes, config):
        print("Number of genomes: " + str(len(genomes)))
        def _eval_genome(item):
            genome_id, genome = item
            genome.fitness = 0.0
            net = neat.nn.FeedForwardNetwork.create(genome, config)
            genome.fitness = []
            
            for result in [eval_genome(net, x) for x in [os.path.join(samples_dir, file) for file in os.listdir(samples_dir)]]:
                # fitness, stats_features_repetition, stats_steps_repetition, stats_max_steps_repetition, stats_stop_condition = result
                genome.fitness.append(result)
                # stop_stats["features_repetition"] += stats_features_repetition
                # stop_stats["steps_repetition"] += stats_steps_repetition
                # stop_stats["max_steps_repetition"] += stats_max_steps_repetition
                # stop_stats["stop_condition"] += stats_stop_condition
            aggregated_fitness = aggregate_fn(genome.fitness)
            #print("Fitness genome " + str(genome_id) + " max: " + str(max(genome.fitness)) + " min: " + str(min(genome.fitness)) + ", fitness: " + str(aggregated_fitness))
            genome.fitness = aggregated_fitness
            return genome_id, genome
        
        results = {k: v for k,v in executor.map(_eval_genome, genomes)}
        # for genome_id, genome in genomes:
        #     ext_genome = results[genome_id]
        #     genome.key = ext_genome.key
        #     genome.connections = ext_genome.connections
        #     genome.nodes = ext_genome.nodes
        #     genome.fitness = ext_genome.fitness
        
        print(json.dumps(stop_stats))
        print(run_command("ls -1 ./cache | wc -l"))
        
        # with open("stats.txt", "a") as f:
        #     print(json.dumps(features_stats), file=f)
        #     print(json.dumps(outputs_stats), file=f)
        

    return eval_genomes


In [5]:
def run(config_file):
    # Load configuration.
    config = neat.Config(neat.DefaultGenome, neat.DefaultReproduction,
                         neat.DefaultSpeciesSet, neat.DefaultStagnation,
                         config_file)

    # Create the population, which is the top-level object for a NEAT run.
    p = neat.Population(config)

    # Add a stdout reporter to show progress in the terminal.
    p.add_reporter(neat.StdOutReporter(True))
    stats = neat.StatisticsReporter()
    p.add_reporter(stats)
    p.add_reporter(neat.Checkpointer(5))

    # Run for up to 300 generations.
    avg = lambda n: sum(n)/len(n)
    median = lambda n: sorted(n)[len(n) // 2]
    winner = p.run(create_eval_genomes(samples_dir, min), 300)
    os.system('find ./cache -name "*.yul" -delete')

    # Display the winning genome.
    print('\nBest genome:\n{!s}'.format(winner))
    
    return winner
    
# Determine path to configuration file. This path manipulation is
# here so that the script will run successfully regardless of the
# current working directory.
local_dir = './'
config_path = os.path.join(local_dir, 'config-feedforward')
winner = run(config_path)


 ****** Running generation 0 ****** 

Number of genomes: 100
./samples_real/optimizor_club_puretea.yul
Extracting features from yul_file(./cache/9014585364101911315_optimizor_club_puretea.yul) or yul_json(True)
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1e-06, 1e-06, 1e-06, 1e-06]
Input vec: -7980379345261336227
./samples_real/strings.yul
Extracting features from yul_file(./cache/7768187775401957181_strings.yul) or yul_json(True)
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1e-06, 1e-06, 1e-06, 1e-06]
Input vec: -7980379345261336227
./samples_real/fibonacci.yul
Extracting features from yul_file(./cache/8708734855570369189_fibonacci.yul) or yul_json(True)
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1e-06, 1e-06, 1e-06, 1e-06]
Input vec: -7980379345261336227
./samples_real/chains.yul
Extracting features from yul_file(./cache/1533106304203082815_chains.yul) or yul_json(True)
[0.

KeyboardInterrupt: 

In [1]:
winner.fitness

NameError: name 'winner' is not defined

In [None]:
config_path = os.path.join(local_dir, 'config-feedforward')
config = neat.Config(neat.DefaultGenome, neat.DefaultReproduction,
                         neat.DefaultSpeciesSet, neat.DefaultStagnation,
                         config_path)
net = neat.nn.FeedForwardNetwork.create(winner, config)
eval_genome(net, 'samples_test/verifier.yul')