In [1]:
import numpy as np
# from src.utils import *
from utils import *
from tqdm import tqdm
import logging
import pickle

# unary_operators=[np.sin, np.cos, np.exp, np.abs, np.log, np.tan]
# binary_operators=[np.add, np.subtract, np.multiply, np.divide]


# operators = unary_operators + binary_operators

In [2]:
conf={
"NUM_POPULATION": 100,
"NUM_GENERATIONS": 300,
"MAX_DEPTH_INITIAL": 7,
"DEDUPE_INTERVAL": 15,
"ELITISM": False,
"ELITE_COUNT": 3,
"TOURNAMENT_SUBSET_SIZE": 3,
"TOURNAMENT_WINNER_SIZE": 30,
"XOVER": 0.8,
"MUTATION": 0.8,
"BREED_NEW": 15,
"KILL_AGE": 16,
"MAX_COMPLEXITY": 150,
"CONSTANT_FIT_INTERVAL": 30,
"CONSTANT_FIT_ITERATION": 6,
"UNARY_OPERATORS": [np.sin, np.cos, np.exp, np.abs, np.log, np.tan],
"BINARY_OPERATORS": [np.add, np.subtract, np.multiply, np.divide]
}

In [3]:
def evolve(train_x,train_y,val_x,val_y, conf, logger, my_population=None):
    # Set up the logger
    NUM_POPULATION = conf["NUM_POPULATION"]
    NUM_GENERATIONS = conf["NUM_GENERATIONS"]
    MAX_DEPTH_INITIAL = conf["MAX_DEPTH_INITIAL"]
    DEDUPE_INTERVAL = conf["DEDUPE_INTERVAL"]
    ELITISM = conf["ELITISM"]
    ELITE_COUNT = conf["ELITE_COUNT"]
    TOURNAMENT_SUBSET_SIZE = conf["TOURNAMENT_SUBSET_SIZE"]
    TOURNAMENT_WINNER_SIZE = conf["TOURNAMENT_WINNER_SIZE"]
    XOVER = conf["XOVER"]
    MUTATION = conf["MUTATION"]
    BREED_NEW = conf["BREED_NEW"]
    KILL_AGE = conf["KILL_AGE"]
    MAX_COMPLEXITY = conf["MAX_COMPLEXITY"]
    CONSTANT_FIT_INTERVAL = conf["CONSTANT_FIT_INTERVAL"]
    CONSTANT_FIT_ITERATION = conf["CONSTANT_FIT_ITERATION"]
    UNARY_OPERATORS = conf["UNARY_OPERATORS"]
    BINARY_OPERATORS = conf["BINARY_OPERATORS"]

    logger.info(f"Population is being created with a size: {NUM_POPULATION}")
    if my_population==None:
        my_population = create_population(NUM_POPULATION,MAX_DEPTH_INITIAL,train_x.shape[0], UNARY_OPERATORS, BINARY_OPERATORS)
        logger.info("Population has been created.")
        logger.info("Assigning individual fitness values...")
    assign_population_fitness_train(my_population,train_x,train_y)
    assign_population_fitness_val(my_population,val_x,val_y)
    logger.info(f"Population size: {len(my_population)}")
    logger.info(f"Population mean fitness: {calculate_mean_fitness(my_population)}")
    logger.info(f"Population mean complexity: {calculate_mean_complexity(my_population)}")


    # Sort the population by fitness (ascending), then take the top 5
    top_5_individuals = top_n_individuals(my_population, 5)

    # Print the top 5 individuals with the minimum fitness
    for i, individual in enumerate(top_5_individuals, 1):
        logger.info(f"Top {i}: Fitness = {individual.fitness:.5f} Fitness_val = {individual.fitness_val:.5f} Genome = {individual.genome}")
    #----------------------------------------------------------------------------
    assign_population_fitness_train(my_population,train_x,train_y)
    assign_population_fitness_val(my_population,val_x,val_y)
    best_result = top_n_individuals(my_population, 1)[0]
    for generation in tqdm(range(NUM_GENERATIONS)):
        logger.info(f"Starting generation {generation + 1}/{NUM_GENERATIONS}")
        num_xover=0
        num_mut=0
        num_succesfull_mut=0
        age_population(my_population)
        

        # Kill eldest individuals
        kill_eldest(my_population, KILL_AGE)

        best_ind = tournament_selection(my_population,TOURNAMENT_SUBSET_SIZE,TOURNAMENT_WINNER_SIZE,ELITISM=ELITISM, elite_count=ELITE_COUNT)

        
        for _ in range(BREED_NEW): # CHECK
            if random.random() < XOVER:
                # Xover the best individual and replace worst individuals
                parent1 = my_population[best_ind[np.random.choice(len(best_ind))]]
                parent2 = my_population[best_ind[np.random.choice(len(best_ind))]]
                child1, child2 = crossover(parent1,parent2)
                my_population.append(child1)
                my_population.append(child2)
                num_xover+=1

            if random.random() < MUTATION:
                # Mutate best individuals and replace worst individuals
                parent = my_population[best_ind[np.random.choice(len(best_ind))]]
                child, success = mutation_w_sa(parent, train_x.shape[0], train_x, train_y)
                num_mut+=1
                if success:                
                    my_population.append(child)
                    num_succesfull_mut+=1

        assign_population_fitness_train(my_population,train_x,train_y)
        assign_population_fitness_val(my_population,val_x,val_y)

        # logger.info(f"xover: {num_xover}, mut: {num_mut}, succesfull mut: {num_succesfull_mut}")
        simplify_constant_population(my_population)
        # Update elites age as 0
        elites = top_n_individuals(my_population, ELITE_COUNT)
        if best_result.fitness_val > elites[0].fitness_val:
            best_result = elites[0]
            logger.info(f"New best result found: {best_result}")
        
        for elite in elites:
            my_population.remove(elite)
            elite.age = 0
            my_population.append(elite)

        # Deduplicate every few iterations
        if generation % DEDUPE_INTERVAL == DEDUPE_INTERVAL-1:
            init_population = len(my_population)
            my_population = deduplicate_population(my_population)
            dedup_population = len(my_population)
            simplify_operation_population(my_population)
            simplified_operations = len(my_population)
            kill_constant(my_population)
            no_constant = len(my_population)
            kill_complex(my_population, MAX_COMPLEXITY)
            no_complex = len(my_population)
            # simplify_population(my_population)
            

            logger.info(f"Initial: {init_population}, Deduplicated: {init_population-dedup_population}, Operation simplified: {dedup_population-simplified_operations}, Constant: {simplified_operations-no_constant}, Complex: {no_constant - no_complex}, Final: {no_complex}")
            # logger.info(f"Population mean complexity: {calculate_mean_complexity(my_population)}")
            logger.info(f"Population mean fitness: {calculate_mean_fitness(my_population)}")


            elites = top_n_individuals(my_population, ELITE_COUNT)
            for elite in elites:
                my_population.remove(elite)
                elite.age = 0
                my_population.append(elite)
                
            best_5 = top_n_individuals(my_population, 5)
            for ind in range(len(best_5)):
                logger.info(f"{ind}. Fitness: {best_5[ind].fitness:.5f}, Fitness_val = {best_5[ind].fitness_val:.5f}, Genome: {best_5[ind].genome}, Age: {best_5[ind].age}, Complexity: {best_5[ind].genome.complexity}")
        
            if best_5[0].fitness_val < 0.0001:
                logger.info(f"Early stopping triggered at generation {generation + 1} due to best fitness = {best_5[0].fitness_val:.5f}")
                break
        if len(my_population) > NUM_POPULATION*2: # If too much individual, remove some
            my_population = top_n_individuals(my_population,NUM_POPULATION*2)
        

        # NOTE: We decide not to use this method. It seems that it has very few effects and it requires a lot of computational resource
        #     # Fit constants of every individual
        # if generation % CONSTANT_FIT_INTERVAL == CONSTANT_FIT_INTERVAL-1:
        #     print(f"Generation {generation}: Mean fitness {calculate_mean_fitness(my_population)}")
        #     for ind in range(len(my_population)):
        #         stronger_inv = fit_constants(my_population[ind],CONSTANT_FIT_ITERATION, train_x, train_y)
        #         my_population[ind] = stronger_inv
        #     assign_population_fitness_val(my_population,val_x,val_y)
        #     print(f"Population mean fitness after constant fit: {calculate_mean_fitness(my_population)}")
        #     best_5 = top_n_individuals(my_population, 5)
        #     for ind in range(len(best_5)):
        #         print(f"{ind}. Fitness: {best_5[ind].fitness:.5f}, Fitness_val = {best_5[ind].fitness_val:.5f}, Genome: {best_5[ind].genome}, Age: {best_5[ind].age}, Complexity: {best_5[ind].genome.complexity}")
        

    my_population = deduplicate_population(my_population)
    #-------------------------------------------------------------------------------------------------------
    assign_population_fitness_train(my_population,train_x,train_y)
    # Sort the population by fitness (ascending), then take the top 5
    top_5_individuals = top_n_individuals(my_population, 15)

    # Print the best individual
    logger.info(f"Best individual: Fitness_val = {best_result.fitness_val}: Genome = {best_result.genome}")
    # Print the top 5 individuals with the minimum fitness in population
    for i, individual in enumerate(top_5_individuals, 1):
        logger.info(f"Top {i}: Fitness = {individual.fitness} Fitness_val = {individual.fitness_val} Genome = {individual.genome}")

    # Print the mean fitness of the population
    logger.info(f"Mean fitness of the population: {calculate_mean_fitness(my_population)}")
    logger.info(f"Population size: {len(my_population)}")
    logger.info(f"Population mean complexity: {calculate_mean_complexity(my_population)}")
    
    return my_population

In [6]:
def evolve_and_migrate(dataset_id):
    # Set up the logger
    # BUG: We put logger config there in order to create different log file each time we call this function. It turns out that it is not the wat logger works. Since it didnt affect the result, we just ignore it
    logging.basicConfig(
        filename=f"evolve_and_migrate_{dataset_id}.log",  # Log file
        level=logging.INFO,        # Logging level
        format="%(asctime)s - %(levelname)s - %(message)s",  # Log format
        datefmt="%Y-%m-%d %H:%M:%S",  # Date format
    )
    logger = logging.getLogger(__name__)

    problem = np.load(f"../data/problem_{dataset_id}.npz")
    x = problem["x"]
    y = problem["y"]
    logger.info(f"x.shape: {x.shape}")
    logger.info(f"y.shape: {y.shape}")
    x_train, y_train, x_val, y_val = split_dataset(x,y,0.8)
    logger.info(f"x_train.shape: {x_train.shape}")
    logger.info(f"x_val.shape: {x_val.shape}")
    logger.info(f"y_train.shape: {y_train.shape}")
    logger.info(f"y_val.shape: {y_val.shape}")


    print("x_train.shape: ", x_train.shape)
    print("x_val.shape: ", x_val.shape)
    print("y_train.shape: ", y_train.shape)
    print("y_val.shape: ", y_val.shape)

    TOTAL_GENERATION_NUM = conf["NUM_GENERATIONS"] 
    conf["NUM_GENERATIONS"]=TOTAL_GENERATION_NUM//3
    conf["MAX_COMPLEXITY"]=100
    logger.info("First batch of training started")

    logger.info("my_pop_1 training started")
    my_pop_1 = evolve(x_train,y_train,x_val,y_val,conf,logger)
    with open("my_pop_1_first", "wb") as fb:
        pickle.dump(my_pop_1, fb)

    logger.info("my_pop_2 training started")
    my_pop_2 = evolve(x_train,y_train,x_val,y_val,conf,logger)
    with open("my_pop_2_first", "wb") as fb:
        pickle.dump(my_pop_2, fb)

    logger.info("my_pop_3 training started")
    my_pop_3 = evolve(x_train,y_train,x_val,y_val,conf,logger)
    with open("my_pop_3_first", "wb") as fb:
        pickle.dump(my_pop_3, fb)

    logger.info("my_pop_4 training started")
    my_pop_4 = evolve(x_train,y_train,x_val,y_val,conf,logger)
    with open("my_pop_4_first", "wb") as fb:
        pickle.dump(my_pop_4, fb)

    logger.info("Migration between 1-2 and 3-4")
    my_pop_1, my_pop_2 = migration(my_pop_1, my_pop_2, 15)
    my_pop_3, my_pop_4 = migration(my_pop_3, my_pop_4, 15)

    conf["MAX_COMPLEXITY"]=TOTAL_GENERATION_NUM//3
    logger.info("Second batch of training started")

    logger.info("my_pop_1 training started")
    my_pop_1 = evolve(x_train,y_train,x_val,y_val,conf,logger,my_pop_1)
    with open("my_pop_1_second", "wb") as fb:
        pickle.dump(my_pop_1, fb)

    logger.info("my_pop_2 training started")
    my_pop_2 = evolve(x_train,y_train,x_val,y_val,conf,logger,my_pop_2)
    with open("my_pop_2_second", "wb") as fb:
        pickle.dump(my_pop_2, fb)

    logger.info("my_pop_3 training started")
    my_pop_3 = evolve(x_train,y_train,x_val,y_val,conf,logger,my_pop_3)
    with open("my_pop_3_second", "wb") as fb:
        pickle.dump(my_pop_3, fb)

    logger.info("my_pop_4 training started")
    my_pop_4 = evolve(x_train,y_train,x_val,y_val,conf,logger,my_pop_4)
    with open("my_pop_4_second", "wb") as fb:
        pickle.dump(my_pop_4, fb)    

    logger.info("Migration between 1-4 and 2-3")
    my_pop_1, my_pop_4 = migration(my_pop_1, my_pop_4, 15)
    my_pop_2, my_pop_3 = migration(my_pop_2, my_pop_3, 15)

    conf["MAX_COMPLEXITY"]=TOTAL_GENERATION_NUM//3
    logger.info("Third batch of training started")
    
    logger.info("my_pop_1 training started")
    my_pop_1 = evolve(x_train,y_train,x_val,y_val,conf,logger,my_pop_1)
    with open("my_pop_1_third", "wb") as fb:
        pickle.dump(my_pop_1, fb)

    logger.info("my_pop_2 training started")
    my_pop_2 = evolve(x_train,y_train,x_val,y_val,conf,logger,my_pop_2)
    with open("my_pop_2_third", "wb") as fb:
        pickle.dump(my_pop_2, fb)

    logger.info("my_pop_3 training started")
    my_pop_3 = evolve(x_train,y_train,x_val,y_val,conf,logger,my_pop_3)
    with open("my_pop_3_third", "wb") as fb:
        pickle.dump(my_pop_3, fb)

    logger.info("my_pop_4 training started")
    my_pop_4 = evolve(x_train,y_train,x_val,y_val,conf,logger,my_pop_4)
    with open("my_pop_4_third", "wb") as fb:
        pickle.dump(my_pop_4, fb)

    return my_pop_1, my_pop_2, my_pop_3, my_pop_4


In [None]:
pop_1, pop_2, pop_3, pop_4 = evolve_and_migrate(0)

In [None]:
pop_1, pop_2, pop_3, pop_4 = evolve_and_migrate(1)

In [5]:
pop_1, pop_2, pop_3, pop_4 = evolve_and_migrate(2)

x_train.shape:  (3, 4000)
x_val.shape:  (3, 1000)
y_train.shape:  (4000,)
y_val.shape:  (1000,)


100%|██████████| 100/100 [13:52<00:00,  8.32s/it]
100%|██████████| 100/100 [11:57<00:00,  7.17s/it]
100%|██████████| 100/100 [11:19<00:00,  6.80s/it]
100%|██████████| 100/100 [14:18<00:00,  8.58s/it]
100%|██████████| 100/100 [23:12<00:00, 13.92s/it]
100%|██████████| 100/100 [28:51<00:00, 17.32s/it]
100%|██████████| 100/100 [22:03<00:00, 13.24s/it]
100%|██████████| 100/100 [20:19<00:00, 12.19s/it]
100%|██████████| 100/100 [36:11<00:00, 21.72s/it]
100%|██████████| 100/100 [32:43<00:00, 19.64s/it]
100%|██████████| 100/100 [31:58<00:00, 19.18s/it]
100%|██████████| 100/100 [32:09<00:00, 19.30s/it]


In [6]:
all_ind = pop_1 + pop_2 + pop_3 + pop_4


In [7]:
top_5_individuals = top_n_individuals(all_ind, 15)

# Print the best individual
print(f"Best individual: Fitness_val = {top_5_individuals[0].fitness_val}: Genome = {top_5_individuals[0].genome}")
print()
# Print the top 5 individuals with the minimum fitness in population
for i, individual in enumerate(top_5_individuals, 1):
    print(f"Top {i}: Fitness = {individual.fitness:.3f} | Fitness_val = {individual.fitness_val:.3f} | Complexity = {individual.genome} Genome = {individual.genome}")


Best individual: Fitness_val = 25935122278937.34: Genome = (abs((x[0] + x[0])) * ((exp(abs(x[0])) + abs(((abs((((x[0] - x[2]) + x[0]) / x[0])) + (x[0] + (((((x[0] + ((x[0] + (x[0] + (x[0] - x[2]))) * (abs((((x[0] + (x[0] + (((x[0] / x[0]) + (x[0] + ((x[0] - x[2]) + x[0]))) + (abs(x[2]) + x[0])))) * x[0]) + x[0])) + abs(x[0])))) + x[0]) + (x[0] + x[0])) + (x[0] + x[0])) * abs((x[0] + x[0]))))) + x[2]))) * (x[0] + ((x[0] + ((x[0] + x[0]) + x[0])) + x[0]))))

Top 1: Fitness = 25366402689227.414 | Fitness_val = 25935122278937.340 | Complexity = (abs((x[0] + x[0])) * ((exp(abs(x[0])) + abs(((abs((((x[0] - x[2]) + x[0]) / x[0])) + (x[0] + (((((x[0] + ((x[0] + (x[0] + (x[0] - x[2]))) * (abs((((x[0] + (x[0] + (((x[0] / x[0]) + (x[0] + ((x[0] - x[2]) + x[0]))) + (abs(x[2]) + x[0])))) * x[0]) + x[0])) + abs(x[0])))) + x[0]) + (x[0] + x[0])) + (x[0] + x[0])) * abs((x[0] + x[0]))))) + x[2]))) * (x[0] + ((x[0] + ((x[0] + x[0]) + x[0])) + x[0])))) Genome = (abs((x[0] + x[0])) * ((exp(abs(x[0])) + ab

In [None]:
pop_1, pop_2, pop_3, pop_4 = evolve_and_migrate(3)

In [None]:
pop_1, pop_2, pop_3, pop_4 = evolve_and_migrate(4)

In [None]:
pop_1, pop_2, pop_3, pop_4 = evolve_and_migrate(5)

In [None]:
pop_1, pop_2, pop_3, pop_4 = evolve_and_migrate(6)

In [None]:
pop_1, pop_2, pop_3, pop_4 = evolve_and_migrate(7)

In [None]:
pop_1, pop_2, pop_3, pop_4 = evolve_and_migrate(8)