Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ LineSearches = "d3d80556-e9d4-5f37-9878-2ab0fcc64255"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
LoopVectorization = "bdcacae8-1622-11e9-2a5c-532679323890"
NearestNeighbors = "b8a86587-4115-5ab1-83bc-aa920d37bbce"
Optim = "429524aa-4258-5aef-a3af-852621145aeb"
OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a"
Expand All @@ -30,6 +31,7 @@ Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
Tensors = "48a634ad-e948-5137-8d70-aa71f2a747f4"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
Expand Down
2 changes: 1 addition & 1 deletion examples/Main_min_bench.jl
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,4 @@ y_data = @. x_data[:,1] * x_data[:,1] + x_data[:,1] * x_data[:,2] - 2 * x_data[:

# Define the regressor
regressor = GepRegressor(number_features)
@btime fit!(regressor, epochs, population_size, x_data', y_data; loss_fun="mse")
@btime fit!(regressor, epochs, population_size, x_data', y_data; loss_fun="mse", population_sampling_multiplier=1000)
2 changes: 1 addition & 1 deletion paper/ConstraintViaSBP.jl
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ function main()
@show ("Current case: ", case_name)
#gep_params
epochs = 1000
population_size = 1500
population_size = 200

results = DataFrame(Seed=[],
Name=String[], NoiseLeve=String[], Fitness=Float64[], Equation=String[], R2_test=Float64[],
Expand Down
232 changes: 124 additions & 108 deletions src/Entities.jl

Large diffs are not rendered by default.

6 changes: 4 additions & 2 deletions src/GeneExpressionProgramming.jl
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,9 @@ import .GepUtils:
train_test_split,
ARITY_LIB_COMMON,
FUNCTION_LIB_COMMON,
FUNCTION_STRINGIFY
FUNCTION_STRINGIFY,
one_hot_mean,
select_n_samples_lhs

# Import selection mechanisms
import .EvoSelection:
Expand Down Expand Up @@ -275,7 +277,7 @@ export equal_unit_forward, mul_unit_forward, div_unit_forward,
export find_indices_with_sum, compile_djl_datatype,
optimize_constants!, minmax_scale, isclose,
save_state, load_state,
train_test_split
train_test_split, one_hot_mean, select_n_samples_lhs

# Export history recording functionality
export HistoryRecorder, OptimizationHistory,
Expand Down
109 changes: 75 additions & 34 deletions src/Gep.jl
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ using Logging
using Printf
using Base.Threads: SpinLock
using .Threads
using Distributions

export runGep

Expand Down Expand Up @@ -151,22 +152,24 @@ Performs one evolutionary step in the GEP algorithm, creating and evaluating new
- Operations are performed in parallel using multiple threads
"""
@inline function perform_step!(population::Vector{Chromosome}, parents::Vector{Chromosome}, next_gen::Vector{Chromosome},
toolbox::Toolbox, mating_size::Int)

toolbox::Toolbox, mating_size::Int, generation::Int, max_generation::Int)
@inbounds Threads.@threads for i in 1:2:mating_size-1
next_gen[i] = parents[i]
next_gen[i+1] = parents[i+1]

genetic_operations!(next_gen, i, toolbox)
genetic_operations!(next_gen, i, toolbox;
generation=generation, max_generation=max_generation, parents=parents)

compile_expression!(next_gen[i]; force_compile=true)
compile_expression!(next_gen[i+1]; force_compile=true)

end

Threads.@threads for i in 1:mating_size-1
Threads.@threads for i in eachindex(next_gen)
try
population[end-i] = next_gen[i]
population[end-i] = population[end-mating_size-i]
population[end-mating_size-i] = next_gen[i]
#@show ("Position $i - new insert $(length(population)-mating_size-i) - $(pointer_from_objref(next_gen[i]))")
catch e
error_message = sprint(showerror, e, catch_backtrace())
@error "Error in perform_step!: $error_message"
Expand All @@ -175,10 +178,6 @@ Performs one evolutionary step in the GEP algorithm, creating and evaluating new
end


# Fallback surrogate update: a no-op for any `EvaluationStrategy`; always returns `nothing`.
@inline update_surrogate!(::EvaluationStrategy) = nothing

"""
perform_correction_callback!(population::Vector{Chromosome}, epoch::Int,
correction_epochs::Int, correction_amount::Real,
Expand Down Expand Up @@ -212,14 +211,51 @@ Applies correction operations to ensure dimensional homogeneity in chromosomes.
compile_expression!(population[i]; force_compile=true)
population[i].dimension_homogene = true
else
population[i].fitness = (population[i].fitness[1]+distance,)
#population[i].fitness += distance
end
end
end
end
end


"""
equation_characterization_default(population::Vector{Chromosome}, n_samples::Int; inputs_::Int=0)

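Characterizes every chromosome in `population` by simple numeric features (the mean output on a random probe dataset and the length of its raw expression) and passes these features, together with `n_samples`, to `select_n_samples_lhs` (Latin hypercube sampling). Chromosomes that are not compiled, or whose evaluation throws, receive `Inf` features. `inputs_` sets the number of columns of the probe dataset (10 when left at 0).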
"""
@inline function equation_characterization_default(population::Vector{Chromosome}, n_samples::Int; inputs_::Int=0)
    # NOTE(review): `length(length(...))` is the length of an integer, which is always 1,
    # so `n_coeffs` ends up as 1 for every toolbox and the `n_coeffs > 1` branch below
    # looks unreachable. Confirm whether `length(preamble_syms)` was intended.
    preamble = population[1].toolbox.preamble_syms
    n_coeffs = isempty(preamble) ? 1 : length(length(preamble))

    # Two feature rows per coefficient slot, one column per chromosome.
    feature_matrix = zeros(2 * n_coeffs, length(population))

    # Random probe inputs in [0, 1]: 100 rows, `inputs_` columns (10 when `inputs_` is 0).
    n_probe_cols = inputs_ == 0 ? 10 : inputs_
    probe_data = rand(Uniform(0, 1), 100, n_probe_cols)

    Threads.@threads for col in eachindex(population)
        chrom = population[col]
        if !chrom.compiled
            # Chromosomes without a compiled function are marked with all-Inf features.
            feature_matrix[:, col] .= Inf
        else
            try
                ops = chrom.toolbox.operators_
                if n_coeffs > 1
                    for k in 1:n_coeffs
                        feature_matrix[k, col] = mean(chrom.compiled_function[k](probe_data, ops))
                        # NOTE(review): row `k + 1` is also the mean row of the next
                        # iteration, so this length is overwritten for all but the last
                        # `k` — verify the intended row layout.
                        feature_matrix[k+1, col] = length(chrom.expression_raw[k])
                    end
                else
                    # Row 1: mean output on the probe data; row 2: raw expression length.
                    feature_matrix[n_coeffs, col] = mean(chrom.compiled_function(probe_data, ops))
                    feature_matrix[n_coeffs+1, col] = length(chrom.expression_raw)
                end
            catch
                # Best effort: any failure while evaluating marks the chromosome with Inf.
                feature_matrix[:, col] .= Inf
            end
        end
    end

    # The selection itself is delegated; `runGep` uses the result to index `population`.
    return select_n_samples_lhs(feature_matrix, n_samples)
end


"""
runGep(epochs::Int, population_size::Int, toolbox::Toolbox, evalStrategy::EvaluationStrategy;
hof::Int=3, correction_callback::Union{Function,Nothing}=nothing,
Expand Down Expand Up @@ -275,9 +311,11 @@ The evolution process stops when either:
correction_amount::Real=0.6,
tourni_size::Int=3,
optimization_epochs::Int=500,
file_logger_callback::Union{Function, Nothing}=nothing,
save_state_callback::Union{Function, Nothing}=nothing,
load_state_callback::Union{Function, Nothing}=nothing)
file_logger_callback::Union{Function,Nothing}=nothing,
save_state_callback::Union{Function,Nothing}=nothing,
load_state_callback::Union{Function,Nothing}=nothing,
update_surrogate_callback::Union{Function,Nothing}=nothing,
population_sampling_multiplier::Int=100)

recorder = HistoryRecorder(epochs, Tuple)
mating_ = toolbox.gep_probs["mating_size"]
Expand All @@ -287,36 +325,41 @@ The evolution process stops when either:
fit_cache = Dict{Vector{Int8},Tuple}()
cache_lock = SpinLock()

population, start_epoch = isnothing(load_state_callback) ? (generate_population(population_size, toolbox), 1) : load_state_callback()

initial_size = isnothing(toolbox.operators_) ? population_size + mating_size : population_size * population_sampling_multiplier
population, start_epoch = isnothing(load_state_callback) ? (generate_population(initial_size, toolbox), 1) : load_state_callback()
if start_epoch <= 1 & !isnothing(toolbox.operators_)
population = population[equation_characterization_default(population, population_size + mating_size)]
end

next_gen = Vector{eltype(population)}(undef, mating_size)
progBar = Progress(epochs; showspeed=true, desc="Training: ")
prev_best = (typemax(Float64),)

for epoch in start_epoch:epochs
same = Atomic{Int}(0)
perform_correction_callback!(population, epoch, correction_epochs, correction_amount, correction_callback)


Threads.@threads for i in eachindex(population)
if isnan(mean(population[i].fitness))
cache_value = nothing
lock(cache_lock) do
cache_value = get(fit_cache, population[i].expression_raw, nothing)
end
perform_correction_callback!(population[1:population_size], epoch, correction_epochs, correction_amount, correction_callback)

Threads.@threads for i in eachindex(population[1:population_size])
if isnan(mean(population[i].fitness))
key = copy(population[i].expression_raw)
cache_value = get(fit_cache, key, nothing)
if isnothing(cache_value)

population[i].fitness = compute_fitness(population[i], evalStrategy)
lock(cache_lock)
fit_cache[population[i].expression_raw] = population[i].fitness
fit_cache[key] = population[i].fitness
unlock(cache_lock)
else
atomic_add!(same, 1)
population[i].fitness = cache_value
end
end
end


sort!(population, by=x -> mean(x.fitness))
Threads.@threads for index in eachindex(population)
Threads.@threads for index in eachindex(population[1:population_size])
fits_representation[index] = population[index].fitness
end

Expand All @@ -337,24 +380,22 @@ The evolution process stops when either:
(:validation_loss, @sprintf("%.6e", mean(val_loss)))
])

update_surrogate!(evalStrategy)
!isnothing(update_surrogate_callback) && update_surrogate_callback(evalStrategy)
!isnothing(evalStrategy.break_condition) && evalStrategy.break_condition(population[1:population_size], epoch) && break

if !isnothing(evalStrategy.break_condition) && evalStrategy.break_condition(population, epoch)
break
end

if length(fits_representation[1]) == 1
selectedMembers = tournament_selection(fits_representation, mating_size, tourni_size)
selectedMembers = tournament_selection(fits_representation[1:mating_size], mating_size, tourni_size)
else
selectedMembers = nsga_selection(fits_representation)
end

!isnothing(file_logger_callback) && file_logger_callback(population, epoch, selectedMembers)
!isnothing(file_logger_callback) && file_logger_callback(population[1:population_size], epoch, selectedMembers)
!isnothing(save_state_callback) && save_state_callback(population, epoch)

if epoch < epochs
parents = population[selectedMembers.indices]
perform_step!(population, parents, next_gen, toolbox, mating_size)
perform_step!(population, parents, next_gen, toolbox, mating_size, epoch, epochs)
end

end
Expand Down
23 changes: 15 additions & 8 deletions src/Losses.jl
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ module LossFunction
export get_loss_function
using Statistics
using LoopVectorization
using Random

function floor_to_n10p(x::T) where T<:AbstractFloat
abs_x = abs(x)
Expand All @@ -98,15 +99,21 @@ function xicor(y_true::AbstractArray{T}, y_pred::AbstractArray{T}; ties::Bool=tr
end
end

tie_indices = tie_counts .> 1
mean_ties = mean(tie_counts[tie_indices])
tie_groups = Dict{Int, Vector{Int}}()
for i in 1:n
val = r[i]
if haskey(tie_groups, val)
push!(tie_groups[val], i)
else
tie_groups[val] = [i]
end
end

Threads.@threads for i in 1:n
if tie_counts[i] > 1
tie_group = findall(==(r[i]), r)
shuffled = Random.shuffle(0:(tie_counts[i]-1))
for (idx, group_idx) in enumerate(tie_group)
r[group_idx] = r[i] - shuffled[idx]
for (val, group) in tie_groups
if length(group) > 1
shuffled = Random.shuffle(0:(length(group)-1))
for (idx, group_idx) in enumerate(group)
r[group_idx] = val - shuffled[idx]
end
end
end
Expand Down
Loading