Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 20 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -150,17 +150,35 @@ fit!(regressor, epochs, population_size, loss_new)

# Supported `Engines` for Symbolic Evaluation
- DynamicExpressions.jl
- Flux.jl --> in development
- Flux.jl --> should be utilized when performing tensor regression


# References
- [1] Ferreira, C. (2001). Gene Expression Programming: a New Adaptive Algorithm for Solving Problems. Complex Systems, 13.
- [2] Reissmann, M., Fang, Y., Ooi, A., & Sandberg, R. (2024). Constraining genetic symbolic regression via semantic backpropagation. arXiv. https://arxiv.org/abs/2409.07369
- [2] Reissmann, M., Fang, Y., Ooi, A. S. H., & Sandberg, R. D. (2025). Constraining genetic symbolic regression via semantic backpropagation. Genetic Programming and Evolvable Machines, 26(1), 12

# Acknowledgement
- The Coefficient optimization is inspired by [https://github.com/MilesCranmer/SymbolicRegression.jl](https://github.com/MilesCranmer/SymbolicRegression.jl/blob/master/src/ConstantOptimization.jl)
- We employ the insanely fast [DynamicExpressions.jl](https://github.com/SymbolicML/DynamicExpressions.jl) for evaluating our expressions


# How to cite
Feel free to use it for your research; we would appreciate it if you __cite us__! See our [paper](https://doi.org/10.1007/s10710-025-09510-z).
```
@article{Reissmann2025,
author = {Maximilian Reissmann and Yuan Fang and Andrew S. H. Ooi and Richard D. Sandberg},
title = {Constraining Genetic Symbolic Regression via Semantic Backpropagation},
journal = {Genetic Programming and Evolvable Machines},
year = {2025},
volume = {26},
number = {1},
pages = {12},
doi = {10.1007/s10710-025-09510-z},
url = {https://doi.org/10.1007/s10710-025-09510-z}
}

```

# Todo
- [ ] Documentation
- [x] Naming conventions!
Expand Down
2 changes: 1 addition & 1 deletion paper/ConstraintViaSBP.jl
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ function main()

#perform the regression by entering epochs, population_size, the feature cols, the target col and the loss function
fit!(regressor, epochs, population_size, x_train', y_train;
x_test=x_test', y_test=y_test',
x_test=x_test', y_test=y_test', target_dimension=target_dim,
loss_fun="mse", break_condition=break_condition)

end_time = (time_ns() - start_time) / 1e9
Expand Down
32 changes: 20 additions & 12 deletions src/Entities.jl
Original file line number Diff line number Diff line change
Expand Up @@ -75,14 +75,10 @@ module GepEntities
export Chromosome, Toolbox, EvaluationStrategy, StandardRegressionStrategy, GenericRegressionStrategy
export fitness, set_fitness!
export generate_gene, compile_expression!, generate_chromosome, generate_population
export genetic_operations!, replicate, gene_inversion!, gene_mutation!, gene_one_point_cross_over!, gene_two_point_cross_over!, gene_fussion!
export genetic_operations!, replicate, gene_inversion!, gene_mutation!, gene_one_point_cross_over!, gene_two_point_cross_over!, gene_fussion!, split_karva

include("Util.jl")
include("TensorOps.jl")


using .GepUtils
using .TensorRegUtils
using ..GepUtils
using ..TensorRegUtils
using OrderedCollections
using DynamicExpressions

Expand Down Expand Up @@ -401,8 +397,7 @@ Vector{Int8} representing the K-expression of the chromosome
```

"""

@inline function _karva_raw(chromosome::Chromosome)
@inline function _karva_raw(chromosome::Chromosome; split::Bool=false)
gene_len = chromosome.toolbox.head_len * 2 + 1
gene_count = chromosome.toolbox.gene_count

Expand All @@ -420,9 +415,22 @@ Vector{Int8} representing the K-expression of the chromosome
rolled_indices[idx+1] = @view genes[i:i+first(indices)-1]
end

return vcat(rolled_indices...)
!split && return vcat(rolled_indices...)
return rolled_indices
end

"""
    split_karva(chromosome::Chromosome, coeffs::Int=2)

Split the segmented raw K-expression of `chromosome` into `coeffs` parts.

The segments are obtained from `_karva_raw(chromosome; split=true)`. The first
segment is removed and the elements from index `coeffs` onward are used as the
connector symbols (NOTE(review): presumably the first segment holds the gene
connectors and the remaining segments hold one gene each — confirm against
`_karva_raw`). For every part, `div(gene_count, coeffs) - 1` connector symbols
and `div(gene_count, coeffs)` gene segments are consumed front-to-back and
concatenated, connectors first.

# Arguments
- `chromosome::Chromosome`: chromosome whose expression is split
- `coeffs::Int=2`: number of parts to produce

# Returns
An untyped vector (`Vector{Any}`) with `coeffs` entries, one concatenated
vector per part.
"""
@inline function split_karva(chromosome::Chromosome, coeffs::Int=2)
    segments = _karva_raw(chromosome; split=true)

    # Indexing with a range yields an independent copy, so the `splice!`
    # calls below never touch the chromosome's own storage.
    leading_segment = popfirst!(segments)
    connectors = leading_segment[coeffs:end]

    genes_per_part = div(chromosome.toolbox.gene_count, coeffs)
    parts = Any[]

    part = 1
    while part <= coeffs
        # Both `splice!` calls consume from the front, so each pass picks up
        # exactly where the previous one stopped.
        part_connectors = splice!(connectors, 1:(genes_per_part - 1))
        part_genes = reduce(vcat, splice!(segments, 1:genes_per_part))
        push!(parts, vcat(part_connectors, part_genes))
        part += 1
    end

    return parts
end

"""
generate_gene(headsyms::Vector{Int8}, tailsyms::Vector{Int8}, headlen::Int;
Expand All @@ -443,10 +451,10 @@ Vector{Int8} representing gene
@inline function generate_gene(headsyms::Vector{Int8}, tailsyms::Vector{Int8}, headlen::Int;
unarys::Vector{Int8}=[], unary_prob::Real=0.2, tensor_prob::Real=0.2)
if !isempty(unarys) && rand() < unary_prob
heads = vcat(headsyms, tailsyms)
heads = vcat(headsyms, rand(tailsyms,2))
push!(heads, rand(unarys))
else
heads = headsyms
heads = vcat(headsyms, rand(tailsyms,2))
end

head = rand(heads, headlen)
Expand Down
120 changes: 92 additions & 28 deletions src/GeneExpressionProgramming.jl
Original file line number Diff line number Diff line change
Expand Up @@ -78,18 +78,31 @@ dims = get_constant_dims(constant)
- Includes comprehensive utilities for tree manipulation and unit consistency
"""

include("Util.jl")
include("TensorOps.jl")
include("Entities.jl")
include("Gep.jl")
include("Losses.jl")
include("PhyConstants.jl")
include("Sbp.jl")
include("Selection.jl")
include("Util.jl")
include("Sbp.jl")
# First export the submodules themselves
export GepUtils, TensorRegUtils, GepEntities, LossFunction, EvoSelection, SBPUtils

include("Gep.jl")
include("PhyConstants.jl")
include("RegressionWrapper.jl")
export PhysicalConstants, GepRegression, RegressionWrapper

# First export the submodules themselves
export GepEntities, LossFunction, PhysicalConstants,
GepUtils, GepRegression, RegressionWrapper, GPSurrogate
# Import core functions for tensor regression
import .TensorRegUtils:
InputSelector,
AdditionNode, SubtractionNode, MultiplicationNode, DivisionNode, PowerNode,
MinNode, MaxNode, InversionNode,
TraceNode, DeterminantNode, SymmetricNode, SkewNode,
VolumetricNode, DeviatricNode, TdotNode, DottNode,
DoubleContractionNode, DeviatoricNode,
ConstantNode, UnaryNode,
compile_to_flux_network,
TENSOR_NODES, TENSOR_NODES_ARITY

# Import GEP core functionality
import .GepRegression:
Expand Down Expand Up @@ -176,7 +189,8 @@ import .GepEntities:
compile_expression!,
generate_chromosome,
generate_population,
genetic_operations!
genetic_operations!,
split_karva

# Import regression wrapper functionality
import .RegressionWrapper:
Expand All @@ -192,31 +206,81 @@ import .RegressionWrapper:
set_arity!,
set_forward_handler!,
set_backward_handler!,
update_function!

update_function!,
create_physical_operations,
create_function_entries,
create_constants_entries,
create_feature_entries


# Export GEP core functionality
export runGep, EvaluationStrategy, StandardRegressionStrategy, GenericRegressionStrategy

# Export structures for the tensor regression
export InputSelector,
AdditionNode, SubtractionNode, MultiplicationNode, DivisionNode, PowerNode,
MinNode, MaxNode, InversionNode,
TraceNode, DeterminantNode, SymmetricNode, SkewNode,
VolumetricNode, DeviatricNode, TdotNode, DottNode,
DoubleContractionNode, DeviatoricNode,
ConstantNode, UnaryNode,
compile_to_flux_network,
TENSOR_NODES, TENSOR_NODES_ARITY


# Export core GEP entities and operations
export Chromosome, Toolbox, fitness, set_fitness!,
generate_gene, compile_expression!, generate_chromosome, generate_population,
genetic_operations!, split_karva

# Export regression components
export GepRegressor, GepTensorRegressor, fit!,
list_all_functions, list_all_arity, list_all_forward_handlers,
list_all_backward_handlers, list_all_genetic_params,
set_function!, set_arity!, set_forward_handler!, set_backward_handler!,
update_function!, create_physical_operations, create_function_entries, create_constants_entries,
create_feature_entries

# Export loss functions
export get_loss_function
export find_indices_with_sum, compile_djl_datatype, optimize_constants!, minmax_scale, isclose
export save_state, load_state, record_history!, record!, close_recorder!
export HistoryRecorder, OptimizationHistory, get_history_arrays
export train_test_split
export tournament_selection, nsga_selection, dominates_, fast_non_dominated_sort, calculate_fronts, determine_ranks, assign_crowding_distance
export physical_constants, physical_constants_all, get_constant, get_constant_value, get_constant_dims

# Export selection mechanisms
export tournament_selection, nsga_selection, dominates_,
fast_non_dominated_sort, calculate_fronts,
determine_ranks, assign_crowding_distance

# Export physical constants functionality
export physical_constants, physical_constants_all,
get_constant, get_constant_value, get_constant_dims

# Export symbolic computation types
export TokenLib, TokenDto, LibEntry, TempComputeTree
export create_lib, create_compute_tree, propagate_necessary_changes!, calculate_vector_dimension!, flush!, flatten_dependents
export correct_genes!, equal_unit_forward, mul_unit_forward, div_unit_forward
export zero_unit_backward, zero_unit_forward, sqr_unit_backward, sqr_unit_forward, mul_unit_backward, div_unit_backward, equal_unit_backward
export get_feature_dims_json, get_target_dim_json, retrieve_coeffs_based_on_similarity
export Chromosome, Toolbox, fitness, set_fitness!
export generate_gene, compile_expression!, generate_chromosome, generate_population
export genetic_operations!
export GepRegressor,GepTensorRegressor, fit!
export list_all_functions, list_all_arity, list_all_forward_handlers
export list_all_backward_handlers, list_all_genetic_params
export set_function!, set_arity!, set_forward_handler!, set_backward_handler!
export update_function!

# Export symbolic computation utilities
export create_lib, create_compute_tree,
propagate_necessary_changes!, calculate_vector_dimension!,
flush!, flatten_dependents, correct_genes!,
get_feature_dims_json, get_target_dim_json,
retrieve_coeffs_based_on_similarity

# Export unit handling operations
export equal_unit_forward, mul_unit_forward, div_unit_forward,
zero_unit_backward, zero_unit_forward,
sqr_unit_backward, sqr_unit_forward,
mul_unit_backward, div_unit_backward, equal_unit_backward

# Export general utilities
export find_indices_with_sum, compile_djl_datatype,
optimize_constants!, minmax_scale, isclose,
save_state, load_state,
train_test_split

# Export history recording functionality
export HistoryRecorder, OptimizationHistory,
record_history!, record!, close_recorder!,
get_history_arrays

# Export common libraries
export ARITY_LIB_COMMON, FUNCTION_LIB_COMMON


Expand Down
39 changes: 20 additions & 19 deletions src/Gep.jl
Original file line number Diff line number Diff line change
Expand Up @@ -63,15 +63,10 @@ See also:
module GepRegression


include("Losses.jl")
include("Util.jl")
include("Selection.jl")


using .LossFunction
using .GepUtils
using .EvoSelection
using ..GepUtils
using ..GepEntities
using ..LossFunction
using ..EvoSelection


using Random
Expand Down Expand Up @@ -123,7 +118,7 @@ Returns the computed fitness value (loss) or crash_value if computation fails
y_pred = elem.compiled_function(evalArgs.x_data, evalArgs.operators)
return (evalArgs.loss_function(evalArgs.y_data, y_pred),)
else
return (elem.fitness,)
return elem.fitness
end
catch e
return (evalArgs.crash_value,)
Expand Down Expand Up @@ -279,7 +274,10 @@ The evolution process stops when either:
correction_epochs::Int=1,
correction_amount::Real=0.6,
tourni_size::Int=3,
optimization_epochs::Int=500)
optimization_epochs::Int=500,
file_logger_callback::Union{Function, Nothing}=nothing,
save_state_callback::Union{Function, Nothing}=nothing,
load_state_callback::Union{Function, Nothing}=nothing)

recorder = HistoryRecorder(epochs, Tuple)
mating_ = toolbox.gep_probs["mating_size"]
Expand All @@ -289,11 +287,12 @@ The evolution process stops when either:
fit_cache = Dict{Vector{Int8},Tuple}()
cache_lock = SpinLock()

population = generate_population(population_size, toolbox)
population, start_epoch = isnothing(load_state_callback) ? (generate_population(population_size, toolbox), 1) : load_state_callback()
next_gen = Vector{eltype(population)}(undef, mating_size)
progBar = Progress(epochs; showspeed=true, desc="Training: ")
prev_best = (typemax(Float64),)
for epoch in 1:epochs

for epoch in start_epoch:epochs
same = Atomic{Int}(0)
perform_correction_callback!(population, epoch, correction_epochs, correction_amount, correction_callback)

Expand Down Expand Up @@ -337,18 +336,20 @@ The evolution process stops when either:

update_surrogate!(evalStrategy)


if !isnothing(evalStrategy.break_condition) && evalStrategy.break_condition(population, epoch)
break
end

if epoch < epochs
if length(fits_representation[1]) == 1
selectedMembers = tournament_selection(fits_representation, mating_size, tourni_size)
else
selectedMembers = nsga_selection(fits_representation)
end
if length(fits_representation[1]) == 1
selectedMembers = tournament_selection(fits_representation, mating_size, tourni_size)
else
selectedMembers = nsga_selection(fits_representation)
end

!isnothing(file_logger_callback) && file_logger_callback(population, epoch, selectedMembers)
!isnothing(save_state_callback) && save_state_callback(population, epoch)

if epoch < epochs
parents = population[selectedMembers.indices]
perform_step!(population, parents, next_gen, toolbox, mating_size)
end
Expand Down
Loading