Merged

43 commits
2399af1
change signature for reg. - add dyn selection method test green
Jan 10, 2025
484d0b3
del uness consideration
Jan 10, 2025
0fa162e
adding strategies
Jan 11, 2025
c768cd2
test flex error
Jan 12, 2025
3fdb2df
test regression test
Jan 12, 2025
380f1bb
multi optimization capable
Jan 13, 2025
bfd459a
multi optimization capable
Jan 13, 2025
670d44e
add example multiobjective
Jan 13, 2025
5eff99b
add example multiobjective
Jan 13, 2025
3f1d78d
handle dubplicates properly
Jan 14, 2025
3637f53
adapt test to new interface
Jan 14, 2025
5f99a97
adapt test to new interface
Jan 14, 2025
f5966ec
change files
Jan 14, 2025
e8ff265
include tensor function stuff
Jan 15, 2025
af9d54a
using formular for tournisize
Jan 15, 2025
486f98d
change stuff
Jan 15, 2025
36d79a0
prior package changes
Jan 16, 2025
c33adea
prior package changes
Jan 16, 2025
ed7f9da
correcting imports
Jan 16, 2025
e7281ab
correcting imports
Jan 16, 2025
bceb13e
sg
Jan 16, 2025
5afd2a5
change adress
Jan 17, 2025
b4eb411
Update GeneExpressionProgramming.jl
maxreiss123 Jan 17, 2025
6802b5b
Update Gep.jl
maxreiss123 Jan 17, 2025
4cefced
reset changes
Jan 17, 2025
cc5618d
Update Gep.jl
maxreiss123 Jan 17, 2025
b361e1b
add further changes
Jan 18, 2025
6686086
Update README.md
maxreiss123 Jan 21, 2025
16ea35a
change
Jan 23, 2025
c06328a
Merge branch 'feature/modular_error' of https://github.com/maxreiss12…
Jan 23, 2025
2cd7ccd
hide cuda options
Jan 24, 2025
0dcc9c3
green tests for network
Jan 24, 2025
24de575
add new TensorRegressorWrapper + docu
Jan 24, 2025
6f87d12
add some docu
Jan 24, 2025
ef5fa9a
simplify inputs
Jan 24, 2025
c0da113
add benchmark
Jan 25, 2025
03ae430
reduce expcetion throwing
Jan 25, 2025
c350a7d
type
Jan 25, 2025
5a2c5ff
stop - test
Jan 27, 2025
ea95c07
function save
Jan 29, 2025
0b9e7a9
adding precompile test green + create tensor
Jan 30, 2025
b7721f4
new example
Jan 30, 2025
2ca242f
remark
Jan 30, 2025
9 changes: 7 additions & 2 deletions Project.toml
@@ -3,32 +3,37 @@ uuid = "2f0a5bb0-5f4f-4f7f-b515-93a1e67611af"
authors = ["Max Reissmann <reissmannm@student.unimelb.edu.au>"]
version = "0.3.4"


[deps]
BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
DynamicExpressions = "a40a106e-89c9-4ca8-8020-a735e8728b6b"
FileIO = "5789e2e9-d7fb-5bc7-8068-2c6fae9b9549"
Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
GZip = "92fee26a-97fe-5a0c-ad85-20a5f3185b63"
JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
LineSearches = "d3d80556-e9d4-5f37-9878-2ab0fcc64255"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
LoopVectorization = "bdcacae8-1622-11e9-2a5c-532679323890"
Optim = "429524aa-4258-5aef-a3af-852621145aeb"
OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a"
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
Profile = "9abbd945-dff8-562f-b5e8-e1ebf5ef1b79"
ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
Tensors = "48a634ad-e948-5137-8d70-aa71f2a747f4"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"


[compat]
BenchmarkTools = "1"
CSV = "0.10"
64 changes: 62 additions & 2 deletions README.md
@@ -93,6 +93,66 @@ The repository contains an implementation of the Gene Expression Programming [1]
- Remark: the tutorial folder contains notebooks that can be run with Google Colab, giving a step-by-step introduction


# How can I approximate functions involving vectors or matrices?
- To conduct a regression involving higher-dimensional objects, we swap the underlying evaluation engine from DynamicExpressions.jl to Flux.jl
- Hint: involving such higher-dimensional objects degrades performance significantly

```julia
using GeneExpressionProgramming
using Random
using Tensors
using Statistics: mean      # used by the loss function below
using LinearAlgebra: norm   # used by the loss function below

Random.seed!(1)

#Define the iterations for the algorithm and the population size
epochs = 100
population_size = 1000

#Number of features which needs to be inserted
number_features = 5

# define the regressor
regressor = GepTensorRegressor(number_features,
    gene_count=2, # 2 works quite reliably
    head_len=3)   # 5 works quite reliably

# create some test data - simply a few velocity vectors
size_test = 1000
u1 = [randn(Tensor{1,3}) for _ in 1:size_test]
u2 = [randn(Tensor{1,3}) for _ in 1:size_test]
u3 = [randn(Tensor{1,3}) for _ in 1:size_test]

x1 = [2.0 for _ in 1:size_test]

x2 = [0.0 for _ in 1:size_test]

# synthetic target: a linear combination of the velocity vectors
a = 0.5 .* u1 .+ x2 .* u2 .+ 2.0 .* u3

inputs = (x1,x2,u1,u2,u3)


# custom loss: recompute the error only when the cached fitness is NaN or when validating
@inline function loss_new(elem, validate::Bool)
    if isnan(mean(elem.fitness)) || validate
        model = elem.compiled_function
        a_pred = model(inputs)
        !isfinite(norm(a_pred)) && return (typemax(Float64),)
        size(a_pred) != size(a) && return (typemax(Float64),)
        size(a_pred[1]) != size(a[1]) && return (typemax(Float64),)

        loss = norm(a_pred .- a)
        return (loss,)
    else
        return (elem.fitness,)
    end
end
fit!(regressor, epochs, population_size, loss_new)
```
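
After fitting, the best candidates can be inspected. A minimal sketch, assuming the accessors used elsewhere in this repository (`best_models_`, `compiled_function`, `fitness`):

```julia
# inspect the best evolved model after fit! (accessor names as in paper/ConstraintViaSBP.jl)
best = regressor.best_models_[1]
@show best.fitness                       # fitness tuple assigned by the custom loss
a_pred = best.compiled_function(inputs)  # evaluate the compiled Flux network on the input tuple
```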

# Supported Engines for Symbolic Evaluation
- DynamicExpressions.jl
- Flux.jl (in development)


# References
- [1] Ferreira, C. (2001). Gene Expression Programming: a New Adaptive Algorithm for Solving Problems. Complex Systems, 13.
- [2] Reissmann, M., Fang, Y., Ooi, A., & Sandberg, R. (2024). Constraining genetic symbolic regression via semantic backpropagation. arXiv. https://arxiv.org/abs/2409.07369
@@ -106,5 +166,5 @@ The repository contains an implementation of the Gene Expression Programming [1]
- [x] Naming conventions!
- [x] Improve usability for user interaction
- [ ] Next operations: Tail flip, Connection symbol flip, wrapper class for easy usage, config class for predefinition, staggered exploration
- [ ] latest enhancements are provided in the branch 'feature/modular_error'
- [ ] Flexible underlying engine to evaluate the expressions -> Currently DynamicExpressions.jl, Flux in the future for GPU support
- [ ] nice printing for Flux networks
- [ ] constant node needs to be fixed
90 changes: 90 additions & 0 deletions benchmark/Benchmark.md
@@ -0,0 +1,90 @@
# Tensor Operation Benchmarks

Comparing tensor computations using DynamicExpressions.jl versus a custom Flux network.

## Usage

Pin Julia to a single thread so the timings are comparable:
```bash
export JULIA_NUM_THREADS=1
```
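
With threading pinned, the benchmark script can be run from the repository root. A minimal sketch, assuming the script path from this PR and a standard project environment:

```bash
# single-threaded run of the benchmark script
julia --project=. benchmark/benchmark_djl_nn.jl
```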

## Evaluating Performance of DynamicExpressions.jl
- example from: https://github.com/SymbolicML/DynamicExpressions.jl (adapted to use Tensors as the data type)

```julia
using DynamicExpressions
using DynamicExpressions: @declare_expression_operator
using BenchmarkTools
using LinearAlgebra
using Tensors

# node value type for the expression tree
T = Union{Float64,Vector{Float64},Tensor}

# Operations (dot performs a single contraction for Tensors.jl tensors)
vec_add(x::Tensor, y::Tensor) = @fastmath x + y
vec_square(x::Tensor) = @fastmath dot(x, x)

@declare_expression_operator(vec_add, 2)
@declare_expression_operator(vec_square, 1)

# Build expression
operators = GenericOperatorEnum(
    binary_operators=[vec_add],
    unary_operators=[vec_square]
)
variable_names = ["x1"]
c1 = Expression(Node{T}(; val=ones(Tensor{2,3})); operators, variable_names);
expression = vec_add(vec_add(vec_square(c1), c1), c1);
X = ones(Tensor{2,3});

# Evaluate the expression:

tests_n = 100000
@show "Benchmark expression"
expression(X) # [[5.0 5.0 5.0], [5.0 5.0 5.0], [5.0 5.0 5.0]]
@btime for _ in 1:tests_n
    expression(X)
end

# 83.021 ms (1798979 allocations: 187.67 MiB)
```

## Evaluating Performance of the Generated Flux Network

```julia
# imports for the Flux-network variant (as in benchmark/benchmark_djl_nn.jl)
include("../src/TensorOps.jl")
using .TensorRegUtils
using Tensors, OrderedCollections, Flux

# create the inputs for Flux
c1_ = ones(Tensor{2,3});
inputs = (c1_,);

# create the arity map (symbol id => arity)
arity_map = OrderedDict{Int8,Int}(
    1 => 2, # Addition
    2 => 2  # Multiplication
);

# assign the callbacks (symbol id => node constructor)
callbacks = Dict{Int8,Any}(
    Int8(1) => AdditionNode,
    Int8(2) => MultiplicationNode
);

# define the terminal nodes (symbol id => input selector)
nodes = OrderedDict{Int8,Any}(
    Int8(5) => InputSelector(1)
);

rek_string = Int8[1, 1, 2, 5, 5, 5, 5];
network = TensorRegUtils.compile_to_flux_network(rek_string, arity_map, callbacks, nodes, 0);
@show "Benchmark network"
result = network(inputs) # [[5.0 5.0 5.0], [5.0 5.0 5.0], [5.0 5.0 5.0]]
@btime for _ in 1:tests_n
    result = network(inputs)
end

#11.703 ms (998979 allocations: 59.49 MiB)

```
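
For reference, `rek_string` is a prefix (Polish-notation) encoding of the expression tree, read left to right using the arities from `arity_map`. A hand decoding of the example above, assuming `MultiplicationNode` contracts second-order tensors the same way as the `dot` used earlier:

```julia
# rek_string = Int8[1, 1, 2, 5, 5, 5, 5] decodes to
#   AdditionNode(AdditionNode(MultiplicationNode(x1, x1), x1), x1)
# with x1 = ones(Tensor{2,3}): each entry of x1 * x1 is 3, so 3 + 1 + 1 = 5
```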

## Conclusion
- For these higher-dimensional structures, the Flux network is roughly 7x faster (83.021 ms vs. 11.703 ms)
- The Flux network allocates roughly 3.2x less memory (187.67 MiB vs. 59.49 MiB)
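
The ratios follow directly from the timings quoted above:

```julia
# derived from the @btime outputs above
speedup   = 83.021 / 11.703   # ≈ 7.1x faster
mem_ratio = 187.67 / 59.49    # ≈ 3.2x less memory allocated
```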



81 changes: 81 additions & 0 deletions benchmark/benchmark_djl_nn.jl
@@ -0,0 +1,81 @@
using DynamicExpressions
using DynamicExpressions: @declare_expression_operator
using BenchmarkTools
using LinearAlgebra

include("../src/TensorOps.jl")
using .TensorRegUtils
using Tensors
using OrderedCollections
using Flux


"""
Benchmark for comparing higher dim structures for tensor regression - run test with - export JULIA_NUM_THREADS=1
example from: https://github.com/SymbolicML/DynamicExpressions.jl with changes according to utilize tensors
"""


# node value type for the expression tree
T = Union{Float64,Vector{Float64},Tensor}
vec_add(x::Tensor, y::Tensor) = @fastmath x + y;
vec_square(x::Tensor) = @fastmath dot(x,x);


@declare_expression_operator(vec_add, 2);
@declare_expression_operator(vec_square, 1);


operators = GenericOperatorEnum(; binary_operators=[vec_add], unary_operators=[vec_square]);

# Construct the expression:
variable_names = ["x1"]
c1 = Expression(Node{T}(; val=ones(Tensor{2,3})); operators, variable_names);
expression = vec_add(vec_add(vec_square(c1), c1), c1);

X = ones(Tensor{2,3});


# create the inputs for Flux
c1_ = ones(Tensor{2,3});
inputs = (c1_,);

# create the arity map (symbol id => arity)
arity_map = OrderedDict{Int8,Int}(
    1 => 2, # Addition
    2 => 2  # Multiplication
);

# assign the callbacks (symbol id => node constructor)
callbacks = Dict{Int8,Any}(
    Int8(1) => AdditionNode,
    Int8(2) => MultiplicationNode
);

# define the terminal nodes (symbol id => input selector)
nodes = OrderedDict{Int8,Any}(
    Int8(5) => InputSelector(1)
);

# Evaluate the expression
# Solution => [[5.0 5.0 5.0], [5.0 5.0 5.0], [5.0 5.0 5.0]]
tests_n = 100000
@show "Benchmark expression"
expression(X)
@btime for _ in 1:tests_n
    expression(X)
end

#83.021 ms (1798979 allocations: 187.67 MiB)

# prefix encoding of add(add(mul(x1, x1), x1), x1)
rek_string = Int8[1, 1, 2, 5, 5, 5, 5];
network = TensorRegUtils.compile_to_flux_network(rek_string, arity_map, callbacks, nodes, 0);
@show "Benchmark network"
result = network(inputs)
@btime for _ in 1:tests_n
    result = network(inputs)
end

#11.703 ms (998979 allocations: 59.49 MiB)


# Conclusion: ≈ 7 times faster than DynamicExpressions.jl for such structures
3 changes: 2 additions & 1 deletion paper/ConstraintSymbolicRegression.md
@@ -3,7 +3,8 @@ Evolutionary symbolic regression approaches are powerful tools that can approxim


# Test Reproduction
The file ConstraintViaSBP.jl contains the test setup. Please follow the steps outlined there.
- The file ConstraintViaSBP.jl contains the test setup. Please follow the steps outlined there.
- Utilize release 0.4 (see the sketch below)
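
A minimal sketch of pinning that release from the Julia package manager, assuming the package is registered under the name given in `Project.toml`:

```julia
using Pkg
Pkg.add(name="GeneExpressionProgramming", version="0.4")  # pin to release 0.4
```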

# Source
- [1] Reissmann, M., Fang, Y., Ooi, A., & Sandberg, R. (2024). Constraining genetic symbolic regression via semantic backpropagation. arXiv. https://arxiv.org/abs/2409.07369
48 changes: 41 additions & 7 deletions paper/ConstraintViaSBP.jl
@@ -9,6 +9,21 @@ using Random
using Logging
using Dates
using JSON
using Statistics


function break_condition(population, epoch)
    # stop early once the best individual's mean fitness is (approximately) zero
    return isclose(mean(population[1].fitness), 0.0)
end

function loss_new(eqn::Node, operators::OperatorEnum, x_data::AbstractArray, y_data::AbstractArray)
    # r2 score of the compiled equation; any evaluation error yields 0.0
    try
        y_pred = eqn(x_data, operators)
        return get_loss_function("r2_score")(y_data, y_pred)
    catch e
        return zero(Float64)
    end
end


function setup_logger(log_file_path::String)
@@ -88,7 +103,7 @@ function main()
println(feature_names)
println(case_name)
phy_dims = get_feature_dims_json(case_data, feature_names, case_name)
phy_dims = Dict{Symbol, Vector{Float16}}( Symbol(x_n) => dim_n for (x_n, dim_n) in phy_dims)
phy_dims = Dict{Symbol,Vector{Float16}}(Symbol(x_n) => dim_n for (x_n, dim_n) in phy_dims)
target_dim = get_target_dim_json(case_data, case_name)

print(phy_dims)
@@ -104,23 +119,42 @@

start_time = time_ns()

regressor = GepRegressor(num_cols-1;
regressor = GepRegressor(num_cols - 1;
considered_dimensions=phy_dims,
entered_non_terminals=[:+, :-, :*, :/, :sqrt, :sin, :cos, :exp, :log],
max_permutations_lib=10000, rounds=7)
max_permutations_lib=10000, rounds=7, number_of_objectives=1)


@inline function loss_new_(elem, validate::Bool)
    try
        if isnan(mean(elem.fitness)) || validate
            y_pred = elem.compiled_function(x_train', regressor.operators_)
            return (get_loss_function("mse")(y_train, y_pred),)
        else
            # multi-objective return: raw fitness plus a length-weighted copy (parsimony pressure)
            return (elem.fitness, length(elem.expression_raw) * elem.fitness)
            #return (elem.fitness,)
        end
    catch e
        return (typemax(Float64), typemax(Float64))
    end
end


# perform the regression: epochs, population size, feature cols, target col, the loss function, and an early-stopping break condition
fit!(regressor, epochs, population_size, x_train', y_train;
x_test=x_test', y_test=y_test',
loss_fun="mse", target_dimension=target_dim)
loss_fun="mse", break_condition=break_condition)

end_time = (time_ns() - start_time) / 1e9
elem = regressor.best_models_[1]
fitness_r2_train = loss_new(elem.compiled_function, regressor.operators_, x_train', y_train)
fitness_r2_test = loss_new(elem.compiled_function, regressor.operators_, x_test', y_test)

#log_results
push!(results, (seed, case_name, noise_level, elem.fitness, string(elem.compiled_function),
elem.fitness_r2_train, elem.fitness_r2_test, end_time, elem.dimension_homogene, target_dim))
push!(results, (seed, case_name, noise_level, mean(elem.fitness), string(elem.compiled_function),
fitness_r2_train, fitness_r2_test, end_time, elem.dimension_homogene, target_dim))

@show elem.fitness_r2_test
@show fitness_r2_test
save_results_to_csv(file_name_save, results)
end
end