From df407e5ec36a53c6e675141a96ed26da54e890a5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Maximilian=20Rei=C3=9Fmann?=
 <reissmannm@4000l-217946-l.mobility.unimelb.net.au>
Date: Sun, 2 Mar 2025 13:00:20 +1100
Subject: [PATCH 1/6] add probs

---
 Project.toml             |   1 +
 src/Entities.jl          | 152 +++++++++++++++++++++++++++++++++------
 src/RegressionWrapper.jl |  29 +++++---
 3 files changed, 151 insertions(+), 31 deletions(-)

diff --git a/Project.toml b/Project.toml
index a24d2a0..9ba2200 100644
--- a/Project.toml
+++ b/Project.toml
@@ -30,6 +30,7 @@ Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
 StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
+StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
 Tensors = "48a634ad-e948-5137-8d70-aa71f2a747f4"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
diff --git a/src/Entities.jl b/src/Entities.jl
index 9572c23..4aa93f4 100644
--- a/src/Entities.jl
+++ b/src/Entities.jl
@@ -75,13 +75,13 @@ module GepEntities
 export Chromosome, Toolbox, EvaluationStrategy, StandardRegressionStrategy, GenericRegressionStrategy
 export fitness, set_fitness!
 export generate_gene, compile_expression!, generate_chromosome, generate_population
-export genetic_operations!, replicate, gene_inversion!, gene_mutation!, gene_one_point_cross_over!, gene_two_point_cross_over!, gene_fussion!, split_karva
-export print_karva_strings
+export genetic_operations!, replicate, gene_inversion!, gene_mutation!, gene_one_point_cross_over!, gene_two_point_cross_over!, gene_fussion!, split_karva, print_karva_strings
 
 using ..GepUtils
 using ..TensorRegUtils
 using OrderedCollections
 using DynamicExpressions
+using StatsBase
 
 
 """
@@ -228,12 +228,15 @@ struct Toolbox
     len_preamble::Int8
     operators_::Union{OperatorEnum,Nothing}
     compile_function_::Function
+    tail_weights::Union{Weights,Nothing}
 
 
     function Toolbox(gene_count::Int, head_len::Int, symbols::OrderedDict{Int8,Int8}, gene_connections::Vector{Int8},
         callbacks::Dict, nodes::OrderedDict, gep_probs::Dict{String,AbstractFloat};
         unary_prob::Real=0.1, preamble_syms=Int8[],
-        number_of_objectives::Int=1, operators_::Union{OperatorEnum,Nothing}=nothing, function_complile::Function=compile_djl_datatype)
+        number_of_objectives::Int=1, operators_::Union{OperatorEnum,Nothing}=nothing, 
+        function_complile::Function=compile_djl_datatype,
+        tail_weights_::Union{Weights,Nothing}=nothing)
 
         fitness_reset = (
             ntuple(_ -> Inf, number_of_objectives),
@@ -245,9 +248,12 @@ struct Toolbox
         tailsyms = [key for (key, arity) in symbols if arity < 1 && !(key in preamble_syms)]
         len_preamble = length(preamble_syms)
         gen_start_indices = [gene_count + (gene_len * (i - 1)) for i in 1:gene_count]
+        tail_weights = isnothing(tail_weights_) ? weights([1/length(tailsyms) for _ in 1:length(tailsyms)]) : tail_weights_
         #ensure_buffer_size!(head_len, gene_count)
         new(gene_count, head_len, symbols, gene_connections, headsyms, unary_syms, tailsyms, symbols,
-            callbacks, nodes, gen_start_indices, gep_probs, unary_prob, fitness_reset, preamble_syms, len_preamble, operators_, function_complile)
+            callbacks, nodes, gen_start_indices, gep_probs, unary_prob, fitness_reset, preamble_syms, len_preamble, operators_, 
+            function_complile,
+            tail_weights)
     end
 end
 
@@ -416,19 +422,19 @@ Vector{Int8} representing the K-expression of the chromosome
         rolled_indices[idx+1] = @view genes[i:i+first(indices)-1]
     end
 
-    !split && return vcat(rolled_indices...)
+    !split && return vcat(rolled_indices...) 
     return rolled_indices
 end
 
 @inline function split_karva(chromosome::Chromosome, coeffs::Int=2)
     raw = _karva_raw(chromosome; split=true)
     connectors = popfirst!(raw)[coeffs:end]
-    gene_count_per_factor = div(chromosome.toolbox.gene_count, coeffs)
+    gene_count_per_factor = div(chromosome.toolbox.gene_count,coeffs)
     retval = []
     for _ in 1:coeffs
         temp_cons = splice!(connectors, 1:gene_count_per_factor-1)
-        temp_genes = reduce(vcat, splice!(raw, 1:gene_count_per_factor))
-        push!(retval, vcat([temp_cons, temp_genes]...))
+        temp_genes = reduce(vcat, splice!(raw,1:gene_count_per_factor))
+        push!(retval,vcat([temp_cons, temp_genes]...))
     end
     return retval
 end
@@ -451,8 +457,6 @@ end
         callback_, 
         chromosome.toolbox.nodes, 
         1)
-
-
 end
 
 """
@@ -471,17 +475,20 @@ Generate a single gene for GEP.
 # Returns
 Vector{Int8} representing gene
 """
-@inline function generate_gene(headsyms::Vector{Int8}, tailsyms::Vector{Int8}, headlen::Int;
-    unarys::Vector{Int8}=[], unary_prob::Real=0.2, tensor_prob::Real=0.2)
+@inline function generate_gene(headsyms::Vector{Int8}, tailsyms::Vector{Int8}, headlen::Int,
+    tail_weights::Weights;
+    unarys::Vector{Int8}=[], unary_prob::Real=0.2)
+
+
     if !isempty(unarys) && rand() < unary_prob
-        heads = vcat(headsyms, rand(tailsyms, 2))
+        heads = vcat(headsyms)
         push!(heads, rand(unarys))
     else
-        heads = vcat(headsyms, rand(tailsyms, 2))
+        heads = vcat(headsyms)
     end
-
-    head = rand(heads, headlen)
-    tail = rand(tailsyms, headlen + 1)
+    head_len_temp = rand(1:headlen)
+    head = rand(heads, head_len_temp)
+    tail = sample(tailsyms,tail_weights,headlen + (headlen-head_len_temp) + 1)
     return vcat(head, tail)
 end
 
@@ -497,8 +504,8 @@ New Chromosome instance
 """
 @inline function generate_chromosome(toolbox::Toolbox)
     connectors = rand(toolbox.gene_connections, toolbox.gene_count - 1)
-    genes = vcat([generate_gene(toolbox.headsyms, toolbox.tailsyms, toolbox.head_len; unarys=toolbox.unary_syms,
-        unary_prob=toolbox.unary_prob) for _ in 1:toolbox.gene_count]...)
+    genes = vcat([generate_gene(toolbox.headsyms, toolbox.tailsyms, toolbox.head_len, toolbox.tail_weights;
+        unarys=toolbox.unary_syms,unary_prob=toolbox.unary_prob) for _ in 1:toolbox.gene_count]...)
     return Chromosome(vcat(connectors, genes), toolbox, true)
 end
 
@@ -709,6 +716,99 @@ end
 end
 
 
+"""
+    diversity_injection!(chromosome::Chromosome, diversity_factor::Real=0.5)
+
+Injects diversity by randomly replacing portions of genes with completely new material.
+Useful for avoiding premature convergence.
+
+# Arguments
+- `chromosome::Chromosome`: Target chromosome
+- `diversity_factor::Real=0.5`: Controls how much of the chromosome to randomize
+"""
+@inline function diversity_injection!(chromosome::Chromosome, diversity_factor::Real=0.5)
+    gene_len = chromosome.toolbox.head_len * 2 + 1
+    gene_count = chromosome.toolbox.gene_count
+    
+    genes_to_randomize = max(1, round(Int, diversity_factor * gene_count))
+    
+    genes_indices = sample(1:gene_count, genes_to_randomize, replace=false)
+    
+    for gene_idx in genes_indices
+        gene_start = chromosome.toolbox.gen_start_indices[gene_idx]
+        
+        new_gene = generate_gene(
+            chromosome.toolbox.headsyms, 
+            chromosome.toolbox.tailsyms, 
+            chromosome.toolbox.head_len,
+            chromosome.toolbox.tail_weights,
+            unarys=chromosome.toolbox.unary_syms,
+            unary_prob=chromosome.toolbox.unary_prob
+        )
+        
+        chromosome.genes[gene_start:(gene_start + gene_len - 1)] .= new_gene
+    end
+end
+
+"""
+    adaptive_mutation!(chromosome::Chromosome, generation::Int, max_generations::Int, pb_start::Real=0.4, pb_end::Real=0.1)
+
+Adaptive mutation operator that adjusts mutation rate based on generation progress.
+Higher mutation rate early for exploration, lower later for exploitation.
+
+# Arguments
+- `chromosome::Chromosome`: Target chromosome
+- `generation::Int`: Current generation
+- `max_generations::Int`: Maximum generations for the run
+- `pb_start::Real=0.4`: Starting probability (higher for exploration)
+- `pb_end::Real=0.1`: Ending probability (lower for exploitation)
+"""
+@inline function adaptive_mutation!(chromosome::Chromosome, generation::Int, max_generations::Int; 
+        pb_start::Real=0.4, pb_end::Real=0.1)
+    progress = generation / max_generations
+    adaptive_pb = pb_start - (pb_start - pb_end) * progress
+    
+    buffer = THREAD_BUFFERS[Threads.threadid()]
+    create_operator_masks(chromosome.genes, chromosome.genes, adaptive_pb)
+    buffer.child_1_genes[1:length(chromosome.genes)] .= generate_chromosome(chromosome.toolbox).genes[1:length(chromosome.genes)]
+
+    @inbounds @simd for i in eachindex(chromosome.genes)
+        chromosome.genes[i] = buffer.alpha_operator[i] == 1 ? buffer.child_1_genes[i] : chromosome.genes[i]
+    end
+end
+
+"""
+    gene_transposition!(chromosome::Chromosome, len::Int=3)
+
+Transposes a small segment of genes from one position to another within the same chromosome,
+preserving their order but changing their context.
+
+# Arguments
+- `chromosome::Chromosome`: Target chromosome
+- `len::Int=3`: Length of segment to transpose
+"""
+@inline function gene_transposition!(chromosome::Chromosome, len::Int=3)
+    gene_len = chromosome.toolbox.head_len * 2 + 1
+    gene_count = chromosome.toolbox.gene_count
+    
+    source_gene_idx = rand(1:gene_count)
+    target_gene_idx = rand([i for i in 1:gene_count if i != source_gene_idx])
+    
+    source_start = chromosome.toolbox.gen_start_indices[source_gene_idx]
+    target_start = chromosome.toolbox.gen_start_indices[target_gene_idx]
+    
+    segment_len = min(len, gene_len - 1)
+    
+    source_pos = rand(source_start:(source_start + gene_len - segment_len - 1))
+    target_pos = rand(target_start:(target_start + gene_len - segment_len - 1))
+    
+    segment = copy(chromosome.genes[source_pos:(source_pos + segment_len - 1)])
+    
+    target_region = chromosome.genes[target_pos:(target_pos + segment_len - 1)]
+    chromosome.genes[target_pos:(target_pos + segment_len - 1)] .= segment
+end
+
+
 """
     genetic_operations!(space_next::Vector{Chromosome}, i::Int, toolbox::Toolbox)
 
@@ -744,10 +844,11 @@ Genetic operators for chromosome modification.
 # Effects
 Modify chromosome genes in place
 """
-@inline function genetic_operations!(space_next::Vector{Chromosome}, i::Int, toolbox::Toolbox)
+@inline function genetic_operations!(space_next::Vector{Chromosome}, i::Int, toolbox::Toolbox;
+        generation::Int=0, max_generation::Int=0)
     #allocate them within the space - create them once instead of n time 
     space_next[i:i+1] = replicate(space_next[i], space_next[i+1], toolbox)
-    rand_space = rand(15)
+    rand_space = rand(17)
 
 
     if rand_space[1] < toolbox.gep_probs["one_point_cross_over_prob"]
@@ -810,5 +911,14 @@ Modify chromosome genes in place
         reverse_insertion_tail!(space_next[i+1])
     end
 
+    if rand_space[16] < toolbox.gep_probs["gene_transposition"]
+        gene_transposition!(space_next[i])
+    end
+
+    if rand_space[17] < toolbox.gep_probs["gene_transposition"]
+        gene_transposition!(space_next[i+1])
+    end
+
+
 end
 end
diff --git a/src/RegressionWrapper.jl b/src/RegressionWrapper.jl
index b7fb16b..fe75a47 100644
--- a/src/RegressionWrapper.jl
+++ b/src/RegressionWrapper.jl
@@ -143,7 +143,7 @@ using ..TensorRegUtils
 using DynamicExpressions
 using OrderedCollections
 using LinearAlgebra
-
+using StatsBase
 
 
 """
@@ -254,8 +254,8 @@ Dictionary containing default probabilities and parameters for genetic algorithm
 These values can be adjusted to fine-tune the genetic algorithm's behavior.
 """
 const GENE_COMMON_PROBS = Dict{String,AbstractFloat}(
-    "one_point_cross_over_prob" => 0.6,
-    "two_point_cross_over_prob" => 0.4,
+    "one_point_cross_over_prob" => 0.3,
+    "two_point_cross_over_prob" => 0.2,
     "mutation_prob" => 1.0,
     "mutation_rate" => 0.05,
     "dominant_fusion_prob" => 0.1,
@@ -267,7 +267,8 @@ const GENE_COMMON_PROBS = Dict{String,AbstractFloat}(
     "inversion_prob" => 0.1,
     "reverse_insertion" => 0.1,
     "reverse_insertion_tail" => 0.1,
-    "mating_size" => 0.7)
+    "gene_transposition" => 0.2,
+    "mating_size" => 0.5)
 
 const SymbolDict = OrderedDict{Int8,Int8}
 const CallbackDict = Dict{Int8,Function}
@@ -450,6 +451,8 @@ Create a Gene Expression Programming regressor for symbolic regression.
 - `max_permutations_lib::Int=10000`: Maximum permutations for dimension library
 - `rounds::Int=4`: Rounds for dimension library creation
 - `number_of_objectives::Int=1`: Defines the number of objectives considered by the search
+- `head_weigths=nothing`: Defines the weights for the different function - ∑(head_weigths)==1
+- `tail_weigths=[0.6,0.2,0.2]`: Defines the weights for the different utilized symbols - ∑(tail-weights)==1
 """
 mutable struct GepRegressor
     toolbox_::Toolbox
@@ -472,13 +475,18 @@ mutable struct GepRegressor
         head_len::Int=6,
         preamble_syms::Vector{Symbol}=Symbol[],
         max_permutations_lib::Int=10000, rounds::Int=4,
-        number_of_objectives::Int=1
-    )
+        number_of_objectives::Int=1,
+        head_weigths::Union{Vector{<:AbstractFloat},Nothing}=nothing,
+        tail_weigths::Union{Vector{<:AbstractFloat},Nothing}=[0.6,0.2,0.2]
+    )        
+        tail_count = feature_amount + rnd_count + length(entered_terminal_nums)
+        tail_weigths_ = [tail_weigths[1]/tail_count for _ in 1:feature_amount]
+        append!(tail_weigths_, fill(tail_weigths[2]/tail_count, length(entered_terminal_nums)))
+        append!(tail_weigths_, fill(tail_weigths[3]/tail_count, rnd_count))
 
         entered_features_ = isempty(entered_features) ?
                             [Symbol("x$i") for i in 1:feature_amount] : entered_features
 
-
         func_syms, callbacks, binary_ops, unary_ops, gene_connections_, cur_idx = create_function_entries(
             entered_non_terminals, gene_connections
         )
@@ -497,10 +505,11 @@ mutable struct GepRegressor
 
 
         utilized_symbols = merge_collections(func_syms, feat_syms, const_syms, pre_syms)
+        
         nodes = merge!(NodeDict(), feat_nodes, const_nodes, pre_nodes)
+        
         dimension_information = merge!(DimensionDict(), feat_dims, const_dims, pre_dims)
-
-
+        
         operators = OperatorEnum(binary_operators=binary_ops, unary_operators=unary_ops)
 
         if !isempty(considered_dimensions)
@@ -526,7 +535,7 @@ mutable struct GepRegressor
 
         toolbox = Toolbox(gene_count, head_len, utilized_symbols, gene_connections_,
             callbacks, nodes, GENE_COMMON_PROBS; preamble_syms=preamble_syms_, number_of_objectives=number_of_objectives,
-            operators_=operators)
+            operators_=operators, tail_weights_=weights(tail_weigths_))
 
         obj = new()
         obj.toolbox_ = toolbox

From 52529d05df590647c360f8a4f7320567a93ddd3e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Maximilian=20Rei=C3=9Fmann?=
 <reissmannm@4000l-217946-l.mobility.unimelb.net.au>
Date: Sun, 2 Mar 2025 16:02:40 +1100
Subject: [PATCH 2/6] add new opeator that includes information of whole parent
 generation

---
 src/Entities.jl                  | 88 +++++++++++++++++++++-----------
 src/GeneExpressionProgramming.jl |  5 +-
 src/Gep.jl                       | 49 ++++++++----------
 src/RegressionWrapper.jl         |  6 +--
 src/Util.jl                      | 53 +++++++++++++++++--
 test/Entity_test.jl              |  3 +-
 6 files changed, 137 insertions(+), 67 deletions(-)

diff --git a/src/Entities.jl b/src/Entities.jl
index 4aa93f4..dc434fc 100644
--- a/src/Entities.jl
+++ b/src/Entities.jl
@@ -191,23 +191,26 @@ Contains parameters and operations for GEP algorithm execution.
 - `symbols::OrderedDict{Int8,Int8}`: Available symbols and their arities
 - `gene_connections::Vector{Int8}`: How genes connect
 - `headsyms::Vector{Int8}`: Symbols allowed in head
-- `unary_syms::Vector{Int8}`: Unary operators
 - `tailsyms::Vector{Int8}`: Symbols allowed in tail
 - `arrity_by_id::OrderedDict{Int8,Int8}`: Symbol arities
 - `callbacks::Dict`: Operation callbacks
 - `nodes::OrderedDict`: Node definitions
 - `gen_start_indices::Vector{Int}`: Gene start positions
 - `gep_probs::Dict{String,AbstractFloat}`: Operation probabilities
-- `unary_prob::Real`: Unary operator probability
 - `fitness_reset::Tuple`: Default fitness values
 - `preamble_syms::Vector{Int8}`: Preamble symbols
 - `len_preamble::Int8`: Preamble length
+- `tail_weights::Union{Weights,Nothing}` Defines the probability for tail symbols
+- `head_weigths::Union{Weights,Nothing}` Defines the probability for head symbols 
 
 # Constructor
     Toolbox(gene_count::Int, head_len::Int, symbols::OrderedDict{Int8,Int8}, 
            gene_connections::Vector{Int8}, callbacks::Dict, nodes::OrderedDict, 
-           gep_probs::Dict{String,AbstractFloat}; unary_prob::Real=0.4, 
-           fitness_reset::Tuple=(Inf, NaN), preamble_syms=Int8[])
+           gep_probs::Dict{String,AbstractFloat}; 
+           fitness_reset::Tuple=(Inf, NaN), preamble_syms=Int8[],
+           function_complile::Function=compile_djl_datatype,
+           tail_weights_::Union{Weights,Nothing}=nothing,
+           head_tail_balance::Real=0.2)
 """
 struct Toolbox
     gene_count::Int
@@ -215,20 +218,19 @@ struct Toolbox
     symbols::OrderedDict{Int8,Int8}
     gene_connections::Vector{Int8}
     headsyms::Vector{Int8}
-    unary_syms::Vector{Int8}
     tailsyms::Vector{Int8}
     arrity_by_id::OrderedDict{Int8,Int8}
     callbacks::Dict
     nodes::OrderedDict
     gen_start_indices::Vector{Int}
     gep_probs::Dict{String,AbstractFloat}
-    unary_prob::Real
     fitness_reset::Tuple
     preamble_syms::Vector{Int8}
     len_preamble::Int8
     operators_::Union{OperatorEnum,Nothing}
     compile_function_::Function
     tail_weights::Union{Weights,Nothing}
+    head_weights::Union{Weights,Nothing}
 
 
     function Toolbox(gene_count::Int, head_len::Int, symbols::OrderedDict{Int8,Int8}, gene_connections::Vector{Int8},
@@ -236,7 +238,8 @@ struct Toolbox
         unary_prob::Real=0.1, preamble_syms=Int8[],
         number_of_objectives::Int=1, operators_::Union{OperatorEnum,Nothing}=nothing, 
         function_complile::Function=compile_djl_datatype,
-        tail_weights_::Union{Weights,Nothing}=nothing)
+        tail_weights_::Union{Weights,Nothing}=nothing, 
+        head_tail_balance::Real=0.8)
 
         fitness_reset = (
             ntuple(_ -> Inf, number_of_objectives),
@@ -245,15 +248,26 @@ struct Toolbox
         gene_len = head_len * 2 + 1
         headsyms = [key for (key, arity) in symbols if arity == 2]
         unary_syms = [key for (key, arity) in symbols if arity == 1]
+        up = isempty(unary_syms) ? unary_prob : 0
+
         tailsyms = [key for (key, arity) in symbols if arity < 1 && !(key in preamble_syms)]
         len_preamble = length(preamble_syms)
         gen_start_indices = [gene_count + (gene_len * (i - 1)) for i in 1:gene_count]
+        
+
         tail_weights = isnothing(tail_weights_) ? weights([1/length(tailsyms) for _ in 1:length(tailsyms)]) : tail_weights_
+        head_weights = weights([
+            fill(head_tail_balance/length(headsyms), length(headsyms));
+            fill(unary_prob/length(unary_syms), length(unary_syms));
+            tail_weights .* (1-head_tail_balance-up)
+        ])
+
         #ensure_buffer_size!(head_len, gene_count)
-        new(gene_count, head_len, symbols, gene_connections, headsyms, unary_syms, tailsyms, symbols,
-            callbacks, nodes, gen_start_indices, gep_probs, unary_prob, fitness_reset, preamble_syms, len_preamble, operators_, 
+        head_syms = vcat([headsyms, unary_syms, tailsyms]...)
+        new(gene_count, head_len, symbols, gene_connections, head_syms, tailsyms, symbols,
+            callbacks, nodes, gen_start_indices, gep_probs, fitness_reset, preamble_syms, len_preamble, operators_, 
             function_complile,
-            tail_weights)
+            tail_weights, head_weights)
     end
 end
 
@@ -469,26 +483,16 @@ Generate a single gene for GEP.
 - `headsyms::Vector{Int8}`: Symbols for head
 - `tailsyms::Vector{Int8}`: Symbols for tail
 - `headlen::Int`: Head length
-- `unarys::Vector{Int8}=[]`: Unary operators
-- `unary_prob::Real=0.2`: Unary operator probability
+- `tail_weights::Union{Weights,Nothing}` Defines the probability for tail symbols
+- `head_weigths::Union{Weights,Nothing}` Defines the probability for head symbols 
 
 # Returns
 Vector{Int8} representing gene
 """
 @inline function generate_gene(headsyms::Vector{Int8}, tailsyms::Vector{Int8}, headlen::Int,
-    tail_weights::Weights;
-    unarys::Vector{Int8}=[], unary_prob::Real=0.2)
-
-
-    if !isempty(unarys) && rand() < unary_prob
-        heads = vcat(headsyms)
-        push!(heads, rand(unarys))
-    else
-        heads = vcat(headsyms)
-    end
-    head_len_temp = rand(1:headlen)
-    head = rand(heads, head_len_temp)
-    tail = sample(tailsyms,tail_weights,headlen + (headlen-head_len_temp) + 1)
+    tail_weights::Weights, head_weights::Weights)
+    head = sample(headsyms,head_weights,headlen)
+    tail = sample(tailsyms,tail_weights,headlen + 1)
     return vcat(head, tail)
 end
 
@@ -504,8 +508,8 @@ New Chromosome instance
 """
 @inline function generate_chromosome(toolbox::Toolbox)
     connectors = rand(toolbox.gene_connections, toolbox.gene_count - 1)
-    genes = vcat([generate_gene(toolbox.headsyms, toolbox.tailsyms, toolbox.head_len, toolbox.tail_weights;
-        unarys=toolbox.unary_syms,unary_prob=toolbox.unary_prob) for _ in 1:toolbox.gene_count]...)
+    genes = vcat([generate_gene(toolbox.headsyms, toolbox.tailsyms, toolbox.head_len, toolbox.tail_weights,
+        toolbox.head_weights) for _ in 1:toolbox.gene_count]...)
     return Chromosome(vcat(connectors, genes), toolbox, true)
 end
 
@@ -716,6 +720,21 @@ end
 end
 
 
+@inline function gene_fussion_extent!(chromosome1::Chromosome, parents::Vector{Chromosome}, pb::Real=0.2)
+    buffer = THREAD_BUFFERS[Threads.threadid()]
+    len_a = length(chromosome1.genes)
+    genes2 = one_hot_mean([p.genes for p in parents], 2)
+
+    create_operator_masks(chromosome1.genes, genes2, pb)
+
+    @inbounds @simd for i in eachindex(chromosome1.genes)
+        buffer.child_1_genes[i] = buffer.alpha_operator[i] == 1 ? genes2[i] : chromosome1.genes[i]
+    end
+
+    chromosome1.genes .= @view buffer.child_1_genes[1:len_a]
+end
+
+
 """
     diversity_injection!(chromosome::Chromosome, diversity_factor::Real=0.5)
 
@@ -742,8 +761,7 @@ Useful for avoiding premature convergence.
             chromosome.toolbox.tailsyms, 
             chromosome.toolbox.head_len,
             chromosome.toolbox.tail_weights,
-            unarys=chromosome.toolbox.unary_syms,
-            unary_prob=chromosome.toolbox.unary_prob
+            chromosome.toolbox.head_weights
         )
         
         chromosome.genes[gene_start:(gene_start + gene_len - 1)] .= new_gene
@@ -845,7 +863,7 @@ Genetic operators for chromosome modification.
 Modify chromosome genes in place
 """
 @inline function genetic_operations!(space_next::Vector{Chromosome}, i::Int, toolbox::Toolbox;
-        generation::Int=0, max_generation::Int=0)
+        generation::Int=0, max_generation::Int=0, parents::Union{Vector{Chromosome},Nothing}=nothing)
     #allocate them within the space - create them once instead of n time 
     space_next[i:i+1] = replicate(space_next[i], space_next[i+1], toolbox)
     rand_space = rand(17)
@@ -919,6 +937,16 @@ Modify chromosome genes in place
         gene_transposition!(space_next[i+1])
     end
 
+    if rand_space[16] < toolbox.gep_probs["gene_transposition"]
+        gene_fussion_extent!(space_next[i], parents)
+    end
+
+    if rand_space[17] < toolbox.gep_probs["gene_transposition"]
+        gene_fussion_extent!(space_next[i+1], parents)
+    end
+
+
+    
 
 end
 end
diff --git a/src/GeneExpressionProgramming.jl b/src/GeneExpressionProgramming.jl
index 6a622c0..afdc436 100644
--- a/src/GeneExpressionProgramming.jl
+++ b/src/GeneExpressionProgramming.jl
@@ -130,7 +130,8 @@ import .GepUtils:
     train_test_split,
     ARITY_LIB_COMMON,
     FUNCTION_LIB_COMMON,
-    FUNCTION_STRINGIFY
+    FUNCTION_STRINGIFY,
+    one_hot_mean
 
 # Import selection mechanisms
 import .EvoSelection:
@@ -275,7 +276,7 @@ export equal_unit_forward, mul_unit_forward, div_unit_forward,
 export find_indices_with_sum, compile_djl_datatype,
     optimize_constants!, minmax_scale, isclose,
     save_state, load_state,
-    train_test_split
+    train_test_split, one_hot_mean
 
 # Export history recording functionality
 export HistoryRecorder, OptimizationHistory,
diff --git a/src/Gep.jl b/src/Gep.jl
index 6599123..45783ed 100644
--- a/src/Gep.jl
+++ b/src/Gep.jl
@@ -151,22 +151,24 @@ Performs one evolutionary step in the GEP algorithm, creating and evaluating new
 - Operations are performed in parallel using multiple threads
 """
 @inline function perform_step!(population::Vector{Chromosome}, parents::Vector{Chromosome}, next_gen::Vector{Chromosome},
-    toolbox::Toolbox, mating_size::Int)
-
+    toolbox::Toolbox, mating_size::Int, generation::Int, max_generation::Int)
     @inbounds Threads.@threads for i in 1:2:mating_size-1
         next_gen[i] = parents[i]
         next_gen[i+1] = parents[i+1]
 
-        genetic_operations!(next_gen, i, toolbox)
+        genetic_operations!(next_gen, i, toolbox;
+            generation=generation, max_generation=max_generation, parents=parents)
 
         compile_expression!(next_gen[i]; force_compile=true)
         compile_expression!(next_gen[i+1]; force_compile=true)
 
     end
-
-    Threads.@threads for i in 1:mating_size-1
+    
+    Threads.@threads for i in eachindex(next_gen)
         try
-            population[end-i] = next_gen[i]
+            population[end-i] = population[end-mating_size-i]
+            population[end-mating_size-i] = next_gen[i]
+            #@show ("Position $i - new insert $(length(population)-mating_size-i) - $(pointer_from_objref(next_gen[i]))")
         catch e
             error_message = sprint(showerror, e, catch_backtrace())
             @error "Error in perform_step!: $error_message"
@@ -175,10 +177,6 @@ Performs one evolutionary step in the GEP algorithm, creating and evaluating new
 end
 
 
-@inline function update_surrogate!(::EvaluationStrategy) 
-    nothing
-end
-
 """
     perform_correction_callback!(population::Vector{Chromosome}, epoch::Int, 
         correction_epochs::Int, correction_amount::Real,
@@ -212,7 +210,7 @@ Applies correction operations to ensure dimensional homogeneity in chromosomes.
                     compile_expression!(population[i]; force_compile=true)
                     population[i].dimension_homogene = true
                 else
-                    population[i].fitness = (population[i].fitness[1]+distance,)
+                    #population[i].fitness += distance
                 end
             end
         end
@@ -277,7 +275,8 @@ The evolution process stops when either:
     optimization_epochs::Int=500,
     file_logger_callback::Union{Function, Nothing}=nothing, 
     save_state_callback::Union{Function, Nothing}=nothing,
-    load_state_callback::Union{Function, Nothing}=nothing)
+    load_state_callback::Union{Function, Nothing}=nothing,
+    update_surrogate_callback::Union{Function, Nothing}=nothing)
 
     recorder = HistoryRecorder(epochs, Tuple)
     mating_ = toolbox.gep_probs["mating_size"]
@@ -287,22 +286,18 @@ The evolution process stops when either:
     fit_cache = Dict{Vector{Int8},Tuple}()
     cache_lock = SpinLock()
 
-    population, start_epoch = isnothing(load_state_callback) ? (generate_population(population_size, toolbox), 1) : load_state_callback()
+    population, start_epoch = isnothing(load_state_callback) ? (generate_population(population_size+mating_size, toolbox), 1) : load_state_callback()
     next_gen = Vector{eltype(population)}(undef, mating_size)
     progBar = Progress(epochs; showspeed=true, desc="Training: ")
     prev_best = (typemax(Float64),)
     
     for epoch in start_epoch:epochs
         same = Atomic{Int}(0)
-        perform_correction_callback!(population, epoch, correction_epochs, correction_amount, correction_callback)
+        perform_correction_callback!(population[1:population_size], epoch, correction_epochs, correction_amount, correction_callback)
 
-         
-        Threads.@threads for i in eachindex(population)
+        Threads.@threads for i in eachindex(population[1:population_size])
             if isnan(mean(population[i].fitness)) 
-                cache_value = nothing
-                lock(cache_lock) do
-                    cache_value = get(fit_cache, population[i].expression_raw, nothing)
-                end
+                cache_value = get(fit_cache, population[i].expression_raw, nothing)
                 if isnothing(cache_value)
                     
                     population[i].fitness = compute_fitness(population[i], evalStrategy)
@@ -315,8 +310,10 @@ The evolution process stops when either:
                 end
             end
         end
+
+
         sort!(population, by=x -> mean(x.fitness))
-        Threads.@threads for index in eachindex(population)
+        Threads.@threads for index in eachindex(population[1:population_size])
             fits_representation[index] = population[index].fitness
         end
 
@@ -337,11 +334,9 @@ The evolution process stops when either:
             (:validation_loss, @sprintf("%.6e", mean(val_loss)))
         ])
 
-        update_surrogate!(evalStrategy)
+        !isnothing(update_surrogate_callback) && update_surrogate_callback(evalStrategy)
+        !isnothing(evalStrategy.break_condition) && evalStrategy.break_condition(population[1:population_size], epoch) && break
 
-        if !isnothing(evalStrategy.break_condition) && evalStrategy.break_condition(population, epoch)
-            break
-        end
 
         if length(fits_representation[1]) == 1
             selectedMembers = tournament_selection(fits_representation, mating_size, tourni_size)
@@ -349,12 +344,12 @@ The evolution process stops when either:
             selectedMembers = nsga_selection(fits_representation)
         end
 
-        !isnothing(file_logger_callback) && file_logger_callback(population, epoch, selectedMembers)
+        !isnothing(file_logger_callback) && file_logger_callback(population[1:population_size], epoch, selectedMembers)
         !isnothing(save_state_callback) && save_state_callback(population, epoch)
 
         if epoch < epochs
             parents = population[selectedMembers.indices]
-            perform_step!(population, parents, next_gen, toolbox, mating_size)
+            perform_step!(population, parents, next_gen, toolbox, mating_size, epoch, epochs)
         end
 
     end
diff --git a/src/RegressionWrapper.jl b/src/RegressionWrapper.jl
index fe75a47..84bd646 100644
--- a/src/RegressionWrapper.jl
+++ b/src/RegressionWrapper.jl
@@ -254,10 +254,10 @@ Dictionary containing default probabilities and parameters for genetic algorithm
 These values can be adjusted to fine-tune the genetic algorithm's behavior.
 """
 const GENE_COMMON_PROBS = Dict{String,AbstractFloat}(
-    "one_point_cross_over_prob" => 0.3,
-    "two_point_cross_over_prob" => 0.2,
+    "one_point_cross_over_prob" => 0.5,
+    "two_point_cross_over_prob" => 0.4,
     "mutation_prob" => 1.0,
-    "mutation_rate" => 0.05,
+    "mutation_rate" => 0.1,
     "dominant_fusion_prob" => 0.1,
     "dominant_fusion_rate" => 0.1,
     "rezessiv_fusion_prob" => 0.1,
diff --git a/src/Util.jl b/src/Util.jl
index d5fef42..dfbe5ff 100644
--- a/src/Util.jl
+++ b/src/Util.jl
@@ -110,10 +110,10 @@ module GepUtils
 export find_indices_with_sum, compile_djl_datatype, optimize_constants!, minmax_scale, float16_scale, isclose
 export save_state, load_state
 export create_history_recorder, record_history!, record!, close_recorder!
-export HistoryRecorder, OptimizationHistory, get_history_arrays
+export HistoryRecorder, OptimizationHistory, get_history_arrays, one_hot_mean, FUNCTION_STRINGIFY
 export train_test_split
 export FUNCTION_LIB_COMMON, ARITY_LIB_COMMON
-export TensorNode, compile_network, FUNCTION_STRINGIFY
+export TensorNode, compile_network
 
 using OrderedCollections
 using DynamicExpressions
@@ -126,6 +126,7 @@ using Statistics
 using Random
 using Tensors
 using Flux
+using StatsBase
 using Base.Threads: @spawn
 
 
@@ -724,8 +725,10 @@ See also: [`DynamicExpressions.Node`](@ref), [`Optim.optimize`](@ref), [`LineSea
                 end
             end
         end
-
+        #needs to be revised!
         x0, refs = get_scalar_constants(current_node)
+
+
         function opt_step(x::AbstractVector)
             set_scalar_constants!(current_node,x, refs)
             loss(current_node)
@@ -808,5 +811,49 @@ function train_test_split(
     return x_train, y_train, x_test, y_test
 end
 
+function one_hot_mean(vectors::Vector{Vector{T}}, k::Int) where T <: Integer
+    if isempty(vectors)
+        return T[]
+    end
+    
+    max_value = maximum(maximum(v) for v in vectors if !isempty(v))
+    
+    max_length = maximum(length(v) for v in vectors)
+    
+    frequency_matrix = zeros(Float64, max_length, max_value)
+    
+    position_counts = zeros(Int, max_length)
+    
+    for vec in vectors
+        for (i, val) in enumerate(vec)
+            frequency_matrix[i, convert(Int, val)] += 1
+            position_counts[i] += 1
+        end
+    end
+    
+    for i in 1:max_length
+        if position_counts[i] > 0
+            frequency_matrix[i, :] ./= position_counts[i]
+        end
+    end
+    
+    result = Vector{T}(undef, max_length)
+    
+    for i in 1:max_length
+        sorted_indices = sortperm(frequency_matrix[i, :], rev=true)
+        if k == 1
+            result[i] = convert(T, sorted_indices[1])
+        else
+            top_k_indices = sorted_indices[1:min(k, length(sorted_indices))]
+            top_k_probs = frequency_matrix[i, top_k_indices]
+            top_k_probs = top_k_probs ./ sum(top_k_probs)
+            result[i] = convert(T, sample(top_k_indices, Weights(top_k_probs)))
+        end
+    end
+    
+    return result
+end
+
+
 
 end
\ No newline at end of file
diff --git a/test/Entity_test.jl b/test/Entity_test.jl
index cbc84bd..31b52b8 100644
--- a/test/Entity_test.jl
+++ b/test/Entity_test.jl
@@ -49,8 +49,7 @@ end
         
         @test toolbox.gene_count == 2
         @test toolbox.head_len == 3
-        @test length(toolbox.headsyms) == 2  # + and *
-        @test length(toolbox.unary_syms) == 1  # square
+        @test length(toolbox.headsyms) == 2+1+2  # all symbols possible in the head
         @test length(toolbox.tailsyms) == 2  # x and 1.0
     end
 

From b56771145a80d6ca388b705d50499e5841774c4f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Maximilian=20Rei=C3=9Fmann?=
 <reissmannm@4000l-217946-l.mobility.unimelb.net.au>
Date: Sun, 2 Mar 2025 16:54:29 +1100
Subject: [PATCH 3/6] adjust param

---
 src/RegressionWrapper.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/RegressionWrapper.jl b/src/RegressionWrapper.jl
index 84bd646..7591d89 100644
--- a/src/RegressionWrapper.jl
+++ b/src/RegressionWrapper.jl
@@ -268,7 +268,7 @@ const GENE_COMMON_PROBS = Dict{String,AbstractFloat}(
     "reverse_insertion" => 0.1,
     "reverse_insertion_tail" => 0.1,
     "gene_transposition" => 0.2,
-    "mating_size" => 0.5)
+    "mating_size" => 0.7)
 
 const SymbolDict = OrderedDict{Int8,Int8}
 const CallbackDict = Dict{Int8,Function}

From cdbe9788ca1dfeff8ad992ef508a8fe58632b867 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Maximilian=20Rei=C3=9Fmann?=
 <reissmannm@4000l-217946-l.mobility.unimelb.net.au>
Date: Mon, 3 Mar 2025 11:41:35 +1100
Subject: [PATCH 4/6] correct xi_core fun

---
 src/Entities.jl          | 12 ++++++------
 src/Losses.jl            | 23 +++++++++++++++--------
 src/RegressionWrapper.jl | 12 +++++++-----
 3 files changed, 28 insertions(+), 19 deletions(-)

diff --git a/src/Entities.jl b/src/Entities.jl
index dc434fc..1b9e2f8 100644
--- a/src/Entities.jl
+++ b/src/Entities.jl
@@ -720,10 +720,10 @@ end
 end
 
 
-@inline function gene_fussion_extent!(chromosome1::Chromosome, parents::Vector{Chromosome}, pb::Real=0.2)
+@inline function gene_fussion_extent!(chromosome1::Chromosome, parents::Vector{Chromosome}, pb::Real=0.2; topk::Int=1)
     buffer = THREAD_BUFFERS[Threads.threadid()]
     len_a = length(chromosome1.genes)
-    genes2 = one_hot_mean([p.genes for p in parents], 2)
+    genes2 = one_hot_mean([p.genes for p in parents], topk)
 
     create_operator_masks(chromosome1.genes, genes2, pb)
 
@@ -937,12 +937,12 @@ Modify chromosome genes in place
         gene_transposition!(space_next[i+1])
     end
 
-    if rand_space[16] < toolbox.gep_probs["gene_transposition"]
-        gene_fussion_extent!(space_next[i], parents)
+    if rand_space[16] < toolbox.gep_probs["gene_averaging_prob"]
+        gene_fussion_extent!(space_next[i], parents, toolbox.gep_probs["gene_averaging_rate"])
     end
 
-    if rand_space[17] < toolbox.gep_probs["gene_transposition"]
-        gene_fussion_extent!(space_next[i+1], parents)
+    if rand_space[17] < toolbox.gep_probs["gene_averaging_prob"]
+        gene_fussion_extent!(space_next[i+1], parents, toolbox.gep_probs["gene_averaging_rate"])
     end
 
 
diff --git a/src/Losses.jl b/src/Losses.jl
index 3a8da0d..968a58e 100644
--- a/src/Losses.jl
+++ b/src/Losses.jl
@@ -73,6 +73,7 @@ module LossFunction
 export get_loss_function
 using Statistics
 using LoopVectorization
+using Random
 
 function floor_to_n10p(x::T) where T<:AbstractFloat
     abs_x = abs(x)
@@ -98,15 +99,21 @@ function xicor(y_true::AbstractArray{T}, y_pred::AbstractArray{T}; ties::Bool=tr
             end
         end
         
-        tie_indices = tie_counts .> 1
-        mean_ties = mean(tie_counts[tie_indices])
+        tie_groups = Dict{Int, Vector{Int}}()
+        for i in 1:n
+            val = r[i]
+            if haskey(tie_groups, val)
+                push!(tie_groups[val], i)
+            else
+                tie_groups[val] = [i]
+            end
+        end
         
-        Threads.@threads for i in 1:n
-            if tie_counts[i] > 1
-                tie_group = findall(==(r[i]), r)
-                shuffled = Random.shuffle(0:(tie_counts[i]-1))
-                for (idx, group_idx) in enumerate(tie_group)
-                    r[group_idx] = r[i] - shuffled[idx]
+        for (val, group) in tie_groups
+            if length(group) > 1
+                shuffled = Random.shuffle(0:(length(group)-1))
+                for (idx, group_idx) in enumerate(group)
+                    r[group_idx] = val - shuffled[idx]
                 end
             end
         end
diff --git a/src/RegressionWrapper.jl b/src/RegressionWrapper.jl
index 7591d89..04dc58c 100644
--- a/src/RegressionWrapper.jl
+++ b/src/RegressionWrapper.jl
@@ -258,16 +258,18 @@ const GENE_COMMON_PROBS = Dict{String,AbstractFloat}(
     "two_point_cross_over_prob" => 0.4,
     "mutation_prob" => 1.0,
     "mutation_rate" => 0.1,
-    "dominant_fusion_prob" => 0.1,
+    "dominant_fusion_prob" => 0.0,
     "dominant_fusion_rate" => 0.1,
-    "rezessiv_fusion_prob" => 0.1,
+    "rezessiv_fusion_prob" => 0.0,
     "rezessiv_fusion_rate" => 0.1,
-    "fusion_prob" => 0.1,
-    "fusion_rate" => 0.1,
+    "fusion_prob" => 0.0,
+    "fusion_rate" => 0.0,
     "inversion_prob" => 0.1,
     "reverse_insertion" => 0.1,
     "reverse_insertion_tail" => 0.1,
-    "gene_transposition" => 0.2,
+    "gene_transposition" => 0.1,
+    "gene_averaging_prob" => 1.0,
+    "gene_averaging_rate" => 0.1,
     "mating_size" => 0.7)
 
 const SymbolDict = OrderedDict{Int8,Int8}

From b973e33c2ce8fd75f7925e1512ccddd4d305b208 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Maximilian=20Rei=C3=9Fmann?=
 <reissmannm@4000l-217946-l.mobility.unimelb.net.au>
Date: Tue, 4 Mar 2025 11:26:17 +1100
Subject: [PATCH 5/6] revert buffer due to approx issues

---
 src/Entities.jl          | 322 ++++++++++++---------------------------
 src/Gep.jl               |   7 +-
 src/RegressionWrapper.jl |  14 +-
 3 files changed, 111 insertions(+), 232 deletions(-)

diff --git a/src/Entities.jl b/src/Entities.jl
index 1b9e2f8..ac25aa4 100644
--- a/src/Entities.jl
+++ b/src/Entities.jl
@@ -236,9 +236,9 @@ struct Toolbox
     function Toolbox(gene_count::Int, head_len::Int, symbols::OrderedDict{Int8,Int8}, gene_connections::Vector{Int8},
         callbacks::Dict, nodes::OrderedDict, gep_probs::Dict{String,AbstractFloat};
         unary_prob::Real=0.1, preamble_syms=Int8[],
-        number_of_objectives::Int=1, operators_::Union{OperatorEnum,Nothing}=nothing, 
+        number_of_objectives::Int=1, operators_::Union{OperatorEnum,Nothing}=nothing,
         function_complile::Function=compile_djl_datatype,
-        tail_weights_::Union{Weights,Nothing}=nothing, 
+        tail_weights_::Union{Weights,Nothing}=nothing,
         head_tail_balance::Real=0.8)
 
         fitness_reset = (
@@ -253,19 +253,19 @@ struct Toolbox
         tailsyms = [key for (key, arity) in symbols if arity < 1 && !(key in preamble_syms)]
         len_preamble = length(preamble_syms)
         gen_start_indices = [gene_count + (gene_len * (i - 1)) for i in 1:gene_count]
-        
 
-        tail_weights = isnothing(tail_weights_) ? weights([1/length(tailsyms) for _ in 1:length(tailsyms)]) : tail_weights_
+
+        tail_weights = isnothing(tail_weights_) ? weights([1 / length(tailsyms) for _ in 1:length(tailsyms)]) : tail_weights_
         head_weights = weights([
-            fill(head_tail_balance/length(headsyms), length(headsyms));
-            fill(unary_prob/length(unary_syms), length(unary_syms));
-            tail_weights .* (1-head_tail_balance-up)
+            fill(head_tail_balance / length(headsyms), length(headsyms));
+            fill(unary_prob / length(unary_syms), length(unary_syms));
+            tail_weights .* (1 - head_tail_balance - up)
         ])
 
         #ensure_buffer_size!(head_len, gene_count)
         head_syms = vcat([headsyms, unary_syms, tailsyms]...)
         new(gene_count, head_len, symbols, gene_connections, head_syms, tailsyms, symbols,
-            callbacks, nodes, gen_start_indices, gep_probs, fitness_reset, preamble_syms, len_preamble, operators_, 
+            callbacks, nodes, gen_start_indices, gep_probs, fitness_reset, preamble_syms, len_preamble, operators_,
             function_complile,
             tail_weights, head_weights)
     end
@@ -436,26 +436,26 @@ Vector{Int8} representing the K-expression of the chromosome
         rolled_indices[idx+1] = @view genes[i:i+first(indices)-1]
     end
 
-    !split && return vcat(rolled_indices...) 
+    !split && return vcat(rolled_indices...)
     return rolled_indices
 end
 
 @inline function split_karva(chromosome::Chromosome, coeffs::Int=2)
     raw = _karva_raw(chromosome; split=true)
     connectors = popfirst!(raw)[coeffs:end]
-    gene_count_per_factor = div(chromosome.toolbox.gene_count,coeffs)
+    gene_count_per_factor = div(chromosome.toolbox.gene_count, coeffs)
     retval = []
     for _ in 1:coeffs
         temp_cons = splice!(connectors, 1:gene_count_per_factor-1)
-        temp_genes = reduce(vcat, splice!(raw,1:gene_count_per_factor))
-        push!(retval,vcat([temp_cons, temp_genes]...))
+        temp_genes = reduce(vcat, splice!(raw, 1:gene_count_per_factor))
+        push!(retval, vcat([temp_cons, temp_genes]...))
     end
     return retval
 end
 
 @inline function print_karva_strings(chromosome::Chromosome)
     coeff_count = length(chromosome.toolbox.preamble_syms)
-    callback_ = Dict{Int8, Function}()
+    callback_ = Dict{Int8,Function}()
 
     for (key, value) in chromosome.toolbox.callbacks
         if Symbol(value) in keys(FUNCTION_STRINGIFY)
@@ -466,10 +466,10 @@ end
     end
 
     return compile_djl_datatype(
-        chromosome.expression_raw, 
+        chromosome.expression_raw,
         chromosome.toolbox.arrity_by_id,
-        callback_, 
-        chromosome.toolbox.nodes, 
+        callback_,
+        chromosome.toolbox.nodes,
         1)
 end
 
@@ -491,8 +491,8 @@ Vector{Int8} representing gene
 """
 @inline function generate_gene(headsyms::Vector{Int8}, tailsyms::Vector{Int8}, headlen::Int,
     tail_weights::Weights, head_weights::Weights)
-    head = sample(headsyms,head_weights,headlen)
-    tail = sample(tailsyms,tail_weights,headlen + 1)
+    head = sample(headsyms, head_weights, headlen)
+    tail = sample(tailsyms, tail_weights, headlen + 1)
     return vcat(head, tail)
 end
 
@@ -539,27 +539,18 @@ end
 
 
 @inline function create_operator_masks(gene_seq_alpha::Vector{Int8}, gene_seq_beta::Vector{Int8}, pb::Real=0.2)
-    buffer = THREAD_BUFFERS[Threads.threadid()]
-    len_a = length(gene_seq_alpha)
-
-    buffer.alpha_operator[1:len_a] .= zeros(Int8)
-    buffer.beta_operator[1:len_a] .= zeros(Int8)
-
-    indices_alpha = rand(1:len_a, min(round(Int, (pb * len_a)), len_a))
-    indices_beta = rand(1:len_a, min(round(Int, (pb * len_a)), len_a))
-
-    buffer.alpha_operator[indices_alpha] .= Int8(1)
-    buffer.beta_operator[indices_beta] .= Int8(1)
+    alpha_operator = zeros(Int8, length(gene_seq_alpha))
+    beta_operator = zeros(Int8, length(gene_seq_beta))
+    indices_alpha = rand(1:length(gene_seq_alpha), min(round(Int, (pb * length(gene_seq_alpha))), length(gene_seq_alpha)))
+    indices_beta = rand(1:length(gene_seq_beta), min(round(Int, (pb * length(gene_seq_beta))), length(gene_seq_beta)))
+    alpha_operator[indices_alpha] .= Int8(1)
+    beta_operator[indices_beta] .= Int8(1)
+    return alpha_operator, beta_operator
 end
 
-
 @inline function create_operator_point_one_masks(gene_seq_alpha::Vector{Int8}, gene_seq_beta::Vector{Int8}, toolbox::Toolbox)
-    buffer = THREAD_BUFFERS[Threads.threadid()]
-    len_a = length(gene_seq_alpha)
-
-    buffer.alpha_operator[1:len_a] .= zeros(Int8)
-    buffer.beta_operator[1:len_a] .= zeros(Int8)
-
+    alpha_operator = zeros(Int8, length(gene_seq_alpha))
+    beta_operator = zeros(Int8, length(gene_seq_beta))
     head_len = toolbox.head_len
     gene_len = head_len * 2 + 1
 
@@ -569,21 +560,20 @@ end
 
         point1 = rand(ref:mid)
         point2 = rand((mid+1):(ref+gene_len-1))
-        buffer.alpha_operator[point1:point2] .= Int8(1)
+        alpha_operator[point1:point2] .= Int8(1)
 
         point1 = rand(ref:mid)
         point2 = rand((mid+1):(ref+gene_len-1))
-        buffer.beta_operator[point1:point2] .= Int8(1)
+        beta_operator[point1:point2] .= Int8(1)
     end
+
+    return alpha_operator, beta_operator
 end
 
 
 @inline function create_operator_point_two_masks(gene_seq_alpha::Vector{Int8}, gene_seq_beta::Vector{Int8}, toolbox::Toolbox)
-    buffer = THREAD_BUFFERS[Threads.threadid()]
-    len_a = length(gene_seq_alpha)
-
-    buffer.alpha_operator[1:len_a] .= zeros(Int8)
-    buffer.beta_operator[1:len_a] .= zeros(Int8)
+    alpha_operator = zeros(Int8, length(gene_seq_alpha))
+    beta_operator = zeros(Int8, length(gene_seq_beta))
     head_len = toolbox.head_len
     gene_len = head_len * 2 + 1
 
@@ -597,17 +587,17 @@ end
         point1 = rand(start:quarter)
         point2 = rand(quarter+1:half)
         point3 = rand(half+1:end_gene)
-        buffer.alpha_operator[point1:point2] .= Int8(1)
-        buffer.alpha_operator[point3:end_gene] .= Int8(1)
+        alpha_operator[point1:point2] .= Int8(1)
+        alpha_operator[point3:end_gene] .= Int8(1)
 
 
         point1 = rand(start:end_gene)
         point2 = rand(point1:end_gene)
-        buffer.beta_operator[point1:point2] .= Int8(1)
-        buffer.beta_operator[point2+1:end_gene] .= Int8(1)
+        beta_operator[point1:point2] .= Int8(1)
+        beta_operator[point2+1:end_gene] .= Int8(1)
     end
 
-
+    return alpha_operator, beta_operator
 end
 
 @inline function replicate(chromosome1::Chromosome, chromosome2::Chromosome, toolbox)
@@ -616,82 +606,97 @@ end
 
 
 @inline function gene_dominant_fusion!(chromosome1::Chromosome, chromosome2::Chromosome, pb::Real=0.2)
-    buffer = THREAD_BUFFERS[Threads.threadid()]
-    len_a = length(chromosome1.genes)
-    create_operator_masks(chromosome1.genes, chromosome2.genes, pb)
+    gene_seq_alpha = chromosome1.genes
+    gene_seq_beta = chromosome2.genes
+    alpha_operator, beta_operator = create_operator_masks(gene_seq_alpha, gene_seq_beta, pb)
+
+    child_1_genes = similar(gene_seq_alpha)
+    child_2_genes = similar(gene_seq_beta)
 
-    @inbounds @simd for i in eachindex(chromosome1.genes)
-        buffer.child_1_genes[i] = buffer.alpha_operator[i] == 1 ? max(chromosome1.genes[i], chromosome2.genes[i]) : chromosome1.genes[i]
-        buffer.child_2_genes[i] = buffer.beta_operator[i] == 1 ? max(chromosome1.genes[i], chromosome2.genes[i]) : chromosome2.genes[i]
+    @inbounds @simd for i in eachindex(gene_seq_alpha)
+        child_1_genes[i] = alpha_operator[i] == 1 ? max(gene_seq_alpha[i], gene_seq_beta[i]) : gene_seq_alpha[i]
+        child_2_genes[i] = beta_operator[i] == 1 ? max(gene_seq_alpha[i], gene_seq_beta[i]) : gene_seq_beta[i]
     end
 
-    chromosome1.genes .= @view buffer.child_1_genes[1:len_a]
-    chromosome2.genes .= @view buffer.child_2_genes[1:len_a]
+    chromosome1.genes = child_1_genes
+    chromosome2.genes = child_2_genes
 end
 
 @inline function gen_rezessiv!(chromosome1::Chromosome, chromosome2::Chromosome, pb::Real=0.2)
-    buffer = THREAD_BUFFERS[Threads.threadid()]
-    len_a = length(chromosome1.genes)
-    create_operator_masks(chromosome1.genes, chromosome2.genes, pb)
+    gene_seq_alpha = chromosome1.genes
+    gene_seq_beta = chromosome2.genes
+    alpha_operator, beta_operator = create_operator_masks(gene_seq_alpha, gene_seq_beta, pb)
 
-    @inbounds @simd for i in eachindex(chromosome1.genes)
-        buffer.child_1_genes[i] = buffer.alpha_operator[i] == 1 ? min(chromosome1.genes[i], chromosome2.genes[i]) : chromosome1.genes[i]
-        buffer.child_2_genes[i] = buffer.beta_operator[i] == 1 ? min(chromosome1.genes[i], chromosome2.genes[i]) : chromosome2.genes[i]
+    child_1_genes = similar(gene_seq_alpha)
+    child_2_genes = similar(gene_seq_beta)
+
+    @inbounds @simd for i in eachindex(gene_seq_alpha)
+        child_1_genes[i] = alpha_operator[i] == 1 ? min(gene_seq_alpha[i], gene_seq_beta[i]) : gene_seq_alpha[i]
+        child_2_genes[i] = beta_operator[i] == 1 ? min(gene_seq_alpha[i], gene_seq_beta[i]) : gene_seq_beta[i]
     end
 
-    chromosome1.genes .= @view buffer.child_1_genes[1:len_a]
-    chromosome2.genes .= @view buffer.child_2_genes[1:len_a]
+    chromosome1.genes = child_1_genes
+    chromosome2.genes = child_2_genes
 end
 
 @inline function gene_fussion!(chromosome1::Chromosome, chromosome2::Chromosome, pb::Real=0.2)
-    buffer = THREAD_BUFFERS[Threads.threadid()]
-    len_a = length(chromosome1.genes)
-    create_operator_masks(chromosome1.genes, chromosome2.genes, pb)
+    gene_seq_alpha = chromosome1.genes
+    gene_seq_beta = chromosome2.genes
+    alpha_operator, beta_operator = create_operator_masks(gene_seq_alpha, gene_seq_beta, pb)
+
+    child_1_genes = similar(gene_seq_alpha)
+    child_2_genes = similar(gene_seq_beta)
 
-    @inbounds @simd for i in eachindex(chromosome1.genes)
-        buffer.child_1_genes[i] = buffer.alpha_operator[i] == 1 ? Int8((chromosome1.genes[i] + chromosome2.genes[i]) ÷ 2) : chromosome1.genes[i]
-        buffer.child_2_genes[i] = buffer.beta_operator[i] == 1 ? Int8((chromosome1.genes[i] + chromosome2.genes[i]) ÷ 2) : chromosome2.genes[i]
+    @inbounds @simd for i in eachindex(gene_seq_alpha)
+        child_1_genes[i] = alpha_operator[i] == 1 ? Int8((gene_seq_alpha[i] + gene_seq_beta[i]) ÷ 2) : gene_seq_alpha[i]
+        child_2_genes[i] = beta_operator[i] == 1 ? Int8((gene_seq_alpha[i] + gene_seq_beta[i]) ÷ 2) : gene_seq_beta[i]
     end
 
-    chromosome1.genes .= @view buffer.child_1_genes[1:len_a]
-    chromosome2.genes .= @view buffer.child_2_genes[1:len_a]
+    chromosome1.genes = child_1_genes
+    chromosome2.genes = child_2_genes
 end
 
 @inline function gene_one_point_cross_over!(chromosome1::Chromosome, chromosome2::Chromosome)
-    buffer = THREAD_BUFFERS[Threads.threadid()]
-    len_a = length(chromosome1.genes)
-    create_operator_point_one_masks(chromosome1.genes, chromosome2.genes, chromosome1.toolbox)
+    gene_seq_alpha = chromosome1.genes
+    gene_seq_beta = chromosome2.genes
+    alpha_operator, beta_operator = create_operator_point_one_masks(gene_seq_alpha, gene_seq_beta, chromosome1.toolbox)
 
-    @inbounds @simd for i in eachindex(chromosome1.genes)
-        buffer.child_1_genes[i] = buffer.alpha_operator[i] == 1 ? chromosome1.genes[i] : chromosome2.genes[i]
-        buffer.child_2_genes[i] = buffer.beta_operator[i] == 1 ? chromosome2.genes[i] : chromosome1.genes[i]
+    child_1_genes = similar(gene_seq_alpha)
+    child_2_genes = similar(gene_seq_beta)
+
+    @inbounds @simd for i in eachindex(gene_seq_alpha)
+        child_1_genes[i] = alpha_operator[i] == 1 ? gene_seq_alpha[i] : gene_seq_beta[i]
+        child_2_genes[i] = beta_operator[i] == 1 ? gene_seq_beta[i] : gene_seq_alpha[i]
     end
 
-    chromosome1.genes .= @view buffer.child_1_genes[1:len_a]
-    chromosome2.genes .= @view buffer.child_2_genes[1:len_a]
+    chromosome1.genes = child_1_genes
+    chromosome2.genes = child_2_genes
 end
 
 @inline function gene_two_point_cross_over!(chromosome1::Chromosome, chromosome2::Chromosome)
-    buffer = THREAD_BUFFERS[Threads.threadid()]
-    len_a = length(chromosome1.genes)
-    create_operator_point_two_masks(chromosome1.genes, chromosome2.genes, chromosome1.toolbox)
+    gene_seq_alpha = chromosome1.genes
+    gene_seq_beta = chromosome2.genes
+    alpha_operator, beta_operator = create_operator_point_two_masks(gene_seq_alpha, gene_seq_beta, chromosome1.toolbox)
+
+    child_1_genes = similar(gene_seq_alpha)
+    child_2_genes = similar(gene_seq_beta)
 
-    @inbounds @simd for i in eachindex(chromosome1.genes)
-        buffer.child_1_genes[i] = buffer.alpha_operator[i] == 1 ? chromosome1.genes[i] : chromosome2.genes[i]
-        buffer.child_2_genes[i] = buffer.beta_operator[i] == 1 ? chromosome2.genes[i] : chromosome1.genes[i]
+    @inbounds @simd for i in eachindex(gene_seq_alpha)
+        child_1_genes[i] = alpha_operator[i] == 1 ? gene_seq_alpha[i] : gene_seq_beta[i]
+        child_2_genes[i] = beta_operator[i] == 1 ? gene_seq_beta[i] : gene_seq_alpha[i]
     end
 
-    chromosome1.genes .= @view buffer.child_1_genes[1:len_a]
-    chromosome2.genes .= @view buffer.child_2_genes[1:len_a]
+    chromosome1.genes = child_1_genes
+    chromosome2.genes = child_2_genes
 end
 
 @inline function gene_mutation!(chromosome1::Chromosome, pb::Real=0.25)
-    buffer = THREAD_BUFFERS[Threads.threadid()]
-    create_operator_masks(chromosome1.genes, chromosome1.genes, pb)
-    buffer.child_1_genes[1:length(chromosome1.genes)] .= generate_chromosome(chromosome1.toolbox).genes[1:length(chromosome1.genes)]
+    gene_seq_alpha = chromosome1.genes
+    alpha_operator, _ = create_operator_masks(gene_seq_alpha, gene_seq_alpha, pb)
+    mutation_seq_1 = generate_chromosome(chromosome1.toolbox)
 
-    @inbounds @simd for i in eachindex(chromosome1.genes)
-        chromosome1.genes[i] = buffer.alpha_operator[i] == 1 ? buffer.child_1_genes[i] : chromosome1.genes[i]
+    @inbounds @simd for i in eachindex(gene_seq_alpha)
+        gene_seq_alpha[i] = alpha_operator[i] == 1 ? mutation_seq_1.genes[i] : gene_seq_alpha[i]
     end
 end
 
@@ -720,113 +725,6 @@ end
 end
 
 
-@inline function gene_fussion_extent!(chromosome1::Chromosome, parents::Vector{Chromosome}, pb::Real=0.2; topk::Int=1)
-    buffer = THREAD_BUFFERS[Threads.threadid()]
-    len_a = length(chromosome1.genes)
-    genes2 = one_hot_mean([p.genes for p in parents], topk)
-
-    create_operator_masks(chromosome1.genes, genes2, pb)
-
-    @inbounds @simd for i in eachindex(chromosome1.genes)
-        buffer.child_1_genes[i] = buffer.alpha_operator[i] == 1 ? genes2[i] : chromosome1.genes[i]
-    end
-
-    chromosome1.genes .= @view buffer.child_1_genes[1:len_a]
-end
-
-
-"""
-    diversity_injection!(chromosome::Chromosome, diversity_factor::Real=0.5)
-
-Injects diversity by randomly replacing portions of genes with completely new material.
-Useful for avoiding premature convergence.
-
-# Arguments
-- `chromosome::Chromosome`: Target chromosome
-- `diversity_factor::Real=0.5`: Controls how much of the chromosome to randomize
-"""
-@inline function diversity_injection!(chromosome::Chromosome, diversity_factor::Real=0.5)
-    gene_len = chromosome.toolbox.head_len * 2 + 1
-    gene_count = chromosome.toolbox.gene_count
-    
-    genes_to_randomize = max(1, round(Int, diversity_factor * gene_count))
-    
-    genes_indices = sample(1:gene_count, genes_to_randomize, replace=false)
-    
-    for gene_idx in genes_indices
-        gene_start = chromosome.toolbox.gen_start_indices[gene_idx]
-        
-        new_gene = generate_gene(
-            chromosome.toolbox.headsyms, 
-            chromosome.toolbox.tailsyms, 
-            chromosome.toolbox.head_len,
-            chromosome.toolbox.tail_weights,
-            chromosome.toolbox.head_weights
-        )
-        
-        chromosome.genes[gene_start:(gene_start + gene_len - 1)] .= new_gene
-    end
-end
-
-"""
-    adaptive_mutation!(chromosome::Chromosome, generation::Int, max_generations::Int, pb_start::Real=0.4, pb_end::Real=0.1)
-
-Adaptive mutation operator that adjusts mutation rate based on generation progress.
-Higher mutation rate early for exploration, lower later for exploitation.
-
-# Arguments
-- `chromosome::Chromosome`: Target chromosome
-- `generation::Int`: Current generation
-- `max_generations::Int`: Maximum generations for the run
-- `pb_start::Real=0.4`: Starting probability (higher for exploration)
-- `pb_end::Real=0.1`: Ending probability (lower for exploitation)
-"""
-@inline function adaptive_mutation!(chromosome::Chromosome, generation::Int, max_generations::Int; 
-        pb_start::Real=0.4, pb_end::Real=0.1)
-    progress = generation / max_generations
-    adaptive_pb = pb_start - (pb_start - pb_end) * progress
-    
-    buffer = THREAD_BUFFERS[Threads.threadid()]
-    create_operator_masks(chromosome.genes, chromosome.genes, adaptive_pb)
-    buffer.child_1_genes[1:length(chromosome.genes)] .= generate_chromosome(chromosome.toolbox).genes[1:length(chromosome.genes)]
-
-    @inbounds @simd for i in eachindex(chromosome.genes)
-        chromosome.genes[i] = buffer.alpha_operator[i] == 1 ? buffer.child_1_genes[i] : chromosome.genes[i]
-    end
-end
-
-"""
-    gene_transposition!(chromosome::Chromosome, len::Int=3)
-
-Transposes a small segment of genes from one position to another within the same chromosome,
-preserving their order but changing their context.
-
-# Arguments
-- `chromosome::Chromosome`: Target chromosome
-- `len::Int=3`: Length of segment to transpose
-"""
-@inline function gene_transposition!(chromosome::Chromosome, len::Int=3)
-    gene_len = chromosome.toolbox.head_len * 2 + 1
-    gene_count = chromosome.toolbox.gene_count
-    
-    source_gene_idx = rand(1:gene_count)
-    target_gene_idx = rand([i for i in 1:gene_count if i != source_gene_idx])
-    
-    source_start = chromosome.toolbox.gen_start_indices[source_gene_idx]
-    target_start = chromosome.toolbox.gen_start_indices[target_gene_idx]
-    
-    segment_len = min(len, gene_len - 1)
-    
-    source_pos = rand(source_start:(source_start + gene_len - segment_len - 1))
-    target_pos = rand(target_start:(target_start + gene_len - segment_len - 1))
-    
-    segment = copy(chromosome.genes[source_pos:(source_pos + segment_len - 1)])
-    
-    target_region = chromosome.genes[target_pos:(target_pos + segment_len - 1)]
-    chromosome.genes[target_pos:(target_pos + segment_len - 1)] .= segment
-end
-
-
 """
     genetic_operations!(space_next::Vector{Chromosome}, i::Int, toolbox::Toolbox)
 
@@ -862,11 +760,10 @@ Genetic operators for chromosome modification.
 # Effects
 Modify chromosome genes in place
 """
-@inline function genetic_operations!(space_next::Vector{Chromosome}, i::Int, toolbox::Toolbox;
-        generation::Int=0, max_generation::Int=0, parents::Union{Vector{Chromosome},Nothing}=nothing)
+@inline function genetic_operations!(space_next::Vector{Chromosome}, i::Int, toolbox::Toolbox; generation::Int, max_generation::Int, parents::Vector{Chromosome})
     #allocate them within the space - create them once instead of n time 
     space_next[i:i+1] = replicate(space_next[i], space_next[i+1], toolbox)
-    rand_space = rand(17)
+    rand_space = rand(15)
 
 
     if rand_space[1] < toolbox.gep_probs["one_point_cross_over_prob"]
@@ -929,24 +826,5 @@ Modify chromosome genes in place
         reverse_insertion_tail!(space_next[i+1])
     end
 
-    if rand_space[16] < toolbox.gep_probs["gene_transposition"]
-        gene_transposition!(space_next[i])
-    end
-
-    if rand_space[17] < toolbox.gep_probs["gene_transposition"]
-        gene_transposition!(space_next[i+1])
-    end
-
-    if rand_space[16] < toolbox.gep_probs["gene_averaging_prob"]
-        gene_fussion_extent!(space_next[i], parents, toolbox.gep_probs["gene_averaging_rate"])
-    end
-
-    if rand_space[17] < toolbox.gep_probs["gene_averaging_prob"]
-        gene_fussion_extent!(space_next[i+1], parents, toolbox.gep_probs["gene_averaging_rate"])
-    end
-
-
-    
-
 end
 end
diff --git a/src/Gep.jl b/src/Gep.jl
index 45783ed..fa9a3e4 100644
--- a/src/Gep.jl
+++ b/src/Gep.jl
@@ -297,12 +297,13 @@ The evolution process stops when either:
 
         Threads.@threads for i in eachindex(population[1:population_size])
             if isnan(mean(population[i].fitness)) 
-                cache_value = get(fit_cache, population[i].expression_raw, nothing)
+                key = copy(population[i].expression_raw)
+                cache_value = get(fit_cache,key,nothing)
                 if isnothing(cache_value)
                     
                     population[i].fitness = compute_fitness(population[i], evalStrategy)
                     lock(cache_lock)
-                        fit_cache[population[i].expression_raw] = population[i].fitness
+                        fit_cache[key] = population[i].fitness
                     unlock(cache_lock)
                 else
                     atomic_add!(same, 1)
@@ -339,7 +340,7 @@ The evolution process stops when either:
 
 
         if length(fits_representation[1]) == 1
-            selectedMembers = tournament_selection(fits_representation, mating_size, tourni_size)
+            selectedMembers = tournament_selection(fits_representation[1:mating_size], mating_size, tourni_size)
         else
             selectedMembers = nsga_selection(fits_representation)
         end
diff --git a/src/RegressionWrapper.jl b/src/RegressionWrapper.jl
index 04dc58c..e95511b 100644
--- a/src/RegressionWrapper.jl
+++ b/src/RegressionWrapper.jl
@@ -254,8 +254,8 @@ Dictionary containing default probabilities and parameters for genetic algorithm
 These values can be adjusted to fine-tune the genetic algorithm's behavior.
 """
 const GENE_COMMON_PROBS = Dict{String,AbstractFloat}(
-    "one_point_cross_over_prob" => 0.5,
-    "two_point_cross_over_prob" => 0.4,
+    "one_point_cross_over_prob" => 0.2,
+    "two_point_cross_over_prob" => 0.1,
     "mutation_prob" => 1.0,
     "mutation_rate" => 0.1,
     "dominant_fusion_prob" => 0.0,
@@ -264,12 +264,12 @@ const GENE_COMMON_PROBS = Dict{String,AbstractFloat}(
     "rezessiv_fusion_rate" => 0.1,
     "fusion_prob" => 0.0,
     "fusion_rate" => 0.0,
-    "inversion_prob" => 0.1,
-    "reverse_insertion" => 0.1,
-    "reverse_insertion_tail" => 0.1,
-    "gene_transposition" => 0.1,
+    "inversion_prob" => 0.2,
+    "reverse_insertion" => 0.2,
+    "reverse_insertion_tail" => 0.2,
+    "gene_transposition" => 0.0,
     "gene_averaging_prob" => 1.0,
-    "gene_averaging_rate" => 0.1,
+    "gene_averaging_rate" => 0.05,
     "mating_size" => 0.7)
 
 const SymbolDict = OrderedDict{Int8,Int8}

From 4306ea7215254e922a8162ea9a5f564a4570831f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Maximilian=20Rei=C3=9Fmann?=
 <reissmannm@4000l-217946-l.mobility.unimelb.net.au>
Date: Tue, 11 Mar 2025 13:47:42 +1100
Subject: [PATCH 6/6] add latin hypercube sampling

---
 Project.toml                     |  1 +
 examples/Main_min_bench.jl       |  2 +-
 paper/ConstraintViaSBP.jl        |  2 +-
 src/GeneExpressionProgramming.jl |  5 +-
 src/Gep.jl                       | 67 ++++++++++++++++++----
 src/RegressionWrapper.jl         |  6 +-
 src/Util.jl                      | 95 +++++++++++++++++++++++++++++++-
 7 files changed, 159 insertions(+), 19 deletions(-)

diff --git a/Project.toml b/Project.toml
index 9ba2200..010ecb6 100644
--- a/Project.toml
+++ b/Project.toml
@@ -20,6 +20,7 @@ LineSearches = "d3d80556-e9d4-5f37-9878-2ab0fcc64255"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
 LoopVectorization = "bdcacae8-1622-11e9-2a5c-532679323890"
+NearestNeighbors = "b8a86587-4115-5ab1-83bc-aa920d37bbce"
 Optim = "429524aa-4258-5aef-a3af-852621145aeb"
 OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
 PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a"
diff --git a/examples/Main_min_bench.jl b/examples/Main_min_bench.jl
index 6f7e2a5..6162755 100644
--- a/examples/Main_min_bench.jl
+++ b/examples/Main_min_bench.jl
@@ -19,4 +19,4 @@ y_data = @. x_data[:,1] * x_data[:,1] + x_data[:,1] * x_data[:,2] - 2 * x_data[:
 
 #define the 
 regressor = GepRegressor(number_features)
-@btime fit!(regressor, epochs, population_size, x_data', y_data; loss_fun="mse")
\ No newline at end of file
+@btime fit!(regressor, epochs, population_size, x_data', y_data; loss_fun="mse", population_sampling_multiplier=1000)
\ No newline at end of file
diff --git a/paper/ConstraintViaSBP.jl b/paper/ConstraintViaSBP.jl
index 391bf06..8376a18 100644
--- a/paper/ConstraintViaSBP.jl
+++ b/paper/ConstraintViaSBP.jl
@@ -89,7 +89,7 @@ function main()
                 @show ("Current case: ", case_name)
                 #gep_params
                 epochs = 1000
-                population_size = 1500
+                population_size = 200
 
                 results = DataFrame(Seed=[],
                     Name=String[], NoiseLeve=String[], Fitness=Float64[], Equation=String[], R2_test=Float64[],
diff --git a/src/GeneExpressionProgramming.jl b/src/GeneExpressionProgramming.jl
index afdc436..0f00955 100644
--- a/src/GeneExpressionProgramming.jl
+++ b/src/GeneExpressionProgramming.jl
@@ -131,7 +131,8 @@ import .GepUtils:
     ARITY_LIB_COMMON,
     FUNCTION_LIB_COMMON,
     FUNCTION_STRINGIFY,
-    one_hot_mean
+    one_hot_mean,
+    select_n_samples_lhs
 
 # Import selection mechanisms
 import .EvoSelection:
@@ -276,7 +277,7 @@ export equal_unit_forward, mul_unit_forward, div_unit_forward,
 export find_indices_with_sum, compile_djl_datatype,
     optimize_constants!, minmax_scale, isclose,
     save_state, load_state,
-    train_test_split, one_hot_mean
+    train_test_split, one_hot_mean, select_n_samples_lhs
 
 # Export history recording functionality
 export HistoryRecorder, OptimizationHistory,
diff --git a/src/Gep.jl b/src/Gep.jl
index fa9a3e4..4052057 100644
--- a/src/Gep.jl
+++ b/src/Gep.jl
@@ -79,6 +79,7 @@ using Logging
 using Printf
 using Base.Threads: SpinLock
 using .Threads
+using Distributions
 
 export runGep
 
@@ -163,7 +164,7 @@ Performs one evolutionary step in the GEP algorithm, creating and evaluating new
         compile_expression!(next_gen[i+1]; force_compile=true)
 
     end
-    
+
     Threads.@threads for i in eachindex(next_gen)
         try
             population[end-i] = population[end-mating_size-i]
@@ -218,6 +219,43 @@ Applies correction operations to ensure dimensional homogeneity in chromosomes.
 end
 
 
+"""
+    equation_characterization_default(population::Vector, n_samples::Int)
+
+    Employs latin hyperqube sampling on a population
+"""
+@inline function equation_characterization_default(population::Vector{Chromosome}, n_samples::Int; inputs_::Int=0)
+    len_extented_pop = length(population)
+    coeff_count = isempty(population[1].toolbox.preamble_syms) ? 1 : length(length(population[1].toolbox.preamble_syms))
+    features = zeros(coeff_count * 2, len_extented_pop)
+    prob_dataset = rand(Uniform(0, 1), 100, inputs_ == 0 ? 10 : inputs_)
+
+    Threads.@threads for p_index in eachindex(population)
+        if population[p_index].compiled
+            try
+                if coeff_count > 1
+                    for e_index in 1:coeff_count
+                        features[e_index, p_index] = mean(population[p_index].compiled_function[e_index](prob_dataset,
+                            population[p_index].toolbox.operators_))
+                        features[e_index+1, p_index] = length(population[p_index].expression_raw[e_index])
+                    end
+                else
+                    features[coeff_count, p_index] = mean(population[p_index].compiled_function(prob_dataset, population[p_index].toolbox.operators_))
+                    features[coeff_count+1, p_index] = length(population[p_index].expression_raw)
+                end
+            catch
+                features[:, p_index] .= Inf
+            end
+
+        else
+            features[:, p_index] .= Inf
+        end
+    end
+
+    return select_n_samples_lhs(features, n_samples)
+end
+
+
 """
     runGep(epochs::Int, population_size::Int, toolbox::Toolbox, evalStrategy::EvaluationStrategy;
         hof::Int=3, correction_callback::Union{Function,Nothing}=nothing,
@@ -273,10 +311,11 @@ The evolution process stops when either:
     correction_amount::Real=0.6,
     tourni_size::Int=3,
     optimization_epochs::Int=500,
-    file_logger_callback::Union{Function, Nothing}=nothing, 
-    save_state_callback::Union{Function, Nothing}=nothing,
-    load_state_callback::Union{Function, Nothing}=nothing,
-    update_surrogate_callback::Union{Function, Nothing}=nothing)
+    file_logger_callback::Union{Function,Nothing}=nothing,
+    save_state_callback::Union{Function,Nothing}=nothing,
+    load_state_callback::Union{Function,Nothing}=nothing,
+    update_surrogate_callback::Union{Function,Nothing}=nothing, 
+    population_sampling_multiplier::Int=100)
 
     recorder = HistoryRecorder(epochs, Tuple)
     mating_ = toolbox.gep_probs["mating_size"]
@@ -286,24 +325,30 @@ The evolution process stops when either:
     fit_cache = Dict{Vector{Int8},Tuple}()
     cache_lock = SpinLock()
 
-    population, start_epoch = isnothing(load_state_callback) ? (generate_population(population_size+mating_size, toolbox), 1) : load_state_callback()
+
+    initial_size = isnothing(toolbox.operators_) ? population_size + mating_size : population_size * population_sampling_multiplier
+    population, start_epoch = isnothing(load_state_callback) ? (generate_population(initial_size, toolbox), 1) : load_state_callback()
+    if start_epoch <= 1 & !isnothing(toolbox.operators_)
+        population = population[equation_characterization_default(population, population_size + mating_size)]
+    end
+
     next_gen = Vector{eltype(population)}(undef, mating_size)
     progBar = Progress(epochs; showspeed=true, desc="Training: ")
     prev_best = (typemax(Float64),)
-    
+
     for epoch in start_epoch:epochs
         same = Atomic{Int}(0)
         perform_correction_callback!(population[1:population_size], epoch, correction_epochs, correction_amount, correction_callback)
 
         Threads.@threads for i in eachindex(population[1:population_size])
-            if isnan(mean(population[i].fitness)) 
+            if isnan(mean(population[i].fitness))
                 key = copy(population[i].expression_raw)
-                cache_value = get(fit_cache,key,nothing)
+                cache_value = get(fit_cache, key, nothing)
                 if isnothing(cache_value)
-                    
+
                     population[i].fitness = compute_fitness(population[i], evalStrategy)
                     lock(cache_lock)
-                        fit_cache[key] = population[i].fitness
+                    fit_cache[key] = population[i].fitness
                     unlock(cache_lock)
                 else
                     atomic_add!(same, 1)
diff --git a/src/RegressionWrapper.jl b/src/RegressionWrapper.jl
index e95511b..55adeca 100644
--- a/src/RegressionWrapper.jl
+++ b/src/RegressionWrapper.jl
@@ -697,7 +697,8 @@ function fit!(regressor::GepRegressor, epochs::Int, population_size::Int, x_trai
     break_condition::Union{Function,Nothing}=nothing,
     file_logger_callback::Union{Function,Nothing}=nothing,
     save_state_callback::Union{Function,Nothing}=nothing,
-    load_state_callback::Union{Function,Nothing}=nothing
+    load_state_callback::Union{Function,Nothing}=nothing, 
+    population_sampling_multiplier::Int=100
 )
 
     correction_callback = if !isnothing(target_dimension)
@@ -752,7 +753,8 @@ function fit!(regressor::GepRegressor, epochs::Int, population_size::Int, x_trai
         optimization_epochs=optimization_epochs,
         file_logger_callback=file_logger_callback,
         save_state_callback=save_state_callback,
-        load_state_callback=load_state_callback
+        load_state_callback=load_state_callback,
+        population_sampling_multiplier=population_sampling_multiplier
     )
 
     regressor.best_models_ = best
diff --git a/src/Util.jl b/src/Util.jl
index dfbe5ff..03f33b5 100644
--- a/src/Util.jl
+++ b/src/Util.jl
@@ -111,7 +111,7 @@ export find_indices_with_sum, compile_djl_datatype, optimize_constants!, minmax_
 export save_state, load_state
 export create_history_recorder, record_history!, record!, close_recorder!
 export HistoryRecorder, OptimizationHistory, get_history_arrays, one_hot_mean, FUNCTION_STRINGIFY
-export train_test_split
+export train_test_split, select_n_samples_lhs
 export FUNCTION_LIB_COMMON, ARITY_LIB_COMMON
 export TensorNode, compile_network
 
@@ -127,10 +127,10 @@ using Random
 using Tensors
 using Flux
 using StatsBase
+using NearestNeighbors
 using Base.Threads: @spawn
 
 
-
 function sqr(x::Vector{T}) where {T<:AbstractFloat}
     return x .* x
 end
@@ -855,5 +855,96 @@ function one_hot_mean(vectors::Vector{Vector{T}}, k::Int) where T <: Integer
 end
 
 
+function select_closest_points(lhs_points, normalized_features, n_samples)
+    n_available = size(normalized_features, 2)
+    if n_available < n_samples
+        @warn "Not enough valid points: $n_available available, $n_samples requested"
+        return collect(1:n_available)
+    end
+    
+    selected_indices = zeros(Int, n_samples)
+    remaining_indices = Set(1:n_available)
+    
+    kdtree = KDTree(normalized_features)
+    
+    for i in 1:n_samples
+        best_idx = -1
+        try
+            idxs, dists = knn(kdtree, lhs_points[:, i], 1, true, j -> j ∈ remaining_indices)
+            best_idx = idxs[1]
+        catch e
+            if !isempty(remaining_indices)
+                best_idx = first(remaining_indices)
+            else
+                error("No remaining points to select")
+            end
+        end
+        
+        selected_indices[i] = best_idx
+        delete!(remaining_indices, best_idx)
+    end
+    
+    return selected_indices
+end
+
+"""
+    select_n_samples_lhs(equastacked_features, n_samples; seed=nothing)
+
+Select a subset of equations using Latin Hypercube Sampling based on their characteristics.
+
+Parameters:
+- `stacked_features`: Array of equation information -> (n_features × n_probes)
+- `n_samples`: Number of equations to select
+- `seed`: Optional random seed for reproducibility
+
+Returns:
+- Indices of selected equations
+"""
+function select_n_samples_lhs(stacked_features::AbstractArray, n_samples::Int)
+    _,test_len = size(stacked_features)
+    invalid_mask = falses(test_len)
+    for i in 1:test_len
+        if any(isnan.(stacked_features[:, i])) || any(stacked_features[:, i] .< 0)
+            invalid_mask[i] = true
+        end
+    end
+    valid_indices = findall(.!invalid_mask)
+    valid_features = stacked_features[:, valid_indices]
+    normalized_features = normalize_features(valid_features)
+    n_features, n_probes = size(normalized_features)
+
+    bins = zeros(n_features, n_samples, 2)
+    bins[:, :, 1] .= ((1:n_samples)' .- 1) ./ n_samples
+    bins[:, :, 2] .= (1:n_samples)' ./ n_samples
+
+
+    for i in 1:n_features
+        shuffle!(view(bins,i,:,:))
+    end
+
+    rand_vals = rand(n_features, n_samples)
+    lhs_points = bins[:,:,1] + rand_vals .* (bins[:,:,2] - bins[:,:,1])
+
+    selected_indices = select_closest_points(lhs_points, normalized_features, n_samples)
+    return valid_indices[selected_indices]
+end
+
+
+function normalize_features(features)
+
+    feature_mins = minimum(features, dims=2)
+    feature_maxs = maximum(features, dims=2)
+    feature_ranges = feature_maxs .- feature_mins
+    
+    normalized = similar(features)
+    
+    normalized = @. ifelse(
+        feature_ranges > 0,
+        (features - feature_mins) / feature_ranges,
+        0.5
+    )
+    
+    return normalized
+end
 
 end
\ No newline at end of file