In [1]:
using TiSR
using Symbolics
using SymbolicUtils
using CSV
using DataFrames
using Statistics
import DynamicQuantities: uparse,  @u_str

TiSR.jl loaded.

In [3]:
# ==================================================================================================
# experiment identification and input location
# ==================================================================================================
# Defaults for a single (vf, run) combination; the analysis loops further down use their own
# loop variables with the same names.
vf = "05"
run = 0
exp_name = "AIFL_exp_dimlessF_origRe"
dataset_prefix = "./GP_data_varying_Re/varying_Re_all_n30_hidden30_dropout_AIFL"

# 120 minutes expressed in seconds (7200.0).
# NOTE(review): this global shares its name with `Base.time`; confirm nothing in this session
# needs the Base function.
time = 120 * 60.0

# ==================================================================================================
# dimensional-analysis handling and objectives
# ==================================================================================================
always_correct_dims = false
p_correct_dims = 0.0
death_penalty_dims = false
hall_of_fame_objectives = [:ms_processed_e, :dim_penalty, :custom_compl]
selection_objectives = [
    :ms_processed_e,
    :minus_spearman,
    :dim_penalty,
    :custom_compl,
]

# ==================================================================================================
# preparation
# ==================================================================================================
parts = [1.0, 0.0]

# -> selection probabilities of the binary functions (same length as the provided binops;
#    they do not need to add up to 1, they are adjusted accordingly)
p_binops_ = (1.0, 1.0, 1.0, 1.0, 1.0)

# -> selection probability of each unary function; the entries line up position by position
#    with the `unaops` tuple handed to `Options`, so the last four operators get 0.0
p_unaops_ = (
    1.0, 1.0, 1.0, 1.0,
    0.0, 0.0, 0.0, 0.0,
)

n_gen = 150
# ==================================================================================================
# options -> specify some custom settings, where the default setting is unsatisfactory
# ==================================================================================================
"""
    pow_abs(v1, v2)

Raise the absolute value of `v1` to the power `v2`.
"""
function pow_abs(v1, v2)
    return abs(v1)^v2
end

"""
    sqrt_abs(v1)

Return the square root of the absolute value of `v1`.
"""
function sqrt_abs(v1)
    return sqrt(abs(v1))
end

"""
    pow2(v1)

Return `v1` squared.
"""
function pow2(v1)
    return v1^2
end

"""
    pow3(v1)

Return `v1` cubed.
"""
function pow3(v1)
    return v1^3
end

pow3 (generic function with 1 method)

### Analysis of equations per vf

In [13]:
# Validation of the per-vf symbolic-regression runs.
#
# For every vf value the matching "<vf>_shuffled_test" data set is loaded, and every hall-of-fame
# equation with dim_penalty == 0 of runs 0..10 is parsed, simplified if possible and evaluated
# on that data.
#   df_all  -> one row per evaluated equation
#   df_top3 -> per run, up to three lowest-complexity equations whose R^2 lies within 5 % of the
#              best R^2 of that run
df_all = DataFrame(vf = String[], run = Int[], eq = Int[], eqs = String[], r2 = Float64[], mse = Float64[], compl = Int[], n_param = Int[])
df_top3 = DataFrame(vf = String[], run = Int[], eq = Int[], eqs = String[], r2 = Float64[], mse = Float64[], compl = Int[], n_param = Int[])

# Number of consecutive data rows forming one group: the target is taken from the first row of
# each group and the predictions of the group are summed.
# NOTE(review): presumably the "n30" of the data set name -- confirm.
group_size = 30

for vf in ["01","02","03","04","05","06"]

    arbitrary_name = "vf$(vf)_shuffled_test"
    data_matrix = Matrix(CSV.read("$(dataset_prefix)/$(arbitrary_name).csv", DataFrame))
    # keep the four input columns and column 12 as target
    data_matrix = data_matrix[:, [1,2,3,4,12]]
    units = [u"m",  u"0", u"0", u"N", u"0"]

    # `ops` is only needed for parsing, simplifying and evaluating the stored equations; the
    # settings mirror the ones of the search.
    ops, data = Options(
        data_descript=data_descript(
            data_matrix;
            arbitrary_name = arbitrary_name,
            parts          = parts,
            #fit_weights    = fit_weights,
            units          = units,
        ),
        general=general_params(
            n_gens                  = n_gen,
            pop_size                = 500,
            max_compl               = 35,
            pow_abs_param           = true,
            prevent_doubles         = 1e-7,
            t_lim                   = typemax(Float64),
            multithreadding         = true,
            death_penalty_dims      = death_penalty_dims,
            always_drastic_simplify = false,
        ),
        selection=selection_params(
            hall_of_fame_objectives = hall_of_fame_objectives,  # -> objectives for the hall_of_fame
            selection_objectives    = selection_objectives,     # -> objectives for the Pareto-optimal selection part of selection
        ),
        fitting=fitting_params(
            early_stop_iter         = 5,
            max_iter                = 30,
            pre_residual_processing = nothing,
        ),
        # the following keywords belong to `Options` itself, not to `fitting_params`
        binops       = (  +,   -,   *,   /,  ^),  # -> binary function set to choose from
        p_binops     = p_binops_,  # -> probabilities for selection of each binary function (same length as provided binops; do not need to add up to 1, adjusted accordingly)
        unaops       = (exp, log, sin, cos, abs, sqrt_abs, pow2, pow3),  # -> unary function set to choose from
        p_unaops     = p_unaops_,  # -> probability for each unary function
        illegal_dict = Dict(
            :sin      => (sin, cos),
            :cos      => (sin, cos),
            :abs      => (abs,),
            :exp      => (exp, sqrt_abs), #log
            :log      => (log,), #exp
            :pow2     => (log, sqrt_abs),
            :sqrt_abs => (sqrt_abs,),
            :pow3     => (pow3, pow2),
            :^        => (^,),
        ),
        mutation=mutation_params(;
            p_crossover        = 4.0,
            p_point            = 0.5,
            p_innergrow        = 0.0,
            p_insert           = 0.2,
            p_hoist            = 0.2,
            p_subtree          = 0.2,
            p_add_term         = 0.1,
            p_simplify         = 0.5,
            p_drastic_simplify = 0.5,
        ),
    );

    # target: first element of every group of `group_size` rows
    y = data[end][1:group_size:end-1]
    println("vf: $vf")
    println(length(y))

    for run in 0:10
        println("run: $run")
        arbitrary_name_ = "vf$(vf)_shuffled_run$run"

        # NOTE(review): the results directory is hard-coded and differs from `exp_name`
        # ("AIFL_exp_dimlessF_origRe") -- confirm this is the intended experiment.
        df = CSV.read("AIFL_exp_dimlessF/$(arbitrary_name_)/$(arbitrary_name_)_hall_of_fame.csv", DataFrame)
        # keep only equations with dim_penalty == 0
        df = df[df[!,"dim_penalty"] .== 0,:]
        eqs = df[!,"eqs_orig"]
        df_tmp = DataFrame(vf = String[], run = Int[], eq = Int[], eqs = String[], r2 = Float64[], mse = Float64[], compl = Int[], n_param = Int[])

        for (i, eq) in enumerate(eqs)
            # replace "--" in the stored equation string with "+" before parsing
            eq = replace(eq, r"--" => "+")
            parsed = TiSR.string_to_node(eq, ops)
            compl_notsimp = TiSR.custom_compl(parsed, ops)

            # Try to simplify. If simplification raised the complexity by more than 4, fall
            # back to the unsimplified equation; if it throws, skip the equation.
            try
                TiSR.simplify_w_symbolic_utils!(parsed, ops, though_polyform=false)
                if (TiSR.custom_compl(parsed, ops)-4) > compl_notsimp
                    parsed = TiSR.string_to_node(eq, ops)
                end
            catch
                println("simplify failed")
                continue
            end

            n_param = length(TiSR.list_of_param_nodes(parsed))
            compl = TiSR.custom_compl(parsed, ops)

            # Evaluate on the inputs and sum the predictions of each group. The second return
            # value of `eval_equation` is not used; non-finite scores are excluded from the
            # top-3 selection below instead.
            pred, _ = TiSR.eval_equation(parsed, data[1:end-1], ops)
            pred = [sum(pred[j:j+group_size-1]) for j in 1:group_size:length(pred)-1]

            residual = y .- pred
            r2 = TiSR.r_squared(pred, y)
            mse = mean(abs2, residual)

            # human-readable variable names for the stored equation string
            label = eq
            for renaming in ("v1" => "r", "v2" => "theta", "v3" => "phi", "v4" => "Re")
                label = replace(label, renaming)
            end

            row = (vf, run, i, label, r2, mse, compl, n_param)
            push!(df_all, row)
            push!(df_tmp, row)
        end

        # Only finite scores take part in the selection: `maximum` returns NaN as soon as one
        # R^2 is NaN, which would otherwise empty the selection of the whole run.
        scored = filter(:r2 => isfinite, df_tmp)
        if nrow(scored) == 0
            continue
        end

        # Equations within 5 % of the best R^2. For a negative best R^2 the factor has to be
        # 1.05, because 0.95 * best would lie above the best value and select nothing.
        best_r2 = maximum(scored[!,"r2"])
        threshold = best_r2 >= 0 ? 0.95 * best_r2 : 1.05 * best_r2
        shortlist = sort(scored[scored[!,"r2"] .>= threshold, :], :compl)

        # up to three equations with the lowest custom_compl
        for (rank, best) in enumerate(eachrow(first(shortlist, 3)))
            push!(df_top3, (vf, run, rank, best.eqs, best.r2, best.mse, best.compl, best.n_param))
        end
    end
end

vf: 01
6400
run: 0


└ @ TiSR /Users/juliareu/GitCode/TiSR/src/options.jl:39


run: 1
run: 2
run: 3
run: 4
run: 5
run: 6
run: 7
run: 8
run: 9
run: 10
vf: 02
8064
run: 0


└ @ TiSR /Users/juliareu/GitCode/TiSR/src/options.jl:39


run: 1
run: 2
run: 3
run: 4
run: 5
run: 6
run: 7
run: 8
run: 9
run: 10
vf: 03
6272
run: 0
run: 1


└ @ TiSR /Users/juliareu/GitCode/TiSR/src/options.jl:39


run: 2
run: 3
run: 4
run: 5
run: 6
run: 7
run: 8
run: 9
run: 10
vf: 04
9216
run: 0


└ @ TiSR /Users/juliareu/GitCode/TiSR/src/options.jl:39


run: 1
run: 2
run: 3
run: 4
run: 5
run: 6
run: 7
run: 8
run: 9
run: 10
vf: 05
8576
run: 0


└ @ TiSR /Users/juliareu/GitCode/TiSR/src/options.jl:39


run: 1
run: 2
run: 3
run: 4
run: 5
run: 6
run: 7
run: 8
run: 9
run: 10
vf: 06
8768
run: 0


└ @ TiSR /Users/juliareu/GitCode/TiSR/src/options.jl:39


run: 1
run: 2
run: 3
run: 4
run: 5
run: 6
run: 7
run: 8
run: 9
run: 10


In [14]:
# Write the per-vf validation tables to disk: all evaluated equations (df_all) and the
# per-run selection of up to three equations (df_top3).
CSV.write("AIFL_exp_dimlessF/hall_of_fame_all_val_pervf.csv", df_all)
CSV.write("AIFL_exp_dimlessF/hall_of_fame_top3_val_pervf.csv", df_top3)

"AIFL_exp_dimlessF/hall_of_fame_top3_val_pervf.csv"

### Analysis of equations per Re


In [18]:
# Validation of the per-Re symbolic-regression runs.
#
# For every Re value the matching "<Re>_shuffled_test" data set is loaded, and every hall-of-fame
# equation with dim_penalty == 0 of runs 0..10 is parsed, simplified if possible and evaluated
# on that data.
#   df_all  -> one row per evaluated equation
#   df_top3 -> per run, up to three lowest-complexity equations whose R^2 lies within 5 % of the
#              best R^2 of that run
df_all = DataFrame(Re = String[], run = Int[], eq = Int[], eqs = String[], r2 = Float64[], mse = Float64[], compl = Int[], n_param = Int[])
df_top3 = DataFrame(Re = String[], run = Int[], eq = Int[], eqs = String[], r2 = Float64[], mse = Float64[], compl = Int[], n_param = Int[])

# Number of consecutive data rows forming one group: the target is taken from the first row of
# each group and the predictions of the group are summed.
# NOTE(review): presumably the "n30" of the data set name -- confirm.
group_size = 30

for Re in ["0","1","5","10","50","100","200","300"]

    arbitrary_name = "Re$(Re)_shuffled_test"
    data_matrix = Matrix(CSV.read("$(dataset_prefix)/$(arbitrary_name).csv", DataFrame))
    # keep the four input columns and column 12 as target
    data_matrix = data_matrix[:, [1,2,3,4,12]]
    units = [u"m",  u"0", u"0", u"N", u"0"]

    # `ops` is only needed for parsing, simplifying and evaluating the stored equations; the
    # settings mirror the ones of the search.
    ops, data = Options(
        data_descript=data_descript(
            data_matrix;
            arbitrary_name = arbitrary_name,
            parts          = parts,
            #fit_weights    = fit_weights,
            units          = units,
        ),
        general=general_params(
            n_gens                  = n_gen,
            pop_size                = 500,
            max_compl               = 35,
            pow_abs_param           = true,
            prevent_doubles         = 1e-7,
            t_lim                   = typemax(Float64),
            multithreadding         = true,
            death_penalty_dims      = death_penalty_dims,
            always_drastic_simplify = false,
        ),
        selection=selection_params(
            hall_of_fame_objectives = hall_of_fame_objectives,  # -> objectives for the hall_of_fame
            selection_objectives    = selection_objectives,     # -> objectives for the Pareto-optimal selection part of selection
        ),
        fitting=fitting_params(
            early_stop_iter         = 5,
            max_iter                = 30,
            pre_residual_processing = nothing,
        ),
        # the following keywords belong to `Options` itself, not to `fitting_params`
        binops       = (  +,   -,   *,   /,  ^),  # -> binary function set to choose from
        p_binops     = p_binops_,  # -> probabilities for selection of each binary function (same length as provided binops; do not need to add up to 1, adjusted accordingly)
        unaops       = (exp, log, sin, cos, abs, sqrt_abs, pow2, pow3),  # -> unary function set to choose from
        p_unaops     = p_unaops_,  # -> probability for each unary function
        illegal_dict = Dict(
            :sin      => (sin, cos),
            :cos      => (sin, cos),
            :abs      => (abs,),
            :exp      => (exp, sqrt_abs), #log
            :log      => (log,), #exp
            :pow2     => (log, sqrt_abs),
            :sqrt_abs => (sqrt_abs,),
            :pow3     => (pow3, pow2),
            :^        => (^,),
        ),
        mutation=mutation_params(;
            p_crossover        = 4.0,
            p_point            = 0.5,
            p_innergrow        = 0.0,
            p_insert           = 0.2,
            p_hoist            = 0.2,
            p_subtree          = 0.2,
            p_add_term         = 0.1,
            p_simplify         = 0.5,
            p_drastic_simplify = 0.5,
        ),
    );

    # target: first element of every group of `group_size` rows
    y = data[end][1:group_size:end-1]
    println("Re: $Re")

    for run in 0:10
        println("run: $run")
        arbitrary_name_ = "Re$(Re)_shuffled_run$run"

        # NOTE(review): the results directory is hard-coded and differs from `exp_name`
        # ("AIFL_exp_dimlessF_origRe") -- confirm this is the intended experiment.
        df = CSV.read("AIFL_exp_dimlessF/$(arbitrary_name_)/$(arbitrary_name_)_hall_of_fame.csv", DataFrame)
        # keep only equations with dim_penalty == 0
        df = df[df[!,"dim_penalty"] .== 0,:]
        eqs = df[!,"eqs_orig"]
        df_tmp = DataFrame(Re = String[], run = Int[], eq = Int[], eqs = String[], r2 = Float64[], mse = Float64[], compl = Int[], n_param = Int[])

        for (i, eq) in enumerate(eqs)
            # replace "--" in the stored equation string with "+" before parsing
            eq = replace(eq, r"--" => "+")
            parsed = TiSR.string_to_node(eq, ops)
            compl_notsimp = TiSR.custom_compl(parsed, ops)

            # Try to simplify. If simplification raised the complexity by more than 4, fall
            # back to the unsimplified equation; if it throws, skip the equation.
            try
                TiSR.simplify_w_symbolic_utils!(parsed, ops, though_polyform=false)
                if (TiSR.custom_compl(parsed, ops)-4) > compl_notsimp
                    parsed = TiSR.string_to_node(eq, ops)
                end
            catch
                println("simplify failed")
                continue
            end

            n_param = length(TiSR.list_of_param_nodes(parsed))
            compl = TiSR.custom_compl(parsed, ops)

            # Evaluate on the inputs and sum the predictions of each group. The second return
            # value of `eval_equation` is not used; non-finite scores are excluded from the
            # top-3 selection below instead.
            pred, _ = TiSR.eval_equation(parsed, data[1:end-1], ops)
            pred = [sum(pred[j:j+group_size-1]) for j in 1:group_size:length(pred)-1]

            residual = y .- pred
            r2 = TiSR.r_squared(pred, y)
            mse = mean(abs2, residual)

            # human-readable variable names for the stored equation string
            label = eq
            for renaming in ("v1" => "r", "v2" => "theta", "v3" => "phi", "v4" => "vf")
                label = replace(label, renaming)
            end

            row = (Re, run, i, label, r2, mse, compl, n_param)
            push!(df_all, row)
            push!(df_tmp, row)
        end

        # Only finite scores take part in the selection: `maximum` returns NaN as soon as one
        # R^2 is NaN, which would otherwise empty the selection of the whole run.
        scored = filter(:r2 => isfinite, df_tmp)
        if nrow(scored) == 0
            continue
        end

        # Equations within 5 % of the best R^2. For a negative best R^2 the factor has to be
        # 1.05, because 0.95 * best would lie above the best value and select nothing.
        best_r2 = maximum(scored[!,"r2"])
        threshold = best_r2 >= 0 ? 0.95 * best_r2 : 1.05 * best_r2
        shortlist = sort(scored[scored[!,"r2"] .>= threshold, :], :compl)

        # up to three equations with the lowest custom_compl
        for (rank, best) in enumerate(eachrow(first(shortlist, 3)))
            push!(df_top3, (Re, run, rank, best.eqs, best.r2, best.mse, best.compl, best.n_param))
        end
    end
end

Re: 0
run: 0


└ @ TiSR /Users/juliareu/GitCode/TiSR/src/options.jl:39


run: 1
run: 2
run: 3
simplify failed
run: 4
run: 5
run: 6
run: 7
run: 8
run: 9
run: 10
Re: 1
run: 0


└ @ TiSR /Users/juliareu/GitCode/TiSR/src/options.jl:39


run: 1
run: 2
run: 3
run: 4
run: 5
run: 6
run: 7
run: 8
run: 9
run: 10
Re: 5
run: 0


└ @ TiSR /Users/juliareu/GitCode/TiSR/src/options.jl:39


run: 1
run: 2
run: 3
run: 4
run: 5
run: 6
run: 7
run: 8
run: 9
run: 10
Re: 10
run: 0
run: 1


└ @ TiSR /Users/juliareu/GitCode/TiSR/src/options.jl:39


run: 2
run: 3
run: 4
run: 5
run: 6
run: 7
run: 8
run: 9
run: 10
Re: 50
run: 0


└ @ TiSR /Users/juliareu/GitCode/TiSR/src/options.jl:39


run: 1
run: 2
run: 3
run: 4
run: 5
run: 6
run: 7
run: 8
run: 9
run: 10
Re: 100
run: 0
run: 1


└ @ TiSR /Users/juliareu/GitCode/TiSR/src/options.jl:39


run: 2
run: 3
run: 4
run: 5
run: 6
run: 7
run: 8
run: 9
run: 10
Re: 200
run: 0
run: 1


└ @ TiSR /Users/juliareu/GitCode/TiSR/src/options.jl:39


run: 2
run: 3
run: 4
run: 5
run: 6
run: 7
run: 8
run: 9
run: 10
Re: 300
run: 0


└ @ TiSR /Users/juliareu/GitCode/TiSR/src/options.jl:39


run: 1
run: 2
run: 3
run: 4
run: 5
run: 6
run: 7
run: 8
run: 9
run: 10


In [19]:
# Write the per-Re validation tables to disk: all evaluated equations (df_all) and the
# per-run selection of up to three equations (df_top3).
CSV.write("AIFL_exp_dimlessF/hall_of_fame_all_val_perRe.csv", df_all)
CSV.write("AIFL_exp_dimlessF/hall_of_fame_top3_val_perRe.csv", df_top3)

"AIFL_exp_dimlessF/hall_of_fame_top3_val_perRe.csv"