In [2]:
import numpy as np
import pandas as pd
from gplearn.genetic import SymbolicRegressor
from sklearn.metrics import mean_absolute_error
from collections import defaultdict

# Load the spreadsheet and drop the columns listed below in one chained step;
# the remaining columns are what the rest of the notebook works with.
dropped_columns = ['XA', 'XB', 'QA', 'Nd', 'mu*RA', 'mu*t', 'RA/t']
df = pd.read_excel('data_gp.xlsx').drop(columns=dropped_columns)
df

Unnamed: 0,t,mu,RA,mu/t,VRHE,RA*t,mu/RA
0,0.993,0.43,1.36,0.433,1.791583,1.35048,0.316176
1,0.998,0.422,1.36,0.423,1.72275,1.35728,0.310294
2,1.003,0.415,1.36,0.413,1.707833,1.36408,0.305147
3,0.988,0.437,1.36,0.442,1.774417,1.34368,0.321324
4,1.004,0.414,1.36,0.413,1.790833,1.36544,0.304412
5,1.004,0.413,1.36,0.412,1.753917,1.36544,0.303676
6,1.009,0.407,1.36,0.404,1.759083,1.37224,0.299265
7,1.01,0.407,1.365,0.403,1.724667,1.37865,0.298168
8,1.012,0.407,1.37,0.402,1.755583,1.38644,0.29708
9,1.011,0.404,1.36,0.399,1.720583,1.37496,0.297059


In [3]:
# Feature matrix and target vector. The column order matters: later cells
# address the features by position (X[:, 0] is 't', X[:, 3] is 'mu/t', ...).
feature_columns = ['t', 'mu', 'RA', 'mu/t', 'RA*t', 'mu/RA']
X = df[feature_columns].to_numpy()
y = df['VRHE'].to_numpy()

# Positional hold-out: the first 18 rows are used for training and every
# remaining row for validation (no shuffling).
n_train = 18
X_train, X_val = X[:n_train, :], X[n_train:, :]
y_train, y_val = y[:n_train], y[n_train:]

X_train, y_train

(array([[0.993     , 0.43      , 1.36      , 0.433     , 1.35048   ,
         0.31617647],
        [0.998     , 0.422     , 1.36      , 0.423     , 1.35728   ,
         0.31029412],
        [1.003     , 0.415     , 1.36      , 0.413     , 1.36408   ,
         0.30514706],
        [0.988     , 0.437     , 1.36      , 0.442     , 1.34368   ,
         0.32132353],
        [1.004     , 0.414     , 1.36      , 0.413     , 1.36544   ,
         0.30441176],
        [1.004     , 0.413     , 1.36      , 0.412     , 1.36544   ,
         0.30367647],
        [1.009     , 0.407     , 1.36      , 0.404     , 1.37224   ,
         0.29926471],
        [1.01      , 0.407     , 1.365     , 0.403     , 1.37865   ,
         0.2981685 ],
        [1.012     , 0.407     , 1.37      , 0.402     , 1.38644   ,
         0.29708029],
        [1.011     , 0.404     , 1.36      , 0.399     , 1.37496   ,
         0.29705882],
        [1.011     , 0.398     , 1.35      , 0.394     , 1.36485   ,
         0.29481481],

In [3]:
# Grid search over the genetic-programming hyper-parameters.
#
# For every (crossover probability, mutation budget, parsimony coefficient)
# combination a SymbolicRegressor is fitted on the training rows and one line
# describing the best program is appended to "program_length_depth_mae.txt".
# The file is opened in append mode per fit, so results already written survive
# an interrupted run -- but re-running the cell adds to the existing file
# rather than replacing it.

# Fixed seed so that each configuration yields the same program on every run;
# without it the evolutionary search is not reproducible.
GP_RANDOM_STATE = 0

for pc in np.arange(0.875, 0.95, 0.025):
    for ps in np.arange((1-pc), (0.92-pc), -0.01):
        # One third of `ps` goes to subtree mutation and one third to hoist
        # mutation; point mutation receives whatever is left after crossover
        # and those two, so the four probabilities add up to 1.
        # NOTE(review): the sum can exceed 1 by float rounding; gplearn is
        # believed to reject sums > 1 -- confirm if a ValueError shows up.
        p_subtree = ps/3
        p_hoist = ps/3
        p_point = 1-pc-ps/3-ps/3

        for parsimony in np.arange(0.0005, 0.0016, 0.0005):
            est_gp = SymbolicRegressor(
                population_size=5000,
                generations=20,
                stopping_criteria=0.01,
                p_crossover=pc,
                p_subtree_mutation=p_subtree,
                p_hoist_mutation=p_hoist,
                p_point_mutation=p_point,
                function_set=('add', 'sub', 'mul', 'div', 'sqrt', 'log'),
                parsimony_coefficient=parsimony,
                tournament_size=20,
                metric='mean absolute error',
                const_range=(-1.0, 1.0),
                random_state=GP_RANDOM_STATE,
            )

            est_gp.fit(X_train, y_train)

            # Best evolved program and its size statistics.
            best_program = est_gp._program
            program = str(best_program)
            length = best_program.length_
            depth = best_program.depth_

            # Mean absolute error on the training and validation rows.
            y_pred_t = est_gp.predict(X_train)
            mae_train = mean_absolute_error(y_train, y_pred_t)
            y_pred_v = est_gp.predict(X_val)
            mae_val = mean_absolute_error(y_val, y_pred_v)

            # The trailing fields (length, depth, mae_train, mae_val) are
            # parsed by position from the end of the line in a later cell,
            # so their order and labels must not change.
            with open("program_length_depth_mae.txt", "a") as f:
                f.write(f"pc: {pc}   ps: {p_subtree}   ph: {p_hoist}   pp: {p_point}   p_coef: {parsimony}   {program}   length: {length}   depth: {depth}   mae_train: {mae_train}   mae_val: {mae_val}\n")



In [1]:
from collections import defaultdict

# Input and output file paths
input_file = "program_length_depth_mae.txt"
output_file_depth = "grouped_programs_depth.txt"
output_file_length = "grouped_programs_length.txt"
MAE_t_file = "MAE_t.txt"
MAE_v_file = "MAE_v.txt"

# Raw result lines keyed by program length / program depth (keys are the
# string tokens exactly as they appear in the file), plus the per-length
# MAE columns that are filled in further down.
grouped_lines_length = defaultdict(list)
grouped_lines_depth = defaultdict(list)
MAE_t = defaultdict(list)
MAE_v = defaultdict(list)

# Read the results once and file every line under both its length and its
# depth. Fields are addressed from the END of the line because the program
# text in the middle contains spaces; counting back from the last token:
#   [-1] mae_val value, [-3] mae_train value, [-5] depth value, [-7] length value
with open(input_file, "r") as f:
    for line in f:
        parts = line.split()
        if not parts:
            # Blank line (e.g. a stray trailing newline): nothing to group.
            continue
        grouped_lines_length[parts[-7]].append(line)
        grouped_lines_depth[parts[-5]].append(line)

# Sort lines based on the MAE_val value in each group
def extract_lastthird_number(line):
    """Return the final whitespace-separated token of ``line`` as a float.

    Despite the function's name, it is the LAST token that is read; in the
    result lines produced by this notebook that token is the ``mae_val``
    value. Raises ``IndexError`` for a line with no tokens and ``ValueError``
    if the last token is not a valid float literal.
    """
    last_token = line.rsplit(maxsplit=1)[-1]
    return float(last_token)

# Order the lines inside every group by ascending value of their last token
# (the sort key is extract_lastthird_number). The sort is stable, so lines
# with equal keys keep their file order.
for length_key in sorted(grouped_lines_length):
    grouped_lines_length[length_key].sort(key=extract_lastthird_number)

for depth_key in sorted(grouped_lines_depth):
    grouped_lines_depth[depth_key].sort(key=extract_lastthird_number)

# For each program length, collect the third-from-last and the last token of
# every line (kept as strings), in the sorted order established above.
for length_key in sorted(grouped_lines_length):
    for record in grouped_lines_length[length_key]:
        tokens = record.strip().split()
        MAE_t[length_key].append(tokens[-3])
        MAE_v[length_key].append(tokens[-1])

def _numeric_key(item):
    """Sort key for (key, value) pairs: the key string read as an integer."""
    return int(item[0])


# Grouped result lines: one file ordered by program length, one by depth.
# Groups appear in ascending numeric key order; lines are copied verbatim.
for path, groups in (
    (output_file_length, grouped_lines_length),
    (output_file_depth, grouped_lines_depth),
):
    with open(path, "w") as f:
        for _, lines in sorted(groups.items(), key=_numeric_key):
            f.writelines(lines)

# MAE tables: one text row per key (ascending numeric order); every value is
# followed by a single space and each row ends with a newline.
for path, table in (
    (MAE_t_file, MAE_t),
    (MAE_v_file, MAE_v),
):
    with open(path, "w") as f:
        for _, values in sorted(table.items(), key=_numeric_key):
            f.write("".join(value + " " for value in values))
            f.write("\n")

In [6]:
# Named views of the feature columns used below; positions follow the column
# list X was built from ('t', 'mu', 'RA', 'mu/t', 'RA*t', 'mu/RA').
t_values = X[:, 0]
mu_over_t = X[:, 3]
mu_over_RA = X[:, 5]

# Four closed-form candidate expressions evaluated on every row of X.
A = 2 * mu_over_t + 0.922
G = 1.754 / t_values
H = np.sqrt(mu_over_RA) + 0.775 + mu_over_t
L = np.sqrt(0.398 + mu_over_t) + np.sqrt(mu_over_RA / np.sqrt(0.189))

# Side-by-side table of the four expressions and the target values.
df_AGHL = pd.DataFrame(dict(A=A, G=G, H=H, L=L, VRHE=y))
df_AGHL

Unnamed: 0,A,G,H,L,VRHE
0,1.788,1.766365,1.770296,1.764396,1.791583
1,1.768,1.757515,1.75504,1.750924,1.72275
2,1.748,1.748754,1.740401,1.738353,1.707833
3,1.806,1.775304,1.783854,1.776233,1.774417
4,1.748,1.747012,1.739735,1.737343,1.790833
5,1.746,1.747012,1.738068,1.735776,1.753917
6,1.73,1.738355,1.726051,1.725228,1.759083
7,1.728,1.736634,1.724048,1.723148,1.724667
8,1.726,1.733202,1.722051,1.721077,1.755583
9,1.72,1.734916,1.719031,1.719368,1.720583
