From the trained neural network and the permutation feature importance analysis, two features, namely the number of d electrons of the TM ions (Nd) and mu/t, make the greatest contribution to the model performance compared with the other features. We can use gplearn to find an explicit mathematical formula that maps Nd and mu/t to the VRHE.

In [1]:
import numpy as np
import pandas as pd
from gplearn.genetic import SymbolicRegressor
from sklearn.metrics import mean_absolute_error
from collections import defaultdict

Load the data. Note that this time there are only two features: Nd (X0) and mu/t (X1).

In [3]:
# Read the two-feature table used for symbolic regression.
df = pd.read_excel("data_gp.xlsx")

# Target first, then the feature matrix. Column order matters: the first
# column (Nd) is the variable called X0 and the second (mu/t) is X1.
y = df["VRHE"].to_numpy()
X = df[["Nd", "mu/t"]].to_numpy()

In [4]:
# Echo the feature matrix and the target vector as the cell output so the
# loaded values can be checked by eye.
X, y

(array([[4.        , 0.433     ],
        [5.5       , 0.423     ],
        [7.        , 0.413     ],
        [6.        , 0.442     ],
        [6.8       , 0.413     ],
        [6.6       , 0.412     ],
        [5.        , 0.404     ],
        [5.        , 0.403     ],
        [5.        , 0.402     ],
        [6.        , 0.399     ],
        [5.5       , 0.394     ],
        [5.8       , 0.394     ],
        [5.25      , 0.393     ],
        [5.4       , 0.384     ],
        [5.2       , 0.379     ],
        [5.        , 0.374     ],
        [4.8       , 0.361     ],
        [4.        , 0.344     ],
        [5.        , 0.37422114],
        [4.7       , 0.36334775],
        [6.4       , 0.3484614 ],
        [5.75      , 0.34071461],
        [6.        , 0.31540073]]),
 array([1.79158333, 1.72275   , 1.70783333, 1.77441667, 1.79083333,
        1.75391667, 1.75908333, 1.72466667, 1.75558333, 1.72058333,
        1.68158333, 1.6875    , 1.71858333, 1.69933333, 1.68025   ,
        1.67

In [6]:
# Grid search over the genetic-operator probabilities and the parsimony
# coefficient of gplearn's SymbolicRegressor.
#
# For every combination the regressor is fitted on (X, y), and the best
# program, its length, its depth and its training MAE are appended as one
# line to "program_length_depth_mae.txt". The file is reopened in append mode
# after each fit, so results already written survive an interrupted run.
#
# NOTE(review): the grids are built with np.arange and a fractional step, so
# the exact number of grid points depends on floating-point rounding at the
# end of each range.
for pc in np.arange(0.5, 0.95, 0.05):
    for ps in np.arange((1-pc), (0.92-pc), -0.01):
        # Subtree and hoist mutation each get a third of ps.
        p_mut = ps / 3
        # Point mutation takes whatever probability is left. The remainder is
        # computed from the running total (pc + p_mut + p_mut) rather than as
        # 1 - pc - p_mut - p_mut: gplearn accumulates the four probabilities
        # in this same order and rejects a total above 1.0, and the other
        # evaluation order may round to a total just over 1.0.
        p_point = 1.0 - (pc + p_mut + p_mut)

        for parsimony in np.arange(0.0005, 0.0016, 0.0005):
            est_gp = SymbolicRegressor(
                population_size=3000,
                generations=20,
                stopping_criteria=0.01,
                p_crossover=pc,
                p_subtree_mutation=p_mut,
                p_hoist_mutation=p_mut,
                p_point_mutation=p_point,
                function_set=('add', 'sub', 'mul', 'div', 'sqrt'),
                parsimony_coefficient=parsimony,
                tournament_size=20,
                metric='mean absolute error',
                const_range=(-1.0, 1.0),
            )

            est_gp.fit(X, y)

            # Best program found by this run, plus its size statistics.
            best = est_gp._program
            program = str(best)
            length = best.length_
            depth = best.depth_

            # Training-set error of the fitted program.
            mae = mean_absolute_error(y, est_gp.predict(X))

            # The "ps" and "ph" fields both hold the per-operator share
            # (ps / 3), i.e. the values actually passed to the estimator.
            with open("program_length_depth_mae.txt", "a") as f:
                f.write(f"pc: {pc}   ps: {p_mut}   ph: {p_mut}   pp: {p_point}   p_coef: {parsimony}   {program}   length: {length}   depth: {depth}   mae: {mae}\n")

