In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

df = pd.read_csv(r"C:\Users\Lenovo\Desktop\24-25 bahar\ADL\ADL_project\df_last_encoded.csv")  # sen zaten yüklüyorsan atla

# 'designation' veya ilgili kolonun adını bul
designation_col = None
for cname in df.columns:
    if 'designation' in cname.lower() or 'mat' in cname.lower():
        designation_col = cname
        break

if designation_col is None:
    raise ValueError("Designation/mat0/mat1 kolonu bulunamadı!")

# Eğer string ise label encode et
if df[designation_col].dtype == 'object' or str(df[designation_col].dtype).startswith('str'):
    le = LabelEncoder()
    df[designation_col + '_label'] = le.fit_transform(df[designation_col])
    designation_feat = designation_col + '_label'
else:
    designation_feat = designation_col

# Hedefler
feature_cols = [col for col in df.columns if col not in ['n_opt', 'pce', 'hopt (%)', 'PCE (%)']]
if designation_feat not in feature_cols:
    feature_cols.append(designation_feat)  # Designation'ı featurelara ekle
X = df[feature_cols]
y_nopt = df['hopt (%)']

# Son 30 satırı test olarak ayır
X_train, X_test = X.iloc[:-30, :], X.iloc[-30:, :]
y_train, y_test = y_nopt.iloc[:-30], y_nopt.iloc[-30:]

# Sütun isimlerini sırayla X0, X1,... olarak göster
for i, col in enumerate(feature_cols):
    print(f"X{i}: {col}")


X0: Designation
X1: Uzunluk
X2: Genislik
X3: Kalinlik
X4: mat0
X5: mat1
X6: abs_peak
X7: abs_min
X8: abs_max
X9: em_peak
X10: em_min
X11: em_max
X12: QY (%)


In [None]:
import optuna
from gplearn.genetic import SymbolicRegressor
from sklearn.metrics import mean_squared_error

def objective(trial):
    p_crossover        = trial.suggest_float('p_crossover', 0.5, 0.75)
    p_subtree_mutation = trial.suggest_float('p_subtree_mutation', 0.05, 0.12)
    p_hoist_mutation   = trial.suggest_float('p_hoist_mutation', 0.01, 0.07)
    p_point_mutation   = trial.suggest_float('p_point_mutation', 0.05, 0.12)
    if (p_crossover + p_subtree_mutation + p_hoist_mutation + p_point_mutation) > 1.0:
        raise optuna.exceptions.TrialPruned()

    sr = SymbolicRegressor(
        population_size=trial.suggest_int('population_size', 800, 10000, step=200),
        generations=trial.suggest_int('generations', 10, 100, step=5),
        stopping_criteria=0.001,
        p_crossover=p_crossover,
        p_subtree_mutation=p_subtree_mutation,
        p_hoist_mutation=p_hoist_mutation,
        p_point_mutation=p_point_mutation,
        max_samples=trial.suggest_float('max_samples', 0.8, 1.0),
        parsimony_coefficient=trial.suggest_float('parsimony_coefficient', 0.001, 0.03, log=True),
        function_set=['add', 'sub', 'mul', 'div', 'sin', 'cos','sqrt','log'],  # hızlı prototip için sade set!
        metric='mse',
        init_depth=trial.suggest_categorical('init_depth', [(2,5), (3,6), (2,6)]),
        init_method=trial.suggest_categorical('init_method', ['half and half', 'grow']),
        const_range=None,
        verbose=0,
        random_state=42
    )
    sr.fit(X_train, y_train)
    y_pred = sr.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    return mse  # minimize

# HIZLI OPTIMIZASYON (düşük trial)
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=80, show_progress_bar=True)  # n_trials: 5-10 arası hızlı sonuç

print("\nBest hyperparameters:")
for key, value in study.best_params.items():
    print(f"{key}: {value}")

# EN İYİ PARAMETRELERLE MODELİ TEKRAR EĞİT
best_params = study.best_params

sr = SymbolicRegressor(
    population_size=best_params['population_size'],
    generations=best_params['generations'],
    stopping_criteria=0.001,
    p_crossover=best_params['p_crossover'],
    p_subtree_mutation=best_params['p_subtree_mutation'],
    p_hoist_mutation=best_params['p_hoist_mutation'],
    p_point_mutation=best_params['p_point_mutation'],
    max_samples=best_params['max_samples'],
    parsimony_coefficient=best_params['parsimony_coefficient'],
    function_set=['add', 'sub', 'mul', 'div','sin','cos','log','sqrt'],  # yukarıdakiyle aynı set
    metric='mse',
    init_depth=best_params['init_depth'],
    init_method=best_params['init_method'],
    const_range=None,
    verbose=1,
    random_state=42
)

sr.fit(X_train, y_train)

# Sonra sr ile tüm tahmin ve analiz işlemlerini yapabilirsin:
# y_pred_train = sr.predict(X_train)
# y_pred_test  = sr.predict(X_test)



  from .autonotebook import tqdm as notebook_tqdm
[I 2025-10-02 22:20:05,581] A new study created in memory with name: no-name-f5b9b7cc-a419-43ee-ac96-8fa767d11e4b


[I 2025-10-02 22:22:54,598] Trial 0 finished with value: 0.2778532230627197 and parameters: {'p_crossover': 0.6828639087359278, 'p_subtree_mutation': 0.05599662353685968, 'p_hoist_mutation': 0.0637271218382036, 'p_point_mutation': 0.09853632470109983, 'population_size': 9600, 'generations': 30, 'max_samples': 0.9024886813158493, 'parsimony_coefficient': 0.029151269937195416, 'init_depth': (3, 6), 'init_method': 'grow'}. Best is trial 0 with value: 0.2778532230627197.




[I 2025-10-02 22:23:13,054] Trial 1 finished with value: 0.466591812894055 and parameters: {'p_crossover': 0.6353487691122441, 'p_subtree_mutation': 0.07795953339111614, 'p_hoist_mutation': 0.06346808313687713, 'p_point_mutation': 0.05546663218333738, 'population_size': 2600, 'generations': 20, 'max_samples': 0.9701585345819594, 'parsimony_coefficient': 0.003403390892102365, 'init_depth': (2, 5), 'init_method': 'grow'}. Best is trial 0 with value: 0.2778532230627197.




[I 2025-10-02 22:23:26,418] Trial 2 finished with value: 0.5618298490006602 and parameters: {'p_crossover': 0.5367886633718, 'p_subtree_mutation': 0.058380324589196696, 'p_hoist_mutation': 0.06930170788264936, 'p_point_mutation': 0.10047147537169855, 'population_size': 4400, 'generations': 10, 'max_samples': 0.9713054761079392, 'parsimony_coefficient': 0.002716264214918919, 'init_depth': (2, 6), 'init_method': 'grow'}. Best is trial 0 with value: 0.2778532230627197.




[I 2025-10-02 22:24:45,736] Trial 3 finished with value: 0.33577635533804473 and parameters: {'p_crossover': 0.5153051732916722, 'p_subtree_mutation': 0.11894694734574834, 'p_hoist_mutation': 0.06748147204880917, 'p_point_mutation': 0.1154308598708447, 'population_size': 2800, 'generations': 80, 'max_samples': 0.9758671962633311, 'parsimony_coefficient': 0.005446872887667265, 'init_depth': (2, 5), 'init_method': 'grow'}. Best is trial 0 with value: 0.2778532230627197.




[I 2025-10-02 22:28:12,569] Trial 4 finished with value: 0.09197877861523364 and parameters: {'p_crossover': 0.7487260592924678, 'p_subtree_mutation': 0.0656118903549714, 'p_hoist_mutation': 0.05514345740199723, 'p_point_mutation': 0.06826691287597325, 'population_size': 8400, 'generations': 65, 'max_samples': 0.8241882462404615, 'parsimony_coefficient': 0.002619335458899474, 'init_depth': (2, 5), 'init_method': 'half and half'}. Best is trial 4 with value: 0.09197877861523364.




[I 2025-10-02 22:28:57,671] Trial 5 finished with value: 0.4184451576869721 and parameters: {'p_crossover': 0.6725571431423569, 'p_subtree_mutation': 0.10563384135230108, 'p_hoist_mutation': 0.036923425186618446, 'p_point_mutation': 0.05446165626896755, 'population_size': 1800, 'generations': 60, 'max_samples': 0.9934697480356875, 'parsimony_coefficient': 0.0017419283532408639, 'init_depth': (2, 6), 'init_method': 'grow'}. Best is trial 4 with value: 0.09197877861523364.




[I 2025-10-02 22:29:27,351] Trial 6 finished with value: 0.5209648814679118 and parameters: {'p_crossover': 0.5372612560285474, 'p_subtree_mutation': 0.10981784741119535, 'p_hoist_mutation': 0.03020208008390239, 'p_point_mutation': 0.10700867306461229, 'population_size': 2800, 'generations': 35, 'max_samples': 0.8765201838385687, 'parsimony_coefficient': 0.00954953096369179, 'init_depth': (2, 5), 'init_method': 'half and half'}. Best is trial 4 with value: 0.09197877861523364.




[I 2025-10-02 22:30:00,747] Trial 7 finished with value: 0.3506051759693276 and parameters: {'p_crossover': 0.5112548811499631, 'p_subtree_mutation': 0.11488570083057988, 'p_hoist_mutation': 0.06621465115323279, 'p_point_mutation': 0.07104506086514363, 'population_size': 5800, 'generations': 20, 'max_samples': 0.8402929570937464, 'parsimony_coefficient': 0.013524924906228636, 'init_depth': (2, 5), 'init_method': 'grow'}. Best is trial 4 with value: 0.09197877861523364.




[I 2025-10-02 22:30:40,485] Trial 8 finished with value: 1.056176441617341 and parameters: {'p_crossover': 0.6547869063331595, 'p_subtree_mutation': 0.07122784138785959, 'p_hoist_mutation': 0.052346966608791, 'p_point_mutation': 0.05637408266929438, 'population_size': 2800, 'generations': 50, 'max_samples': 0.9898538411705362, 'parsimony_coefficient': 0.02833264930845455, 'init_depth': (2, 5), 'init_method': 'grow'}. Best is trial 4 with value: 0.09197877861523364.




[I 2025-10-02 22:31:30,624] Trial 9 finished with value: 0.6436813047390254 and parameters: {'p_crossover': 0.6232165188292668, 'p_subtree_mutation': 0.07462819885800509, 'p_hoist_mutation': 0.04654791441173685, 'p_point_mutation': 0.08644221693084678, 'population_size': 4200, 'generations': 40, 'max_samples': 0.8912305817610557, 'parsimony_coefficient': 0.019560200413327, 'init_depth': (2, 6), 'init_method': 'half and half'}. Best is trial 4 with value: 0.09197877861523364.




[I 2025-10-02 22:38:06,370] Trial 10 finished with value: 0.1515488096525061 and parameters: {'p_crossover': 0.7498980660072243, 'p_subtree_mutation': 0.0920296514393781, 'p_hoist_mutation': 0.013699757992202997, 'p_point_mutation': 0.07375250527785294, 'population_size': 9000, 'generations': 100, 'max_samples': 0.828164888247869, 'parsimony_coefficient': 0.0011591481505407787, 'init_depth': (3, 6), 'init_method': 'half and half'}. Best is trial 4 with value: 0.09197877861523364.




[I 2025-10-02 22:53:13,986] Trial 11 finished with value: 0.15894312925730983 and parameters: {'p_crossover': 0.7235875253897501, 'p_subtree_mutation': 0.09401715047204914, 'p_hoist_mutation': 0.011344852913079647, 'p_point_mutation': 0.07272736565179645, 'population_size': 9200, 'generations': 100, 'max_samples': 0.8061716523130245, 'parsimony_coefficient': 0.001161096579004232, 'init_depth': (3, 6), 'init_method': 'half and half'}. Best is trial 4 with value: 0.09197877861523364.




[I 2025-10-02 23:13:42,541] Trial 12 finished with value: 0.12240092521087186 and parameters: {'p_crossover': 0.7498155547576284, 'p_subtree_mutation': 0.08687991048748223, 'p_hoist_mutation': 0.011495757796869472, 'p_point_mutation': 0.07210636822617407, 'population_size': 7600, 'generations': 100, 'max_samples': 0.8122377340404603, 'parsimony_coefficient': 0.0010662506396411733, 'init_depth': (3, 6), 'init_method': 'half and half'}. Best is trial 4 with value: 0.09197877861523364.




[I 2025-10-02 23:30:06,876] Trial 13 finished with value: 0.07956543100392245 and parameters: {'p_crossover': 0.7137108790262018, 'p_subtree_mutation': 0.06627031250962334, 'p_hoist_mutation': 0.024825848152499365, 'p_point_mutation': 0.0848137257611285, 'population_size': 7400, 'generations': 75, 'max_samples': 0.856422102173139, 'parsimony_coefficient': 0.002093220922342242, 'init_depth': (3, 6), 'init_method': 'half and half'}. Best is trial 13 with value: 0.07956543100392245.




[I 2025-10-02 23:40:21,523] Trial 14 finished with value: 0.21721910140694892 and parameters: {'p_crossover': 0.7061730596762902, 'p_subtree_mutation': 0.06606431514588836, 'p_hoist_mutation': 0.025024574359617073, 'p_point_mutation': 0.08638793205688526, 'population_size': 6800, 'generations': 70, 'max_samples': 0.857789852481737, 'parsimony_coefficient': 0.0027136204438632074, 'init_depth': (3, 6), 'init_method': 'half and half'}. Best is trial 13 with value: 0.07956543100392245.




[I 2025-10-02 23:50:17,600] Trial 15 finished with value: 0.37166803549024424 and parameters: {'p_crossover': 0.6009175096227442, 'p_subtree_mutation': 0.06298383038959965, 'p_hoist_mutation': 0.053561054814810924, 'p_point_mutation': 0.08006089340686373, 'population_size': 7800, 'generations': 80, 'max_samples': 0.9138770311106847, 'parsimony_coefficient': 0.004927184540293687, 'init_depth': (2, 5), 'init_method': 'half and half'}. Best is trial 13 with value: 0.07956543100392245.




[I 2025-10-03 00:00:05,232] Trial 16 finished with value: 0.15461411516294588 and parameters: {'p_crossover': 0.7107022659609397, 'p_subtree_mutation': 0.05104806394540358, 'p_hoist_mutation': 0.021863453457673925, 'p_point_mutation': 0.06266794818323644, 'population_size': 8000, 'generations': 65, 'max_samples': 0.9343952954331928, 'parsimony_coefficient': 0.0020032372718135493, 'init_depth': (3, 6), 'init_method': 'half and half'}. Best is trial 13 with value: 0.07956543100392245.




[I 2025-10-03 00:07:40,876] Trial 17 finished with value: 0.8963437751868716 and parameters: {'p_crossover': 0.5822889490763059, 'p_subtree_mutation': 0.08011347312820392, 'p_hoist_mutation': 0.0391865171970066, 'p_point_mutation': 0.09280905424210724, 'population_size': 6400, 'generations': 80, 'max_samples': 0.8581476569974763, 'parsimony_coefficient': 0.007985802122630527, 'init_depth': (3, 6), 'init_method': 'half and half'}. Best is trial 13 with value: 0.07956543100392245.




[I 2025-10-03 00:11:53,069] Trial 18 finished with value: 0.14208518809627715 and parameters: {'p_crossover': 0.6874945805547594, 'p_subtree_mutation': 0.06538593856211067, 'p_hoist_mutation': 0.046270324386755476, 'p_point_mutation': 0.06336027035548691, 'population_size': 5000, 'generations': 50, 'max_samples': 0.840077090121764, 'parsimony_coefficient': 0.0017424350135159935, 'init_depth': (2, 5), 'init_method': 'half and half'}. Best is trial 13 with value: 0.07956543100392245.




[I 2025-10-03 00:25:27,301] Trial 19 finished with value: 0.269285974199703 and parameters: {'p_crossover': 0.730361569173306, 'p_subtree_mutation': 0.06957100823941613, 'p_hoist_mutation': 0.03270308009659123, 'p_point_mutation': 0.08245108303519541, 'population_size': 8600, 'generations': 90, 'max_samples': 0.8654648826902862, 'parsimony_coefficient': 0.003780955540932685, 'init_depth': (2, 6), 'init_method': 'half and half'}. Best is trial 13 with value: 0.07956543100392245.




[I 2025-10-03 00:34:00,316] Trial 20 finished with value: 0.2107179860698598 and parameters: {'p_crossover': 0.699594122371961, 'p_subtree_mutation': 0.05133799204591063, 'p_hoist_mutation': 0.056081026790889145, 'p_point_mutation': 0.06507230271063305, 'population_size': 6600, 'generations': 70, 'max_samples': 0.8261129903237194, 'parsimony_coefficient': 0.002240862629370136, 'init_depth': (2, 5), 'init_method': 'half and half'}. Best is trial 13 with value: 0.07956543100392245.




[I 2025-10-03 00:47:17,818] Trial 21 finished with value: 0.1285936375761043 and parameters: {'p_crossover': 0.7479136490464381, 'p_subtree_mutation': 0.08614870893157742, 'p_hoist_mutation': 0.018339959175543465, 'p_point_mutation': 0.07744662604927939, 'population_size': 7600, 'generations': 90, 'max_samples': 0.8056552964016255, 'parsimony_coefficient': 0.0013264432210770478, 'init_depth': (3, 6), 'init_method': 'half and half'}. Best is trial 13 with value: 0.07956543100392245.




[I 2025-10-03 00:59:24,800] Trial 22 finished with value: 0.12817067359997675 and parameters: {'p_crossover': 0.7348289411175508, 'p_subtree_mutation': 0.08554130461553267, 'p_hoist_mutation': 0.018061627448538096, 'p_point_mutation': 0.06776250226152272, 'population_size': 7200, 'generations': 90, 'max_samples': 0.8233100209388866, 'parsimony_coefficient': 0.0014621363224760523, 'init_depth': (3, 6), 'init_method': 'half and half'}. Best is trial 13 with value: 0.07956543100392245.




[I 2025-10-03 01:10:17,672] Trial 23 finished with value: 0.1385195402105895 and parameters: {'p_crossover': 0.7183549580287191, 'p_subtree_mutation': 0.0989490801709211, 'p_hoist_mutation': 0.02726070447190782, 'p_point_mutation': 0.09016749386779462, 'population_size': 10000, 'generations': 55, 'max_samples': 0.8006045850936226, 'parsimony_coefficient': 0.0010197861335877036, 'init_depth': (3, 6), 'init_method': 'half and half'}. Best is trial 13 with value: 0.07956543100392245.




[I 2025-10-03 01:21:24,655] Trial 24 finished with value: 0.3282471847246553 and parameters: {'p_crossover': 0.6626898703765058, 'p_subtree_mutation': 0.08296162222685571, 'p_hoist_mutation': 0.015135892690982453, 'p_point_mutation': 0.07724082004702029, 'population_size': 8400, 'generations': 75, 'max_samples': 0.8421564159951852, 'parsimony_coefficient': 0.0038199406059825913, 'init_depth': (3, 6), 'init_method': 'half and half'}. Best is trial 13 with value: 0.07956543100392245.




[I 2025-10-03 01:30:25,193] Trial 25 finished with value: 0.4754603559431325 and parameters: {'p_crossover': 0.7410980228587686, 'p_subtree_mutation': 0.058890097552449984, 'p_hoist_mutation': 0.021930989253638046, 'p_point_mutation': 0.06005495184286323, 'population_size': 5800, 'generations': 90, 'max_samples': 0.8181523025646428, 'parsimony_coefficient': 0.0025445013522670985, 'init_depth': (3, 6), 'init_method': 'half and half'}. Best is trial 13 with value: 0.07956543100392245.




[I 2025-10-03 01:39:44,938] Trial 26 finished with value: 0.20561492527823166 and parameters: {'p_crossover': 0.7219973195883161, 'p_subtree_mutation': 0.07350413924344015, 'p_hoist_mutation': 0.01016056334338215, 'p_point_mutation': 0.06841968912641233, 'population_size': 7400, 'generations': 60, 'max_samples': 0.8771972594926324, 'parsimony_coefficient': 0.0015397492864803323, 'init_depth': (3, 6), 'init_method': 'half and half'}. Best is trial 13 with value: 0.07956543100392245.




[I 2025-10-03 01:46:05,255] Trial 27 finished with value: 0.10452527585399449 and parameters: {'p_crossover': 0.696167617926964, 'p_subtree_mutation': 0.09017507394371685, 'p_hoist_mutation': 0.04348346853992022, 'p_point_mutation': 0.05002242535778398, 'population_size': 8400, 'generations': 45, 'max_samples': 0.8439792969611982, 'parsimony_coefficient': 0.007562154499549977, 'init_depth': (3, 6), 'init_method': 'half and half'}. Best is trial 13 with value: 0.07956543100392245.




[I 2025-10-03 01:51:48,156] Trial 28 finished with value: 0.48631019559399513 and parameters: {'p_crossover': 0.700317981649006, 'p_subtree_mutation': 0.09879999745437706, 'p_hoist_mutation': 0.044503053430593895, 'p_point_mutation': 0.050980473459093366, 'population_size': 8800, 'generations': 45, 'max_samples': 0.8537520024428097, 'parsimony_coefficient': 0.007917715126348968, 'init_depth': (2, 6), 'init_method': 'half and half'}. Best is trial 13 with value: 0.07956543100392245.




[I 2025-10-03 01:56:11,099] Trial 29 finished with value: 0.39389241266261144 and parameters: {'p_crossover': 0.6859870992168969, 'p_subtree_mutation': 0.06104290751508982, 'p_hoist_mutation': 0.05898809530107393, 'p_point_mutation': 0.10015020082220577, 'population_size': 9800, 'generations': 30, 'max_samples': 0.8840258206053205, 'parsimony_coefficient': 0.010502825922978612, 'init_depth': (2, 5), 'init_method': 'half and half'}. Best is trial 13 with value: 0.07956543100392245.




[I 2025-10-03 02:04:45,331] Trial 30 finished with value: 0.25120396666227446 and parameters: {'p_crossover': 0.6334360551827484, 'p_subtree_mutation': 0.0915091841966211, 'p_hoist_mutation': 0.035243521402719837, 'p_point_mutation': 0.05826353206803739, 'population_size': 8200, 'generations': 60, 'max_samples': 0.9218857807048121, 'parsimony_coefficient': 0.006819742703273995, 'init_depth': (3, 6), 'init_method': 'half and half'}. Best is trial 13 with value: 0.07956543100392245.




[I 2025-10-03 02:10:09,090] Trial 31 finished with value: 0.19247246308222213 and parameters: {'p_crossover': 0.7326953696218768, 'p_subtree_mutation': 0.08953264551514876, 'p_hoist_mutation': 0.05158631971297818, 'p_point_mutation': 0.0924405935390636, 'population_size': 7000, 'generations': 45, 'max_samples': 0.8155122979473688, 'parsimony_coefficient': 0.0047392497535086574, 'init_depth': (3, 6), 'init_method': 'half and half'}. Best is trial 13 with value: 0.07956543100392245.




[I 2025-10-03 02:21:09,954] Trial 32 finished with value: 0.3905414907132235 and parameters: {'p_crossover': 0.7491683516618055, 'p_subtree_mutation': 0.07762903161024938, 'p_hoist_mutation': 0.040656830968277234, 'p_point_mutation': 0.05039058104523357, 'population_size': 9400, 'generations': 70, 'max_samples': 0.8467829425007618, 'parsimony_coefficient': 0.003075794105076363, 'init_depth': (3, 6), 'init_method': 'half and half'}. Best is trial 13 with value: 0.07956543100392245.




[I 2025-10-03 02:27:24,217] Trial 33 finished with value: 0.088554333926336 and parameters: {'p_crossover': 0.698161122526426, 'p_subtree_mutation': 0.10039987801089809, 'p_hoist_mutation': 0.05937774654484974, 'p_point_mutation': 0.07576296324461951, 'population_size': 6200, 'generations': 55, 'max_samples': 0.8295558571521241, 'parsimony_coefficient': 0.002197765694793196, 'init_depth': (3, 6), 'init_method': 'half and half'}. Best is trial 13 with value: 0.07956543100392245.




[I 2025-10-03 02:32:27,841] Trial 34 finished with value: 0.28940766435053067 and parameters: {'p_crossover': 0.6516078654554347, 'p_subtree_mutation': 0.09702937185607037, 'p_hoist_mutation': 0.062081040176215734, 'p_point_mutation': 0.0776730130789577, 'population_size': 6000, 'generations': 55, 'max_samples': 0.8318283224739079, 'parsimony_coefficient': 0.004255078672370371, 'init_depth': (3, 6), 'init_method': 'grow'}. Best is trial 13 with value: 0.07956543100392245.




[I 2025-10-03 02:39:06,680] Trial 35 finished with value: 0.11916644432860422 and parameters: {'p_crossover': 0.6832561900259174, 'p_subtree_mutation': 0.10611551609395065, 'p_hoist_mutation': 0.060342251947812765, 'p_point_mutation': 0.08345144831645629, 'population_size': 6200, 'generations': 65, 'max_samples': 0.8357188795709787, 'parsimony_coefficient': 0.005897613438798215, 'init_depth': (2, 5), 'init_method': 'grow'}. Best is trial 13 with value: 0.07956543100392245.




[I 2025-10-03 02:42:21,672] Trial 36 finished with value: 0.2503665313516881 and parameters: {'p_crossover': 0.6976756193269151, 'p_subtree_mutation': 0.1037820702871323, 'p_hoist_mutation': 0.049086401125396875, 'p_point_mutation': 0.10698481943375399, 'population_size': 5000, 'generations': 40, 'max_samples': 0.9442799009607362, 'parsimony_coefficient': 0.0031933617434564236, 'init_depth': (2, 6), 'init_method': 'half and half'}. Best is trial 13 with value: 0.07956543100392245.




[I 2025-10-03 02:43:51,185] Trial 37 finished with value: 0.43844117852702896 and parameters: {'p_crossover': 0.6639411929424379, 'p_subtree_mutation': 0.05501864110585406, 'p_hoist_mutation': 0.06558899725507662, 'p_point_mutation': 0.09758566837387642, 'population_size': 3800, 'generations': 25, 'max_samples': 0.8678664793188111, 'parsimony_coefficient': 0.002078840040519494, 'init_depth': (3, 6), 'init_method': 'grow'}. Best is trial 13 with value: 0.07956543100392245.




[I 2025-10-03 02:51:12,598] Trial 38 finished with value: 0.15601486224614083 and parameters: {'p_crossover': 0.6755726685396072, 'p_subtree_mutation': 0.1110324250453248, 'p_hoist_mutation': 0.05758264413851789, 'p_point_mutation': 0.05427311530316161, 'population_size': 8200, 'generations': 50, 'max_samples': 0.8993970926378263, 'parsimony_coefficient': 0.002372093365851821, 'init_depth': (2, 5), 'init_method': 'half and half'}. Best is trial 13 with value: 0.07956543100392245.




[I 2025-10-03 02:51:23,491] Trial 39 finished with value: 2.6374343928184265 and parameters: {'p_crossover': 0.5614680652922928, 'p_subtree_mutation': 0.08115577605168435, 'p_hoist_mutation': 0.04143222937900198, 'p_point_mutation': 0.08917168380047617, 'population_size': 1000, 'generations': 15, 'max_samples': 0.8519272493562116, 'parsimony_coefficient': 0.013632125192770723, 'init_depth': (2, 5), 'init_method': 'half and half'}. Best is trial 13 with value: 0.07956543100392245.




[I 2025-10-03 02:54:45,634] Trial 40 finished with value: 0.28987756233307227 and parameters: {'p_crossover': 0.7133510303137631, 'p_subtree_mutation': 0.10207184748640293, 'p_hoist_mutation': 0.049629225689802624, 'p_point_mutation': 0.06886303090214657, 'population_size': 5600, 'generations': 35, 'max_samples': 0.8654085511550752, 'parsimony_coefficient': 0.0018227115100771115, 'init_depth': (3, 6), 'init_method': 'grow'}. Best is trial 13 with value: 0.07956543100392245.




[I 2025-10-03 02:59:37,576] Trial 41 finished with value: 0.466591812894055 and parameters: {'p_crossover': 0.6871108768672161, 'p_subtree_mutation': 0.10908508413789789, 'p_hoist_mutation': 0.06102730927055563, 'p_point_mutation': 0.08260157886300593, 'population_size': 5000, 'generations': 65, 'max_samples': 0.8402292617238626, 'parsimony_coefficient': 0.006022969748516743, 'init_depth': (2, 5), 'init_method': 'grow'}. Best is trial 13 with value: 0.07956543100392245.




[I 2025-10-03 03:06:19,278] Trial 42 finished with value: 0.15079754352018035 and parameters: {'p_crossover': 0.6736285045630999, 'p_subtree_mutation': 0.11640546676160776, 'p_hoist_mutation': 0.06777051816496388, 'p_point_mutation': 0.08376356327700736, 'population_size': 6200, 'generations': 65, 'max_samples': 0.8334681510870947, 'parsimony_coefficient': 0.006545064209344365, 'init_depth': (2, 5), 'init_method': 'grow'}. Best is trial 13 with value: 0.07956543100392245.




[I 2025-10-03 03:12:16,243] Trial 43 finished with value: 0.46695048313140286 and parameters: {'p_crossover': 0.6443966446393643, 'p_subtree_mutation': 0.10801148501144117, 'p_hoist_mutation': 0.054231159525282756, 'p_point_mutation': 0.07538592189163906, 'population_size': 6800, 'generations': 60, 'max_samples': 0.8326013190789315, 'parsimony_coefficient': 0.010536058976597965, 'init_depth': (2, 5), 'init_method': 'grow'}. Best is trial 13 with value: 0.07956543100392245.




[I 2025-10-03 03:20:32,366] Trial 44 finished with value: 0.29347428628479094 and parameters: {'p_crossover': 0.6934799315910462, 'p_subtree_mutation': 0.09510105502742244, 'p_hoist_mutation': 0.06395386085147015, 'p_point_mutation': 0.09633150981662253, 'population_size': 7400, 'generations': 75, 'max_samples': 0.8175060885369679, 'parsimony_coefficient': 0.005281337017588207, 'init_depth': (2, 5), 'init_method': 'grow'}. Best is trial 13 with value: 0.07956543100392245.




[I 2025-10-03 03:30:42,397] Trial 45 finished with value: 0.4684290200125555 and parameters: {'p_crossover': 0.6205566188973355, 'p_subtree_mutation': 0.11282851057274129, 'p_hoist_mutation': 0.05981578965694026, 'p_point_mutation': 0.07974906199988552, 'population_size': 9000, 'generations': 75, 'max_samples': 0.8460732820768145, 'parsimony_coefficient': 0.003200904931267628, 'init_depth': (2, 5), 'init_method': 'grow'}. Best is trial 13 with value: 0.07956543100392245.
[I 2025-10-03 03:30:42,445] Trial 46 pruned. 




[I 2025-10-03 03:34:38,158] Trial 47 finished with value: 0.4863101955939954 and parameters: {'p_crossover': 0.7089503925234958, 'p_subtree_mutation': 0.10159599815513692, 'p_hoist_mutation': 0.06925957996176252, 'p_point_mutation': 0.0861500512924285, 'population_size': 5400, 'generations': 50, 'max_samples': 0.8107227637142812, 'parsimony_coefficient': 0.00791820948853864, 'init_depth': (2, 6), 'init_method': 'grow'}. Best is trial 13 with value: 0.07956543100392245.




[I 2025-10-03 03:37:34,775] Trial 48 finished with value: 0.26759372985622143 and parameters: {'p_crossover': 0.7369473364384674, 'p_subtree_mutation': 0.06879763474581514, 'p_hoist_mutation': 0.04504690172787326, 'p_point_mutation': 0.07043572703197537, 'population_size': 3400, 'generations': 55, 'max_samples': 0.8728465411544187, 'parsimony_coefficient': 0.013897252168881113, 'init_depth': (2, 5), 'init_method': 'half and half'}. Best is trial 13 with value: 0.07956543100392245.




[I 2025-10-03 03:46:33,347] Trial 49 finished with value: 0.2735828009073616 and parameters: {'p_crossover': 0.6741961868972906, 'p_subtree_mutation': 0.07578470420920783, 'p_hoist_mutation': 0.03243897110002872, 'p_point_mutation': 0.07415701580206027, 'population_size': 6400, 'generations': 85, 'max_samples': 0.823446848302672, 'parsimony_coefficient': 0.004281240444691834, 'init_depth': (3, 6), 'init_method': 'half and half'}. Best is trial 13 with value: 0.07956543100392245.




[I 2025-10-03 03:54:34,878] Trial 50 finished with value: 0.3028116227760994 and parameters: {'p_crossover': 0.7078226938740382, 'p_subtree_mutation': 0.05520383497830614, 'p_hoist_mutation': 0.03757331018481176, 'p_point_mutation': 0.10586333149018008, 'population_size': 7800, 'generations': 65, 'max_samples': 0.8602394679663375, 'parsimony_coefficient': 0.0027954737018209158, 'init_depth': (3, 6), 'init_method': 'half and half'}. Best is trial 13 with value: 0.07956543100392245.




[I 2025-10-03 04:12:10,793] Trial 51 finished with value: 0.08930899970224253 and parameters: {'p_crossover': 0.7270008894217882, 'p_subtree_mutation': 0.08823170724097834, 'p_hoist_mutation': 0.06293449206969677, 'p_point_mutation': 0.07210394308818124, 'population_size': 7200, 'generations': 100, 'max_samples': 0.8351433787281866, 'parsimony_coefficient': 0.001211624619914333, 'init_depth': (3, 6), 'init_method': 'half and half'}. Best is trial 13 with value: 0.07956543100392245.




[I 2025-10-03 04:27:23,732] Trial 52 finished with value: 0.15316017917648175 and parameters: {'p_crossover': 0.7256399579353637, 'p_subtree_mutation': 0.08924044872136824, 'p_hoist_mutation': 0.06192521370732843, 'p_point_mutation': 0.08041300574773699, 'population_size': 7000, 'generations': 95, 'max_samples': 0.8386979397129932, 'parsimony_coefficient': 0.001307573379280529, 'init_depth': (3, 6), 'init_method': 'half and half'}. Best is trial 13 with value: 0.07956543100392245.




[I 2025-10-03 04:35:40,920] Trial 53 finished with value: 0.10004614896965089 and parameters: {'p_crossover': 0.7163814880596353, 'p_subtree_mutation': 0.11963413053112346, 'p_hoist_mutation': 0.06497561201401478, 'p_point_mutation': 0.06492915149155087, 'population_size': 8600, 'generations': 45, 'max_samples': 0.8522843813030514, 'parsimony_coefficient': 0.0015812064058895261, 'init_depth': (3, 6), 'init_method': 'half and half'}. Best is trial 13 with value: 0.07956543100392245.




[I 2025-10-03 04:42:04,137] Trial 54 finished with value: 0.3155603798637658 and parameters: {'p_crossover': 0.7398333911489987, 'p_subtree_mutation': 0.11743322697763768, 'p_hoist_mutation': 0.06568254917365463, 'p_point_mutation': 0.06432205244716716, 'population_size': 8600, 'generations': 40, 'max_samples': 0.8502798784177958, 'parsimony_coefficient': 0.001554746988151462, 'init_depth': (3, 6), 'init_method': 'half and half'}. Best is trial 13 with value: 0.07956543100392245.




[I 2025-10-03 04:49:40,500] Trial 55 finished with value: 0.21823808032246803 and parameters: {'p_crossover': 0.7179133134575604, 'p_subtree_mutation': 0.06610376244850003, 'p_hoist_mutation': 0.06400973666810723, 'p_point_mutation': 0.060226568722981814, 'population_size': 9400, 'generations': 45, 'max_samples': 0.8855352448186213, 'parsimony_coefficient': 0.0018514924208811805, 'init_depth': (3, 6), 'init_method': 'half and half'}. Best is trial 13 with value: 0.07956543100392245.




[I 2025-10-03 04:55:00,140] Trial 56 finished with value: 0.25240296127436673 and parameters: {'p_crossover': 0.703225209377132, 'p_subtree_mutation': 0.08874824740906707, 'p_hoist_mutation': 0.06997482353951291, 'p_point_mutation': 0.06639608196137618, 'population_size': 8000, 'generations': 35, 'max_samples': 0.8251244749890813, 'parsimony_coefficient': 0.001171968350443869, 'init_depth': (3, 6), 'init_method': 'half and half'}. Best is trial 13 with value: 0.07956543100392245.




[I 2025-10-03 05:04:17,381] Trial 57 finished with value: 0.19210537550088352 and parameters: {'p_crossover': 0.7290970745691877, 'p_subtree_mutation': 0.0930124321294002, 'p_hoist_mutation': 0.05497689194933048, 'p_point_mutation': 0.07170252646726477, 'population_size': 8800, 'generations': 55, 'max_samples': 0.8580824432133856, 'parsimony_coefficient': 0.002059186123121941, 'init_depth': (3, 6), 'init_method': 'half and half'}. Best is trial 13 with value: 0.07956543100392245.




[I 2025-10-03 05:20:14,548] Trial 58 finished with value: 0.23145911310849107 and parameters: {'p_crossover': 0.5308754189552032, 'p_subtree_mutation': 0.08434494582881247, 'p_hoist_mutation': 0.051746196114655856, 'p_point_mutation': 0.06186770691127568, 'population_size': 7600, 'generations': 85, 'max_samples': 0.9998816902992175, 'parsimony_coefficient': 0.0013319714600212279, 'init_depth': (3, 6), 'init_method': 'half and half'}. Best is trial 13 with value: 0.07956543100392245.




[I 2025-10-03 05:28:05,101] Trial 59 finished with value: 0.3206271051573656 and parameters: {'p_crossover': 0.7155834358725425, 'p_subtree_mutation': 0.07304891692703226, 'p_hoist_mutation': 0.057917948773758596, 'p_point_mutation': 0.05330272982149861, 'population_size': 8400, 'generations': 50, 'max_samples': 0.8076524425107208, 'parsimony_coefficient': 0.0015768651839206365, 'init_depth': (3, 6), 'init_method': 'half and half'}. Best is trial 13 with value: 0.07956543100392245.




[I 2025-10-03 05:48:22,986] Trial 60 finished with value: 0.07542198209962676 and parameters: {'p_crossover': 0.69257089095968, 'p_subtree_mutation': 0.07911580289982224, 'p_hoist_mutation': 0.027919945220787088, 'p_point_mutation': 0.058349209207281344, 'population_size': 9200, 'generations': 95, 'max_samples': 0.8469202735838696, 'parsimony_coefficient': 0.0011752998431055492, 'init_depth': (3, 6), 'init_method': 'half and half'}. Best is trial 60 with value: 0.07542198209962676.




[I 2025-10-03 06:06:44,824] Trial 61 finished with value: 0.11083107270694863 and parameters: {'p_crossover': 0.6934618854400708, 'p_subtree_mutation': 0.07945674932709933, 'p_hoist_mutation': 0.027227162845707014, 'p_point_mutation': 0.0577668922156484, 'population_size': 9200, 'generations': 95, 'max_samples': 0.8450933138873884, 'parsimony_coefficient': 0.0010000292751236395, 'init_depth': (3, 6), 'init_method': 'half and half'}. Best is trial 60 with value: 0.07542198209962676.




[I 2025-10-03 06:29:05,540] Trial 62 finished with value: 0.1193154268386722 and parameters: {'p_crossover': 0.7414169853006448, 'p_subtree_mutation': 0.0697159278287124, 'p_hoist_mutation': 0.024483443545108653, 'p_point_mutation': 0.05301037894178577, 'population_size': 9600, 'generations': 100, 'max_samples': 0.8275450286315872, 'parsimony_coefficient': 0.001160545792517715, 'init_depth': (3, 6), 'init_method': 'half and half'}. Best is trial 60 with value: 0.07542198209962676.




[I 2025-10-03 06:44:01,847] Trial 63 finished with value: 0.16013833812938663 and parameters: {'p_crossover': 0.7080146004704487, 'p_subtree_mutation': 0.06345364488065373, 'p_hoist_mutation': 0.029374363648669674, 'p_point_mutation': 0.05627174859939002, 'population_size': 8000, 'generations': 95, 'max_samples': 0.8193765106886916, 'parsimony_coefficient': 0.001674212921120811, 'init_depth': (3, 6), 'init_method': 'half and half'}. Best is trial 60 with value: 0.07542198209962676.




[I 2025-10-03 06:55:37,244] Trial 64 finished with value: 0.113209701209643 and parameters: {'p_crossover': 0.7273341401009091, 'p_subtree_mutation': 0.09646403890112193, 'p_hoist_mutation': 0.06717124257098162, 'p_point_mutation': 0.062080305376136805, 'population_size': 7200, 'generations': 85, 'max_samples': 0.9603577927081012, 'parsimony_coefficient': 0.0022087222543683413, 'init_depth': (3, 6), 'init_method': 'half and half'}. Best is trial 60 with value: 0.07542198209962676.




[I 2025-10-03 07:09:38,516] Trial 65 finished with value: 0.1654381258568039 and parameters: {'p_crossover': 0.6934727983376812, 'p_subtree_mutation': 0.11963207801351118, 'p_hoist_mutation': 0.020411982304944735, 'p_point_mutation': 0.0666964846006669, 'population_size': 10000, 'generations': 80, 'max_samples': 0.8013740858630545, 'parsimony_coefficient': 0.0013304775125382823, 'init_depth': (3, 6), 'init_method': 'half and half'}. Best is trial 60 with value: 0.07542198209962676.




[I 2025-10-03 07:28:57,025] Trial 66 finished with value: 0.1567839359611224 and parameters: {'p_crossover': 0.7175615038788066, 'p_subtree_mutation': 0.08242297366455748, 'p_hoist_mutation': 0.03458786726901465, 'p_point_mutation': 0.0698114498721363, 'population_size': 8600, 'generations': 100, 'max_samples': 0.86124799258781, 'parsimony_coefficient': 0.0014330060583524556, 'init_depth': (2, 6), 'init_method': 'half and half'}. Best is trial 60 with value: 0.07542198209962676.




[I 2025-10-03 07:35:39,948] Trial 67 finished with value: 0.5586367067918782 and parameters: {'p_crossover': 0.6669741256786578, 'p_subtree_mutation': 0.05835870382729075, 'p_hoist_mutation': 0.043405937749864114, 'p_point_mutation': 0.05889275312623964, 'population_size': 9000, 'generations': 45, 'max_samples': 0.8507189628231071, 'parsimony_coefficient': 0.0026685166028072855, 'init_depth': (3, 6), 'init_method': 'half and half'}. Best is trial 60 with value: 0.07542198209962676.




[I 2025-10-03 07:47:20,847] Trial 68 finished with value: 0.20120572696036954 and parameters: {'p_crossover': 0.6073263161224477, 'p_subtree_mutation': 0.07722848074579187, 'p_hoist_mutation': 0.04780907873923061, 'p_point_mutation': 0.07377353203037139, 'population_size': 6600, 'generations': 95, 'max_samples': 0.8744508512161048, 'parsimony_coefficient': 0.001861593311894082, 'init_depth': (3, 6), 'init_method': 'half and half'}. Best is trial 60 with value: 0.07542198209962676.


In [None]:
# optunasız GA
"""from gplearn.genetic import SymbolicRegressor

sr = SymbolicRegressor(
    population_size=10000,
    generations=100,
    stopping_criteria=0.001,
    p_crossover=0.7,
    p_subtree_mutation=0.1,
    p_hoist_mutation=0.05,
    p_point_mutation=0.1,
    max_samples=0.95,
    parsimony_coefficient=0.003,
    function_set=['add', 'sub', 'mul', 'div', 'sqrt', 'log', 'sin', 'cos'],
    metric='mse',
    init_depth=(3, 8),
    init_method='half and half',
    const_range=None,
    verbose=1,
    random_state=42
)
sr.fit(X_train, y_train)

"""


In [None]:
from gplearn.genetic import SymbolicRegressor
from sklearn.metrics import mean_squared_error

# ... (optuna ile hyperparam tuning ve training kodun aynen)

# Eğitim sonrası
print("Bulunan formül:", sr._program)

# Formülde Xn (designation’ın indexi) var mı?
designation_idx = feature_cols.index(designation_feat)
if f"X{designation_idx}" in str(sr._program):
    print(f"Formülde designation (X{designation_idx}) KULLANILDI!")
else:
    print("Formülde designation YOK! (Model bunu matematiksel olarak gerek görmedi.)")


In [None]:
# Bulunan matematiksel formül
print("n_opt için bulunan formül:")
print(sr._program)


In [None]:
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
import numpy as np

y_pred_train = sr.predict(X_train)
y_pred_test = sr.predict(X_test)

# Metrikler
print("Train R2:", r2_score(y_train, y_pred_train))
print("Test R2 :", r2_score(y_test, y_pred_test))
print("Test MSE:", mean_squared_error(y_test, y_pred_test))
print("Test MAE:", mean_absolute_error(y_test, y_pred_test))


In [None]:
from sklearn.model_selection import cross_val_score

cv_scores = cross_val_score(sr, X_train, y_train, cv=5, scoring='r2')
print("5-Fold CV R2 Skorları (Train Seti):", cv_scores)
print("CV Ortalama R2:", np.mean(cv_scores))


In [None]:
"""
Symbolic Regression - Comprehensive Analysis
Optik Malzeme n_opt Tahmini - Görselleştirme ve Analiz
"""

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import pearsonr
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from collections import Counter

# Seaborn style ayarları
sns.set_theme(style="whitegrid", font_scale=1.25, rc={"axes.labelweight":"bold"})

In [None]:
# ====================================
# DATA PREPARATION & UTILITY FUNCTIONS
# ====================================

def to_numpy(X):
    """Convert to numpy array safely"""
    return X.values if hasattr(X, "values") else np.asarray(X)

def get_feature_names(X):
    """Extract feature names"""
    if hasattr(X, "columns"):
        return list(X.columns)
    X_np = to_numpy(X)
    return [f"X{i}" for i in range(X_np.shape[1])]

def calculate_metrics(y_true, y_pred):
    """Calculate all regression metrics"""
    corr, p_value = pearsonr(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    mae = mean_absolute_error(y_true, y_pred)
    return {
        'correlation': corr,
        'p_value': p_value,
        'r2': r2,
        'mse': mse,
        'mae': mae
    }

# Veri hazırlığı
y_train_plot = pd.Series(y_train).reset_index(drop=True)
y_pred_train_plot = pd.Series(y_pred_train).reset_index(drop=True)
y_test_plot = pd.Series(y_test).reset_index(drop=True)
y_pred_test_plot = pd.Series(y_pred_test).reset_index(drop=True)

Xtr = to_numpy(X_train)
Xte = to_numpy(X_test)
feat_names = get_feature_names(X_train)

In [None]:
# ====================================
# VISUALIZATION FUNCTIONS
# ====================================

def plot_scatter_comparison(y_train, y_pred_train, y_test, y_pred_test):
    """Combined train-test scatter plot"""
    fig, ax = plt.subplots(1, 2, figsize=(12, 6))
    
    # Train scatter
    sns.scatterplot(x=y_train, y=y_pred_train, s=65, color="#3288bd", 
                   edgecolor='k', ax=ax[0])
    ax[0].plot([y_train.min(), y_train.max()], [y_train.min(), y_train.max()], 
               'r--', lw=2, label="Identity (y = x)")
    ax[0].set_xlabel("Actual $n_{opt}$ (Train Set)")
    ax[0].set_ylabel("Predicted $n_{opt}$ (Train Set)")
    ax[0].set_title("Train Set: Actual vs. Predicted")
    ax[0].legend()
    ax[0].grid(True, linestyle=':', alpha=0.7)
    
    # Test scatter
    sns.scatterplot(x=y_test, y=y_pred_test, s=65, color="#e08214", 
                   edgecolor='k', ax=ax[1])
    ax[1].plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 
               'r--', lw=2, label="Identity (y = x)")
    ax[1].set_xlabel("Actual $n_{opt}$ (Test Set)")
    ax[1].set_ylabel("Predicted $n_{opt}$ (Test Set)")
    ax[1].set_title("Test Set: Actual vs. Predicted")
    ax[1].legend()
    ax[1].grid(True, linestyle=':', alpha=0.7)
    
    plt.tight_layout()
    plt.show()

def plot_residual_analysis(y_test, y_pred_test):
    """Residual plot for test set"""
    residuals = y_test - y_pred_test
    plt.figure(figsize=(8, 4.5))
    sns.scatterplot(x=y_pred_test, y=residuals, s=95, color="#e08214", edgecolor='k')
    plt.axhline(0, color='red', linestyle='--', lw=2, label="Zero Error")
    plt.xlabel("Predicted $n_{opt}$ (Test Set)")
    plt.ylabel("Residual (Actual - Predicted)")
    plt.title("Test Set: Residual Analysis")
    plt.legend()
    plt.tight_layout()
    plt.show()

def plot_time_series_comparison(y_test, y_pred_test):
    """Time series comparison plot"""
    plt.figure(figsize=(13, 5))
    sns.lineplot(x=np.arange(len(y_test)), y=y_test, label='Actual $n_{opt}$', 
                lw=2.5, color='#1f77b4')
    sns.lineplot(x=np.arange(len(y_pred_test)), y=y_pred_test, 
                label='Predicted $n_{opt}$', lw=2.5, color='#ff7f0e')
    plt.fill_between(np.arange(len(y_test)), y_test, y_pred_test, 
                     color='grey', alpha=0.18, label='Error Area')
    plt.xlabel("Sample Index (Test Set)")
    plt.ylabel("$n_{opt}$ Value")
    plt.title("Test Set: Actual vs Predicted Comparison")
    plt.legend()
    plt.tight_layout()
    plt.show()

def plot_error_histogram(y_test, y_pred_test):
    """Absolute error histogram"""
    abs_error = np.abs(y_test - y_pred_test)
    plt.figure(figsize=(7, 4))
    sns.histplot(abs_error, bins=8, kde=True, color='#5dade2', 
                edgecolor='k', alpha=0.9)
    plt.xlabel("Absolute Error")
    plt.ylabel("Frequency")
    plt.title("Test Set: Absolute Error Distribution")
    plt.tight_layout()
    plt.show()

In [None]:
# ====================================
# METRICS CALCULATION & REPORTING
# ====================================

def print_metrics(y_train, y_pred_train, y_test, y_pred_test):
    """Calculate and print all metrics"""
    
    train_metrics = calculate_metrics(y_train, y_pred_train)
    test_metrics = calculate_metrics(y_test, y_pred_test)
    
    print("="*50)
    print("SYMBOLIC REGRESSION PERFORMANCE METRICS")
    print("="*50)
    
    print("\nTRAIN SET METRICS:")
    print(f"Pearson correlation: {train_metrics['correlation']:.4f} (p-value: {train_metrics['p_value']:.2e})")
    print(f"R² score           : {train_metrics['r2']:.4f}")
    print(f"MSE                : {train_metrics['mse']:.4f}")
    print(f"MAE                : {train_metrics['mae']:.4f}")
    
    print("\nTEST SET METRICS:")
    print(f"Pearson correlation: {test_metrics['correlation']:.4f} (p-value: {test_metrics['p_value']:.2e})")
    print(f"R² score           : {test_metrics['r2']:.4f}")
    print(f"MSE                : {test_metrics['mse']:.4f}")
    print(f"MAE                : {test_metrics['mae']:.4f}")
    print("="*50)
    
    return train_metrics, test_metrics

In [None]:
# ====================================
# ADVANCED ANALYSIS FUNCTIONS
# ====================================

def numerical_partials(estimator, X, eps=1e-4):
    """Numerical partial derivatives using central difference"""
    X = np.asarray(X, dtype=float)
    n, d = X.shape
    grads = np.zeros((n, d), dtype=float)
    
    for j in range(d):
        Xp = X.copy()
        Xm = X.copy()
        h = eps * (np.abs(X[:, j]) + 1.0)
        Xp[:, j] += h
        Xm[:, j] -= h
        yp = estimator.predict(Xp)
        ym = estimator.predict(Xm)
        grads[:, j] = (yp - ym) / (2.0 * h)
    
    return grads

def plot_sensitivity_analysis(estimator, X_train, feat_names, top_k=10):
    """Feature sensitivity analysis using partial derivatives"""
    
    grads_train = numerical_partials(estimator, X_train, eps=1e-4)
    mean_abs_grad = np.mean(np.abs(grads_train), axis=0)
    order = np.argsort(-mean_abs_grad)
    topk = min(top_k, len(feat_names))
    top_idx = order[:topk]
    
    # Bar plot
    plt.figure(figsize=(10, 5))
    plt.subplot(1, 2, 1)
    plt.bar([feat_names[i] for i in top_idx], mean_abs_grad[top_idx])
    plt.ylabel("Mean |∂ŷ/∂x|")
    plt.title(f"Feature Sensitivity (Top {topk})")
    plt.xticks(rotation=45, ha="right")
    
    # Heatmap
    plt.subplot(1, 2, 2)
    plt.imshow(grads_train[:, top_idx].T, aspect='auto', interpolation='nearest')
    plt.colorbar(label="∂ŷ/∂x")
    plt.yticks(range(topk), [feat_names[i] for i in top_idx])
    plt.xlabel("Sample Index")
    plt.title("Sensitivity Heatmap")
    
    plt.tight_layout()
    plt.show()
    
    return top_idx, mean_abs_grad

def pdp_1d(estimator, X, j, grid_resolution=40, q_low=0.01, q_high=0.99):
    """Partial Dependence Plot for single feature"""
    X = np.asarray(X, dtype=float)
    xj = X[:, j]
    lo = np.quantile(xj, q_low)
    hi = np.quantile(xj, q_high)
    grid = np.linspace(lo, hi, grid_resolution)
    
    pdp_vals = []
    for v in grid:
        Xtmp = X.copy()
        Xtmp[:, j] = v
        yhat = estimator.predict(Xtmp)
        pdp_vals.append(yhat.mean())
    
    return grid, np.array(pdp_vals)

def plot_pdp_analysis(estimator, X_train, feat_names, top_indices, n_features=3):
    """Plot PDP for top features"""
    n_plot = min(n_features, len(top_indices))
    
    fig, axes = plt.subplots(1, n_plot, figsize=(5*n_plot, 4))
    if n_plot == 1:
        axes = [axes]
    
    for i, j in enumerate(top_indices[:n_plot]):
        grid, pdp_vals = pdp_1d(estimator, X_train, j)
        axes[i].plot(grid, pdp_vals, lw=2, color='#2E86AB')
        axes[i].set_xlabel(feat_names[j])
        axes[i].set_ylabel("E[ŷ | x_j]")
        axes[i].set_title(f"PDP: {feat_names[j]}")
        axes[i].grid(True, alpha=0.3)
    
    plt.tight_layout()
    plt.show()

In [None]:
# ====================================
# BOOTSTRAP STABILITY ANALYSIS
# ====================================

def clone_sr_with_params(base_sr, random_state):
    """Clone SR estimator with new random state"""
    params = base_sr.get_params(deep=True)
    params['random_state'] = random_state
    params['verbose'] = 0
    return type(base_sr)(**params)

def bootstrap_stability_analysis(estimator, X_train, y_train, X_test, y_test, 
                                n_bootstrap=30, random_seed=123):
    """Bootstrap analysis for model stability"""
    
    expr_list = []
    complexities = []
    Yhat_te = []
    
    rng = np.random.default_rng(random_seed)
    
    print(f"Running {n_bootstrap} bootstrap iterations...")
    
    for b in range(n_bootstrap):
        idx = rng.integers(0, X_train.shape[0], size=X_train.shape[0])
        Xb = X_train[idx]
        yb = to_numpy(y_train)[idx]
        
        sr_b = clone_sr_with_params(estimator, random_state=42 + b)
        sr_b.fit(Xb, yb)
        
        prog = getattr(sr_b, "_program", None)
        expr = str(prog) if prog is not None else None
        expr_list.append(expr)
        
        length = getattr(prog, "length_", None)
        depth = getattr(prog, "depth_", None)
        complexities.append({"length": length, "depth": depth})
        
        yhat = sr_b.predict(X_test)
        Yhat_te.append(yhat)
        
        if (b + 1) % 10 == 0:
            print(f"  Completed {b + 1}/{n_bootstrap}")
    
    Yhat_te = np.vstack(Yhat_te)
    return expr_list, complexities, Yhat_te

def plot_bootstrap_results(y_test, Yhat_te, expr_list, complexities):
    """Plot bootstrap analysis results"""
    
    # Prediction bands
    low = np.percentile(Yhat_te, 5, axis=0)
    med = np.percentile(Yhat_te, 50, axis=0)
    high = np.percentile(Yhat_te, 95, axis=0)
    
    plt.figure(figsize=(12, 5))
    plt.plot(range(len(y_test)), to_numpy(y_test), label="Actual", lw=2, color='#1f77b4')
    plt.plot(range(len(med)), med, label="Bootstrap Median", lw=2, color='#ff7f0e')
    plt.fill_between(range(len(low)), low, high, alpha=0.25, label="5%-95% Band", color='#ff7f0e')
    plt.xlabel("Sample Index (Test)")
    plt.ylabel("$n_{opt}$")
    plt.title("Bootstrap Prediction Uncertainty")
    plt.legend()
    plt.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.show()
    
    # Expression frequency
    ctr = Counter(expr_list)
    print("\nMost frequent expressions:")
    for expr, cnt in ctr.most_common(5):
        print(f"{cnt:>3}x  {expr}")
    
    # Complexity distribution
    lengths = [c["length"] for c in complexities if c["length"] is not None]
    depths = [c["depth"] for c in complexities if c["depth"] is not None]
    
    plt.figure(figsize=(10, 4))
    plt.subplot(1, 2, 1)
    plt.hist(lengths, bins=10, edgecolor='k', alpha=0.7, color='#5dade2')
    plt.xlabel("Program Length")
    plt.ylabel("Frequency")
    plt.title("Complexity Distribution (Length)")
    
    plt.subplot(1, 2, 2)
    plt.hist(depths, bins=10, edgecolor='k', alpha=0.7, color='#e08214')
    plt.xlabel("Program Depth")
    plt.ylabel("Frequency")
    plt.title("Complexity Distribution (Depth)")
    
    plt.tight_layout()
    plt.show()

In [None]:
# ====================================
# MAIN EXECUTION
# ====================================

def run_comprehensive_analysis():
    """Run complete SR analysis pipeline"""
    
    print("Starting Comprehensive Symbolic Regression Analysis...")
    print("="*60)
    
    # 1. Basic Visualizations
    print("\n1. Creating basic visualizations...")
    plot_scatter_comparison(y_train_plot, y_pred_train_plot, y_test_plot, y_pred_test_plot)
    plot_residual_analysis(y_test_plot, y_pred_test_plot)
    plot_time_series_comparison(y_test_plot, y_pred_test_plot)
    plot_error_histogram(y_test_plot, y_pred_test_plot)
    
    # 2. Metrics Calculation
    print("\n2. Calculating metrics...")
    train_metrics, test_metrics = print_metrics(y_train_plot, y_pred_train_plot, y_test_plot, y_pred_test_plot)
    
    # 3. Feature Sensitivity Analysis
    print("\n3. Running feature sensitivity analysis...")
    top_indices, sensitivities = plot_sensitivity_analysis(sr, Xtr, feat_names)
    
    # 4. Partial Dependence Plots
    print("\n4. Creating partial dependence plots...")
    plot_pdp_analysis(sr, Xtr, feat_names, top_indices)
    
    # 5. Bootstrap Stability Analysis
    print("\n5. Running bootstrap stability analysis...")
    expr_list, complexities, Yhat_te = bootstrap_stability_analysis(
        sr, Xtr, y_train, Xte, y_test, n_bootstrap=30
    )
    plot_bootstrap_results(y_test, Yhat_te, expr_list, complexities)
    
    print("\n" + "="*60)
    print("Comprehensive analysis completed!")
    
    return {
        'train_metrics': train_metrics,
        'test_metrics': test_metrics,
        'top_features': top_indices,
        'sensitivities': sensitivities,
        'bootstrap_results': (expr_list, complexities, Yhat_te)
    }

# Ana çalıştırma
if __name__ == "__main__":
    results = run_comprehensive_analysis()

In [None]:
# ==== INFERENCE TIME & FIGURE OF MERIT (ONLY THIS PART) ====
import time
import gc
import sys
import numpy as np

def _fmt_seconds(s):
    # İnsan gibi format: ns/µs/ms/s aralığında otomatik
    if s < 1e-6:
        return f"{s*1e9:.1f} ns"
    if s < 1e-3:
        return f"{s*1e6:.1f} µs"
    if s < 1:
        return f"{s*1e3:.2f} ms"
    return f"{s:.3f} s"

def measure_inference_speed(estimator, X,
                            warmup=50,
                            repeats_single=1000,
                            batch_sizes=(1, 8, 32, 128, None),
                            random_state=42):
    """
    batch_sizes:
      - 1: tek örnek gecikmesi
      - sayılar: mini-batch gecikmesi (N örnek bir seferde)
      - None: X'in tamamı (full-batch)
    """
    rng = np.random.default_rng(random_state)
    X_np = X.values if hasattr(X, "values") else np.asarray(X)
    n = X_np.shape[0]

    # Model karmaşıklığı (gplearn)
    prog = getattr(estimator, "_program", None)
    prog_len = getattr(prog, "length_", None)
    prog_depth = getattr(prog, "depth_", None)

    print("\n" + "="*72)
    print("INFERENCE SPEED & FIGURE OF MERIT")
    print("="*72)
    print(f"Python: {sys.version.split()[0]} | NumPy: {np.__version__}")
    if prog is not None:
        print(f"Expression length (nodes): {prog_len}, depth: {prog_depth}")
        # İstersen formülü de göster:
        # print(f"Expression: {prog}")
    else:
        print("Warning: gplearn program objesi bulunamadı.")

    # --- Warmup (JIT yok ama cache ve memory ayarı için faydalı) ---
    if n > 0:
        idx_warm = rng.integers(0, n, size=min(warmup, max(1, n)))
        for i in idx_warm:
            _ = estimator.predict(X_np[i:i+1])
        _ = estimator.predict(X_np[: min(n, 256)])  # küçük bir batch warmup

    results = []

    for bs in batch_sizes:
        if bs == 1:
            # Tek örnek gecikmesi (repeats_single kez farklı örneklerle)
            times = []
            gc.collect()
            for _ in range(repeats_single):
                i = rng.integers(0, n)
                x1 = X_np[i:i+1]
                t0 = time.perf_counter()
                _ = estimator.predict(x1)
                t1 = time.perf_counter()
                times.append(t1 - t0)
            times = np.array(times)
            mean_t = float(times.mean())
            median_t = float(np.median(times))
            p90 = float(np.percentile(times, 90))
            p99 = float(np.percentile(times, 99))
            thr = 1.0 / mean_t  # samples/sec

            results.append({
                "batch_size": 1,
                "mean_latency_s": mean_t,
                "median_latency_s": median_t,
                "p90_latency_s": p90,
                "p99_latency_s": p99,
                "throughput_sps": thr
            })

            print("\n--- Single-sample latency (batch=1) ---")
            print(f"Mean   : {_fmt_seconds(mean_t)}  | Throughput: {thr:,.0f} samples/s")
            print(f"Median : {_fmt_seconds(median_t)}")
            print(f"P90    : {_fmt_seconds(p90)}")
            print(f"P99    : {_fmt_seconds(p99)}")

        else:
            # Mini-batch veya full-batch
            if bs is None:
                # full-batch: tüm X bir kerede
                bs_eff = n
                if bs_eff == 0:
                    print("\nFull-batch ölçümü atlandı (X boş).")
                    continue
                batch = X_np
                label = "FULL-BATCH"
            else:
                bs_eff = min(bs, n) if n > 0 else 0
                if bs_eff == 0:
                    print(f"\nBatch={bs} ölçümü atlandı (X boş).")
                    continue
                idx = rng.integers(0, n, size=bs_eff)
                batch = X_np[idx]
                label = f"BATCH={bs_eff}"

            # Birkaç tekrar ile istatistik
            repeats = 50 if bs_eff >= 32 else 100
            times = []
            gc.collect()
            for _ in range(repeats):
                t0 = time.perf_counter()
                _ = estimator.predict(batch)
                t1 = time.perf_counter()
                times.append(t1 - t0)
            times = np.array(times)
            mean_t = float(times.mean())
            median_t = float(np.median(times))
            p90 = float(np.percentile(times, 90))
            p99 = float(np.percentile(times, 99))
            thr = bs_eff / mean_t  # samples/sec

            results.append({
                "batch_size": int(bs_eff),
                "mean_latency_s": mean_t,
                "median_latency_s": median_t,
                "p90_latency_s": p90,
                "p99_latency_s": p99,
                "throughput_sps": thr
            })

            print(f"\n--- {label} latency ---")
            print(f"Mean   : {_fmt_seconds(mean_t)}  | Eff. batch: {bs_eff}  | Throughput: {thr:,.0f} samples/s")
            print(f"Median : {_fmt_seconds(median_t)}")
            print(f"P90    : {_fmt_seconds(p90)}")
            print(f"P99    : {_fmt_seconds(p99)}")

    # Özet FoM (figure of merit)
    # - Single-sample median latency
    # - P99 latency
    # - Best throughput (max over all batches)
    one = next((r for r in results if r["batch_size"] == 1), None)
    best_thr = max(results, key=lambda r: r["throughput_sps"]) if results else None

    print("\n" + "-"*72)
    print("FIGURE OF MERIT (FoM)")
    if one:
        print(f"Single-sample median latency : {_fmt_seconds(one['median_latency_s'])}")
        print(f"Single-sample P99 latency    : {_fmt_seconds(one['p99_latency_s'])}")
    if best_thr:
        print(f"Max throughput               : {best_thr['throughput_sps']:,.0f} samples/s "
              f"(batch={best_thr['batch_size']})")
    if prog is not None:
        print(f"Expression complexity        : length={prog_len}, depth={prog_depth}")
    print("-"*72 + "\n")

    return results

# ÇALIŞTIR
_ = measure_inference_speed(sr, X_test)
# İstersen eğitim seti için de:
# _ = measure_inference_speed(sr, X_train)
