In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
from sklearn.kernel_ridge import KernelRidge
from sklearn.model_selection import KFold
from sklearn.metrics import r2_score
from skopt import gp_minimize
from skopt.space import Real
from sklearn.preprocessing import MinMaxScaler

# Create an output directory to store results
output_dir = "Excel_Linear_Results"
os.makedirs(output_dir, exist_ok=True)

# Define the feature column labels for PIMs descriptors
FEATURE_COLUMNS = [
    "C", "H", "O", "N", "F", "S", "System_Size", "a", "b", "c",
    "density", "PLD", "LCD", "N2_SA",
    "Probe_Accessible", "Probe_Occupiable", "Rosenbluth_Weight"
]

# Load and preprocess the PIMs descriptors
data = pd.read_csv("PIM_ExpFeatures.csv")
features = data[FEATURE_COLUMNS].values

# Normalize PIMs descriptors
scaler = MinMaxScaler()
features = scaler.fit_transform(features)

# Load the Qst labels
labels_data = pd.read_csv("PIM_Qst_Labels.csv")
labels = labels_data["Qst_CO2_298K"].values  # Extract Qst labels

# Define hyperparameter search space for Bayesian Optimization (Linear Kernel)
space = [
    Real(1e-6, 1e2, "log-uniform", name="alpha")  # Regularization parameter
]

# Objective function for Bayesian Optimization
def objective(params):
    alpha = params[0]
    testing_r2_scores = []

    kf = KFold(n_splits=5, shuffle=True, random_state=42)
    for train_idx, test_idx in kf.split(features):
        X_train, X_test = features[train_idx], features[test_idx]
        y_train, y_test = labels[train_idx], labels[test_idx]

        model = KernelRidge(kernel="linear", alpha=alpha)
        model.fit(X_train, y_train)
        test_predictions = model.predict(X_test)
        testing_r2_scores.append(r2_score(y_test, test_predictions))

    return -np.mean(testing_r2_scores)  # Negative because skopt minimizes

# Run Bayesian Optimization
result = gp_minimize(
    objective,
    space,
    n_calls=500,
    n_initial_points=50,
    random_state=42,
    verbose=True  # Display progress
)

# Extract the best hyperparameters
best_alpha = result.x[0]

# Save best hyperparameters to a CSV file
best_params = pd.DataFrame({"Alpha": [best_alpha]})
best_params.to_csv(os.path.join(output_dir, "Excel_Linear_best_hyperparameters.csv"), index=False)

# Evaluate the best model
kf = KFold(n_splits=5, shuffle=True, random_state=42)
training_r2_scores = []
testing_r2_scores = []
all_train_actuals, all_train_preds = [], []
all_test_actuals, all_test_preds = [], []

for train_idx, test_idx in kf.split(features):
    X_train, X_test = features[train_idx], features[test_idx]
    y_train, y_test = labels[train_idx], labels[test_idx]

    model = KernelRidge(kernel="linear", alpha=best_alpha)
    model.fit(X_train, y_train)

    train_predictions = model.predict(X_train)
    test_predictions = model.predict(X_test)

    all_train_actuals.extend(y_train)
    all_train_preds.extend(train_predictions)
    all_test_actuals.extend(y_test)
    all_test_preds.extend(test_predictions)

    training_r2_scores.append(r2_score(y_train, train_predictions))
    testing_r2_scores.append(r2_score(y_test, test_predictions))

# Save cross-validation R² scores
#cv_results = pd.DataFrame({
#    "Fold": range(1, len(training_r2_scores) + 1),
#    "Training R^2": training_r2_scores,
#    "Testing R^2": testing_r2_scores
#})
#cv_results.to_csv(os.path.join(output_dir, "Excel_Linear_cv_results.csv"), index=False)

# Compute average R²
average_training_r2 = np.mean(training_r2_scores)
average_testing_r2 = np.mean(testing_r2_scores)

# Generate parity plots
#for dataset, actuals, preds, avg_r2, filename in zip(
#    ["Training", "Testing"],
#    [all_train_actuals, all_test_actuals],
#    [all_train_preds, all_test_preds],
#    [average_training_r2, average_testing_r2],
#    ["Excel_Linear_training_parity_plot.png", "Excel_Linear_testing_parity_plot.png"]
#):
#    plt.figure()
#    plt.scatter(actuals, preds, alpha=0.7, label=f"{dataset} Data")
#    plt.plot([min(actuals), max(actuals)], [min(actuals), max(actuals)], 'k--', label="Perfect Prediction")
#    plt.xlabel("Actual Values")
#    plt.ylabel("Predicted Values")
#    plt.title(f"{dataset} Parity Plot ($R^2 = {avg_r2:.3f}$)")
#    plt.legend()
#    plt.grid()
#    plt.savefig(os.path.join(output_dir, filename))
#    plt.close()

#print(f"Results saved in {output_dir}")


Iteration No: 1 started. Evaluating function at random point.
Iteration No: 1 ended. Evaluation done at random point.
Time taken: 0.0025
Function value obtained: 3.9649
Current minimum: 3.9649
Iteration No: 2 started. Evaluating function at random point.
Iteration No: 2 ended. Evaluation done at random point.
Time taken: 0.0027
Function value obtained: -0.9039
Current minimum: -0.9039
Iteration No: 3 started. Evaluating function at random point.
Iteration No: 3 ended. Evaluation done at random point.
Time taken: 0.0020
Function value obtained: 3.0266
Current minimum: -0.9039
Iteration No: 4 started. Evaluating function at random point.
Iteration No: 4 ended. Evaluation done at random point.
Time taken: 0.0020
Function value obtained: 0.0623
Current minimum: -0.9039
Iteration No: 5 started. Evaluating function at random point.
Iteration No: 5 ended. Evaluation done at random point.
Time taken: 0.0020
Function value obtained: 0.1462
Current minimum: -0.9039
Iteration No: 6 started. Evalu



Iteration No: 85 ended. Search finished for the next optimal point.
Time taken: 0.5946
Function value obtained: 0.1526
Current minimum: -0.9575
Iteration No: 86 started. Searching for the next optimal point.
Iteration No: 86 ended. Search finished for the next optimal point.
Time taken: 0.5959
Function value obtained: -0.9572
Current minimum: -0.9575
Iteration No: 87 started. Searching for the next optimal point.
Iteration No: 87 ended. Search finished for the next optimal point.
Time taken: 0.5594
Function value obtained: -0.9568
Current minimum: -0.9575
Iteration No: 88 started. Searching for the next optimal point.
Iteration No: 88 ended. Search finished for the next optimal point.
Time taken: 0.5561
Function value obtained: -0.9575
Current minimum: -0.9575
Iteration No: 89 started. Searching for the next optimal point.
Iteration No: 89 ended. Search finished for the next optimal point.
Time taken: 0.7669
Function value obtained: -0.9574
Current minimum: -0.9575
Iteration No: 90 sta



Iteration No: 100 ended. Search finished for the next optimal point.
Time taken: 0.8532
Function value obtained: -0.6062
Current minimum: -0.9575
Iteration No: 101 started. Searching for the next optimal point.
Iteration No: 101 ended. Search finished for the next optimal point.
Time taken: 0.8386
Function value obtained: -0.9573
Current minimum: -0.9575
Iteration No: 102 started. Searching for the next optimal point.
Iteration No: 102 ended. Search finished for the next optimal point.
Time taken: 0.9338
Function value obtained: -0.9572
Current minimum: -0.9575
Iteration No: 103 started. Searching for the next optimal point.
Iteration No: 103 ended. Search finished for the next optimal point.
Time taken: 1.0312
Function value obtained: -0.9528
Current minimum: -0.9575
Iteration No: 104 started. Searching for the next optimal point.
Iteration No: 104 ended. Search finished for the next optimal point.
Time taken: 0.8545
Function value obtained: -0.9571
Current minimum: -0.9575
Iteration 



Iteration No: 135 ended. Search finished for the next optimal point.
Time taken: 1.4936
Function value obtained: 0.0242
Current minimum: -0.9575
Iteration No: 136 started. Searching for the next optimal point.
Iteration No: 136 ended. Search finished for the next optimal point.
Time taken: 1.3427
Function value obtained: -0.9572
Current minimum: -0.9575
Iteration No: 137 started. Searching for the next optimal point.
Iteration No: 137 ended. Search finished for the next optimal point.
Time taken: 1.5536
Function value obtained: -0.3881
Current minimum: -0.9575
Iteration No: 138 started. Searching for the next optimal point.
Iteration No: 138 ended. Search finished for the next optimal point.
Time taken: 1.6528
Function value obtained: -0.9556
Current minimum: -0.9575
Iteration No: 139 started. Searching for the next optimal point.
Iteration No: 139 ended. Search finished for the next optimal point.
Time taken: 1.6097
Function value obtained: -0.9562
Current minimum: -0.9575
Iteration N



Iteration No: 155 ended. Search finished for the next optimal point.
Time taken: 1.7912
Function value obtained: -0.9415
Current minimum: -0.9575
Iteration No: 156 started. Searching for the next optimal point.
Iteration No: 156 ended. Search finished for the next optimal point.
Time taken: 1.6620
Function value obtained: -0.9575
Current minimum: -0.9575
Iteration No: 157 started. Searching for the next optimal point.
Iteration No: 157 ended. Search finished for the next optimal point.
Time taken: 2.0708
Function value obtained: -0.9558
Current minimum: -0.9575
Iteration No: 158 started. Searching for the next optimal point.
Iteration No: 158 ended. Search finished for the next optimal point.
Time taken: 1.9382
Function value obtained: -0.9567
Current minimum: -0.9575
Iteration No: 159 started. Searching for the next optimal point.
Iteration No: 159 ended. Search finished for the next optimal point.
Time taken: 1.9832
Function value obtained: -0.9518
Current minimum: -0.9575
Iteration 



Iteration No: 172 ended. Search finished for the next optimal point.
Time taken: 2.0410
Function value obtained: 8.8357
Current minimum: -0.9575
Iteration No: 173 started. Searching for the next optimal point.
Iteration No: 173 ended. Search finished for the next optimal point.
Time taken: 2.2602
Function value obtained: -0.9573
Current minimum: -0.9575
Iteration No: 174 started. Searching for the next optimal point.
Iteration No: 174 ended. Search finished for the next optimal point.
Time taken: 2.3782
Function value obtained: -0.9566
Current minimum: -0.9575
Iteration No: 175 started. Searching for the next optimal point.
Iteration No: 175 ended. Search finished for the next optimal point.
Time taken: 2.2202
Function value obtained: -0.9574
Current minimum: -0.9575
Iteration No: 176 started. Searching for the next optimal point.
Iteration No: 176 ended. Search finished for the next optimal point.
Time taken: 2.1733
Function value obtained: -0.9564
Current minimum: -0.9575
Iteration N



Iteration No: 188 ended. Search finished for the next optimal point.
Time taken: 3.6044
Function value obtained: -0.9563
Current minimum: -0.9575
Iteration No: 189 started. Searching for the next optimal point.
Iteration No: 189 ended. Search finished for the next optimal point.
Time taken: 3.0486
Function value obtained: -0.9571
Current minimum: -0.9575
Iteration No: 190 started. Searching for the next optimal point.
Iteration No: 190 ended. Search finished for the next optimal point.
Time taken: 3.4734
Function value obtained: -0.9574
Current minimum: -0.9575
Iteration No: 191 started. Searching for the next optimal point.
Iteration No: 191 ended. Search finished for the next optimal point.
Time taken: 3.4279
Function value obtained: -0.9574
Current minimum: -0.9575
Iteration No: 192 started. Searching for the next optimal point.
Iteration No: 192 ended. Search finished for the next optimal point.
Time taken: 2.8097
Function value obtained: -0.9558
Current minimum: -0.9575
Iteration 



Iteration No: 219 ended. Search finished for the next optimal point.
Time taken: 3.8817
Function value obtained: 0.1482
Current minimum: -0.9575
Iteration No: 220 started. Searching for the next optimal point.
Iteration No: 220 ended. Search finished for the next optimal point.
Time taken: 3.6814
Function value obtained: -0.9574
Current minimum: -0.9575
Iteration No: 221 started. Searching for the next optimal point.
Iteration No: 221 ended. Search finished for the next optimal point.
Time taken: 3.6105
Function value obtained: -0.9574
Current minimum: -0.9575
Iteration No: 222 started. Searching for the next optimal point.
Iteration No: 222 ended. Search finished for the next optimal point.
Time taken: 3.8404
Function value obtained: -0.9572
Current minimum: -0.9575
Iteration No: 223 started. Searching for the next optimal point.
Iteration No: 223 ended. Search finished for the next optimal point.
Time taken: 4.0783
Function value obtained: -0.9574
Current minimum: -0.9575
Iteration N



Iteration No: 242 ended. Search finished for the next optimal point.
Time taken: 4.2779
Function value obtained: 24.5448
Current minimum: -0.9575
Iteration No: 243 started. Searching for the next optimal point.
Iteration No: 243 ended. Search finished for the next optimal point.
Time taken: 4.6887
Function value obtained: -0.9574
Current minimum: -0.9575
Iteration No: 244 started. Searching for the next optimal point.
Iteration No: 244 ended. Search finished for the next optimal point.
Time taken: 4.3622
Function value obtained: -0.9573
Current minimum: -0.9575
Iteration No: 245 started. Searching for the next optimal point.
Iteration No: 245 ended. Search finished for the next optimal point.
Time taken: 4.4635
Function value obtained: -0.9574
Current minimum: -0.9575
Iteration No: 246 started. Searching for the next optimal point.
Iteration No: 246 ended. Search finished for the next optimal point.
Time taken: 4.3494
Function value obtained: -0.9575
Current minimum: -0.9575
Iteration 



Iteration No: 260 ended. Search finished for the next optimal point.
Time taken: 4.8452
Function value obtained: 36.7646
Current minimum: -0.9575
Iteration No: 261 started. Searching for the next optimal point.
Iteration No: 261 ended. Search finished for the next optimal point.
Time taken: 5.1845
Function value obtained: -0.9569
Current minimum: -0.9575
Iteration No: 262 started. Searching for the next optimal point.
Iteration No: 262 ended. Search finished for the next optimal point.
Time taken: 5.4967
Function value obtained: -0.9573
Current minimum: -0.9575
Iteration No: 263 started. Searching for the next optimal point.
Iteration No: 263 ended. Search finished for the next optimal point.
Time taken: 5.2295
Function value obtained: -0.9567
Current minimum: -0.9575
Iteration No: 264 started. Searching for the next optimal point.
Iteration No: 264 ended. Search finished for the next optimal point.
Time taken: 5.0892
Function value obtained: -0.9567
Current minimum: -0.9575
Iteration 



Iteration No: 272 ended. Search finished for the next optimal point.
Time taken: 5.4254
Function value obtained: 0.6596
Current minimum: -0.9575
Iteration No: 273 started. Searching for the next optimal point.
Iteration No: 273 ended. Search finished for the next optimal point.
Time taken: 5.7511
Function value obtained: -0.9247
Current minimum: -0.9575
Iteration No: 274 started. Searching for the next optimal point.
Iteration No: 274 ended. Search finished for the next optimal point.
Time taken: 6.1581
Function value obtained: -0.9574
Current minimum: -0.9575
Iteration No: 275 started. Searching for the next optimal point.
Iteration No: 275 ended. Search finished for the next optimal point.
Time taken: 5.9773
Function value obtained: -0.9568
Current minimum: -0.9575
Iteration No: 276 started. Searching for the next optimal point.
Iteration No: 276 ended. Search finished for the next optimal point.
Time taken: 5.4247
Function value obtained: -0.9570
Current minimum: -0.9575
Iteration N



Iteration No: 312 ended. Search finished for the next optimal point.
Time taken: 7.1148
Function value obtained: -0.2307
Current minimum: -0.9575
Iteration No: 313 started. Searching for the next optimal point.
Iteration No: 313 ended. Search finished for the next optimal point.
Time taken: 7.6329
Function value obtained: -0.9563
Current minimum: -0.9575
Iteration No: 314 started. Searching for the next optimal point.
Iteration No: 314 ended. Search finished for the next optimal point.
Time taken: 7.2078
Function value obtained: -0.9560
Current minimum: -0.9575
Iteration No: 315 started. Searching for the next optimal point.
Iteration No: 315 ended. Search finished for the next optimal point.
Time taken: 7.6510
Function value obtained: -0.9563
Current minimum: -0.9575
Iteration No: 316 started. Searching for the next optimal point.
Iteration No: 316 ended. Search finished for the next optimal point.
Time taken: 7.2870
Function value obtained: -0.9515
Current minimum: -0.9575
Iteration 



Iteration No: 319 ended. Search finished for the next optimal point.
Time taken: 7.6727
Function value obtained: 9.9134
Current minimum: -0.9575
Iteration No: 320 started. Searching for the next optimal point.
Iteration No: 320 ended. Search finished for the next optimal point.
Time taken: 7.4007
Function value obtained: -0.9575
Current minimum: -0.9575
Iteration No: 321 started. Searching for the next optimal point.
Iteration No: 321 ended. Search finished for the next optimal point.
Time taken: 8.0340
Function value obtained: -0.9560
Current minimum: -0.9575
Iteration No: 322 started. Searching for the next optimal point.
Iteration No: 322 ended. Search finished for the next optimal point.
Time taken: 8.0345
Function value obtained: -0.9573
Current minimum: -0.9575
Iteration No: 323 started. Searching for the next optimal point.
Iteration No: 323 ended. Search finished for the next optimal point.
Time taken: 8.3323
Function value obtained: -0.9570
Current minimum: -0.9575
Iteration N



Iteration No: 335 ended. Search finished for the next optimal point.
Time taken: 8.5775
Function value obtained: 0.1533
Current minimum: -0.9575
Iteration No: 336 started. Searching for the next optimal point.
Iteration No: 336 ended. Search finished for the next optimal point.
Time taken: 8.2635
Function value obtained: -0.9570
Current minimum: -0.9575
Iteration No: 337 started. Searching for the next optimal point.
Iteration No: 337 ended. Search finished for the next optimal point.
Time taken: 8.7520
Function value obtained: -0.9570
Current minimum: -0.9575
Iteration No: 338 started. Searching for the next optimal point.
Iteration No: 338 ended. Search finished for the next optimal point.
Time taken: 9.2970
Function value obtained: -0.9572
Current minimum: -0.9575
Iteration No: 339 started. Searching for the next optimal point.
Iteration No: 339 ended. Search finished for the next optimal point.
Time taken: 8.8720
Function value obtained: -0.9377
Current minimum: -0.9575
Iteration N



Iteration No: 341 ended. Search finished for the next optimal point.
Time taken: 8.6935
Function value obtained: 41.3375
Current minimum: -0.9575
Iteration No: 342 started. Searching for the next optimal point.
Iteration No: 342 ended. Search finished for the next optimal point.
Time taken: 8.9643
Function value obtained: -0.9569
Current minimum: -0.9575
Iteration No: 343 started. Searching for the next optimal point.
Iteration No: 343 ended. Search finished for the next optimal point.
Time taken: 8.9603
Function value obtained: -0.9574
Current minimum: -0.9575
Iteration No: 344 started. Searching for the next optimal point.
Iteration No: 344 ended. Search finished for the next optimal point.
Time taken: 8.8897
Function value obtained: -0.9571
Current minimum: -0.9575
Iteration No: 345 started. Searching for the next optimal point.
Iteration No: 345 ended. Search finished for the next optimal point.
Time taken: 9.0530
Function value obtained: -0.9573
Current minimum: -0.9575
Iteration 



Iteration No: 375 ended. Search finished for the next optimal point.
Time taken: 10.9091
Function value obtained: -0.9544
Current minimum: -0.9575
Iteration No: 376 started. Searching for the next optimal point.
Iteration No: 376 ended. Search finished for the next optimal point.
Time taken: 10.0180
Function value obtained: -0.9469
Current minimum: -0.9575
Iteration No: 377 started. Searching for the next optimal point.
Iteration No: 377 ended. Search finished for the next optimal point.
Time taken: 11.1044
Function value obtained: -0.9547
Current minimum: -0.9575
Iteration No: 378 started. Searching for the next optimal point.
Iteration No: 378 ended. Search finished for the next optimal point.
Time taken: 13.1989
Function value obtained: -0.9571
Current minimum: -0.9575
Iteration No: 379 started. Searching for the next optimal point.
Iteration No: 379 ended. Search finished for the next optimal point.
Time taken: 11.2089
Function value obtained: -0.9575
Current minimum: -0.9575
Itera



Iteration No: 402 ended. Search finished for the next optimal point.
Time taken: 14.3627
Function value obtained: 22.0281
Current minimum: -0.9575
Iteration No: 403 started. Searching for the next optimal point.
Iteration No: 403 ended. Search finished for the next optimal point.
Time taken: 13.2083
Function value obtained: -0.9573
Current minimum: -0.9575
Iteration No: 404 started. Searching for the next optimal point.
Iteration No: 404 ended. Search finished for the next optimal point.
Time taken: 12.5520
Function value obtained: -0.9566
Current minimum: -0.9575
Iteration No: 405 started. Searching for the next optimal point.
Iteration No: 405 ended. Search finished for the next optimal point.
Time taken: 12.3774
Function value obtained: -0.9574
Current minimum: -0.9575
Iteration No: 406 started. Searching for the next optimal point.
Iteration No: 406 ended. Search finished for the next optimal point.
Time taken: 12.8673
Function value obtained: -0.9415
Current minimum: -0.9575
Itera



Iteration No: 414 ended. Search finished for the next optimal point.
Time taken: 13.3404
Function value obtained: 0.9122
Current minimum: -0.9575
Iteration No: 415 started. Searching for the next optimal point.




Iteration No: 415 ended. Search finished for the next optimal point.
Time taken: 17.1559
Function value obtained: 9.1704
Current minimum: -0.9575
Iteration No: 416 started. Searching for the next optimal point.
Iteration No: 416 ended. Search finished for the next optimal point.
Time taken: 15.2754
Function value obtained: -0.9574
Current minimum: -0.9575
Iteration No: 417 started. Searching for the next optimal point.
Iteration No: 417 ended. Search finished for the next optimal point.
Time taken: 15.4373
Function value obtained: -0.9557
Current minimum: -0.9575
Iteration No: 418 started. Searching for the next optimal point.
Iteration No: 418 ended. Search finished for the next optimal point.
Time taken: 13.4913
Function value obtained: -0.9295
Current minimum: -0.9575
Iteration No: 419 started. Searching for the next optimal point.
Iteration No: 419 ended. Search finished for the next optimal point.
Time taken: 14.2411
Function value obtained: -0.9574
Current minimum: -0.9575
Iterat



Iteration No: 434 ended. Search finished for the next optimal point.
Time taken: 13.7588
Function value obtained: 0.0684
Current minimum: -0.9575
Iteration No: 435 started. Searching for the next optimal point.
Iteration No: 435 ended. Search finished for the next optimal point.
Time taken: 14.5543
Function value obtained: -0.9575
Current minimum: -0.9575
Iteration No: 436 started. Searching for the next optimal point.
Iteration No: 436 ended. Search finished for the next optimal point.
Time taken: 12.5732
Function value obtained: -0.9569
Current minimum: -0.9575
Iteration No: 437 started. Searching for the next optimal point.
Iteration No: 437 ended. Search finished for the next optimal point.
Time taken: 13.7988
Function value obtained: -0.9540
Current minimum: -0.9575
Iteration No: 438 started. Searching for the next optimal point.
Iteration No: 438 ended. Search finished for the next optimal point.
Time taken: 13.6400
Function value obtained: -0.9575
Current minimum: -0.9575
Iterat



Iteration No: 446 ended. Search finished for the next optimal point.
Time taken: 13.9884
Function value obtained: 0.0607
Current minimum: -0.9575
Iteration No: 447 started. Searching for the next optimal point.
Iteration No: 447 ended. Search finished for the next optimal point.
Time taken: 14.9410
Function value obtained: -0.9575
Current minimum: -0.9575
Iteration No: 448 started. Searching for the next optimal point.
Iteration No: 448 ended. Search finished for the next optimal point.
Time taken: 13.3650
Function value obtained: -0.9562
Current minimum: -0.9575
Iteration No: 449 started. Searching for the next optimal point.
Iteration No: 449 ended. Search finished for the next optimal point.
Time taken: 14.0993
Function value obtained: -0.9359
Current minimum: -0.9575
Iteration No: 450 started. Searching for the next optimal point.
Iteration No: 450 ended. Search finished for the next optimal point.
Time taken: 14.0782
Function value obtained: -0.9570
Current minimum: -0.9575
Iterat



Iteration No: 468 ended. Search finished for the next optimal point.
Time taken: 14.7288
Function value obtained: 18.7341
Current minimum: -0.9575
Iteration No: 469 started. Searching for the next optimal point.
Iteration No: 469 ended. Search finished for the next optimal point.
Time taken: 15.3383
Function value obtained: -0.9570
Current minimum: -0.9575
Iteration No: 470 started. Searching for the next optimal point.
Iteration No: 470 ended. Search finished for the next optimal point.
Time taken: 15.7636
Function value obtained: -0.9465
Current minimum: -0.9575
Iteration No: 471 started. Searching for the next optimal point.
Iteration No: 471 ended. Search finished for the next optimal point.
Time taken: 16.0243
Function value obtained: -0.9574
Current minimum: -0.9575
Iteration No: 472 started. Searching for the next optimal point.
Iteration No: 472 ended. Search finished for the next optimal point.
Time taken: 15.1812
Function value obtained: -0.9521
Current minimum: -0.9575
Itera

In [2]:
# Final parity plot with both training and testing results
plt.figure(figsize=(6, 6))
plt.rcParams.update({'font.family': 'Times New Roman'})

# Scatter points
plt.scatter(all_train_actuals, all_train_preds, color='blue', alpha=0.7, edgecolor='k', label=f"Training ($R^2$ = {np.mean(training_r2_scores):.3f})")
plt.scatter(all_test_actuals, all_test_preds, color='red', alpha=0.7, edgecolor='k', label=f"Testing ($R^2$ = {np.mean(testing_r2_scores):.3f})")

# Diagonal reference line
min_val = min(min(all_train_actuals), min(all_test_actuals))
max_val = max(max(all_train_actuals), max(all_test_actuals))
plt.plot([min_val, max_val], [min_val, max_val], 'k--', linewidth=1)

# Labels and title
plt.xlabel("Actual Qst", fontsize=14)
plt.ylabel("Predicted Qst", fontsize=14)
plt.title("Linear KRR Prediction Results", fontsize=14)

# Styling
plt.xticks(fontsize=12, color='black')
plt.yticks(fontsize=12, color='black')
plt.grid(True, linestyle='--', linewidth=0.5)
plt.legend(loc='upper left', fontsize=12, frameon=False)
plt.gca().set_aspect('equal', adjustable='box')
plt.tight_layout()

# Save as PDF
plt.savefig(os.path.join(output_dir, "Linear_combined_parity_plot.pdf"), format='pdf')
plt.close()
