In [83]:
#Importing necessary libraries
import pandas as pd
import numpy as np

import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

from bayes_opt import BayesianOptimization
from bayes_opt import UtilityFunction

from pycaret.regression import *

In [84]:
#Encoding method 1
df = pd.read_excel(r'xrd_new.xlsx', sheet_name='Sheet6')
df = df.drop(['Isc','schw 1', 'schw 2','schw 3', 'schw 4', 'schw 5', 'schw 6'], axis=1)

In [85]:
reg = setup(data=df , target = 'FF' ,normalize=True , normalize_method='minmax',train_size=0.8,fold=3
            ,keep_features=['FWHM 1', 'FWHM 2', 'FWHM 3', 'FWHM 4', 'FWHM 5',
            'FWHM 6', '2-theta 1', '2-theta 2', '2-theta 3', '2-theta 4',
            '2-theta 5', '2-theta 6','relative_intensity 1',
            'relative_intensity 2', 'relative_intensity 3', 'relative_intensity 4',
            'relative_intensity 5', 'relative_intensity 6']
            ,remove_multicollinearity = True, multicollinearity_threshold= 0.9
            , session_id=1543)

Unnamed: 0,Description,Value
0,Session id,1543
1,Target,FF
2,Target type,Regression
3,Original data shape,"(27, 26)"
4,Transformed data shape,"(27, 26)"
5,Transformed train set shape,"(21, 26)"
6,Transformed test set shape,"(6, 26)"
7,Numeric features,25
8,Keep features,18
9,Preprocess,True


In [86]:
# import RandomForestRegressor from sklearn
best = compare_models(cross_validation=False)

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
gbr,Gradient Boosting Regressor,0.4405,0.3105,0.5573,0.9556,0.012,0.0099,0.04
dt,Decision Tree Regressor,0.6857,0.9939,0.9969,0.858,0.0232,0.0159,0.03
xgboost,Extreme Gradient Boosting,0.7718,1.0272,1.0135,0.8532,0.0223,0.0175,0.05
ada,AdaBoost Regressor,0.9582,1.1494,1.0721,0.8357,0.0242,0.0224,0.07
rf,Random Forest Regressor,0.8802,1.1775,1.0851,0.8317,0.0257,0.0212,0.13
et,Extra Trees Regressor,1.2853,1.793,1.339,0.7437,0.0307,0.0302,0.09
lr,Linear Regression,1.2535,2.2228,1.4909,0.6823,0.034,0.0293,0.04
ridge,Ridge Regression,1.1977,2.3783,1.5422,0.6601,0.0359,0.0286,0.02
huber,Huber Regressor,1.4011,2.7383,1.6548,0.6086,0.0374,0.0326,0.02
par,Passive Aggressive Regressor,1.2048,2.8422,1.6859,0.5938,0.0397,0.0295,0.02


In [87]:
# the coefficient of determination of the prediction.
model = create_model('gbr',cross_validation = False, return_train_score=True)

Unnamed: 0,MAE,MSE,RMSE,R2,RMSLE,MAPE
Test,0.4405,0.3105,0.5573,0.9556,0.012,0.0099
Train,0.1101,0.0207,0.1438,0.996,0.0033,0.0025


In [88]:
def wrapper(X, Y, **kwargs) -> float:
    
    input = np.array([[X, Y]])
    output = model.predict(input, **kwargs)
    output = float(output)

    if (output > -1) & (output < 10):
        return output
    
    else:
        return 10000000

In [99]:
def optimizer_instantiate(df, wrapper, pbounds, n_iter=10, kappa=5, xi=None, random_state=None):

    # df.plot(kind='scatter', x='X', y='Y', c='Z', cmap='plasma', figsize=(6,5))
    optimizer  = BayesianOptimization(wrapper, pbounds, verbose=2)

    # bo.maximize(init_points=2, n_iter=n_iter, acq='ucb', kappa=kappa)
    # plt.plot(bo.max["params"]["X"],bo.max["params"]["Y"], '*', markersize=10)
    # plt.show()
    utility = UtilityFunction(kind="ucb", kappa=kappa, xi=0)

    return optimizer, utility


def optimizer_suggest(optimizer, utility):
 
    next_point_to_probe = optimizer .suggest(utility)
    print(f"Next Point to Probe: {next_point_to_probe}")
    return next_point_to_probe

In [100]:
features = df.drop(columns=["FF"]).columns.to_list()

pbounds = {feature: (df[feature].min(), df[feature].max()) for feature in features}
# print(f"pbounds: {pbounds}")
# pbounds = {'X': (-5.0, 4.0), 'Y': (-3.0, 7.0)}
optimizer, utility = optimizer_instantiate(df, wrapper, pbounds, n_iter=100, kappa=10, xi=None, random_state=None)

In [101]:
next_point_to_probe = optimizer_suggest(optimizer, utility)
target = model.predict(pd.DataFrame(next_point_to_probe, index=[0]).values.reshape(1, -1))
print("Found the target value to be:", target)

Next Point to Probe: {'2-theta 1': 32.169719241042166, '2-theta 2': 34.86210606047435, '2-theta 3': 36.855714101294446, '2-theta 4': 48.17677884950854, '2-theta 5': 57.03732307628229, '2-theta 6': 63.882621700411946, 'DROP': 4.658364527437871, 'FWHM 1': 0.2598346020008028, 'FWHM 2': 0.2222909083560184, 'FWHM 3': 0.3840079441555695, 'FWHM 4': 0.5517406986414133, 'FWHM 5': 0.5853122699683866, 'FWHM 6': 0.7368625585160986, 'Intensity 1': 0.72404095111281, 'Intensity 2': 0.5320828146801186, 'Intensity 3': 0.8755959186171596, 'Intensity 4': 0.16963394663976325, 'Intensity 5': 0.26179532771277125, 'Intensity 6': 0.20687731864291004, 'relative_intensity 1': 92.01941540054008, 'relative_intensity 2': 38.41842270823152, 'relative_intensity 3': 79.93848962318741, 'relative_intensity 4': 15.565793340323244, 'relative_intensity 5': 16.64167379687857, 'relative_intensity 6': 17.621577518585088}
Found the target value to be: [43.50669067]


In [102]:
optimizer.register(
    params=next_point_to_probe,
    target=target[0],
)

In [104]:
for _ in range(6):
    next_point = optimizer.suggest(utility)
    target = model.predict(pd.DataFrame(next_point, index=[0]).values.reshape(1, -1))
    optimizer.register(params=next_point, target=target[0])
    
    print(target, next_point)
    print()
print(optimizer.max)

[43.79884942] {'2-theta 1': 32.09, '2-theta 2': 34.81, '2-theta 3': 36.69, '2-theta 4': 48.29, '2-theta 5': 57.01, '2-theta 6': 63.89, 'DROP': 1.0, 'FWHM 1': 0.45, 'FWHM 2': 0.17, 'FWHM 3': 0.23, 'FWHM 4': 0.24, 'FWHM 5': 0.3, 'FWHM 6': 0.25, 'Intensity 1': 0.95, 'Intensity 2': 0.34, 'Intensity 3': 1.0, 'Intensity 4': 0.13, 'Intensity 5': 0.43, 'Intensity 6': 0.15, 'relative_intensity 1': 100.0, 'relative_intensity 2': 62.1, 'relative_intensity 3': 78.94, 'relative_intensity 4': 13.68, 'relative_intensity 5': 43.0, 'relative_intensity 6': 23.07}

[43.49836066] {'2-theta 1': 32.09, '2-theta 2': 34.9, '2-theta 3': 36.86, '2-theta 4': 48.29, '2-theta 5': 57.42, '2-theta 6': 63.69, 'DROP': 1.0, 'FWHM 1': 0.45, 'FWHM 2': 0.39, 'FWHM 3': 0.52, 'FWHM 4': 0.73, 'FWHM 5': 0.74, 'FWHM 6': 0.925902394, 'Intensity 1': 0.5, 'Intensity 2': 0.34, 'Intensity 3': 1.0, 'Intensity 4': 0.19, 'Intensity 5': 0.1, 'Intensity 6': 0.23, 'relative_intensity 1': 100.0, 'relative_intensity 2': 38.2, 'relative_int

### References
- https://github.com/bayesian-optimization/BayesianOptimization/blob/master/examples/advanced-tour.ipynb
- https://github.com/bayesian-optimization/BayesianOptimization/blob/master/examples/exploitation_vs_exploration.ipynb