In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import cross_val_score
from skopt import BayesSearchCV
from skopt.space import Real, Integer, Categorical
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR

# Problem
High-dimensional Optimisation - You’ve reach the final, 8-dimensional search space. High-dimensional black-box optimisation can be very difficult, so sticking to local solutions is not the worst idea here.

In [2]:
np.int = np.int_

In [3]:
X = np.load('initial_inputs.npy')
Y = np.load('initial_outputs.npy')

In [4]:
# data = pd.read_csv('../observations.csv')
# data = data[data['Fn'] == 8]
# X_ = data[['X1', 'X2', 'X3', 'X4', 'X5', 'X6', 'X7', 'X8']]
# Y_ = data['Y']

# X = np.concatenate((X, X_), axis=0)
# Y = np.append(Y, Y_)

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, Y, random_state=1234)

In [279]:
search_space = {
    'C': Real(1e-6, 1e+6, prior='log-uniform'),
    'epsilon': Real(1e-6, 1e+1, prior='log-uniform'),
    'gamma': Real(1e-6, 1e+1, prior='log-uniform'),
    'kernel': Categorical(['rbf'])
}

bayes_search = BayesSearchCV(
    SVR(),
    search_spaces=search_space,
    n_iter=65, 
    cv=5
)
bayes_search.fit(X_train_scaled, y_train)
print("train. score: %s" % bayes_search.best_score_)
print("test score: %s" % bayes_search.score(X_test_scaled, y_test))
print("best params: %s" % str(bayes_search.best_params_))

best_estimator = bayes_search.best_estimator_

# Evaluate the best model
predictions = best_estimator.predict(X_test_scaled)
mse = mean_squared_error(y_test, predictions)
print("Mean Squared Error of the Best Model:", mse)

train. score: 0.9996830924000897
test score: 0.9997934165608959
best params: OrderedDict([('C', 27903.0280395747), ('epsilon', 2.5980856908077314e-05), ('gamma', 0.018575432525474667), ('kernel', 'rbf')])
Mean Squared Error of the Best Model: 0.0006450822854883764


Week 10 - train. score: 0.9995435490616366
test score: 0.9999318559534323
best params: OrderedDict([('C', 70456.96103165064), ('epsilon', 0.0003361820264921724), ('gamma', 0.014152435180834103), ('kernel', 'rbf')])
Mean Squared Error of the Best Model: 0.0001385720333001916

Week 8 - train. score: 0.9993964367496003
test score: 0.999865397447822
best params: OrderedDict([('C', 33751.02108675229), ('epsilon', 1.5076977582664344e-05), ('gamma', 0.028741317952931125), ('kernel', 'rbf')])
Mean Squared Error of the Best Model: 0.0002590002109514943

Week 7 - train. score: 0.9786263058971375
test score: 0.9951265829936222
best params: OrderedDict([('C', 5607.275056505338), ('epsilon', 1.5962500716886472e-05), ('gamma', 0.015357818918361629), ('kernel', 'rbf')])
Mean Squared Error of the Best Model: 0.007173837196658522

Week 5 -
train. score: 0.9593122727778879
test score: 0.9973420750058608
best params: OrderedDict([('C', 7340.555840276544), ('epsilon', 0.00022964149320824618), ('gamma', 0.019763592385008463), ('kernel', 'rbf')])
Mean Squared Error of the Best Model: 0.003622031250744552

In [357]:
# When exploring a large search space if needed.
search_space = [(0, 1),(0, 1),(0, 1),(0, 1),(0, 1),(0, 1), (0, 1),(0, 1)]
# Narrowing down the focus (± 0.1/0.01) to the areas known to yield good results when evaluating.
search_space = [(0.09341, 0.10402), (0.1451, 0.155134), (0.122341, 0.13468), (0.144059, 0.154240), (0.798709, 0.8087), (0.489635, 0.5), (0.19221161, 0.208255), (0.570933, 0.58121933)]
num_candidates = 100^8

best_value = 10.00002
# Optimization for loop.
for i in range(10000000):
    candidates = np.random.uniform(low=[s[0] for s in search_space],
                               high=[s[1] for s in search_space],
                               size=(num_candidates, len(search_space)))

    predicted_values = best_estimator.predict(candidates)

    max_index = np.argmax(predicted_values)
    max_optima_value = predicted_values[max_index]
    if max_optima_value > best_value:
        best_value = max_optima_value
        max_optima_point = candidates[max_index]
        print("Maximum Optima Point:", max_optima_point)
        print("Maximum Optima Value:", max_optima_value)

In [349]:
print(best_value)
rounded = [format(value, ".6f") for value in max_optima_point]
separator = "-"
string = separator.join(map(str, rounded))
print(string)

10.00002074160048
0.099802-0.145820-0.132512-0.156556-0.804737-0.499757-0.199474-0.581027


In [350]:
optimal_point = [0.098945, 0.148167, 0.132342, 0.14424 , 0.800284, 0.499635,
       0.198255, 0.580933]
new_search_space = []

for i, (start, end) in enumerate(search_space):
    optimal_value = optimal_point[i]
    new_range = (max(start, optimal_value - 0.01), min(end, optimal_value + 0.01))
    new_search_space.append(new_range)

print("New Search Space:", new_search_space)

New Search Space: [(0.09341, 0.10402), (0.1451, 0.155134), (0.12234199999999999, 0.13468), (0.144059, 0.15424000000000002), (0.798709, 0.8087), (0.489635, 0.5), (0.19221161, 0.208255), (0.570933, 0.58121933)]


In [334]:
# Test the best optimal points from the previous restuls if it prediects closely. 
print(best_estimator.predict([[0.098945, 0.148167, 0.132342, 0.14424 , 0.800284, 0.499635,
       0.198255, 0.580933]]))

[9.99984974]


# Retrain the best model previously found if needed

In [333]:
# X_train = np.concatenate((X_train, [[0.098945, 0.148167, 0.132342, 0.14424 , 0.800284, 0.499635,
#        0.198255, 0.580933]]), axis=0)
# y_train = np.append(y_train, 9.9999021629171)

# Fit the optimizer to your data
bayes_search.fit(X_train, y_train)
print("train. score: %s" % bayes_search.best_score_)
print("test score: %s" % bayes_search.score(X_test, y_test))
print("best params: %s" % str(bayes_search.best_params_))

best_estimator = bayes_search.best_estimator_
# Make predictions on the test data using the best estimator
predictions = best_estimator.predict(X_test)

# Evaluate the performance of the best estimator
mse = mean_squared_error(y_test, predictions)
print("Mean Squared Error:", mse)

train. score: 0.9996776030799633
test score: 0.9999213174586448
best params: OrderedDict([('C', 31082.87635117268), ('epsilon', 3.934829925239749e-05), ('gamma', 0.01217577172253597), ('kernel', 'rbf')])
Mean Squared Error: 0.00024569594651737493


Week 11 - train. score: 0.9996632979356551
test score: 0.9996053575912377
best params: OrderedDict([('C', 45068.694373509294), ('epsilon', 8.570482998478355e-06), ('gamma', 0.020413393116625187), ('kernel', 'rbf')])
Mean Squared Error: 0.0012323196288102016

Week 12 - train. score: 0.9996776030799633
test score: 0.9999213174586448
best params: OrderedDict([('C', 31082.87635117268), ('epsilon', 3.934829925239749e-05), ('gamma', 0.01217577172253597), ('kernel', 'rbf')])
Mean Squared Error: 0.00024569594651737493