SECTION 1: INITIAL CODE

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from IPython.display import clear_output
from scipy.stats import norm
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF
from sklearn.preprocessing import MinMaxScaler
##########################

function = 8  # 1-based position of the target function inside each record

# Read the files
X_init = np.load("initial_inputs.npy")
y_init = np.load("initial_outputs.npy")
queries_file = "queries.txt"
observations_file = "observations.txt"

import ast  # retained from the original cell; not referenced by the visible code


def _read_records(path):
    """Evaluate each non-blank line of ``path`` as a Python expression.

    Both ``array`` and ``np`` are resolvable while a line is evaluated, so a
    record may spell its arrays as ``array([...])`` or ``np.array([...])``.
    (The previous textual ``replace('array(', 'np.array(')`` turned an
    existing ``np.array(`` into ``np.np.array(``.)

    Args:
        path: Text file holding one expression per line.

    Returns:
        list: The evaluated records, in file order.
    """
    # SECURITY: eval() executes whatever code the file contains -- only point
    # this at files you trust.
    namespace = {"np": np, "array": np.array}
    records = []
    with open(path, 'r') as f:
        for line in f:
            expression = line.strip()
            if not expression:
                # A blank (e.g. trailing) line would make eval() raise SyntaxError.
                continue
            records.append(eval(expression, namespace))
    return records


queries_data = _read_records(queries_file)
observations_data = _read_records(observations_file)

# Queries and observations are paired purely by position, so a length
# mismatch would misalign inputs and outputs.
if len(queries_data) != len(observations_data):
    raise ValueError(
        f"{queries_file} has {len(queries_data)} records but "
        f"{observations_file} has {len(observations_data)}"
    )

# Extract the specified sub-arrays from queries
X = np.array([q[function - 1] for q in queries_data], dtype='float64')
y = np.array([o[function - 1] for o in observations_data])

# Find and remove duplicates: remember, for every distinct query row, the
# index at which it first appeared (dicts keep insertion order).
first_seen = {}
for idx, row in enumerate(X):
    first_seen.setdefault(tuple(row), idx)  # tuple -> hashable key
unique_indices = list(first_seen.values())

# Keep only unique queries and observations, appended after the initial data
X_unique = np.concatenate((X_init, X[unique_indices]))
y_unique = np.concatenate((y_init, y[unique_indices]))
queries_unique = []
observations_unique = []
for idx in unique_indices:
    queries_unique.append(queries_data[idx])
    observations_unique.append(observations_data[idx])

# Save cleaned data to new files, one record per line
for out_path, records in (
    ("queries_unique.txt", queries_unique),
    ("observations_unique.txt", observations_unique),
):
    with open(out_path, "w") as f:
        f.writelines(str(record) + "\n" for record in records)

# Save cleaned numpy arrays
np.save("initial_inputs_unique.npy", X_unique)
np.save("initial_outputs_unique.npy", y_unique)
#######################

# Create DataFrame: columns param1..param8 hold the first eight input
# columns of X_unique, followed by the observed output.
param_columns = {f'param{k}': X_unique[:, k - 1] for k in range(1, 9)}
df = pd.DataFrame({**param_columns, 'output': y_unique})

############################

from sklearn.ensemble import RandomForestRegressor
import numpy as np

# Dataset: every column except 'output' is an input parameter, so the code
# follows the DataFrame's width instead of hard-coding eight names.
# Note: this rebinds X and y to the full (initial + queried) data set.
feature_columns = [col for col in df.columns if col != 'output']
X = df[feature_columns].values
y = df['output'].values

# Train Random Forest (fixed seed so repeated runs give the same model)
rf = RandomForestRegressor(n_estimators=100, random_state=42)
rf.fit(X, y)

# Incumbent: the observation with the highest actual 'y'
best_observed_idx = np.argmax(y)
best_params = X[best_observed_idx]
best_y = y[best_observed_idx]

# Generate perturbed points: move one coordinate at a time by +/- the step,
# clamped to the unit interval.
PERTURB_STEP = 0.05
perturbed_points = []
for dim in range(best_params.size):
    for delta in (-PERTURB_STEP, PERTURB_STEP):
        shifted = np.clip(best_params[dim] + delta, 0.0, 1.0)
        if shifted == best_params[dim]:
            # Clamping undid the move (coordinate already on the boundary);
            # the candidate would just repeat the already evaluated incumbent.
            continue
        candidate = best_params.copy()
        candidate[dim] = shifted
        perturbed_points.append(candidate)

# Predict outputs for perturbed points; best_idx indexes perturbed_points
predictions = rf.predict(np.array(perturbed_points))
best_idx = np.argmax(predictions)
best_new_params = perturbed_points[best_idx]
best_predicted_output = predictions[best_idx]

# Print results
print("Best Original Parameters:", best_params)
print("Best Original y:", best_y)
print("Best Perturbed Parameters:", best_new_params)
print("Best Predicted Output:", best_predicted_output)

###########################

# Print the results: one line per candidate (numbered from 1), then the winner
print("Predictions for perturbed points:")
for point_no, (params, pred) in enumerate(zip(perturbed_points, predictions), start=1):
    print(f"Point {point_no}: Parameters = {params}, Predicted Output = {pred:.6f}")

summary_lines = (
    "\nBest Point:",
    f"Index: {best_idx + 1}",
    f"Parameters: {best_new_params}",
    f"Best Predicted Output: {best_predicted_output:.6f}",
)
print("\n".join(summary_lines))



Best Original Parameters: [0.043432 0.100047 0.223423 0.016864 0.330007 0.363847 0.328406 0.832396]
Best Original y: 9.7798347140729
Best Perturbed Parameters: [0.043432 0.100047 0.223423 0.016864 0.330007 0.313847 0.328406 0.832396]
Best Predicted Output: 9.74819948781812
Predictions for perturbed points:
Point 1: Parameters = [0.       0.100047 0.223423 0.016864 0.330007 0.363847 0.328406 0.832396], Predicted Output = 9.747900
Point 2: Parameters = [0.093432 0.100047 0.223423 0.016864 0.330007 0.363847 0.328406 0.832396], Predicted Output = 9.713717
Point 3: Parameters = [0.043432 0.050047 0.223423 0.016864 0.330007 0.363847 0.328406 0.832396], Predicted Output = 9.742759
Point 4: Parameters = [0.043432 0.150047 0.223423 0.016864 0.330007 0.363847 0.328406 0.832396], Predicted Output = 9.703743
Point 5: Parameters = [0.043432 0.100047 0.173423 0.016864 0.330007 0.363847 0.328406 0.832396], Predicted Output = 9.738856
Point 6: Parameters = [0.043432 0.100047 0.273423 0.016864 0.330007

SECTION 2: CODE MODIFICATION

SECTION 3: FINAL RESULT

In [5]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from IPython.display import clear_output
from scipy.stats import norm
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF
from sklearn.preprocessing import MinMaxScaler
##########################

function = 8  # 1-based position of the target function inside each record

# Read the files
X_init = np.load("initial_inputs.npy")
y_init = np.load("initial_outputs.npy")
queries_file = "queries.txt"
observations_file = "observations.txt"

import ast  # retained from the original cell; not referenced by the visible code


def _read_records(path):
    """Evaluate each non-blank line of ``path`` as a Python expression.

    Both ``array`` and ``np`` are resolvable while a line is evaluated, so a
    record may spell its arrays as ``array([...])`` or ``np.array([...])``.
    (The previous textual ``replace('array(', 'np.array(')`` turned an
    existing ``np.array(`` into ``np.np.array(``.)

    Args:
        path: Text file holding one expression per line.

    Returns:
        list: The evaluated records, in file order.
    """
    # SECURITY: eval() executes whatever code the file contains -- only point
    # this at files you trust.
    namespace = {"np": np, "array": np.array}
    records = []
    with open(path, 'r') as f:
        for line in f:
            expression = line.strip()
            if not expression:
                # A blank (e.g. trailing) line would make eval() raise SyntaxError.
                continue
            records.append(eval(expression, namespace))
    return records


queries_data = _read_records(queries_file)
observations_data = _read_records(observations_file)

# Queries and observations are paired purely by position, so a length
# mismatch would misalign inputs and outputs.
if len(queries_data) != len(observations_data):
    raise ValueError(
        f"{queries_file} has {len(queries_data)} records but "
        f"{observations_file} has {len(observations_data)}"
    )

# Extract the specified sub-arrays from queries
X = np.array([q[function - 1] for q in queries_data], dtype='float64')
y = np.array([o[function - 1] for o in observations_data])

# Find and remove duplicates: remember, for every distinct query row, the
# index at which it first appeared (dicts keep insertion order).
first_seen = {}
for idx, row in enumerate(X):
    first_seen.setdefault(tuple(row), idx)  # tuple -> hashable key
unique_indices = list(first_seen.values())

# Keep only unique queries and observations, appended after the initial data
X_unique = np.concatenate((X_init, X[unique_indices]))
y_unique = np.concatenate((y_init, y[unique_indices]))
queries_unique = []
observations_unique = []
for idx in unique_indices:
    queries_unique.append(queries_data[idx])
    observations_unique.append(observations_data[idx])

# Save cleaned data to new files, one record per line
for out_path, records in (
    ("queries_unique.txt", queries_unique),
    ("observations_unique.txt", observations_unique),
):
    with open(out_path, "w") as f:
        f.writelines(str(record) + "\n" for record in records)

# Save cleaned numpy arrays
np.save("initial_inputs_unique.npy", X_unique)
np.save("initial_outputs_unique.npy", y_unique)
#######################

# Create DataFrame: columns param1..param8 hold the first eight input
# columns of X_unique, followed by the observed output.
param_columns = {f'param{k}': X_unique[:, k - 1] for k in range(1, 9)}
df = pd.DataFrame({**param_columns, 'output': y_unique})

############################

from sklearn.ensemble import RandomForestRegressor
import numpy as np

# Dataset: every column except 'output' is an input parameter, so the code
# follows the DataFrame's width instead of hard-coding eight names.
# Note: this rebinds X and y to the full (initial + queried) data set.
feature_columns = [col for col in df.columns if col != 'output']
X = df[feature_columns].values
y = df['output'].values

# Train Random Forest (fixed seed so repeated runs give the same model)
rf = RandomForestRegressor(n_estimators=100, random_state=42)
rf.fit(X, y)

# Incumbent: the observation with the highest actual 'y'
best_observed_idx = np.argmax(y)
best_params = X[best_observed_idx]
best_y = y[best_observed_idx]

# Generate perturbed points: move one coordinate at a time by +/- the step,
# clamped to the unit interval.
PERTURB_STEP = 0.05
perturbed_points = []
for dim in range(best_params.size):
    for delta in (-PERTURB_STEP, PERTURB_STEP):
        shifted = np.clip(best_params[dim] + delta, 0.0, 1.0)
        if shifted == best_params[dim]:
            # Clamping undid the move (coordinate already on the boundary);
            # the candidate would just repeat the already evaluated incumbent.
            continue
        candidate = best_params.copy()
        candidate[dim] = shifted
        perturbed_points.append(candidate)

# Predict outputs for perturbed points; best_idx indexes perturbed_points
predictions = rf.predict(np.array(perturbed_points))
best_idx = np.argmax(predictions)
best_new_params = perturbed_points[best_idx]
best_predicted_output = predictions[best_idx]

# Print results
print("Best Original Parameters:", best_params)
print("Best Original y:", best_y)
print("Best Perturbed Parameters:", best_new_params)
print("Best Predicted Output:", best_predicted_output)

###########################

# Print the results: one line per candidate (numbered from 1), then the winner
print("Predictions for perturbed points:")
for point_no, (params, pred) in enumerate(zip(perturbed_points, predictions), start=1):
    print(f"Point {point_no}: Parameters = {params}, Predicted Output = {pred:.6f}")

summary_lines = (
    "\nBest Point:",
    f"Index: {best_idx + 1}",
    f"Parameters: {best_new_params}",
    f"Best Predicted Output: {best_predicted_output:.6f}",
)
print("\n".join(summary_lines))



Best Original Parameters: [0.043432 0.100047 0.223423 0.016864 0.330007 0.363847 0.328406 0.832396]
Best Original y: 9.7798347140729
Best Perturbed Parameters: [0.043432 0.100047 0.223423 0.016864 0.330007 0.313847 0.328406 0.832396]
Best Predicted Output: 9.74819948781812
Predictions for perturbed points:
Point 1: Parameters = [0.       0.100047 0.223423 0.016864 0.330007 0.363847 0.328406 0.832396], Predicted Output = 9.747900
Point 2: Parameters = [0.093432 0.100047 0.223423 0.016864 0.330007 0.363847 0.328406 0.832396], Predicted Output = 9.713717
Point 3: Parameters = [0.043432 0.050047 0.223423 0.016864 0.330007 0.363847 0.328406 0.832396], Predicted Output = 9.742759
Point 4: Parameters = [0.043432 0.150047 0.223423 0.016864 0.330007 0.363847 0.328406 0.832396], Predicted Output = 9.703743
Point 5: Parameters = [0.043432 0.100047 0.173423 0.016864 0.330007 0.363847 0.328406 0.832396], Predicted Output = 9.738856
Point 6: Parameters = [0.043432 0.100047 0.273423 0.016864 0.330007