# In [None]:
import pandas as pd
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C, WhiteKernel
from sklearn.model_selection import GridSearchCV, learning_curve
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
import numpy as np
import matplotlib.pyplot as plt
import shap
import seaborn as sns

# Locations of the training and test CSV files.
training_data_path = 'training_data.csv'  # Adjust file path as needed
test_data_path = 'test_data.csv'  # Adjust file path as needed

# Model input columns. Declared once so the training and test feature
# matrices always select the same columns in the same order.
FEATURE_COLUMNS = [
    'Density (log transformation)',
    'Elasticity (Pa)',
    'Tensile Stress (Pa)',
    'Thickness (mm)',
]


def _strip_commas_to_float(column: pd.Series) -> pd.Series:
    """Return *column* converted to float after deleting every comma.

    Values that are already numeric pass through the replacement untouched
    and are simply cast to float.

    Raises:
        ValueError: if a value still cannot be parsed as a float once its
            commas have been removed.
    """
    return column.replace({',': ''}, regex=True).astype(float)


# Load the datasets
training_data = pd.read_csv(training_data_path)
test_data = pd.read_csv(test_data_path)

# Clean the data: remove commas from the Force columns and convert to float
training_data['Force (N)'] = _strip_commas_to_float(training_data['Force (N)'])

# The test file's force column is overwritten with the model's predictions
# further down, so a test file that does not ship this column is acceptable;
# only clean it when it is actually present instead of raising KeyError.
if 'Predicted Force (N)' in test_data.columns:
    test_data['Predicted Force (N)'] = _strip_commas_to_float(
        test_data['Predicted Force (N)']
    )

# Prepare the training data
X_train = training_data[FEATURE_COLUMNS]
y_train = training_data['Force (N)']

# Prepare the test data
X_test = test_data[FEATURE_COLUMNS]

# Expand the raw inputs into all degree-2 terms (the inputs themselves, their
# squares and their pairwise products; no constant column). The expansion is
# fitted on the training inputs and re-applied unchanged to the test inputs.
poly = PolynomialFeatures(include_bias=False, degree=2)
poly.fit(X_train)
_train_expanded = poly.transform(X_train)
_test_expanded = poly.transform(X_test)

# Standardize every expanded feature using statistics learned from the
# training set only, so the test set is scaled with the training mean/variance.
scaler = StandardScaler()
scaler.fit(_train_expanded)
X_train_poly = scaler.transform(_train_expanded)
X_test_poly = scaler.transform(_test_expanded)

# Gaussian Process kernel: (constant amplitude * RBF) + white noise.
# Each component carries bounds that limit where the marginal-likelihood
# optimizer may move its hyperparameter during fitting:
#   constant_value in [1e-3, 1e1], length_scale in [1e-2, 1e1],
#   noise_level    in [1e-10, 1e1].
kernel = (
    C(1.0, (1e-3, 1e1))
    * RBF(length_scale=1.0, length_scale_bounds=(1e-2, 1e1))
    + WhiteKernel(noise_level=1e-5, noise_level_bounds=(1e-10, 1e1))
)

# n_restarts_optimizer=10 re-runs the kernel optimizer from 10 extra starting
# points in addition to the configured one; random_state fixes those draws.
# NOTE(review): normalize_y is left at its default. The force column is
# described above as containing comma separators, which suggests large
# magnitudes; if so, normalize_y=True may fit better -- confirm against data.
gpr = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=10, random_state=42)

# Hyperparameter grid search.
# 'alpha' is the value added to the kernel-matrix diagonal. The two kernel
# entries are only *starting values* for the bounded optimizer, so they must
# lie inside the bounds declared on the kernel above. The grid previously
# also listed 100 for both, which is above the 1e1 upper bound; a bounded
# optimizer cannot start there, so those entries merely repeated the
# 10 entries while adding 28 of 64 combinations (each fitted 10 times).
# NOTE(review): if values up to 100 are genuinely wanted, widen the kernel
# bounds instead of re-adding them here.
param_grid = {
    'alpha': [1e-2, 1e-3, 1e-4, 1e-5],
    'kernel__k1__k1__constant_value': [0.1, 1, 10],
    'kernel__k1__k2__length_scale': [0.1, 1, 10],
}

# 10-fold cross-validation; candidates are ranked by negated mean squared
# error (higher is better), and the best one is refit on all training data.
grid_search = GridSearchCV(gpr, param_grid, cv=10, scoring='neg_mean_squared_error')
grid_search.fit(X_train_poly, y_train)

# Get the best model
best_gpr = grid_search.best_estimator_

# Run the selected regressor on the standardized test features to obtain the
# predicted Force (N) for every test row.
predicted_forces = best_gpr.predict(X=X_test_poly)

# Store the predictions in the test table, replacing whatever that column held.
_prediction_column = 'Predicted Force (N)'
test_data[_prediction_column] = predicted_forces