<a href="https://colab.research.google.com/github/mmostafahareb/ESS_Project/blob/main/gaussian_regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
import numpy as np
import pandas as pd
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF
from sklearn.model_selection import train_test_split, KFold, GridSearchCV
from sklearn.metrics import mean_squared_error

# Load the dataset
df = pd.read_csv('soh_lithium_ion.csv')

# Split the data into features (X) and target (y)
X = df.drop('SOH_discharge_capacity', axis=1)
y = df['SOH_discharge_capacity']

# Define the K-fold cross-validation object
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Define the model to use for the GridSearchCV
kernel = RBF(length_scale=1.0, length_scale_bounds=(1e-1, 100000.0))
model = GaussianProcessRegressor(kernel=kernel, alpha=0.1, n_restarts_optimizer=10, random_state=42)

# Define the grid of hyperparameters to search
param_grid = {'kernel__length_scale': np.logspace(-2, 2, 5),
              'alpha': [0.1, 1, 10]}

# Define the GridSearchCV object
grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=kf, scoring='neg_mean_squared_error', n_jobs=-1)

# Fit the GridSearchCV object to the data
grid_search.fit(X, y)

# Get the best model
best_model = grid_search.best_estimator_

# Print the best hyperparameters and the best score
print("Best hyperparameters: ", grid_search.best_params_)
print("Best score: ", -grid_search.best_score_)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the best model to the training data
best_model.fit(X_train, y_train)

# Calculate the mean squared error of the best model on the test set
y_pred = best_model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print("Mean squared error on test set: ", mse)




Best hyperparameters:  {'alpha': 0.1, 'kernel__length_scale': 0.01}
Best score:  0.00014711829093236537
Mean squared error on test set:  0.00016082866805543846


