In [2]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from matplotlib import pyplot as plt
from sklearn.model_selection import KFold
from sklearn.model_selection import LeaveOneOut
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
from sklearn.gaussian_process.kernels import DotProduct, WhiteKernel, RBF, ConstantKernel
from sklearn.gaussian_process import GaussianProcessRegressor

In [3]:
# Candidate values explored for each process parameter
ink_consentration = np.arange(0.1, 2.75, 0.25)
aquivion_to_Pt_ratio = np.arange(0, 6.5, 0.5)
IPA_vol_percent = np.arange(40, 95, 5)
pump_speed = np.arange(0.02, 0.55, 0.05)
temp_film_making = np.arange(50, 95, 5)
x_columns = [
    'Ink_consentration',
    'Aquivion_to_Pt_ratio',
    'IPA_vol_percent',
    'Pump_speed',
    'Temp_film_making',
]

# Span (max - min) of every parameter and its lower bound; data_loo uses
# both to rescale the measured inputs as (x - min_value) / limit.
limit = np.array([
    values.max() - values.min()
    for values in (ink_consentration, aquivion_to_Pt_ratio, IPA_vol_percent,
                   pump_speed, temp_film_making)
])
min_value = np.array([0.1, 0, 40, 0.02, 50])

# Full factorial grid: one row per parameter combination. 'ij' indexing keeps
# the first parameter varying slowest and the last one fastest.
grid = np.stack(
    np.meshgrid(ink_consentration, aquivion_to_Pt_ratio, IPA_vol_percent,
                pump_speed, temp_film_making, indexing='ij'),
    axis=-1,
).reshape(-1, len(x_columns))
grid_table = pd.DataFrame(grid, columns = x_columns)

In [6]:
# Total number of elements in grid_table (rows x columns)
grid_table.size

778635

In [4]:
def data_loo(samples_number, file_location):
    """Leave-one-out cross-validation of a GPR model on the first rows of a CSV.

    The first ``samples_number`` rows of the file are used. Inputs are taken
    from the ``x_columns`` columns and rescaled as ``(x - min_value) / limit``;
    the target is the ``power_density_30C_100RH`` column. For every fold a
    fresh ``GaussianProcessRegressor`` with an ``RBF() + WhiteKernel()`` kernel
    is fitted on all rows but one and used to predict the held-out row.

    Parameters
    ----------
    samples_number : int
        Number of leading rows of the CSV to include in the cross-validation.
    file_location : str
        Path of the CSV file, passed to ``pandas.read_csv``.

    Returns
    -------
    numpy.ndarray of shape (n_rows_used, 2)
        Column 0 holds the held-out true values, column 1 the corresponding
        model predictions, one row per fold.
    """
    # Truncate once so inputs and targets always cover exactly the same rows.
    df = pd.read_csv(file_location).iloc[0:samples_number]

    # Normalize the x values
    x = (df[x_columns].to_numpy() - min_value) / limit
    y = df['power_density_30C_100RH'].to_numpy()

    # Held-out targets and the matching predictions, one entry per fold
    y_real = []
    y_result = []

    # Perform the leave-one-out cross-validation
    for train_index, test_index in LeaveOneOut().split(x):
        x_train, x_test = x[train_index], x[test_index]
        y_train, y_test = y[train_index], y[test_index]

        # GPR model, rebuilt for every fold
        kernel = RBF() + WhiteKernel()
        gpr = GaussianProcessRegressor(kernel = kernel, random_state=0, normalize_y=True)
        gpr.fit(x_train, y_train)

        y_real.append(y_test)
        y_result.append(gpr.predict(x_test))

    # Stack both lists as columns: [true value, predicted value]
    y_real = np.array(y_real)
    y_result = np.reshape(y_result, (-1, 1))
    return np.hstack([y_real, y_result])

def calculate_error(result):
    """Compute MAE, R-squared and RMSE for paired true/predicted values.

    Parameters
    ----------
    result : numpy.ndarray of shape (n_samples, 2)
        Column 0 holds the true values and column 1 the predicted values
        (the layout returned by ``data_loo``).

    Returns
    -------
    numpy.ndarray of shape (3,)
        ``[mae, r_square, rmse]`` with MAE and RMSE rounded to 4 decimals and
        R-squared rounded to 3 decimals.
    """
    y_true = result[:, 0]
    y_pred = result[:, 1]

    # Calculating MAE
    mae = round(np.abs(y_true - y_pred).mean(), 4)

    # Calculating R-squared. r2_score expects (y_true, y_pred) and is not
    # symmetric in its arguments, so the order matters here.
    r_square = round(r2_score(y_true, y_pred), 3)

    # Calculating RMSE (argument order kept consistent with r2_score)
    rmse = round(np.sqrt(mean_squared_error(y_true, y_pred)), 4)

    return np.array([mae, r_square, rmse])

In [5]:
# Training-set sizes to evaluate; each entry is passed to data_loo as its
# samples_number argument (number of leading CSV rows to use).
samples_number = [3, 5, 10, 15, 23, 35, 56]
# CSV file read by data_loo; the path is relative to the working directory.
file_location = '../Data/Parameters.csv'

In [6]:
# One row of [MAE, R-squared, RMSE] per training-set size
data = np.array([
    calculate_error(data_loo(n_samples, file_location))
    for n_samples in samples_number
])
data_table = pd.DataFrame({'Number of Sample': samples_number})

# Display the sample sizes next to their leave-one-out error metrics
mae_column, r_square_column, rmse_column = data[:, 0], data[:, 1], data[:, 2]
data_table.assign(MAE = mae_column, R_square = r_square_column, RMSE = rmse_column)

FileNotFoundError: [Errno 2] No such file or directory: '../Data/Parameters.csv'