In [4]:
import pickle
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, Matern, ExpSineSquared, ConstantKernel
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score


#Import dataset
df = pd.read_pickle('DoE_results_7d_50k.pkl')
points_df = df['points']

#Eliminate all NaN output values 
points_df_1 = points_df[(points_df['sigma_crit'] > 0) & (points_df['energy'] >= 0)]
sigma_crit_1 = points_df_1['sigma_crit']
energy_1 = points_df_1['energy']

#Find the quartiles to eliminate outliers
q1_s = np.percentile(sigma_crit_1,25.0)
q3_s = np.percentile(sigma_crit_1, 75.0)
q1_e = np.percentile(energy_1,25.0)
q3_e = np.percentile(energy_1, 75.0)

#Define the highest point which is not considered an outlier
high_s = q3_s + 1.5*(q3_s-q1_s)
high_e = q3_e + 1.5*(q3_e-q1_e)

#Eliminate outliers
points_df_2 = points_df[(points_df['sigma_crit'] < high_s) & (points_df['energy'] < high_e)]

#Convert the columns to arrays
RA   = points_df_2[['ratio_area']].to_numpy()
RIxx = points_df_2[['ratio_Ixx']].to_numpy()
RIyy = points_df_2[['ratio_Iyy']].to_numpy()
RJ   = points_df_2[['ratio_J']].to_numpy()
RP   = points_df_2[['ratio_pitch']].to_numpy()
RT   = points_df_2[['ratio_top_diameter']].to_numpy()
RS   = points_df_2[['ratio_shear_modulus']].to_numpy()

S  = points_df_2[['sigma_crit']].to_numpy()
E  = points_df_2[['energy']].to_numpy()

# xx1, xx2, xx3 = np.meshgrid(RD, RP, RT)
# input_points = np.array([xx1, xx2, xx3])
# yy1, yy2 = np.meshgrid(S, E)
# output_points = np.array([yy1, yy2])

# X_data = xx1, xx2, xx3
# y_data = yy1, yy2


#Make a training set for all inputs and outputs
train_size = 0.75
RD_train,RD_test,RP_train,RP_test,RT_train,RT_test,S_train,S_test,E_train,E_test=train_test_split(RD,RP,RT,S,E,train_size=train_size)


ValueError: Found input variables with inconsistent numbers of samples: [502, 21327, 21327, 21327, 21327]

In [None]:
input1 = RA   
input2 = RIxx 
input3 = RIyy 
input4 = RJ   
input5 = RP   
input6 = RT   
input7 = RS  
output = S

X_data = np.array([input1,input2,input3,input4,input5,input6,input7]).reshape(7,-1).T
Y_data = np.array([output]).reshape(-1,1)
xx1, xx2 = np.meshgrid(input1,input2, sparse=True)
yy1 = output

train_size= 0.75
X_Train,X_Test,Y_Train,Y_Test=train_test_split(X_data, Y_data, train_size=train_size)


from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, Matern, ExpSineSquared, ConstantKernel

kernel = ConstantKernel(1.0, (1e-3, 1e3)) * RBF(10, (1e-2, 1e2)) # This is the standard RBF kernel
kernel = 1.0 * RBF(10, (1e-2, 1e2)) # Same kernel as above
                                    #(scikit-learn assumes constant
                                    # variance if you just write RBF
                                    # without the constant kernel or
                                    # without multiplying by 1.0)

# Other examples of kernels:
#kernel = ExpSineSquared(length_scale=3.0, periodicity=3.14,
#                       length_scale_bounds=(0.1, 10.0),
#                       periodicity_bounds=(0.1, 10)) * RBF(3.0, (1e-2, 1e2))
#kernel = Matern(length_scale=1.0, length_scale_bounds=(1e-2, 1e2),nu=1.5)
                
gp_model = GaussianProcessRegressor(kernel=kernel, alpha=1e-10, n_restarts_optimizer=10) # using a small alpha

# Fit to data using Maximum Likelihood Estimation of the parameters
gp_model.fit(X_Train, Y_Train)

y_data_pred, sigma_data_pred = gp_model.predict(X_data, return_std=True) # also output the uncertainty (standard deviation)

y_test_pred, sigma_test_pred = gp_model.predict(X_Test, return_std=True) # For error metrics


In [None]:
yy1 = np.reshape(yy1, np.shape(xx1))
yy1_data_pred = np.reshape(y_data_pred,np.shape(xx1))
nsamples, nx, ny = yy1.shape
d2_yy1 = yy1.reshape((nsamples,nx*ny))

d2_yy1_data_pred = yy1_data_pred.reshape((nsamples,nx*ny))
mse_value = mean_squared_error(d2_yy1.T, d2_yy1_data_pred.T)
r2_value  = r2_score(d2_yy1.T, d2_yy1_data_pred.T)
print('MSE =', mse_value)
print('R2 score =', r2_value)