In [4]:
import pickle
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from matplotlib import cm
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler

#Import dataset
df = pd.read_pickle('DoE_results_7d_50k.pkl')
points_df = df['points']

#Eliminate all NaN output values 
points_df_1 = points_df[(points_df['sigma_crit'] > 0) & (points_df['energy'] >= 0)]
sigma_crit_1 = points_df_1['sigma_crit']
energy_1 = points_df_1['energy']

#Find the quartiles to eliminate outliers
q1_s = np.percentile(sigma_crit_1,25.0)
q3_s = np.percentile(sigma_crit_1, 75.0)
q1_e = np.percentile(energy_1,25.0)
q3_e = np.percentile(energy_1, 75.0)

#Define the highest point which is not considered an outlier
high_s = q3_s + 1.5*(q3_s-q1_s)
high_e = q3_e + 1.5*(q3_e-q1_e)

#Eliminate outliers
points_df_2 = points_df[(points_df['sigma_crit'] < high_s) & (points_df['energy'] < high_e)]
#Convert the columns to arrays
RA   = points_df_2[['ratio_area']].to_numpy()
RIxx = points_df_2[['ratio_Ixx']].to_numpy()
RIyy = points_df_2[['ratio_Iyy']].to_numpy()
RJ   = points_df_2[['ratio_J']].to_numpy()
RP   = points_df_2[['ratio_pitch']].to_numpy()
RT   = points_df_2[['ratio_top_diameter']].to_numpy()
RS   = points_df_2[['ratio_shear_modulus']].to_numpy()

S  = points_df_2[['sigma_crit']].to_numpy()
E  = points_df_2[['energy']].to_numpy()

In [5]:
input1 = RA   
input2 = RIxx 
input3 = RIyy 
input4 = RJ   
input5 = RP   
input6 = RT   
input7 = RS  
output = S

X_data = np.array([input1,input2,input3,input4,input5,input6,input7]).reshape(7,-1).T
Y_data = np.array([output]).reshape(-1,1)
xx1, xx2 = np.meshgrid(input1,input2, sparse=True)
yy1 = output


train_size= 0.75
X_Train,X_Test,Y_Train,Y_Test=train_test_split(X_data, Y_data, train_size=train_size)

In [6]:
for degree in range(1,9):
    model = make_pipeline(PolynomialFeatures(degree),LinearRegression())
    model.fit(X_Train, Y_Train)
    y_data_pred = model.predict(X_data)

    mse_value = mean_squared_error(yy1, y_data_pred)
    r2_value  = r2_score(yy1, y_data_pred)
    print('MSE for degree', degree,'=', mse_value)
    print('R2 score for degree', degree,'=', r2_value)

MSE for degree 1 = 379.80582202280124
R2 score for degree 1 = 0.5611399999196192
MSE for degree 2 = 153.58898633622906
R2 score for degree 2 = 0.822530201888752
MSE for degree 3 = 100.59760988604846
R2 score for degree 3 = 0.8837609522477858
MSE for degree 4 = 90.99881974348956
R2 score for degree 4 = 0.8948522120402228
MSE for degree 5 = 86.58467773172498
R2 score for degree 5 = 0.8999526877341459
MSE for degree 6 = 85.5716394997706
R2 score for degree 6 = 0.9011232384018244
MSE for degree 7 = 73.08201317695597
R2 score for degree 7 = 0.9155548165694315
MSE for degree 8 = 82.54170441067089
R2 score for degree 8 = 0.9046242835052509


In [None]:
set_cm = cm.cool

fig1 = plt.figure(figsize=plt.figaspect(2.0))
#
# Subplot 1 (top) of Figure 1: ground truth
ax1 = fig1.add_subplot(2, 1, 1, projection='3d')
#
# Don't forget that for a Surface plot we need the data
# coming out of meshgrid not in the format of X_data (!)
#
yy1 = np.reshape(yy1, np.shape(xx1))
surf = ax1.plot_surface(xx1, xx2, yy1,cmap=set_cm,
                        alpha=0.8,
                        linewidth=0, antialiased=False)

# Create axis labels and title:
ax1.set_xlabel('$Ratio Diameter$')
ax1.set_ylabel('$Ratio Top Diameter')
ax1.set_zlabel('Sigma Critical')
ax1.set_title("Ground truth")

# Subplot 2 (bottom) of Figure 1
ax2 = fig1.add_subplot(2, 1, 2, projection='3d')
#
yy1_data_pred = np.reshape(y_data_pred,np.shape(xx1))

surf = ax2.scatter(xx1, xx2, yy1_data_pred,label="LR prediction")
# Create axis labels and title:
ax2.set_xlabel('$Ratio Diameter$')
ax2.set_ylabel('$Ratio Top Diameter$')
ax2.set_zlabel('$Sigma Critical$')
ax2.set_title("Linear Regression Prediction")
ax2.scatter(X_Train[:,0], X_Train[:,1], Y_Train,
            marker='o', color='red',
            label="training points")
ax2.legend(loc='lower left')

In [None]:
print(yy1.shape)

mse_value = mean_squared_error(yy1, y_data_pred)
r2_value  = r2_score(yy1, y_data_pred)
print('MSE', mse_value)
print('R2 score', r2_value)