In [None]:
# Load library
import pandas as pd
import numpy as np
from scipy.io import loadmat
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, ParameterGrid
from xgboost import XGBRegressor, plot_importance, plot_tree
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
import os
import h5py
import time
from sklearn import multioutput
import xgboost as xgb

In [None]:
# Seed
np.random.seed(0)
tf.random.set_seed(0)

In [None]:
# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Change the current working directory
os.chdir('/content/drive/My Drive/Colab Notebooks/PINN_Active_Passive_Paper/p5763')
# Print the current working directory
print("Current working directory: {0}".format(os.getcwd()))

Current working directory: /content/drive/My Drive/Colab Notebooks/PINN_Active_Passive_Paper/p5763


In [None]:
# Load Features
os.chdir('/content/drive/My Drive/Colab Notebooks/PINN_Active_Passive_Paper/p5763/Data')
Data1 = loadmat('p5763_Set1_Inputs.mat')
Data2 = loadmat('p5763_Set1_Outputs.mat')
Features = pd.DataFrame({'Time':Data1['Time'].ravel().round(2),'K':Data1['K'].ravel().round(2),'V':Data1['V'].ravel().round(2),'AE':Data1['AE'].ravel().round(2),'ln_V0_Theta':Data1['ln_V0_Theta'].ravel().round(2)})
Output = pd.DataFrame({'ln_V_V0':Data2['ln_V_V0'].ravel().round(2),'Time':Data2['Time'].ravel().round(2),'SS':Data2['SS'].ravel().round(2),'SR':Data2['SR'].ravel().round(2),'TTF':Data2['TTF'].ravel().round(2)})
df = Features.merge(Output, on = 'Time')
df1 = df[['K','V','AE','ln_V0_Theta','SS','TTF','SR','ln_V_V0','Time']]
sr = df1[['Time','ln_V_V0']]
df1 = df1[['K','V','AE','ln_V0_Theta','SS','TTF','Time']]
df2 = df1.merge(sr,on = 'Time')
df = df2[['K','V','AE','ln_V0_Theta','SS']]
xdf = df[['K','V','AE','ln_V0_Theta']]
ydf = df[['SS']]

print("Input Data:\n", xdf)
print("Target Data:\n",ydf)

Input Data:
             K      V    AE  ln_V0_Theta
0     5957.24  17.78  2.00         9.95
1     5960.89  17.79  2.00         9.94
2     6214.84  17.81  2.00         9.94
3     4543.97  17.74  2.00         9.95
4     4554.15  17.76  1.67         9.94
...       ...    ...   ...          ...
9495  1033.99  16.82  2.00         9.75
9496  1032.72  16.81  2.00         9.77
9497  1032.13  16.81  1.67         9.76
9498  1032.36  16.81  1.67         9.74
9499  1114.37  16.83  1.50         9.74

[9500 rows x 4 columns]
Target Data:
         SS
0     6.05
1     6.06
2     6.06
3     6.07
4     6.07
...    ...
9495  5.92
9496  5.92
9497  5.93
9498  5.93
9499  5.94

[9500 rows x 1 columns]


In [None]:
# History
def create_timesteps(data, n_steps, step):
	x = []
	y = []
	for i in range(len(data)-1):
		end_ix = i + n_steps
		if end_ix > len(data)-1:
			break
		x1, y1 = data[i:end_ix, :-1], data[(end_ix):(end_ix + step), -1:]  #last  2 column is the target variable # -2:
		x.append(x1)
		y.append(y1)
	return np.array(x), np.array(y)

In [None]:
n_steps = 100 # History for input features
step = 50  # Steps to predict in future

# Preprocessing
arr = df.to_numpy()
xdf, ydf = create_timesteps(arr, n_steps, step)

print('Features shape, X = ', np.shape(xdf))
print('Target shape, Y = ', np.shape(ydf))

# # Reshape features from 3D to 2D
in_dim = xdf.shape[1]*xdf.shape[2]
xdf = xdf.reshape((xdf.shape[0], in_dim))
print('After reshaping, X = ', np.shape(xdf))

Features shape, X =  (9400, 100, 4)
Target shape, Y =  (9400,)
After reshaping, X =  (9400, 400)


  return np.array(x), np.array(y)


In [None]:
print(xdf)

[[5.95724e+03 1.77800e+01 2.00000e+00 ... 1.73500e+01 1.67000e+00
  9.95000e+00]
 [5.96089e+03 1.77900e+01 2.00000e+00 ... 1.73800e+01 1.33000e+00
  9.94000e+00]
 [6.21484e+03 1.78100e+01 2.00000e+00 ... 1.74100e+01 1.33000e+00
  9.94000e+00]
 ...
 [1.02925e+03 1.68300e+01 1.33000e+00 ... 1.68100e+01 2.00000e+00
  9.77000e+00]
 [1.02968e+03 1.68300e+01 1.67000e+00 ... 1.68100e+01 1.67000e+00
  9.76000e+00]
 [1.03121e+03 1.68300e+01 1.67000e+00 ... 1.68100e+01 1.67000e+00
  9.74000e+00]]


In [None]:
print(ydf)

[array([[6.08],
        [6.09],
        [6.09],
        [6.1 ],
        [6.1 ],
        [6.1 ],
        [6.11],
        [6.11],
        [6.12],
        [6.12],
        [6.12],
        [6.13],
        [6.13],
        [6.14],
        [6.14],
        [6.14],
        [6.14],
        [6.15],
        [6.15],
        [6.15],
        [6.15],
        [6.15],
        [6.14],
        [6.13],
        [5.96],
        [5.89],
        [5.9 ],
        [5.91],
        [5.92],
        [5.93],
        [5.93],
        [5.94],
        [5.94],
        [5.95],
        [5.95],
        [5.96],
        [5.96],
        [5.97],
        [5.98],
        [5.98],
        [5.99],
        [5.99],
        [6.  ],
        [6.  ],
        [6.01],
        [6.01],
        [6.02],
        [6.02],
        [6.03],
        [6.03]]) array([[6.09],
                        [6.09],
                        [6.1 ],
                        [6.1 ],
                        [6.1 ],
                        [6.11],
                        

In [None]:
# Extract ydf
ydf = np.concatenate(ydf, axis=0)
print(ydf)
print(ydf.shape)

[[6.08]
 [6.09]
 [6.09]
 ...
 [5.93]
 [5.94]
 [5.94]]
(468775, 1)


In [None]:
# # Reshape ydf
ydf = ydf[0:468750]
ydf = np.reshape(ydf, (9375,50))
print(ydf.shape)

(9375, 50)


In [None]:
# # Reshape Train
xdf = xdf[0:9375,:]
print(xdf.shape)

(9375, 400)


In [None]:
# Split into train-val-test
x_train, x_test, y_train, y_test = train_test_split(xdf, ydf, random_state = 0, test_size=0.1, shuffle=False)
X_train, X_val, Y_train, Y_val = train_test_split(x_train, y_train, random_state = 0, test_size=0.111, shuffle=False)

# Normalize the input
scaler = MinMaxScaler()
X_train = pd.DataFrame(scaler.fit_transform(X_train))
X_val = pd.DataFrame(scaler.transform(X_val))
x_test = pd.DataFrame(scaler.transform(x_test))

print("Training input samples:", np.shape(X_train))
print("Validation input samples:", np.shape(X_val))
print("Testing input samples:", np.shape(x_test))

print("Training output samples:", np.shape(Y_train))
print("Validation output samples:", np.shape(Y_val))
print("Testing output samples:", np.shape(y_test))

Training input samples: (7500, 400)
Validation input samples: (937, 400)
Testing input samples: (938, 400)
Training output samples: (7500, 50)
Validation output samples: (937, 50)
Testing output samples: (938, 50)


In [None]:
# Train the model
model = XGBRegressor(learning_rate=0.1, max_depth=6, n_estimators=100)
model = multioutput.MultiOutputRegressor(model)
start_time = time.time()
model.fit(X_train, Y_train, verbose=True)
end_time = time.time()
print("---Training time: %0.8f seconds ---" % (end_time - start_time))

---Training time: 420.83449078 seconds ---


In [None]:
# Evaluate the model

## Training
y_predTrain = pd.DataFrame(model.predict(X_train))
ss_train_r2 = r2_score(Y_train[:,0], y_predTrain[0])
ss_train_rmse = np.sqrt(mean_squared_error(Y_train[:,0], y_predTrain[0]))

## Validation
y_predVal = pd.DataFrame(model.predict(X_val))
ss_val_r2 = r2_score(Y_val[:,0], y_predVal[0])
ss_val_rmse = np.sqrt(mean_squared_error(Y_val[:,0], y_predVal[0]))

## Testing
y_predTest = pd.DataFrame(model.predict(x_test))
ss_test_r2 = r2_score(y_test[:,0], y_predTest[0])
ss_test_rmse = np.sqrt(mean_squared_error(y_test[:,0], y_predTest[0]))

##
print('Shear Stress')
print('R2 score:', ss_train_r2, ss_val_r2 ,ss_test_r2, '\nRMSE:', ss_train_rmse, ss_val_rmse ,ss_test_rmse)

Shear Stress
R2 score: 0.9976883963010311 0.9194465065534426 0.8041587589738693 
RMSE: 0.006506716040150613 0.04347979352050504 0.0675842999784283
