# 1. Import libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import GridSearchCV, KFold
from sklearn.model_selection import train_test_split
from sklearn import base, metrics, model_selection, preprocessing, tree, utils
from sklearn.metrics import r2_score
from sklearn.metrics import mean_absolute_error as MAE
from sklearn.metrics import mean_absolute_percentage_error as MAPE
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor

# 2. Import the Data

In [2]:
data = pd.read_csv("FRP-confined corroded RC columns_Database.csv",index_col=0)
data.head()

Unnamed: 0_level_0,Spec.,D,H,b,h,Ag,Circular,r,ρ,fc,...,tfrp,Efrp,Ffrp,Ltype,ρs,Ebar,Fbar,e,η,Pmax
No.,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,CFRP1,152,457,−,−,18136.64,Yes,0,1.0,21.0,...,0.17,228.0,3790.0,"4 NO,9.5",1.563292,209.0,483.0,0.0,36.0,774.0
2,CFRP2,152,457,−,−,18136.64,Yes,0,1.0,21.0,...,0.17,228.0,3790.0,"4 NO,9.5",1.563292,209.0,483.0,0.0,45.0,720.6
3,CFRP3,152,457,−,−,18136.64,Yes,0,1.0,21.0,...,0.17,228.0,3790.0,"4 NO,9.5",1.563292,209.0,483.0,0.0,25.0,747.3
4,CFRP4,152,457,−,−,18136.64,Yes,0,1.0,21.0,...,0.17,228.0,3790.0,"4 NO,9.5",1.563292,209.0,483.0,0.0,51.0,645.0
5,CFRP-COR,203,914,−,−,32349.07,Yes,0,1.0,21.0,...,0.17,228.0,3790.0,"8 NO,9.5",1.752933,209.0,483.0,0.0,0.0,1926.0


In [3]:
data["Circular"] = (data["Circular"]== "Yes").astype("int")
data['r'] = data['r'].astype(float)
data['Nfrp'] = data['Nfrp'].astype(float)

# All the Data
Data= data[[ 'Ag', 'ρ', 'fc', 'tfrp', 'Efrp', 'Ffrp', 'ρs', 'Ebar','Fbar','e','η','Pmax']]

# Concentric RC-Columns only
DataConcentric = Data[(Data["e"]==0)]
DataConcentric = DataConcentric.drop('e', axis = 1)

# Eccentric RC-Columns only
DataEccentric =  Data[(Data["e"] > 0)]

# 3. Machine learning model

# 3.1.Decision Tree

In [4]:
y = Data['Pmax']
X = Data.drop('Pmax', axis = 1)

X.head()

Unnamed: 0_level_0,Ag,ρ,fc,tfrp,Efrp,Ffrp,ρs,Ebar,Fbar,e,η
No.,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1,18136.64,1.0,21.0,0.17,228.0,3790.0,1.563292,209.0,483.0,0.0,36.0
2,18136.64,1.0,21.0,0.17,228.0,3790.0,1.563292,209.0,483.0,0.0,45.0
3,18136.64,1.0,21.0,0.17,228.0,3790.0,1.563292,209.0,483.0,0.0,25.0
4,18136.64,1.0,21.0,0.17,228.0,3790.0,1.563292,209.0,483.0,0.0,51.0
5,32349.07,1.0,21.0,0.17,228.0,3790.0,1.752933,209.0,483.0,0.0,0.0


In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [6]:
model = DecisionTreeRegressor()

In [7]:
param_grid = {'max_depth': [2, 4, 6, 8, 10],
              'min_samples_split': [2, 4, 6, 8, 10],
              'min_samples_leaf': [1, 2, 3, 4, 5]}

kfold = KFold(n_splits=10, shuffle=True, random_state=42)
grid_search = GridSearchCV(model, param_grid, cv=kfold, n_jobs=-1)
grid_search.fit(X, y)

In [8]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error, r2_score

best_model = grid_search.best_estimator_
y_pred_train = best_model.predict(X_train)
y_pred_test = best_model.predict(X_test)

rmse_train = np.sqrt(mean_squared_error(y_train, y_pred_train))
rmse_test = np.sqrt(mean_squared_error(y_test, y_pred_test))
mae_train = mean_absolute_error(y_train, y_pred_train)
mae_test = mean_absolute_error(y_test, y_pred_test)
mape_train = mean_absolute_percentage_error(y_train, y_pred_train)
mape_test = mean_absolute_percentage_error(y_test, y_pred_test)
r2_train = r2_score(y_train, y_pred_train)
r2_test = r2_score(y_test, y_pred_test)

print("Best params:", grid_search.best_params_)
print("Train RMSE:", rmse_train)
print("Test RMSE:", rmse_test)
print("Train MAE:", mae_train)
print("Test MAE:", mae_test)
print("Train MAPE:", mape_train)
print("Test MAPE:", mape_test)
print("Train R2 score:", r2_train)
print("Test R2 score:", r2_test)

Best params: {'max_depth': 6, 'min_samples_leaf': 4, 'min_samples_split': 8}
Train RMSE: 196.78402216406744
Test RMSE: 240.212402722387
Train MAE: 138.89820080990953
Test MAE: 152.5242097062509
Train MAPE: 0.175437002913748
Test MAPE: 0.15381328732372923
Train R2 score: 0.9059142117974326
Test R2 score: 0.8779294204786752


# 3.2.Random Forset

In [9]:
y = Data['Pmax']
X = Data.drop('Pmax', axis = 1)

X.head()

Unnamed: 0_level_0,Ag,ρ,fc,tfrp,Efrp,Ffrp,ρs,Ebar,Fbar,e,η
No.,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1,18136.64,1.0,21.0,0.17,228.0,3790.0,1.563292,209.0,483.0,0.0,36.0
2,18136.64,1.0,21.0,0.17,228.0,3790.0,1.563292,209.0,483.0,0.0,45.0
3,18136.64,1.0,21.0,0.17,228.0,3790.0,1.563292,209.0,483.0,0.0,25.0
4,18136.64,1.0,21.0,0.17,228.0,3790.0,1.563292,209.0,483.0,0.0,51.0
5,32349.07,1.0,21.0,0.17,228.0,3790.0,1.752933,209.0,483.0,0.0,0.0


In [10]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [11]:
model = RandomForestRegressor(n_estimators=100)

In [None]:
param_grid = {'max_depth': [2, 4, 6, 8, 10],
              'min_samples_split': [2, 4, 6, 8, 10],
              'min_samples_leaf': [1, 2, 3, 4, 5]}

kfold = KFold(n_splits=10, shuffle=True, random_state=42)
grid_search = GridSearchCV(model, param_grid, cv=kfold, n_jobs=-1)
grid_search.fit(X, y)

In [27]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error, r2_score

best_model = grid_search.best_estimator_
y_pred_train = best_model.predict(X_train)
y_pred_test = best_model.predict(X_test)

rmse_train = np.sqrt(mean_squared_error(y_train, y_pred_train))
rmse_test = np.sqrt(mean_squared_error(y_test, y_pred_test))
mae_train = mean_absolute_error(y_train, y_pred_train)
mae_test = mean_absolute_error(y_test, y_pred_test)
mape_train = mean_absolute_percentage_error(y_train, y_pred_train)
mape_test = mean_absolute_percentage_error(y_test, y_pred_test)
r2_train = r2_score(y_train, y_pred_train)
r2_test = r2_score(y_test, y_pred_test)

print("Best params:", grid_search.best_params_)
print("Train RMSE:", rmse_train)
print("Test RMSE:", rmse_test)
print("Train MAE:", mae_train)
print("Test MAE:", mae_test)
print("Train MAPE:", mape_train)
print("Test MAPE:", mape_test)
print("Train R2 score:", r2_train)
print("Test R2 score:", r2_test)

Best params: {'max_depth': 8, 'min_samples_leaf': 1, 'min_samples_split': 6}
Train RMSE: 150.40049629927458
Test RMSE: 206.14064896367
Train MAE: 100.14374984442472
Test MAE: 141.0188423703451
Train MAPE: 0.132895372456497
Test MAPE: 0.14174180223036584
Train R2 score: 0.9450404839158522
Test R2 score: 0.9101025382364658


# 3.3.GBDT

In [28]:
y = Data['Pmax']
X = Data.drop('Pmax', axis = 1)

X.head()

Unnamed: 0_level_0,Ag,ρ,fc,tfrp,Efrp,Ffrp,ρs,Ebar,Fbar,e,η
No.,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1,18136.64,1.0,21.0,0.17,228.0,3790.0,1.563292,209.0,483.0,0.0,36.0
2,18136.64,1.0,21.0,0.17,228.0,3790.0,1.563292,209.0,483.0,0.0,45.0
3,18136.64,1.0,21.0,0.17,228.0,3790.0,1.563292,209.0,483.0,0.0,25.0
4,18136.64,1.0,21.0,0.17,228.0,3790.0,1.563292,209.0,483.0,0.0,51.0
5,32349.07,1.0,21.0,0.17,228.0,3790.0,1.752933,209.0,483.0,0.0,0.0


In [29]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [30]:
model = GradientBoostingRegressor(n_estimators=100)

In [31]:
param_grid = {'max_depth': [2, 4, 6, 8, 10],
              'min_samples_split': [2, 4, 6, 8, 10],
              'min_samples_leaf': [1, 2, 3, 4, 5]}

kfold = KFold(n_splits=10, shuffle=True, random_state=42)
grid_search = GridSearchCV(model, param_grid, cv=kfold, n_jobs=-1)
grid_search.fit(X, y)

In [33]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error, r2_score

best_model = grid_search.best_estimator_
y_pred_train = best_model.predict(X_train)
y_pred_test = best_model.predict(X_test)

rmse_train = np.sqrt(mean_squared_error(y_train, y_pred_train))
rmse_test = np.sqrt(mean_squared_error(y_test, y_pred_test))
mae_train = mean_absolute_error(y_train, y_pred_train)
mae_test = mean_absolute_error(y_test, y_pred_test)
mape_train = mean_absolute_percentage_error(y_train, y_pred_train)
mape_test = mean_absolute_percentage_error(y_test, y_pred_test)
r2_train = r2_score(y_train, y_pred_train)
r2_test = r2_score(y_test, y_pred_test)

print("Best params:", grid_search.best_params_)
print("Train RMSE:", rmse_train)
print("Test RMSE:", rmse_test)
print("Train MAE:", mae_train)
print("Test MAE:", mae_test)
print("Train MAPE:", mape_train)
print("Test MAPE:", mape_test)
print("Train R2 score:", r2_train)
print("Test R2 score:", r2_test)

Best params: {'max_depth': 4, 'min_samples_leaf': 1, 'min_samples_split': 2}
Train RMSE: 112.60302010195628
Test RMSE: 158.56892685469523
Train MAE: 66.27730533940986
Test MAE: 89.95170132763243
Train MAPE: 0.10432604095479311
Test MAPE: 0.10253020785013614
Train R2 score: 0.9691933525199254
Test R2 score: 0.9468067744686355
