In [29]:
import time
import pandas as pd
import numpy as np
from scipy import stats
from scipy.optimize import differential_evolution
from sklearn.decomposition import PCA
from sklearn.ensemble import HistGradientBoostingRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MaxAbsScaler, MinMaxScaler, Normalizer, PowerTransformer, RobustScaler, StandardScaler, PolynomialFeatures
from sklearn.metrics import max_error, mean_absolute_error, mean_squared_error, mean_absolute_percentage_error, median_absolute_error, r2_score
from sklearn.base import BaseEstimator, RegressorMixin
from GRNN import GRNN, calculate_error_cost

In [41]:
def relative_root_mean_squared_error(true, pred):
    """Return the relative root mean squared error (RRMSE) of ``pred`` vs ``true``.

    The value is ``sqrt((sum((true - pred)**2) / n) / sum(pred**2))`` with
    ``n = len(true)``.

    Parameters
    ----------
    true : array-like
        Observed values.
    pred : array-like
        Predicted values; must have the same shape as ``true``.

    Returns
    -------
    numpy.float64
        The RRMSE. If every prediction is zero the denominator is zero and
        the result follows NumPy float rules (``inf`` or ``nan``).

    Raises
    ------
    ValueError
        If ``true`` and ``pred`` do not have the same shape.

    Notes
    -----
    The denominator is the *sum* of squared predictions (not divided by
    ``n``, and not based on ``true``). This is kept as-is so previously
    exported scores stay comparable.
    """
    # Coerce so lists/tuples work and pandas Series are compared by position
    # rather than being re-aligned on their index during the subtraction.
    true = np.asarray(true, dtype=float)
    pred = np.asarray(pred, dtype=float)
    # Guard against silent broadcasting, e.g. (n,) vs (n, 1) -> (n, n) residuals.
    if true.shape != pred.shape:
        raise ValueError(
            f'true and pred must have the same shape, got {true.shape} and {pred.shape}'
        )
    n = len(true)
    mean_squared_residual = np.sum(np.square(true - pred)) / n
    squared_prediction_sum = np.sum(np.square(pred))
    return np.sqrt(mean_squared_residual / squared_prediction_sum)

In [30]:
# Load, filter, split and scale the data.
# NOTE(review): absolute '/content/...' path - looks like a Colab upload
# location; adjust when running elsewhere.
DATA_PATH = '/content/Tunneling_Induced_building_damage_dataset.txt'
Z_SCORE_LIMIT = 3                   # keep rows whose numeric |z-score| is below this in every column
N_FEATURE_COLUMNS = 15              # leading columns used as model inputs
TARGET_COLUMN_POSITIONS = [29, 30]  # positional indices (after the drop below) of the modelled targets

# Load all data with pandas
data = pd.read_csv(DATA_PATH, sep='\t')
data = data.drop(labels = 'Tot No. Simulations', axis=1)

# Remove outliers: a row survives only if all of its non-object columns are
# within the z-score limit. The z-scores are computed on the whole table
# (features and targets) before the train/test split.
numeric_zscores = stats.zscore(data.select_dtypes(exclude='object'))
data = data[(np.abs(numeric_zscores) < Z_SCORE_LIMIT).all(axis=1)]
data_columns = data.columns

# Features: first N_FEATURE_COLUMNS columns. Targets: two selected columns
# (the recorded training output names them 'Local Avg.' and 'Global Avg.').
X = data.iloc[:, :N_FEATURE_COLUMNS].to_numpy()
Y = data.iloc[:, TARGET_COLUMN_POSITIONS]
target_columns = Y.columns
Y = Y.to_numpy()

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# Transform data: scalers are fitted on the training split only and then
# applied unchanged to the test split. Targets are scaled too, so all metrics
# computed later are in scaled units.
scaler_x = MaxAbsScaler()
scaler_y = MaxAbsScaler()

X_train = scaler_x.fit_transform(X_train)
Y_train = scaler_y.fit_transform(Y_train)

X_test = scaler_x.transform(X_test)
Y_test = scaler_y.transform(Y_test)

In [31]:
# Experiment labels. scaler_name, model_name and score_name are only used to
# build the names of the exported .xlsx result files; they do not configure
# the pipeline themselves, so keep them in sync with the code by hand.
# NOTE(review): 'Task1_80_20_Z3_MaxAbsScalerXY' appears to describe the 80/20
# split, the |z| < 3 outlier filter and MaxAbsScaler on X and Y - confirm.
scaler_name = 'Task1_80_20_Z3_MaxAbsScalerXY'
model_name = 'HistGradientBoostingRegressor'
# embedded_name selects the decomposition branch applied to the stacked
# features and is also part of the second-stage result file name.
embedded_name = 'FastICA' # one of: SGTM, PCA, KernelPCA, IncrementalPCA, FastICA
score_name = 'R2'

In [32]:
# Stage 1: fit one HistGradientBoostingRegressor per target column on the
# scaled features, keep its train/test predictions, and export test metrics.
results = {}
y_train_predictions_1 = []
y_test_predictions_1 = []
grnn_sigmas_1 = []
for i, (y_train, y_test) in enumerate(zip(Y_train.T, Y_test.T)):
    print(f'Training: {target_columns[i]}')
    regressor = HistGradientBoostingRegressor(random_state=42)
    regressor.fit(X_train, y_train)
    # Predict
    # NOTE(review): y_train_pred is an in-sample prediction (same rows the
    # model was fitted on). It is later concatenated with X_train as an input
    # to the second stage, so the second stage sees optimistic train features.
    # Consider out-of-fold predictions if that matters.
    y_train_pred = regressor.predict(X_train)
    y_test_pred = regressor.predict(X_test)
    # Save predictions for the second stage
    y_train_predictions_1.append(y_train_pred)
    y_test_predictions_1.append(y_test_pred)
    # Test-set metrics (computed on the scaled targets)
    MaxError = max_error                    (y_test, y_test_pred)
    MedError = median_absolute_error        (y_test, y_test_pred)
    MAE = mean_absolute_error               (y_test, y_test_pred)
    MAPE = mean_absolute_percentage_error   (y_test, y_test_pred)
    MSE = mean_squared_error                (y_test, y_test_pred)
    # The `squared=False` keyword of mean_squared_error was deprecated in
    # scikit-learn 1.4 and removed in 1.6; the square root of the MSE gives
    # the same value on every version.
    RMSE = np.sqrt(MSE)
    RRMSE = relative_root_mean_squared_error(y_test, y_test_pred)
    R2 = r2_score                           (y_test, y_test_pred)
    results[f'{target_columns[i]}'] = {
        'MaxError' : MaxError,
        'MedError' : MedError,
        'MAE' : MAE,
        'MAPE' : MAPE,
        'MSE' : MSE,
        'RMSE' : RMSE,
        'RRMSE' : RRMSE,
        'R2' : R2
    }

# Save xlsx: one column per target, one row per metric
exp_result = pd.DataFrame(results)
exp_result.to_excel(f'{scaler_name}_{model_name}_{score_name}.xlsx')

Training: Local Avg.
Training: Global Avg.


In [33]:
# train_y_regressor = np.stack(y_train_predictions_1, axis=1)
# test_y_regressor = np.stack(y_test_predictions_1, axis=1)

# train_pca_input = np.concatenate((X_train, train_y_regressor), axis=1)
# test_pca_input = np.concatenate((X_test, test_y_regressor), axis=1)

# print(train_y_regressor.shape)
# print(X_train.shape)
# print(train_pca_input.shape)

# pca = PCA()
# PCA_train = pca.fit_transform(train_pca_input)
# PCA_test = pca.transform(test_pca_input)

# #explained_variance = pca.explained_variance_ratio_
# print(PCA_train.shape)

from sklearn.decomposition import PCA, FastICA, IncrementalPCA, KernelPCA
from gtm import MultiLabelGTM

# Build the second-stage feature space: the scaled inputs extended with the
# stage-1 predictions (one column per target), then passed through the
# decomposition selected by `embedded_name`.
train_y_regressor = np.stack(y_train_predictions_1, axis=1)
test_y_regressor = np.stack(y_test_predictions_1, axis=1)

train_decomposition_input = np.concatenate((X_train, train_y_regressor), axis=1)
test_decomposition_input = np.concatenate((X_test, test_y_regressor), axis=1)

if embedded_name == 'SGTM':
    # MultiLabelGTM exposes fit/predict instead of fit_transform/transform;
    # it is fitted with the stacked input as both arguments.
    gtm = MultiLabelGTM(center_of_mass = True)
    gtm.fit(train_decomposition_input, train_decomposition_input)
    X_train_new = gtm.predict(train_decomposition_input)
    X_test_new = gtm.predict(test_decomposition_input)
else:
    # Factories (not instances) so only the selected estimator is created.
    decomposition_factories = {
        'PCA': lambda: PCA(),
        'KernelPCA': lambda: KernelPCA(kernel = 'poly'),
        # FastICA starts from a random unmixing matrix; seed it like the
        # other estimators in this notebook so reruns give the same features.
        'FastICA': lambda: FastICA(random_state=42),
        'IncrementalPCA': lambda: IncrementalPCA(),
    }
    if embedded_name not in decomposition_factories:
        # Fail here instead of leaving X_train_new undefined (or stale from
        # an earlier run) and breaking or silently misleading the next cell.
        raise ValueError(
            f'Unknown embedded_name {embedded_name!r}; expected one of '
            f"{['SGTM', *decomposition_factories]}"
        )
    pca = decomposition_factories[embedded_name]()
    # Fit on the training split only, then apply the same mapping to test.
    X_train_new = pca.fit_transform(train_decomposition_input)
    X_test_new = pca.transform(test_decomposition_input)



In [34]:
# from lazypredict.Supervised import LazyRegressor
from sklearn import datasets
from sklearn.utils import shuffle
import numpy as np

# Stage 2: refit one HistGradientBoostingRegressor per target on the
# decomposed features and export the test metrics.
results = {}
predictions = []

# NOTE(review): rebinding X_train / X_test makes the notebook order-dependent:
# re-running the decomposition cell after this one would stack predictions on
# top of already-decomposed features. Restart and run all cells in order.
X_train = X_train_new
X_test = X_test_new
for i, (y_train, y_test) in enumerate(zip(Y_train.T, Y_test.T)):
    print(f'Training: {target_columns[i]}')
    regressor = HistGradientBoostingRegressor(random_state=42)
    regressor.fit(X_train, y_train)
    # Predict
    y_test_pred = regressor.predict(X_test)
    # Save results
    predictions.append(y_test_pred)
    # Test-set metrics (computed on the scaled targets)
    MaxError = max_error                    (y_test, y_test_pred)
    MedError = median_absolute_error        (y_test, y_test_pred)
    MAE = mean_absolute_error               (y_test, y_test_pred)
    MAPE = mean_absolute_percentage_error   (y_test, y_test_pred)
    MSE = mean_squared_error                (y_test, y_test_pred)
    # The `squared=False` keyword of mean_squared_error was deprecated in
    # scikit-learn 1.4 and removed in 1.6; the square root of the MSE gives
    # the same value on every version.
    RMSE = np.sqrt(MSE)
    RRMSE = relative_root_mean_squared_error(y_test, y_test_pred)
    R2 = r2_score                           (y_test, y_test_pred)
    results[f'{target_columns[i]}'] = {
        'MaxError' : MaxError,
        'MedError' : MedError,
        'MAE' : MAE,
        'MAPE' : MAPE,
        'MSE' : MSE,
        'RMSE' : RMSE,
        'RRMSE' : RRMSE,
        'R2' : R2
    }

# Save xlsx: one column per target, one row per metric
exp_result = pd.DataFrame(results)
exp_result.to_excel(f'{scaler_name}_{model_name}_{embedded_name}_{score_name}.xlsx')


# for i, (y_train, y_test) in enumerate(zip(Y_train[:].T, Y_test[:].T)):
#   print(f'Training: {target_columns[i]}')
#   reg = LazyRegressor(verbose=0, ignore_warnings=False, custom_metric=None)
#   models,predictions = reg.fit(X_train, X_test, y_train, y_test)
#   print(models)
#   print()

Training: Local Avg.
Training: Global Avg.


In [35]:
# results = {}
# predictions = []

# X_train = PCA_train
# X_test = PCA_test
# for i, (y_train, y_test) in enumerate(zip(Y_train[:].T, Y_test[:].T)):
#     print(f'Training: {target_columns[i]}')
#     regressor = HistGradientBoostingRegressor(random_state=42)
#     # gbr = GradientBoostingRegressor(random_state=42)
#     regressor.fit(X_train, y_train)
#     # Predict
#     y_train_pred = regressor.predict(X_train)
#     y_test_pred = regressor.predict(X_test)
#     # Save results
#     predictions.append(y_test_pred)
#     # Save xlsx
#     MaxError = max_error                    (y_test, y_test_pred)
#     MAE = mean_absolute_error               (y_test, y_test_pred)
#     MSE = mean_squared_error                (y_test, y_test_pred)
#     MedError = median_absolute_error        (y_test, y_test_pred)
#     RMSE = mean_squared_error               (y_test, y_test_pred, squared=False)
#     MAPE = mean_absolute_percentage_error   (y_test, y_test_pred)
#     R2 = r2_score                           (y_test, y_test_pred)
#     results.update({
#     f'{target_columns[i]}':
#         {
#             'MaxError' : MaxError,
#             'MAE' : MAE,
#             'MSE' : MSE,
#             'MedError' : MedError,
#             'RMSE' : RMSE,
#             'MAPE' : MAPE,
#             'R2' : R2
#         }
#     })


# exp_result = pd.DataFrame(results)
# exp_result.to_excel(f'{scaler_name}_{model_name}_{embedded_name}_{score_name}.xlsx')