In [123]:
import numpy as np
import pandas as pd
from GRNN import GRNN, calculate_error_cost
from gtm import MultiLabelGTM
from scipy import stats
from scipy.optimize import differential_evolution
from sklearn.base import clone
from sklearn.decomposition import PCA
from sklearn.ensemble import HistGradientBoostingRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import KFold, train_test_split
from sklearn.preprocessing import MaxAbsScaler, MinMaxScaler, Normalizer, PowerTransformer, RobustScaler, StandardScaler, PolynomialFeatures
from sklearn.metrics import max_error, mean_absolute_error, mean_squared_error, mean_absolute_percentage_error, median_absolute_error, r2_score
from sklearn.base import BaseEstimator, RegressorMixin

In [124]:
from sklearn.linear_model import Ridge

In [125]:
class ModelStrategy(object):
    def __init__(self, regressor) -> None:
        self._strategy = regressor
        self._regressors = []

    def get_strategy(self):
        return self._strategy

    def get_regressors(self):
        return self._regressors

    def set_strategy(self, regressor) -> None:
        self._strategy = regressor

    def fit(self, X_train, Y_train) -> None:
        for y_train in Y_train.T:
            regressor = clone(self._strategy, safe=True)
            # Fit and save all regressors
            regressor.fit(X_train, y_train)
            self._regressors.append(regressor)

    def fit_transform(self, X_train, Y_train):
        y_train_pred = []
        for y_train in Y_train.T:
            regressor = clone(self._strategy, safe=True)
            # Fit and save all regressors
            regressor.fit(X_train, y_train)
            self._regressors.append(regressor)
            # Predict and save prediction for future use
            y_train_pred.append(regressor.predict(X_train))

        # Transfrom data to numpy array and return
        return np.stack(y_train_pred, axis=1)

    def transform(self, X):
        y_pred = []
        for regressor in self._regressors:
            # For each saved regressor predict an output value
            y_pred.append(regressor.predict(X))
        return np.stack(y_pred, axis=1)

class AdapterSGTM(object):
    def __init__(self):
        self._gtm = MultiLabelGTM(center_of_mass = True, print_steps = False)

    def fit(self, X_train) -> None:
        self._gtm.fit(X_train, X_train)

    def fit_transform(self, X_train):
        self._gtm.fit(X_train, X_train)
        return self._gtm.predict(X_train)

    def transform(self, X):
        return self._gtm.predict(X)


def relative_root_mean_squared_error(y, y_pred):
    n = len(y) # update
    num = np.sum(np.square(y - y_pred)) / n  # update
    den = np.sum(np.square(y_pred))
    squared_error = num / den
    rrmse_loss = np.sqrt(squared_error)
    return rrmse_loss

def calculate_errors(y, y_pred):
    MaxError = max_error                         (y, y_pred)
    MedError = median_absolute_error             (y, y_pred)
    MAE = mean_absolute_error                    (y, y_pred)
    MAPE = mean_absolute_percentage_error        (y, y_pred)
    MSE = mean_squared_error                     (y, y_pred)
    RMSE = mean_squared_error                    (y, y_pred, squared=False)
    RRMSE = relative_root_mean_squared_error     (y, y_pred)
    R2 = r2_score                                (y, y_pred)

    return {
            'MaxError' : round(MaxError, 5),
            'MedError' : round(MedError, 5),
            'MAE' :      round(MAE, 5),
            'MAPE' :     round(MAPE, 5),
            'MSE' :      round(MSE, 5),
            'RMSE' :     round(RMSE, 5),
            'RRMSE' :    round(RRMSE, 5),
            'R2' :       round(R2, 5)}

In [126]:
# Choose task
task_name = 'Task1' # Task1 Task2
additional_notes = '_max_iter'

# scaler_name = f'{task_name}_Z3_MaxAbsScalerXY'
# model_name = 'HistGradientBoostingRegressor'
# embedded_name = 'SGTM' # SGTM PCA KernelPCA IncrementalPCA FastICA

In [127]:
######### TODO: Refactor
# Load all data with pandas
data = pd.read_csv('/content/Tunneling_Induced_building_damage_dataset.txt', sep='\t')
data = data.drop(labels = 'Tot No. Simulations', axis=1)
# remove outliers
data = data[(np.abs(stats.zscore(data.select_dtypes(exclude='object'))) < 3).all(axis=1)]
data_columns = data.columns
data.head()
X = data.iloc[:,:15].to_numpy()
if task_name == 'Task1':
    Y = data.iloc[:,[29, 30]]
elif task_name == 'Task2':
    Y = data.iloc[:,15:]
    Y = Y.iloc[:,1:-2]
target_columns = Y.columns
Y = Y.to_numpy()
######### END: Refactor

model_1_errors_train = []
model_1_errors_test = []
model_2_errors_train = []
model_2_errors_test = []

kf = KFold(n_splits=5, shuffle=True, random_state=42)

for train_index, test_index in kf.split(X, Y):
    X_train, X_test = X[train_index], X[test_index]
    Y_train, Y_test = Y[train_index], Y[test_index]

    scaler_x = RobustScaler()
    scaler_y = RobustScaler()
    model1 = ModelStrategy(HistGradientBoostingRegressor(max_iter=50, random_state=42))
    model2 = AdapterSGTM()
    model3 = ModelStrategy(HistGradientBoostingRegressor(max_iter=50, random_state=42))

    X_train = scaler_x.fit_transform(X_train)
    Y_train = scaler_y.fit_transform(Y_train)
    X_test = scaler_x.transform(X_test)

    # Model 1 fit on the model
    Y_train_pred = model1.fit_transform(X_train, Y_train)
    Y_test_pred = model1.transform(X_test)

    # Use decomposition to increase input features by merge X_train & Y_pred
    X_train_new = model2.fit_transform(np.concatenate((X_train, Y_train_pred), axis=1))
    X_test_new = model2.transform(np.concatenate((X_test, Y_test_pred), axis=1))

    # Model 2 fit on the model
    Y_train_pred_new = model3.fit_transform(X_train_new, Y_train)
    Y_test_pred_new = model3.transform(X_test_new)

    for i, target_name in enumerate(target_columns):
      # Model 1 (Save results for calculating errors)
      model_1_errors_train.append({f'{target_name}' : calculate_errors(Y_train.T[i], Y_train_pred.T[i])})
      model_1_errors_test.append({f'{target_name}' : calculate_errors(Y_test.T[i], scaler_y.inverse_transform(Y_test_pred).T[i])})

      # Model 2 (Save results for calculating errors)
      model_2_errors_train.append({f'{target_name}' : calculate_errors(Y_train.T[i], Y_train_pred_new.T[i])})
      model_2_errors_test.append({f'{target_name}' : calculate_errors(Y_test.T[i], scaler_y.inverse_transform(Y_test_pred_new).T[i])})

17
--- 0.01610708236694336 seconds ---
17
--- 0.016488313674926758 seconds ---
17
--- 0.01725935935974121 seconds ---
17
--- 0.015920639038085938 seconds ---
17
--- 0.015824079513549805 seconds ---


In [128]:
class AveragesCalculator:
    def __init__(self, data):
        self.data = data

    def _split_data(self):
        splitted_data = {}

        for dictionary in self.data:
            for key, inner_dict in dictionary.items():
                if key not in splitted_data:
                    splitted_data[key] = []
                splitted_data[key].append(inner_dict)

        return splitted_data

    def _sum_values(self, data):
        sums = {}

        for key, values in data.items():
            sums[key] = {}
            num_elements = len(values)
            for dictionary in values:
                for inner_key, value in dictionary.items():
                    if inner_key in sums[key]:
                        sums[key][inner_key] += value
                    else:
                        sums[key][inner_key] = value
            # Store the number of elements for each key
            sums[key]['num_elements'] = num_elements

        return sums

    def _compute_averages(self, sums):
        averages = {}

        for key, value in sums.items():
            averages[key] = {}
            num_elements = value.pop('num_elements')  # Remove num_elements from the dictionary
            for inner_key, inner_value in value.items():
                averages[key][inner_key] = inner_value / num_elements

        return averages

    def calculate_averages(self):
        splitted_data = self._split_data()
        sums = self._sum_values(splitted_data)
        averages = self._compute_averages(sums)

        return averages

# Calculate the averages
_model_1_errors_train = AveragesCalculator(model_1_errors_train).calculate_averages()
_model_1_errors_test = AveragesCalculator(model_1_errors_test).calculate_averages()
_model_2_errors_train = AveragesCalculator(model_2_errors_train).calculate_averages()
_model_2_errors_test = AveragesCalculator(model_2_errors_test).calculate_averages()

def add_sufix_to_keys(dictionary, sufix):
    return {key + sufix : value for key, value in dictionary.items()}

_model_1_errors_train = add_sufix_to_keys(_model_1_errors_train, '_train_1')
_model_1_errors_test = add_sufix_to_keys(_model_1_errors_test, '_test_1')
_model_2_errors_train = add_sufix_to_keys(_model_2_errors_train, '_train_2')
_model_2_errors_test = add_sufix_to_keys(_model_2_errors_test, '_test_2')

results = {
    **dict(sorted({**_model_1_errors_train, **_model_1_errors_test}.items(), reverse=True)),
    **dict(sorted({**_model_2_errors_train, **_model_2_errors_test}.items(), reverse=True))
}

results = pd.DataFrame(results)
results.to_excel(f'{task_name}_KFold_Decomposition{additional_notes}.xlsx')
results

Unnamed: 0,Local Avg._train_1,Local Avg._test_1,Global Avg._train_1,Global Avg._test_1,Local Avg._train_2,Local Avg._test_2,Global Avg._train_2,Global Avg._test_2
MaxError,0.65915,6.246528,0.428664,0.001316,0.57042,6.310292,0.220154,0.001402
MedError,0.052986,0.732302,0.033756,0.000134,0.009152,0.37607,0.015206,0.000134
MAE,0.085532,1.18042,0.053976,0.00022,0.03201,1.01647,0.02513,0.00021
MAPE,150548200000000.0,2026853000000000.0,309324300000.0,0.597352,20908340000000.0,1222663000000000.0,272506100000.0,0.489118
MSE,0.016166,2.96138,0.006666,0.0,0.003604,2.820114,0.001512,0.0
RMSE,0.126964,1.717598,0.081568,0.000314,0.059924,1.675092,0.038846,0.000308
RRMSE,0.005908,0.035202,0.005026,0.01743,0.002666,0.033786,0.002324,0.017042
R2,0.965004,0.725978,0.982104,0.879094,0.992206,0.734588,0.99594,0.882718


In [141]:
def black_box_function(x, y):
    """Function with unknown internals we wish to maximize.

    This is just serving as an example, for all intents and
    purposes think of the internals of this function, i.e.: the process
    which generates its output values, as unknown.
    """
    return -(-x ** 2 - (y - 1) ** 2 + 1)

In [None]:
!pip install bayesian-optimization

In [151]:
from bayes_opt import BayesianOptimization

# Bounded region of parameter space
pbounds = {'x': (2, 4), 'y': (-3, 3)}

optimizer = BayesianOptimization(
    f=black_box_function,
    pbounds=pbounds,
    random_state=1,
    allow_duplicate_points=True
)

optimizer.maximize(
    init_points=2,
    n_iter=10,
)

print(optimizer.max)
for i, res in enumerate(optimizer.res):
    print("Iteration {}: \n\t{}".format(i, res))