# In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MaxAbsScaler, MinMaxScaler, Normalizer, PowerTransformer, RobustScaler, StandardScaler, PolynomialFeatures
import numpy as np
from sklearn.base import BaseEstimator, RegressorMixin
from scipy.optimize import differential_evolution
from sklearn.metrics import max_error, mean_absolute_error, mean_squared_error, mean_absolute_percentage_error, median_absolute_error, r2_score
import time


# Preprocessing transformers to evaluate, keyed by their class name.
# The values are the classes themselves (not instances).  Uncomment an entry
# to add it to the mapping.
scalers = {
    transformer.__name__: transformer
    for transformer in (
        PolynomialFeatures,
        # MaxAbsScaler,
        # MinMaxScaler,
        # Normalizer,
        # PowerTransformer,
        # RobustScaler,
        # StandardScaler,
    )
}

class GRNN(BaseEstimator, RegressorMixin):
    """Gaussian-kernel weighted-average regressor (GRNN).

    A prediction is the average of the training targets, each weighted by
    ``exp(-d**2 / (2 * sigma**2))``, where ``d`` is the Euclidean distance
    between the query point and the corresponding training sample.

    Parameters
    ----------
    name : str, default="GRNN"
        Free-form label; stored but not used by ``predict``.
    sigma : float, default=0.1
        Kernel bandwidth.  It is stored exactly as passed in: scikit-learn
        expects constructor arguments to be kept unmodified so that
        ``get_params``, ``clone`` and ``repr`` report the original value.
        The ``2 * sigma**2`` term is derived inside ``predict``.
    """

    def __init__(self, name="GRNN", sigma=0.1):
        self.name = name
        self.sigma = sigma

    def predict(self, instance_X, X_train_scaled, Y_train):
        """Predict the target value for a single query point.

        There is no ``fit`` step: the training data is supplied on every
        call, so this signature differs from scikit-learn's ``predict(X)``.

        Parameters
        ----------
        instance_X : array-like
            One query sample; it is broadcast against every row of
            ``X_train_scaled``.
        X_train_scaled : array-like
            Training samples, one per row.
        Y_train : array-like
            Training targets, one per row of ``X_train_scaled``.

        Returns
        -------
        float
            Kernel-weighted average of ``Y_train``.
        """
        # Denominator of the Gaussian exponent.
        two_sigma_sq = 2 * np.power(self.sigma, 2)

        # The kernel needs the *squared* Euclidean distance, so it is used
        # directly rather than taking a square root and squaring it again.
        squared_distances = np.square(X_train_scaled - instance_X).sum(axis=1)
        weights = np.exp(-squared_distances / two_sigma_sq)

        # Floor the normaliser so a query far from every training sample
        # (all weights underflowing to 0) does not divide by zero.
        weight_sum = max(weights.sum(), 1e-07)
        return np.multiply(weights, Y_train).sum() / weight_sum

# Read the tab-separated dataset and discard the simulation-count column.
data = pd.read_csv(
    '/content/Tunneling_Induced_building_damage_dataset.txt',
    sep='\t',
).drop(columns='Tot No. Simulations')
data_columns = data.columns
data.head()  # the returned preview is not assigned or printed here

# The first 15 columns are used as inputs, every remaining column as a target.
X, Y = data.iloc[:, :15], data.iloc[:, 15:]

# 50/50 train/test split with a fixed seed.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.5, random_state=42
)

# Targets as plain ndarrays so individual target columns can be sliced
# positionally with ``[:, i]``.
Y_train, Y_test = Y_train.to_numpy(), Y_test.to_numpy()


# Seed for the sigma search so that the reported sigma and metrics are
# reproducible from run to run.
DE_SEED = 42


def f(params, X_train, Y_train, X_test, Y_test):
    """Objective for the sigma search: MSE of GRNN predictions.

    Parameters
    ----------
    params : sequence of length 1
        Candidate solution from the optimiser; its single entry is sigma.
    X_train, Y_train : array-like
        Samples and single-target values the GRNN predicts from.
    X_test, Y_test : array-like
        Samples and single-target values the candidate sigma is scored on.

    Returns
    -------
    float
        Mean squared error between ``Y_test`` and the predictions.
    """
    s, = params  # Unpack the parameters
    grnn = GRNN(sigma=s)
    predictions = np.array(
        [grnn.predict(row, X_train, Y_train) for row in X_test]
    )
    return mean_squared_error(Y_test, predictions)  # USE MSE


def _regression_metrics(y_true, y_pred):
    """Return the reported error metrics for one target as a dict."""
    mse = mean_squared_error(y_true, y_pred)
    return {
        'MaxError': max_error(y_true, y_pred),
        'MAE': mean_absolute_error(y_true, y_pred),
        'MSE': mse,
        'MedError': median_absolute_error(y_true, y_pred),
        # Derived from the MSE instead of ``squared=False``, which newer
        # scikit-learn releases no longer accept.
        'RMSE': np.sqrt(mse),
        'MAPE': mean_absolute_percentage_error(y_true, y_pred),
        'R2': r2_score(y_true, y_pred),
    }


for name, scaler_type in scalers.items():
    # Fit the transformer on the training inputs only.  The transformed data
    # goes into new names so that X_train / X_test stay untouched and every
    # transformer in ``scalers`` starts from the same raw inputs.
    data_scaler = scaler_type()
    X_train_scaled = data_scaler.fit_transform(X_train)
    X_test_scaled = data_scaler.transform(X_test)

    results = {}
    for i in range(Y_train.shape[1]):
        y_train_i = Y_train[:, i]
        y_test_i = Y_test[:, i]

        start_time = time.perf_counter()

        # NOTE(review): sigma is tuned by minimising the error on the same
        # test split that the metrics below are computed on, so those metrics
        # are optimistic.  Consider tuning on a separate validation split.
        res = differential_evolution(
            f,
            bounds=[(0.001, 10)],
            args=(X_train_scaled, y_train_i, X_test_scaled, y_test_i),
            seed=DE_SEED,
        )
        s = res["x"][0]

        grnn = GRNN(sigma=s)
        predictions = np.array(
            [grnn.predict(row, X_train_scaled, y_train_i)
             for row in X_test_scaled]
        )

        # Covers both the sigma search and the final prediction pass.
        exp_time = time.perf_counter() - start_time

        results[f'test_{i+1}'] = {
            'time': exp_time,
            'sigma': s,
            # 'y_true': y_test_i,
            # 'y_pred': predictions,
            **_regression_metrics(y_test_i, predictions),
        }

    # One column per target, one row per recorded quantity.
    exp_result = pd.DataFrame(results)
    exp_result.to_excel(f'MSE_{name}.xlsx')