In [None]:
# Mount Google Drive at /content/drive (Colab-only); the dataset is read from
# a path under this mount point later in the notebook.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


### Libraries & helper code

In [1]:
!pip install scikit-learn-intelex
from sklearnex import patch_sklearn
patch_sklearn()

Collecting scikit-learn-intelex
  Downloading scikit_learn_intelex-2024.7.0-py310-none-manylinux1_x86_64.whl.metadata (12 kB)
Collecting daal4py==2024.7.0 (from scikit-learn-intelex)
  Downloading daal4py-2024.7.0-py310-none-manylinux1_x86_64.whl.metadata (7.4 kB)
Collecting daal==2024.7.0 (from daal4py==2024.7.0->scikit-learn-intelex)
  Downloading daal-2024.7.0-py2.py3-none-manylinux1_x86_64.whl.metadata (1.1 kB)
Downloading scikit_learn_intelex-2024.7.0-py310-none-manylinux1_x86_64.whl (191 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m191.5/191.5 kB[0m [31m6.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading daal4py-2024.7.0-py310-none-manylinux1_x86_64.whl (11.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.3/11.3 MB[0m [31m80.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading daal-2024.7.0-py2.py3-none-manylinux1_x86_64.whl (66.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m66.6/66.6 MB[0m [31m10.4 MB/s[0m eta 

Intel(R) Extension for Scikit-learn* enabled (https://github.com/intel/scikit-learn-intelex)


In [2]:
import numpy as np
import pandas as pd
from scipy import stats
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.svm import NuSVR
from sklearn.base import clone
from sklearn.model_selection import KFold
from sklearn.preprocessing import *
from sklearn.metrics import *
from sklearn.decomposition import *
from sklearn.ensemble import *
from sklearn.linear_model import *
from collections.abc import Iterable

In [3]:
class ModelStrategy(object):
    """Multi-target wrapper that fits one independent regressor per output column.

    ``regressor`` is either

    * a single estimator (any object exposing ``fit``) that is cloned once per
      output column, or
    * a collection of estimators (list, tuple, ``dict.values()``, ...), one per
      output column, in column order.

    A single estimator is recognised by its ``fit`` attribute rather than by
    "is it iterable?", because some estimators (e.g. scikit-learn ensembles)
    are themselves iterable and would otherwise be mistaken for a collection.

    Note: after ``fit`` the object returned by ``get_regressors()`` is the list
    of *fitted* clones (this mirrors the historical behaviour of the class).
    """

    def __init__(self, regressor) -> None:
        self._regressors = regressor

    def get_regressors(self):
        """Return the configured regressor(s); after ``fit`` the fitted clones."""
        return self._regressors

    def set_strategy(self, regressor) -> None:
        """Replace the regressor(s) used by the next call to ``fit``."""
        self._regressors = regressor

    @staticmethod
    def _is_single_estimator(candidate) -> bool:
        """True when ``candidate`` is one estimator rather than a collection."""
        return hasattr(candidate, "fit")

    def fit(self, X_train, Y_train) -> None:
        """Fit one clone per output column of ``Y_train``.

        Parameters
        ----------
        X_train : array-like, passed unchanged to every regressor's ``fit``.
        Y_train : array-like of shape (n_samples, n_outputs); a 1-D vector is
            treated as a single output column.

        Raises
        ------
        AssertionError
            If a collection of regressors was supplied whose length differs
            from the number of output columns.
        """
        targets = np.asarray(Y_train)
        if targets.ndim == 1:
            targets = targets.reshape(-1, 1)
        columns = targets.T

        if self._is_single_estimator(self._regressors):
            # Same template for every output; each one gets its own clone below.
            templates = [self._regressors] * len(columns)
        else:
            templates = list(self._regressors)
            assert len(templates) == len(columns), \
                "Length of regressor should be the same as outputs in data"

        fitted = []
        for template, y_train in zip(templates, columns):
            # clone() yields an unfitted copy, so templates are never mutated
            # and refitting (e.g. once per CV fold) starts from scratch.
            regressor = clone(template, safe=True)
            regressor.fit(X_train, y_train)
            fitted.append(regressor)

        self._fitted_regressors = fitted
        self._regressors = self._fitted_regressors

    def fit_transform(self, X_train, Y_train):
        """Fit on ``(X_train, Y_train)`` and return the predictions for ``X_train``."""
        self.fit(X_train, Y_train)
        return self.transform(X_train)

    def transform(self, X):
        """Predict every output for ``X``.

        Returns an array of shape (n_samples, n_outputs) whose column ``j`` is
        the prediction of the ``j``-th fitted regressor.

        Raises
        ------
        AttributeError
            If called before ``fit``.
        """
        fitted = getattr(self, "_fitted_regressors", None)
        if not fitted:
            raise AttributeError(
                "ModelStrategy is not fitted yet; call fit() before transform()."
            )
        return np.stack([regressor.predict(X) for regressor in fitted], axis=1)


def relative_root_mean_squared_error(y, y_pred):
    """Relative RMSE as used in this notebook.

    Computes ``sqrt((sum((y - y_pred)**2) / n) / sum(y_pred**2))`` where ``n``
    is ``len(y)``. Note that the normalisation uses the *predicted* values.
    """
    mean_squared_residual = np.sum(np.square(y - y_pred)) / len(y)
    prediction_energy = np.sum(np.square(y_pred))
    return np.sqrt(mean_squared_residual / prediction_energy)

def calculate_errors(y, y_pred):
    """Compute the regression metrics reported in this notebook for one target.

    Parameters
    ----------
    y, y_pred : 1-D array-likes with the true and the predicted values.

    Returns
    -------
    dict with the keys ``MaxError``, ``MedError``, ``MAE``, ``MSE``, ``RMSE``,
    ``RRMSE`` and ``R2``.
    """
    MSE = mean_squared_error(y, y_pred)
    return {
        'MaxError': max_error(y, y_pred),
        'MedError': median_absolute_error(y, y_pred),
        'MAE': mean_absolute_error(y, y_pred),
        'MSE': MSE,
        # RMSE is taken as sqrt(MSE) instead of mean_squared_error(squared=False):
        # the `squared` argument is deprecated/removed in recent scikit-learn
        # releases, and for a single target both forms give the same value.
        'RMSE': np.sqrt(MSE),
        'RRMSE': relative_root_mean_squared_error(y, y_pred),
        'R2': r2_score(y, y_pred),
    }

class AveragesCalculator:
    """Average nested metric dictionaries grouped by their outer key.

    ``data`` is an iterable of ``{outer_key: {metric_name: value}}`` records.
    ``calculate_averages`` returns ``{outer_key: {metric_name: mean}}`` where
    each mean is the sum of the metric over all records of that outer key
    divided by the number of such records.
    """

    def __init__(self, data):
        self.data = data

    def _split_data(self):
        """Group the inner dictionaries by outer key, keeping encounter order."""
        grouped = {}
        for record in self.data:
            for outer_key, metrics in record.items():
                grouped.setdefault(outer_key, []).append(metrics)
        return grouped

    def _sum_values(self, data):
        """Sum each metric per outer key and record the record count.

        The count is stored under the extra key ``'num_elements'``.
        """
        totals = {}
        for outer_key, metric_dicts in data.items():
            running = totals[outer_key] = {}
            for metrics in metric_dicts:
                for name, value in metrics.items():
                    if name not in running:
                        running[name] = value
                    else:
                        running[name] += value
            running['num_elements'] = len(metric_dicts)
        return totals

    def _compute_averages(self, sums):
        """Divide every summed metric by its group's ``'num_elements'``.

        The ``'num_elements'`` entry is popped from ``sums`` in the process.
        """
        averages = {}
        for outer_key, totals in sums.items():
            count = totals.pop('num_elements')
            averages[outer_key] = {
                name: total / count for name, total in totals.items()
            }
        return averages

    def calculate_averages(self):
        """Run the split -> sum -> average pipeline and return the averages."""
        return self._compute_averages(self._sum_values(self._split_data()))


def add_sufix_to_keys(dictionary, sufix):
    """Return a new dict whose keys are the original keys with ``sufix`` appended."""
    renamed = {}
    for key in dictionary:
        renamed[key + sufix] = dictionary[key]
    return renamed

In [None]:
# Load the tab-separated dataset from the mounted Drive.
DATA_PATH = '/content/drive/MyDrive/Master/Tunneling_Induced_building_damage_dataset_V2.txt'
data = pd.read_csv(DATA_PATH, sep='\t')

# Drop the columns that are not used as features or targets in one pass
# (the 'Unnamed: N' columns are presumably empty spreadsheet artefacts -- verify).
data = data.drop(columns=[
    'Number',
    'Identifier',
    'Unnamed: 17',
    'Unnamed: 19',
    'Unnamed: 21',
    'Unnamed: 22',
    'Unnamed: 27',
    'Unnamed: 28',
    'Unnamed: 29',
    'Unnamed: 30',
    'Unnamed: 31',
])

# Remove outliers: keep only rows whose every non-object column lies within
# 3 standard deviations of that column's mean (|z-score| < 3).
within_three_sigma = (np.abs(stats.zscore(data.select_dtypes(exclude='object'))) < 3).all(axis=1)
data = data[within_three_sigma]
data_columns = data.columns

# Features: the first 15 remaining columns (positional). Targets: selected by name.
X = data.iloc[:, :15].to_numpy()
Y = data[['C. Width', 'Tot. Cracks ', 'L. Average', 'Slope', 'Tilt', 'Ang. Dist.', 'Max Strain', 'G. Average']]
target_columns = Y.columns
Y = Y.to_numpy()

### Train the proposed method

In [None]:
import sklearn

# Per-target NuSVR configuration for the second-stage model; the C values are
# presumably the outcome of an earlier hyper-parameter search (not shown here).
best_params = {
    'C. Width':     NuSVR(C=0.1867913599020783),
    'Tot. Cracks ': NuSVR(C=0.2967302408188875),
    'L. Average':   NuSVR(C=0.5941133984965042),
    'Slope':        NuSVR(C=2.38168555197616),
    'Tilt':         NuSVR(C=0.943787827777539),
    'Ang. Dist.':   NuSVR(C=0.529197873595845),
    'Max Strain':   NuSVR(C=0.8406652885618334),
    'G. Average':   NuSVR(C=0.5941133984965042),
}

# Pair each regressor with its target *by name*, in the column order of Y,
# instead of relying on the dict's insertion order matching `target_columns`.
# A missing or misspelled target now fails loudly with a KeyError.
model3 = ModelStrategy([best_params[target_name] for target_name in target_columns])

In [None]:
# Per-fold, per-target error dictionaries. "model_1" is the first-stage NuSVR
# on the raw features; "model_2" is the full proposed pipeline
# (first-stage NuSVR -> PCA -> second-stage NuSVR).
model_1_errors_train = []
model_1_errors_test = []
model_2_errors_train = []
model_2_errors_test = []

kf = KFold(n_splits=5, shuffle=True, random_state=42)

# NOTE: scaler_x, scaler_y, model1, model2 and the fitted state of model3 are
# re-created/re-fitted in every fold, so after the loop they hold the objects
# of the LAST fold. The deployment function further down uses exactly those.
for train_index, test_index in kf.split(X, Y):
    X_train, X_test = X[train_index], X[test_index]
    Y_train, Y_test = Y[train_index], Y[test_index]

    scaler_x = MaxAbsScaler()
    scaler_y = MaxAbsScaler()

    model1 = ModelStrategy(NuSVR())
    model2 = PCA(n_components=0.95, svd_solver='full')

    # Scalers are fitted on the training part only to avoid leaking test data.
    X_train = scaler_x.fit_transform(X_train)
    Y_train_scaled = scaler_y.fit_transform(Y_train)
    X_test = scaler_x.transform(X_test)

    # Stage 1: one NuSVR per target on the scaled features.
    Y_train_pred = model1.fit_transform(X_train, Y_train_scaled)
    Y_test_pred = model1.transform(X_test)

    # PCA is fitted on features + TRUE (scaled) targets ...
    pretrain_X = np.concatenate((X_train, Y_train_scaled), axis=1)
    model2.fit(pretrain_X)

    # ... and applied to features + stage-1 PREDICTED targets.
    train_X = np.concatenate((X_train, Y_train_pred), axis=1)
    test_X = np.concatenate((X_test, Y_test_pred), axis=1)
    PC_train = model2.transform(train_X)
    PC_test = model2.transform(test_X)

    # Stage 2: tuned NuSVR per target on the principal components.
    Y_train_pred_new = model3.fit_transform(PC_train, Y_train_scaled)
    Y_test_pred_new = model3.transform(PC_test)

    # Undo the target scaling once per prediction matrix (it does not depend on
    # the target index, so it is hoisted out of the per-target loop below).
    evaluations = (
        (model_1_errors_train, Y_train, scaler_y.inverse_transform(Y_train_pred)),
        (model_1_errors_test, Y_test, scaler_y.inverse_transform(Y_test_pred)),
        (model_2_errors_train, Y_train, scaler_y.inverse_transform(Y_train_pred_new)),
        (model_2_errors_test, Y_test, scaler_y.inverse_transform(Y_test_pred_new)),
    )

    for i, target_name in enumerate(target_columns):
        for error_log, y_true, y_hat in evaluations:
            error_log.append({f'{target_name}': calculate_errors(y_true.T[i], y_hat.T[i])})

In [None]:
# Average every metric over the cross-validation folds, per target, for both
# models and both splits.
_model_1_errors_train, _model_1_errors_test, _model_2_errors_train, _model_2_errors_test = [
    AveragesCalculator(fold_errors).calculate_averages()
    for fold_errors in (
        model_1_errors_train,
        model_1_errors_test,
        model_2_errors_train,
        model_2_errors_test,
    )
]

# Tag each target name with its split; first-stage results get an extra "_1".
_model_1_errors_train = add_sufix_to_keys(_model_1_errors_train, '_train_1')
_model_1_errors_test = add_sufix_to_keys(_model_1_errors_test, '_test_1')
_model_2_errors_train = add_sufix_to_keys(_model_2_errors_train, '_train')
_model_2_errors_test = add_sufix_to_keys(_model_2_errors_test, '_test')

# Only the proposed method (model 2) is exported; the first-stage averages stay
# available in the _model_1_* variables. Columns are ordered by name, descending.
combined = {**_model_2_errors_train, **_model_2_errors_test}
results = {column: combined[column] for column in sorted(combined, reverse=True)}

results = pd.DataFrame(results)
results.to_excel('NuSVR_PCA_NuSVR.xlsx')
results

Unnamed: 0,Tot. Cracks _train,Tot. Cracks _test,Tilt_train,Tilt_test,Slope_train,Slope_test,Max Strain_train,Max Strain_test,L. Average_train,L. Average_test,G. Average_train,G. Average_test,C. Width_train,C. Width_test,Ang. Dist._train,Ang. Dist._test
MaxError,6.176809,8.056381,0.001638681,0.002447374,0.001476275,0.002445227,0.001908488,0.002597095,3.852821,6.37107,0.001042704,0.00158725,8.71041,9.51096,0.0006776687,0.001038175
MedError,0.056068,0.520634,2.516156e-06,0.0001862869,2.44374e-06,0.0001941882,1.978759e-06,0.0001871988,0.007527,0.720963,1.931885e-06,0.0001373279,0.138218,0.972935,1.047857e-06,4.8134e-05
MAE,0.410288,1.032138,2.067612e-05,0.0002842182,7.99943e-06,0.0002920591,5.011541e-05,0.0003012406,0.197022,1.115562,2.164143e-05,0.0002043843,0.561362,1.590868,2.653856e-05,9.572562e-05
MSE,0.840917,2.717315,9.618222e-09,1.954491e-07,4.269887e-09,2.03337e-07,2.941046e-08,2.252447e-07,0.303011,2.559053,6.774348e-09,9.501309e-08,1.390324,5.402628,6.371541e-09,2.805185e-08
RMSE,0.91648,1.642376,9.707428e-05,0.0004385052,6.363858e-05,0.0004476633,0.0001710673,0.0004727587,0.550164,1.595401,8.174304e-05,0.0003075188,1.178818,2.317936,7.974368e-05,0.0001661462
RRMSE,0.008625,0.032595,0.001618449,0.01504045,0.00101999,0.01476409,0.00586302,0.03512566,0.004587,0.028425,0.002055666,0.01596798,0.008854,0.037093,0.006580555,0.02941273
R2,0.930647,0.776158,0.9948357,0.8933076,0.9978773,0.8971135,0.9620752,0.7081815,0.975941,0.796676,0.9923563,0.8910658,0.922511,0.697327,0.954204,0.7993699


In [None]:
# Fold-averaged test metrics of the proposed method, rounded per metric for reporting.
_model_2_errors_test = AveragesCalculator(model_2_errors_test).calculate_averages()
results = pd.DataFrame(_model_2_errors_test)

# Number of decimals kept for each metric row.
decimals_per_metric = {
    'MaxError': 3,
    'MedError': 4,
    'MAE': 4,
    'MSE': 8,
    'RMSE': 4,
    'RRMSE': 2,
    'R2': 2,
}
for metric, ndigits in decimals_per_metric.items():
    results.loc[metric] = results.loc[metric].apply(round, ndigits=ndigits)

results.to_excel('NuSVRBestResults.xlsx')
print('Test Method 2')
results

Test Method 2


Unnamed: 0,C. Width,Tot. Cracks,L. Average,Slope,Tilt,Ang. Dist.,Max Strain,G. Average
MaxError,9.511,8.056,6.371,0.002,0.002,0.001,0.003,0.002
MedError,0.9729,0.5206,0.721,0.0002,0.0002,0.0,0.0002,0.0001
MAE,1.5909,1.0321,1.1156,0.0003,0.0003,0.0001,0.0003,0.0002
MSE,5.402628,2.717315,2.559053,2e-07,2e-07,3e-08,2.3e-07,1e-07
RMSE,2.3179,1.6424,1.5954,0.0004,0.0004,0.0002,0.0005,0.0003
RRMSE,0.04,0.03,0.03,0.01,0.02,0.03,0.04,0.02
R2,0.7,0.78,0.8,0.9,0.89,0.8,0.71,0.89


### Deployment

In [None]:
!pip install gradio

In [None]:
def predict_with_proposed_method(E, Ft, Gft, Height, Length, Opening, Distance, E_Soil, Soil_Piossons, Trough, Friction, VL, Depth, Diameter, Fc):
    """Predict the eight damage indicators for one building/tunnel configuration.

    The 15 scalar arguments are assembled, in signature order, into a single
    feature row and pushed through the module-level pipeline objects
    ``scaler_x`` -> ``model1`` -> ``model2`` (PCA) -> ``model3`` -> ``scaler_y``
    (inverse), so those globals must already be fitted.

    Returns a list of 8 floats: the absolute values of the predictions, rounded
    to 2, 0, 4, 6, 6, 6, 6 and 6 decimals respectively.
    """
    features = np.array(
        [E, Ft, Gft, Height, Length, Opening, Distance, E_Soil,
         Soil_Piossons, Trough, Friction, VL, Depth, Diameter, Fc]
    ).reshape(1, -1)
    features = scaler_x.transform(features)

    # Stage 1 predictions are appended to the features before the PCA projection.
    first_stage = model1.transform(features)
    components = model2.transform(np.concatenate((features, first_stage), axis=1))

    # Stage 2 prediction, mapped back to the original target units.
    scaled_prediction = model3.transform(components)
    magnitudes = abs(scaler_y.inverse_transform(scaled_prediction).reshape(-1))

    # Decimals kept for each output position.
    decimals = (2, 0, 4, 6, 6, 6, 6, 6)
    for position, places in enumerate(decimals):
        magnitudes[position] = magnitudes[position].round(places)
    return magnitudes.tolist()

# Smoke test: run the deployed pipeline on one hand-written feature row
# (arguments follow the function's positional order: E, Ft, Gft, ..., Fc).
predict_with_proposed_method(5.13287365e+03, 8.20382828e-01, 2.51497220e-02, 5.84471107e+00,
       3.16364973e+01, 3.20986902e-01, 7.66017529e+00, 1.66447362e+02,
       3.53817848e-01, 1.71115194e+01, 1.87632468e-01, 8.27565000e-01,
       4.13622336e+01, 1.37558136e+01, 1.55174602e+01)

[0.37, 0.0, 0.001, 0.000894, 0.000873, 0.000103, 0.000112, 0.000498]

In [None]:
import matplotlib.pyplot as plt
import numpy as np

import gradio as gr

# (minimum, maximum, label, info) for every model input, in the positional
# order expected by predict_with_proposed_method.
INPUT_SLIDERS = [
    (898, 8945, "E (MPa)", "Модуль пружності матеріалу"),
    (0.1148, 1.16, "Ft (MPa)", "Міцність розриву"),
    (0.0042, 0.03, "Gft (N.mm/mm2)", "Енергія руйнування будівельного матеріалу при розтягуванні "),
    (3, 20, "Height (m)", "Висота будівлі"),
    (8, 60, "Length (m)", "Довжина будівлі"),
    (0.15, 30, "Opening Rate (%)", "Ступінь відкриття фасаду"),
    (0.12, 45, "Distance (m)", "Відстань від центральної лінії тунелю"),
    (4, 250, "E_Soil (MPa)", "Модуль пружності ґрунту"),
    (0.1, 0.50, "Soil_Piossons (-)", "Коефіцієнт Пуассона"),
    (0.4, 44, "Trough Width (m)", "Ширина жолоба"),
    (0.00268, 0.6, "Friction coefficient (-)", "Коефіцієнт тертя"),
    (0.2, 5, "VL (%)", "Втрата об’єму"),
    (9, 90, "Depth (m)", "Глибина"),
    (4, 20, "Diameter (m)", "Діаметр тунелю"),
    (1.5, 30, "Fc (MPa)", "Міцність на стиск"),
]

# (label, info) for every model output, in the order returned by
# predict_with_proposed_method.
OUTPUT_FIELDS = [
    ("Maximum Crack Width (mm)", "Максимальна ширина тріщини"),
    ("Total Number of Cracks (-)", "Загальна кількість тріщин"),
    ("Local Average (-)",
     "Локальне пошкодження – це середнє значення між шириною тріщини та кількістю тріщин"),
    ("Maximum Slope (mm/mm)", "Максимальний схил"),
    ("Max Tilt (mm/mm)", "Максимальний нахил"),
    ("Max Angular Distortion (-)", "Максимальне кутове викривлення"),
    ("Max Horizontal Strain (mm/mm)", "Максимальна горизонтальна деформація"),
    # The info text previously listed "нахилом" twice; the first item is the
    # slope ("схил"), matching the "Maximum Slope" field above.
    ("Global Average (-)",
     "Глобальне пошкодження – це середнє значення між схилом, нахилом, кутовим викривленням і максимальною горизонтальною деформацією"),
]

demo = gr.Interface(
    fn=predict_with_proposed_method,
    inputs=[
        gr.Slider(minimum, maximum, randomize=True, label=label, info=info)
        for minimum, maximum, label, info in INPUT_SLIDERS
    ],
    outputs=[gr.Number(label=label, info=info) for label, info in OUTPUT_FIELDS],
    examples=[
        [3653, 0.607, 0.027, 7.2, 20.5, 22.1, 16, 210, 0.153, 10, 0.1, 0.74, 34, 14, 9.4],
        [8876, 0.9, 0.0187, 12.9, 29.5, 15.8, 29.2, 203.5, 0.3175, 17.7, 0.458, 4.43, 34.9, 11.7, 13.9],
        [8230, 1.13, 0.0272, 13.5, 8, 21.5, 15.5, 158, 0.39, 15, 0.474, 2.37, 37.4, 11.4, 20],
    ],
)

if __name__ == "__main__":
    # NOTE(review): share=True publishes the demo on a public gradio.live URL;
    # anyone with the link can call the model while the cell is running.
    demo.launch(inbrowser=True, share=True, show_api=False)

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://3e0be4f4cb7f6bd27f.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)
