# Модель машинного обучения 

In [1]:
from python_files.data import data
from python_files.strength.str_col_list import str_x_col_list, str_col_list_norm, str_col_list_std
from python_files.strength.str_split_data import X_train_str, X_test_str, y_train_str, y_test_str
# from python_files.elasticity.ela_outliers import X_train_trimmed_3S, y_train_trimmed_3S
# from python_files.elasticity.ela_outliers import X_train_trimmed_QT, y_train_trimmed_QT
# from python_files.elasticity.ela_outliers import X_train_sub_bound_3S, y_train_sub_bound_3S
# from python_files.elasticity.ela_outliers import X_train_sub_bound_QT, y_train_sub_bound_QT
# from python_files.elasticity.ela_outliers import X_train_sub_med_3S, y_train_sub_med_3S
# from python_files.elasticity.ela_outliers import X_train_sub_med_QT, y_train_sub_med_QT

In [2]:
import numpy as np
import pandas as pd

In [3]:
from sklearn.preprocessing import PowerTransformer, StandardScaler, QuantileTransformer, MaxAbsScaler, RobustScaler
from sklearn.compose import ColumnTransformer, TransformedTargetRegressor
from sklearn.pipeline import Pipeline
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.linear_model import LinearRegression, Lasso, ElasticNet, Ridge
from sklearn.svm import SVR

## Preprocessing

In [4]:
# Column-wise preprocessing for the strength features:
#   * columns in str_col_list_norm go through a PowerTransformer,
#   * columns in str_col_list_std go through a StandardScaler.
# No `remainder` is given, so ColumnTransformer's default handling of
# unlisted columns applies.
_norm_and_std_transformers = [
    ('normalization', PowerTransformer(), str_col_list_norm),
    ('scaling', StandardScaler(), str_col_list_std),
]
preprocessor = ColumnTransformer(_norm_and_std_transformers)

In [5]:
# Alternative preprocessor: one RobustScaler applied to every column named in
# str_x_col_list. (A PowerTransformer 'normalization' entry was tried here
# earlier and is currently disabled.)
preprocessor_ = ColumnTransformer([('scaling', RobustScaler(), str_x_col_list)])

In [6]:
# Two-stage scaling pipeline: first centre each feature without dividing by
# its standard deviation (with_std=False), then apply MaxAbsScaler.
_centering = StandardScaler(with_mean=True, with_std=False)
_max_abs = MaxAbsScaler()
preprocessor_MAS = Pipeline([('scaling_1', _centering), ('scaling_2', _max_abs)])

## Model

In [5]:
# model = GradientBoostingRegressor(learning_rate = 0.01, n_estimators = 100, random_state = 7)

In [42]:
# model = ElasticNet(alpha = 0.2, l1_ratio = 0.1, random_state = 7)

In [61]:
# model = Lasso(alpha = 0.4, max_iter = 1000, random_state = 7) 

In [16]:
# model = RandomForestRegressor(random_state = 7, min_samples_split = 300, n_estimators = 200)

In [44]:
# model = SVR(kernel = 'linear', C = 5, gamma = 'scale')

In [None]:
# model = Ridge(alpha = 2136, positive = False) 

In [7]:
# ElasticNet fitted on a transformed target: the target is mapped to a normal
# distribution by a QuantileTransformer before fitting and mapped back when
# predicting. (An alternative that was tried: func=np.log, inverse_func=np.exp.)
#
# The number of quantiles is taken from the training target instead of the
# previously hard-coded 716, so it keeps tracking the data if the split changes.
# NOTE(review): 716 presumably equalled len(y_train_str) -- confirm.
n_target_quantiles = len(y_train_str)

model = TransformedTargetRegressor(
    regressor=ElasticNet(alpha=0.2, l1_ratio=0.1, random_state=7),
    transformer=QuantileTransformer(
        n_quantiles=n_target_quantiles,
        output_distribution="normal",
        random_state=7,
    ),
)

In [8]:
# Full estimator: RobustScaler-based preprocessing followed by the
# target-transformed model; verbose=True prints the time spent in each step
# while fitting.
_regressor_steps = [
    ('preprocessor', preprocessor_),
    ('model', model),
]
Regressor = Pipeline(_regressor_steps, verbose=True)

## Training

In [9]:
# Fit the preprocessing step and the model on the strength training split.
Regressor.fit(X=X_train_str, y=y_train_str)

[Pipeline] ...... (step 1 of 2) Processing preprocessor, total=   0.2s
[Pipeline] ............. (step 2 of 2) Processing model, total=   0.5s


## Predictions

In [10]:
# Predict the target for the held-out strength test features.
predictions = Regressor.predict(X=X_test_str)

## Evaluation

In [11]:
# Coefficient of determination (R^2) on the test split.
r2_score(y_true=y_test_str, y_pred=predictions)

0.0002670186306691358

In [18]:
# Root-mean-squared error on the test split: square root of the MSE.
_test_mse = mean_squared_error(y_true=y_test_str, y_pred=predictions)
np.sqrt(_test_mse)

502.4238129465602

In [13]:
# Mean absolute error on the test split.
mean_absolute_error(y_true=y_test_str, y_pred=predictions)

391.1870126443346

#### Пользовательские метрики

In [17]:
# Custom score: 100 % minus the MAE expressed as a percentage of the mean
# test target.
_mae_over_mean = mean_absolute_error(y_true=y_test_str, y_pred=predictions) / y_test_str.mean()
(1 - _mae_over_mean) * 100

84.16946550825068

In [68]:
# Custom score: MAE as a percentage of the test target's range (max - min).
_target_range = y_test_str.max() - y_test_str.min()
mean_absolute_error(y_true=y_test_str, y_pred=predictions) * 100 / _target_range

14.203206412854954

In [21]:
# Summary statistics of the test target, shown to put the error metrics
# above into context.
y_test_str.describe()

count     307.000000
mean     2471.091629
std       503.311299
min      1036.856605
25%      2147.336531
50%      2468.491611
75%      2785.368040
max      3791.072810
Name: Прочность при растяжении, МПа, dtype: float64