# Исследование алгоритмов

### Проверка и установка рабочей директории, должен быть корень проекта

In [1]:
%pwd

'C:\\Users\\Kuroha\\source\\repos_py\\bauman_final_project\\notebooks'

In [2]:
%cd ..

C:\Users\Kuroha\source\repos_py\bauman_final_project


### Загрузка датасета

In [3]:
import numpy as np
import pandas as pd

import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns

from src.utils import *

In [4]:
# Resolve the path of the processed training CSV through the project helper
# (get_filepath / DATA_PROCESSED_TRAIN presumably come from the star-import of src.utils).
path = get_filepath(DATA_PROCESSED_TRAIN, is_raw=False)
# Parse 'date' as datetime so it can later be compared against date strings.
df = pd.read_csv(path, parse_dates=['date'])

In [5]:
df.head().T

Unnamed: 0,0,1,2,3,4
uid,0.0,0.0,0.0,0.0,0.0
date,2008-01-01 00:00:00,2008-01-02 00:00:00,2008-01-03 00:00:00,2008-01-04 00:00:00,2008-01-05 00:00:00
latitude,0.312439,0.312439,0.312439,0.312439,0.312439
longitude,0.699807,0.699807,0.699807,0.699807,0.699807
temperature,0.438202,0.314607,0.146067,0.168539,0.325843
cloud,1.0,0.0,0.0,0.666667,1.0
is_fallback_data,1,1,1,1,1
water_level,138.0,138.0,138.0,138.0,138.0
uid_0,1.0,1.0,1.0,1.0,1.0
uid_1,0.0,0.0,0.0,0.0,0.0


### Подготовка данных

Необходимо проверить, зависит ли качество моделей от способа кодирования погоды: одной колонкой с наличием осадков, либо с указанием их вида, двумя - дождь и снег, либо тремя - дождь, гроза и снег.

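Датасеты с разными вариантами кодирования погоды будут именоваться как **v1** (одна колонка с наличием осадков), **v2** (две колонки: дождь и снег), **v3** (три колонки: дождь, гроза и снег) и **v4** (одна колонка с указанием вида осадков).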

In [6]:
df.columns

Index(['uid', 'date', 'latitude', 'longitude', 'temperature', 'cloud',
       'is_fallback_data', 'water_level', 'uid_0', 'uid_1', 'uid_2', 'uid_3',
       'uid_4', 'uid_5', 'uid_6', 'uid_7', 'uid_8', 'uid_9', 'uid_10',
       'uid_11', 'uid_12', 'uid_13', 'uid_14', 'year', 'day_sin', 'day_cos',
       'weather_v1_precip', 'weather_v2_rain', 'weather_snow',
       'weather_v3_rain', 'weather_v3_storm', 'weather_v4'],
      dtype='object')

Для обучения модели используем данные за 2008–2016 годы, а для тестирования — за 2017 год.

In [7]:
# Chronological hold-out: rows dated before 2017-01-01 are used for fitting,
# rows dated 2017-01-01 or later are used for evaluation.
df_X_train = df.loc[(df['date'] < '2017-01-01')]
df_X_test = df.loc[(df['date'] >= '2017-01-01')]
# Targets are taken from the same slices; the feature frames still contain
# 'water_level' here and it is dropped later inside get_all_df.
y_train = df_X_train['water_level']
y_test = df_X_test['water_level']

# NOTE(review): the last percentage divides the test row count by the TRAIN row
# count, not by the total row count — confirm this is the ratio that should be
# reported as the test-set size.
print(f'''Размерность оригинального датасета: {df.shape}
Размерность тренировочного датасета: {df_X_train.shape}
Размерность тестового датасета: {df_X_test.shape}
Размер тестовой выборки: {df_X_test.shape[0] / df_X_train.shape[0] * 100:.2f}%''')

Размерность оригинального датасета: (71159, 32)
Размерность тренировочного датасета: (61535, 32)
Размерность тестового датасета: (9624, 32)
Размер тестовой выборки: 15.64%


In [8]:
def get_all_df(input_df):
    """Build the four feature frames that differ only in how weather is encoded.

    The 'date', 'water_level' and 'uid' columns are removed first. Each returned
    frame then keeps exactly one group of weather columns and drops the others:

    * v1 - 'weather_v1_precip'
    * v2 - 'weather_v2_rain', 'weather_snow'
    * v3 - 'weather_snow', 'weather_v3_rain', 'weather_v3_storm'
    * v4 - 'weather_v4'

    Returns:
        list of four DataFrames in the order [v1, v2, v3, v4]; the input frame
        is not modified.
    """
    features = input_df.drop(columns=['date', 'water_level', 'uid'])

    every_weather_col = [
        'weather_v1_precip',
        'weather_v2_rain',
        'weather_snow',
        'weather_v3_rain',
        'weather_v3_storm',
        'weather_v4',
    ]
    kept_weather_by_variant = [
        ['weather_v1_precip'],
        ['weather_v2_rain', 'weather_snow'],
        ['weather_snow', 'weather_v3_rain', 'weather_v3_storm'],
        ['weather_v4'],
    ]

    variants = []
    for kept in kept_weather_by_variant:
        # Drop every weather column that does not belong to this encoding.
        unused = [col for col in every_weather_col if col not in kept]
        variants.append(features.drop(columns=unused))
    return variants

# Build the feature-set variants for the train and the test period;
# both lists come from the same function, so they share the same variant order.
df_X_train_all = get_all_df(df_X_train)
df_X_test_all = get_all_df(df_X_test)

### Применение алгоритмов машинного обучения

Для оценки качества моделей будут использованы 2 меры качества: средняя абсолютная ошибка (MAE) и коэффициент детерминации ($R^2$).

In [9]:
from sklearn.metrics import mean_absolute_error as mae
from sklearn.metrics import r2_score

results = {}  # saved metrics (and the fitted model) per evaluated model name
algos = {}  # every training function registered so far, keyed by model name

In [10]:
# Store a model together with the quality scores of its predictions
def add_result(model, predict, model_name):
    """Score predictions against the notebook-level y_test and record the outcome.

    MAE and R2 are rounded to three decimals, printed under the model name
    (followed by a separator line) and stored in the global ``results`` dict
    under ``model_name`` together with the model object.

    Args:
        model: fitted model to keep alongside the scores (may be None).
        predict: predicted values for the test period.
        model_name: key used in ``results`` and as the printed heading.
    """
    mae_value, r2_value = (
        round(metric(y_test, predict), 3) for metric in (mae, r2_score)
    )
    print(
        model_name,
        f'MAE: {mae_value}',
        f'R2_score: {r2_value}',
        '=' * 20,
        sep='\n',
    )

    results[model_name] = dict(MAE=mae_value, R2_score=r2_value, model=model)

# Predict with an already fitted model and record the scores
def predict_result(model, X_test, model_name):
    """Run ``model.predict`` on ``X_test`` and pass the output to add_result."""
    add_result(model, model.predict(X_test), model_name)

# Apply a training function to every dataset variant and score each fitted model
def apply_to_all(algo, model_name):
    """Train and evaluate ``algo`` on each entry of the global variant lists.

    Args:
        algo: callable ``algo(X_train, X_test)`` returning a fitted model.
        model_name: base name; each result is stored as
            ``'<model_name> (df_v<k>)'`` with k starting at 1.

    The callable is also remembered in the global ``algos`` dict (first
    registration under a given name wins) so it can be replayed later.
    """
    algos.setdefault(model_name, algo)
    for variant_no, X_train in enumerate(df_X_train_all, start=1):
        X_test = df_X_test_all[variant_no - 1]
        fitted = algo(X_train, X_test)
        predict_result(fitted, X_test, f'{model_name} (df_v{variant_no})')

#### 1. Заглушка

Представим, что модель всегда возвращает средний уровень воды. Полученные ошибки будут использованы для оценки качества реальных моделей.

In [11]:
# Baseline "model": a constant prediction equal to the mean training target,
# repeated once per test row.
y_predicted = pd.Series([y_train.mean()] * y_test.shape[0])
y_predicted

0       266.42995
1       266.42995
2       266.42995
3       266.42995
4       266.42995
          ...    
9619    266.42995
9620    266.42995
9621    266.42995
9622    266.42995
9623    266.42995
Length: 9624, dtype: float64

In [12]:
add_result(None, y_predicted, 'Mean')

Mean
MAE: 153.267
R2_score: -0.014


#### 2. Линейная регрессия

In [13]:
from sklearn.linear_model import LinearRegression 

def linear_regression(X_train, _):
    """Fit a default LinearRegression on ``X_train`` and the notebook-level y_train.

    The second argument (test features) exists only to match the signature
    apply_to_all calls with; it is ignored.

    Returns:
        The fitted estimator.
    """
    return LinearRegression().fit(X_train, y_train)

In [14]:
apply_to_all(linear_regression, 'Linear Regression')

Linear Regression (df_v1)
MAE: 109.322
R2_score: 0.408
Linear Regression (df_v2)
MAE: 109.304
R2_score: 0.408
Linear Regression (df_v3)
MAE: 109.31
R2_score: 0.409
Linear Regression (df_v4)
MAE: 109.318
R2_score: 0.408


In [15]:
pd.DataFrame.from_dict(results, orient='index')

Unnamed: 0,MAE,R2_score,model
Mean,153.267,-0.014,
Linear Regression (df_v1),109.322,0.408,LinearRegression()
Linear Regression (df_v2),109.304,0.408,LinearRegression()
Linear Regression (df_v3),109.31,0.409,LinearRegression()
Linear Regression (df_v4),109.318,0.408,LinearRegression()


#### 3. Дерево решений

In [16]:
from sklearn.tree import DecisionTreeRegressor

def decision_tree(X_train, _):
    """Fit a default DecisionTreeRegressor on ``X_train`` and the notebook-level y_train.

    The second argument (test features) is ignored.

    NOTE(review): no random_state is passed, so scores may differ slightly
    between runs — consider fixing a seed.

    Returns:
        The fitted estimator.
    """
    return DecisionTreeRegressor().fit(X_train, y_train)

In [17]:
apply_to_all(decision_tree, 'Decision Tree')

Decision Tree (df_v1)
MAE: 70.35
R2_score: 0.593
Decision Tree (df_v2)
MAE: 70.473
R2_score: 0.593
Decision Tree (df_v3)
MAE: 70.516
R2_score: 0.592
Decision Tree (df_v4)
MAE: 70.432
R2_score: 0.593


#### 4. Гребневая регрессия

In [18]:
from sklearn.linear_model import Ridge

def ridge(X_train, _):
    """Fit a default Ridge regression on ``X_train`` and the notebook-level y_train.

    The second argument (test features) is ignored.

    Returns:
        The fitted estimator.
    """
    return Ridge().fit(X_train, y_train)

In [19]:
apply_to_all(ridge, 'Ridge')

Ridge (df_v1)
MAE: 109.323
R2_score: 0.408
Ridge (df_v2)
MAE: 109.305
R2_score: 0.408
Ridge (df_v3)
MAE: 109.311
R2_score: 0.409
Ridge (df_v4)
MAE: 109.319
R2_score: 0.408


#### 5. Лассо

In [20]:
from sklearn.linear_model import Lasso

def lasso(X_train, _):
    """Fit a default Lasso regression on ``X_train`` and the notebook-level y_train.

    The second argument (test features) is ignored.

    Returns:
        The fitted estimator.
    """
    return Lasso().fit(X_train, y_train)

In [21]:
apply_to_all(lasso, 'Lasso')

Lasso (df_v1)
MAE: 118.103
R2_score: 0.364
Lasso (df_v2)
MAE: 118.103
R2_score: 0.364
Lasso (df_v3)
MAE: 118.103
R2_score: 0.364
Lasso (df_v4)
MAE: 118.175
R2_score: 0.364


#### 6. Простая нейронная сеть

In [22]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.losses import mean_absolute_error


def simple_neural_network(X_train, X_test):
    """Train a small fully connected network for 10 epochs.

    Architecture: two hidden Dense layers of 512 ReLU units and a single
    linear output unit. The loss is mean absolute error, optimised with Adam.
    Targets are the notebook-level y_train / y_test; the test split is passed
    as validation data to ``fit``.

    Args:
        X_train: training features; its column count defines the input width.
        X_test: test features used as validation data during fitting.

    Returns:
        The trained Keras model.
    """
    n_features = X_train.shape[1]
    network = Sequential([
        Dense(512, activation='relu', input_shape=[n_features]),
        Dense(512, activation='relu'),
        Dense(1),
    ])

    network.compile(loss=mean_absolute_error, optimizer='Adam', metrics=['mae'])
    network.fit(
        X_train, y_train,
        validation_data=(X_test, y_test),
        epochs=10,
        verbose=1,
    )

    return network

In [23]:
#apply_to_all(simple_neural_network, 'Neural Network (simple)')

#### 7. Нейронная сеть с ранней остановкой

In [24]:
from tensorflow.keras.callbacks import EarlyStopping


# Early-stopping callback shared by the network builders that pass it to fit().
#   min_delta=0.1             - smaller improvements of the monitored value are not counted as progress
#   patience=20               - epochs without such an improvement before training stops
#   restore_best_weights=True - the model is rolled back to the weights of the best epoch
# No `monitor` is given, so the Keras default is used (presumably val_loss —
# confirm for the installed version).
early_stopping = EarlyStopping(
    min_delta=0.1,
    patience=20,
    restore_best_weights=True,
)

def neural_network_with_ES(X_train, X_test):
    """Train the 512-512-1 dense network for up to 100 epochs with early stopping.

    Same architecture, loss (mean absolute error) and optimiser (Adam) as the
    simple network, but ``fit`` receives the notebook-level ``early_stopping``
    callback. Targets are the notebook-level y_train / y_test.

    NOTE(review): the validation data handed to ``fit`` is the test split, and
    the callback is configured with restore_best_weights=True, so the test set
    appears to influence which weights are kept — confirm this is intended.

    Args:
        X_train: training features; its column count defines the input width.
        X_test: test features used as validation data during fitting.

    Returns:
        The trained Keras model.
    """
    n_features = X_train.shape[1]
    network = Sequential([
        Dense(512, activation='relu', input_shape=[n_features]),
        Dense(512, activation='relu'),
        Dense(1),
    ])

    network.compile(loss=mean_absolute_error, optimizer='Adam', metrics=['mae'])
    network.fit(
        X_train, y_train,
        validation_data=(X_test, y_test),
        epochs=100,
        callbacks=[early_stopping],
        verbose=1,
    )

    return network

In [25]:
#apply_to_all(neural_network_with_ES, 'Neural Network (early stopping)')

#### 8. Нейронная сеть со слоями Batch Normalization и Dropout

In [26]:
from tensorflow.keras.layers import BatchNormalization, Dropout


def neural_network_with_Dropout(X_train, X_test):
    """Train a wider network with Dropout and BatchNormalization after each hidden layer.

    Architecture: three hidden stages, each Dense(1024, ReLU) -> Dropout(0.3)
    -> BatchNormalization, followed by a single linear output unit. The loss is
    mean absolute error, optimised with Adam, for up to 100 epochs with the
    notebook-level ``early_stopping`` callback. Targets are the notebook-level
    y_train / y_test.

    NOTE(review): the test split is used as validation data for early stopping —
    confirm this is intended.

    Args:
        X_train: training features; its column count defines the input width.
        X_test: test features used as validation data during fitting.

    Returns:
        The trained Keras model.
    """
    # First stage carries the input shape; the remaining two stages are identical.
    stack = [
        Dense(1024, activation='relu', input_shape=[X_train.shape[1]]),
        Dropout(0.3),
        BatchNormalization(),
    ]
    for _stage in range(2):
        stack += [
            Dense(1024, activation='relu'),
            Dropout(0.3),
            BatchNormalization(),
        ]
    stack.append(Dense(1))
    network = Sequential(stack)

    network.compile(loss=mean_absolute_error, optimizer='Adam', metrics=['mae'])
    network.fit(
        X_train, y_train,
        validation_data=(X_test, y_test),
        epochs=100,
        callbacks=[early_stopping],
        verbose=1,
    )

    return network

In [27]:
#apply_to_all(neural_network_with_Dropout, 'Neural Network (Dropout)')

#### 9. Эластичная сеть

In [28]:
from sklearn.linear_model import ElasticNet

def elastic_net(X_train, _):
    """Fit a default ElasticNet on ``X_train`` and the notebook-level y_train.

    The second argument (test features) is ignored.

    Returns:
        The fitted estimator.
    """
    return ElasticNet().fit(X_train, y_train)

In [29]:
apply_to_all(elastic_net, 'Elastic Net')

Elastic Net (df_v1)
MAE: 141.898
R2_score: 0.142
Elastic Net (df_v2)
MAE: 141.902
R2_score: 0.142
Elastic Net (df_v3)
MAE: 141.902
R2_score: 0.142
Elastic Net (df_v4)
MAE: 141.914
R2_score: 0.142


#### 10. Регрессия опорных векторов

In [30]:
from sklearn.svm import SVR

def svr(X_train, _):
    """Fit a default SVR on ``X_train`` and the notebook-level y_train.

    The second argument (test features) is ignored.

    Returns:
        The fitted estimator.
    """
    return SVR().fit(X_train, y_train)

In [31]:
#apply_to_all(svr, 'SVR')

#### 11. Градиентный бустинг

In [32]:
from sklearn.ensemble import GradientBoostingRegressor

def gbr(X_train, _):
    """Fit a default GradientBoostingRegressor on ``X_train`` and the notebook-level y_train.

    The second argument (test features) is ignored.

    NOTE(review): no random_state is passed — consider fixing a seed if exact
    reproducibility of the scores matters.

    Returns:
        The fitted estimator.
    """
    return GradientBoostingRegressor().fit(X_train, y_train)

In [33]:
apply_to_all(gbr, 'Gradient Boosting')

Gradient Boosting (df_v1)
MAE: 66.847
R2_score: 0.682
Gradient Boosting (df_v2)
MAE: 66.847
R2_score: 0.682
Gradient Boosting (df_v3)
MAE: 66.847
R2_score: 0.682
Gradient Boosting (df_v4)
MAE: 66.847
R2_score: 0.682


#### 12. Случайный лес 

In [34]:
from sklearn.ensemble import RandomForestRegressor

def random_forest(X_train, _):
    """Fit a default RandomForestRegressor on ``X_train`` and the notebook-level y_train.

    The second argument (test features) is ignored.

    NOTE(review): no random_state is passed, so scores are expected to vary
    between runs — consider fixing a seed.

    Returns:
        The fitted estimator.
    """
    return RandomForestRegressor().fit(X_train, y_train)

In [35]:
apply_to_all(random_forest, 'Random Forest')

Random Forest (df_v1)
MAE: 68.417
R2_score: 0.625
Random Forest (df_v2)
MAE: 68.389
R2_score: 0.624
Random Forest (df_v3)
MAE: 68.412
R2_score: 0.626
Random Forest (df_v4)
MAE: 68.304
R2_score: 0.625


#### 13. XGBoost

In [47]:
from xgboost.sklearn import XGBRegressor


def xgboost(X_train, _):
    """Fit an XGBRegressor with default settings on ``X_train`` and the notebook-level y_train.

    The second argument (test features) is ignored.

    Returns:
        The fitted regressor.
    """
    booster = XGBRegressor()
    booster.fit(X_train, y_train)
    return booster

In [37]:
apply_to_all(xgboost, 'XGBoost')

XGBoost (df_v1)
MAE: 66.044
R2_score: 0.653
XGBoost (df_v2)
MAE: 66.342
R2_score: 0.65
XGBoost (df_v3)
MAE: 66.391
R2_score: 0.648
XGBoost (df_v4)
MAE: 65.685
R2_score: 0.655


#### 14. Настроенный XGBoost

Поиск лучших гиперпараметров через GridSearchCV, возможно, улучшит работу алгоритма XGBoost.

In [74]:
from sklearn.model_selection import GridSearchCV

# The hyper-parameter search uses the first dataset variant only.
# NOTE(review): df_X_train_all is reassigned by other cells, so which feature set
# index 0 refers to depends on the order in which the cells were executed.
X_train = df_X_train_all[0]
# Candidate grid for XGBRegressor; the trailing comments presumably record the
# library default of each parameter.
parameters = {
              'gamma':[0.0, 0.025, 0.05, 0.075],  # default presumably 0
              'learning_rate': [0.05, 0.1, 0.15, 0.2, 0.3],  # default presumably 0.3
              'max_depth':[6, 7, 8, 9],  # default=6
              'min_child_weight': [0, 1]  # default presumably 1
             }

In [72]:
# NOTE: the name xgboost_tuned is bound to an estimator here and is rebound to a
# training function by a later cell.
xgboost_tuned = XGBRegressor()
# 5-fold grid search over `parameters`, scored by R^2, using 5 parallel jobs.
xgb_grid = GridSearchCV(xgboost_tuned, parameters, cv = 5, n_jobs = 5, verbose=True, 
                        scoring='r2')
# The search itself is disabled (presumably because of its cost: the saved output
# reports 800 fits); the best parameters, presumably taken from an earlier run,
# are printed as a hard-coded string.
#xgb_grid.fit(X_train, y_train)
#xgb_grid.best_params_
print("{'gamma': 0.0, 'learning_rate': 0.05, 'max_depth': 6, 'min_child_weight': 0}")

Fitting 5 folds for each of 160 candidates, totalling 800 fits


In [75]:
def xgboost_tuned(X_train, _):
    """Fit an XGBRegressor with learning_rate=0.05 and min_child_weight=0.

    Only these two settings are overridden; every other parameter keeps the
    library default. The model is fitted on ``X_train`` and the notebook-level
    y_train; the second argument (test features) is ignored.

    Returns:
        The fitted regressor.
    """
    tuned_params = {'learning_rate': 0.05, 'min_child_weight': 0}
    booster = XGBRegressor(**tuned_params)
    booster.fit(X_train, y_train)
    return booster

In [77]:
apply_to_all(xgboost_tuned, 'XGBoost (Tuned)')

XGBoost (Tuned) (df_v1)
MAE: 58.261
R2_score: 0.754
XGBoost (Tuned) (df_v2)
MAE: 51.216
R2_score: 0.821
XGBoost (Tuned) (df_v3)
MAE: 73.714
R2_score: 0.587
XGBoost (Tuned) (df_v4)
MAE: 65.657
R2_score: 0.661


In [38]:
# One row per stored result (index = model name); shown with the highest R2 first.
frame = pd.DataFrame.from_dict(results, orient='index')
frame.sort_values(by='R2_score', ascending=False)

Unnamed: 0,MAE,R2_score,model
Gradient Boosting (df_v3),66.847,0.682,([DecisionTreeRegressor(criterion='friedman_ms...
Gradient Boosting (df_v4),66.847,0.682,([DecisionTreeRegressor(criterion='friedman_ms...
Gradient Boosting (df_v2),66.847,0.682,([DecisionTreeRegressor(criterion='friedman_ms...
Gradient Boosting (df_v1),66.847,0.682,([DecisionTreeRegressor(criterion='friedman_ms...
XGBoost (df_v4),65.685,0.655,"XGBRegressor(base_score=0.5, booster='gbtree',..."
XGBoost (df_v1),66.044,0.653,"XGBRegressor(base_score=0.5, booster='gbtree',..."
XGBoost (df_v2),66.342,0.65,"XGBRegressor(base_score=0.5, booster='gbtree',..."
XGBoost (df_v3),66.391,0.648,"XGBRegressor(base_score=0.5, booster='gbtree',..."
Random Forest (df_v3),68.412,0.626,"(DecisionTreeRegressor(max_features=1.0, rando..."
Random Forest (df_v1),68.417,0.625,"(DecisionTreeRegressor(max_features=1.0, rando..."


Наиболее качественные прогнозы дают алгоритмы **Gradient Boosting**, **XGBoost** и **Random Forest**.

Кодирование погоды третьим способом (дождь, гроза и снег) показывает схожие либо лучшие результаты по сравнению с остальными способами. В дальнейшем погода будет кодироваться именно таким образом.
***
Попробуем использовать те же алгоритмы, но на датасетах с **третьим** способом кодирования погоды, в которых используется целочисленное значение uid, без индивидуальных столбцов (**v1**), либо убраны данные об облачности (**v2**), либо всё сразу (**v3**).

In [39]:
def get_all_df(input_df):
    """Build three feature frames that differ in uid encoding and cloud usage.

    The base frame drops 'date', 'water_level' and the weather columns of the
    encodings that are not used here ('weather_v1_precip', 'weather_v2_rain',
    'weather_v4'), leaving 'weather_snow', 'weather_v3_rain' and
    'weather_v3_storm'. From it:

    * v1 - integer 'uid' only (columns uid_0..uid_14 removed)
    * v2 - columns uid_0..uid_14 only ('uid' removed), without 'cloud'
    * v3 - integer 'uid' only, without 'cloud'

    Returns:
        list of three DataFrames in the order [v1, v2, v3]; the input frame is
        not modified.
    """
    # Every unused column is listed exactly once and removed in a single call
    # (the previous version named 'weather_v4' twice and dropped in two steps).
    df_all_base = input_df.drop(
        columns=['date', 'water_level', 'weather_v1_precip', 'weather_v2_rain', 'weather_v4']
    )

    uid_cols = [f'uid_{i}' for i in range(15)]

    df_all_v1 = df_all_base.drop(columns=uid_cols)
    df_all_v2 = df_all_base.drop(columns=['uid', 'cloud'])
    df_all_v3 = df_all_base.drop(columns=uid_cols + ['cloud'])
    return [df_all_v1, df_all_v2, df_all_v3]

# Rebuild the train/test variant lists with the get_all_df defined in this cell.
df_X_train_all = get_all_df(df_X_train)
df_X_test_all = get_all_df(df_X_test)

# Keep a copy of the metrics collected so far, then start the next experiment
# with an empty results dict.
results_weather = results.copy()
results = {}  # reset the results dictionary

In [40]:
# Re-run every training function registered in `algos` on the current dataset variants.
# NOTE(review): which algorithms run here depends on which cells have populated
# `algos` in the current kernel session.
for name, algo in algos.items():
    apply_to_all(algo, name)
    print('*' * 25)

Linear Regression (df_v1)
MAE: 122.497
R2_score: 0.339
Linear Regression (df_v2)
MAE: 108.495
R2_score: 0.41
Linear Regression (df_v3)
MAE: 121.528
R2_score: 0.34
*************************
Decision Tree (df_v1)
MAE: 55.109
R2_score: 0.749
Decision Tree (df_v2)
MAE: 70.333
R2_score: 0.593
Decision Tree (df_v3)
MAE: 54.816
R2_score: 0.752
*************************
Ridge (df_v1)
MAE: 122.496
R2_score: 0.339
Ridge (df_v2)
MAE: 108.497
R2_score: 0.41
Ridge (df_v3)
MAE: 121.527
R2_score: 0.34
*************************
Lasso (df_v1)
MAE: 125.044
R2_score: 0.327
Lasso (df_v2)
MAE: 117.08
R2_score: 0.367
Lasso (df_v3)
MAE: 123.975
R2_score: 0.33
*************************
Elastic Net (df_v1)
MAE: 142.892
R2_score: 0.135
Elastic Net (df_v2)
MAE: 141.956
R2_score: 0.141
Elastic Net (df_v3)
MAE: 142.945
R2_score: 0.134
*************************
Gradient Boosting (df_v1)
MAE: 58.972
R2_score: 0.793
Gradient Boosting (df_v2)
MAE: 66.847
R2_score: 0.682
Gradient Boosting (df_v3)
MAE: 58.972
R2_score: 

In [41]:
# One row per stored result (index = model name); shown with the highest R2 first.
frame = pd.DataFrame.from_dict(results, orient='index')
frame.sort_values(by='R2_score', ascending=False)

Unnamed: 0,MAE,R2_score,model
XGBoost (df_v3),50.787,0.82,"XGBRegressor(base_score=0.5, booster='gbtree',..."
XGBoost (df_v1),50.841,0.82,"XGBRegressor(base_score=0.5, booster='gbtree',..."
Gradient Boosting (df_v3),58.972,0.793,([DecisionTreeRegressor(criterion='friedman_ms...
Gradient Boosting (df_v1),58.972,0.793,([DecisionTreeRegressor(criterion='friedman_ms...
Random Forest (df_v1),53.034,0.779,"(DecisionTreeRegressor(max_features=1.0, rando..."
Random Forest (df_v3),53.353,0.774,"(DecisionTreeRegressor(max_features=1.0, rando..."
Decision Tree (df_v3),54.816,0.752,DecisionTreeRegressor()
Decision Tree (df_v1),55.109,0.749,DecisionTreeRegressor()
Gradient Boosting (df_v2),66.847,0.682,([DecisionTreeRegressor(criterion='friedman_ms...
XGBoost (df_v2),65.796,0.65,"XGBRegressor(base_score=0.5, booster='gbtree',..."


Как и в прошлом эксперименте, наиболее качественные прогнозы дают алгоритмы **XGBoost**, **Gradient Boosting** и **Random Forest**.

Самый лучший результат у **XGBoost**: MAE = **50.787**, $R^2$ = **0.82**.

Качество прогноза заметно повышается на некоторых алгоритмах, когда для кодирования uid используется одно число. Отсутствие данных об облачности незначительно влияет на предсказания. В дальнейшем из датасетов будут исключаться данные об облачности (**cloud**).
***
Теперь попробуем использовать алгоритмы на датасетах без синуса (**v1**) или косинуса (**v2**) дня от года как с числовым представлением uid, так и с индивидуальными столбцами (**v3** и **v4**).

In [42]:
def get_all_df(input_df):
    """Build four feature frames that omit one of the cyclic day features.

    The base frame drops 'date', 'water_level', 'weather_v1_precip',
    'weather_v2_rain' and 'weather_v4'. From it:

    * v1 - integer 'uid' (uid_0..uid_14 removed), without 'day_sin'
    * v2 - integer 'uid' (uid_0..uid_14 removed), without 'day_cos'
    * v3 - columns uid_0..uid_14 ('uid' removed), without 'day_sin'
    * v4 - columns uid_0..uid_14 ('uid' removed), without 'day_cos'

    NOTE(review): 'cloud' is kept in all four variants, although the notebook
    text preceding this experiment says cloud data will be excluded from now
    on — confirm which is intended.

    Returns:
        list of four DataFrames in the order [v1, v2, v3, v4]; the input frame
        is not modified.
    """
    base = input_df.drop(
        columns=['date', 'water_level', 'weather_v1_precip', 'weather_v2_rain', 'weather_v4']
    )

    one_hot_uid = [f'uid_{i}' for i in range(15)]
    columns_to_drop = [
        one_hot_uid + ['day_sin'],
        one_hot_uid + ['day_cos'],
        ['uid', 'day_sin'],
        ['uid', 'day_cos'],
    ]

    return [base.drop(columns=unused) for unused in columns_to_drop]

# Rebuild the train/test variant lists with the get_all_df defined in this cell.
df_X_train_all = get_all_df(df_X_train)
df_X_test_all = get_all_df(df_X_test)

# Keep a copy of the metrics collected so far, then start the next experiment
# with an empty results dict.
results_uids_cloud = results.copy()
results = {}  # reset the results dictionary

In [43]:
# Re-run every training function registered in `algos` on the current dataset variants.
# NOTE(review): which algorithms run here depends on which cells have populated
# `algos` in the current kernel session.
for name, algo in algos.items():
    apply_to_all(algo, name)
    print('*' * 25)

Linear Regression (df_v1)
MAE: 123.282
R2_score: 0.333
Linear Regression (df_v2)
MAE: 122.39
R2_score: 0.312
Linear Regression (df_v3)
MAE: 109.333
R2_score: 0.404
Linear Regression (df_v4)
MAE: 109.632
R2_score: 0.38
*************************
Decision Tree (df_v1)
MAE: 70.184
R2_score: 0.532
Decision Tree (df_v2)
MAE: 56.951
R2_score: 0.744
Decision Tree (df_v3)
MAE: 84.359
R2_score: 0.396
Decision Tree (df_v4)
MAE: 71.542
R2_score: 0.605
*************************
Ridge (df_v1)
MAE: 123.277
R2_score: 0.333
Ridge (df_v2)
MAE: 122.391
R2_score: 0.312
Ridge (df_v3)
MAE: 109.33
R2_score: 0.404
Ridge (df_v4)
MAE: 109.632
R2_score: 0.38
*************************
Lasso (df_v1)
MAE: 125.301
R2_score: 0.321
Lasso (df_v2)
MAE: 124.791
R2_score: 0.305
Lasso (df_v3)
MAE: 117.656
R2_score: 0.36
Lasso (df_v4)
MAE: 117.493
R2_score: 0.343
*************************
Elastic Net (df_v1)
MAE: 142.405
R2_score: 0.126
Elastic Net (df_v2)
MAE: 145.766
R2_score: 0.1
Elastic Net (df_v3)
MAE: 141.394
R2_score

In [44]:
frame = pd.DataFrame.from_dict(results, orient='index')
frame.sort_values(by='R2_score', ascending=False)

Unnamed: 0,MAE,R2_score,model
XGBoost (df_v2),52.868,0.81,"XGBRegressor(base_score=0.5, booster='gbtree',..."
Random Forest (df_v2),53.89,0.789,"(DecisionTreeRegressor(max_features=1.0, rando..."
Decision Tree (df_v2),56.951,0.744,DecisionTreeRegressor()
Gradient Boosting (df_v2),66.301,0.728,([DecisionTreeRegressor(criterion='friedman_ms...
XGBoost (df_v1),59.816,0.721,"XGBRegressor(base_score=0.5, booster='gbtree',..."
Random Forest (df_v1),60.872,0.708,"(DecisionTreeRegressor(max_features=1.0, rando..."
Gradient Boosting (df_v1),69.167,0.701,([DecisionTreeRegressor(criterion='friedman_ms...
XGBoost (df_v4),67.051,0.65,"XGBRegressor(base_score=0.5, booster='gbtree',..."
Random Forest (df_v4),69.426,0.635,"(DecisionTreeRegressor(max_features=1.0, rando..."
Gradient Boosting (df_v4),74.155,0.634,([DecisionTreeRegressor(criterion='friedman_ms...


Отсутствие столбца с косинусом дня положительно сказалось на алгоритме **Random Forest**, однако лучший результат был получен в прошлом эксперименте.