In [1]:
import csv
import numpy as np
from sklearn import linear_model
from sklearn import preprocessing

from sklearn.experimental import enable_halving_search_cv  # noqa
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, HalvingGridSearchCV

In [2]:
def get_river_dataset(fname, pr_list=None, y_name='H_max'):
    pr_arr = []
    y_arr = []
    with open(fname, newline='') as f:
        reader = csv.DictReader(f, delimiter=';')
        for row in reader:
            pr_arr_row = []
            for pr in pr_list:
                pr_arr_row.append(row[pr])

            pr_arr.append(pr_arr_row)
            y_arr.append(row[y_name])
    X = np.asarray(pr_arr, dtype=np.float64)
    y = np.asarray(y_arr, dtype=np.float64)
    return X, y

#### Сумма, средний, высший, низший уровни

In [3]:
def get_sum(h_max):
    return np.sum(h_max)
    
def get_avg(h_max):
    return np.mean(h_max)
    
def get_max(h_max):
    return np.amax(h_max)
    
def get_min(h_max):
    return np.amin(h_max)

#### Среднеквадратическая погрешность прогноза S

In [4]:
def get_s(h_max, h_forecast=None):
    # Среднеквадратическая погрешность прогноза
    n = h_max.shape[0]
    sqr_diff = np.sum((h_max - h_forecast) ** 2) / (n - 1)
    std = sqr_diff ** 0.5
    return std    

#### Среднеквадратическое отклонение sigma

In [5]:
def get_sigma(h_max):
    # Среднеквадратическая погрешность климатическая.
    # Рассчитывается только по всей совокупности данных.
    return np.std(h_max, ddof=1)

In [6]:
def get_hmax_avg(h_max):
    # Среднее значение h_max.
    # Рассчитывается только по всей совокупности данных.
    return np.mean(h_max)

#### Допустимая погрешность прогноза delta_dop

In [7]:
def get_delta_dop(sigma):
    return 0.674 * sigma

#### Критерий эффективности метода прогнозирования климатический S/sigma

In [8]:
def get_criterion(s, sigma):
    return s / sigma

#### Климатическая обеспеченность Pk

In [9]:
def get_pk(h_max, h_max_avg, delta_dop):
    #avg_level = np.mean(h_max)
    diff = np.abs(h_max - h_max_avg) / delta_dop
    trusted_values = diff[diff <= 1.0]
    m = trusted_values.shape[0]
    n = h_max.shape[0]
    return m / n * 100.00

#### Обеспеченность метода (оправдываемость) Pm

In [10]:
def get_pm(h_max, h_forecast, delta_dop):
    diff = np.abs(h_max - h_forecast) / delta_dop
    trusted_values = diff[diff <= 1.00000000000000000000]
    m = trusted_values.shape[0]
    n = h_max.shape[0]
    return m / n * 100.00

#### Корреляционное отношение

In [11]:
def get_correlation_ratio(criterion):
    c_1 = (1 - criterion ** 2)
    ro = c_1 ** 0.5 if c_1 > 0 else 0
    return ro

#### Вероятная ошибка прогноза S'

In [12]:
def get_forecast_error(s):
    return 0.674 * s

#### Ошибки климатического/природного прогноза для каждого года delta50

In [13]:
def get_delta50(h_max, delta_dop, h_max_avg=None, h_max_forecast=None):
    if h_max_forecast is None:
        # delta50 климатическая
        return (h_max - h_max_avg) / delta_dop
    else:
        # delta50 прогноза
        return (h_max - h_max_forecast) / delta_dop
  

#### Функция записи в csv файл

In [14]:
import csv
def write_dataset_csv(dataset, dataset_name, fieldnames, pr_group):
    with open(f'results/{dataset_name}/group-{pr_group}/{dataset_name}-гр{pr_group}.csv', 'w', newline='', encoding='utf-8') as csvfile:# , encoding='utf-8'
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames, delimiter=';', extrasaction='ignore')
        writer.writeheader()
        writer.writerows(dataset)


#### Функция разделения набора данных на тренировочный и тестовый

In [15]:
def train_test_split(X, y, n_test):
    X_train = X[:-n_test]
    y_train = y[:-n_test]
    X_test = X[-n_test:]
    y_test = y[-n_test:]
    return X_train, y_train, X_test, y_test

#### Функция формирования тестового набора данных с подстановкой нормированных значений

In [16]:
def test_norm(x, pr_list, norms):
    x_norm = np.copy(x)
    for col, pr in enumerate(pr_list):
        if pr in norms:
            x_norm[:, col:col+1] = norms[pr]
    return x_norm

#### Функция получения датасетов

In [17]:
def get_datasets():
    datasets = {
        'Неман-Белица': 'Неман',
        'Неман-Гродно': 'Неман',
        'Неман-Мосты': 'Неман',
        'Неман-Столбцы': 'Неман',
    }
    return datasets

#### Функция получения списка предикторов по названию датасета

In [18]:
def get_predictors(dataset_name, pr_group):

    datasets = get_datasets()   
    
    # predictors_lists = {
    #     'Неман': ['s_2802', 's_max', 'h', 'x', 'x1', 'x2', 'x3', 'x4', 'xs'],
    # }

    predictors_lists = {
        'Неман': (
            ['S_2802', 'Smax', 'H_2802', 'X', 'X1', 'X2', 'X3', 'Xs'],
            ['Smax', 'H_2802', 'X', 'X1', 'X3'],
            ['S_2802', 'H_2802', 'X2', 'X3', 'Xs'],
        )
    }
    return predictors_lists[datasets[dataset_name]][pr_group]
    

In [19]:
def get_norms(dataset_name):
    norms_list = {
        'Неман-Белица': {'X1': 46.0, 'X2':35.0},
        'Неман-Гродно': {'X1': 36.0, 'X2':26.0},
        'Неман-Мосты': {'x1': 40.0, 'x2':31.0},
        'Неман-Столбцы': {'x1': 43.0, 'x2':34.0},
    }
    return norms_list[dataset_name]

    

In [20]:
np.logspace(-2, 3, num=100)

array([1.00000000e-02, 1.12332403e-02, 1.26185688e-02, 1.41747416e-02,
       1.59228279e-02, 1.78864953e-02, 2.00923300e-02, 2.25701972e-02,
       2.53536449e-02, 2.84803587e-02, 3.19926714e-02, 3.59381366e-02,
       4.03701726e-02, 4.53487851e-02, 5.09413801e-02, 5.72236766e-02,
       6.42807312e-02, 7.22080902e-02, 8.11130831e-02, 9.11162756e-02,
       1.02353102e-01, 1.14975700e-01, 1.29154967e-01, 1.45082878e-01,
       1.62975083e-01, 1.83073828e-01, 2.05651231e-01, 2.31012970e-01,
       2.59502421e-01, 2.91505306e-01, 3.27454916e-01, 3.67837977e-01,
       4.13201240e-01, 4.64158883e-01, 5.21400829e-01, 5.85702082e-01,
       6.57933225e-01, 7.39072203e-01, 8.30217568e-01, 9.32603347e-01,
       1.04761575e+00, 1.17681195e+00, 1.32194115e+00, 1.48496826e+00,
       1.66810054e+00, 1.87381742e+00, 2.10490414e+00, 2.36448941e+00,
       2.65608778e+00, 2.98364724e+00, 3.35160265e+00, 3.76493581e+00,
       4.22924287e+00, 4.75081016e+00, 5.33669923e+00, 5.99484250e+00,
      

In [21]:
# from sklearn.utils.fixes import loguniform
# loguniform(1e-2, 1e0)

# import scipy.stats as stats
# stats.uniform(0, 1)

# np.logspace(-7, -5, num=10)

#np.linspace(0.01, 1.0, num=50)

#np.arange(0.1, 1.5, 50)
#np.arange(1, 11)
#np.logspace(-7, -4, num=4)
#np.append(np.linspace(1.1, 2.0, 10), [1.35])
np.linspace(0.05, 0.95, 10)

array([0.05, 0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85, 0.95])

#### Функция обучения и оценки моделей

In [22]:
def compare_models(pr_group, n_test=None, norms=True, top_best=None):
    from sklearn.linear_model import LinearRegression
    from sklearn.linear_model import Ridge, RidgeCV
    from sklearn.linear_model import Lasso, LassoCV
    from sklearn.linear_model import ElasticNet, ElasticNetCV
    from sklearn.linear_model import Lars, LarsCV
    from sklearn.linear_model import LassoLars
    from sklearn.linear_model import OrthogonalMatchingPursuit
    from sklearn.linear_model import BayesianRidge
    from sklearn.linear_model import ARDRegression
    from sklearn.linear_model import TweedieRegressor
    from sklearn.linear_model import SGDRegressor
    from sklearn.linear_model import PassiveAggressiveRegressor
    from sklearn.linear_model import HuberRegressor
    from sklearn.linear_model import TheilSenRegressor
    from sklearn.linear_model import QuantileRegressor

    from sklearn.kernel_ridge import KernelRidge

    from sklearn.gaussian_process import GaussianProcessRegressor
    from sklearn.gaussian_process.kernels import DotProduct, WhiteKernel, RBF

    import scipy.stats as stats
    from sklearn.pipeline import Pipeline, make_pipeline
    from sklearn.feature_selection import SelectKBest, SelectFromModel
    from sklearn.feature_selection import r_regression

    
    
    
    ds_dir = 'data' # В константы
    
    names = [
        'LinearRegression',
        'Ridge',
        'Lasso',
        'ElasticNet',
        'Lars1',
        'Lars2',
        'Lars3',
        'Lars4',
        'Lars5',
        # 'Lars6',
        # 'Lars7',
        # 'Lars8',
        'LassoLars',
        'OMP1',
        'OMP2',
        'OMP3',
        'OMP4',
        'OMP5',
        'BayesianRidge',
        'ARDRegression',
        #'TweedieRegressor',
        'SGDRegressor', 
        'PassiveAggressiveRegressor',
        'HuberRegressor',
        'TheilSenRegressor',
        'QuantileRegressor',
        
        
        
        #'Perceptron'
        #'GaussianProcessRegressor',
        
    ]
    rng = np.random.RandomState(0)
    #alphas = np.logspace(-2, 3, num=100)
    alphas = np.logspace(-4, 3, num=100)
    sgd_alphas = np.logspace(-4, 1, num=100)
    l1_ratio = np.linspace(0.01, 1.0, num=50)

    alphas_lambdas = np.logspace(-7, -5, num=16)
    threshold_lambdas = np.arange(5000, 15000, step=10)
    powers = np.arange(0, 4)
    losses = ['squared_error', 'huber', 'epsilon_insensitive', 'squared_epsilon_insensitive']
    cc = np.linspace(0.1, 1.5, 50)
    epsilons = np.append(np.linspace(1.1, 2.0, 10), [1.35])
    huber_alphas = np.logspace(-4, 0, num=5)
    
    alphas_init = np.linspace(0.5, 2, 5)
    lambdas_init = np.logspace(-3, 1, num=5)
    alphas_lambdas = np.logspace(-7, -4, num=4)

    n_subsamples = np.arange(6, 24)
    quantiles = np.linspace(0.05, 0.95, 10)
    q_alphas = np.linspace(0, 2, 20)
    
    
    
    
    # n_nonzero_coefs = np.arange(1, len(get_predictors(ds, pr_group)))
    
    regressors = [
        LinearRegression(),
        GridSearchCV(estimator=Ridge(random_state=rng), param_grid={"alpha": alphas}),
        GridSearchCV(estimator=Lasso(random_state=rng), param_grid={"alpha": alphas}),
        GridSearchCV(estimator=ElasticNet(random_state=rng), param_grid={"alpha": alphas, "l1_ratio": l1_ratio}, n_jobs=-1),
        Lars(n_nonzero_coefs=1),
        Lars(n_nonzero_coefs=2),
        Lars(n_nonzero_coefs=3),
        Lars(n_nonzero_coefs=4),
        Lars(n_nonzero_coefs=5),
        # Lars(n_nonzero_coefs=6),
        # Lars(n_nonzero_coefs=7),
        # Lars(n_nonzero_coefs=8),
        GridSearchCV(estimator=LassoLars(random_state=rng), param_grid={"alpha": alphas}),
        OrthogonalMatchingPursuit(n_nonzero_coefs=1),
        OrthogonalMatchingPursuit(n_nonzero_coefs=2),
        OrthogonalMatchingPursuit(n_nonzero_coefs=3),
        OrthogonalMatchingPursuit(n_nonzero_coefs=4),
        OrthogonalMatchingPursuit(n_nonzero_coefs=5),
        GridSearchCV(estimator=BayesianRidge(), param_grid={"alpha_init": alphas_init, "lambda_init": lambdas_init}, n_jobs=-1),
        GridSearchCV(estimator=ARDRegression(), param_grid={"alpha_1": alphas_lambdas, "alpha_2": alphas_lambdas,"lambda_1": alphas_lambdas,"lambda_2": alphas_lambdas}, n_jobs=-1),
        #GridSearchCV(estimator=TweedieRegressor(), param_grid={"power": powers, "alpha": alphas}, n_jobs=-1),
        GridSearchCV(estimator=SGDRegressor(random_state=rng), param_grid={"loss": losses, "alpha": sgd_alphas}, n_jobs=-1),
        GridSearchCV(estimator=PassiveAggressiveRegressor(random_state=rng), param_grid={"C": cc}, n_jobs=-1, cv=3),
        GridSearchCV(estimator=HuberRegressor(), param_grid={"epsilon": epsilons, "alpha": alphas}, n_jobs=-1, cv=5),
        GridSearchCV(estimator=TheilSenRegressor(random_state=rng), param_grid={"n_subsamples": n_subsamples}, n_jobs=-1, cv=5),
        GridSearchCV(estimator=QuantileRegressor(), param_grid={"alpha": q_alphas}, n_jobs=-1),
        
        
        
        

        # BayesianRidge(tol=1e-6),
        ##PassiveAggressiveRegressor(random_state=rng),
        ##GridSearchCV(estimator=HuberRegressor(), param_grid={"epsilon": epsilons, "alpha": huber_alphas}, n_jobs=-1),
        ##GridSearchCV(estimator=BayesianRidge(tol=1e-6), param_grid={"alpha_1": alphas_lambdas, "alpha_2": alphas_lambdas, "lambda_1": alphas_lambdas, "lambda_2": alphas_lambdas}, n_jobs=-1),       
        #GaussianProcessRegressor(kernel=RBF(length_scale=1.1) + WhiteKernel() + DotProduct(), random_state=0)
        #GridSearchCV(Pipeline(steps=[('feature_selection', SelectKBest(r_regression, k=4)), ('regression', Lasso(random_state=rng))]), param_grid={"regression__alpha": alphas}),
        #GridSearchCV(Pipeline(steps=[('feature_selection', SelectFromModel(Lars(n_nonzero_coefs=4))), ('regression', Lasso(random_state=rng))]), param_grid={"regression__alpha": alphas}),
    ]

    datasets = get_datasets()

    fieldnames = ['Predictors', 'Equations', 'Method', 'Criterion', 'Correlation', 'Pm']

    # datasets_result = {
    #     "hydropost_0": [
    #         { model_row }
    #         { model_row }
    #     ],
    #     ...,
    #     "hydropost_n": [
    #         { model_row }
    #         { model_row }
    #     ],
    # }
    
    
    # Итерация по датасетам
    datasets_result = dict()
    for ds in datasets:
        result_list = []
        
        pr_list = get_predictors(ds, pr_group)
        
        X, y = get_river_dataset(f'{ds_dir}/{ds}.csv', pr_list=pr_list)

        if n_test is not None and n_test != 0:
            X_train, y_train, X_test, y_test = train_test_split(X, y, n_test)
        else:
            X_train = X[:]
            y_train = y[:]
            X_test = X_train
            y_test = y_train

        if norms:
            norms = get_norms(ds)
            X_test = test_norm(X_test, pr_list, norms)
            
        # Итерация по моделям регрессии
        for name, regr in zip(names, regressors):
            one_model_row = dict()

            # try:
            regr.fit(X_train, y_train)
            y_predicted = regr.predict(X_test)
            #print(y_predicted)
            # except Exception:
                # continue
            
            # pipe_params = regr.get_params()
            # print(pipe_params)

            try:
                coef = regr.best_estimator_.coef_
                intercept = regr.best_estimator_.intercept_
                
                if isinstance(intercept, np.ndarray):
                    intercept = intercept[0]
                
                # coef = regr.best_estimator_.named_steps['regression'].coef_
                # intercept = regr.best_estimator_.named_steps['regression'].intercept_
                # print('cv_coef', coef)
                print('cv_intercept', intercept, type(intercept))
                # print(ds, regr.best_estimator_.alpha_)
            except Exception as error:
                
                coef = regr.coef_
                intercept = regr.intercept_
                
                if isinstance(intercept, np.ndarray):
                    intercept = intercept[0]
                
                # print('rg_coef', coef)
                print('rg_intercept', intercept, type(intercept))
                # print(ds, regr.alpha_)
                print(error)
           
                    
                
            # Коэффициенты уравнения (если есть)
            print('COEF', coef)
            coef = np.around(coef, 3)
            intercept = round(intercept, 3)
            try:
                predictors_coef = {f: c for f, c in zip(pr_list, coef) if c != 0.0}
                predictors = ", ".join(predictors_coef.keys())
                #predictors = predictors.upper()
                print(intercept, predictors_coef.items())
                #equation = str(round(intercept, 2)) + ' ' + ' '.join(str(round(c, 2))+'*'+f for f, c in predictors_coef.items())
                equation = str(intercept) + ' ' + ' '.join(str(c) + '*' + f for f, c in predictors_coef.items())
                equation = equation.replace(" -", "-")
                equation = equation.replace(" ", " + ")
                equation = equation.replace("-", " - ")
                #equation = equation.upper()
    
                one_model_row['Predictors'] = predictors
                one_model_row['Equations'] = equation
            except Exception as error:
                print(error)
                one_model_row['Predictors'] = ""
                one_model_row['Equations'] = ""

            # Название датасета
            one_model_row['Dataset_name'] = ds

            # Группа предикторов
            one_model_row['Group'] = pr_group
                
            # Название метода
            one_model_row['Method'] = name

            # Расчет показателей качества по методике
            
            one_model_row['H_sum'] = get_sum(y)
            one_model_row['H_max'] = get_max(y)
            one_model_row['H_min'] = get_min(y)
            
            # Среднее значение максимального уровня по всей выборке
            h_max_avg = get_hmax_avg(y)
            one_model_row['H_avg'] = h_max_avg
            
            # Среднеквадратическое отклонение
            sigma = get_sigma(y)
            one_model_row['Sigma'] = sigma

            # Допустимая погрешность прогноза
            delta_dop = get_delta_dop(sigma)
            one_model_row['Delta_dop'] = delta_dop

            # Обеспеченность климатическая Pk
            pk = get_pk(y_test, h_max_avg, delta_dop)
            one_model_row['Pk'] = pk

            # Обеспеченность метода (оправдываемость) Pm
            pm = get_pm(y_test, y_predicted, delta_dop)
            one_model_row['Pm'] = pm

            # Среднеквадратическая погрешность прогноза
            s_forecast = get_s(y_test, y_predicted)
            one_model_row['S'] = s_forecast
            
            # Критерий эффективности метода прогнозирования климатический S/sigma
            criterion_forecast = get_criterion(s_forecast, sigma)
            one_model_row['Criterion'] = criterion_forecast

            # Критерий эффективности метода прогнозирования климатический S/sigma в квадрате
            criterion_sqr = get_criterion(s_forecast, sigma) ** 2.0
            one_model_row['Criterion_sqr'] = criterion_sqr
            
            # Корреляционное отношение ro
            correlation_forecast = get_correlation_ratio(criterion_forecast)
            one_model_row['Correlation'] = correlation_forecast
                        
            # Model
            one_model_row['Model'] = regr

            # models_list.append(one_model_row)
            result_list.append(one_model_row)

        # Сортировка результатов по каждому датасету
        result_list.sort(key=lambda row: (row['Criterion'], -row['Correlation'], -row['Pm']))

        datasets_result[ds] = result_list

        # Запись в .csv файл
        write_dataset_csv(result_list, ds, fieldnames, pr_group=pr_group)

        for i, rl in enumerate(result_list):
            if top_best is not None:
                if i >= top_best:
                    break
            verify_forecast(ds, rl, i, pr_group=pr_group, n_test=n_test)

    return datasets_result

#### Функция формирования проверочных прогнозов

In [23]:
def verify_forecast(dataset_name, rl, num, pr_group, n_test=None, norms=True):

    ds_dir = 'data' # В константы

    pr_list = get_predictors(dataset_name, pr_group)
    pr_list = ['year'] + pr_list
    
    fieldnames = [
        '№', 'Год',
        'Hmax фактический', 'Hф-Hср', '(Hф-Hср)^2', 'δ50% Погрешность климатических прогнозов в долях от допустимой погрешности',
        'Hmax прогнозный', 'Hф-Hп', '(Hф-Hп)^2', 'δ50% Погрешность проверочных прогнозов в долях от допустимой погрешности',
    ]

    # fieldnames = [
    #     'Year',
    #     'Hmax fact', 'Hf-Havg', '(Hf-Havg)^2', 'Error climate',
    #     'Hmax forecast', 'Hf-Hfor', '(Hf-Hfor)^2', 'Error forecast',
    # ]

    X, y = get_river_dataset(f'{ds_dir}/{dataset_name}.csv', pr_list=pr_list, y_name='H_max')

    if n_test is not None and n_test != 0:
        _, _, X_test, y_test = train_test_split(X, y, n_test)
    else:
        X_test = X
        y_test = y

    if norms:
        norms = get_norms(dataset_name)
        X_test = test_norm(X_test, pr_list, norms)

    # Выделение первой колонки (года) из набора предикторов
    years = X_test[:, 0]
    X_test = X_test[:, 1:]
    
    # Forecast
    #h_max_forecast = np.around(model.predict(X_test))
    h_max_forecast = rl['Model'].predict(X_test)
    #print(h_max_forecast)
    
    # Hсредний
    #h_max_avg = np.around(np.mean(y))
    h_max_avg = np.mean(y)

    # H - Hсредний
    diff_fact = y_test - h_max_avg

    # (H - Hсредний) в квадрате
    diff_fact_sqr = diff_fact ** 2

    # Погрешность климатических прогнозов в долях от допустимой погрешности
    delta_dop = get_delta_dop(get_sigma(y))
    error_climate = get_delta50(y_test, delta_dop, h_max_avg=h_max_avg)

    # H - Hпрогнозный
    diff_forecast = y_test - h_max_forecast

    # (H - Hпрогнозный) в квадрате
    diff_forecast_sqr = diff_forecast ** 2       

    # Погрешность проверочных прогнозов в долях от допустимой погрешности
    error_forecast = get_delta50(y_test, delta_dop, h_max_forecast=h_max_forecast)

    # Номер по порядку
    rows_num = y_test.shape[0]
    npp = np.arange(1, rows_num + 1, 1)

    # Конкатенация массивов
    att_tuple = (npp, years, y_test, diff_fact, diff_fact_sqr, error_climate, h_max_forecast, diff_forecast, diff_forecast_sqr, error_forecast)
    arr = np.column_stack(att_tuple)
    arr = arr.tolist()

    # Обеспеченность метода (оправдываемость) Pm
    pm = get_pm(y_test, h_max_forecast, delta_dop)
    
    # Запись проверочного прогноза в csv файл
    with open(f'results/{dataset_name}/group-{pr_group}/{dataset_name}-проверочный-гр{pr_group}-{num:0>2}.csv', 'w', newline='', encoding='utf-8') as csvfile: #, encoding='utf-8'
        stat_header = (
            f"Таблица  - Проверочные прогнозы максимумов весеннего половодья\n"
            f"р.{rl['Dataset_name']}\n"
            f"Предикторы:;; {rl['Predictors']}\n"
            f"Уравнение:;; {rl['Equations']}\n"
            f"Модель:;; {rl['Method']}\n\n"
        )
        csvfile.write(stat_header)
       
        writer = csv.writer(csvfile, delimiter=';')
        writer.writerow(fieldnames)
        writer.writerows(arr)
        
        stat_footer = (
            f"Сумма;;{rl['H_sum']}\n"  
            f"Средний;;{rl['H_avg']}\n" 
            f"Высший;;{rl['H_max']}\n"
            f"Низший;;{rl['H_min']}\n\n"
            
            f"σ = ;;{rl['Sigma']};;σ -;среднеквадратическое отклонение (см)\n" 
            f"δдоп =;;{rl['Delta_dop']};;δдоп -;допустимая погрешность прогноза (см)\n" 
            f"Pк =;;{rl['Pk']};;Pк -;климатическая обеспеченность в %\n"
            f"Pм =;;{rl['Pm']};;Pм -;обеспеченность метода в %\n"
            f"S =;;{rl['S']};;;(допустимой погрешности проверочных прогнозов)\n"
            f"S/σ =;;{rl['Criterion']};;S -;среднеквадратическая погрешность (см)\n" 
            f"(S/σ)^2 =;;{rl['Criterion_sqr']};;S/σ -;критерий эффективности метода прогнозирования\n"
            f"ρ =;;{rl['Correlation']};;ρ -;корреляционное отношение\n"
            f";;;;;(оценка эффективности метода прогнозирования)\n"
            f";;;;δ50% -;погрешность (ошибка) прогнозов (см)\n"
        )
        
        csvfile.write(stat_footer) 

In [24]:
top_best = None
result = compare_models(pr_group=1, n_test=0, norms=True, top_best=top_best)          
result = compare_models(pr_group=2, n_test=0, norms=True, top_best=top_best)

rg_intercept 67.03603750643762 <class 'numpy.float64'>
'LinearRegression' object has no attribute 'best_estimator_'
COEF [1.33679606 0.13208292 0.51642084 0.50910961 0.30562716]
67.036 dict_items([('Smax', 1.337), ('H_2802', 0.132), ('X', 0.516), ('X1', 0.509), ('X3', 0.306)])
cv_intercept 71.39038596428838 <class 'numpy.float64'>
COEF [1.29751278 0.13573773 0.50858066 0.47637203 0.30245078]
71.39 dict_items([('Smax', 1.298), ('H_2802', 0.136), ('X', 0.509), ('X1', 0.476), ('X3', 0.302)])
cv_intercept 115.62785494636356 <class 'numpy.float64'>
COEF [1.24606794 0.04013546 0.38206321 0.30954099 0.28238926]
115.628 dict_items([('Smax', 1.246), ('H_2802', 0.04), ('X', 0.382), ('X1', 0.31), ('X3', 0.282)])
cv_intercept 104.8562622062845 <class 'numpy.float64'>
COEF [1.03323667 0.13848729 0.45147163 0.28206004 0.27610317]
104.856 dict_items([('Smax', 1.033), ('H_2802', 0.138), ('X', 0.451), ('X1', 0.282), ('X3', 0.276)])
rg_intercept 265.77888449549846 <class 'numpy.float64'>
'Lars' object h



cv_intercept 13.191997261028652 <class 'numpy.float64'>
COEF [1.25757876 0.36621312 0.73489019 0.57558119 0.23867321]
13.192 dict_items([('Smax', 1.258), ('H_2802', 0.366), ('X', 0.735), ('X1', 0.576), ('X3', 0.239)])
rg_intercept 116.78428548199801 <class 'numpy.float64'>
'LinearRegression' object has no attribute 'best_estimator_'
COEF [ 2.385955   -0.16834766 -0.10549564  0.54218642  0.41517187]
116.784 dict_items([('Smax', 2.386), ('H_2802', -0.168), ('X', -0.105), ('X1', 0.542), ('X3', 0.415)])
cv_intercept 121.32186164520743 <class 'numpy.float64'>
COEF [ 2.30028856 -0.16324766 -0.09137188  0.50691849  0.40957155]
121.322 dict_items([('Smax', 2.3), ('H_2802', -0.163), ('X', -0.091), ('X1', 0.507), ('X3', 0.41)])
cv_intercept 191.7192285804181 <class 'numpy.float64'>
COEF [ 1.6106888  -0.         -0.          0.          0.03305793]
191.719 dict_items([('Smax', 1.611), ('X3', 0.033)])
cv_intercept 191.7192285804181 <class 'numpy.float64'>
COEF [ 1.6106888  -0.         -0.         



cv_intercept 175.1857566954158 <class 'numpy.float64'>
COEF [ 1.73660013e+00 -9.13212103e-02 -1.81618863e-01  8.89129499e-12
  2.30910407e-01]
175.186 dict_items([('Smax', 1.737), ('H_2802', -0.091), ('X', -0.182), ('X3', 0.231)])
rg_intercept 10.942337167421698 <class 'numpy.float64'>
'LinearRegression' object has no attribute 'best_estimator_'
COEF [ 1.94817236 -0.01829083  0.53118734  0.89369827  0.38069265]
10.942 dict_items([('Smax', 1.948), ('H_2802', -0.018), ('X', 0.531), ('X1', 0.894), ('X3', 0.381)])
cv_intercept 19.396085865108944 <class 'numpy.float64'>
COEF [ 1.87664051 -0.01558151  0.51939129  0.82071924  0.37272106]
19.396 dict_items([('Smax', 1.877), ('H_2802', -0.016), ('X', 0.519), ('X1', 0.821), ('X3', 0.373)])
cv_intercept 149.68000637612244 <class 'numpy.float64'>
COEF [1.40669577 0.         0.11521148 0.         0.07678466]
149.68 dict_items([('Smax', 1.407), ('X', 0.115), ('X3', 0.077)])
cv_intercept 149.68000637612244 <class 'numpy.float64'>
COEF [1.40669577 0. 



cv_intercept 84.90537471971952 <class 'numpy.float64'>
COEF [1.78675390e+00 7.28637234e-02 4.47871736e-10 2.30029845e-01
 1.39378186e-01]
84.905 dict_items([('Smax', 1.787), ('H_2802', 0.073), ('X1', 0.23), ('X3', 0.139)])
rg_intercept 114.76413437530005 <class 'numpy.float64'>
'LinearRegression' object has no attribute 'best_estimator_'
COEF [ 0.47528777 -0.17147738  0.29467437  0.23840365  0.22044957]
114.764 dict_items([('Smax', 0.475), ('H_2802', -0.171), ('X', 0.295), ('X1', 0.238), ('X3', 0.22)])
cv_intercept 116.3751535778987 <class 'numpy.float64'>
COEF [ 0.46884824 -0.16307048  0.28806608  0.22274791  0.21542146]
116.375 dict_items([('Smax', 0.469), ('H_2802', -0.163), ('X', 0.288), ('X1', 0.223), ('X3', 0.215)])
cv_intercept 114.76415303135106 <class 'numpy.float64'>
COEF [ 0.47528773 -0.1714772   0.29467426  0.2384034   0.22044948]
114.764 dict_items([('Smax', 0.475), ('H_2802', -0.171), ('X', 0.295), ('X1', 0.238), ('X3', 0.22)])
cv_intercept 129.92483887221917 <class 'nump



cv_intercept 113.53761947138763 <class 'numpy.float64'>
COEF [ 0.47393164 -0.27944105  0.25614494  0.37597751  0.25447751]
113.538 dict_items([('Smax', 0.474), ('H_2802', -0.279), ('X', 0.256), ('X1', 0.376), ('X3', 0.254)])
rg_intercept 88.99987015107098 <class 'numpy.float64'>
'LinearRegression' object has no attribute 'best_estimator_'
COEF [1.44798087 0.20384832 0.95871042 0.23285844 0.30338826]
89.0 dict_items([('S_2802', 1.448), ('H_2802', 0.204), ('X2', 0.959), ('X3', 0.233), ('Xs', 0.303)])
cv_intercept 94.22603990072099 <class 'numpy.float64'>
COEF [1.41656596 0.19443908 0.91262891 0.23307958 0.29893326]
94.226 dict_items([('S_2802', 1.417), ('H_2802', 0.194), ('X2', 0.913), ('X3', 0.233), ('Xs', 0.299)])
cv_intercept 201.65393695941043 <class 'numpy.float64'>
COEF [1.20582148 0.         0.35710474 0.14049107 0.03740137]
201.654 dict_items([('S_2802', 1.206), ('X2', 0.357), ('X3', 0.14), ('Xs', 0.037)])
cv_intercept 135.95663481916134 <class 'numpy.float64'>
COEF [1.14818564 0



cv_intercept 72.91095655138314 <class 'numpy.float64'>
COEF [1.1877222  0.25318147 1.2667895  0.26000938 0.3301264 ]
72.911 dict_items([('S_2802', 1.188), ('H_2802', 0.253), ('X2', 1.267), ('X3', 0.26), ('Xs', 0.33)])
rg_intercept 123.43288837951042 <class 'numpy.float64'>
'LinearRegression' object has no attribute 'best_estimator_'
COEF [ 2.21211088 -0.08351351  0.81306523  0.43576761  0.19149877]
123.433 dict_items([('S_2802', 2.212), ('H_2802', -0.084), ('X2', 0.813), ('X3', 0.436), ('Xs', 0.191)])
cv_intercept 127.97972445206554 <class 'numpy.float64'>
COEF [ 2.1559059  -0.08517689  0.76973201  0.42672638  0.18977021]
127.98 dict_items([('S_2802', 2.156), ('H_2802', -0.085), ('X2', 0.77), ('X3', 0.427), ('Xs', 0.19)])
cv_intercept 205.90295273945708 <class 'numpy.float64'>
COEF [ 1.85684303 -0.          0.          0.07520332  0.        ]
205.903 dict_items([('S_2802', 1.857), ('X3', 0.075)])
cv_intercept 205.90295273945708 <class 'numpy.float64'>
COEF [ 1.85684303 -0.          0. 



cv_intercept 148.88597582947057 <class 'numpy.float64'>
COEF [ 1.74316492e+00  9.42589400e-12  2.28807878e-01  3.87873044e-01
 -3.32031363e-12]
148.886 dict_items([('S_2802', 1.743), ('X2', 0.229), ('X3', 0.388)])
rg_intercept 24.258142178724825 <class 'numpy.float64'>
'LinearRegression' object has no attribute 'best_estimator_'
COEF [1.82318151 0.14710803 1.30399945 0.35748074 0.48196035]
24.258 dict_items([('S_2802', 1.823), ('H_2802', 0.147), ('X2', 1.304), ('X3', 0.357), ('Xs', 0.482)])
cv_intercept 31.211444656805895 <class 'numpy.float64'>
COEF [1.78221346 0.14121748 1.22779586 0.34775753 0.47178017]
31.211 dict_items([('S_2802', 1.782), ('H_2802', 0.141), ('X2', 1.228), ('X3', 0.348), ('Xs', 0.472)])
cv_intercept 197.04879351588698 <class 'numpy.float64'>
COEF [1.32679632 0.         0.         0.         0.        ]
197.049 dict_items([('S_2802', 1.327)])
cv_intercept 197.04879351588698 <class 'numpy.float64'>
COEF [1.32679632 0.         0.         0.         0.        ]
197.049



cv_intercept 103.09402838433175 <class 'numpy.float64'>
COEF [1.68477515 0.24613046 0.5068796  0.04542093 0.00772254]
103.094 dict_items([('S_2802', 1.685), ('H_2802', 0.246), ('X2', 0.507), ('X3', 0.045), ('Xs', 0.008)])
rg_intercept 88.65173873975867 <class 'numpy.float64'>
'LinearRegression' object has no attribute 'best_estimator_'
COEF [0.55643116 0.13826359 0.73737418 0.22462987 0.14894077]
88.652 dict_items([('S_2802', 0.556), ('H_2802', 0.138), ('X2', 0.737), ('X3', 0.225), ('Xs', 0.149)])
cv_intercept 92.60675882186315 <class 'numpy.float64'>
COEF [0.5447606  0.12437044 0.69997863 0.22202913 0.14661658]
92.607 dict_items([('S_2802', 0.545), ('H_2802', 0.124), ('X2', 0.7), ('X3', 0.222), ('Xs', 0.147)])
cv_intercept 118.92380215730685 <class 'numpy.float64'>
COEF [0.48575563 0.01996025 0.5645517  0.19626231 0.11373619]
118.924 dict_items([('S_2802', 0.486), ('H_2802', 0.02), ('X2', 0.565), ('X3', 0.196), ('Xs', 0.114)])
cv_intercept 114.03158521649837 <class 'numpy.float64'>
CO

