# Подключим необходимые библиотеки

In [None]:
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
from sklearn.metrics import *
from collections import defaultdict
from sklearn.linear_model import LinearRegression
from catboost import CatBoostRegressor
import warnings
import plotly.express as px
import os, shutil, pickle
import plotly.graph_objs as go
warnings.filterwarnings("ignore")

# Посмотрим на данные и обработаем их

In [None]:
# Load the call log from the attached dataset directory.
df = pd.read_csv('../input/good-data/data_processed.csv')
df.isna().sum()  # number of missing values in every column

In [None]:
def preproc(df):
    """Clean the raw call log and build an hourly ``date_time`` column.

    Steps:
      * drop rows with a missing ``call_time``, ``substation`` or
        ``hospitalized_to``;
      * cut ``call_number`` at the first ``(``;
      * drop the last five characters of the time string and append
        ``00:00`` in their place, then prepend ``call_date``;
      * discard rows whose truncated time string contains ``1970``;
      * replace ``call_time``/``call_date`` with a parsed ``date_time`` column.

    Args:
        df: raw call log containing the columns ``call_time``, ``call_date``,
            ``call_number``, ``substation`` and ``hospitalized_to``.

    Returns:
        A new DataFrame; the frame passed in is left untouched.
    """
    # Work on an explicit copy: the original assigned through
    # ``df[...].iloc[i] = ...`` on a filtered frame, which may write to a
    # temporary object instead of the frame that is returned.
    cleaned = df.dropna(subset=['call_time', 'substation', 'hospitalized_to']).copy()

    # Time string without its last five characters.
    time_prefix = cleaned['call_time'].astype(str).str[:-5]
    is_real_time = ~time_prefix.str.contains('1970', regex=False)

    # Keep only the part of the call number that precedes "(".
    cleaned['call_number'] = cleaned['call_number'].str.split('(').str[0]

    stamp_text = cleaned['call_date'].astype(str) + ' ' + time_prefix + '00:00'

    cleaned = cleaned.drop(columns=['call_time', 'call_date'])
    cleaned = cleaned.loc[is_real_time].copy()
    cleaned['date_time'] = pd.to_datetime(stamp_text[is_real_time])
    return cleaned

# Replace the raw frame with its cleaned version and preview the first rows.
df = preproc(df)
df.head()

In [None]:
# Calendar parts of the call timestamp, used by the exploratory plots.
for part in ('hour', 'day', 'month', 'dayofweek'):
    df[part] = getattr(df['date_time'].dt, part)

# Посмотрим на графики

In [None]:
import plotly.io as pio  # register a custom dark theme shared by every figure
from plotly.graph_objs.layout import Template

# Colours that recur throughout the template.
_BG = '#002845'
_GRID = '#506784'
_AXIS_GRID = '#4f6372'
_CARPET_EDGE = '#A2B1C6'
_LIGHT = '#C8D4E3'
_WHITE = '#ffffff'
_SCATTER_EDGE = '#283442'

# Sequential colour scale reused by every trace type that takes one.
_SEQUENTIAL_STOPS = (
    (0.0, '#0d0887'),
    (0.1111111111111111, '#46039f'),
    (0.2222222222222222, '#7201a8'),
    (0.3333333333333333, '#9c179e'),
    (0.4444444444444444, '#bd3786'),
    (0.5555555555555556, '#d8576b'),
    (0.6666666666666666, '#ed7953'),
    (0.7777777777777778, '#fb9f3a'),
    (0.8888888888888888, '#fdca26'),
    (1.0, '#f0f921'),
)


# Each builder returns a fresh object so no dict/list is shared between entries.
def _sequential():
    return [[position, colour] for position, colour in _SEQUENTIAL_STOPS]


def _colorbar():
    return {'outlinewidth': 0, 'ticks': ''}


def _pattern():
    return {'fillmode': 'overlay', 'size': 10, 'solidity': 0.2}


def _bar_marker():
    return {'line': {'color': _BG, 'width': 0.5}, 'pattern': _pattern()}


def _carpet_axis():
    return {'endlinecolor': _CARPET_EDGE,
            'gridcolor': _GRID,
            'linecolor': _GRID,
            'minorgridcolor': _GRID,
            'startlinecolor': _CARPET_EDGE}


def _with_colorbar(kind):
    return [{'colorbar': _colorbar(), 'type': kind}]


def _with_colorscale(kind):
    return [{'colorbar': _colorbar(), 'colorscale': _sequential(), 'type': kind}]


def _marker_colorbar(kind):
    return [{'marker': {'colorbar': _colorbar()}, 'type': kind}]


def _marker_outline(kind):
    return [{'marker': {'line': {'color': _SCATTER_EDGE}}, 'type': kind}]


def _grid_axis():
    return {'gridcolor': _GRID, 'linecolor': _GRID, 'ticks': ''}


def _scene_axis():
    return {'backgroundcolor': _BG,
            'gridcolor': _GRID,
            'gridwidth': 2,
            'linecolor': _GRID,
            'showbackground': True,
            'ticks': '',
            'zerolinecolor': _LIGHT}


def _cartesian_axis():
    return {'automargin': True,
            'gridcolor': _AXIS_GRID,
            'linecolor': _GRID,
            'ticks': '',
            'title': {'standoff': 15},
            'zerolinecolor': _AXIS_GRID,
            'zerolinewidth': 2}


pio.templates['custom_dark'] = Template({
    # Per-trace-type defaults.
    'data': {
        'bar': [{'error_x': {'color': _WHITE},
                 'error_y': {'color': _WHITE},
                 'marker': _bar_marker(),
                 'type': 'bar'}],
        'barpolar': [{'marker': _bar_marker(), 'type': 'barpolar'}],
        'carpet': [{'aaxis': _carpet_axis(), 'baxis': _carpet_axis(), 'type': 'carpet'}],
        'choropleth': _with_colorbar('choropleth'),
        'contour': _with_colorscale('contour'),
        'contourcarpet': _with_colorbar('contourcarpet'),
        'heatmap': _with_colorscale('heatmap'),
        'heatmapgl': _with_colorscale('heatmapgl'),
        'histogram': [{'marker': {'pattern': _pattern()}, 'type': 'histogram'}],
        'histogram2d': _with_colorscale('histogram2d'),
        'histogram2dcontour': _with_colorscale('histogram2dcontour'),
        'mesh3d': _with_colorbar('mesh3d'),
        'parcoords': [{'line': {'colorbar': _colorbar()}, 'type': 'parcoords'}],
        'pie': [{'automargin': True, 'type': 'pie'}],
        'scatter': _marker_outline('scatter'),
        'scatter3d': [{'line': {'colorbar': _colorbar()},
                       'marker': {'colorbar': _colorbar()},
                       'type': 'scatter3d'}],
        'scattercarpet': _marker_colorbar('scattercarpet'),
        'scattergeo': _marker_colorbar('scattergeo'),
        'scattergl': _marker_outline('scattergl'),
        'scattermapbox': _marker_colorbar('scattermapbox'),
        'scatterpolar': _marker_colorbar('scatterpolar'),
        'scatterpolargl': _marker_colorbar('scatterpolargl'),
        'scatterternary': _marker_colorbar('scatterternary'),
        'surface': _with_colorscale('surface'),
        'table': [{'cells': {'fill': {'color': _GRID}, 'line': {'color': _BG}},
                   'header': {'fill': {'color': '#2a3f5f'}, 'line': {'color': _BG}},
                   'type': 'table'}],
    },
    # Figure-level defaults.
    'layout': {
        'annotationdefaults': {'arrowcolor': _WHITE, 'arrowhead': 0, 'arrowwidth': 1},
        'autotypenumbers': 'strict',
        'coloraxis': {'colorbar': _colorbar()},
        'colorscale': {'diverging': [[0, '#8e0152'], [0.1, '#c51b7d'],
                                     [0.2, '#de77ae'], [0.3, '#f1b6da'],
                                     [0.4, '#fde0ef'], [0.5, '#f7f7f7'],
                                     [0.6, '#e6f5d0'], [0.7, '#b8e186'],
                                     [0.8, '#7fbc41'], [0.9, '#4d9221'],
                                     [1, '#276419']],
                       'sequential': _sequential(),
                       'sequentialminus': _sequential()},
        'colorway': ['#e898ac', '#00cfcc', '#ff9973', '#FECB52', '#ffd6e1', '#19d3f3',
                     '#f64975', '#B6E880', '#FF97FF', '#FECB52'],
        'font': {'color': _WHITE, 'family': 'Jost', 'size': 15},
        'geo': {'bgcolor': _BG,
                'lakecolor': _BG,
                'landcolor': _BG,
                'showlakes': True,
                'showland': True,
                'subunitcolor': _GRID},
        'hoverlabel': {'align': 'left'},
        'hovermode': 'closest',
        'mapbox': {'style': 'dark'},
        'paper_bgcolor': _BG,
        'plot_bgcolor': _BG,
        'polar': {'angularaxis': _grid_axis(),
                  'bgcolor': _BG,
                  'radialaxis': _grid_axis()},
        'scene': {'xaxis': _scene_axis(),
                  'yaxis': _scene_axis(),
                  'zaxis': _scene_axis()},
        'shapedefaults': {'line': {'color': _WHITE}},
        'sliderdefaults': {'bgcolor': _LIGHT, 'bordercolor': _BG, 'borderwidth': 1, 'tickwidth': 0},
        'ternary': {'aaxis': _grid_axis(),
                    'baxis': _grid_axis(),
                    'bgcolor': _BG,
                    'caxis': _grid_axis()},
        'title': {'x': 0.05},
        'updatemenudefaults': {'bgcolor': _GRID, 'borderwidth': 0},
        'xaxis': _cartesian_axis(),
        'yaxis': _cartesian_axis(),
    },
})

pio.templates.default = 'custom_dark'

In [None]:
# Distribution of calls over the hour of the day.
px.histogram(df['hour'], nbins=24, title='Распределение вызовов по часам')

In [None]:
# Distribution of calls over the month of the year.
px.histogram(df['month'], nbins=12, title='Распределение вызовов по месяцам')

In [None]:
# Distribution of calls over the day of the month.
px.histogram(df['day'], nbins=31, title='Распределение вызовов по дням')

In [None]:
# Distribution of calls over the day of the week.
px.histogram(df['dayofweek'], nbins=7, title='Распределение вызовов по дням недели')

In [None]:
def preproc2(df):
    """Aggregate the call log into hourly call counts per substation.

    The result has one row for every hour between the earliest and the latest
    ``date_time`` (stepping one hour from the earliest value) and one integer
    column per substation, in order of first appearance, holding the number of
    calls that fall into that hour. Hours without calls are filled with zeros.

    Compared with the previous loop-based version:
      * counting is vectorized;
      * a timestamp that does not sit exactly on the hourly grid is counted in
        the hour bucket that contains it instead of raising ``IndexError``;
      * rows with a missing ``date_time`` or ``substation`` are ignored;
      * an empty input yields an empty frame instead of raising ``TypeError``.

    Args:
        df: call log with a datetime ``date_time`` column and a ``substation``
            column.

    Returns:
        DataFrame with a ``date`` column followed by one count column per
        substation.
    """
    calls = df.dropna(subset=['date_time', 'substation'])
    if calls.empty:
        return pd.DataFrame(columns=['date'])

    stamps = pd.to_datetime(calls['date_time'])
    one_hour = pd.Timedelta(hours=1)
    first, last = stamps.min(), stamps.max()
    hours = pd.date_range(first, last, freq=one_hour)

    # Row = whole hours elapsed since the first call; column = substation code
    # (factorize numbers the values in order of first appearance).
    row_idx = ((stamps - first) // one_hour).to_numpy()
    col_idx, stations = pd.factorize(calls['substation'])

    counts = np.zeros((len(hours), len(stations)), dtype=np.int64)
    # np.add.at accumulates repeated (row, column) pairs, unlike ``counts[r, c] += 1``.
    np.add.at(counts, (row_idx, col_idx), 1)

    result = pd.DataFrame(counts, columns=list(stations))
    result.insert(0, 'date', hours)
    return result

# Hourly per-substation call counts: the table all models are trained on.
ft = preproc2(df)

In [None]:
# Total number of calls per hour: sum every substation column row by row.
# Vectorized replacement for the former ``ft.iloc[i]`` loop.
sms = ft.drop(columns='date').sum(axis=1).tolist()
print(np.mean(sms), np.max(sms), np.min(sms))  # mean / max / min calls per hour

# Обучим модели
Для каждой подстанции мы обучим 5 моделей CatBoost с разными сидами (их предсказания усредняются) и 2 линейные регрессии: одна убирает тренд, другая нормирует амплитуду.

In [None]:
def rmse(y_true, y_pred):
    """Return the root-mean-squared error between targets and predictions."""
    mse = mean_squared_error(y_true, y_pred)
    return mse ** 0.5

def get_funcs():
    """Build the element-wise transforms used as extra features.

    For every ``i`` in 1..5 six functions are produced, in this order:
    ``sin(i*x)``, ``cos(i*x)``, ``tanh(i*x)``, ``sin(x/i)``, ``cos(x/i)``,
    ``tanh(x/i)``.

    Bug fix: the former lambdas closed over the loop variable ``i``, which is
    looked up when the lambda is *called*; after the loop ``i`` is 5, so every
    function silently used 5 regardless of its name. Each function now captures
    its own factor.

    NOTE: feature values therefore differ from the pre-fix behaviour. Models
    fitted on the old features must be retrained, and every other copy of this
    function (the inference cell defines one) must use the same definition.

    Returns:
        ``(funcs, names)``: two parallel lists of 30 callables and 30 labels.
    """
    def _scaled(op, factor, divide):
        # The factory call gives each closure its own ``factor`` binding.
        if divide:
            return lambda x: op(x / factor)
        return lambda x: op(factor * x)

    res = []
    names = []
    ops = (('sin', np.sin), ('cos', np.cos), ('tanh', np.tanh))
    for i in range(1, 6):
        for label, op in ops:
            res.append(_scaled(op, i, divide=False))
            names.append(f'{label}({i}*x)')
        for label, op in ops:
            res.append(_scaled(op, i, divide=True))
            names.append(f'{label}(x/{i})')
    return res, names

def make_features(df):
    """Derive calendar and trigonometric features from the ``date`` column.

    Adds ``hour``, ``day``, ``month``, ``day_of_week``, four part-of-day flags,
    the ``full_hours`` index and, for every function returned by
    ``get_funcs()``, one column per base column (``hour``, ``day``, ``month``)
    named ``"<function name>_func_<column>"``. Columns are appended in exactly
    that order.

    The work is done on a copy, so the frame passed in is never modified
    (the previous version wrote all new columns into the caller's frame,
    which was typically a slice of another frame).

    Args:
        df: frame with a ``date`` column convertible by ``pd.to_datetime``;
            any other columns are carried over unchanged.

    Returns:
        A new DataFrame with the feature columns added and ``date`` removed.
    """
    out = df.copy()
    out['date'] = pd.to_datetime(out['date'])
    out['hour'] = out['date'].dt.hour
    out['day'] = out['date'].dt.day
    out['month'] = out['date'].dt.month
    out['day_of_week'] = out['date'].dt.dayofweek

    # Part-of-day flags: 4-12 morning, 13-18 day, 19+ evening, 0-3 night.
    out['is_morning'] = (out['hour'] >= 4) & (out['hour'] <= 12)
    out['is_day'] = (out['hour'] >= 13) & (out['hour'] <= 18)
    out['is_evening'] = out['hour'] >= 19
    out['is_night'] = out['hour'] < 4

    # Approximate hour index since 2015 (365-day years, 30-day months).
    # NOTE(review): not strictly monotonic (e.g. Jan 31 and Feb 1 map to the
    # same day); left as is because the regressions fitted on this column
    # depend on this exact scale.
    out['full_hours'] = ((out['date'].dt.year - 2015) * 365 + out['month'] * 30 + out['day']) * 24 + out['hour']

    funcs, nms = get_funcs()
    for name, func in zip(nms, funcs):
        for col in ['hour', 'day', 'month']:
            out[f"{name}_func_{col}"] = func(out[col])
    return out.drop(columns=['date'])

scores = []  # validation RMSE of each per-substation ensemble
test_size = 24*30  # validation window: the last 30 days of hourly rows
# exist_ok replaces the former bare ``except: pass``, which also hid real
# failures such as permission errors.
os.makedirs("models", exist_ok=True)
for col in tqdm(ft.columns[1:]):
    # Independent copy so feature engineering never writes into a slice of ft.
    train = ft[['date', col]].copy()
    times = train['date'].iloc[-test_size:]  # timestamps of the validation rows, for plotting
    train = make_features(train)
    os.makedirs(f"models/{col}", exist_ok=True)

    # Step 1: remove the linear trend over the hour index.
    # NOTE(review): the trend and amplitude regressions below are fitted before
    # the train/validation split, so they also see the validation rows.
    trend_fts = train[['full_hours', col]].copy()
    trend = LinearRegression().fit(X=trend_fts.drop(columns=col), y=trend_fts[col])
    trend_df = train.copy()
    trend_df[col] -= trend.predict(trend_fts.drop(columns=col))
    with open(f"models/{col}/trend_model.pkl", 'wb') as f:
        pickle.dump(trend, f)

    # Step 2: model how the amplitude changes. The per-day standard deviation of
    # the detrended series is regressed on the hour index.
    shrinkage_fts = trend_df[['full_hours', col]].copy()
    shrinkage_fts = shrinkage_fts.join(shrinkage_fts.groupby(shrinkage_fts['full_hours'] // 24).std()[col], on=shrinkage_fts['full_hours'] // 24, rsuffix='_max_min')
    shrinkage_fts.drop(columns=col, inplace=True)
    shrinkage = LinearRegression().fit(X=shrinkage_fts.drop(columns=f"{col}_max_min"), y=shrinkage_fts[f"{col}_max_min"])

    # Divide the detrended target by the predicted amplitude.
    # NOTE(review): nothing guards against a zero or negative predicted amplitude.
    shrinkage_df = trend_df.copy()
    shrinkage_df[col] /= shrinkage.predict(shrinkage_fts.drop(columns=f"{col}_max_min"))
    train = shrinkage_df.copy()
    with open(f"models/{col}/shrink_model.pkl", 'wb') as f:
        pickle.dump(shrinkage, f)

    # Step 3: fit the CatBoost ensemble on everything except the last test_size rows.
    train, test = train.iloc[:-test_size], train.iloc[-test_size:]
    feature_cols = [x for x in train if x != col]
    X_train, y_train = train[feature_cols], train[col]
    X_val, y_val = test[feature_cols], test[col]
    seeds = [0, 42, 56, 337, 7575]
    models = []
    for sd in seeds:
        model = CatBoostRegressor(
            random_seed=sd,
            iterations=200,
            verbose=0,
            max_depth=5,
            eval_metric='RMSE',
            cat_features=['hour', 'day', 'month', 'day_of_week'],
            loss_function='RMSE',
            thread_count=-1,
        )
        # NOTE(review): the validation rows are passed as eval_set and are also
        # the rows scored below - confirm this does not bias the reported RMSE.
        model.fit(X_train, y_train, eval_set=(X_val, y_val))
        models.append(model)
    for j, model in enumerate(models):
        with open(f"models/{col}/model_{j}.pkl", 'wb') as f:
            pickle.dump(model, f)

    # Step 4: score on the original scale by undoing the amplitude scaling and
    # the detrending for both the averaged predictions and the targets.
    preds = np.mean([model.predict(X_val) for model in models], axis=0)*shrinkage.predict(X_val[['full_hours']]) + trend.predict(X_val[['full_hours']])
    y_val = y_val*shrinkage.predict(X_val[['full_hours']]) + trend.predict(X_val[['full_hours']])
    scores.append(rmse(y_val, preds))

    # Plot predictions against the true values for this substation.
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=times, y=preds, name='preds'))
    fig.add_trace(go.Scatter(x=times, y=y_val, name='true'))
    fig.show()

In [None]:
print(np.mean(scores))  # mean of the per-substation validation RMSE values

# Инференс
Так выглядит предсказание нагруженности для новых данных.

In [None]:
# df - датасет с одной колонкой - "date" в формате pd_datetime
import os,shutil,pickle,catboost
import pandas as pd
import numpy as np

def rmse(y_true, y_pred):
    """Return the root-mean-squared error between targets and predictions.

    Implemented with NumPy only: this cell re-imports its own dependencies but
    never imported ``mean_squared_error``, so the previous version raised
    ``NameError`` when the cell was run on its own.

    Values are compared by position; pandas indexes are ignored.

    Args:
        y_true: array-like of true values.
        y_pred: array-like of predictions with the same shape.

    Returns:
        The RMSE as a float.

    Raises:
        ValueError: if the shapes differ, the input is empty, or either input
            contains NaN/inf.
    """
    actual = np.asarray(y_true, dtype=float)
    predicted = np.asarray(y_pred, dtype=float)
    if actual.shape != predicted.shape:
        raise ValueError(
            f"y_true and y_pred must have the same shape, got {actual.shape} and {predicted.shape}"
        )
    if actual.size == 0:
        raise ValueError("rmse is undefined for empty input")
    if not (np.isfinite(actual).all() and np.isfinite(predicted).all()):
        raise ValueError("rmse inputs must not contain NaN or infinity")
    return float(np.sqrt(np.mean((actual - predicted) ** 2)))

def get_funcs():
    """Build the element-wise transforms used as extra features at inference.

    For every ``i`` in 1..5 the functions ``sin(i*x)``, ``cos(i*x)``,
    ``tanh(i*x)``, ``sin(x/i)``, ``cos(x/i)`` and ``tanh(x/i)`` are returned in
    that order, together with labels of the same form.

    Bug fix: the lambdas used to read the loop variable ``i`` at call time,
    when it already equals 5, so all thirty functions collapsed to the six
    ``i == 5`` variants whatever their label said. The factor is now bound per
    function.

    NOTE: this changes the feature values fed to the models. Pickled models
    fitted with the old behaviour must be retrained, and the feature code used
    for training must use the same definition as this one.

    Returns:
        ``(funcs, names)``: parallel lists of 30 callables and 30 labels.
    """
    def _bind(op, factor, divide):
        # Calling a factory creates a separate ``factor`` for every closure.
        if divide:
            return lambda x: op(x / factor)
        return lambda x: op(factor * x)

    res, names = [], []
    for i in range(1, 6):
        for divide in (False, True):
            for label, op in (('sin', np.sin), ('cos', np.cos), ('tanh', np.tanh)):
                res.append(_bind(op, i, divide))
                names.append(f'{label}(x/{i})' if divide else f'{label}({i}*x)')
    return res, names

def make_features(df):
    """Turn a frame with a ``date`` column into the model feature matrix.

    Appends, in this order: ``hour``, ``day``, ``month``, ``day_of_week``,
    ``is_morning``, ``is_day``, ``is_evening``, ``is_night``, ``full_hours``
    and one ``"<function name>_func_<column>"`` column for every function from
    ``get_funcs()`` applied to ``hour``, ``day`` and ``month``.

    The input is copied first; previously every new column was written into
    the caller's frame (usually a slice), mutating it as a side effect.

    Args:
        df: frame with a ``date`` column convertible by ``pd.to_datetime``.

    Returns:
        A new DataFrame with the features added and ``date`` dropped.
    """
    feats = df.copy()
    feats['date'] = pd.to_datetime(feats['date'])
    when = feats['date'].dt
    feats['hour'] = when.hour
    feats['day'] = when.day
    feats['month'] = when.month
    feats['day_of_week'] = when.dayofweek

    hour = feats['hour']
    feats['is_morning'] = (hour >= 4) & (hour <= 12)
    feats['is_day'] = (hour >= 13) & (hour <= 18)
    feats['is_evening'] = hour >= 19
    feats['is_night'] = hour < 4

    # Approximate hour index since 2015 (365-day years, 30-day months).
    # NOTE(review): the pickled trend/shrink models are applied to this column
    # in make_preds, so the formula must stay identical to the one used when
    # they were fitted.
    feats['full_hours'] = ((when.year - 2015) * 365 + feats['month'] * 30 + feats['day']) * 24 + feats['hour']

    funcs, nms = get_funcs()
    for label, func in zip(nms, funcs):
        for col in ['hour', 'day', 'month']:
            feats[f"{label}_func_{col}"] = func(feats[col])
    return feats.drop(columns=['date'])


def make_preds(df, model_dir):
    """Predict hourly call counts for every substation found in ``model_dir``.

    ``model_dir`` must contain one sub-directory per target. Inside it, a file
    whose name contains ``trend`` and one whose name contains ``shrink`` hold
    the two linear models; every other file is treated as an ensemble member.
    The ensemble predictions are averaged, multiplied by the ``shrink``
    prediction, shifted by the ``trend`` prediction and clipped at zero.

    Changes from the previous version: pickle files are closed after loading,
    each directory is listed once (in sorted order, since ``os.listdir`` order
    is arbitrary), entries of ``model_dir`` that are not directories are
    skipped, and a target without ensemble files raises instead of silently
    producing NaN.

    Args:
        df: frame with a ``date`` column; it is not modified.
        model_dir: path of the directory with the per-target model folders.

    Returns:
        ``(predictions, tds, shr)``: a DataFrame with ``date_time`` plus one
        column per target, and two lists with the raw trend and shrink
        predictions per target (used only for plotting).

    Raises:
        IndexError: if a target folder has no trend or no shrink file.
        ValueError: if a target folder has no ensemble model files.
    """
    def _load(path):
        # SECURITY: unpickling can execute arbitrary code - only load model
        # files from a trusted source.
        with open(path, 'rb') as fh:
            return pickle.load(fh)

    res = dict()
    res['date_time'] = df['date']
    # Pass a copy so the caller's frame stays untouched whatever make_features does.
    good_df = make_features(df.copy())
    tds, shr = [], []
    for target in sorted(os.listdir(model_dir)):
        target_dir = f"{model_dir}/{target}"
        if not os.path.isdir(target_dir):
            continue  # stray file next to the model folders
        files = sorted(os.listdir(target_dir))
        models = [_load(f"{target_dir}/{pth}") for pth in files
                  if 'shrink' not in pth and 'trend' not in pth]
        trend = [_load(f"{target_dir}/{pth}") for pth in files if 'trend' in pth][0]
        shrink = [_load(f"{target_dir}/{pth}") for pth in files if 'shrink' in pth][0]
        if not models:
            raise ValueError(f"no ensemble model files found in {target_dir}")

        hours = good_df[['full_hours']]
        trend_line = trend.predict(hours)
        shrink_line = shrink.predict(hours)
        preds = np.mean([model.predict(good_df) for model in models], axis=0) * shrink_line + trend_line
        tds.append(trend_line)
        shr.append(shrink_line)
        preds[preds < 0] = 0  # a call count cannot be negative
        res[target] = preds
    return pd.DataFrame(res), tds, shr  # tds and shr are only needed for the diagnostic plot

test_size = 24*30  # same window size as the validation split used in training
# Re-use the old validation window as a sanity check. ``.copy()`` detaches it
# from ft, because a later cell adds a column to val_ft and should not be
# writing into a slice of ft.
val_ft = ft[-test_size:].copy()
res, tds, shr = make_preds(val_ft[['date']], '../input/models')

In [None]:
# Plot the trend line and the amplitude (shrink) line of every substation.
fig = go.Figure()
for i in tqdm(range(len(tds))):
    for label, values in (('trend', tds[i]), ('shrink', shr[i])):
        fig.add_trace(go.Scatter(x=res['date_time'], y=values, name=f'{label}_{i}'))
fig.show()

In [None]:
# RMSE of every substation column, then the mean over substations.
scores = [rmse(val_ft[name], res[name]) for name in val_ft.columns[1:]]
np.mean(scores)

In [None]:
# Add up the per-substation predictions into a single total per hour.
res['result'] = res.iloc[:, 1:].sum(axis=1)
res = res.loc[:, ['date_time', 'result']]
res

In [None]:
# Same aggregation for the true counts; the timestamp column is renamed to match `res`.
val_ft['result'] = val_ft.iloc[:, 1:].sum(axis=1)
val_ft = val_ft[['date', 'result']].rename(columns={'date': 'date_time'})
val_ft

In [None]:
# RMSE of the hourly totals summed over all substations.
rmse(val_ft['result'], res['result'])

In [None]:
# The same error after rounding the predicted totals to whole numbers.
rmse(val_ft['result'], np.round(res['result']))

In [None]:
# Hourly totals over all substations: predictions against true values.
cl = 'result'
fig = go.Figure(data=[
    go.Scatter(x=res['date_time'], y=res[cl], name='preds'),
    go.Scatter(x=res['date_time'], y=val_ft[cl], name='true'),
])
fig.show()

In [None]:
# Same comparison with the predicted totals rounded to whole numbers.
cl = 'result'
fig = go.Figure(data=[
    go.Scatter(x=res['date_time'], y=np.round(res[cl]), name='preds'),
    go.Scatter(x=res['date_time'], y=val_ft[cl], name='true'),
])
fig.show()

In [None]:
# Pack the ./models folder into models.zip so the trained models can be downloaded.
import shutil
shutil.make_archive('models', 'zip', './models')

In [None]:
# Show a link to the archive in the notebook output.
from IPython.display import FileLink 
FileLink(r'./models.zip')