In [1]:
# Widen the notebook container to 90% of the browser window via injected CSS.
from IPython.display import display, HTML
display(HTML("<style>.container { width:90% !important; }</style>"))

# Default matplotlib figure size, given as (width, height).
from pylab import rcParams
rcParams['figure.figsize'] = 12,8

In [2]:
import matplotlib.pyplot as plt
import seaborn as sns

import pandas as pd
import numpy as np

from datetime import datetime, timedelta

import re

import os
from glob import glob
from tqdm import tqdm

import yaml
from yaml import dump
import uuid
import itertools
from shutil import copy2

import matplotlib.pyplot as plt

In [3]:
import xgboost
from xgboost import XGBRegressor

from sklearn.ensemble import RandomForestRegressor

from lightgbm import LGBMRegressor

from prophet import Prophet

from statsmodels.tsa.statespace.sarimax import SARIMAX
from functools import partial

In [4]:
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots

In [5]:
import warnings
# Installs an "ignore" filter for every warning category, so deprecation and
# convergence warnings emitted by the libraries used below are not shown.
warnings.simplefilter(action="ignore")

## Forecasting

#### Initialize all required variables, prepare datasets

In [6]:
def init(path="/masters_diploma/processed_data/history_weather_with_daylight.csv"):
    """Load the weather history and define the train/test split boundaries.

    Parameters
    ----------
    path : str, optional
        CSV file with a ``date`` column, which becomes the datetime index.
        Defaults to the location originally hard-coded in this notebook.

    Returns
    -------
    tuple
        ``(full_set, train_end, test_start, test_end, train_features_set)`` where
        ``full_set`` is the frame truncated at ``test_end`` with missing values
        replaced by 0, the three dates are ``datetime`` objects, and
        ``train_features_set`` is a list of feature-name lists to evaluate.
    """
    train_end = datetime(2025, 8, 31, 23)
    test_start = datetime(2025, 9, 1, 0)
    test_end = datetime(2025, 9, 14, 23)

    train_features_set = [
        ['month', 'week_day', 'year_day', 'is_day', 'daylight_seconds', 'temperature_mean_3_weeks', 'temperature_mean_3_years'],
#         ['month', 'year_day', 'is_day', 'sunshine_duration', 'temperature_min_3_weeks', 'temperature_max_3_weeks', 'temperature_mean_3_weeks',
#          'temperature_min_3_years', 'temperature_max_3_years', 'temperature_mean_3_years', 'cloud_cover_mean_7_days', 'pressure_msl_mean_7_days'],
#         ['month', 'year_day', 'is_day', 'sunshine_duration', 'temperature_min_3_weeks', 'temperature_max_3_weeks', 'temperature_mean_3_weeks',
#         'relative_humidity_min_7_days', 'temperature_min_7_days', 'temperature_mean_7_days', 'temperature_lag_168'],
#         ['month', 'year_day', 'is_day', 'sunshine_duration', 'temperature_min_3_weeks', 'temperature_max_3_weeks', 'temperature_mean_3_weeks']
    ]

    # ``parse_dates`` already converts the column with pandas' own datetime
    # parser, so the deprecated ``date_parser`` argument is not needed.
    full_set = pd.read_csv(
        path,
        parse_dates=["date"],
        index_col=["date"],
    )

    # Keep rows up to and including ``test_end``; missing values become 0.
    full_set = full_set.loc[:test_end].fillna(0)

    return full_set, train_end, test_start, test_end, train_features_set

In [7]:
def future_target(df, date_start, date_end):
    """Append empty hourly rows covering ``date_start``..``date_end`` to ``df``.

    Parameters
    ----------
    df : pd.DataFrame
        Frame to extend; its columns are reused for the appended rows.
    date_start, date_end : datetime-like
        Inclusive bounds of the hourly range to append.

    Returns
    -------
    pd.DataFrame
        ``df`` followed by one all-NaN row per hour in the requested range.
    """
    # A Timedelta frequency behaves the same on every pandas version, unlike
    # the 'H' alias, which newer releases deprecate.
    date_range = pd.date_range(date_start, date_end, freq=pd.Timedelta(hours=1))

    add_df = pd.DataFrame(index=date_range, columns=df.columns)

    return pd.concat([df, add_df])

In [8]:
def models_hyperparameter_random_forest():
    """Return the random-forest hyperparameter grid as a list of kwargs dicts.

    The grid is the cross product of tree depth and number of trees, with depth
    varying slowest; ``n_jobs`` and ``random_state`` are fixed.
    """
    max_depths = (4, 5, 6, 7)
    tree_counts = (50, 100, 200, 500, 1000)

    return [
        {
            'n_estimators': tree_count,
            'n_jobs': -1,
            'random_state': 2,
            'max_depth': max_depth,
        }
        for max_depth in max_depths
        for tree_count in tree_counts
    ]


def models_hyperparameter_xgboost():
    """Return the XGBoost hyperparameter grid as a list of kwargs dicts.

    Only ``max_depth`` and ``n_estimators`` vary (depth slowest); every other
    setting is shared by all grid points.
    """
    # Settings shared by every grid point, in the order they are emitted.
    shared_settings = {
        'eta': 0.3,
        'booster': 'gbtree',
        'objective': 'reg:squarederror',
        'eval_metric': 'rmse',
        'subsample': 1,
        'colsample_bytree': 1,
        'min_child_weight': 1,
        'random_state': 2,
    }

    grid = []
    for max_depth in (5, 7, 9):
        for tree_count in (50, 100, 200, 500, 1000):
            grid.append({
                'n_estimators': tree_count,
                'n_jobs': -1,
                'max_depth': max_depth,
                **shared_settings,
            })
    return grid


def models_hyperparameter_lgbm():
    """Return the LightGBM hyperparameter grid as a list of kwargs dicts.

    The grid is the cross product of ``max_depth`` and ``n_estimators`` (depth
    varying slowest). The dicts are passed to ``LGBMRegressor`` to predict a
    continuous target, so the objective/metric are the regression ones; the
    previous ``'binary'`` setting is a classification objective meant for
    0/1 labels.
    """
    depth_list = [6, 7]
    n_estimators_list = [10, 50, 100]

    hyperparameters_for_model = []

    for depth, n_estimators in itertools.product(depth_list, n_estimators_list):
        hyperparameters_for_model.append({
                    'n_estimators': n_estimators,
                    'n_jobs': -1,
                    'max_depth': depth,
                    'eta': 0.3,
                    'random_state': 2,
                    'objective': 'regression',
                    'verbosity': -1,
                    'metric': 'rmse',
            })

    return hyperparameters_for_model


def models_hyperparameter_prophet():
    """Return the Prophet hyperparameter grid as a list of dicts.

    The first entry is an empty dict (library defaults); the rest are the
    cross product of the seasonality flag and the seasonality mode.

    NOTE(review): the keys ``season`` and ``seasonality_*`` do not look like
    Prophet constructor argument names (``seasonality_mode``,
    ``yearly_seasonality``, ``daily_seasonality``) - confirm before passing
    these dicts to ``Prophet(**hp)``.
    """
    # The list must exist before the default entry is appended; the previous
    # order raised UnboundLocalError on every call.
    hyperparameters_for_model = [{}]

    seasonality_list = ["seasonality_yearly", "seasonality_daily"]
    season_list = ["additive", "multiplicative"]

    for seasonality, season in itertools.product(seasonality_list, season_list):
        hyperparameters_for_model.append({
                        'growth': "logistic",
                        'season': season,
                        f'{seasonality}': True,
            })

    return hyperparameters_for_model


def models_hyperparameter_sarimax():
    """Return the SARIMAX hyperparameter grid as a list of kwargs dicts.

    Each dict carries ``order``, ``seasonal_order`` (seasonal period 24) and
    ``trend`` plus fixed estimation flags. Combinations with no differencing
    at all (``d == 0`` and ``D == 0``) and no trend term are left out.
    """
    seasonal_period = 24

    candidates = itertools.product(
        [1],          # p
        [1],          # d
        [1],          # q
        [1],          # P
        [0, 1],       # D
        [1],          # Q
        ["n", "c"],   # trend
    )

    return [
        {
            'order': (p, d, q),
            'seasonal_order': (P, D, Q, seasonal_period),
            'trend': trend,
            'enforce_stationarity': False,
            'enforce_invertibility': False,
            'concentrate_scale': True,
        }
        for p, d, q, P, D, Q, trend in candidates
        if not (d == 0 and D == 0 and trend == "n")
    ]


def fit_options_sarimax_simple():
    """Return the keyword-argument sets tried when fitting a SARIMAX model.

    The first set only silences optimizer output; the second also selects the
    ``lbfgs`` method with an iteration cap and the ``opg`` covariance type.
    """
    quiet_defaults = {"disp": False}
    lbfgs_capped = {"method": "lbfgs", "maxiter": 200, "disp": False, "cov_type": "opg"}
    return [quiet_defaults, lbfgs_capped]

In [9]:
def define_parameters(train_end, test_start, test_end, train_features_set, forecast_steps, models_dict):
    """Build one run configuration per experiment combination.

    A configuration is produced for every combination of training start date,
    model (the values of ``models_dict``), hyperparameter set, fit-kwargs set
    and feature list.

    Parameters
    ----------
    train_end, test_start, test_end : datetime
        Split boundaries copied into every configuration.
    train_features_set : list[list[str]]
        Candidate feature lists.
    forecast_steps : int
        Stored under the ``forecast_days`` key.
    models_dict : dict
        Mapping whose values are model names: ``random_forest``, ``xgboost``,
        ``lightgbm``, ``prophet`` or ``sarimax``.

    Returns
    -------
    list[dict]
        The configurations. The ``fit_kwargs`` key is present only for
        ``sarimax`` configurations.

    Raises
    ------
    ValueError
        If ``models_dict`` contains an unsupported model name. Previously this
        printed a message and returned ``None``, which made callers fail later
        with an unrelated ``TypeError``.
    """
    # model name -> (hyperparameter grid builder, model factory)
    model_registry = {
        'random_forest': (models_hyperparameter_random_forest, lambda hp: RandomForestRegressor(**hp)),
        'xgboost': (models_hyperparameter_xgboost, lambda hp: XGBRegressor(**hp)),
        'lightgbm': (models_hyperparameter_lgbm, lambda hp: LGBMRegressor(**hp)),
        # Prophet is built with library defaults; the hyperparameters are only
        # recorded in the configuration, not passed to the constructor.
        'prophet': (models_hyperparameter_prophet, lambda hp: Prophet()),
        # SARIMAX needs the data at construction time, so only a partial is stored.
        'sarimax': (models_hyperparameter_sarimax, lambda hp: partial(SARIMAX, **hp)),
    }

    unknown_models = [name for name in models_dict.values() if name not in model_registry]
    if unknown_models:
        raise ValueError(f"Unknown model(s): {unknown_models}; supported: {sorted(model_registry)}")

    train_starts = [datetime(2024, 9, 1), datetime(2024, 1, 1), datetime(2023, 9, 1)]

    list_of_configs = []

    for train_start in train_starts:
        for md in models_dict.values():
            build_grid, build_model = model_registry[md]

            for hp in build_grid():
                model = build_model(hp)
                # Only SARIMAX has alternative fit settings; other models get a single pass.
                fit_kwargs_list = fit_options_sarimax_simple() if md == 'sarimax' else [None]

                for kw in fit_kwargs_list:
                    for train_features in train_features_set:
                        config = {
                            'unique_uuid': str(uuid.uuid1()),
                            'train_start': train_start,
                            'train_end': train_end,
                            'test_start': test_start,
                            'test_end': test_end,
                            'target_column': 'temperature',
                            'train_features': train_features,
                            'path_to_result': '/masters_diploma/',
                            'forecast_days': forecast_steps,
                            'model_name': md,
                            'model': model,
                            'model_hyperparameters': hp,
                        }
                        if md == 'sarimax':
                            config['fit_kwargs'] = kw

                        list_of_configs.append(config)

    return list_of_configs

#### functions used in wfv service

In [10]:
def data(day, X_full_set, y_full_set, train_start, config, forecast_steps):
    """Slice features and target into train and test windows for one forecast day.

    The train window runs from ``train_start`` to ``config["train_end"]``. The
    test window starts ``day`` days after ``config["test_start"]`` and ends at
    hour 23 of the day ``forecast_steps`` days later. Both ends are inclusive
    label slices.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test)``.
    """
    train_window = slice(train_start, config["train_end"])

    window_open = config["test_start"] + timedelta(days=day)
    window_close = config["test_start"] + timedelta(days=day + forecast_steps, hours=23)
    test_window = slice(window_open, window_close)

    X_train, y_train = X_full_set.loc[train_window], y_full_set.loc[train_window]
    X_test, y_test = X_full_set.loc[test_window], y_full_set.loc[test_window]

    return X_train, X_test, y_train, y_test

In [11]:
def standardize_mean_values(day, df_test, df_train, full_set, config):
    """Hold target-derived ``*_days`` aggregate features constant over the test window.

    For every feature in ``config['train_features']`` whose name starts with
    ``"<target_column>_m"`` and ends with ``"days"``, all rows of ``df_test``
    from the forecast origin (``config['test_start'] + day`` days) onward are
    overwritten with the value at the origin. If the origin timestamp is not in
    ``df_test``, the last value of that column in ``df_train`` is used instead.

    Parameters
    ----------
    day : int
        Offset in days of the forecast origin from ``config['test_start']``.
    df_test : pd.DataFrame
        Test features; modified in place and returned.
    df_train : pd.DataFrame
        Training features, used only for the fallback value.
    full_set : pd.DataFrame
        Unused; kept for interface compatibility.
    config : dict
        Needs ``train_features``, ``target_column`` and ``test_start``.

    Returns
    -------
    pd.DataFrame
        ``df_test`` with the selected columns flattened.
    """
    origin = config["test_start"] + timedelta(days=day)
    prefix = f"{config['target_column']}_m"

    agg_cols = [
        col for col in config['train_features']
        if col.startswith(prefix) and col.endswith('days') and col in df_test.columns
    ]

    for agg in agg_cols:
        try:
            num = df_test.loc[origin, agg]
        except KeyError:
            # Origin timestamp missing from the test frame: fall back to the
            # most recent training value.
            num = df_train[agg].iloc[-1]

        # Read and write use the same origin-based slice, so rows before the
        # origin (if any) are left untouched.
        df_test.loc[origin:, agg] = num

    return df_test

In [12]:
def estimations(day, df_stats, y_pred_df, y_test, config):
    """Record per-timestamp forecast error statistics in ``df_stats``.

    For every timestamp in ``y_test`` four columns prefixed with ``d-<k>`` are
    filled, where ``k`` is the number of whole days between the timestamp and
    the forecast origin (``config['test_start'] + day`` days): the rounded
    absolute error, the relative error in percent, and 0/1 flags for a
    relative error above 5% and above 10%.

    If the actual value is zero (relative error undefined) or the timestamp is
    missing from ``y_pred_df``, the reason is printed and the whole row is set
    to 0.

    Parameters
    ----------
    day : int
        Offset in days of the forecast origin from ``config['test_start']``.
    df_stats : pd.DataFrame
        Accumulator; modified in place and returned.
    y_pred_df : pd.DataFrame
        Predictions in a ``predicted_mean`` column.
    y_test : pd.DataFrame
        Actual values in the ``config['target_column']`` column.
    config : dict
        Needs ``test_start`` and ``target_column``.

    Returns
    -------
    pd.DataFrame
        The updated ``df_stats``.
    """
    origin = config["test_start"] + timedelta(days=day)
    target_column = config['target_column']

    for date in y_test.index:
        step_day = int((date - origin).days)

        try:
            pred = y_pred_df.loc[date, 'predicted_mean']
            real = y_test.loc[date, target_column]

            # numpy floats return inf/nan on division by zero instead of
            # raising, so the zero case is raised explicitly to reach the
            # handler below and keep inf out of the statistics.
            if real == 0:
                raise ZeroDivisionError(f'actual {target_column} is zero at {date}')

            err = abs(pred / real - 1) * 100

            df_stats.loc[date, f'd-{step_day}' + '_total_abs_error'] = np.round(abs(pred-real))
            df_stats.loc[date, f'd-{step_day}' + '_total_relative_error'] = np.round(abs(pred / real - 1), 4) * 100
            df_stats.loc[date, f'd-{step_day}' + '_more_5'] = 1 if (err > 5) else 0
            df_stats.loc[date, f'd-{step_day}' + '_more_10'] = 1 if (err > 10) else 0

        except (ZeroDivisionError, KeyError) as e:
            print(e)

            df_stats.loc[date, :] = 0

    return df_stats

In [13]:
def write_predictions(day, forecast_steps, y_pred_df, config, research_task_uuid):
    """Write the forecast to disk as one CSV file per forecast day.

    The first column of ``y_pred_df`` is cut into consecutive 24-row chunks
    (chunk ``k`` is labelled ``d-k`` in the file name). Chunks that are empty
    after dropping missing values are skipped, which happens when fewer than
    ``forecast_steps + 1`` days of predictions are available.

    Files go to
    ``<path_to_result>/forecast/<model_name>/research_task_<uuid>/<model_name>_<unique_uuid>/``.

    Parameters
    ----------
    day : int
        Offset in days of the forecast origin from ``config['test_start']``.
    forecast_steps : int
        Chunks ``0..forecast_steps`` are written.
    y_pred_df : pd.DataFrame
        Predictions with a datetime index.
    config : dict
        Needs ``path_to_result``, ``model_name``, ``unique_uuid`` and ``test_start``.
    research_task_uuid : str
        Identifier of the current research task, used in the folder name.

    Raises
    ------
    KeyError
        If ``config`` lacks one of the required keys.
    """
    model_name = config['model_name']
    origin_label = (config['test_start'] + timedelta(days=day)).strftime('%Y-%m-%d')

    # The target folder does not depend on the step, so it is resolved once.
    path_to_files = os.path.join(config['path_to_result'], "forecast", model_name,
                                 f"research_task_{research_task_uuid}",
                                 f"{model_name}_{config['unique_uuid']}")
    os.makedirs(path_to_files, exist_ok=True)

    for step in range(forecast_steps + 1):
        pred = y_pred_df.iloc[24*step:24*(step+1), 0].dropna().sort_index()
        if pred.empty:
            # Nothing predicted for this step (window shorter than requested).
            continue

        pred.index.name = 'date_time'

        file_name = os.path.join(
            path_to_files,
            f"forecast_d-{step}_{model_name}_{origin_label}_({pred.index[0].strftime('%Y-%m-%d')}).csv")

        pd.DataFrame(pred).to_csv(file_name)

### wfv service

In [14]:
def run_wfv(full_set: pd.DataFrame, config: dict, research_task_uuid: str, forecast_steps: int, models_dict: dict):
    """Run the day-by-day validation loop for one configuration and save the results.

    For every day between ``config['test_start']`` and ``config['test_end']``
    the model is fitted on the training window, a forecast is produced for the
    test window of that day, the forecast is written to disk and error
    statistics are accumulated. At the end the statistics (CSV) and the
    configuration without the model object (YAML) are written to
    ``<path_to_result>/wf_result/<model_name>/research_task_<uuid>/``.

    Parameters
    ----------
    full_set : pd.DataFrame
        Data containing the feature columns and the target column.
    config : dict
        One configuration as built by ``define_parameters``. ``train_start``
        is filled in (mutating ``config``) when it is missing and no
        ``duration_training_history`` is given.
    research_task_uuid : str
        Identifier used in the output folder names.
    forecast_steps : int
        Number of additional days covered by each forecast.
    models_dict : dict
        Unused; kept for interface compatibility.

    Notes
    -----
    Days that raise ``KeyError`` or ``ValueError`` are printed and skipped.
    """
    X_full_set = full_set.loc[:, config['train_features']]
    y_full_set = full_set.loc[:, [config['target_column']]]

    if X_full_set.shape[0] != y_full_set.shape[0]:
        common_index = sorted(set(X_full_set.index) & set(y_full_set.index))
        X_full_set = X_full_set.loc[common_index, :]
        y_full_set = y_full_set.loc[common_index, :]
    print(X_full_set.shape, y_full_set.shape)

    df_stats = pd.DataFrame()

    # Take the split boundaries from the configuration rather than from
    # notebook globals, so the function works on a fresh kernel.
    test_start = config['test_start']
    test_end = config['test_end']
    count_days = (test_end - test_start).days + 1

    model_name = config['model_name']
    print(model_name)

    model = config['model']
    # Only sarimax configurations carry 'fit_kwargs'; default to no extra arguments.
    fit_kwargs = config.get('fit_kwargs') or {}

    unique_uuid = config['unique_uuid']

    path_folder_result = os.path.join(config['path_to_result'], "wf_result", model_name,
                                      f"research_task_{research_task_uuid}")
    # Creates config['path_to_result'] as well, since it is a parent folder.
    os.makedirs(path_folder_result, exist_ok=True)

    print(count_days)
    for day in tqdm(range(count_days)):
        print(day, test_start+timedelta(days=day))

        train_start = config.get('train_start', None)
        if train_start is None:
            duration = config.get('duration_training_history', None)
            if duration is None:
                train_start = X_full_set.index[0]
                config['train_start'] = datetime(train_start.year, train_start.month, train_start.day)
            else:
                # The offset previously used an undefined name `i`; the loop's
                # day index is the only candidate in scope.
                # NOTE(review): confirm the window should shift with `day`.
                train_start = config['train_end'] + timedelta(days=day - duration)

        try:

            X_train, X_test, y_train, y_test = data(day, X_full_set, y_full_set, train_start, config, forecast_steps)
            X_test = standardize_mean_values(day, X_test.copy(), X_train, full_set, config)

            print(min(X_test.index), max(X_test.index))

            if model_name == 'sarimax':
                # NOTE(review): get_forecast counts its steps from the end of the
                # training sample, which stays at config['train_end'] for every
                # day, while the result is labelled with y_test's index - verify
                # the alignment for day > 0.
                y_pred = model(endog=y_train, exog=X_train, dates=y_train.index, freq="H")\
                        .fit(**fit_kwargs)\
                        .get_forecast(steps=len(y_test), exog=X_test)\
                        .predicted_mean.values
            else:
                y_pred = model.fit(X_train, y_train).predict(X_test)

            y_pred_df = pd.DataFrame(y_pred, index=y_test.index)
            y_pred_df.columns = ['predicted_mean']

            print(min(y_pred_df.index), max(y_pred_df.index))

            write_predictions(day, forecast_steps, y_pred_df, config, research_task_uuid)

            df_stats = estimations(day, df_stats, y_pred_df, y_test, config)

        except KeyError as e:
            print(e)
            continue

        except ValueError as e:
            print(e)
            continue

    if df_stats.empty:
        # Every day failed: there is no row to annotate, but the (empty)
        # result and the configuration are still saved for traceability.
        print(f'no statistics collected for {model_name}_{unique_uuid}')
    else:
        last_index = df_stats.index[-1]
        df_stats.loc[last_index, 'model_hyperparameters'] = str(config['model_hyperparameters'])
        df_stats.loc[last_index, 'train_features'] = str(config['train_features'])

    path_to_save_result_csv = os.path.join(path_folder_result, f'{model_name}_{unique_uuid}.csv')
    df_stats.round(2).to_csv(path_to_save_result_csv, date_format='%Y-%m-%d %H:%M:%S')

    # The model object is not serialisable to YAML, so it is left out.
    config_to_save = config.copy()
    config_to_save.pop('model', None)
    with open(os.path.join(path_folder_result, f'{model_name}_{unique_uuid}.yaml'), 'w') as outfile:
        dump(config_to_save, outfile, default_flow_style=False)

In [None]:
models_list = ['SARIMAX']    #'Prophet', 'XGBoost', 'LightGBM', 'Random_Forest'
# Key: the capital letters of the display name, lower-cased (e.g. 'XGBoost' -> 'xgb');
# value: the lower-cased model name.
models_dict = {"".join(re.findall('([A-Z])', k)).lower(): k.lower() for k in models_list}

forecast_steps = 3        # means that forecast will be made on {n} future days

full_set, train_end, test_start, test_end, train_features_set = init()
print(f'train ends: {train_end}\t test starts: {test_start}')

_research_task_uuid = str(uuid.uuid1())
print(f'_research_task_uuid = {_research_task_uuid}\n')

configs = define_parameters(train_end, test_start, test_end, train_features_set, forecast_steps, models_dict)
print(f'count_configs {len(configs)} \n')

# A named loop variable is used instead of `_`, which IPython reserves for the
# last cell output.
for i, config in enumerate(configs):
    print(i, config['model_name'], '==', config, '\n')

    run_wfv(full_set, config, _research_task_uuid, forecast_steps, models_dict)

train ends: 2025-08-31 23:00:00	 test starts: 2025-09-01 00:00:00
_research_task_uuid = cadc8e42-b99e-11f0-b4e4-b1b3fada54e9

count_configs 24 

0 sarimax == {'unique_uuid': 'cadc8e43-b99e-11f0-90ae-b1b3fada54e9', 'train_start': datetime.datetime(2024, 9, 1, 0, 0), 'train_end': datetime.datetime(2025, 8, 31, 23, 0), 'test_start': datetime.datetime(2025, 9, 1, 0, 0), 'test_end': datetime.datetime(2025, 9, 14, 23, 0), 'target_column': 'temperature', 'train_features': ['month', 'week_day', 'year_day', 'is_day', 'daylight_seconds', 'temperature_mean_3_weeks', 'temperature_mean_3_years'], 'path_to_result': '/masters_diploma/', 'forecast_days': 3, 'model_name': 'sarimax', 'model': functools.partial(<class 'statsmodels.tsa.statespace.sarimax.SARIMAX'>, order=(1, 1, 1), seasonal_order=(1, 0, 1, 24), trend='n', enforce_stationarity=False, enforce_invertibility=False, concentrate_scale=True), 'model_hyperparameters': {'order': (1, 1, 1), 'seasonal_order': (1, 0, 1, 24), 'trend': 'n', 'enforce_st

  0%|                                                                                           | 0/14 [00:00<?, ?it/s]

0 2025-09-01 00:00:00
2025-09-01 00:00:00 2025-09-04 23:00:00


  7%|█████▊                                                                            | 1/14 [01:55<24:59, 115.33s/it]

2025-09-01 00:00:00 2025-09-04 23:00:00
1 2025-09-02 00:00:00
2025-09-02 00:00:00 2025-09-05 23:00:00


 14%|███████████▋                                                                      | 2/14 [03:39<21:44, 108.68s/it]

2025-09-02 00:00:00 2025-09-05 23:00:00
2 2025-09-03 00:00:00
2025-09-03 00:00:00 2025-09-06 23:00:00


 21%|█████████████████▌                                                                | 3/14 [05:20<19:17, 105.26s/it]

2025-09-03 00:00:00 2025-09-06 23:00:00
3 2025-09-04 00:00:00
2025-09-04 00:00:00 2025-09-07 23:00:00


 29%|███████████████████████▍                                                          | 4/14 [07:06<17:34, 105.46s/it]

2025-09-04 00:00:00 2025-09-07 23:00:00
4 2025-09-05 00:00:00
2025-09-05 00:00:00 2025-09-08 23:00:00


 36%|█████████████████████████████▎                                                    | 5/14 [09:05<16:32, 110.30s/it]

2025-09-05 00:00:00 2025-09-08 23:00:00
5 2025-09-06 00:00:00
2025-09-06 00:00:00 2025-09-09 23:00:00


 43%|███████████████████████████████████▏                                              | 6/14 [10:57<14:49, 111.14s/it]

2025-09-06 00:00:00 2025-09-09 23:00:00
6 2025-09-07 00:00:00
2025-09-07 00:00:00 2025-09-10 23:00:00


 50%|█████████████████████████████████████████                                         | 7/14 [12:45<12:49, 109.98s/it]

2025-09-07 00:00:00 2025-09-10 23:00:00
7 2025-09-08 00:00:00
2025-09-08 00:00:00 2025-09-11 23:00:00


 57%|██████████████████████████████████████████████▊                                   | 8/14 [14:46<11:21, 113.51s/it]

2025-09-08 00:00:00 2025-09-11 23:00:00
8 2025-09-09 00:00:00
2025-09-09 00:00:00 2025-09-12 23:00:00


 64%|████████████████████████████████████████████████████▋                             | 9/14 [16:34<09:19, 111.82s/it]

2025-09-09 00:00:00 2025-09-12 23:00:00
9 2025-09-10 00:00:00
2025-09-10 00:00:00 2025-09-13 23:00:00


 71%|█████████████████████████████████████████████████████████▊                       | 10/14 [18:37<07:40, 115.21s/it]

2025-09-10 00:00:00 2025-09-13 23:00:00
10 2025-09-11 00:00:00
2025-09-11 00:00:00 2025-09-14 23:00:00


 79%|███████████████████████████████████████████████████████████████▋                 | 11/14 [20:24<05:38, 112.70s/it]

2025-09-11 00:00:00 2025-09-14 23:00:00
11 2025-09-12 00:00:00
2025-09-12 00:00:00 2025-09-14 23:00:00


 86%|█████████████████████████████████████████████████████████████████████▍           | 12/14 [22:17<03:45, 112.75s/it]

2025-09-12 00:00:00 2025-09-14 23:00:00
12 2025-09-13 00:00:00
2025-09-13 00:00:00 2025-09-14 23:00:00


 93%|███████████████████████████████████████████████████████████████████████████▏     | 13/14 [24:14<01:53, 113.96s/it]

2025-09-13 00:00:00 2025-09-14 23:00:00
13 2025-09-14 00:00:00
2025-09-14 00:00:00 2025-09-14 23:00:00


100%|█████████████████████████████████████████████████████████████████████████████████| 14/14 [26:05<00:00, 111.80s/it]


2025-09-14 00:00:00 2025-09-14 23:00:00
1 sarimax == {'unique_uuid': 'cadc8e44-b99e-11f0-b061-b1b3fada54e9', 'train_start': datetime.datetime(2024, 9, 1, 0, 0), 'train_end': datetime.datetime(2025, 8, 31, 23, 0), 'test_start': datetime.datetime(2025, 9, 1, 0, 0), 'test_end': datetime.datetime(2025, 9, 14, 23, 0), 'target_column': 'temperature', 'train_features': ['month', 'week_day', 'year_day', 'is_day', 'daylight_seconds', 'temperature_mean_3_weeks', 'temperature_mean_3_years'], 'path_to_result': '/masters_diploma/', 'forecast_days': 3, 'model_name': 'sarimax', 'model': functools.partial(<class 'statsmodels.tsa.statespace.sarimax.SARIMAX'>, order=(1, 1, 1), seasonal_order=(1, 0, 1, 24), trend='n', enforce_stationarity=False, enforce_invertibility=False, concentrate_scale=True), 'model_hyperparameters': {'order': (1, 1, 1), 'seasonal_order': (1, 0, 1, 24), 'trend': 'n', 'enforce_stationarity': False, 'enforce_invertibility': False, 'concentrate_scale': True}, 'fit_kwargs': {'method': 

  0%|                                                                                           | 0/14 [00:00<?, ?it/s]

0 2025-09-01 00:00:00
2025-09-01 00:00:00 2025-09-04 23:00:00


  7%|█████▊                                                                            | 1/14 [01:50<23:54, 110.37s/it]

2025-09-01 00:00:00 2025-09-04 23:00:00
1 2025-09-02 00:00:00
2025-09-02 00:00:00 2025-09-05 23:00:00


 14%|███████████▋                                                                      | 2/14 [03:48<22:59, 114.94s/it]

2025-09-02 00:00:00 2025-09-05 23:00:00
2 2025-09-03 00:00:00
2025-09-03 00:00:00 2025-09-06 23:00:00


 21%|█████████████████▌                                                                | 3/14 [05:37<20:34, 112.18s/it]

2025-09-03 00:00:00 2025-09-06 23:00:00
3 2025-09-04 00:00:00
2025-09-04 00:00:00 2025-09-07 23:00:00


 29%|███████████████████████▍                                                          | 4/14 [07:29<18:39, 111.95s/it]

2025-09-04 00:00:00 2025-09-07 23:00:00
4 2025-09-05 00:00:00
2025-09-05 00:00:00 2025-09-08 23:00:00


 36%|█████████████████████████████▎                                                    | 5/14 [09:18<16:40, 111.15s/it]

2025-09-05 00:00:00 2025-09-08 23:00:00
5 2025-09-06 00:00:00
2025-09-06 00:00:00 2025-09-09 23:00:00


 43%|███████████████████████████████████▏                                              | 6/14 [11:08<14:46, 110.81s/it]

2025-09-06 00:00:00 2025-09-09 23:00:00
6 2025-09-07 00:00:00
2025-09-07 00:00:00 2025-09-10 23:00:00


 50%|█████████████████████████████████████████                                         | 7/14 [12:51<12:37, 108.24s/it]

2025-09-07 00:00:00 2025-09-10 23:00:00
7 2025-09-08 00:00:00
2025-09-08 00:00:00 2025-09-11 23:00:00


 57%|██████████████████████████████████████████████▊                                   | 8/14 [14:48<11:05, 110.89s/it]

2025-09-08 00:00:00 2025-09-11 23:00:00
8 2025-09-09 00:00:00
2025-09-09 00:00:00 2025-09-12 23:00:00


 64%|████████████████████████████████████████████████████▋                             | 9/14 [16:51<09:33, 114.67s/it]

2025-09-09 00:00:00 2025-09-12 23:00:00
9 2025-09-10 00:00:00
2025-09-10 00:00:00 2025-09-13 23:00:00


 71%|█████████████████████████████████████████████████████████▊                       | 10/14 [19:03<07:59, 119.98s/it]

2025-09-10 00:00:00 2025-09-13 23:00:00
10 2025-09-11 00:00:00
2025-09-11 00:00:00 2025-09-14 23:00:00


 79%|███████████████████████████████████████████████████████████████▋                 | 11/14 [20:51<05:49, 116.51s/it]

2025-09-11 00:00:00 2025-09-14 23:00:00
11 2025-09-12 00:00:00
2025-09-12 00:00:00 2025-09-14 23:00:00


 86%|█████████████████████████████████████████████████████████████████████▍           | 12/14 [22:56<03:57, 118.99s/it]

2025-09-12 00:00:00 2025-09-14 23:00:00
12 2025-09-13 00:00:00
2025-09-13 00:00:00 2025-09-14 23:00:00


 93%|███████████████████████████████████████████████████████████████████████████▏     | 13/14 [24:35<01:52, 112.98s/it]

2025-09-13 00:00:00 2025-09-14 23:00:00
13 2025-09-14 00:00:00
2025-09-14 00:00:00 2025-09-14 23:00:00


100%|█████████████████████████████████████████████████████████████████████████████████| 14/14 [26:12<00:00, 112.32s/it]


2025-09-14 00:00:00 2025-09-14 23:00:00
2 sarimax == {'unique_uuid': 'cadc8e45-b99e-11f0-8fa5-b1b3fada54e9', 'train_start': datetime.datetime(2024, 9, 1, 0, 0), 'train_end': datetime.datetime(2025, 8, 31, 23, 0), 'test_start': datetime.datetime(2025, 9, 1, 0, 0), 'test_end': datetime.datetime(2025, 9, 14, 23, 0), 'target_column': 'temperature', 'train_features': ['month', 'week_day', 'year_day', 'is_day', 'daylight_seconds', 'temperature_mean_3_weeks', 'temperature_mean_3_years'], 'path_to_result': '/masters_diploma/', 'forecast_days': 3, 'model_name': 'sarimax', 'model': functools.partial(<class 'statsmodels.tsa.statespace.sarimax.SARIMAX'>, order=(1, 1, 1), seasonal_order=(1, 0, 1, 24), trend='c', enforce_stationarity=False, enforce_invertibility=False, concentrate_scale=True), 'model_hyperparameters': {'order': (1, 1, 1), 'seasonal_order': (1, 0, 1, 24), 'trend': 'c', 'enforce_stationarity': False, 'enforce_invertibility': False, 'concentrate_scale': True}, 'fit_kwargs': {'disp': Fa

  0%|                                                                                           | 0/14 [00:00<?, ?it/s]

0 2025-09-01 00:00:00
2025-09-01 00:00:00 2025-09-04 23:00:00


  7%|█████▉                                                                             | 1/14 [01:16<16:34, 76.54s/it]

2025-09-01 00:00:00 2025-09-04 23:00:00
1 2025-09-02 00:00:00
2025-09-02 00:00:00 2025-09-05 23:00:00


 14%|███████████▊                                                                       | 2/14 [02:32<15:13, 76.11s/it]

2025-09-02 00:00:00 2025-09-05 23:00:00
2 2025-09-03 00:00:00
2025-09-03 00:00:00 2025-09-06 23:00:00


 21%|█████████████████▊                                                                 | 3/14 [03:55<14:30, 79.18s/it]

2025-09-03 00:00:00 2025-09-06 23:00:00
3 2025-09-04 00:00:00
2025-09-04 00:00:00 2025-09-07 23:00:00


 29%|███████████████████████▋                                                           | 4/14 [05:10<12:57, 77.77s/it]

2025-09-04 00:00:00 2025-09-07 23:00:00
4 2025-09-05 00:00:00
2025-09-05 00:00:00 2025-09-08 23:00:00


 36%|█████████████████████████████▋                                                     | 5/14 [06:28<11:40, 77.81s/it]

2025-09-05 00:00:00 2025-09-08 23:00:00
5 2025-09-06 00:00:00
2025-09-06 00:00:00 2025-09-09 23:00:00


 43%|███████████████████████████████████▌                                               | 6/14 [08:00<10:59, 82.45s/it]

2025-09-06 00:00:00 2025-09-09 23:00:00
6 2025-09-07 00:00:00
2025-09-07 00:00:00 2025-09-10 23:00:00


 50%|█████████████████████████████████████████▌                                         | 7/14 [09:36<10:09, 87.08s/it]

2025-09-07 00:00:00 2025-09-10 23:00:00
7 2025-09-08 00:00:00
2025-09-08 00:00:00 2025-09-11 23:00:00


 57%|███████████████████████████████████████████████▍                                   | 8/14 [10:55<08:26, 84.41s/it]

2025-09-08 00:00:00 2025-09-11 23:00:00
8 2025-09-09 00:00:00
2025-09-09 00:00:00 2025-09-12 23:00:00


 64%|█████████████████████████████████████████████████████▎                             | 9/14 [12:41<07:36, 91.23s/it]

2025-09-09 00:00:00 2025-09-12 23:00:00
9 2025-09-10 00:00:00
2025-09-10 00:00:00 2025-09-13 23:00:00


 71%|██████████████████████████████████████████████████████████▌                       | 10/14 [14:14<06:06, 91.75s/it]

2025-09-10 00:00:00 2025-09-13 23:00:00
10 2025-09-11 00:00:00
2025-09-11 00:00:00 2025-09-14 23:00:00


 79%|████████████████████████████████████████████████████████████████▍                 | 11/14 [15:30<04:20, 86.77s/it]

2025-09-11 00:00:00 2025-09-14 23:00:00
11 2025-09-12 00:00:00
2025-09-12 00:00:00 2025-09-14 23:00:00


 86%|██████████████████████████████████████████████████████████████████████▎           | 12/14 [17:11<03:02, 91.11s/it]

2025-09-12 00:00:00 2025-09-14 23:00:00
12 2025-09-13 00:00:00
2025-09-13 00:00:00 2025-09-14 23:00:00


 93%|████████████████████████████████████████████████████████████████████████████▏     | 13/14 [18:40<01:30, 90.48s/it]

2025-09-13 00:00:00 2025-09-14 23:00:00
13 2025-09-14 00:00:00
2025-09-14 00:00:00 2025-09-14 23:00:00


100%|██████████████████████████████████████████████████████████████████████████████████| 14/14 [20:01<00:00, 85.79s/it]


2025-09-14 00:00:00 2025-09-14 23:00:00
3 sarimax == {'unique_uuid': 'cadc8e46-b99e-11f0-ad4c-b1b3fada54e9', 'train_start': datetime.datetime(2024, 9, 1, 0, 0), 'train_end': datetime.datetime(2025, 8, 31, 23, 0), 'test_start': datetime.datetime(2025, 9, 1, 0, 0), 'test_end': datetime.datetime(2025, 9, 14, 23, 0), 'target_column': 'temperature', 'train_features': ['month', 'week_day', 'year_day', 'is_day', 'daylight_seconds', 'temperature_mean_3_weeks', 'temperature_mean_3_years'], 'path_to_result': '/masters_diploma/', 'forecast_days': 3, 'model_name': 'sarimax', 'model': functools.partial(<class 'statsmodels.tsa.statespace.sarimax.SARIMAX'>, order=(1, 1, 1), seasonal_order=(1, 0, 1, 24), trend='c', enforce_stationarity=False, enforce_invertibility=False, concentrate_scale=True), 'model_hyperparameters': {'order': (1, 1, 1), 'seasonal_order': (1, 0, 1, 24), 'trend': 'c', 'enforce_stationarity': False, 'enforce_invertibility': False, 'concentrate_scale': True}, 'fit_kwargs': {'method': 

  0%|                                                                                           | 0/14 [00:00<?, ?it/s]

0 2025-09-01 00:00:00
2025-09-01 00:00:00 2025-09-04 23:00:00


  7%|█████▊                                                                            | 1/14 [01:47<23:17, 107.50s/it]

2025-09-01 00:00:00 2025-09-04 23:00:00
1 2025-09-02 00:00:00
2025-09-02 00:00:00 2025-09-05 23:00:00


 14%|███████████▊                                                                       | 2/14 [03:15<19:11, 95.99s/it]

2025-09-02 00:00:00 2025-09-05 23:00:00
2 2025-09-03 00:00:00
2025-09-03 00:00:00 2025-09-06 23:00:00


 21%|█████████████████▊                                                                 | 3/14 [04:35<16:16, 88.75s/it]

2025-09-03 00:00:00 2025-09-06 23:00:00
3 2025-09-04 00:00:00
2025-09-04 00:00:00 2025-09-07 23:00:00


 29%|███████████████████████▋                                                           | 4/14 [05:58<14:26, 86.64s/it]

2025-09-04 00:00:00 2025-09-07 23:00:00
4 2025-09-05 00:00:00
2025-09-05 00:00:00 2025-09-08 23:00:00


 36%|█████████████████████████████▋                                                     | 5/14 [07:15<12:26, 82.93s/it]

2025-09-05 00:00:00 2025-09-08 23:00:00
5 2025-09-06 00:00:00
2025-09-06 00:00:00 2025-09-09 23:00:00


 43%|███████████████████████████████████▌                                               | 6/14 [08:32<10:46, 80.81s/it]

2025-09-06 00:00:00 2025-09-09 23:00:00
6 2025-09-07 00:00:00
2025-09-07 00:00:00 2025-09-10 23:00:00


 50%|█████████████████████████████████████████▌                                         | 7/14 [09:48<09:15, 79.38s/it]

2025-09-07 00:00:00 2025-09-10 23:00:00
7 2025-09-08 00:00:00
2025-09-08 00:00:00 2025-09-11 23:00:00


 57%|███████████████████████████████████████████████▍                                   | 8/14 [11:04<07:49, 78.30s/it]

2025-09-08 00:00:00 2025-09-11 23:00:00
8 2025-09-09 00:00:00
2025-09-09 00:00:00 2025-09-12 23:00:00


 64%|█████████████████████████████████████████████████████▎                             | 9/14 [12:30<06:44, 80.85s/it]

2025-09-09 00:00:00 2025-09-12 23:00:00
9 2025-09-10 00:00:00
2025-09-10 00:00:00 2025-09-13 23:00:00


 71%|██████████████████████████████████████████████████████████▌                       | 10/14 [13:48<05:18, 79.74s/it]

2025-09-10 00:00:00 2025-09-13 23:00:00
10 2025-09-11 00:00:00
2025-09-11 00:00:00 2025-09-14 23:00:00


 79%|████████████████████████████████████████████████████████████████▍                 | 11/14 [15:07<03:58, 79.54s/it]

2025-09-11 00:00:00 2025-09-14 23:00:00
11 2025-09-12 00:00:00
2025-09-12 00:00:00 2025-09-14 23:00:00


 86%|██████████████████████████████████████████████████████████████████████▎           | 12/14 [16:28<02:40, 80.07s/it]

2025-09-12 00:00:00 2025-09-14 23:00:00
12 2025-09-13 00:00:00
2025-09-13 00:00:00 2025-09-14 23:00:00


 93%|████████████████████████████████████████████████████████████████████████████▏     | 13/14 [17:49<01:20, 80.37s/it]

2025-09-13 00:00:00 2025-09-14 23:00:00
13 2025-09-14 00:00:00
2025-09-14 00:00:00 2025-09-14 23:00:00


100%|██████████████████████████████████████████████████████████████████████████████████| 14/14 [19:09<00:00, 82.12s/it]


2025-09-14 00:00:00 2025-09-14 23:00:00
4 sarimax == {'unique_uuid': 'cadc8e47-b99e-11f0-a52e-b1b3fada54e9', 'train_start': datetime.datetime(2024, 9, 1, 0, 0), 'train_end': datetime.datetime(2025, 8, 31, 23, 0), 'test_start': datetime.datetime(2025, 9, 1, 0, 0), 'test_end': datetime.datetime(2025, 9, 14, 23, 0), 'target_column': 'temperature', 'train_features': ['month', 'week_day', 'year_day', 'is_day', 'daylight_seconds', 'temperature_mean_3_weeks', 'temperature_mean_3_years'], 'path_to_result': '/masters_diploma/', 'forecast_days': 3, 'model_name': 'sarimax', 'model': functools.partial(<class 'statsmodels.tsa.statespace.sarimax.SARIMAX'>, order=(1, 1, 1), seasonal_order=(1, 1, 1, 24), trend='n', enforce_stationarity=False, enforce_invertibility=False, concentrate_scale=True), 'model_hyperparameters': {'order': (1, 1, 1), 'seasonal_order': (1, 1, 1, 24), 'trend': 'n', 'enforce_stationarity': False, 'enforce_invertibility': False, 'concentrate_scale': True}, 'fit_kwargs': {'disp': Fa

  0%|                                                                                           | 0/14 [00:00<?, ?it/s]

0 2025-09-01 00:00:00
2025-09-01 00:00:00 2025-09-04 23:00:00


  7%|█████▋                                                                          | 1/14 [06:54<1:29:52, 414.79s/it]

2025-09-01 00:00:00 2025-09-04 23:00:00
1 2025-09-02 00:00:00
2025-09-02 00:00:00 2025-09-05 23:00:00


 14%|███████████▍                                                                    | 2/14 [13:51<1:23:07, 415.67s/it]

2025-09-02 00:00:00 2025-09-05 23:00:00
2 2025-09-03 00:00:00
2025-09-03 00:00:00 2025-09-06 23:00:00


 21%|█████████████████▏                                                              | 3/14 [21:13<1:18:29, 428.10s/it]

2025-09-03 00:00:00 2025-09-06 23:00:00
3 2025-09-04 00:00:00
2025-09-04 00:00:00 2025-09-07 23:00:00


 29%|██████████████████████▊                                                         | 4/14 [28:42<1:12:43, 436.33s/it]

2025-09-04 00:00:00 2025-09-07 23:00:00
4 2025-09-05 00:00:00
2025-09-05 00:00:00 2025-09-08 23:00:00


 36%|████████████████████████████▌                                                   | 5/14 [36:22<1:06:43, 444.83s/it]

2025-09-05 00:00:00 2025-09-08 23:00:00
5 2025-09-06 00:00:00
2025-09-06 00:00:00 2025-09-09 23:00:00


 43%|██████████████████████████████████▎                                             | 6/14 [44:57<1:02:27, 468.45s/it]

2025-09-06 00:00:00 2025-09-09 23:00:00
6 2025-09-07 00:00:00
2025-09-07 00:00:00 2025-09-10 23:00:00


 50%|█████████████████████████████████████████                                         | 7/14 [52:54<54:58, 471.24s/it]

2025-09-07 00:00:00 2025-09-10 23:00:00
7 2025-09-08 00:00:00
2025-09-08 00:00:00 2025-09-11 23:00:00
2025-09-08 00:00:00 2025-09-11 23:00:00


 57%|█████████████████████████████████████████████▋                                  | 8/14 [1:02:09<49:48, 498.10s/it]

8 2025-09-09 00:00:00
2025-09-09 00:00:00 2025-09-12 23:00:00
2025-09-09 00:00:00 2025-09-12 23:00:00


 64%|███████████████████████████████████████████████████▍                            | 9/14 [1:11:00<42:21, 508.27s/it]

9 2025-09-10 00:00:00
2025-09-10 00:00:00 2025-09-13 23:00:00


 71%|████████████████████████████████████████████████████████▍                      | 10/14 [1:20:46<35:29, 532.44s/it]

2025-09-10 00:00:00 2025-09-13 23:00:00
10 2025-09-11 00:00:00
2025-09-11 00:00:00 2025-09-14 23:00:00


 79%|██████████████████████████████████████████████████████████████                 | 11/14 [1:28:08<25:13, 504.52s/it]

2025-09-11 00:00:00 2025-09-14 23:00:00
11 2025-09-12 00:00:00
2025-09-12 00:00:00 2025-09-14 23:00:00


 86%|███████████████████████████████████████████████████████████████████▋           | 12/14 [1:35:35<16:14, 487.15s/it]

2025-09-12 00:00:00 2025-09-14 23:00:00
12 2025-09-13 00:00:00
2025-09-13 00:00:00 2025-09-14 23:00:00


 93%|█████████████████████████████████████████████████████████████████████████▎     | 13/14 [1:42:49<07:51, 471.09s/it]

2025-09-13 00:00:00 2025-09-14 23:00:00
13 2025-09-14 00:00:00
2025-09-14 00:00:00 2025-09-14 23:00:00


In [None]:
# Copy the on-disk notebook file 'wfv.ipynb' into the archive folder under a unique name.
# NOTE(review): copy2 reads the file as last saved; unsaved edits are not included.
archive_dir = '/masters_diploma/archive'

# copy2 does not create missing parent directories, so make sure the folder exists.
os.makedirs(archive_dir, exist_ok=True)

# uuid1 plus a second-resolution timestamp keeps every snapshot name distinct;
# the explicit '.ipynb' suffix keeps the archived copy recognisable as a notebook.
archive_name = f'wfv_{uuid.uuid1()}_{datetime.now().strftime("%Y%m%d_%H%M%S")}.ipynb'
copy2('wfv.ipynb', f'{archive_dir}/{archive_name}')