In [None]:
# https://github.com/ITMO-NSS-team/fedot-examples/blob/main/notebooks/latest/3_intro_ts_forecasting.ipynb

In [2]:
# Additional imports 
import pandas as pd 
import numpy as np

# Plotting: pyplot plus a wide default canvas for every figure in this notebook
import matplotlib.pyplot as plt
from pylab import rcParams
rcParams['figure.figsize'] = (18, 7)  # (width, height) in inches

# Logging: do not let errors raised inside logging handlers surface in cell output
import logging
logging.raiseExceptions = False

# Load the training table and parse its 'Time' column into datetimes
df = (
    pd.read_csv('../data/preproc3_train.csv')
    .assign(Time=lambda frame: pd.to_datetime(frame['Time']))
)

# test_df = pd.read_csv('../data/preproc3_test.csv')
# test_df['consumption'] = np.zeros(len(test_df))

In [2]:

# Pipeline and nodes
from fedot.core.pipelines.pipeline import Pipeline
from fedot.core.pipelines.node import PrimaryNode, SecondaryNode

# Data 
from fedot.core.data.data import InputData
from fedot.core.data.data_split import train_test_data_setup
from fedot.core.repository.dataset_types import DataTypesEnum

# Tasks
from fedot.core.repository.tasks import Task, TaskTypesEnum, TsForecastingParams

# Metric
from sklearn.metrics import mean_absolute_error

In [3]:
# Forecast horizon, in time steps (7 * 24 = 168)
forecast_length = 7 * 24

In [4]:
# Wrap the data into InputData for a forecasting task with the chosen horizon
task = Task(TaskTypesEnum.ts_forecasting,
            TsForecastingParams(forecast_length=forecast_length))

# Get the time series from the dataframe
target = np.array(df['consumption'])
# With data_type=DataTypesEnum.ts the features are the series itself (the
# lagged nodes of the pipelines below build their windows from it).
# The table df.drop('Solar radiation', axis=1) is not a valid substitute:
# it is 2-D and still contains the datetime 'Time' column and the
# 'consumption' target column.
features = target
input_data = InputData(idx=np.arange(len(target)),
                       features=features,
                       target=target,
                       task=task,
                       data_type=DataTypesEnum.ts)

# Split data into train and test parts
train_input, predict_input = train_test_data_setup(input_data)

In [3]:
# Save a copy of the training table without the 'Solar radiation' column
(df.drop(columns='Solar radiation')
   .to_csv('../data/preproc3_no_sol_train.csv', index=False))

In [11]:
from fedot.api.main import Fedot
from fedot.core.data.data import InputData
from fedot.core.data.data_split import train_test_data_setup
from fedot.core.repository.tasks import Task, TaskTypesEnum, TsForecastingParams

# specify the task and the forecast length (required depth of forecast)
task = Task(TaskTypesEnum.ts_forecasting,
            TsForecastingParams(forecast_length=24*7))

# load data from csv
# NOTE: stored under its own name on purpose. Binding it to `train_input`
# would overwrite the train split produced by train_test_data_setup(input_data)
# and make the manually built pipelines in this notebook fit on the whole
# series, including the horizon they are evaluated on.
full_series_input = InputData.from_csv_time_series(task=task,
                                                   file_path='../data/preproc3_no_sol_train.csv',
                                                   delimiter=',',
                                                   target_column='consumption',
                                                   index_col='building_id',
                                                   possible_idx_keywords=['building_id', 'Time'])
# split data for train and test
train_data, test_data = train_test_data_setup(full_series_input)

# init model for the time-series forecasting
model = Fedot(problem='ts_forecasting',
              task_params=task.task_params,
              n_jobs=-1,
              show_progress=True,
              preset='gpu',
              cv_folds=5)



In [12]:
# Run the AutoML pipeline search on the training part, then draw the result
pipeline = model.fit(train_data)
pipeline.show()

# Out-of-sample forecast for the held-out part, scored against its target
forecast = model.forecast(test_data)
print(
    model.get_metrics(
        metric_names=['smape', 'mae', 'mape'],
        target=test_data.target,
    )
)

# Plot the forecasting result
model.plot_prediction()

In [5]:
def get_two_branches_pipeline(lagged_param_1=10, lagged_param_2=10,
                              model_1='ridge', model_2='ridge', model_3='ridge'):
    r"""
    Build a pipeline with two parallel branches merged by a final model

    lagged -> model_1 \
                       \
                        model_3 -> final forecast
                       /
    lagged -> model_2 /

    :param lagged_param_1: 'window_size' of the lagged node in the first branch
    :param lagged_param_2: 'window_size' of the lagged node in the second branch
    :param model_1: operation name for the model of the first branch
    :param model_2: operation name for the model of the second branch
    :param model_3: operation name for the root node joining both branches
    :return: Pipeline whose root node is model_3
    """
    branch_specs = ((lagged_param_1, model_1), (lagged_param_2, model_2))

    # Each branch: a primary 'lagged' node feeding its own model node
    branch_models = []
    for window_size, model_name in branch_specs:
        lagged_node = PrimaryNode('lagged')
        lagged_node.parameters = {'window_size': window_size}
        branch_models.append(SecondaryNode(model_name, nodes_from=[lagged_node]))

    # Root node receives the outputs of both branches, first branch first
    root_node = SecondaryNode(model_3, nodes_from=branch_models)
    return Pipeline(root_node)


def plot_results(actual_time_series, predicted_values, len_train_data, y_name = 'Parameter'):
    """
    Draw the observed series, the forecast and the train/test boundary

    :param actual_time_series: the entire array with one-dimensional data
    :param predicted_values: array with predicted values
    :param len_train_data: number of elements in the training sample;
        the forecast is drawn starting from this index
    :param y_name: name of the y axis
    """
    label_size = 15
    observed_x = np.arange(len(actual_time_series))
    forecast_x = np.arange(len_train_data, len_train_data + len(predicted_values))

    plt.plot(observed_x, actual_time_series, label='Actual values', c='green')
    plt.plot(forecast_x, predicted_values, label='Predicted', c='blue')

    # Thin black vertical segment at the train/test boundary, spanning the
    # value range of the observed series
    lowest, highest = min(actual_time_series), max(actual_time_series)
    plt.plot([len_train_data, len_train_data], [lowest, highest],
             c='black', linewidth=1)

    plt.ylabel(y_name, fontsize=label_size)
    plt.xlabel('Time index', fontsize=label_size)
    plt.legend(fontsize=label_size, loc='upper left')
    plt.grid()
    plt.show()

In [6]:
# Two-branch pipeline with a long (144) and a short (12) lag window
pipeline = get_two_branches_pipeline(lagged_param_1=144, lagged_param_2=12)

# Train on the training part of the series
pipeline.fit(train_input)

# Forecast the held-out part and flatten the prediction to 1-D
output = pipeline.predict(predict_input)
forecast = np.array(output.predict).ravel()



KeyboardInterrupt: 

In [None]:
# Draw the forecast against the full series; everything before the last
# `forecast_length` points is treated as training data
train_size = len(target) - forecast_length
plot_results(actual_time_series=target,
             predicted_values=forecast,
             len_train_data=train_size)

# Print MAE metric
print(f'Mean absolute error: {mean_absolute_error(predict_input.target, forecast):.3f}')

In [None]:
from fedot.core.pipelines.tuning.tuner_builder import TunerBuilder
from golem.core.tuning.simultaneous import SimultaneousTuner
from fedot.core.repository.quality_metrics_repository import \
    MetricsRepository, RegressionMetricsEnum


# Untuned two-branch pipeline with the default window sizes and models
init_pipeline = get_two_branches_pipeline()

# Tune all nodes of the pipeline at once: MAE metric, 50 iterations
tuner = (
    TunerBuilder(train_input.task)
    .with_tuner(SimultaneousTuner)
    .with_metric(RegressionMetricsEnum.MAE)
    .with_iterations(50)
    .build(train_input)
)
tuned_pipeline = tuner.tune(init_pipeline)

# Final fit of the tuned pipeline on the training data
tuned_pipeline.fit(train_input)
print('Pipeline tuned')

In [None]:
# Make prediction with the tuned pipeline and flatten it to 1-D
tuned_output = tuned_pipeline.predict(predict_input)
tuned_forecast = np.ravel(np.array(tuned_output.predict))


# The series in this notebook is `target`; the previously used name `traffic`
# is never defined here and raised NameError
plot_results(actual_time_series = target,
             predicted_values = tuned_forecast,
             len_train_data = len(target)-forecast_length)

# Print MAE metric
print(f'Mean absolute error: {mean_absolute_error(predict_input.target, tuned_forecast):.3f}')

In [None]:
# Print the structure of the tuned pipeline
tuned_pipeline.print_structure()