In [1]:
from pytorch_forecasting.models.nn.rnn import LSTM
from pytorch_lightning.callbacks import EarlyStopping
import torch

from pytorch_forecasting import Baseline, NBeats, TimeSeriesDataSet
from pytorch_forecasting.data import NaNLabelEncoder
from pytorch_forecasting.data.examples import generate_ar_data
from pytorch_forecasting.metrics import SMAPE

In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
from neuralprophet import NeuralProphet
import pandas as pd
import numpy as np

In [4]:
data_location = "../"
df = pd.read_csv(data_location + "example_data/yosemite_temps.csv")
df.head(3)

Unnamed: 0,ds,y
0,2017-05-01 00:00:00,27.8
1,2017-05-01 00:05:00,27.0
2,2017-05-01 00:10:00,26.8


In [5]:
import pytorch_lightning as pl

In [6]:
from neuralprophet import configure
from neuralprophet import time_net
from neuralprophet import time_dataset
from neuralprophet import df_utils
from neuralprophet import utils
from neuralprophet import utils_torch
from neuralprophet.plot_forecast import plot, plot_components
from neuralprophet.plot_model_parameters import plot_parameters
from neuralprophet import metrics
from neuralprophet.utils import set_logger_level

In [9]:
class LSTM_NP:
    """LSTM forecaster.
    """

    def __init__(
        self,
        n_forecasts=1,
        num_hidden_layers=0,
        d_hidden=None,
        learning_rate=None,
        epochs=None,
        batch_size=None,
        loss_func="Huber",
        optimizer="AdamW",
        train_speed=None,
        normalize="auto",
        impute_missing=True,
    ):
        """
        Args:

            ## Model Config
            n_forecasts (int): Number of steps ahead of prediction time step to forecast.
            num_hidden_layers (int): number of hidden layer to include in AR-Net. defaults to 0.
            d_hidden (int): dimension of hidden layers of the AR-Net. Ignored if num_hidden_layers == 0.

            ## Train Config
            learning_rate (float): Maximum learning rate setting for 1cycle policy scheduler.
                default: None: Automatically sets the learning_rate based on a learning rate range test.
                For manual values, try values ~0.001-10.
            epochs (int): Number of epochs (complete iterations over dataset) to train model.
                default: None: Automatically sets the number of epochs based on dataset size.
                    For best results also leave batch_size to None.
                For manual values, try ~5-500.
            batch_size (int): Number of samples per mini-batch.
                default: None: Automatically sets the batch_size based on dataset size.
                    For best results also leave epochs to None.
                For manual values, try ~1-512.
            loss_func (str, torch.nn.modules.loss._Loss, 'typing.Callable'):
                Type of loss to use: str ['Huber', 'MSE'],
                or torch loss or callable for custom loss, eg. asymmetric Huber loss

            ## Data config
            normalize (str): Type of normalization to apply to the time series.
                options: ['auto', 'soft', 'off', 'minmax, 'standardize']
                default: 'auto' uses 'minmax' if variable is binary, else 'soft'
                'soft' scales minimum to 0.1 and the 90th quantile to 0.9
            impute_missing (bool): whether to automatically impute missing dates/values
                imputation follows a linear method up to 10 missing values, more are filled with trend.
        """
        kwargs = locals()

        # General
        self.name = "LSTM"
        self.n_forecasts = n_forecasts

        # Data Preprocessing
        self.normalize = normalize
        self.impute_missing = impute_missing
        self.impute_limit_linear = 5
        self.impute_rolling = 20

        # Training
        self.config_train = configure.from_kwargs(configure.Train, kwargs)



        self.metrics = metrics.MetricsCollection(
            metrics=[
                metrics.LossMetric(self.config_train.loss_func),
                metrics.MAE(),
                metrics.MSE(),
            ],
            value_metrics=[
                # metrics.ValueMetric("Loss"),
                metrics.ValueMetric("RegLoss"),
            ],
        )

        
        # Model
        self.config_model = configure.from_kwargs(configure.Model, kwargs)

        # set during fit()
        self.data_freq = None

        # Set during _train()
        self.fitted = False
        self.data_params = None
        self.optimizer = None
        self.scheduler = None
        self.model = None

        # set during prediction
        self.future_periods = None
        # later set by user (optional)
        self.highlight_forecast_step_n = None
        self.true_ar_weights = None

In [10]:
m = LSTM_NP()

TypeError: __init__() missing 1 required positional argument: 'ar_sparsity'

In [12]:
import inspect

In [13]:
inspect.signature(configure.Train).parameters

mappingproxy({'learning_rate': <Parameter "learning_rate: (<class 'float'>, None)">,
              'epochs': <Parameter "epochs: (<class 'int'>, None)">,
              'batch_size': <Parameter "batch_size: (<class 'int'>, None)">,
              'loss_func': <Parameter "loss_func: (<class 'str'>, <class 'torch.nn.modules.loss._Loss'>, 'typing.Callable')">,
              'optimizer': <Parameter "optimizer: (<class 'str'>, <class 'torch.optim.optimizer.Optimizer'>)">,
              'train_speed': <Parameter "train_speed: (<class 'int'>, <class 'float'>, None)">,
              'ar_sparsity': <Parameter "ar_sparsity: (<class 'float'>, None)">,
              'reg_lambda_trend': <Parameter "reg_lambda_trend: float = None">,
              'trend_reg_threshold': <Parameter "trend_reg_threshold: (<class 'bool'>, <class 'float'>) = None">,
              'reg_lambda_season': <Parameter "reg_lambda_season: float = None">})

In [None]:
normalize (str): Type of normalization to apply to the time series.
    options: ['auto', 'soft', 'off', 'minmax, 'standardize']
    default: 'auto' uses 'minmax' if variable is binary, else 'soft'
    'soft' scales minimum to 0.1 and the 90th quantile to 0.9
impute_missing (bool): whether to automatically impute missing dates/values
    imputation follows a linear method up to 10 missing values, more are filled with trend.

In [None]:
# Data Preprocessing
self.normalize = normalize
self.impute_missing = impute_missing
self.impute_limit_linear = 5
self.impute_rolling = 20

In [None]:
def _handle_missing_data(self, df, freq, predicting=False):
    """Checks, auto-imputes and normalizes new data

    Args:
        df (pd.DataFrame): raw data with columns 'ds' and 'y'
        freq (str): data frequency
        predicting (bool): when no lags, allow NA values in 'y' of forecast series or 'y' to miss completely

    Returns:
        pre-processed df
    """

    # add missing dates for autoregression modelling
    df, missing_dates = df_utils.add_missing_dates_nan(df, freq=freq)
    if missing_dates > 0:
        if self.impute_missing:
            log.info("{} missing dates added.".format(missing_dates))
        else:
            raise ValueError(
                "{} missing dates found. Please preprocess data manually or set impute_missing to True.".format(
                    missing_dates
                )
            )

    # impute missing values
    data_columns = []
    data_columns.append("y")
    
    for column in data_columns:
        sum_na = sum(df[column].isnull())
        if sum_na > 0:
            if self.impute_missing:
                df.loc[:, column], remaining_na = df_utils.fill_linear_then_rolling_avg(
                        df[column],
                        limit_linear=self.impute_limit_linear,
                        rolling=self.impute_rolling,
                    )
                log.info("{} NaN values in column {} were auto-imputed.".format(sum_na - remaining_na, column))
                if remaining_na > 0:
                    raise ValueError(
                        "More than {} consecutive missing values encountered in column {}. "
                        "{} NA remain. Please preprocess data manually.".format(
                            2 * self.impute_limit_linear + self.impute_rolling, column, remaining_na
                        )
                    )
            else:  # fail because set to not impute missing
                raise ValueError(
                    "Missing values found. " "Please preprocess data manually or set impute_missing to True."
                )
    return df

In [None]:
df = df_utils.normalize(df, self.data_params)
df = df_utils.check_dataframe(df, check_y=True)

In [25]:
freq = 'D'

In [26]:
df_for_tsd = df.copy(deep = True)
df_for_tsd['ds'] = pd.to_datetime(df_for_tsd['ds'])
df_for_tsd = pd.DataFrame(pd.date_range(start=df_for_tsd.ds.min(), end=df_for_tsd.ds.max(), freq = freq),
             columns = ['ds']).merge(df_for_tsd, how = 'left')
df_for_tsd = df_for_tsd.sort_values('ds')
df_for_tsd = df_for_tsd.reset_index()
df_for_tsd.columns = ['time_idx', 'date', 'value']
df_for_tsd['group'] = 0

In [27]:
df_for_tsd

Unnamed: 0,time_idx,date,value,group
0,0,2017-05-01,27.8,0
1,1,2017-05-02,29.4,0
2,2,2017-05-03,31.3,0
3,3,2017-05-04,33.1,0
4,4,2017-05-05,33.6,0
...,...,...,...,...
61,61,2017-07-01,42.2,0
62,62,2017-07-02,40.6,0
63,63,2017-07-03,41.0,0
64,64,2017-07-04,40.9,0


In [None]:


m = NeuralProphet(n_lags = 10, epochs = 15)
df = pd.DataFrame()
df['ds'] = pd.date_range(start = '2020-01-01', periods = 100)
df['y'] = np.random.randint(0, 100, size = len(df))
a = m.fit(df, freq = 'D')