### Optimizing Time Series Forecasts with Neural Prophet and Hyperopt

### Contents

#### 1. Import Data

#### 2. Prepare Data in Neural Prophet Format

- 2.1. Choose `ds`, `y`, `ID` columns
- 2.2. Convert `ds` to datetime format
- 2.3. Downsample data (if necessary)

#### 3. Check Time-Series Data

- 3.1. Augmented Dickey-Fuller (ADF) Test
    - Assess data stationarity
    - Determine differencing order (`num_diff`)
    - Add Integrated (I) component based on `num_diff`
- 3.2. Autocorrelation Function (ACF)
    - Find optimal `n_lags` value
- 3.3. Add Moving Average (MA) component
- 3.4. Choose lagged regressors

#### 4. Define the Model & Parameters

#### 5. Run the Model

#### 6. Results

### 1. Import Data

In [82]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from neuralprophet import NeuralProphet, set_random_seed
from datetime import timezone, datetime as dt
from dateutil import parser
from hyperopt import hp, fmin, tpe, Trials,STATUS_OK 
from hyperopt.pyll.base import scope
from hyperopt.early_stop import no_progress_loss
import torch
from sklearn.preprocessing import LabelEncoder
import warnings
warnings.filterwarnings("ignore")

df=pd.read_csv('C:/Users/partha/Downloads/example_wp_log_peyton_manning.csv',engine='pyarrow')
# Alternative datasets:
# df=pd.read_csv('C:/Users/partha/Downloads/weatherHistory.csv',engine='pyarrow')
# df = pd.read_csv("https://github.com/ourownstory/neuralprophet-data/raw/main/datasets/multivariate/ercot-panel.csv")

# Drop columns whose correlation with every column is NaN (no usable correlation at all).
# The correlation matrix is computed once, and only over numeric/boolean columns, so that
# string columns (e.g. the date column) cannot make DataFrame.corr() raise on pandas
# versions that no longer skip non-numeric columns silently.
corr_matrix = df.select_dtypes(include=['number', 'bool']).corr()
columns_with_only_na = corr_matrix.columns[corr_matrix.isna().all()].tolist()
df = df.drop(columns=columns_with_only_na)

# Separate numeric and categorical columns
numeric_cols = df.select_dtypes(include=['int64', 'float64']).columns
categorical_cols = df.select_dtypes(include=['object']).columns
# Columns whose name contains one of these markers are date/ID columns, not categorical features
categorical_cols = [col for col in categorical_cols if all(x not in col for x in ['date','Date','ds','ID'])]

# Apply Label Encoder to categorical columns (the encoder is refitted for every column)
label_encoder = LabelEncoder()
for col in categorical_cols:
    df[col] = label_encoder.fit_transform(df[col])

### 2. Prepare Data in Neural Prophet Format

#### 2.1. Choose `ds`, `y`, `ID` columns

In [83]:
import ipywidgets as widgets
from IPython.display import display

# Column selectors: any column may serve as ds or ID, only numeric columns may serve as y.
# The leading None entry of the ID selector stands for "no ID column".
ds_widget = widgets.Dropdown(options=list(df.columns), description='ds:', disabled=False)
y_widget = widgets.Dropdown(options=list(numeric_cols), description='y:', disabled=False)
id_widget = widgets.Dropdown(options=[None]+list(df.columns), description='ID:', disabled=False)

# Lay the three selectors out side by side and render them
hbox = widgets.HBox([ds_widget, y_widget, id_widget])
display(hbox)

HBox(children=(Dropdown(description='ds:', options=('ds', 'y'), value='ds'), Dropdown(description='y:', option…

In [84]:
# Give the chosen columns the names used in the rest of the notebook ('ds', 'y', 'ID').
# A selection that is not an existing column (e.g. None for the ID selector) is skipped.
for selector, target_name in ((ds_widget, 'ds'), (y_widget, 'y'), (id_widget, 'ID')):
    chosen = selector.value
    if chosen in df.columns:
        df.rename(columns={chosen: target_name}, inplace=True)

#### 2.2. Convert `ds` to datetime format

In [85]:
# Several IDs can share the same ds, so duplicate timestamps are dropped only when there is a single series
df.dropna(inplace=True)
if 'ID' not in df.columns: # no ID column: the frame holds one single time series
    df.drop_duplicates(subset=['ds'],inplace=True)
df['ds']=df['ds'].astype(str)

# Convert ds into timezone-naive datetimes (values are parsed as UTC, then the tz info is removed)
try:
    df['ds']=pd.to_datetime(df['ds'],utc=True).dt.tz_localize(None)
except (ValueError, TypeError, OverflowError):
    # pandas could not parse the format: fall back to dateutil, one value at a time.
    # Only parsing errors are caught so that unrelated failures are not hidden.
    df['ds'] = df['ds'].apply(lambda x: pd.to_datetime(parser.parse(x)))
df.sort_values(by=['ds'],inplace=True)
df=df.reset_index(drop=True)

In [113]:
df.head()

Unnamed: 0,ds,y,MA_7,MA_14,MA_30,MA_60,MA_90,MA_180,MA_365
0,2007-12-10,9.590761,8.351109,8.235347,8.345384,8.702106,8.402572,7.88216,7.821749
1,2007-12-11,8.51959,8.351109,8.235347,8.345384,8.702106,8.402572,7.88216,7.821749
2,2007-12-12,8.183677,8.351109,8.235347,8.345384,8.702106,8.402572,7.88216,7.821749
3,2007-12-13,8.072467,8.351109,8.235347,8.345384,8.702106,8.402572,7.88216,7.821749
4,2007-12-14,7.893572,8.351109,8.235347,8.345384,8.702106,8.402572,7.88216,7.821749


#### 2.3. Downsample data (if necessary)

In [87]:
# Selector for the target resampling frequency; None means "leave the data as it is"
ds_widget = widgets.Dropdown(options=[None, 'D', 'W', 'M', 'Y'], description='Downsample to:', disabled=False)

# Wrap the selector in a horizontal box and render it
hbox = widgets.HBox([ds_widget])
display(hbox)

HBox(children=(Dropdown(description='Downsample to:', options=(None, 'D', 'W', 'M', 'Y'), value=None),))

In [88]:
if ds_widget.value:
    target_freq = ds_widget.value  # one of 'D', 'W', 'M', 'Y'

    # Resampling needs the timestamps as the index
    df = df.set_index('ds')

    def downsample_by_id(group):
        """Aggregate one time series to `target_freq`, using 'sum' as an example aggregation."""
        # Remove the grouping key (when present) so it is neither aggregated nor
        # duplicated when the index is reset afterwards.
        return group.drop(columns=['ID'], errors='ignore').resample(target_freq).sum()

    if 'ID' in df.columns:
        # Several series: downsample each ID separately
        df = df.groupby('ID').apply(downsample_by_id)
    else:
        # Single series: there is nothing to group by
        df = downsample_by_id(df)

    # Turn the index ('ds', or 'ID' and 'ds') back into regular columns
    df = df.reset_index()

In [89]:
df.head()

Unnamed: 0,ds,y
0,2007-12-10,9.590761
1,2007-12-11,8.51959
2,2007-12-12,8.183677
3,2007-12-13,8.072467
4,2007-12-14,7.893572


### 3. Check Time-Series Data

#### 3.1. Augmented Dickey-Fuller (ADF) Test

- Assess data stationarity
- Determine differencing order (`num_diff`)
- Add Integrated (I) component based on `num_diff`

In [114]:
from statsmodels.tsa.stattools import adfuller

def apply_differencing(series, max_diff, alpha):
    """Difference `series` until the ADF test rejects non-stationarity.

    The ADF null hypothesis is that the series is non-stationary, so a
    p-value <= alpha means "stationary". The series is differenced once per
    iteration while the p-value stays above `alpha`, up to `max_diff` times.

    Returns:
        (num_diff, p_value): the number of differencing steps applied and the
        p-value of the last ADF test.
    """
    def adf_p_value(values):
        # Index 1 of the adfuller result is the p-value
        return adfuller(values, autolag='AIC')[1]

    p_value = adf_p_value(series.dropna())
    current = series
    num_diff = 0

    while num_diff < max_diff and p_value > alpha:
        current = current.diff().dropna()
        p_value = adf_p_value(current)
        num_diff += 1

    return num_diff, p_value

def _difference_series(series, order):
    """Apply first differencing `order` times and zero-fill the leading gaps."""
    for _ in range(order):
        series = series.diff()
    return series.fillna(0)


def make_series_stationary(df, max_diff=10, alpha=0.05):
    """Report stationarity of `y` and add the differenced series as column 'I'.

    For each series (one per ID, or the whole frame when there is no 'ID'
    column) the differencing order is determined with `apply_differencing`.
    When that order is greater than zero, the series differenced that many
    times is written to column 'I'.

    Args:
        df: frame with a 'y' column and optionally an 'ID' column. It is
            modified in place.
        max_diff: maximum number of differencing steps to try.
        alpha: significance level of the ADF test.

    Returns:
        The same `df` object. 'I' is only written for series that needed
        differencing; rows of other series are left missing in that column.
    """
    if 'ID' in df.columns:
        for id_value in df['ID'].unique():
            id_rows = df['ID'] == id_value
            series = df.loc[id_rows, 'y']
            num_diff, p_value = apply_differencing(series, max_diff, alpha)
            verdict = 'stationary' if p_value <= alpha else 'non-stationary'
            print(f"ID {id_value}: Series is {verdict} after {num_diff} differencing operation(s).")
            if num_diff > 0:
                # Difference num_diff times (what the ADF loop tested), not once at lag num_diff
                df.loc[id_rows, 'I'] = _difference_series(series, num_diff)
    else:
        series = df['y']
        num_diff, p_value = apply_differencing(series, max_diff, alpha)
        verdict = 'stationary' if p_value <= alpha else 'non-stationary'
        print(f"Series is {verdict} after {num_diff} differencing operation(s).")
        if num_diff > 0:
            # Difference num_diff times (what the ADF loop tested), not once at lag num_diff
            df['I'] = _difference_series(series, num_diff)

    return df

# Run the stationarity check on the working frame. fillna(0) then zero-fills whatever is
# still missing, e.g. rows of an 'I' column that was only written for some of the IDs.
df = make_series_stationary(df).fillna(0)

Series is stationary after 0 differencing operation(s).


In [91]:
df.head()

Unnamed: 0,ds,y
0,2007-12-10,9.590761
1,2007-12-11,8.51959
2,2007-12-12,8.183677
3,2007-12-13,8.072467
4,2007-12-14,7.893572


#### 3.2. Autocorrelation Function (ACF)

- Find optimal `n_lags` value

In [92]:
import numpy as np
from statsmodels.tsa.stattools import acf

def find_optimal_lags(series, alpha):
    """Return the lag just before the first one whose autocorrelation lies inside the band.

    The ACF of the non-missing values is computed for up to 100 lags together
    with its confidence interval at level `alpha`. The band half-width is the
    upper interval bound minus the autocorrelation. Returns 0 when no lag
    falls inside the band.
    """
    autocorr, confint = acf(series.dropna(), alpha=alpha, nlags=100, fft=True)
    half_width = confint[:, 1] - autocorr
    inside_band = (autocorr < half_width) & (autocorr > -half_width)
    insignificant_lags = np.flatnonzero(inside_band)

    if insignificant_lags.size == 0:
        return 0
    return insignificant_lags[0] - 1

def calculate_optimal_lags(df, alpha=0.05):
    """Compute the optimal lag count of `y` for every series in `df`.

    Returns a dict keyed by ID value, or a single entry under the key '1'
    when `df` has no 'ID' column.
    """
    if 'ID' not in df.columns:
        return {'1': find_optimal_lags(df['y'], alpha)}

    return {
        id_value: find_optimal_lags(df.loc[df['ID'] == id_value, 'y'], alpha)
        for id_value in df['ID'].unique()
    }

optimal_lags_dict = calculate_optimal_lags(df)
# A single n_lags is needed, so take the largest value found across the series
optimal_lags = max(optimal_lags_dict.values())

print(f'Optimal n_lags: {optimal_lags}')

Optimal n_lags: 66


#### 3.3. Add Moving Average (MA) component 

In [93]:
# Selector for the data frequency that decides which moving-average windows are added
freq_widget = widgets.Dropdown(options=['D','W','M','H','T'], description='Frequency:', disabled=False)

# Wrap the selector in a horizontal box and render it
hbox = widgets.HBox([freq_widget])
display(hbox)

HBox(children=(Dropdown(description='Frequency:', options=('D', 'W', 'M', 'H', 'T'), value='D'),))

In [94]:
def calculate_moving_averages(df, freq=None):
    """Add backfilled moving-average columns of `y` for the given frequency.

    One column `MA_<window>` is added per window size associated with `freq`.
    When `df` has an 'ID' column the averages are computed within each ID.
    The first `window - 1` rows of every series have no full window and are
    backfilled with the first available average.

    Args:
        df: frame with a 'y' column and optionally an 'ID' column. It is
            modified in place.
        freq: one of 'D', 'W', 'M', 'H', 'T'. Any other value (including
            None) prints a message and leaves `df` untouched.

    Returns:
        The same `df` object.
    """
    # Mapping common frequencies to moving average intervals
    intervals = {
        'D': [7, 14, 30, 60, 90, 180, 365],  # Daily data: week, fortnight, month, 2-months, quarter, half-year, year
        'W': [4, 8, 13, 26, 52],             # Weekly data: month, 2-months, quarter, half-year, year
        'M': [1, 3, 6, 12, 24],              # Monthly data: month, quarter, half-year, year, 2-years
        'H': [24, 72, 168, 336, 720],        # Hourly data: day, 3-days, week, 2-weeks, month
        'T': [15, 30, 60, 120, 240, 720, 1440] # Minutely data: quarter-hour, half-hour, hour, 2-hours, 4-hours, 12-hours, day
    }

    selected_intervals = intervals.get(freq)
    if selected_intervals is None:
        # Covers both a missing and an unknown frequency instead of failing in the loop below
        print("Provided frequency is not recognized. Unable to calculate moving averages.")
        return df

    for interval in selected_intervals:
        column_name = f'MA_{interval}'
        if 'ID' in df.columns:
            # Calculate moving average per ID and backfill within each ID
            df[column_name] = df.groupby('ID')['y'].transform(
                lambda x: x.rolling(window=interval).mean().bfill()
            )
        else:
            # Calculate moving average for entire series and backfill
            df[column_name] = df['y'].rolling(window=interval).mean().bfill()

    return df

# Add the MA_<window> columns for the selected frequency (MA_7 ... MA_365 when frequency = 'D').
# [0] takes the first character of the selected option.
df = calculate_moving_averages(df, freq_widget.value[0])

In [95]:
df.head()

Unnamed: 0,ds,y,MA_7,MA_14,MA_30,MA_60,MA_90,MA_180,MA_365
0,2007-12-10,9.590761,8.351109,8.235347,8.345384,8.702106,8.402572,7.88216,7.821749
1,2007-12-11,8.51959,8.351109,8.235347,8.345384,8.702106,8.402572,7.88216,7.821749
2,2007-12-12,8.183677,8.351109,8.235347,8.345384,8.702106,8.402572,7.88216,7.821749
3,2007-12-13,8.072467,8.351109,8.235347,8.345384,8.702106,8.402572,7.88216,7.821749
4,2007-12-14,7.893572,8.351109,8.235347,8.345384,8.702106,8.402572,7.88216,7.821749


#### 3.4. Choose lagged regressors

In [96]:
# Multi-select of candidate lagged regressors: every column except ds / y / ID,
# followed by a None entry
lagged_regressors_widget = widgets.SelectMultiple(options=list(set(df.columns)-set(['ds','y','ID']))+[None], description='Lagged Regressors:', disabled=False)

# Wrap the selector in a horizontal box and render it
hbox = widgets.HBox([lagged_regressors_widget])
display(hbox)

HBox(children=(SelectMultiple(description='Lagged Regressors:', options=('MA_60', 'MA_14', 'MA_365', 'MA_7', '…

In [97]:
# Store the selected lagged regressors. The None placeholder option is not a column name,
# and an empty selection must not fail, so the selection is filtered instead of indexed.
lagged_regressor_cols = [col for col in lagged_regressors_widget.value if col is not None]

if lagged_regressor_cols:
    # Drop columns not used in Neural Prophet model
    columns_to_keep = ['ds','y','I','ID'] + lagged_regressor_cols
    df = df.drop(columns=[col for col in df.columns if col not in columns_to_keep])
else:
    # Nothing (or only the None placeholder) selected: no lagged regressors
    lagged_regressor_cols = None

### 4. Define the model & parameters

In [102]:
# model_params for model parameters used in Hyperparameter tuning 
# op_params for model parameters not used in Hyperparameter tuning
# ip_params for non-model related parameters such as frequency, number of historic points, number of forecast points etc.

# Candidate values per tuned hyperparameter; a list with a single entry leaves that parameter fixed.
epochs=[200]
daily_seasonality=['auto']
weekly_seasonality=['auto']
yearly_seasonality=['auto']
loss_func=['MAE','MSE','Huber']
seasonality_mode=['additive','multiplicative']
n_changepoints=[30,60]
learning_rate=[0.01,0.001,1]

# Hyperopt search space: each entry picks one value from the matching candidate list above.
model_params =\
{
'epochs':hp.choice('epochs',epochs), 
'daily_seasonality':hp.choice('daily_seasonality',daily_seasonality),
'weekly_seasonality':hp.choice('weekly_seasonality',weekly_seasonality),
'yearly_seasonality':hp.choice('yearly_seasonality',yearly_seasonality),
'loss_func':hp.choice('loss_func',loss_func),
'seasonality_mode': hp.choice('seasonality_mode',seasonality_mode),     # additive = T+S+e, (Trend, Seasonality, error)
                                                                        # multiplicative = T*S*e 
'n_changepoints':hp.choice('n_changepoints',n_changepoints),            # Number of potential trend changepoints to include
'learning_rate':hp.choice('learning_rate',learning_rate),               
}

# Inputs that are not NeuralProphet constructor arguments (data, split, tuning budget, forecast horizon).
ip_params=\
{
'df':df,                                        # dataframe
'freq':None,                                    # model calculates frequency automatically
'n_historic_predictions':True,                  # number of historic points included for past projection
'periods':24,                                   # number of points for future projection
'valid_p':0.2,                                  # passed to model.split_df as valid_p (train/test split)
'max_evals': 10,                                # maximum evaluations for hyperparameter tuning
'lagged_regressor_cols': lagged_regressor_cols, # columns used as lagged regressors
}

# NeuralProphet constructor arguments that stay fixed during tuning.
op_params=\
{
'n_lags': optimal_lags ,                        # previous time series steps to include in AR (or) AR-Order
'n_forecasts': 24,                              # Number of steps ahead of prediction time step to forecast.   
'ar_layers':[32, 32, 32, 32],                   # architecture layers for autoregression
}

# 'n_lags':0, # previous time series steps to include in AR (or) AR-Order
# 'n_forecasts': 1, # Number of steps ahead of prediction time step to forecast.   
# 'ar_layers': None,    

In [103]:
df

Unnamed: 0,ds,y,MA_7,MA_14,MA_30,MA_60,MA_90,MA_180,MA_365
0,2007-12-10,9.590761,8.351109,8.235347,8.345384,8.702106,8.402572,7.882160,7.821749
1,2007-12-11,8.519590,8.351109,8.235347,8.345384,8.702106,8.402572,7.882160,7.821749
2,2007-12-12,8.183677,8.351109,8.235347,8.345384,8.702106,8.402572,7.882160,7.821749
3,2007-12-13,8.072467,8.351109,8.235347,8.345384,8.702106,8.402572,7.882160,7.821749
4,2007-12-14,7.893572,8.351109,8.235347,8.345384,8.702106,8.402572,7.882160,7.821749
...,...,...,...,...,...,...,...,...,...
2900,2016-01-16,7.817223,8.116764,8.261031,8.114988,8.199135,8.193907,7.992027,7.713200
2901,2016-01-17,9.273878,8.258500,8.334214,8.173155,8.213980,8.199819,8.002208,7.714499
2902,2016-01-18,10.333775,8.524649,8.408402,8.278559,8.250134,8.221338,8.018409,7.718748
2903,2016-01-19,9.125871,8.666195,8.448231,8.319710,8.268016,8.236312,8.027585,7.720655


In [104]:
def _fit_neural_prophet(model_kwargs, ip, n_lags, seed):
    """Build one NeuralProphet model, fit it on the train split and score it on the test split.

    Args:
        model_kwargs: keyword arguments for the NeuralProphet constructor.
        ip: input parameters; reads 'df', 'freq', 'valid_p' and 'lagged_regressor_cols'.
        n_lags: the AR order in use; lagged regressors are only registered when it is > 0.
        seed: value passed to set_random_seed before the model is created.

    Returns:
        (model, data, train_metrics, test_metrics), where `data` is the frame
        that was actually split and fitted.
    """
    data = ip['df']
    set_random_seed(seed)

    # Local trend & seasonality per ID when an ID column is present, otherwise one global model
    global_local = 'local' if 'ID' in data.columns else 'global'
    model = NeuralProphet(**model_kwargs, trend_global_local=global_local,
                          season_global_local=global_local)

    regressor_cols = ip['lagged_regressor_cols']
    if regressor_cols is not None:
        if n_lags > 0:
            for col in regressor_cols:
                model = model.add_lagged_regressor(col, normalize="standardize")
        else:
            # The regressors are not registered with the model, so remove their columns
            data = data[[col for col in data.columns if col not in regressor_cols]]

    df_train, df_test = model.split_df(data, freq=ip['freq'], valid_p=ip['valid_p'])
    train_metrics = model.fit(df_train, freq=ip['freq'])
    test_metrics = model.test(df_test)
    return model, data, train_metrics, test_metrics


def train_neural_prophet(df, model_params, ip_params, op_params):
    """Tune NeuralProphet with Hyperopt, then refit with the best parameters and forecast.

    Args:
        df: not read by this function; the frame in ip_params['df'] is used.
            Kept so that existing calls keep working.
        model_params: Hyperopt search space of the tuned constructor arguments.
        ip_params: non-model inputs ('df', 'freq', 'valid_p', 'max_evals',
            'periods', 'n_historic_predictions', 'lagged_regressor_cols').
        op_params: constructor arguments that are not tuned (must contain 'n_lags').

    Returns:
        (model, forecast, best_model_params, final_train_metrics, final_test_metrics),
        where the two metric frames hold the last row of the respective metrics.
    """
    # hyperopt is already a dependency of this notebook; space_eval is imported here
    # so that this function does not rely on a changed import cell.
    from hyperopt import space_eval

    # Combine model parameters & additional input & output parameters
    args = {'model_params':model_params,'ip_params':ip_params,'op_params':op_params}
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # GPU Check (reported only)
    print('Device Used: {}'.format(device))

    def optimize(sampled): # Hyperparameter tuning with Hyperopt: one trial
        _, _, _, trial_metrics = _fit_neural_prophet(
            {**sampled['model_params'], **sampled['op_params']},
            sampled['ip_params'],
            sampled['op_params']['n_lags'],
            seed=40,
        )
        return {'loss':trial_metrics['RMSE_val'].reset_index(drop=True)[0], 'status': STATUS_OK }

    max_evals = ip_params['max_evals']
    # Stop early after 70% of the evaluation budget has passed without progress
    early_stop_fn = no_progress_loss(iteration_stop_count=int(max_evals*0.7), percent_increase=0.5)
    trials = Trials()
    best_results = fmin(optimize, space=args, algo=tpe.suggest, trials=trials, max_evals=max_evals,
                        early_stop_fn=early_stop_fn)

    # fmin reports hp.choice results as indices; decode them against the search space that
    # was passed in rather than against module-level lists.
    best_model_params = space_eval(model_params, best_results)

    model, data, train_metrics, test_metrics = _fit_neural_prophet(
        {**best_model_params, **op_params}, ip_params, op_params['n_lags'], seed=20
    )
    future = model.make_future_dataframe(data, periods=ip_params['periods'],
                                         n_historic_predictions=ip_params['n_historic_predictions'])
    forecast = model.predict(future)
    final_train_metrics = train_metrics.iloc[-1:].reset_index(drop=True)
    final_test_metrics = test_metrics.iloc[-1:].reset_index(drop=True)
    return model, forecast, best_model_params, final_train_metrics, final_test_metrics

### 5. Run the model

In [105]:
# Tune the hyperparameters, refit with the best configuration and collect the forecast and final metrics
model, forecast, best_model_params, train_metrics, test_metrics =\
        train_neural_prophet(df, model_params, ip_params, op_params)

Device Used: cpu

  0%|                                                                           | 0/10 [00:00<?, ?trial/s, best loss=?][A

INFO - (NP.df_utils._infer_frequency) - Major frequency D corresponds to 98.967% of the data.
INFO - (NP.df_utils._infer_frequency) - Dataframe freq automatically defined as D
INFO - (NP.df_utils.return_df_in_original_format) - Returning df with no ID column
INFO - (NP.df_utils.return_df_in_original_format) - Returning df with no ID column
INFO - (NP.df_utils._infer_frequency) - Major frequency D corresponds to 98.838% of the data.
INFO - (NP.df_utils._infer_frequency) - Dataframe freq automatically defined as D
INFO - (NP.config.init_data_params) - Setting normalization to global as only one dataframe provided for training.
INFO - (NP.utils.set_auto_seasonalities) - Disabling daily seasonality. Run NeuralProphet with daily_seasonality=True to override this.
INFO - (NP.config.set_auto_batch_epoch) - Auto-set batch_size to 32


Training: 0it [00:00, ?it/s]

INFO - (NP.df_utils._infer_frequency) - Major frequency D corresponds to 99.312% of the data.
INFO - (NP.df_utils._infer_frequency) - Defined frequency is equal to major frequency - D


Testing: 0it [00:00, ?it/s]


 10%|████▉                                            | 1/10 [01:04<09:39, 64.35s/trial, best loss: 0.5377740859985352][A

INFO - (NP.df_utils._infer_frequency) - Major frequency D corresponds to 98.967% of the data.
INFO - (NP.df_utils._infer_frequency) - Dataframe freq automatically defined as D
INFO - (NP.df_utils.return_df_in_original_format) - Returning df with no ID column
INFO - (NP.df_utils.return_df_in_original_format) - Returning df with no ID column
INFO - (NP.df_utils._infer_frequency) - Major frequency D corresponds to 98.838% of the data.
INFO - (NP.df_utils._infer_frequency) - Dataframe freq automatically defined as D
INFO - (NP.config.init_data_params) - Setting normalization to global as only one dataframe provided for training.
INFO - (NP.utils.set_auto_seasonalities) - Disabling daily seasonality. Run NeuralProphet with daily_seasonality=True to override this.
INFO - (NP.config.set_auto_batch_epoch) - Auto-set batch_size to 32


Training: 0it [00:00, ?it/s]

INFO - (NP.df_utils._infer_frequency) - Major frequency D corresponds to 99.312% of the data.
INFO - (NP.df_utils._infer_frequency) - Defined frequency is equal to major frequency - D


Testing: 0it [00:00, ?it/s]


 20%|█████████▊                                       | 2/10 [02:12<08:52, 66.58s/trial, best loss: 0.5377740859985352][A

INFO - (NP.df_utils._infer_frequency) - Major frequency D corresponds to 98.967% of the data.
INFO - (NP.df_utils._infer_frequency) - Dataframe freq automatically defined as D
INFO - (NP.df_utils.return_df_in_original_format) - Returning df with no ID column
INFO - (NP.df_utils.return_df_in_original_format) - Returning df with no ID column
INFO - (NP.df_utils._infer_frequency) - Major frequency D corresponds to 98.838% of the data.
INFO - (NP.df_utils._infer_frequency) - Dataframe freq automatically defined as D
INFO - (NP.config.init_data_params) - Setting normalization to global as only one dataframe provided for training.
INFO - (NP.utils.set_auto_seasonalities) - Disabling daily seasonality. Run NeuralProphet with daily_seasonality=True to override this.
INFO - (NP.config.set_auto_batch_epoch) - Auto-set batch_size to 32


Training: 0it [00:00, ?it/s]

INFO - (NP.df_utils._infer_frequency) - Major frequency D corresponds to 99.312% of the data.
INFO - (NP.df_utils._infer_frequency) - Defined frequency is equal to major frequency - D


Testing: 0it [00:00, ?it/s]


 30%|██████████████▋                                  | 3/10 [03:22<07:56, 68.10s/trial, best loss: 0.5377740859985352][A

INFO - (NP.df_utils._infer_frequency) - Major frequency D corresponds to 98.967% of the data.
INFO - (NP.df_utils._infer_frequency) - Dataframe freq automatically defined as D
INFO - (NP.df_utils.return_df_in_original_format) - Returning df with no ID column
INFO - (NP.df_utils.return_df_in_original_format) - Returning df with no ID column
INFO - (NP.df_utils._infer_frequency) - Major frequency D corresponds to 98.838% of the data.
INFO - (NP.df_utils._infer_frequency) - Dataframe freq automatically defined as D
INFO - (NP.config.init_data_params) - Setting normalization to global as only one dataframe provided for training.
INFO - (NP.utils.set_auto_seasonalities) - Disabling daily seasonality. Run NeuralProphet with daily_seasonality=True to override this.
INFO - (NP.config.set_auto_batch_epoch) - Auto-set batch_size to 32


Training: 0it [00:00, ?it/s]

INFO - (NP.df_utils._infer_frequency) - Major frequency D corresponds to 99.312% of the data.
INFO - (NP.df_utils._infer_frequency) - Defined frequency is equal to major frequency - D


Testing: 0it [00:00, ?it/s]


 40%|███████████████████▌                             | 4/10 [04:51<07:38, 76.44s/trial, best loss: 0.5377740859985352][A

INFO - (NP.df_utils._infer_frequency) - Major frequency D corresponds to 98.967% of the data.
INFO - (NP.df_utils._infer_frequency) - Dataframe freq automatically defined as D
INFO - (NP.df_utils.return_df_in_original_format) - Returning df with no ID column
INFO - (NP.df_utils.return_df_in_original_format) - Returning df with no ID column
INFO - (NP.df_utils._infer_frequency) - Major frequency D corresponds to 98.838% of the data.
INFO - (NP.df_utils._infer_frequency) - Dataframe freq automatically defined as D
INFO - (NP.config.init_data_params) - Setting normalization to global as only one dataframe provided for training.
INFO - (NP.utils.set_auto_seasonalities) - Disabling daily seasonality. Run NeuralProphet with daily_seasonality=True to override this.
INFO - (NP.config.set_auto_batch_epoch) - Auto-set batch_size to 32


Training: 0it [00:00, ?it/s]

INFO - (NP.df_utils._infer_frequency) - Major frequency D corresponds to 99.312% of the data.
INFO - (NP.df_utils._infer_frequency) - Defined frequency is equal to major frequency - D


Testing: 0it [00:00, ?it/s]


 50%|████████████████████████▌                        | 5/10 [06:07<06:22, 76.41s/trial, best loss: 0.5377740859985352][A

INFO - (NP.df_utils._infer_frequency) - Major frequency D corresponds to 98.967% of the data.
INFO - (NP.df_utils._infer_frequency) - Dataframe freq automatically defined as D
INFO - (NP.df_utils.return_df_in_original_format) - Returning df with no ID column
INFO - (NP.df_utils.return_df_in_original_format) - Returning df with no ID column
INFO - (NP.df_utils._infer_frequency) - Major frequency D corresponds to 98.838% of the data.
INFO - (NP.df_utils._infer_frequency) - Dataframe freq automatically defined as D
INFO - (NP.config.init_data_params) - Setting normalization to global as only one dataframe provided for training.
INFO - (NP.utils.set_auto_seasonalities) - Disabling daily seasonality. Run NeuralProphet with daily_seasonality=True to override this.
INFO - (NP.config.set_auto_batch_epoch) - Auto-set batch_size to 32


Training: 0it [00:00, ?it/s]

INFO - (NP.df_utils._infer_frequency) - Major frequency D corresponds to 99.312% of the data.
INFO - (NP.df_utils._infer_frequency) - Defined frequency is equal to major frequency - D


Testing: 0it [00:00, ?it/s]


 60%|█████████████████████████████▍                   | 6/10 [07:24<05:05, 76.49s/trial, best loss: 0.5377740859985352][A

INFO - (NP.df_utils._infer_frequency) - Major frequency D corresponds to 98.967% of the data.
INFO - (NP.df_utils._infer_frequency) - Dataframe freq automatically defined as D
INFO - (NP.df_utils.return_df_in_original_format) - Returning df with no ID column
INFO - (NP.df_utils.return_df_in_original_format) - Returning df with no ID column
INFO - (NP.df_utils._infer_frequency) - Major frequency D corresponds to 98.838% of the data.
INFO - (NP.df_utils._infer_frequency) - Dataframe freq automatically defined as D
INFO - (NP.config.init_data_params) - Setting normalization to global as only one dataframe provided for training.
INFO - (NP.utils.set_auto_seasonalities) - Disabling daily seasonality. Run NeuralProphet with daily_seasonality=True to override this.
INFO - (NP.config.set_auto_batch_epoch) - Auto-set batch_size to 32


Training: 0it [00:00, ?it/s]

INFO - (NP.df_utils._infer_frequency) - Major frequency D corresponds to 99.312% of the data.
INFO - (NP.df_utils._infer_frequency) - Defined frequency is equal to major frequency - D


Testing: 0it [00:00, ?it/s]


 70%|██████████████████████████████████▎              | 7/10 [08:42<03:44, 74.67s/trial, best loss: 0.5377740859985352][A

INFO - (NP.df_utils._infer_frequency) - Major frequency D corresponds to 98.967% of the data.
INFO - (NP.df_utils._infer_frequency) - Dataframe freq automatically defined as D
INFO - (NP.df_utils.return_df_in_original_format) - Returning df with no ID column
INFO - (NP.df_utils.return_df_in_original_format) - Returning df with no ID column
INFO - (NP.df_utils._infer_frequency) - Major frequency D corresponds to 98.838% of the data.
INFO - (NP.df_utils._infer_frequency) - Dataframe freq automatically defined as D
INFO - (NP.config.init_data_params) - Setting normalization to global as only one dataframe provided for training.
INFO - (NP.utils.set_auto_seasonalities) - Disabling daily seasonality. Run NeuralProphet with daily_seasonality=True to override this.





INFO - (NP.config.set_auto_batch_epoch) - Auto-set batch_size to 32


Training: 0it [00:00, ?it/s]

INFO - (NP.df_utils._infer_frequency) - Major frequency D corresponds to 99.312% of the data.
INFO - (NP.df_utils._infer_frequency) - Defined frequency is equal to major frequency - D


Testing: 0it [00:00, ?it/s]

INFO - (NP.df_utils._infer_frequency) - Major frequency D corresponds to 98.967% of the data.
INFO - (NP.df_utils._infer_frequency) - Defined frequency is equal to major frequency - D
INFO - (NP.df_utils.return_df_in_original_format) - Returning df with no ID column
INFO - (NP.df_utils._infer_frequency) - Major frequency D corresponds to 98.976% of the data.
INFO - (NP.df_utils._infer_frequency) - Defined frequency is equal to major frequency - D
INFO - (NP.df_utils._infer_frequency) - Major frequency D corresponds to 98.976% of the data.
INFO - (NP.df_utils._infer_frequency) - Defined frequency is equal to major frequency - D


Predicting: 73it [00:00, ?it/s]

INFO - (NP.df_utils.return_df_in_original_format) - Returning df with no ID column


### 6. Results

In [106]:
def select_yhat(row, yhat_columns):
    """Return the first value of `row` among `yhat_columns` that is neither NaN nor zero.

    The columns are checked in the given order; NaN is returned when no
    column qualifies.
    """
    usable_values = (
        row[name]
        for name in yhat_columns
        if not pd.isna(row[name]) and row[name] != 0
    )
    return next(usable_values, np.nan)

# Collapse the forecast columns whose name starts with 'yhat' into one value per row
yhat_columns = [name for name in forecast.columns if name.startswith('yhat')]
forecast['yhat_final'] = forecast.apply(lambda row: select_yhat(row, yhat_columns), axis=1)

In [107]:
import plotly.graph_objects as go
from plotly_resampler import FigureResampler

def plot_time_series_for_each_id(df, forecast, test_metrics):
    """Draw one figure per ID found in `df`, or a single figure when `df` has no 'ID' column."""
    if 'ID' not in df.columns:
        plot_time_series(forecast, None, test_metrics)
        return

    for id_value in df['ID'].unique():
        # Re-index each slice from 0 before handing it to the plotting function
        id_forecast = forecast[forecast['ID'] == id_value].reset_index(drop=True)
        plot_time_series(id_forecast, id_value, test_metrics)

def plot_time_series(forecast_data, id_value=None, test_metrics=None):
    """Plot actual values, past predictions and future predictions for one series.

    Args:
        forecast_data: frame with 'ds', 'y' and 'yhat_final' columns. The split
            between past and future is taken from the label of the last
            non-missing 'y' and used as a position, so the frame is expected
            to have a default 0..n-1 index.
        id_value: ID shown in the title; None produces the generic title.
        test_metrics: optional frame with 'MAE_val' and 'RMSE_val' columns whose
            first row is shown as an annotation. Omitted when None.
    """
    last_observed = forecast_data['y'].last_valid_index()
    # With no observed value at all, every row is treated as future
    future_start_index = 0 if last_observed is None else last_observed + 1

    fig = go.Figure()
    fig.add_trace(go.Scattergl(x=forecast_data['ds'], y=forecast_data['y'], mode='lines', name='Actual'))
    # The past trace runs one point into the future so the two prediction lines connect
    fig.add_trace(go.Scattergl(x=forecast_data['ds'][:future_start_index+1], y=forecast_data['yhat_final'][:future_start_index+1],
                                mode='lines', name='Predicted (Past)', line=dict(color='blue')))
    fig.add_trace(go.Scattergl(x=forecast_data['ds'][future_start_index:], y=forecast_data['yhat_final'][future_start_index:],
                                mode='lines', name='Predicted (Future)', line=dict(color='#FF00FF')))

    if test_metrics is not None:
        mae = test_metrics['MAE_val'][0]
        rmse = test_metrics['RMSE_val'][0]
        fig.add_annotation(text=f'Test MAE: {mae:.2f}<br>Test RMSE: {rmse:.2f}',
                            xref='paper', yref='paper',
                            x=1, y=0, showarrow=False, font=dict(size=12),
                            align="left", bordercolor="black", borderwidth=2,
                            bgcolor="white", opacity=0.8)

    # Compare against None so that falsy IDs such as 0 still get their own title
    title = f'Time Series Plot for ID: {id_value}' if id_value is not None else 'Time Series Plot'
    fig.update_layout(title=title, xaxis_title='ds', yaxis_title='Values')
    # Use Plotly Resampler to optimize rendering
    FigureResampler(fig).show()

# Draw one figure per ID (or a single figure when df has no ID column), annotated with the test metrics
plot_time_series_for_each_id(df, forecast, test_metrics)    

In [108]:
best_model_params

{'epochs': 200,
 'daily_seasonality': 'auto',
 'weekly_seasonality': 'auto',
 'yearly_seasonality': 'auto',
 'loss_func': 'MAE',
 'seasonality_mode': 'multiplicative',
 'n_changepoints': 30,
 'learning_rate': 0.001}

In [109]:
test_metrics

Unnamed: 0,MAE_val,RMSE_val,Loss_test,RegLoss_test
0,0.423262,0.542339,0.092275,0.0


In [112]:
# (100*test_metrics['MAE_val'][0])/(df['y'].max()-df['y'].min())