In [1]:
import pandas as pd
import numpy as np

from statsforecast import StatsForecast
from statsforecast.models import (
    # HoltWinters,
    # CrostonClassic as Croston, 
    # HistoricAverage,
    DynamicOptimizedTheta as DOT,
    SeasonalNaive,
    # AutoARIMA
)
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import TimeSeriesSplit
import re

  from tqdm.autonotebook import tqdm


In [None]:
def extractData(data):
    """
    Extract the data-quality records of an SS grid into a pandas DataFrame.

    Input:
    - data: hisGrid (<class 'hxpy.haystack.grid.Grid'>). Any object whose
      ``to_dataframe()`` returns a frame with the columns ``id``, ``unit``,
      ``dqType``, ``ts``, ``dur``, ``featId`` and ``data`` is accepted.
    Output:
    - DataFrame with one row per input row and the following columns
        - pointID => point id of target variable (from ``id``)
        - unit => copied from ``unit``
        - dqType => type of data quality issue (from ``dqType``)
        - dqStart => timestamp of start of data quality issue (from ``ts``)
        - dqDuration => duration of data quality issue; ``dur`` is interpreted
          as a number of minutes and converted to ``pd.Timedelta``
        - pointInterval => logging interval for the point (currently a fixed
          5 minutes, see TO DO below)
        - features => point ids of model features (from ``featId``)
        - his => history to be used as training data (from ``data``, stored as-is)

    Raises:
    - KeyError if one of the expected source columns is missing.

    ** NOTE: this function is written mainly to be compatible with python on SS.
    Running it locally requires an input that mimics the
    <class 'hxpy.haystack.grid.Grid'> ``to_dataframe()`` interface.
    """

    # convert the Grid object to a DataFrame so it can be manipulated
    # (hxPy provides this through .to_dataframe())
    ssData = data.to_dataframe()

    # fixed output schema, so that an empty grid still yields the expected columns
    output_columns = [
        'pointID', 'unit', 'dqType', 'dqStart',
        'dqDuration', 'pointInterval', 'features', 'his',
    ]

    # Build one record per source row and construct the frame once at the end.
    # Writing cell by cell with .loc on an empty frame is quadratic and fails
    # when a cell value is list-like (pandas tries to broadcast it).
    source_rows = zip(
        ssData['id'],
        ssData['unit'],       # TO DO: ADD THIS IN THE MASTER TABLE
        ssData['dqType'],     # TO DO: ADD THIS IN THE MASTER TABLE
        ssData['ts'],
        ssData['dur'],
        ssData['featId'],
        ssData['data'],
    )
    records = [
        {
            'pointID': point_id,
            'unit': unit,
            'dqType': dq_type,
            'dqStart': dq_start,
            'dqDuration': pd.Timedelta(duration, "min"),
            'pointInterval': pd.Timedelta(5, "min"),   # TO DO: ADD THIS IN THE MASTER TABLE
            'features': features,
            'his': history,
        }
        for point_id, unit, dq_type, dq_start, duration, features, history in source_rows
    ]

    return pd.DataFrame(records, columns=output_columns)


In [None]:
def seasonalNaive(df, length_of_missing_data, data_logging_interval, ts_column_name="ts", values_column_name="v0"):
    """
    Fill a gap of missing data with a seasonal-naive forecast (24 h season).

    Inputs
    df: history used as training set (from SS); must expose ``to_dataframe()``
        and contain the column named by ``values_column_name``
    length_of_missing_data: interval length of missing data (from SS), as a pd.Timedelta
    data_logging_interval: data logging interval - called from the hisDQInterval tag
        on the point (from SS), as a pd.Timedelta
    ts_column_name: name of timestamp column (kept for interface compatibility;
        the model only consumes the values, so it is not used in the computation)
    values_column_name: name of values column

    Output
    forecasts_df: DataFrame built directly from the mapping returned by
        ``model.predict`` with ``int(length_of_missing_data / data_logging_interval)``
        rows and a default integer index.
        NOTE(review): with statsforecast the single column is expected to be
        named "mean" - confirm. No "ts" index / "v0" column is attached here;
        TO DO: decide whether callers want that renaming before adding it.
    """

    # number of predictions needed to cover the gap
    horizon = int(length_of_missing_data / data_logging_interval)

    # season length: number of samples in 24 hours
    season_length = int(pd.Timedelta(24, 'h') / data_logging_interval)

    # Training set size: 10x the horizon, but never less than one full season.
    # With fewer samples than `season_length` the model cannot look back a
    # whole day, so the forecast would be degenerate / out of phase.
    training_set_size = max(horizon * 10, season_length)

    # Pull the values out of the SS grid as a plain float array: the
    # statsforecast models are documented to take numpy arrays, not Series.
    history = df.to_dataframe().reset_index()
    values = history[values_column_name].to_numpy(dtype=float, na_value=np.nan)

    # keep only the most recent `training_set_size` samples
    train_values = values[-1 * int(training_set_size):]

    # The Model
    model = SeasonalNaive(season_length=season_length)

    # Model fitting
    model = model.fit(y=train_values)

    # Predictions
    forecasts_df = pd.DataFrame(model.predict(h=horizon))

    return forecasts_df