In [1]:
import warnings 
warnings.filterwarnings('ignore')

In [2]:
import pandas as pd
import numpy as np
import collections
import hts
from pmdarima import auto_arima
from fbprophet import Prophet
from statsmodels.tsa.holtwinters import ExponentialSmoothing as HWES
from arch import arch_model
import datetime as dt
from datetime import timedelta
from datetime import datetime
from collections import OrderedDict

Importing plotly failed. Interactive plots will not work.


In [3]:
def funct(x):
    """Round the magnitude of ``x`` to a whole number, with exact halves rounding down.

    The sign of ``x`` is discarded (``abs`` is applied first). A fractional part
    of 0.5 or less yields the floor, anything larger yields the ceiling.
    """
    magnitude = abs(x)
    lower = np.floor(magnitude)
    if (magnitude - lower) <= 0.5:
        return lower
    return np.ceil(magnitude)


def funct1(x):
    """Return 1 when ``x`` equals 0, otherwise return ``x`` unchanged."""
    if x == 0:
        return 1
    return x

In [4]:
def train_test_data(df: pd.DataFrame):
    """ This function splits the data into train and test datasets in the ratio of 80:20

    The split is positional: the first floor(0.8 * n_rows) rows form the train
    set and the remaining rows form the test set, so row order is preserved.

    Args :
        df( Pandas DataFrame ) : time series data with all the columns of the hierarchical time series

    Returns :
        train_data(Pandas DataFrame) : the leading 80 percent of the rows of df
        test_data(Pandas DataFrame) : the trailing 20 percent of the rows of df

    Raises :
        ValueError : if the input DataFrame has fewer than 12 rows
    """
    n_rows = df.shape[0]
    if n_rows < 12:
        raise ValueError(" The number of rows of the input dataframe should be atleast 12 ")
    # Builtin int replaces np.int, which was removed in NumPy 1.24.
    train_sample_size = int(np.floor(n_rows * 0.8))
    train_data = df.iloc[:train_sample_size, :]
    test_data = df.iloc[train_sample_size:, :]
    return train_data, test_data

In [5]:
def define_tree(temp_train: pd.DataFrame, hier: dict):
    """This function builds the hierarchy tree and the summing matrix used for reconciliation.

    Args :
        temp_train (Pandas DataFrame) : the train data derived in the train_test_data function,
            restricted to the columns that are nodes of the hierarchy
        hier (dict / OrderedDict) : the hierarchical structure of the time series; it is
            passed as the nodes mapping to HierarchyTree.from_nodes with 'total' as root

    Returns :
        tree (hts HierarchyTree) : the hierarchy tree built from hier and temp_train
        sum_mat (numpy array) : the summing matrix returned by hts.functions.to_sum_mat
        sum_mat_labels (Pandas Index) : the columns of temp_train

    """
    tree = hts.hierarchy.HierarchyTree.from_nodes(hier, temp_train, root='total')
    # NOTE(review): the labels returned by to_sum_mat are discarded and the
    # column order of temp_train is used instead; this assumes the columns are
    # already in the same order as the rows of sum_mat - confirm.
    sum_mat, _ = hts.functions.to_sum_mat(tree)
    sum_mat_labels = temp_train.columns
    return tree, sum_mat, sum_mat_labels

In [74]:
def time_series_scores(forecast, actual):
    """This function calculates the MAPE, RMSE, MAE and first-period MAPE.

    Both inputs are compared position by position (any pandas index is ignored),
    so Series, numpy arrays and plain lists are all accepted. Actual values that
    are exactly 0 are replaced by 1 in the percentage denominators to avoid
    division by zero.

    Args :
        forecast(Pandas Series/ numpy array/ list) : the forecast values of a time series
        actual(Pandas Series/ numpy array/ list) : the actual values of a time series

    Returns :
        mape(float) : Mean Absolute Percentage Error, in percent, rounded down to a whole number
        rmse(float) : Root Mean Square Error
        mae(float) : Mean Absolute Error
        first_wk_mape(float) : Absolute Percentage Error, in percent, of the first value only

    Raises :
        ValueError : if the inputs are empty or do not have the same length
    """
    forecast_values = pd.Series(np.asarray(forecast, dtype=float))
    actual_values = pd.Series(np.asarray(actual, dtype=float))
    if len(forecast_values) == 0 or len(forecast_values) != len(actual_values):
        raise ValueError("forecast and actual must be non-empty and of equal length")

    abs_errors = (forecast_values - actual_values).abs()
    # Zero actuals become 1 in the denominator; everything else uses |actual|.
    denominators = actual_values.abs().where(actual_values != 0, 1)

    mape = np.floor((abs_errors / denominators).mean() * 100)
    rmse = np.sqrt(((forecast_values - actual_values) ** 2).mean())
    mae = abs_errors.mean()
    # .iloc keeps this positional even when the caller passes date-indexed Series.
    first_wk_mape = 100 * abs_errors.iloc[0] / denominators.iloc[0]
    return mape, rmse, mae, first_wk_mape

In [88]:
def _reconcile_and_score(forecasts, temp_test, sum_mat):
    """Reconcile one model's base forecasts over the hierarchy and compute per-node MAPE.

    Args :
        forecasts(Pandas DataFrame) : base forecasts, one column per node, indexed like temp_test
        temp_test(Pandas DataFrame) : hold-out actuals used for scoring
        sum_mat(numpy array) : summing matrix passed to hts.functions.optimal_combination

    Returns :
        revised_forecasts(Pandas DataFrame) : reconciled forecasts, rounded with funct
        mape(Pandas DataFrame) : single-row frame (index 0) holding the MAPE of every node
    """
    pred_dict = collections.OrderedDict()
    for label in forecasts.columns:
        values = np.array(forecasts[label].values)
        if not np.all(values):
            # A column containing any exact zero is replaced wholesale by random
            # 0/1 draws (behaviour retained from the original implementation).
            values = np.random.randint(low=0, high=2, size=len(temp_test))
        # Plain arrays are used on purpose: staging the values in a DataFrame
        # would realign date-indexed columns against array columns and could
        # turn them into NaN.
        pred_dict[label] = pd.DataFrame(data=abs(values), columns=['yhat'])
    revised = hts.functions.optimal_combination(pred_dict, sum_mat, method='OLS', mse={})
    revised_forecasts = pd.DataFrame(data=revised[0:, 0:], index=forecasts.index,
                                     columns=forecasts.columns)
    mape = pd.DataFrame(columns=forecasts.columns)
    for col in revised_forecasts.columns:
        revised_forecasts[col] = revised_forecasts[col].apply(funct)
        # Only the MAPE is used downstream; RMSE, MAE and first-period MAPE are discarded.
        mape.loc[0, col] = time_series_scores(revised_forecasts[col], temp_test[col])[0]
    return revised_forecasts, mape


def forecast_models(temp_train, temp_test, tree, sum_mat, sum_mat_labels, exogenus_variables, m):
    """Fit ARIMA, Holt-Winters, Prophet and ARCH on the train split and score them on the test split.

    For every node in sum_mat_labels each model family is tried with a sequence
    of configurations; the first configuration that fits and predicts without
    raising is used. When every configuration fails (Prophet excepted, see
    below) the forecast falls back to random 0/1 integers. The base forecasts of
    each family are then reconciled with hts optimal_combination (OLS), rounded
    with funct and scored against temp_test.

    Args :
        temp_train(Pandas DataFrame) : train split; must contain the node columns and the exogenous columns
        temp_test(Pandas DataFrame) : test split with the same columns as temp_train
        tree : hierarchy tree (not used inside this function)
        sum_mat(numpy array) : summing matrix of the hierarchy
        sum_mat_labels : node column names, in summing-matrix order
        exogenus_variables(list) : names of the exogenous regressor columns
        m(int) : seasonal period used by the second ARIMA configuration

    Returns :
        revised_forecasts_ARIM, revised_forecasts_HWSE, revised_forecasts_PROP, revised_forecasts_ARCH
            (Pandas DataFrames) : reconciled test-period forecasts per model family
        mape_ARIM, mape_HWSE, mape_PROP, mape_ARCH (Pandas DataFrames) : single-row MAPE frames per model family
    """
    n_test = len(temp_test)

    # With ARIMA Model
    arima_common = dict(stepwise=True, error_action='ignore', seasonal=True,
                        start_P=1, start_Q=1, max_P=2, max_Q=2)
    arima_wide = dict(max_p=3, max_q=3, max_d=3, max_D=1, n_fits=10)
    # (use exogenous regressors?, search settings), tried in this order.
    # NOTE(review): the first attempt passes no `m`, so auto_arima's default
    # seasonal period applies there - confirm that this is intended.
    arima_attempts = [
        (True, dict(D=None, max_p=2, max_q=2, max_d=2, max_D=2, n_fits=5)),
        (False, dict(arima_wide, m=m, D=0, random=True)),
        (True, dict(arima_wide, m=12, D=None)),
        (False, dict(arima_wide, m=12, D=None, random=True)),
        (True, dict(arima_wide, m=4, D=None)),
        (False, dict(arima_wide, m=4, D=None, random=True)),
    ]
    forecasts_ARIM = pd.DataFrame()
    for col in sum_mat_labels:
        for use_exog, search_kwargs in arima_attempts:
            try:
                # NOTE(review): the trailing .fit(y) refits on y alone and
                # predict() receives `exog=`; whether the regressors actually
                # influence the forecast depends on the installed pmdarima
                # version - confirm.
                if use_exog:
                    stepwise_model = auto_arima(
                        temp_train[col].values, exogenous=temp_train[exogenus_variables],
                        **arima_common, **search_kwargs).fit(temp_train[col].values)
                    fcst = stepwise_model.predict(
                        n_periods=n_test, start=temp_test.index[0], end=temp_test.index[-1],
                        exog=temp_test[exogenus_variables])
                else:
                    stepwise_model = auto_arima(
                        temp_train[col].values, **arima_common,
                        **search_kwargs).fit(temp_train[col].values)
                    fcst = stepwise_model.predict(n_periods=n_test)
                break
            except Exception:
                # Best effort: fall through to the next configuration.
                continue
        else:
            fcst = np.random.randint(low=0, high=2, size=n_test)
        forecasts_ARIM[col] = fcst
    forecasts_ARIM.index = temp_test.index
    revised_forecasts_ARIM, mape_ARIM = _reconcile_and_score(forecasts_ARIM, temp_test, sum_mat)

    # With Holt's Winter Smooth Exponential Model
    forecasts_HWSE = pd.DataFrame(columns=sum_mat_labels)
    for col in sum_mat_labels:
        for seasonal_periods in (52, 12, 4):
            try:
                HWSE_model = HWES(temp_train[col], seasonal_periods=seasonal_periods,
                                  trend='add', seasonal='add').fit()
                forecasts_HWSE[col] = HWSE_model.forecast(steps=n_test)
                break
            except Exception:
                continue
        else:
            forecasts_HWSE[col] = np.random.randint(low=0, high=2, size=n_test)
        forecasts_HWSE[col] = forecasts_HWSE[col].fillna(0)
    forecasts_HWSE.index = temp_test.index
    revised_forecasts_HWSE, mape_HWSE = _reconcile_and_score(forecasts_HWSE, temp_test, sum_mat)

    # with Fb Prophet Model
    forecasts_PROP = pd.DataFrame(columns=sum_mat_labels)
    for col in sum_mat_labels:
        temp_prop_train = pd.DataFrame()
        temp_prop_train['ds'] = temp_train.index
        temp_prop_train['y'] = temp_train[col].values
        temp_prop_test = pd.DataFrame()
        temp_prop_test['ds'] = temp_test.index
        for exogenous_col in exogenus_variables:
            temp_prop_train[exogenous_col] = temp_train[exogenous_col].values
            temp_prop_test[exogenous_col] = temp_test[exogenous_col].values
        try:
            PROP_model = Prophet(daily_seasonality=False, yearly_seasonality=True,
                                 weekly_seasonality=False, interval_width=0.80)
            PROP_model = PROP_model.add_seasonality(name='weekly', period=7, fourier_order=1,
                                                    prior_scale=0.02)
            # Regressors must be registered before fitting; adding them after
            # .fit() raises, which previously forced the regressor-free fallback
            # on every column.
            for exogenous_col in exogenus_variables:
                PROP_model = PROP_model.add_regressor(exogenous_col)
            PROP_model = PROP_model.fit(temp_prop_train)
            forecasts_PROP[col] = PROP_model.predict(temp_prop_test)['yhat'].values
            print("prop successful")
        except Exception:
            # Fallback: same model without the exogenous regressors. A failure
            # here is not caught and propagates to the caller.
            PROP_model = Prophet(daily_seasonality=False, yearly_seasonality=True,
                                 weekly_seasonality=False, interval_width=0.80)
            PROP_model = PROP_model.add_seasonality(name='weekly', period=7, fourier_order=1,
                                                    prior_scale=0.02).fit(temp_prop_train)
            forecasts_PROP[col] = PROP_model.predict(temp_prop_test)['yhat'].values
    forecasts_PROP.index = temp_test.index
    revised_forecasts_PROP, mape_PROP = _reconcile_and_score(forecasts_PROP, temp_test, sum_mat)
    print(revised_forecasts_PROP.head())

    # With ARCH Model
    forecasts_ARCH = pd.DataFrame(columns=sum_mat_labels)
    for col in sum_mat_labels:
        for lags in (12, 6, 3, 1):
            try:
                stepwise_model = arch_model(temp_train[[col]], vol='ARCH', p=1, lags=lags).fit()
                fcst = stepwise_model.forecast(horizon=n_test).mean.values[-1, :]
                print("ARCH_fcst:", fcst)
                break
            except Exception:
                continue
        else:
            print("ARCH_fcst: failed")
            fcst = np.random.randint(low=0, high=2, size=n_test)
        forecasts_ARCH[col] = fcst
        print("ending ARCH:", col)
    forecasts_ARCH.index = temp_test.index
    print("forecasts_ARCH:", forecasts_ARCH)
    revised_forecasts_ARCH, mape_ARCH = _reconcile_and_score(forecasts_ARCH, temp_test, sum_mat)

    return  revised_forecasts_ARIM, revised_forecasts_HWSE, revised_forecasts_PROP, revised_forecasts_ARCH,  mape_ARIM, mape_HWSE, mape_PROP, mape_ARCH



In [143]:
def ensemble_fcst(temp_test, revised_forecasts_ARIM, revised_forecasts_HWSE, revised_forecasts_PROP, revised_forecasts_ARCH,  mape_ARIM,
                  mape_HWSE, mape_PROP, mape_ARCH, sum_mat_labels):
    """Pick, for every node, the model family with the lowest test MAPE.

    Ties are resolved in the order ARIM, HWSE, PROP, ARCH. None of the input
    frames is modified; the MAPE frames are tagged on copies.

    Args :
        temp_test : test split (not used inside this function)
        revised_forecasts_ARIM, revised_forecasts_HWSE, revised_forecasts_PROP, revised_forecasts_ARCH :
            reconciled test forecasts per model family (not used inside this function)
        mape_ARIM, mape_HWSE, mape_PROP, mape_ARCH (Pandas DataFrames) : MAPE frames with one column per node
        sum_mat_labels : node column names

    Returns :
        list_ARIMA, list_HWSE, list_PROP, list_ARCH (lists) : the nodes assigned to each
            model family, each in the order of sum_mat_labels
    """
    tagged_mapes = [
        ("mape_ARIM", mape_ARIM.assign(approach="ARIM")),
        ("mape_HWSE", mape_HWSE.assign(approach="HWSE")),
        ("mape_PROP", mape_PROP.assign(approach="PROP")),
        ("mape_ARCH", mape_ARCH.assign(approach="ARCH")),
    ]
    for frame_name, frame in tagged_mapes:
        print(frame_name, frame.shape)
    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
    mape_data = pd.concat([frame for _, frame in tagged_mapes])

    best_approach = {}
    for col in sum_mat_labels:
        lowest = mape_data[col].min()
        # First matching row wins, which gives the documented tie-break order.
        best_approach[col] = mape_data[mape_data[col] == lowest]['approach'].to_numpy()[0]
        print(f"column name {col}", best_approach[col])

    list_ARIMA = [col for col in sum_mat_labels if best_approach[col] == "ARIM"]
    list_HWSE = [col for col in sum_mat_labels if best_approach[col] == "HWSE"]
    list_PROP = [col for col in sum_mat_labels if best_approach[col] == "PROP"]
    list_ARCH = [col for col in sum_mat_labels if best_approach[col] == "ARCH"]
    print("list of ARIMA", list_ARIMA)
    print("list of HWSE", list_HWSE)
    print("list of PROP", list_PROP)
    print("list of ARCH", list_ARCH)
    return list_ARIMA, list_HWSE, list_PROP, list_ARCH

In [152]:
def forecast_refit(ts_data_2, list_ARIMA,  list_HWSE,  list_PROP, list_ARCH, tree, sum_mat, sum_mat_labels, fcst_input_data,exogenus_variables):
    """Refit each node's selected model family on the full history and forecast the future periods.

    Every node is forecast by exactly one family, according to the list it
    appears in. Within a family a sequence of configurations is tried and the
    first one that fits and predicts without raising is used; when all of them
    fail the forecast is all zeros (Prophet excepted: a failure of its
    regressor-free fallback propagates to the caller).

    Args :
        ts_data_2(Pandas DataFrame) : full history with the node columns and the exogenous columns
        list_ARIMA, list_HWSE, list_PROP, list_ARCH (lists) : nodes assigned to each model family
        tree, sum_mat, sum_mat_labels : not used inside this function
        fcst_input_data(Pandas DataFrame) : future periods; its index supplies the forecast
            index and its columns supply the future exogenous values
        exogenus_variables(list) : names of the exogenous regressor columns

    Returns :
        forecasts_ARIM, forecasts_HWSE, forecasts_PROP, forecasts_ARCH (Pandas DataFrames) :
            base forecasts per model family, one column per assigned node
    """
    horizon = len(fcst_input_data)
    fcst_start_date = fcst_input_data.index[0]
    print("fcst_start_date", fcst_start_date)
    fcst_end_date = fcst_input_data.index[-1]
    print("fcst_end_date", fcst_end_date)

    # ---- ARIMA ----
    arima_common = dict(stepwise=True, error_action='ignore', seasonal=True,
                        start_P=1, start_Q=1, max_P=2, max_Q=2)
    arima_wide = dict(D=None, max_p=3, max_q=3, max_d=3, max_D=1)
    # (use exogenous regressors?, search settings), tried in this order.
    # NOTE(review): the first attempt passes no `m`, so auto_arima's default
    # seasonal period applies there - confirm that this is intended.
    arima_attempts = [
        (True, dict(D=None, max_p=2, max_q=2, max_d=2, max_D=2)),
        (False, dict(arima_wide, m=52)),
        (True, dict(arima_wide, m=12)),
        (False, dict(arima_wide, m=12)),
        (True, dict(arima_wide, m=4)),
        (False, dict(arima_wide, m=4)),
    ]
    forecasts_ARIM = pd.DataFrame(columns=list_ARIMA)
    if len(list_ARIMA) > 0:
        forecasts_ARIM = pd.DataFrame()
        for col in list_ARIMA:
            for use_exog, search_kwargs in arima_attempts:
                try:
                    if use_exog:
                        stepwise_model = auto_arima(
                            ts_data_2[col].values, exogenous=ts_data_2[exogenus_variables],
                            **arima_common, **search_kwargs).fit(ts_data_2[col].values)
                    else:
                        stepwise_model = auto_arima(
                            ts_data_2[col].values, **arima_common,
                            **search_kwargs).fit(ts_data_2[col].values)
                    # The progress message now reports the configuration that was actually fitted.
                    print(col, "ARIMA fitted",
                          "with exogenous" if use_exog else "without exogenous",
                          "m =", search_kwargs.get('m', 'default'))
                    if use_exog:
                        fcst = stepwise_model.predict(
                            n_periods=horizon, start=fcst_start_date, end=fcst_end_date,
                            exog=fcst_input_data[exogenus_variables])
                    else:
                        fcst = stepwise_model.predict(n_periods=horizon)
                    break
                except Exception:
                    # Best effort: fall through to the next configuration.
                    continue
            else:
                fcst = [0] * horizon
            forecasts_ARIM[col] = fcst
            forecasts_ARIM.index = np.array(fcst_input_data.index)

    # ---- Holt-Winters ----
    forecasts_HWSE = pd.DataFrame(columns=list_HWSE)
    if len(list_HWSE) > 0:
        for col in list_HWSE:
            for seasonal_periods in (52, 12, 4):
                try:
                    HWSE_model = HWES(ts_data_2[col], seasonal_periods=seasonal_periods,
                                      trend='add', seasonal='add').fit()
                    forecasts_HWSE[col] = HWSE_model.forecast(steps=horizon)
                    break
                except Exception:
                    continue
            else:
                forecasts_HWSE[col] = [0] * horizon
            forecasts_HWSE[col] = forecasts_HWSE[col].fillna(0)
        forecasts_HWSE.index = np.array(fcst_input_data.index)

    # ---- Prophet ----
    forecasts_PROP = pd.DataFrame(index=np.array(fcst_input_data.index),  columns=list_PROP)
    if len(list_PROP) > 0:
        for col in list_PROP:
            temp_prop_train = pd.DataFrame()
            temp_prop_train['ds'] = np.array(ts_data_2.index)
            temp_prop_train['y'] = ts_data_2[col].values
            temp_prop_test = pd.DataFrame()
            temp_prop_test['ds'] = np.array(fcst_input_data.index)
            for exogenous_col in exogenus_variables:
                temp_prop_train[exogenous_col] = ts_data_2[exogenous_col].values
                temp_prop_test[exogenous_col] = fcst_input_data[exogenous_col].values

            # NOTE(review): the custom seasonality is named 'yearly' with
            # period=12, whereas forecast_models validates with a 'weekly'
            # seasonality of period 7 - confirm which configuration is intended.
            try:
                PROP_model = Prophet(daily_seasonality=False, yearly_seasonality=True,
                                     weekly_seasonality=False, interval_width=0.80)
                for exogenous_col in exogenus_variables:
                    PROP_model = PROP_model.add_regressor(exogenous_col)
                PROP_model = PROP_model.add_seasonality(name='yearly', period=12, fourier_order=5,
                                                        prior_scale=0.02).fit(temp_prop_train)
                forecasts_PROP[col] = PROP_model.predict(temp_prop_test)['yhat'].values
                print("prop successful")
            except Exception:
                # Fallback: same model without the exogenous regressors.
                PROP_model = Prophet(daily_seasonality=False, yearly_seasonality=True,
                                     weekly_seasonality=False, interval_width=0.80)
                PROP_model = PROP_model.add_seasonality(name='yearly', period=12, fourier_order=5,
                                                        prior_scale=0.02).fit(temp_prop_train)
                forecasts_PROP[col] = PROP_model.predict(temp_prop_test)['yhat'].values
    forecasts_PROP.index = np.array(fcst_input_data.index)
    print(forecasts_PROP)

    # ---- ARCH ----
    forecasts_ARCH = pd.DataFrame(columns=list_ARCH)
    print("fcst_start_date", fcst_start_date)
    if len(list_ARCH) > 0:
        forecasts_ARCH = pd.DataFrame()
        for col in list_ARCH:
            # The last attempt passes no `lags`, leaving arch_model's default in place.
            for lag_kwargs in ({'lags': 12}, {'lags': 6}, {'lags': 3}, {}):
                try:
                    stepwise_model = arch_model(ts_data_2[col].values, mean="AR", vol="ARCH",
                                                p=1, **lag_kwargs).fit()
                    fcst = stepwise_model.forecast(horizon=horizon).mean.values[-1, :]
                    break
                except Exception:
                    continue
            else:
                print("ARCH failed")
                fcst = [0] * horizon
            forecasts_ARCH[col] = fcst
            forecasts_ARCH.index = np.array(fcst_input_data.index)
    print(forecasts_ARCH.shape, forecasts_ARIM.shape ,  forecasts_HWSE.shape, forecasts_PROP.shape)
    return forecasts_ARIM, forecasts_HWSE, forecasts_PROP, forecasts_ARCH

In [153]:
def forecast_output(final_forecasts_ARIM, final_forecasts_HWSE, final_forecasts_PROP, final_forecasts_ARCH, sum_mat, sum_mat_labels, fcst_input_data):
    """Combine the per-family forecasts and reconcile them across the hierarchy.

    The four forecast frames are joined column-wise. Any node column that
    contains an exact zero is replaced by random 0/1 draws before the absolute
    values are passed to hts optimal_combination (OLS). The reconciled values
    have NaN replaced by 0, are rounded with funct and are indexed by the index
    of fcst_input_data.

    Returns :
        revised_forecasts(Pandas DataFrame) : reconciled forecasts, one column per label in sum_mat_labels
    """
    combined = pd.concat(
        [final_forecasts_ARIM, final_forecasts_HWSE, final_forecasts_PROP, final_forecasts_ARCH],
        axis=1,
    )
    adjusted = combined.copy()
    print("forecast_output:", combined.columns)

    pred_dict = collections.OrderedDict()
    for label in sum_mat_labels:
        has_zero = not np.all(np.array(combined[label]))
        if has_zero:
            adjusted[label] = np.random.randint(low = 0,high=2,size=len(combined))
        pred_dict[label] = pd.DataFrame(data=abs(adjusted[label].values), columns=['yhat'])

    revised_val = hts.functions.optimal_combination(pred_dict, sum_mat, method='OLS', mse={})
    revised_forecasts = pd.DataFrame(
        data=revised_val[0:,0:], index=combined.index, columns=sum_mat_labels
    ).fillna(0)
    for node in sum_mat_labels:
        revised_forecasts[node] = revised_forecasts[node].apply(funct)
    revised_forecasts.index = np.array(fcst_input_data.index)
    return revised_forecasts

In [155]:
def hts_forecast_function(df : pd.DataFrame , hier : dict , exogenus_variables : list , predictable_variables : list , fcst_input_data : pd.DataFrame,m:int):
    """Run the full hierarchical forecasting pipeline and return the reconciled forecast.

    Steps: split df 80:20, build the hierarchy tree and summing matrix, score
    the four model families on the test split, pick the best family per node,
    refit on the full history and reconcile the future forecasts.

    Args :
        df(Pandas DataFrame) : history containing the node columns and the exogenous columns
        hier(dict) : hierarchy definition passed to define_tree
        exogenus_variables(list) : names of the exogenous regressor columns
        predictable_variables(list) : names of the node columns to forecast
        fcst_input_data(Pandas DataFrame) : future periods with the exogenous columns
        m(int) : seasonal period forwarded to forecast_models

    Returns :
        forecasted_output(Pandas DataFrame) : reconciled forecasts for the periods of fcst_input_data

    Raises :
        ValueError : if df lacks any predictable or exogenous column, or
            fcst_input_data lacks any exogenous column
    """
    # Fail early with a clear message instead of a KeyError deep inside a model loop.
    required_columns = list(predictable_variables) + list(exogenus_variables)
    missing_in_df = [col for col in required_columns if col not in df.columns]
    if missing_in_df:
        raise ValueError(f"df is missing required columns: {missing_in_df}")
    missing_in_future = [col for col in exogenus_variables if col not in fcst_input_data.columns]
    if missing_in_future:
        raise ValueError(f"fcst_input_data is missing exogenous columns: {missing_in_future}")

    # Splitting the data into train and test samples
    train_data, test_data = train_test_data(df)
    tree, sum_mat, sum_mat_labels = define_tree(train_data[predictable_variables], hier)
    revised_forecasts_ARIM, revised_forecasts_HWSE, revised_forecasts_PROP, revised_forecasts_ARCH,  mape_ARIM, mape_HWSE, mape_PROP, mape_ARCH  = \
    forecast_models(train_data, test_data, tree, sum_mat, sum_mat_labels, exogenus_variables, m)
    list_ARIMA, list_HWSE, list_PROP, list_ARCH =  ensemble_fcst(test_data, revised_forecasts_ARIM, revised_forecasts_HWSE, revised_forecasts_PROP, revised_forecasts_ARCH,  mape_ARIM,
                  mape_HWSE, mape_PROP, mape_ARCH, sum_mat_labels)
    final_forecasts_ARIM, final_forecasts_HWSE, final_forecasts_PROP, final_forecasts_ARCH = forecast_refit(df, list_ARIMA,list_HWSE,list_PROP,list_ARCH,tree,sum_mat, sum_mat_labels,fcst_input_data,exogenus_variables )

    forecasted_output = forecast_output(final_forecasts_ARIM, final_forecasts_HWSE, final_forecasts_PROP, final_forecasts_ARCH, sum_mat, sum_mat_labels, fcst_input_data)
    # The reconciled forecast was previously computed but never handed back.
    return forecasted_output


In [156]:
from datetime import datetime
from hts import HTSRegressor
from hts.utilities.load_data import load_hierarchical_sine_data
import warnings
import numpy
import pandas
warnings.filterwarnings('ignore')

In [157]:
# Series to forecast: the two aggregate levels first, then the individual
# TN1-Vellore leaf series.
predictable_variables = [
    'total',
    'TN1_TN1-Vellore',
    'TN1_TN1-Vellore_80124',
    'TN1_TN1-Vellore_10853',
    'TN1_TN1-Vellore_10689',
    'TN1_TN1-Vellore_11720',
    'TN1_TN1-Vellore_11992',
    'TN1_TN1-Vellore_11706',
    'TN1_TN1-Vellore_13054',
]

# Columns supplied to the models as exogenous inputs.
exogenus_variables = [
    'week_of_month',
    'peak_flag',
    'festival_flag',
    'covid_flag',
]

# Passed as `m` to forecast_models (presumably the seasonal period -- TODO confirm).
m = 12

In [158]:
# Load the weekly history, index it by a timezone-naive week start and keep
# only the target and exogenous columns.
# NOTE(review): absolute, user-specific path -- the notebook will not run on another machine as is.
# NOTE(review): infer_datetime_format is deprecated in pandas >= 2.0 -- confirm the pinned pandas version.
df = pd.read_csv("/Users/karanamramachaitanya/Downloads/cltv_space/forecasting/train_test_data.csv")
df['start_of_week'] = pd.to_datetime(
    df['start_of_week'], infer_datetime_format=True
).dt.tz_localize(None)
df = df.set_index("start_of_week")[predictable_variables + exogenus_variables]
df.head()

Unnamed: 0_level_0,total,TN1_TN1-Vellore,TN1_TN1-Vellore_80124,TN1_TN1-Vellore_10853,TN1_TN1-Vellore_10689,TN1_TN1-Vellore_11720,TN1_TN1-Vellore_11992,TN1_TN1-Vellore_11706,TN1_TN1-Vellore_13054,week_of_month,peak_flag,festival_flag,covid_flag
start_of_week,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2017-01-09,3,3,0,3,0,0,0,0,0,1,0,0,0
2017-01-16,3,3,2,0,1,0,0,0,0,2,0,0,0
2017-01-23,2,2,0,0,2,0,0,0,0,3,0,0,0
2017-01-30,4,4,1,3,0,0,0,0,0,4,0,0,0
2017-02-06,0,0,0,0,0,0,0,0,0,1,0,0,0


In [159]:
# Load the forecast-horizon rows; 'fcst_weeks' is parsed into a timezone-naive
# 'start_of_week' column and then removed.
# NOTE(review): absolute, user-specific path -- the notebook will not run on another machine as is.
fcst_input_data = pd.read_csv("/Users/karanamramachaitanya/Downloads/cltv_space/forecasting/fcst_input_data.csv")
fcst_input_data['start_of_week'] = pd.to_datetime(
    fcst_input_data['fcst_weeks'], infer_datetime_format=True
).dt.tz_localize(None)
fcst_input_data = fcst_input_data.drop(columns=['fcst_weeks'])
fcst_input_data

Unnamed: 0,fcst_months,fcst_year,month_year,week_of_month,peak_flag,festival_flag,covid_flag,start_of_week
0,9,2021,2021-9,1,1,0,0,2021-09-06
1,9,2021,2021-9,2,1,0,0,2021-09-13
2,9,2021,2021-9,3,1,0,0,2021-09-20
3,9,2021,2021-9,4,1,0,0,2021-09-27
4,10,2021,2021-10,1,1,1,0,2021-10-04
5,10,2021,2021-10,2,1,1,0,2021-10-11
6,10,2021,2021-10,3,1,1,0,2021-10-18
7,10,2021,2021-10,4,1,1,0,2021-10-25
8,11,2021,2021-11,1,1,1,0,2021-11-01
9,11,2021,2021-11,2,1,1,0,2021-11-08


In [160]:
# Hierarchy handed to define_tree: each key lists the series directly beneath it.
hier = {
    'total': ['TN1_TN1-Vellore'],
    'TN1_TN1-Vellore': [
        'TN1_TN1-Vellore_80124',
        'TN1_TN1-Vellore_10853',
        'TN1_TN1-Vellore_10689',
        'TN1_TN1-Vellore_11720',
        'TN1_TN1-Vellore_11992',
        'TN1_TN1-Vellore_11706',
        'TN1_TN1-Vellore_13054',
    ],
}

In [161]:
# First 80:20 split: the trailing 20% of df is kept aside as validation data.
train_test , valid_data  = train_test_data(df)
print("train data shape", train_test.shape)
print("validation data shape", valid_data.shape)

train data shape (194, 13)
validation data shape (49, 13)


In [162]:
# Second 80:20 split, applied to the 80% kept by the previous split.
train_data,test_data =    train_test_data(train_test)
print("train data shape", train_data.shape)
print("test data shape", test_data.shape)

train data shape (155, 13)
test data shape (39, 13)


In [163]:
# Build the hierarchy objects from the target columns of the training sample only.
tree, sum_mat, sum_mat_labels =  define_tree(train_data[predictable_variables],hier) 
# print("tree of the hierarachy ", tree)
# print("sumamrising matrix", sum_mat)
# print("summarising matrix labels", sum_mat_labels)

In [164]:
# Fit the candidate models on train_data and evaluate them against test_data.
# Presumably returns one reconciled forecast and one MAPE table per model
# family -- TODO confirm against forecast_models.
(
    revised_forecasts_ARIM,
    revised_forecasts_HWSE,
    revised_forecasts_PROP,
    revised_forecasts_ARCH,
    mape_ARIM,
    mape_HWSE,
    mape_PROP,
    mape_ARCH,
) = forecast_models(
    train_data, test_data, tree, sum_mat, sum_mat_labels, exogenus_variables, m
)

Initial log joint probability = -5.97642
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      99       206.084   3.62135e-07       60.6559      0.6026      0.6026      129   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     121       206.084   1.15506e-07       78.1063   1.536e-09       0.001      199  LS failed, Hessian reset 
     129       206.084   8.64028e-09        74.529      0.2834      0.2834      210   
Optimization terminated normally: 
  Convergence detected: absolute parameter change was below tolerance
Initial log joint probability = -5.97642
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      99       206.084   3.62135e-07       60.6559      0.6026      0.6026      129   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     121       206.084   1.15506e-07       78.1063   1.536e-09       0.00

Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



In [165]:
# Keep the raw return value of ensemble_fcst for inspection.
# NOTE(review): the identical call is repeated in a later cell with the result unpacked.
output1 = ensemble_fcst(
    test_data,
    revised_forecasts_ARIM,
    revised_forecasts_HWSE,
    revised_forecasts_PROP,
    revised_forecasts_ARCH,
    mape_ARIM,
    mape_HWSE,
    mape_PROP,
    mape_ARCH,
    sum_mat_labels,
)

mape_ARIM (1, 10)
mape_HWSE (1, 10)
mape_PROP (1, 10)
mape_ARCH (1, 10)
column name total ARIM
column name TN1_TN1-Vellore ARIM
column name TN1_TN1-Vellore_80124 ARIM
column name TN1_TN1-Vellore_10853 ARIM
column name TN1_TN1-Vellore_10689 ARIM
column name TN1_TN1-Vellore_11720 ARIM
column name TN1_TN1-Vellore_11992 ARCH
column name TN1_TN1-Vellore_11706 ARIM
column name TN1_TN1-Vellore_13054 HWSE
list of ARIMA ['total', 'TN1_TN1-Vellore', 'TN1_TN1-Vellore_80124', 'TN1_TN1-Vellore_10853', 'TN1_TN1-Vellore_10689', 'TN1_TN1-Vellore_11720', 'TN1_TN1-Vellore_11706']
list of HWSE ['TN1_TN1-Vellore_13054']
list of PROP []
list of ARCH ['TN1_TN1-Vellore_11992']


In [166]:
# Display the four lists of series names returned by ensemble_fcst.
output1

(['total',
  'TN1_TN1-Vellore',
  'TN1_TN1-Vellore_80124',
  'TN1_TN1-Vellore_10853',
  'TN1_TN1-Vellore_10689',
  'TN1_TN1-Vellore_11720',
  'TN1_TN1-Vellore_11706'],
 ['TN1_TN1-Vellore_13054'],
 [],
 ['TN1_TN1-Vellore_11992'])

In [167]:
# Unpack the per-model lists of series names produced by ensemble_fcst.
list_ARIMA, list_HWSE, list_PROP, list_ARCH = ensemble_fcst(
    test_data,
    revised_forecasts_ARIM,
    revised_forecasts_HWSE,
    revised_forecasts_PROP,
    revised_forecasts_ARCH,
    mape_ARIM,
    mape_HWSE,
    mape_PROP,
    mape_ARCH,
    sum_mat_labels,
)

mape_ARIM (1, 10)
mape_HWSE (1, 10)
mape_PROP (1, 10)
mape_ARCH (1, 10)
column name total ARIM
column name TN1_TN1-Vellore ARIM
column name TN1_TN1-Vellore_80124 ARIM
column name TN1_TN1-Vellore_10853 ARIM
column name TN1_TN1-Vellore_10689 ARIM
column name TN1_TN1-Vellore_11720 ARIM
column name TN1_TN1-Vellore_11992 ARCH
column name TN1_TN1-Vellore_11706 ARIM
column name TN1_TN1-Vellore_13054 HWSE
list of ARIMA ['total', 'TN1_TN1-Vellore', 'TN1_TN1-Vellore_80124', 'TN1_TN1-Vellore_10853', 'TN1_TN1-Vellore_10689', 'TN1_TN1-Vellore_11720', 'TN1_TN1-Vellore_11706']
list of HWSE ['TN1_TN1-Vellore_13054']
list of PROP []
list of ARCH ['TN1_TN1-Vellore_11992']


In [168]:
# Show which series were assigned to each model family.
print("list_ARIMA=", list_ARIMA)
print("list_HWSE=", list_HWSE)
print("list_PROP=", list_PROP)
print("list_ARCH=", list_ARCH)

list_ARIMA= ['total', 'TN1_TN1-Vellore', 'TN1_TN1-Vellore_80124', 'TN1_TN1-Vellore_10853', 'TN1_TN1-Vellore_10689', 'TN1_TN1-Vellore_11720', 'TN1_TN1-Vellore_11706']
list_HWSE= ['TN1_TN1-Vellore_13054']
list_PROP= []
list_ARCH= ['TN1_TN1-Vellore_11992']


In [169]:
# Refit using the complete history `df` and the forecast-horizon rows;
# each series is handled by the model family it was assigned to.
(
    final_forecasts_ARIM,
    final_forecasts_HWSE,
    final_forecasts_PROP,
    final_forecasts_ARCH,
) = forecast_refit(
    df,
    list_ARIMA,
    list_HWSE,
    list_PROP,
    list_ARCH,
    tree,
    sum_mat,
    sum_mat_labels,
    fcst_input_data,
    exogenus_variables,
)

fcst_start_date 0
fcst_end_date 25
with exo exogeneous-m-12
with exo exogeneous-m-12
with exo exogeneous-m-12
with exo exogeneous-m-12
with exo exogeneous-m-12
with exo exogeneous-m-12
with exo exogeneous-m-12
Empty DataFrame
Columns: []
Index: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]
fcst_start_date 0
Iteration:      1,   Func. Count:     17,   Neg. LLF: 2028.6888933264677
Iteration:      2,   Func. Count:     43,   Neg. LLF: 4320.669428145322
Iteration:      3,   Func. Count:     65,   Neg. LLF: 8313.996253882511
Iteration:      4,   Func. Count:     85,   Neg. LLF: 4591.467762672941
Iteration:      5,   Func. Count:    104,   Neg. LLF: 3202.095078161719
Iteration:      6,   Func. Count:    123,   Neg. LLF: 1527.957793537471
Iteration:      7,   Func. Count:    141,   Neg. LLF: 6834.833012779467
Iteration:      8,   Func. Count:    159,   Neg. LLF: 938773.3260077699
Iteration:      9,   Func. Count:    177,   Neg. LLF: 3483.204213

In [170]:
# Combine the four refitted forecast sets into the final reconciled table
# (forecast_output uses hts optimal_combination with method='OLS' and rounds the values).
forecasted_output = forecast_output(
    final_forecasts_ARIM,
    final_forecasts_HWSE,
    final_forecasts_PROP,
    final_forecasts_ARCH,
    sum_mat,
    sum_mat_labels,
    fcst_input_data,
)
print(forecasted_output)

forecast_output: Index(['total', 'TN1_TN1-Vellore', 'TN1_TN1-Vellore_80124',
       'TN1_TN1-Vellore_10853', 'TN1_TN1-Vellore_10689',
       'TN1_TN1-Vellore_11720', 'TN1_TN1-Vellore_11706',
       'TN1_TN1-Vellore_13054', 'TN1_TN1-Vellore_11992'],
      dtype='object')
    total  TN1_TN1-Vellore  TN1_TN1-Vellore_80124  TN1_TN1-Vellore_10853  \
0    47.0             47.0                   12.0                   12.0   
1    32.0             32.0                    8.0                    8.0   
2    43.0             43.0                   10.0                   11.0   
3    34.0             34.0                    9.0                    9.0   
4    42.0             42.0                   10.0                   11.0   
5    36.0             36.0                   10.0                    9.0   
6    41.0             41.0                   10.0                   10.0   
7    37.0             37.0                    9.0                    9.0   
8    40.0             40.0                   

In [172]:
# Rich display of the reconciled forecasts, one row per forecast week.
forecasted_output

Unnamed: 0,total,TN1_TN1-Vellore,TN1_TN1-Vellore_80124,TN1_TN1-Vellore_10853,TN1_TN1-Vellore_10689,TN1_TN1-Vellore_11720,TN1_TN1-Vellore_11992,TN1_TN1-Vellore_11706,TN1_TN1-Vellore_13054
0,47.0,47.0,12.0,12.0,4.0,4.0,7.0,3.0,5.0
1,32.0,32.0,8.0,8.0,3.0,4.0,5.0,1.0,3.0
2,43.0,43.0,10.0,11.0,5.0,5.0,3.0,3.0,5.0
3,34.0,34.0,9.0,9.0,4.0,5.0,2.0,2.0,4.0
4,42.0,42.0,10.0,11.0,5.0,5.0,4.0,3.0,5.0
5,36.0,36.0,10.0,9.0,4.0,5.0,0.0,3.0,5.0
6,41.0,41.0,10.0,10.0,4.0,5.0,5.0,2.0,4.0
7,37.0,37.0,9.0,9.0,4.0,5.0,2.0,2.0,4.0
8,40.0,40.0,10.0,10.0,4.0,5.0,3.0,3.0,5.0
9,38.0,38.0,9.0,9.0,4.0,5.0,4.0,2.0,4.0


In [16]:
# Second experiment: hts' bundled hierarchical sine data, summed into hourly buckets.
# NOTE: this rebinds `hier`, replacing the TN1-Vellore hierarchy defined in an earlier cell.
s = datetime(2019, 1, 15)
e = datetime(2019, 2, 15)
hsd = load_hierarchical_sine_data(s, e).resample('1H').apply(sum)

hier = {
    'total': ['a', 'b', 'c'],
    'a': ['a_x', 'a_y'],
    'b': ['b_x', 'b_y'],
    'c': ['c_x', 'c_y'],
    'a_x': ['a_x_1', 'a_x_2'],
    'a_y': ['a_y_1', 'a_y_2'],
    'b_x': ['b_x_1', 'b_x_2'],
    'b_y': ['b_y_1', 'b_y_2'],
    'c_x': ['c_x_1', 'c_x_2'],
    'c_y': ['c_y_1', 'c_y_2'],
}

hsd.head()

Unnamed: 0,total,a,b,c,a_x,a_y,b_x,b_y,c_x,c_y,...,a_y_1,a_y_2,b_x_1,b_x_2,b_y_1,b_y_2,c_x_1,c_x_2,c_y_1,c_y_2
2019-01-15 00:00:00,39.208037,17.840944,8.209186,13.157908,9.535306,8.305638,5.50628,2.702906,6.209704,6.948204,...,5.786017,2.51962,2.157072,3.349208,1.250028,1.452878,3.265691,2.944014,0.5602,6.388004
2019-01-15 01:00:00,163.550244,48.338869,52.395102,62.816274,17.652901,30.685968,24.7283,27.666801,34.157669,28.658605,...,15.748453,14.937515,14.658236,10.070064,14.797188,12.869613,21.315198,12.842471,14.969084,13.689521
2019-01-15 02:00:00,210.540211,47.339751,75.073548,88.126912,21.333215,26.006537,22.653928,52.41962,56.950034,31.176878,...,12.721673,13.284864,7.109368,15.544559,29.963135,22.456485,13.991799,42.958235,16.172302,15.004576
2019-01-15 03:00:00,222.857219,86.959626,74.621644,61.275948,50.285013,36.674613,23.701859,50.919786,25.697349,35.578599,...,20.865075,15.809538,8.529802,15.172057,36.832818,14.086967,10.61564,15.081709,25.324369,10.254229
2019-01-15 04:00:00,366.105956,142.863314,124.045866,99.196776,75.23261,67.630704,55.804398,68.241468,40.217578,58.979198,...,35.187175,32.443529,29.931312,25.873087,32.844405,35.397062,15.971722,24.245856,27.740806,31.238392


In [21]:
# First 80:20 split of the sine data: the trailing 20% is kept aside as validation data.
train_test , valid_data  = train_test_data(hsd)
print("train data shape", train_test.shape)
print("validation data shape", valid_data.shape)

train data shape (595, 22)
validation data shape (149, 22)


In [23]:
# Second 80:20 split, applied to the 80% kept by the previous split.
train_data,test_data =    train_test_data(train_test)
print("train data shape", train_data.shape)
print("test data shape", test_data.shape)

train data shape (476, 22)
test data shape (119, 22)


In [28]:
# Every column of the sine dataset is a target series; there are no exogenous inputs.
predictable_variables = hsd.columns.tolist()
exogenus_variables = []
m = 12

In [29]:
# Build the hierarchy objects for the sine data from the training sample.
tree, sum_mat, sum_mat_labels =  define_tree(train_data[predictable_variables],hier) 
# print("tree of the hierarachy ", tree)
# print("sumamrising matrix", sum_mat)
# print("summarising matrix labels", sum_mat_labels)

In [30]:
# Fit the candidate models on the sine-data train sample and evaluate them on its test sample.
(
    revised_forecasts_ARIM,
    revised_forecasts_HWSE,
    revised_forecasts_PROP,
    revised_forecasts_ARCH,
    mape_ARIM,
    mape_HWSE,
    mape_PROP,
    mape_ARCH,
) = forecast_models(
    train_data, test_data, tree, sum_mat, sum_mat_labels, exogenus_variables, m
)

Starting HWSE: total
Ending HWSE: total
Starting HWSE: a
Ending HWSE: a
Starting HWSE: b
Ending HWSE: b
Starting HWSE: c
Ending HWSE: c
Starting HWSE: a_x
Ending HWSE: a_x
Starting HWSE: a_y
Ending HWSE: a_y
Starting HWSE: b_x
Ending HWSE: b_x
Starting HWSE: b_y
Ending HWSE: b_y
Starting HWSE: c_x
Ending HWSE: c_x
Starting HWSE: c_y
Ending HWSE: c_y
Starting HWSE: a_x_1
Ending HWSE: a_x_1
Starting HWSE: a_x_2
Ending HWSE: a_x_2
Starting HWSE: a_y_1
Ending HWSE: a_y_1
Starting HWSE: a_y_2
Ending HWSE: a_y_2
Starting HWSE: b_x_1
Ending HWSE: b_x_1
Starting HWSE: b_x_2
Ending HWSE: b_x_2
Starting HWSE: b_y_1
Ending HWSE: b_y_1
Starting HWSE: b_y_2
Ending HWSE: b_y_2
Starting HWSE: c_x_1
Ending HWSE: c_x_1
Starting HWSE: c_x_2
Ending HWSE: c_x_2
Starting HWSE: c_y_1
Ending HWSE: c_y_1
Starting HWSE: c_y_2
Ending HWSE: c_y_2
Initial log joint probability = -62.5979
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      83       165.974   5.33079e-0

In [43]:
# Assign each sine-data series to a model family.
# NOTE(review): the saved output of this cell ends in
# "ValueError: Must have equal len keys and value when setting with an iterable",
# so the names below still hold the values from the earlier run.
list_ARIMA, list_HWSE, list_PROP, list_ARCH = ensemble_fcst(
    test_data,
    revised_forecasts_ARIM,
    revised_forecasts_HWSE,
    revised_forecasts_PROP,
    revised_forecasts_ARCH,
    mape_ARIM,
    mape_HWSE,
    mape_PROP,
    mape_ARCH,
    sum_mat_labels,
)

mape_ARIM (1, 23)
mape_HWSE (1, 23)
mape_PROP (1, 23)
mape_ARCH (1, 23)
mape dataframe     total       a       b       c     a_x     a_y      b_x      b_y      c_x  \
0   113.0   105.0   128.0   106.0   104.0   116.0    387.0    155.0    299.0   
0  4756.0  2531.0  3792.0  2515.0  2400.0  3817.0  40813.0  23500.0  11103.0   
0   149.0   122.0   124.0   115.0   106.0   138.0    806.0    413.0    289.0   
0   119.0   103.0   110.0   116.0   100.0   108.0    390.0    186.0    237.0   

      c_y  ...   a_y_2   b_x_1   b_x_2   b_y_1   b_y_2   c_x_1   c_x_2  \
0   123.0  ...   181.0   127.0   152.0   175.0   194.0   204.0   107.0   
0  8258.0  ...  4184.0  6940.0  3254.0  5529.0  5579.0  5796.0  5524.0   
0   191.0  ...   158.0   179.0   122.0   144.0   148.0   129.0   128.0   
0   123.0  ...   112.0   106.0   100.0   128.0   154.0   122.0   104.0   

    c_y_1   c_y_2 approach  
0   129.0   287.0     ARIM  
0  6705.0  9917.0     HWSE  
0   184.0   353.0     PROP  
0   106.0   218.0     ARC

ValueError: Must have equal len keys and value when setting with an iterable

In [None]:
# Show which series were assigned to each model family (cell has no saved output).
print("list_ARIMA=", list_ARIMA)
print("list_HWSE=", list_HWSE)
print("list_PROP=", list_PROP)
print("list_ARCH=", list_ARCH)

In [32]:
df = pd.DataFrame()