In [1]:
import os

import numpy as np
import pandas as pd
from tqdm.notebook import tqdm
from scalecast import GridGenerator

from Forecaster import Forecaster
from fix_date import fix_date

In [2]:
# Estimator names handed to Forecaster.tune_test_forecast for every series.
models = ('mlr', 'elasticnet', 'sgd', 'svr', 'lightgbm', 'xgboost', 'knn', 'mlp')

In [3]:
# Per-series metadata, indexed by the first CSV column; the forecasting loops
# look up 'StartingDate' and 'Horizon' here by series id.
info = pd.read_csv('m4/M4-info.csv', index_col=0, parse_dates=['StartingDate'], dayfirst=True)
# Run every parsed start date through the project's fix_date helper.
info['StartingDate'] = info['StartingDate'].apply(fix_date)

## Hourly

In [4]:
# Hourly training series; the first CSV column becomes the index.
Hourly = pd.read_csv('m4/train/Hourly-train.csv', index_col=0)

In [5]:
# Hourly hold-out set; the first CSV column becomes the index.
Hourly_test = pd.read_csv('m4/test/Hourly-test.csv', index_col=0)

# Empty frame with the same index and columns as the test set; copied once per
# model name to hold that model's forecasts.
Hourly_results_template = pd.DataFrame(
    index=Hourly_test.index,
    columns=Hourly_test.columns,
)

# Maps model name -> DataFrame of forecasts (filled by the loop below).
Hourly_results = dict()

In [None]:
# Forecast every Hourly series: build a Forecaster, pick regressors, tune and
# test each estimator in `models`, add combination forecasts, and store the
# level forecasts in Hourly_results (one DataFrame per model name).
# NOTE(review): len(f.y) is re-read at every check below rather than cached;
# f.integrate and f.determine_best_series_length presumably can change it, so
# the statement order matters - confirm before reordering anything.
for i in tqdm(Hourly.index):
    # Row i of the training frame with missing values dropped.
    y = Hourly.loc[i].dropna()
    # Start date and forecast horizon come from the metadata frame.
    sd = info.loc[i,'StartingDate']
    fcst_horizon = info.loc[i,'Horizon']
    # One hourly timestamp per observation, beginning at the start date.
    cd = pd.date_range(
        start = sd,
        freq = 'H',
        periods = len(y),
    )
    f = Forecaster(
        y = y,
        current_dates = cd,
        future_dates = fcst_horizon,
    )
    
    # Test/validation sets span one horizon when more than 100 observations
    # are available, otherwise a single observation.
    f.set_test_length(fcst_horizon if len(f.y) > 100 else 1)
    # presumably differences the series at most once - confirm against Forecaster.integrate
    f.integrate(critical_pval=.99,max_integration=1)
    f.set_validation_length(fcst_horizon if len(f.y) > 100 else 1)
    f.set_validation_metric('mae')
    if len(f.y) > 8760:
        f.add_cycle(8760) # yearly
    if len(f.y) > 300:
        # Longer series: search up to 48 AR terms, then search for the best
        # series length in steps of 50 with at least 300 observations.
        f.auto_Xvar_select(
            monitor='LevelTestSetMAE',
            max_ar = 48,
            exclude_seasonalities = ['quarter','month','week','day']
        )
        f.determine_best_series_length(
            monitor='LevelTestSetMAE',
            step=50,
            min_obs = 300,
        )
    else:
        # Shorter series: smaller AR search and one 168-period irregular cycle.
        f.auto_Xvar_select(
            monitor='LevelTestSetMAE',
            irr_cycles = [168], # weekly
            exclude_seasonalities = ['quarter','month','week','day','dayofweek'],
            max_ar = 24,
        )
    # error='ignore': an estimator that raises is presumably skipped and left
    # out of f.history; the `m not in f.history` check below covers that case.
    f.tune_test_forecast(
        models,
        error='ignore',
    )
    # Models whose largest absolute level forecast exceeds five times the
    # maximum of f.levely are flagged for replacement.
    flagged = [
        m for m, r in f.history.items() if np.max(
            np.abs(r['LevelForecast'])
        ) > np.max(f.levely)*5
    ]
    for m in models: # replace flagged or missing models with an 'hwes' forecast saved under the same name
        if m in flagged or m not in f.history:
            f.set_estimator('hwes')
            # Additive seasonality only when the series is longer than 48 + horizon.
            f.manual_forecast(
                call_me=m,
                seasonal=(
                    'add' 
                    if len(f.y) > 48 + fcst_horizon
                    else None
                )
            )
        
    # Combination forecasts: for n = 2, 4, ... up to the number of models in
    # f.history at this point, one combo with the default `how` (avg_top_n) and
    # one with how='weighted', both using the top n models by LevelTestSetMAE.
    f.set_estimator('combo')
    for n in range(2,len(f.history) + 1,2):
        f.manual_forecast(
            models=f'top_{n}',
            determine_best_by='LevelTestSetMAE',
            call_me=f'avg_top_{n}'
        )
        f.manual_forecast(
            how='weighted',
            models=f'top_{n}',
            determine_best_by='LevelTestSetMAE',
            call_me=f'weighted_top_{n}'
        )
    # Exported level forecasts; indexed by model name below.
    fcsts = f.export('lvl_fcsts',determine_best_by='LevelTestSetMAE')
    # Write this series' forecast into each model's results frame, creating the
    # frame from the template the first time a model name is seen.
    for m in f.history.keys():
        if m in Hourly_results:
            Hourly_results[m].loc[i] = fcsts[m].to_list()
        else:
            df = Hourly_results_template.copy()
            df.loc[i] = fcsts[m].to_list()
            Hourly_results[m] = df
    # Column 1 of the export is stored as 'auto_select' - presumably the best
    # model by LevelTestSetMAE, with column 0 holding the dates; TODO confirm.
    if 'auto_select' in Hourly_results:
        Hourly_results['auto_select'].loc[i] = fcsts.iloc[:,1].to_list()
    else:
        df = Hourly_results_template.copy()
        df.loc[i] = fcsts.iloc[:,1].to_list()
        Hourly_results['auto_select'] = df

  0%|          | 0/4227 [00:00<?, ?it/s]

In [None]:
# Write one CSV per model name with the Hourly forecasts.
# DataFrame.to_csv does not create missing directories, so make sure the output
# folder exists first (no-op when it is already there); otherwise the cell
# fails only after the long forecasting loop has finished.
# NOTE(review): inputs are read from 'm4/...' but results go to 'M4/...'; on a
# case-sensitive filesystem these are different directories - confirm intended.
os.makedirs('M4/model_results/Hourly', exist_ok=True)
for m,df in Hourly_results.items():
    df.to_csv(f'M4/model_results/Hourly/{m}.csv')

In [None]:
# Drop the Hourly frames so they no longer occupy kernel memory.
del Hourly, Hourly_test, Hourly_results_template, Hourly_results

## Daily

In [4]:
# Daily training series; the first CSV column becomes the index.
Daily = pd.read_csv('m4/train/Daily-train.csv', index_col=0)

In [5]:
# Daily hold-out set; the first CSV column becomes the index.
Daily_test = pd.read_csv('m4/test/Daily-test.csv', index_col=0)

# Empty frame with the same index and columns as the test set; copied once per
# model name to hold that model's forecasts.
Daily_results_template = pd.DataFrame(
    index=Daily_test.index,
    columns=Daily_test.columns,
)

# Maps model name -> DataFrame of forecasts (filled by the loop below).
Daily_results = dict()

In [None]:
# Forecast every Daily series: build a Forecaster, pick regressors, tune and
# test each estimator in `models`, add combination forecasts, and store the
# level forecasts in Daily_results (one DataFrame per model name).
# NOTE(review): len(f.y) is re-read at every check below rather than cached;
# f.integrate and f.determine_best_series_length presumably can change it, so
# the statement order matters - confirm before reordering anything.
for i in tqdm(Daily.index):
    # Row i of the training frame with missing values dropped.
    y = Daily.loc[i].dropna()
    # Start date and forecast horizon come from the metadata frame.
    sd = info.loc[i,'StartingDate']
    fcst_horizon = info.loc[i,'Horizon']
    # One daily timestamp per observation, beginning at the start date.
    cd = pd.date_range(
        start = sd,
        freq = 'D',
        periods = len(y),
    )
    f = Forecaster(
        y = y,
        current_dates = cd,
        future_dates = fcst_horizon,
    )
    
    # Test set spans one horizon above 50 observations; the validation set
    # uses a threshold of 100. Below the threshold a single observation is used.
    f.set_test_length(fcst_horizon if len(f.y) > 50 else 1)
    # presumably differences the series at most once - confirm against Forecaster.integrate
    f.integrate(critical_pval=.99,max_integration=1)
    f.set_validation_length(fcst_horizon if len(f.y) > 100 else 1)
    f.set_validation_metric('mae')
    if len(f.y) > 366:
        # More than 366 observations: wider AR search, 'month' excluded and
        # 'quarter' represented as dummies with the first level dropped.
        f.auto_Xvar_select(
            irr_cycles=[90],
            monitor='LevelTestSetMAE',
            max_ar = 36,
            exclude_seasonalities = ['month'],
            seasonality_repr = {'quarter':['dummy','drop_first']},
        )
    else: # at most 366 observations (about a year or less)
        f.auto_Xvar_select(
            irr_cycles=[90],
            monitor='LevelTestSetMAE',
            exclude_seasonalities = ['quarter','month','week','day'],
            max_ar = 20,
        )
    if len(f.y) > 1000: # more than 1000 observations (roughly 2.7 years of daily data)
        f.determine_best_series_length(
            monitor='LevelTestSetMAE',
            step=100,
            min_obs = 1000,
        )
    # error='ignore': an estimator that raises is presumably skipped and left
    # out of f.history; the `m not in f.history` check below covers that case.
    f.tune_test_forecast(
        models,
        error='ignore',
    )
    # Models whose largest absolute level forecast exceeds five times the
    # maximum of f.levely are flagged for replacement.
    flagged = [
        m for m, r in f.history.items() if np.max(
            np.abs(r['LevelForecast'])
        ) > np.max(f.levely)*5
    ]
    for m in models: # replace flagged or missing models with an 'hwes' forecast saved under the same name
        if m in flagged or m not in f.history:
            f.set_estimator('hwes')
            f.manual_forecast(call_me=m)
        
    # Combination forecasts: for n = 2, 4, ... up to the number of models in
    # f.history at this point, one combo with the default `how` (avg_top_n) and
    # one with how='weighted', both using the top n models by LevelTestSetMAE.
    f.set_estimator('combo')
    for n in range(2,len(f.history) + 1,2):
        f.manual_forecast(
            models=f'top_{n}',
            determine_best_by='LevelTestSetMAE',
            call_me=f'avg_top_{n}'
        )
        f.manual_forecast(
            how='weighted',
            models=f'top_{n}',
            determine_best_by='LevelTestSetMAE',
            call_me=f'weighted_top_{n}'
        )
    # Exported level forecasts; indexed by model name below.
    fcsts = f.export('lvl_fcsts',determine_best_by='LevelTestSetMAE')
    # Write this series' forecast into each model's results frame, creating the
    # frame from the template the first time a model name is seen.
    for m in f.history.keys():
        if m in Daily_results:
            Daily_results[m].loc[i] = fcsts[m].to_list()
        else:
            df = Daily_results_template.copy()
            df.loc[i] = fcsts[m].to_list()
            Daily_results[m] = df
    # Column 1 of the export is stored as 'auto_select' - presumably the best
    # model by LevelTestSetMAE, with column 0 holding the dates; TODO confirm.
    if 'auto_select' in Daily_results:
        Daily_results['auto_select'].loc[i] = fcsts.iloc[:,1].to_list()
    else:
        df = Daily_results_template.copy()
        df.loc[i] = fcsts.iloc[:,1].to_list()
        Daily_results['auto_select'] = df

  0%|          | 0/4227 [00:00<?, ?it/s]

In [None]:
# Write one CSV per model name with the Daily forecasts.
# DataFrame.to_csv does not create missing directories, so make sure the output
# folder exists first (no-op when it is already there); otherwise the cell
# fails only after the long forecasting loop has finished.
# NOTE(review): inputs are read from 'm4/...' but results go to 'M4/...'; on a
# case-sensitive filesystem these are different directories - confirm intended.
os.makedirs('M4/model_results/Daily', exist_ok=True)
for m,df in Daily_results.items():
    df.to_csv(f'M4/model_results/Daily/{m}.csv')

In [None]:
# Drop the Daily frames so they no longer occupy kernel memory.
del Daily, Daily_test, Daily_results_template, Daily_results

## Weekly

In [4]:
# Weekly training series; the first CSV column becomes the index.
Weekly = pd.read_csv('m4/train/Weekly-train.csv', index_col=0)

In [5]:
# Weekly hold-out set; the first CSV column becomes the index.
Weekly_test = pd.read_csv('m4/test/Weekly-test.csv', index_col=0)

# Empty frame with the same index and columns as the test set; copied once per
# model name to hold that model's forecasts.
Weekly_results_template = pd.DataFrame(
    index=Weekly_test.index,
    columns=Weekly_test.columns,
)

# Maps model name -> DataFrame of forecasts (filled by the loop below).
Weekly_results = dict()

In [None]:
# Forecast every Weekly series: build a Forecaster, pick regressors, tune and
# test each estimator in `models`, add combination forecasts, and store the
# level forecasts in Weekly_results (one DataFrame per model name).
# NOTE(review): len(f.y) is re-read at every check below rather than cached;
# f.integrate and f.determine_best_series_length presumably can change it, so
# the statement order matters - confirm before reordering anything.
for i in tqdm(Weekly.index):
    # Row i of the training frame with missing values dropped.
    y = Weekly.loc[i].dropna()
    # Start date and forecast horizon come from the metadata frame.
    sd = info.loc[i,'StartingDate']
    fcst_horizon = info.loc[i,'Horizon']
    # One weekly timestamp per observation, beginning at the start date.
    cd = pd.date_range(
        start = sd,
        freq = 'W',
        periods = len(y),
    )
    f = Forecaster(
        y = y,
        current_dates = cd,
        future_dates = fcst_horizon,
    )
    
    # Test/validation sets span one horizon when more than 100 observations
    # are available, otherwise a single observation.
    f.set_test_length(fcst_horizon if len(f.y) > 100 else 1)
    # presumably differences the series at most once - confirm against Forecaster.integrate
    f.integrate(critical_pval=.99,max_integration=1)
    f.set_validation_length(fcst_horizon if len(f.y) > 100 else 1)
    f.set_validation_metric('mae')
    if len(f.y) > 150:
        # Longer series: add a 26-period cycle, search up to 52 AR terms with a
        # 13-period irregular cycle, then search for the best series length in
        # steps of 50 with at least 150 observations.
        f.add_cycle(26)
        f.auto_Xvar_select(
            monitor='LevelTestSetMAE',
            max_ar = 52,
            irr_cycles = [13],
        )
        f.determine_best_series_length(
            monitor='LevelTestSetMAE',
            step=50,
            min_obs = 150,
        )
    else:
        # Shorter series: AR search capped at the horizon, no trend
        # decomposition, 'quarter' and 'month' seasonalities excluded.
        f.auto_Xvar_select(
            monitor='LevelTestSetMAE',
            exclude_seasonalities = ['quarter','month'],
            max_ar = fcst_horizon,
            decomp_trend = False,
        )
    # error='ignore': an estimator that raises is presumably skipped and left
    # out of f.history; the `m not in f.history` check below covers that case.
    f.tune_test_forecast(
        models,
        error='ignore',
    )
    # Models whose largest absolute level forecast exceeds five times the
    # maximum of f.levely are flagged for replacement.
    flagged = [
        m for m, r in f.history.items() if np.max(
            np.abs(r['LevelForecast'])
        ) > np.max(f.levely)*5
    ]
    for m in models: # replace flagged or missing models with an 'hwes' forecast saved under the same name
        if m in flagged or m not in f.history:
            f.set_estimator('hwes')
            f.manual_forecast(call_me=m)
        
    # Combination forecasts: for n = 2, 4, ... up to the number of models in
    # f.history at this point, one combo with the default `how` (avg_top_n) and
    # one with how='weighted', both using the top n models by LevelTestSetMAE.
    f.set_estimator('combo')
    for n in range(2,len(f.history) + 1,2):
        f.manual_forecast(
            models=f'top_{n}',
            determine_best_by='LevelTestSetMAE',
            call_me=f'avg_top_{n}'
        )
        f.manual_forecast(
            how='weighted',
            models=f'top_{n}',
            determine_best_by='LevelTestSetMAE',
            call_me=f'weighted_top_{n}'
        )
    # Exported level forecasts; indexed by model name below.
    fcsts = f.export('lvl_fcsts',determine_best_by='LevelTestSetMAE')
    # Write this series' forecast into each model's results frame, creating the
    # frame from the template the first time a model name is seen.
    for m in f.history.keys():
        if m in Weekly_results:
            Weekly_results[m].loc[i] = fcsts[m].to_list()
        else:
            df = Weekly_results_template.copy()
            df.loc[i] = fcsts[m].to_list()
            Weekly_results[m] = df
    # Column 1 of the export is stored as 'auto_select' - presumably the best
    # model by LevelTestSetMAE, with column 0 holding the dates; TODO confirm.
    if 'auto_select' in Weekly_results:
        Weekly_results['auto_select'].loc[i] = fcsts.iloc[:,1].to_list()
    else:
        df = Weekly_results_template.copy()
        df.loc[i] = fcsts.iloc[:,1].to_list()
        Weekly_results['auto_select'] = df

  0%|          | 0/4227 [00:00<?, ?it/s]

In [None]:
# Write one CSV per model name with the Weekly forecasts.
# DataFrame.to_csv does not create missing directories, so make sure the output
# folder exists first (no-op when it is already there); otherwise the cell
# fails only after the long forecasting loop has finished.
# NOTE(review): inputs are read from 'm4/...' but results go to 'M4/...'; on a
# case-sensitive filesystem these are different directories - confirm intended.
os.makedirs('M4/model_results/Weekly', exist_ok=True)
for m,df in Weekly_results.items():
    df.to_csv(f'M4/model_results/Weekly/{m}.csv')

In [None]:
# Drop the Weekly frames so they no longer occupy kernel memory.
del Weekly, Weekly_test, Weekly_results_template, Weekly_results

## Monthly

In [4]:
# Monthly training series; the first CSV column becomes the index.
Monthly = pd.read_csv('m4/train/Monthly-train.csv', index_col=0)

In [5]:
# Monthly hold-out set; the first CSV column becomes the index.
Monthly_test = pd.read_csv('m4/test/Monthly-test.csv', index_col=0)

# Empty frame with the same index and columns as the test set; copied once per
# model name to hold that model's forecasts.
Monthly_results_template = pd.DataFrame(
    index=Monthly_test.index,
    columns=Monthly_test.columns,
)

# Maps model name -> DataFrame of forecasts (filled by the loop below).
Monthly_results = dict()

In [None]:
# Forecast every Monthly series: build a Forecaster, pick regressors, tune and
# test each estimator in `models`, add combination forecasts, and store the
# level forecasts in Monthly_results (one DataFrame per model name).
# NOTE(review): len(f.y) is re-read at every check below rather than cached;
# f.integrate and f.determine_best_series_length presumably can change it, so
# the statement order matters - confirm before reordering anything.
for i in tqdm(Monthly.index):
    # Row i of the training frame with missing values dropped.
    y = Monthly.loc[i].dropna()
    # Start date and forecast horizon come from the metadata frame.
    sd = info.loc[i,'StartingDate']
    fcst_horizon = info.loc[i,'Horizon']
    # One monthly timestamp per observation, beginning at the start date.
    cd = pd.date_range(
        start = sd,
        freq = 'M',
        periods = len(y),
    )
    f = Forecaster(
        y = y,
        current_dates = cd,
        future_dates = fcst_horizon,
    )
    
    # Test/validation sets span one horizon when more than 100 observations
    # are available, otherwise a single observation.
    f.set_test_length(fcst_horizon if len(f.y) > 100 else 1)
    # presumably differences the series at most once - confirm against Forecaster.integrate
    f.integrate(critical_pval=.99,max_integration=1)
    f.set_validation_length(fcst_horizon if len(f.y) > 100 else 1)
    f.set_validation_metric('mae')
    if len(f.y) > 120:
        # Longer series: search up to 48 AR terms with a 3-period irregular
        # cycle, then search for the best series length in steps of 50 with at
        # least 120 observations.
        f.auto_Xvar_select(
            monitor='LevelTestSetMAE',
            max_ar = 48,
            irr_cycles = [3],
        )
        f.determine_best_series_length(
            monitor='LevelTestSetMAE',
            step=50,
            min_obs = 120,
        )
    else:
        # Shorter series: AR search capped at the horizon, no trend
        # decomposition, 'quarter' seasonality excluded.
        f.auto_Xvar_select(
            monitor='LevelTestSetMAE',
            exclude_seasonalities = ['quarter'],
            max_ar = fcst_horizon,
            decomp_trend = False,
        )
    # error='ignore': an estimator that raises is presumably skipped and left
    # out of f.history; the `m not in f.history` check below covers that case.
    f.tune_test_forecast(
        models,
        error='ignore',
    )
    # Models whose largest absolute level forecast exceeds five times the
    # maximum of f.levely are flagged for replacement.
    flagged = [
        m for m, r in f.history.items() if np.max(
            np.abs(r['LevelForecast'])
        ) > np.max(f.levely)*5
    ]
    for m in models: # replace flagged or missing models with an 'hwes' forecast saved under the same name
        if m in flagged or m not in f.history:
            f.set_estimator('hwes')
            # Additive seasonality only when the series is longer than 24 + horizon.
            f.manual_forecast(
                call_me=m,
                seasonal=(
                    'add' 
                    if len(f.y) > 24  + fcst_horizon
                    else None
                )
            )
        
    # Combination forecasts: for n = 2, 4, ... up to the number of models in
    # f.history at this point, one combo with the default `how` (avg_top_n) and
    # one with how='weighted', both using the top n models by LevelTestSetMAE.
    f.set_estimator('combo')
    for n in range(2,len(f.history) + 1,2):
        f.manual_forecast(
            models=f'top_{n}',
            determine_best_by='LevelTestSetMAE',
            call_me=f'avg_top_{n}'
        )
        f.manual_forecast(
            how='weighted',
            models=f'top_{n}',
            determine_best_by='LevelTestSetMAE',
            call_me=f'weighted_top_{n}'
        )
    # Exported level forecasts; indexed by model name below.
    fcsts = f.export('lvl_fcsts',determine_best_by='LevelTestSetMAE')
    # Write this series' forecast into each model's results frame, creating the
    # frame from the template the first time a model name is seen.
    for m in f.history.keys():
        if m in Monthly_results:
            Monthly_results[m].loc[i] = fcsts[m].to_list()
        else:
            df = Monthly_results_template.copy()
            df.loc[i] = fcsts[m].to_list()
            Monthly_results[m] = df
    # Column 1 of the export is stored as 'auto_select' - presumably the best
    # model by LevelTestSetMAE, with column 0 holding the dates; TODO confirm.
    if 'auto_select' in Monthly_results:
        Monthly_results['auto_select'].loc[i] = fcsts.iloc[:,1].to_list()
    else:
        df = Monthly_results_template.copy()
        df.loc[i] = fcsts.iloc[:,1].to_list()
        Monthly_results['auto_select'] = df

  0%|          | 0/4227 [00:00<?, ?it/s]

In [None]:
# Write one CSV per model name with the Monthly forecasts.
# DataFrame.to_csv does not create missing directories, so make sure the output
# folder exists first (no-op when it is already there); otherwise the cell
# fails only after the long forecasting loop has finished.
# NOTE(review): inputs are read from 'm4/...' but results go to 'M4/...'; on a
# case-sensitive filesystem these are different directories - confirm intended.
os.makedirs('M4/model_results/Monthly', exist_ok=True)
for m,df in Monthly_results.items():
    df.to_csv(f'M4/model_results/Monthly/{m}.csv')

In [None]:
# Drop the Monthly frames so they no longer occupy kernel memory.
del Monthly, Monthly_test, Monthly_results_template, Monthly_results

## Quarterly

In [4]:
# Quarterly training series; the first CSV column becomes the index.
Quarterly = pd.read_csv('m4/train/Quarterly-train.csv', index_col=0)

In [5]:
# Quarterly hold-out set; the first CSV column becomes the index.
Quarterly_test = pd.read_csv('m4/test/Quarterly-test.csv', index_col=0)

# Empty frame with the same index and columns as the test set; copied once per
# model name to hold that model's forecasts.
Quarterly_results_template = pd.DataFrame(
    index=Quarterly_test.index,
    columns=Quarterly_test.columns,
)

# Maps model name -> DataFrame of forecasts (filled by the loop below).
Quarterly_results = dict()

In [None]:
# Forecast every Quarterly series: build a Forecaster, pick regressors, tune
# and test each estimator in `models`, add combination forecasts, and store the
# level forecasts in Quarterly_results (one DataFrame per model name).
# NOTE(review): len(f.y) is re-read at every check below rather than cached;
# f.integrate and f.determine_best_series_length presumably can change it, so
# the statement order matters - confirm before reordering anything.
for i in tqdm(Quarterly.index):
    # Row i of the training frame with missing values dropped.
    y = Quarterly.loc[i].dropna()
    # Start date and forecast horizon come from the metadata frame.
    sd = info.loc[i,'StartingDate']
    fcst_horizon = info.loc[i,'Horizon']
    # One quarterly timestamp per observation, beginning at the start date.
    cd = pd.date_range(
        start = sd,
        freq = 'Q',
        periods = len(y),
    )
    f = Forecaster(
        y = y,
        current_dates = cd,
        future_dates = fcst_horizon,
    )
    
    # Test/validation sets span one horizon when more than 100 observations
    # are available, otherwise a single observation.
    f.set_test_length(fcst_horizon if len(f.y) > 100 else 1)
    # presumably differences the series at most once - confirm against Forecaster.integrate
    f.integrate(critical_pval=.99,max_integration=1)
    f.set_validation_length(fcst_horizon if len(f.y) > 100 else 1)
    f.set_validation_metric('mae')
    # AR search goes up to 16 lags for series longer than 100 observations,
    # otherwise up to the horizon; a 2-period irregular cycle is always offered.
    f.auto_Xvar_select(
        monitor='LevelTestSetMAE',
        max_ar = 16 if len(f.y) > 100 else fcst_horizon,
        irr_cycles = [2],
    )
    if len(f.y) > 100:
        # Search for the best series length in steps of 48, keeping at least 100 observations.
        f.determine_best_series_length(
            monitor='LevelTestSetMAE',
            step=48,
            min_obs = 100,
        )
    # error='ignore': an estimator that raises is presumably skipped and left
    # out of f.history; the `m not in f.history` check below covers that case.
    f.tune_test_forecast(
        models,
        error='ignore',
    )
    # Models whose largest absolute level forecast exceeds five times the
    # maximum of f.levely are flagged for replacement.
    flagged = [
        m for m, r in f.history.items() if np.max(
            np.abs(r['LevelForecast'])
        ) > np.max(f.levely)*5
    ]
    for m in models: # replace flagged or missing models with an 'hwes' forecast saved under the same name
        if m in flagged or m not in f.history:
            f.set_estimator('hwes')
            # Additive seasonality only when the series is longer than 8 + horizon.
            f.manual_forecast(
                call_me=m,
                seasonal=(
                    'add' 
                    if len(f.y) > 8 + fcst_horizon
                    else None
                )
            )
        
    # Combination forecasts: for n = 2, 4, ... up to the number of models in
    # f.history at this point, one combo with the default `how` (avg_top_n) and
    # one with how='weighted', both using the top n models by LevelTestSetMAE.
    f.set_estimator('combo')
    for n in range(2,len(f.history) + 1,2):
        f.manual_forecast(
            models=f'top_{n}',
            determine_best_by='LevelTestSetMAE',
            call_me=f'avg_top_{n}'
        )
        f.manual_forecast(
            how='weighted',
            models=f'top_{n}',
            determine_best_by='LevelTestSetMAE',
            call_me=f'weighted_top_{n}'
        )
    # Exported level forecasts; indexed by model name below.
    fcsts = f.export('lvl_fcsts',determine_best_by='LevelTestSetMAE')
    # Write this series' forecast into each model's results frame, creating the
    # frame from the template the first time a model name is seen.
    for m in f.history.keys():
        if m in Quarterly_results:
            Quarterly_results[m].loc[i] = fcsts[m].to_list()
        else:
            df = Quarterly_results_template.copy()
            df.loc[i] = fcsts[m].to_list()
            Quarterly_results[m] = df
    # Column 1 of the export is stored as 'auto_select' - presumably the best
    # model by LevelTestSetMAE, with column 0 holding the dates; TODO confirm.
    if 'auto_select' in Quarterly_results:
        Quarterly_results['auto_select'].loc[i] = fcsts.iloc[:,1].to_list()
    else:
        df = Quarterly_results_template.copy()
        df.loc[i] = fcsts.iloc[:,1].to_list()
        Quarterly_results['auto_select'] = df

  0%|          | 0/4227 [00:00<?, ?it/s]

In [None]:
# Write one CSV per model name with the Quarterly forecasts.
# DataFrame.to_csv does not create missing directories, so make sure the output
# folder exists first (no-op when it is already there); otherwise the cell
# fails only after the long forecasting loop has finished.
# NOTE(review): inputs are read from 'm4/...' but results go to 'M4/...'; on a
# case-sensitive filesystem these are different directories - confirm intended.
os.makedirs('M4/model_results/Quarterly', exist_ok=True)
for m,df in Quarterly_results.items():
    df.to_csv(f'M4/model_results/Quarterly/{m}.csv')

In [None]:
# Drop the Quarterly frames so they no longer occupy kernel memory.
del Quarterly, Quarterly_test, Quarterly_results_template, Quarterly_results

## Yearly

In [None]:
# Yearly training series; the first CSV column becomes the index.
Yearly = pd.read_csv('m4/train/Yearly-train.csv', index_col=0)

In [None]:
# Yearly hold-out set; the first CSV column becomes the index.
Yearly_test = pd.read_csv('m4/test/Yearly-test.csv', index_col=0)

# Empty frame with the same index and columns as the test set; copied once per
# model name to hold that model's forecasts.
Yearly_results_template = pd.DataFrame(
    index=Yearly_test.index,
    columns=Yearly_test.columns,
)

# Maps model name -> DataFrame of forecasts (filled by the loop below).
Yearly_results = dict()

In [None]:
# Forecast every Yearly series: build a Forecaster, pick regressors, tune and
# test each estimator in `models`, add combination forecasts, and store the
# level forecasts in Yearly_results (one DataFrame per model name).
# NOTE(review): len(f.y) is re-read at every check below rather than cached;
# f.integrate and f.determine_best_series_length presumably can change it, so
# the statement order matters - confirm before reordering anything.
for i in tqdm(Yearly.index):
    # Row i of the training frame with missing values dropped.
    y = Yearly.loc[i].dropna()
    # Start date and forecast horizon come from the metadata frame.
    sd = info.loc[i,'StartingDate']
    fcst_horizon = info.loc[i,'Horizon']
    # A daily index is used as a stand-in for the yearly observations (see the
    # inline comment); seasonal regressors are switched off further down.
    cd = pd.date_range(
        start = sd,
        freq = 'D', # for yearly it doesn't matter and will break if Y
        periods = len(y),
    )
    f = Forecaster(
        y = y,
        current_dates = cd,
        future_dates = fcst_horizon,
    )
    
    # Test set spans one horizon above 50 observations; the validation set
    # uses a threshold of 100. Below the threshold a single observation is used.
    f.set_test_length(fcst_horizon if len(f.y) > 50 else 1)
    # presumably differences the series at most once - confirm against Forecaster.integrate
    f.integrate(critical_pval=.99,max_integration=1)
    f.set_validation_length(fcst_horizon if len(f.y) > 100 else 1)
    f.set_validation_metric('mae')        
    # No calendar seasonalities; a horizon-length irregular cycle is offered and
    # trend decomposition is only enabled for series longer than 100 observations.
    f.auto_Xvar_select(
        try_seasonalities=False,
        irr_cycles=[fcst_horizon],
        monitor='LevelTestSetMAE',
        decomp_trend = len(f.y) > 100
    )
    if len(f.y) > 100:
        # Search for the best series length in steps of 50 (min_obs left at its default).
        f.determine_best_series_length(
            monitor='LevelTestSetMAE',
            step=50,
        )
    # error='ignore': an estimator that raises is presumably skipped and left
    # out of f.history; the `m not in f.history` check below covers that case.
    f.tune_test_forecast(
        models,
        error='ignore',
    )
    # Models whose largest absolute level forecast exceeds five times the
    # maximum of f.levely are flagged for replacement.
    flagged = [
        m for m, r in f.history.items() if np.max(
            np.abs(r['LevelForecast'])
        ) > np.max(f.levely)*5
    ]
    for m in models: # replace flagged or missing models with an 'hwes' forecast saved under the same name
        if m in flagged or m not in f.history:
            f.set_estimator('hwes')
            f.manual_forecast(call_me=m)
        
    # Combination forecasts: for n = 2, 4, ... up to the number of models in
    # f.history at this point, one combo with the default `how` (avg_top_n) and
    # one with how='weighted', both using the top n models by LevelTestSetMAE.
    f.set_estimator('combo')
    for n in range(2,len(f.history) + 1,2):
        f.manual_forecast(
            models=f'top_{n}',
            determine_best_by='LevelTestSetMAE',
            call_me=f'avg_top_{n}'
        )
        f.manual_forecast(
            how='weighted',
            models=f'top_{n}',
            determine_best_by='LevelTestSetMAE',
            call_me=f'weighted_top_{n}'
        )
    # Exported level forecasts; indexed by model name below.
    fcsts = f.export('lvl_fcsts',determine_best_by='LevelTestSetMAE')
    # Write this series' forecast into each model's results frame, creating the
    # frame from the template the first time a model name is seen.
    for m in f.history.keys():
        if m in Yearly_results:
            Yearly_results[m].loc[i] = fcsts[m].to_list()
        else:
            df = Yearly_results_template.copy()
            df.loc[i] = fcsts[m].to_list()
            Yearly_results[m] = df
    # Column 1 of the export is stored as 'auto_select' - presumably the best
    # model by LevelTestSetMAE, with column 0 holding the dates; TODO confirm.
    if 'auto_select' in Yearly_results:
        Yearly_results['auto_select'].loc[i] = fcsts.iloc[:,1].to_list()
    else:
        df = Yearly_results_template.copy()
        df.loc[i] = fcsts.iloc[:,1].to_list()
        Yearly_results['auto_select'] = df

In [None]:
# Write one CSV per model name with the Yearly forecasts.
# DataFrame.to_csv does not create missing directories, so make sure the output
# folder exists first (no-op when it is already there); otherwise the cell
# fails only after the long forecasting loop has finished.
# NOTE(review): inputs are read from 'm4/...' but results go to 'M4/...'; on a
# case-sensitive filesystem these are different directories - confirm intended.
os.makedirs('M4/model_results/Yearly', exist_ok=True)
for m,df in Yearly_results.items():
    df.to_csv(f'M4/model_results/Yearly/{m}.csv')

In [None]:
# Drop the Yearly frames so they no longer occupy kernel memory.
del Yearly, Yearly_test, Yearly_results_template, Yearly_results