In [0]:
import yaml

In [0]:
# General inputs (YAML text; parsed with yaml.safe_load when app_config is assembled).
#
# Must-have parameters:
#   dependent_variable, date_var, date_format_pandas, modeling_granularity,
#   input_file_path, output_dir_path
#
# NOTE(review): the previous header also listed `date_format_pyspark` as required, but no such
# key is set in this block - confirm whether the consumer of this config still needs it.
# NOTE(review): both paths are absolute and machine-specific; adjust them per environment.
# The output directory now uses the same "Modelling" spelling as the input path
# (it was previously written as "Modelilng").

general_inputs = """

  dependent_variable : Sales_Qty_fill 
  date_var : Week_Start_Date
  date_format_pandas : "%d-%m-%Y"
  modeling_granularity : [Div_No, Store_No,Base_UPC]
  input_file_path : "/home/satish/Modelling/Forecaster/modeling-sample.csv"
  output_dir_path : "/home/satish/Modelling/Forecaster/modeling_results_python"

"""

In [0]:
# Data-processing settings (YAML text; parsed with yaml.safe_load when app_config is assembled).
#
# Must-have parameters:
# - outlier_treatment: whichever of hampel_filter or inter_quartile_range is selected as the
#   algorithm, its own section must be filled in.
# - missing_value_treatment: only the columns listed under a treatment's "cols" parameter are
#   considered, and that treatment is applied to those columns.
#
# Output features produced by the feature-engineering steps:
# 1.3 - "Granular Prophet Trend creation" - granular_trend
# 1.4 - "Prophet Trend & SI creation" - higher_level_trend, prophet_si_yearly, prophet_si_weekly, prophet_si_daily
# 1.5 - "Calculated SI creation" - cal_si_daily, cal_si_weekly, cal_si_monthly, cal_si_qtrly
#
# Use these output features as required, and pass the relevant ones under the
# exogenous variables section of the respective algorithm.
#
# NOTE(review): the key `consider_calulated_si` (sic) is left exactly as written; presumably the
# consumer of this config looks it up by that spelling - confirm before renaming it.

data_processing = """
  
  outlier_treatment_needed : yes #yes/no
  outlier_treatment :
    algorithm : inter_quartile_range
    hampel_filter :
      window_size : 2
      no_of_sigmas : 3
    inter_quartile_range :
      minimum_quantile : 5
      maximum_quantile : 95
      
      
  feature_engineering:
    prophet_based:
      consider_granular_prophet_trend: yes #1.3
      consider_prophet_trend_si: yes #1.4
      higher_level_si_trend_creation :
        sales_variable : Sales_Amt #Eq. Volume Sales or Sales Amount
        granularity : [Div_No]
        start_date : ""
        end_date : ""

      future_n_datapoints : 10
      Holidays : 
        include_holidays : yes
        country : US
        years : [2018, 2019, 2020, 2021, 2022]
        holiday_lower_window : 7
        holiday_upper_window : 0
        additional_holidays :
          Easter :
            ds : ['2018-04-01','2019-04-21','2020-04-12', '2021-04-04', '2022-04-17']
          Valentine's day :
            ds : ['2018-02-14','2019-02-14', '2020-02-14', '2021-02-14', '2022-02-14']
            
    calculation_based:
      consider_calulated_si: yes #1.5
      higher_level_si_creation :
        sales_variable : Sales_Amt
        granularity : [Div_No]
        start_date : ""
        end_date : "" 
  
  
  missing_value_treatment :
    Mean :
      cols : []
      zero_as_missing_value : False
    Median :
      cols : []
      zero_as_missing_value : False
    Scalar :
      cols : []
      value : 0
      zero_as_missing_value : False
    Rolling_Mean :
      cols : []
      window : 4
      zero_as_missing_value : False
    Rolling_Median :
      cols : []
      window : 4
      zero_as_missing_value : False
    Forward_fill :
      cols : []
      zero_as_missing_value : False
    Backward_fill :
      cols : []
      zero_as_missing_value : False
    Linear_Interpolation :
      cols : []
      zero_as_missing_value : False
    Spline_Interpolation :
      cols : []
      zero_as_missing_value : False
    Mode :
      cols : []
    Mean_Across_Years :
      cols : []
      time_granularity : weekly
      zero_as_missing_value : False

"""

In [0]:
# Feature-selection settings (YAML text; parsed with yaml.safe_load when app_config is assembled).
# Holds the exogenous variables grouped into uncertain / positive / negative correlation lists,
# plus the lasso_cvglmnet options (consider_limits and zero_penalty_vars).
feature_selection = """
  
  exogenous_variables:
      consider_correlation: yes 
      uncertain_corr: []
      positive_corr: [tpr_discount,stateLevel_grocery_and_pharmacy_percent_change_from_baseline]
      negative_corr: [median_baseprice,New_cases,AvgUnitPrice]
  lasso_cvglmnet:
    consider_limits: no
    zero_penalty_vars: [median_baseprice,tpr_discount,stateLevel_grocery_and_pharmacy_percent_change_from_baseline,New_cases,AvgUnitPrice]
    
"""

In [0]:
# Validation settings (YAML text; parsed with yaml.safe_load when app_config is assembled).
# This section holds the information required to perform backtesting and to split the data
# into three sets (train, validation and test), along with the evaluation metric and the
# aggregated-metrics options.

validation = """

  backtesting :
    algorithm : expanding_window #sliding_window/expanding_window
    stride: 4
      
  train_percentage : 0.7 #(0.7 = 70%) train_percentage_in_train_cum_validation
  no_of_backtesting_test_periods : 5 #number of validation periods
  no_of_test_periods : 8
  metric : wmape  #mape, wmape, mae, rmse, bias, tracking_signal
  
  agg_metrics_req : no 
  agg_metrics_stride : 1 #there is a chance of missing some dates at the last if stride !=1
  agg_metrics_test_periods : 4

"""

In [0]:
# Per-algorithm settings (YAML text; parsed with yaml.safe_load when app_config is assembled).
# For each algorithm this section lists the hyperparameter values to try and, where the
# algorithm supports them, the independent (exogenous) variables that drive the model's
# effectiveness and performance.
#
# The Prophet holiday lists (Easter, Valentine's day) now include the 2022 dates so that they
# cover every entry in that section's `years` list and match the same holidays declared under
# data_processing -> feature_engineering -> prophet_based -> Holidays.
# NOTE(review): `seasonality_prior_scale : range(0.5,0.92,0.4)` is plain text as far as YAML is
# concerned; presumably the consumer of this config expands it - confirm.

algorithms = """

  SimpleExponentialSmoothing :
    Hyperparameters :
      fit :
        smoothing_level: [0.2, 0.3,0.4,0.5]
        optimized: [True]
      __init__ :
        initialization_method: [estimated]
      
      
  ExponentialSmoothingHolt :
    Hyperparameters :
      fit :
        smoothing_level: [0.2, 0.3,0.4,0.5]
        optimized: [True]
      __init__:
        exponential: [False]
        damped_trend: [False]
        
        
  ExponentialSmoothingHoltWinters :
    Hyperparameters :
      fit :
        smoothing_level: [0.2, 0.3,0.4,0.5]
        use_brute: [True]
      __init__:
        damped_trend: [False]
        
        
  ElasticNet :
    feature_selection:
      use_feature_selected_idvs: no
      approach: lasso_cvglmnet
      must_have_idvs: []
      
    exogenous_variables:
      consider_correlation: no 
      uncertain_corr: []
      positive_corr: [tpr_discount,stateLevel_grocery_and_pharmacy_percent_change_from_baseline]
      negative_corr: [median_baseprice,New_cases,AvgUnitPrice]
    coefficients_required : yes
    
    Hyperparameters :
      alpha : [0.75,0.9]
      l1_ratio : [0.1,0.9]
      fit_intercept: [True]
      positive: [True]
      
      
  XGBoost :
    feature_selection:
      use_feature_selected_idvs: no
      approach: lasso_cvglmnet
      must_have_idvs: []
      
    exogenous_variables:
      consider_correlation: no 
      uncertain_corr: []
      positive_corr: [tpr_discount,stateLevel_grocery_and_pharmacy_percent_change_from_baseline]
      negative_corr: [median_baseprice,New_cases,AvgUnitPrice]
      
    Hyperparameters :
      objective : ['reg:squarederror']
      learning_rate : [0.05, 0.07]
      n_estimators : [1, 5]
    
    
  Lasso_cvglmnet:
    feature_selection:
      use_feature_selected_idvs: no
      approach: lasso_cvglmnet
      must_have_idvs: []
      
    exogenous_variables:
      consider_correlation: no 
      uncertain_corr: []
      positive_corr: [tpr_discount,stateLevel_grocery_and_pharmacy_percent_change_from_baseline]
      negative_corr: [median_baseprice,New_cases,AvgUnitPrice]
    coefficients_required : yes
    
    limits:  #coeff_bounds: {var:[lower_bound,upper_bound]} NOTE:-ve values in upper_bound and >0 values in lower_bound not allowed
      consider_limits: no 
      coeff_bounds: {'tpr_discount':[0,0.6]} 
    zero_penalty_vars: [median_baseprice,tpr_discount,stateLevel_grocery_and_pharmacy_percent_change_from_baseline,New_cases,AvgUnitPrice]
    
    
  Prophet :
    feature_selection:
      use_feature_selected_idvs: no
      approach: lasso_cvglmnet
      must_have_idvs: []
      
    exogenous_variables:
      consider_correlation: no
      uncertain_corr: []
      positive_corr: [tpr_discount,stateLevel_grocery_and_pharmacy_percent_change_from_baseline]
      negative_corr: [median_baseprice,New_cases,AvgUnitPrice]
    regressor_mode : 'additive'
    
    Hyperparameters :
      seasonality_prior_scale : range(0.5,0.92,0.4) #which is equivalent to [0.5,0.9]
      changepoint_prior_scale: [0.01,0.05]
      yearly_seasonality: [True]
    Holidays :
      include_holidays : yes
      country : US
      years : [2018, 2019, 2020, 2021, 2022]
      holiday_lower_window : 7
      holiday_upper_window : 7
      additional_holidays :
        Easter :
          ds : ['2018-04-01','2019-04-21','2020-04-12', '2021-04-04', '2022-04-17']
        Valentine's day :
          ds : ['2018-02-14','2019-02-14', '2020-02-14', '2021-02-14', '2022-02-14']


  SARIMAX :
    feature_selection:
      use_feature_selected_idvs: no
      approach: lasso_cvglmnet
      must_have_idvs: []
      
    exogenous_variables:
      consider_correlation: no 
      uncertain_corr: []
      positive_corr: [tpr_discount,stateLevel_grocery_and_pharmacy_percent_change_from_baseline]
      negative_corr: [median_baseprice,New_cases,AvgUnitPrice]
    coefficients_required : yes 
    
    Auto_ARIMA :
      use_auto_arima : yes
      consider_exogenous_variables : yes
      seasonal : True
      start_p : 2
      max_p : 3
      start_P : 0
      
    Hyperparameters :
      fit:
        cov_type: ['opg']
        optim_score: ['harvey']
      __init__:
        measurement_error : [False,True]
        concentrate_scale : [False,True]
        order : [[1, 1, 1]]
        seasonal_order : [[0, 0, 0, 0]]


  DeepAR :
    global_model_gran: ['Div_No']
    exogenous_variables:
      feat_dynamic_real: [tpr_discount,median_baseprice,New_cases,AvgUnitPrice,stateLevel_grocery_and_pharmacy_percent_change_from_baseline]
      
    sales_amt_variable : Sales_Amt
    Hyperparameters :
      Estimator:
        num_cells : [30]
        num_layers : [1,5]
        dropout_rate : [0.05,0.06]
      Trainer:
        epochs: [1]
        weight_decay: [0.00000001]
        
        
  DeepState : 
    global_model_gran: ['Div_No']
    exogenous_variables:
      feat_dynamic_real: [tpr_discount,median_baseprice,New_cases,AvgUnitPrice,stateLevel_grocery_and_pharmacy_percent_change_from_baseline]
      
    sales_amt_variable : Sales_Amt
    Hyperparameters :
      Estimator:
        use_feat_dynamic_real : [True]
        num_layers : [2]
      Trainer:
        epochs: [1]
        weight_decay: [0.00000001]
        
        
  LSTM :
    feature_selection:
      use_feature_selected_idvs: no
      approach: lasso_cvglmnet
      must_have_idvs: []
      
    exogenous_variables:
      consider_correlation: yes 
      uncertain_corr: []
      positive_corr: [tpr_discount,stateLevel_grocery_and_pharmacy_percent_change_from_baseline]
      negative_corr: [median_baseprice,New_cases,AvgUnitPrice]
      
    forecast_periods: 4
    lookback_periods: 5 
    
    Hyperparameters :
      compile: 
        loss: ['mae']
        optimizer: ['adam']
      fit:
        epochs: [50]
        batch_size: [32]
        verbose: [2]
        shuffle: [False]
        
"""

In [0]:
# Experiment-tracking settings (YAML text; parsed with yaml.safe_load when app_config is assembled).
#
# Must-have parameters:
# - When tracking_needed is yes, all the parameters must be filled.
#
# tracking_uri is written as YAML `null`, which PyYAML loads as Python None. A bare `None` is
# not a YAML null and would be loaded as the four-character string "None".
# NOTE(review): if any consumer compares tracking_uri against the string "None", update it to
# test for `is None` instead.

tracking = """

  tracking_needed : no 
  type : "Managed" #Managed/Unmanaged
  tracking_uri : null #Will be used when type is Unmanaged
  mlflow_experiment_id : "3114000483440696"
  
"""

In [0]:
# Assemble the run configuration: the parsed general inputs form the top-level mapping, and each
# remaining YAML section is parsed and attached under its own key (insertion order preserved).
app_config = yaml.safe_load(general_inputs)
app_config.update({
    'data_processing': yaml.safe_load(data_processing),
    'validation': yaml.safe_load(validation),
    'feature_selection': yaml.safe_load(feature_selection),
    'Algorithms': yaml.safe_load(algorithms),
    'tracking': yaml.safe_load(tracking),
})
# To inspect the assembled dictionary, evaluate `app_config` as the last expression of a cell.