# Requirements

In [None]:
%load_ext autoreload
%autoreload 2

import pandas as pd
import src.data_handler as dh
import src.sagemaker_utils as su
import src.outputs_stacking as osk
import src.utils as ut

# Parameters

#### Modeling arguments handling

In [None]:
# Run identification. Both values are validated by the `ut.check_*` calls at the end of the cell.
ENVIRONMENT = 'seed'
RUN_NAME = 'forecast-v21-debug'

# Full range of cutoffs (presumably year + week identifiers - confirm against `ut.check_list_cutoff`).
LIST_CUTOFF = [
    202050, 202051, 202052, 202053,
    202101, 202102, 202103, 202104, 202105, 202106, 202107, 202108, 202109, 202110,
    202111, 202112, 202113, 202114, 202115, 202116, 202117, 202118, 202119, 202120,
    202121, 202122, 202123, 202124, 202125, 202126, 202127, 202128, 202129, 202130,
    202131, 202132, 202133, 202134, 202135, 202136, 202137,
]
# Override: the list above is discarded and only this single cutoff is actually run.
LIST_CUTOFF = [202050]

# Validate the arguments; `list_cutoff` is the value returned by the cutoff check and is
# the one used to build the jobs table later on.
ut.check_environment(ENVIRONMENT)
list_cutoff = ut.check_list_cutoff(LIST_CUTOFF)
ut.check_run_name(RUN_NAME)

#### Logging level

In [None]:
# Logging level applied to the `logger` object of the data handler, SageMaker utils
# and outputs stacking modules.
LOGGING_LVL = 'INFO'
assert LOGGING_LVL in ('CRITICAL', 'ERROR', 'WARNING', 'INFO', 'DEBUG'), 'Wrong logging level'

for logged_module in (dh, su, osk):
    logged_module.logger.setLevel(LOGGING_LVL)

#### Constants

In [None]:
# Modeling parameters for the selected environment; every constant below is read from this mapping.
main_params = ut.import_modeling_parameters(ENVIRONMENT)

# --- Refined data locations (global = shared inputs, specific = per-run data) ---
REFINED_DATA_GLOBAL_BUCKET = main_params['refined_data_global_bucket']
REFINED_DATA_SPECIFIC_BUCKET = main_params['refined_data_specific_bucket']
REFINED_DATA_GLOBAL_PATH = main_params['refined_global_path']
REFINED_DATA_SPECIFIC_PATH = main_params['refined_specific_path']
REFINED_DATA_SPECIFIC_URI = ut.to_uri(REFINED_DATA_SPECIFIC_BUCKET, REFINED_DATA_SPECIFIC_PATH)


def _global_path(name):
    """Return `name` prefixed with the refined global data path (no separator is inserted)."""
    return f"{REFINED_DATA_GLOBAL_PATH}{name}"


# --- Input datasets stored under the refined global path ---
MODEL_WEEK_SALES_PATH = _global_path("model_week_sales")
MODEL_WEEK_TREE_PATH = _global_path("model_week_tree")
MODEL_WEEK_MRP_PATH = _global_path("model_week_mrp")
IMPUTED_SALES_LOCKDOWN_1_PATH = _global_path("imputed_sales_lockdown_1.parquet")

# --- Algorithms and outputs stacking settings ---
# Iterating the 'algorithm' entry yields the algorithm names
# (presumably it is a mapping keyed by algorithm name - confirm against the parameters file).
LIST_ALGORITHM = [*main_params['algorithm']]
OUTPUTS_STACKING = main_params['outputs_stacking']
SHORT_TERM_ALGORITHM = main_params['short_term_algorithm']
LONG_TERM_ALGORITHM = main_params['long_term_algorithm']
SMOOTH_STACKING_RANGE = main_params['smooth_stacking_range']

# Data Loading

In [None]:
def _read_global(path):
    """Read the multipart parquet dataset stored at `path` in the refined global bucket."""
    return ut.read_multipart_parquet_s3(REFINED_DATA_GLOBAL_BUCKET, path)


# Base datasets shared by every job; they are handed to the data handler in the refining loop.
df_model_week_sales = _read_global(MODEL_WEEK_SALES_PATH)
df_model_week_tree = _read_global(MODEL_WEEK_TREE_PATH)
df_model_week_mrp = _read_global(MODEL_WEEK_MRP_PATH)
df_imputed_sales_lockdown_1 = _read_global(IMPUTED_SALES_LOCKDOWN_1_PATH)

# Initialize df_jobs

In [None]:
# Jobs table driving the rest of the notebook: the following cells read its
# 'algorithm', 'cutoff', 'train_path' and 'predict_path' columns.
jobs_arguments = {
    'list_cutoff': list_cutoff,
    'run_name': RUN_NAME,
    'list_algorithm': LIST_ALGORITHM,
    # This argument receives the full URI built with `ut.to_uri`, not the bare path.
    'refined_data_specific_path': REFINED_DATA_SPECIFIC_URI,
}
df_jobs = su.generate_df_jobs(**jobs_arguments)

# Display the generated jobs table as the cell output.
df_jobs

# Generate modeling specific data

In [None]:
# Columns of the model tree exposed as static features (DeepAR only);
# each one is paired with 'model_id' in its own dataframe.
static_feature_columns = (
    'family_id',
    'sub_department_id',
    'department_id',
    'univers_id',
    'product_nature_id',
)

# Run the specific data refining once per row of the jobs table.
for _, job in df_jobs.iterrows():

    algorithm, cutoff = job['algorithm'], job['cutoff']

    # Refining configuration for this (algorithm, cutoff) pair
    refining_params = dh.import_refining_config(
        environment=ENVIRONMENT,
        algorithm=algorithm,
        cutoff=cutoff,
        train_path=job['train_path'],
        predict_path=job['predict_path'],
    )

    # Static features are only provided to DeepAR: they come from the model tree
    # restricted to the rows whose week_id equals the cutoff.
    static_features = None
    if algorithm == 'deepar':
        df_static_tree = df_model_week_tree[df_model_week_tree['week_id'] == cutoff].copy()
        static_features = {
            column: df_static_tree[['model_id', column]]
            for column in static_feature_columns
        }

    # A fresh base-data mapping is built for every job; no dynamic features are used.
    refining_handler = dh.DataHandler(
        base_data={
            'model_week_sales': df_model_week_sales,
            'model_week_tree': df_model_week_tree,
            'model_week_mrp': df_model_week_mrp,
            'imputed_sales_lockdown_1': df_imputed_sales_lockdown_1,
        },
        static_features=static_features,
        global_dynamic_features=None,
        specific_dynamic_features=None,
        **refining_params,
    )

    # Execute data refining
    refining_handler.execute_data_refining_specific()

## Launch Fit & Transform

In [None]:
# Launch the SageMaker jobs algorithm by algorithm, each handler receiving only its own jobs.
for algorithm in LIST_ALGORITHM:

    is_current_algorithm = df_jobs['algorithm'] == algorithm
    jobs_for_algorithm = df_jobs.loc[is_current_algorithm].copy()

    sagemaker_params = su.import_sagemaker_params(environment=ENVIRONMENT, algorithm=algorithm)
    modeling_handler = su.SagemakerHandler(df_jobs=jobs_for_algorithm, **sagemaker_params)

    modeling_handler.launch_training_jobs()

    # Transform jobs are only launched for DeepAR.
    if algorithm == 'deepar':
        modeling_handler.launch_transform_jobs()

# Calculate outputs stacking

In [None]:
# TEMPORARY : Create fake df_jobs including old deepar run
import boto3
s3 = boto3.client("s3")

def get_predict_json(cutoff):
    """Return the S3 key of the first object under the DeepAR predict-input prefix of `cutoff`.

    The lookup is done in the 'fcst-refined-demand-forecast-prod' bucket. Cutoffs up to
    and including 202120 are searched under the 'forecast-v2-init' run, later cutoffs
    under the 'sunday-pipeline' run.

    Args:
        cutoff: Cutoff identifier, compared against 202120 and interpolated in the prefix.

    Returns:
        The key (without bucket) of the first object listed under the prefix.

    Raises:
        FileNotFoundError: If no object exists under the expected prefix.
    """
    bucket = 'fcst-refined-demand-forecast-prod'
    run_name = 'forecast-v2-init' if cutoff <= 202120 else 'sunday-pipeline'
    prefix = f'specific/{run_name}/{run_name}-deepar-{cutoff}/input/predict'

    # Only the first key is used, so there is no need to list more than one object.
    response = s3.list_objects(Bucket=bucket, Prefix=prefix, MaxKeys=1)

    # The 'Contents' entry is absent from the response when nothing matches the prefix.
    contents = response.get('Contents')
    if not contents:
        raise FileNotFoundError(f'No object found under s3://{bucket}/{prefix}')
    return contents[0]['Key']

# Build one fake DeepAR row per row of df_jobs (same index, same cutoff), pointing to the
# predict input of a previous DeepAR run.
# NOTE(review): if df_jobs holds several rows per cutoff, the same cutoff is looked up
# and appended several times - confirm this is what the stacking step expects.
df_jobs_deepar = pd.DataFrame({'cutoff' : df_jobs['cutoff']})
df_jobs_deepar['algorithm'] = 'deepar'
df_jobs_deepar['predict_path'] = [
    f's3://fcst-refined-demand-forecast-prod/{get_predict_json(cutoff)}'
    for cutoff in df_jobs_deepar['cutoff']
]

# `DataFrame.append` was deprecated in pandas 1.4 and removed in 2.0; `pd.concat` with its
# defaults gives the same result (original indexes kept, columns unioned).
df_jobs_tmp = pd.concat([df_jobs, df_jobs_deepar])

In [None]:
#df_jobs_tmp.to_csv('df_jobs_tmp.csv', index=False)

In [None]:
#df_jobs = pd.read_csv("df_jobs_tmp.csv")

In [None]:
# Outputs stacking only runs when enabled by the 'outputs_stacking' modeling parameter.
if OUTPUTS_STACKING:
    stacking_options = {
        'short_term_algorithm': SHORT_TERM_ALGORITHM,
        'long_term_algorithm': LONG_TERM_ALGORITHM,
        'smooth_stacking_range': SMOOTH_STACKING_RANGE,
    }
    # The temporary jobs table (df_jobs plus the fake DeepAR rows) is passed in place of df_jobs.
    osk.calculate_outputs_stacking(df_jobs_tmp, **stacking_options)