## Requirements

In [1]:
%load_ext autoreload
%autoreload 2

import pandas as pd
import src.data_handler as dh
import src.sagemaker_utils as su
import src.utils as ut

## Params

In [2]:
# Full list of historical cutoffs. The values look like YYYYWW week ids
# (TODO confirm). Kept under its own name so it is no longer silently
# overwritten: assign it to `list_cutoff` to re-run every cutoff.
backtest_cutoffs = [
    202050, 202051, 202052, 202053, 202101, 202102, 202103, 202104, 202105, 202106, 202107, 202108,
    202109, 202110, 202111, 202112, 202113, 202114, 202115, 202116,
]

# Cutoffs actually processed by this run.
list_cutoff = [202117]

# Label of the run, forwarded to the job table, the refining config and the
# SageMaker handler in the cells below.
run_name = 'deepar-16w-init'

# Name of the environment whose configuration is loaded.
environment = 'seed'

## Configs

In [None]:
# Load the raw configuration of the selected environment once; every
# setting below is read from this single object.
confs = ut.import_raw_config(environment)

# Location of the refined data shared by all runs.
global_bucket = confs['buckets']['refined_data_global']
global_path = confs['paths']['refined_global_path']

# Location of the refined data specific to a run.
specific_bucket = confs['buckets']['refined_data_specific']
specific_path = confs['paths']['refined_specific_path']

# Reuse the configuration loaded above instead of importing it a second time.
algorithm = confs['modeling_parameters']['algorithm']

## Load data

In [None]:
# Short alias for the S3 parquet reader used for every table below.
read_parquet = ut.read_multipart_parquet_s3

# Tables read from the global refined bucket, under `global_path`.
df_model_week_sales = read_parquet(global_bucket, global_path + 'model_week_sales')
df_model_week_tree = read_parquet(global_bucket, global_path + 'model_week_tree')
df_model_week_mrp = read_parquet(global_bucket, global_path + 'model_week_mrp')

# NOTE(review): this table is read from a hard-coded bucket name rather than
# from `global_bucket` -- confirm this is intentional.
imputed_sales_key = global_path + 'imputed_sales_lockdown_1.parquet'
df_imputed_sales_lockdown_1 = read_parquet('fcst-refined-demand-forecast-dev', imputed_sales_key)

## Generate df_jobs

In [None]:
# URI under which the run-specific refined data is stored.
refined_data_specific_path = ut.to_uri(specific_bucket, specific_path)

# Build the job table (the refining loop later reads its `cutoff`,
# `train_path` and `predict_path` columns).
# NOTE(review): the bare name `generate_df_jobs` is not imported by any
# visible cell; it is assumed to live in src.sagemaker_utils (alias `su`)
# -- confirm.
df_jobs = su.generate_df_jobs(
    list_cutoff=list_cutoff,
    run_name=run_name,
    algorithm=algorithm,
    refined_data_specific_path=refined_data_specific_path,
)

# Display the job table as the cell output.
df_jobs

## Generate modeling specific data

In [None]:
# Columns of the weekly product tree exposed as static features; each one is
# paired with `model_id` in its own frame.
static_feature_columns = (
    'family_id',
    'sub_department_id',
    'department_id',
    'univers_id',
    'product_nature_id',
)

# Run the specific data refining once per cutoff.
for cutoff in list_cutoff:

    print(cutoff)

    # Base data: the same global tables are handed to every cutoff.
    base_data = {
        'model_week_sales': df_model_week_sales,
        'model_week_tree': df_model_week_tree,
        'model_week_mrp': df_model_week_mrp,
        'imputed_sales_lockdown_1': df_imputed_sales_lockdown_1,
    }

    # Static features: taken from the tree rows of the cutoff week only.
    df_static_tree = df_model_week_tree[df_model_week_tree['week_id'] == cutoff].copy()
    static_features = {
        column: df_static_tree[['model_id', column]]
        for column in static_feature_columns
    }

    # No dynamic features are used in this run.
    global_dynamic_features = None
    specific_dynamic_features = None

    # Look up the job row of this cutoff once and read both paths from it.
    job = df_jobs[df_jobs['cutoff'] == cutoff]
    train_path = job.loc[:, 'train_path'].values[0]
    predict_path = job.loc[:, 'predict_path'].values[0]

    # Import refining config.
    # NOTE(review): `import_refining_config` and `data_handler` are not
    # imported by any visible cell; they are assumed to live in
    # src.data_handler (alias `dh`) -- confirm.
    refining_params = dh.import_refining_config(
        environment=environment,
        cutoff=cutoff,
        run_name=run_name,
        train_path=train_path,
        predict_path=predict_path,
    )

    # The instance is deliberately NOT named `dh`: that would overwrite the
    # module alias imported at the top and break the next iteration.
    refining_handler = dh.data_handler(
        base_data=base_data,
        static_features=static_features,
        global_dynamic_features=global_dynamic_features,
        specific_dynamic_features=specific_dynamic_features,
        **refining_params
    )

    refining_handler.execute_data_refining_specific()

## Launch parallel Fit-Transform

In [None]:
# Load the SageMaker settings of the selected environment.
# NOTE(review): `import_sagemaker_params` and `SagemakerHandler` are not
# imported by any visible cell; they are assumed to live in
# src.sagemaker_utils (alias `su`) -- confirm.
sagemaker_params = su.import_sagemaker_params(environment=environment)

# One handler drives every job listed in `df_jobs`.
sh = su.SagemakerHandler(
    run_name=run_name,
    df_jobs=df_jobs,
    **sagemaker_params
)

# Training jobs are launched first, then the transform jobs.
sh.launch_training_jobs()

sh.launch_transform_jobs()