In [5]:
from datetime import date, timedelta
import papermill as pm
from analysis.pipeline import authorize_pipeline, run_with_argument, get_notebook_op, execute_pipeline, create_pipeline, create_parent_folder, get_default_pipeline_fn
import kfp.dsl as dsl

In [6]:
# Global parameters shared by the pipeline definitions in the cells below.

# Path prefix for both the source notebooks and the executed output notebooks.
notebook_folder = "/home/jovyan/public/notebooks/model_evaluation/2020-09-22/"

# Container image passed as `image=` to every notebook op.
image_ver_to_use = "075790327284.dkr.ecr.ap-south-1.amazonaws.com/midgar-analysis-notebook:latest"

# Forwarded to each notebook as the `sparkMemory` parameter and also used as
# the task memory limit.
spark_memory = "120G"

# Used as the task CPU limit.
cpu_cores = "16"

In [13]:
# Passed to every evaluation notebook as the `model_date` parameter.
model_date = "2020-09-16"
# Passed as `sales_date` to the allocation notebook and as both `start_date`
# and `end_date` to the weighted-trend notebook.
sales_date = "2020-09-17"

In [14]:
# Underscore-separated form of model_date, embedded in output notebook file
# names and job names.
file_date = "_".join(model_date.split("-"))

In [15]:
def pipeline_func():
    """Define the allocation-metrics evaluation pipeline.

    Creates one notebook op per model environment ('stg' first, then 'prod').
    Each op runs `allocation_metrics-v1.ipynb` from `notebook_folder` and
    writes an output notebook whose name carries `file_date` and the model
    environment. The sales environment is always 'stg'.

    Reads the module-level settings `notebook_folder`, `file_date`,
    `spark_memory`, `cpu_cores`, `model_date`, `sales_date` and
    `image_ver_to_use`. Returns nothing; presumably the ops are registered
    with the enclosing pipeline as a side effect of being created
    (TODO confirm against `analysis.pipeline.get_notebook_op`).
    """
    source_notebook = notebook_folder + 'allocation_metrics-v1.ipynb'

    for model_env in ('stg', 'prod'):
        task = get_notebook_op(
            source_notebook,
            notebook_folder + f'allocation_metrics_output_{file_date}_{model_env}.ipynb',
            notebook_args=(
                f'-p sparkMemory {spark_memory} -p model_date {model_date} '
                f'-p sales_date {sales_date} -p model_env {model_env} -p sales_env stg'
            ),
            image=image_ver_to_use,
            job_name=f'model_eval_alloc_{file_date}_{model_env}',
        )
        # The task memory limit reuses the value handed to the notebook as
        # `sparkMemory`.
        task.set_memory_limit(spark_memory)
        task.set_cpu_limit(cpu_cores)

In [16]:
# Wrap the allocation pipeline definition and submit it.
# NOTE(review): the string arguments look like a run/pipeline name and an
# experiment name, and the trailing empty dict like pipeline arguments --
# confirm against analysis.pipeline.
pipeline = authorize_pipeline('alloc', pipeline_func)
execute_pipeline(
    'model_eval_alloc',
    'amir_model_eval_exp',
    pipeline,
    {},
)

In [17]:
def pipeline_func():
    """Define the weighted-trend evaluation pipeline.

    Creates three notebook ops, in this order, all running
    `weighted_trend_eval_new_cats_support-v1.ipynb` with `test_env stg` and
    with `start_date` and `end_date` both set to `sales_date`:

    1. pred_env=stg,  remove_new_cats=1  (job suffix ``stg_nnc``)
    2. pred_env=stg,  remove_new_cats=0  (job suffix ``stg_wnc``)
    3. pred_env=prod, remove_new_cats=1  (job suffix ``prod_nnc``)

    Reads the module-level settings `notebook_folder`, `file_date`,
    `spark_memory`, `cpu_cores`, `model_date`, `sales_date` and
    `image_ver_to_use`. Returns nothing.

    NOTE: this re-uses the name `pipeline_func`, replacing the allocation
    pipeline definition from the earlier cell, so the cells must be run in
    order.
    """
    source_notebook = notebook_folder + 'weighted_trend_eval_new_cats_support-v1.ipynb'

    # (output file prefix, pred_env, remove_new_cats flag, job/file tag)
    # NOTE(review): the first variant's output prefix differs from the other
    # two ("weight_metrics_new_cats_support_output" versus
    # "weight_new_cats_support_metrics_output"). It is kept exactly as it was
    # so existing output paths do not change -- confirm whether this is
    # intentional.
    variants = (
        ('weight_metrics_new_cats_support_output', 'stg', 1, 'nnc'),
        ('weight_new_cats_support_metrics_output', 'stg', 0, 'wnc'),
        ('weight_new_cats_support_metrics_output', 'prod', 1, 'nnc'),
    )

    for output_prefix, pred_env, remove_new_cats, tag in variants:
        task = get_notebook_op(
            source_notebook,
            notebook_folder + f'{output_prefix}_{file_date}_{pred_env}_{tag}.ipynb',
            notebook_args=(
                f'-p sparkMemory {spark_memory} -p model_date {model_date} '
                f'-p start_date {sales_date} -p end_date {sales_date} '
                f'-p test_env stg -p pred_env {pred_env} '
                f'-p remove_new_cats {remove_new_cats}'
            ),
            image=image_ver_to_use,
            job_name=f'weight_{file_date}_{pred_env}_{tag}',
        )
        # The task memory limit reuses the value handed to the notebook as
        # `sparkMemory`.
        task.set_memory_limit(spark_memory)
        task.set_cpu_limit(cpu_cores)

In [18]:
# Wrap the weighted-trend pipeline definition and submit it.
# NOTE(review): the string arguments look like a run/pipeline name and an
# experiment name, and the trailing empty dict like pipeline arguments --
# confirm against analysis.pipeline.
pipeline = authorize_pipeline('weight', pipeline_func)
execute_pipeline(
    'weight_trend_alloc',
    'amir_model_eval_exp',
    pipeline,
    {},
)