In [10]:
import kfp
import kfp.components as comp

# Component factory for the data pre-processing step, built from the inline
# YAML component spec below.
#   - Inputs "Input 1" .. "Input 5" are all declared as String and are handed
#     to the container by value (inputValue); my_pipeline supplies them as the
#     keyword arguments input_1 .. input_5.
#   - "Output 1" is declared as Data and handed to the container as a path
#     (outputPath).
#   - The container runs `python3 data-preprocessing.py` with one
#     --inputN-path / --output1-path flag per input/output.
#   - The image is referenced by sha256 digest rather than by tag.
create_step_preprocess = comp.load_component_from_text("""
name: Preprocess Boston House Price Model Data
description: Load necessary data from GCS and perform basic pre-processing steps

inputs:
- {name: Input 1, type: String, description: 'Cloud bucket name containing raw data'}
- {name: Input 2, type: String, description: 'Relative path containing house_price.csv'}
- {name: Input 3, type: String, description: 'Relative path containing config.json'}


- {name: Input 4, type: String, description: 'Save file name for house_price.csv'}
- {name: Input 5, type: String, description: 'Save file name for config.json'}

outputs:
- {name: Output 1, type: Data, description: 'Path of the local folder where the main_data_preprocessed.csv should be written.'}

implementation:
  container:
    image: us-west2-docker.pkg.dev/ml-model-monitoring-framework/house-price-preprocess-docker-repo/quickstart-image@sha256:f8cc53ff6ac44f9ddeea27c35d96794e060162fa82764c3912351183c2c78520
    # command is a list of strings (command-line arguments). 
    # The YAML language has two syntaxes for lists and you can use either of them. 
    # Here we use the "flow syntax" - comma-separated strings inside square brackets.
    command: [
      python3, 
      # Path of the program inside the container
      data-preprocessing.py,
      --input1-path,
      {inputValue: Input 1},
      --input2-path,
      {inputValue: Input 2},
      --input3-path,
      {inputValue: Input 3},
      
      --input4-path,
      {inputValue: Input 4},
      --input5-path,
      {inputValue: Input 5},
      
      --output1-path, 
      {outputPath: Output 1}
    ]""")




# Component factory for the model training step, built from the inline YAML
# component spec below.
#   - "Input 1", "Input 3", "Input 4", "Parameter 1" and "Parameter 2" are
#     declared as String and handed to the container by value (inputValue).
#   - "Input 2" is declared as Data and handed to the container as a path
#     (inputPath); my_pipeline feeds it from the pre-processing step's
#     "Output 1".
#   - "Output 1" is declared as Data and handed to the container as a path
#     (outputPath).
#   - The container runs `python3 XGBoost-train.py` with --inputN-path,
#     --output1-path, --param1 and --param2 flags.
#   - The image is referenced by sha256 digest rather than by tag.
create_step_model_training = comp.load_component_from_text("""
name: Boston House Price Model Training
description: Boston House Price Model Training

inputs:
- {name: Input 1, type: String, description: 'Cloud bucket name containing raw data'}
- {name: Input 2, type: Data, description: 'Relative path main_data_preprocessed.csv'}
- {name: Input 3, type: String, description: 'Relative path containing config.json'}

- {name: Input 4, type: String, description: 'Save file name for config.json'}

- {name: Parameter 1, type: String, description: 'Pre-processed main file name'}
- {name: Parameter 2, type: String, description: 'Relative path of the GCS location to upload model artifacts'}

outputs:
- {name: Output 1, type: Data, description: 'Path of the local folder where model artifacts should be written.'}


implementation:
  container:
    image: us-west2-docker.pkg.dev/ml-model-monitoring-framework/house-price-train-docker-repo/quickstart-image@sha256:af315bfc950929f18df84fb128afd7af76463d4faf43be1d8fe21ad4824ab092
    # command is a list of strings (command-line arguments). 
    # The YAML language has two syntaxes for lists and you can use either of them. 
    # Here we use the "flow syntax" - comma-separated strings inside square brackets.
    command: [
      python3, 
      # Path of the program inside the container
      XGBoost-train.py,
      --input1-path,
      {inputValue: Input 1},
      --input2-path,
      {inputPath: Input 2},
      --input3-path,
      {inputValue: Input 3},
      --input4-path,
      {inputValue: Input 4},
      
      --output1-path, 
      {outputPath: Output 1},
      
      --param1, 
      {inputValue: Parameter 1},
      --param2, 
      {inputValue: Parameter 2},
      
      
    ]
""")

# Define your pipeline 
def my_pipeline():
    """Boston house price pipeline: pre-process the raw data, then train the model.

    Step 1 instantiates the pre-processing component with the bucket name, the
    relative paths of the raw CSV and the config file, and the file names to
    save them under.

    Step 2 instantiates the training component. Its ``input_2`` is the
    pre-processing step's ``'Output 1'``, which is what links the two steps;
    the remaining arguments are literal strings.

    The function takes no pipeline parameters and returns nothing.
    """
    preprocess_step = create_step_preprocess(
        input_1='boston-house-prices',
        input_2='raw_data/housing.csv',
        input_3='config/housing_price_model_config.json',
        input_4='housing.csv',
        input_5='housing_price_model_config.json',
    )

    create_step_model_training(
        input_1='boston-house-prices',
        # Consume the upstream step's output instead of a literal path.
        input_2=preprocess_step.outputs['Output 1'],
        input_3='config/housing_price_model_config.json',
        # The training component declares Input 4 as the save file name for
        # config.json. The previous value 'housing.csv' was copied from the
        # pre-processing call, where Input 4 is the save name of the CSV.
        # NOTE(review): confirm XGBoost-train.py has no dependency on the old name.
        input_4='housing_price_model_config.json',
        parameter_1='main_data_preprocessed.csv',
        parameter_2='model_artifacts',
    )
    

# Client for the Kubeflow Pipelines API.
# Replace the empty host string with the client URL of your deployment.
client = kfp.Client(
    host='',
)

# Submit my_pipeline as a run. The pipeline function takes no parameters, so
# an empty arguments mapping is passed. The call is left as the cell's final
# expression so the notebook displays the returned run result.
client.create_run_from_pipeline_func(
    pipeline_func=my_pipeline,
    arguments={},
)

RunPipelineResult(run_id=ca8025b7-2d0b-4fde-a4cc-5b53cf37ec61)

In [None]:
# Direct command-line invocation of data-preprocessing.py using the same
# input values that my_pipeline passes to the pre-processing component, with
# the output written to the relative folder data/preprocessed/.
# NOTE(review): this is a shell command in a cell with no `!` / `%%bash`
# prefix - presumably meant to be run from a terminal; confirm.
python3 data-preprocessing.py --input1-path 'boston-house-prices'  --input2-path 'raw_data/housing.csv'  --input3-path 'config/housing_price_model_config.json' --input4-path 'housing.csv'  --input5-path 'housing_price_model_config.json' --output1-path 'data/preprocessed/'

In [None]:
# Direct command-line invocation of XGBoost-train.py, reading the
# pre-processed data from data/preprocessed and writing to the relative
# folder model.
# NOTE(review): unlike the pipeline component, this command passes no
# --param2, and its --input4-path value ('config.json') differs from the one
# used in my_pipeline - confirm both are intentional.
# NOTE(review): this is a shell command in a cell with no `!` / `%%bash`
# prefix - presumably meant to be run from a terminal; confirm.
python3 XGBoost-train.py --input1-path 'boston-house-prices'  --input2-path 'data/preprocessed'   --input3-path 'config/housing_price_model_config.json'   --input4-path 'config.json'  --output1-path 'model' --param1 'main_data_preprocessed.csv' 