In [None]:
from utils import *

In [None]:
from azureml.core import Workspace

# Load the Azure ML workspace handle used by every later cell.
# from_config() is called without arguments, so the SDK's default config
# lookup applies (presumably a local config.json — confirm for your setup).
ws = Workspace.from_config()

In [None]:
# Create or get the training cluster named "cpu-cluster".
# NOTE(review): get_aml_cluster is not defined in this notebook; presumably it
# is provided by the `from utils import *` cell — confirm.
aml_cluster = get_aml_cluster(ws, cluster_name="cpu-cluster")
# Wait for the cluster operation to finish, showing its output in the cell.
aml_cluster.wait_for_completion(show_output=True)

In [None]:
# Build the run configuration shared by both pipeline steps.
# NOTE(review): get_run_config is not defined in this notebook (presumably it
# comes from the `utils` star import); whether these names are treated as pip
# or conda packages depends on that helper — confirm.
required_packages = ['numpy', 'pandas', 'scikit-learn', 'tensorflow']
run_conf = get_run_config(required_packages)

In [None]:
from azureml.core import Dataset

# Look up the dataset registered in the workspace under the name 'titanic'.
dataset = Dataset.get_by_name(ws, name='titanic')
# Wrap it as a named input ('titanic'); this object is what the preprocessing
# step receives in both its `arguments` and `inputs`.
data_in = dataset.as_named_input('titanic')

In [None]:
from azureml.core import Datastore
from azureml.pipeline.core import PipelineData

# Datastore "mldata" backs the intermediate data passed between the steps.
datastore = Datastore.get(ws, datastore_name="mldata")

# One PipelineData placeholder per split, created in the order train, test.
data_train, data_test = (
    PipelineData(split_name, datastore=datastore)
    for split_name in ('train', 'test')
)

In [None]:
from azureml.data import OutputFileDatasetConfig

# Final pipeline output: an output config named "predictions" targeting
# 'titanic/predictions' on the datastore, then passed through
# read_delimited_files() and register_on_complete('titanic.pred').
data_out = (
    OutputFileDatasetConfig(
        name="predictions",
        destination=(datastore, 'titanic/predictions'),
    )
    .read_delimited_files()
    .register_on_complete('titanic.pred')
)

In [None]:
from azureml.pipeline.steps import PythonScriptStep

# Step 1 ("Preprocessing"): runs code/preprocess_output.py on the cluster.
# It takes the named dataset input and declares the train/test PipelineData
# objects as its outputs; the same objects are passed on the command line.
preprocess_outputs = [data_train, data_test]
preprocess_args = [
    "--input", data_in,
    "--out-train", data_train,
    "--out-test", data_test,
]

step_1 = PythonScriptStep(
    name='Preprocessing',
    script_name="preprocess_output.py",
    source_directory="code",
    arguments=preprocess_args,
    inputs=[data_in],
    outputs=preprocess_outputs,
    runconfig=run_conf,
    compute_target=aml_cluster,
)


In [None]:
from azureml.pipeline.steps import PythonScriptStep

# Step 2 ("Training"): runs code/train_output.py on the cluster.
# It consumes the train/test PipelineData objects as inputs and declares
# data_out as its output; the same objects are passed on the command line.
train_inputs = [data_train, data_test]
train_args = [
    "--in-train", data_train,
    "--in-test", data_test,
    "--output", data_out,
]

step_2 = PythonScriptStep(
    name='Training',
    script_name="train_output.py",
    source_directory="code",
    arguments=train_args,
    inputs=train_inputs,
    outputs=[data_out],
    runconfig=run_conf,
    compute_target=aml_cluster,
)

In [None]:
from azureml.pipeline.core import Pipeline

# Assemble the pipeline from the preprocessing and training steps.
pipeline = Pipeline(ws, steps=[step_1, step_2])

In [None]:
# Validate the pipeline before submitting it; as the last expression in the
# cell, the return value of validate() is shown as the cell output.
pipeline.validate()

In [None]:
from azureml.core import Experiment

# Submit the pipeline under the experiment named "azureml-pipeline".
exp = Experiment(ws, "azureml-pipeline")
# `run` is the handle for the submitted pipeline run; the RunDetails widget
# cell uses it.
run = exp.submit(pipeline)

In [None]:
from azureml.widgets import RunDetails
# Display the run-details widget for the submitted pipeline run.
RunDetails(run).show()