In [1]:
!pip install --upgrade azureml-sdk azureml-widgets

Requirement already up-to-date: azureml-sdk in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (1.43.0)
Requirement already up-to-date: azureml-widgets in /anaconda/envs/azureml_py38/lib/python3.8/site-packages (1.43.0)


In [2]:
import mlflow
# Name of the MLflow experiment that every run started in this notebook is recorded under.
EXPERIMENT_NAME = "churn-mlflow-experiment"

# Make it the active experiment; the returned Experiment is the cell's displayed output.
mlflow.set_experiment(experiment_name=EXPERIMENT_NAME)

2022/07/18 01:48:12 INFO mlflow.tracking.fluent: Experiment with name 'churn-mlflow-experiment' does not exist. Creating a new experiment.


<Experiment: artifact_location='', experiment_id='a9809072-6e3f-43af-8ba1-6c4cf7709e14', lifecycle_stage='active', name='churn-mlflow-experiment', tags={}>

In [3]:
from azureml.core import Experiment
from azureml.core import Model
from sklearn.model_selection import train_test_split
from azureml.core import Dataset
from azureml.core import Workspace


# Load the workspace from the saved config file
ws = Workspace.from_config()
default_ds = ws.get_default_datastore()

# Load the churn training set (delimited files under 'trainingset/' on the
# default datastore) and materialise it as a pandas DataFrame.
churns = Dataset.Tabular.from_delimited_files(path=(default_ds, 'trainingset/'), validate=False)
churnsdf = churns.to_pandas_dataframe()

# Fixed seed so the train/test split (and therefore the trained model) is
# identical on every Restart & Run All.
RANDOM_STATE = 42

# Separate features and labels: 'customer_churned' is the target, and
# 'customer_id' is dropped from the feature columns. 30% of the rows are held out.
x_train, x_test, y_train, y_test = train_test_split(
    churnsdf.drop(['customer_churned', 'customer_id'], axis=1),
    churnsdf["customer_churned"],
    test_size=0.3,
    random_state=RANDOM_STATE,
)




In [4]:
import numpy as np
from sklearn.preprocessing import OrdinalEncoder

from sklearn.compose import ColumnTransformer
from xgboost import XGBClassifier

# Columns that are routed through the ordinal encoder.
categorical_columns = ["email_domain"]

# Ordinal encoder for the categorical columns; the category list is inferred ("auto").
email_domain_encoder = OrdinalEncoder(categories="auto")

# Encode the categorical columns and pass all remaining columns through unchanged.
encoder = ColumnTransformer(
    transformers=[("cat_encoding", email_domain_encoder, categorical_columns)],
    remainder="passthrough",
)

# XGBoost classifier using log loss as its evaluation metric.
model = XGBClassifier(eval_metric="logloss", use_label_encoder=False)

In [5]:
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

# Chain the column encoding and the classifier so they are fitted, logged
# and applied as one estimator.
pipeline_steps = [
    ("encoding", encoder),
    ("model", model),
]
pipeline = Pipeline(steps=pipeline_steps)

In [6]:
# Display the assembled pipeline so its steps and their parameters can be reviewed.
pipeline

Pipeline(memory=None,
         steps=[('encoding',
                 ColumnTransformer(n_jobs=None, remainder='passthrough',
                                   sparse_threshold=0.3,
                                   transformer_weights=None,
                                   transformers=[('cat_encoding',
                                                  OrdinalEncoder(categories='auto',
                                                                 dtype=<class 'numpy.float64'>),
                                                  ['email_domain'])],
                                   verbose=False)),
                ('model',
                 XGBClassifier(base_score=None, booster=None,
                               colsample_bylevel=None, colsamp...
                               max_delta_step=None, max_depth=None,
                               min_child_weight=None, missing=nan,
                               monotone_constraints=None, n_estimators=100,
                        

In [7]:
from mlflow.models import infer_signature

# Infer the model signature (input and output schema) from the held-out
# features and labels.
signature = infer_signature(model_input=x_test, model_output=y_test)

  inputs = _infer_schema(model_input)
  outputs = _infer_schema(model_output) if model_output is not None else None


In [8]:
# Display the inferred input/output schema that will be stored with the logged model.
signature

inputs: 
  ['current_linkedin_activity': long, 'email_domain': string, 'linkedin_skill_code': long, 'mentor_program_involvement': long, 'negative_review_in_past_5_years': long, 'recruiting_location_code': long, 'recruiting_method_code': long, 'weekly_consumption': long, 'years_of_membership': long, 'survey_attitude_towards_company': long, 'survey_attitude_towards_product_features': long, 'survey_attitude_towards_performance': long, 'survey_attitude_towards_usability': long, 'survey_attitude_towards_product_quality': long, 'survey_attitude_towards_customer_service': long]
outputs: 
  [long]

In [9]:
# Train inside an MLflow run and log the pipeline under the "pipeline"
# artifact path together with its signature.
with mlflow.start_run() as run:
    fitted_pipeline = pipeline.fit(x_train, y_train)  # fit() returns the pipeline itself
    mlflow.sklearn.log_model(
        sk_model=fitted_pipeline,
        artifact_path="pipeline",
        signature=signature,
    )

In [11]:
# Fetch the runs recorded for the experiment as a DataFrame, then show only
# the rows whose status is FINISHED.
runs = mlflow.search_runs(experiment_names=["churn-mlflow-experiment"])
runs.loc[runs["status"] == "FINISHED"]

Unnamed: 0,run_id,experiment_id,status,artifact_uri,start_time,end_time,tags.mlflow.source.name,tags.mlflow.source.type,tags.mlflow.user,tags.mlflow.rootRunId
0,7171b295-8ac6-4590-b781-c034c5a4244a,a9809072-6e3f-43af-8ba1-6c4cf7709e14,FINISHED,,2022-07-18 01:48:44.475000+00:00,2022-07-18 01:48:54.660000+00:00,/anaconda/envs/azureml_py38/lib/python3.8/site...,LOCAL,Liping Huang,7171b295-8ac6-4590-b781-c034c5a4244a


In [23]:
# Choose the run to register from FINISHED runs only. Indexing the unfiltered
# frame could select a failed or still-running run that has no logged
# "pipeline" artifact.
finished_runs = runs[runs.status == "FINISHED"]
if finished_runs.empty:
    raise RuntimeError(
        "No FINISHED runs found in the experiment; "
        "train and log the pipeline before registering it."
    )

# Take the first row by position (.iloc) because the filtered frame may no
# longer have index label 0.
# NOTE(review): assumes search_runs lists the most recent run first — confirm for this backend.
run_id = finished_runs.run_id.iloc[0]

print(run_id)

7171b295-8ac6-4590-b781-c034c5a4244a


In [25]:
# Register the pipeline artifact logged by the selected run as a new version
# of the named model; the returned ModelVersion is the cell's displayed output.
model_name = "churns_mlflow"
model_uri = f"runs:/{run_id}/pipeline"
mlflow.register_model(model_uri=model_uri, name=model_name)

Successfully registered model 'churns_mlflow'.
2022/07/18 02:00:41 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation.                     Model name: churns_mlflow, version 1
Created version '1' of model 'churns_mlflow'.


<ModelVersion: creation_timestamp=1658109641821, current_stage='None', description='', last_updated_timestamp=1658109641821, name='churns_mlflow', run_id='7171b295-8ac6-4590-b781-c034c5a4244a', run_link='', source='azureml://experiments/churn-mlflow-experiment/runs/7171b295-8ac6-4590-b781-c034c5a4244a/artifacts/pipeline', status='READY', status_message='', tags={}, user_id='', version='1'>