In [11]:
from azureml.core import Workspace, Dataset, Datastore, Experiment, Run
import math, random, pickle
import pandas as pd
import numpy as np

In [None]:
experiment_name = "titanic_classifier"

In [2]:
!az login -t "67269ab6-9b12-4bb4-9781-cdd9ce5a3102"
!az account set --subscription "Visual Studio Enterprise"

[
  {
    "cloudName": "AzureCloud",
    "homeTenantId": "67269ab6-9b12-4bb4-9781-cdd9ce5a3102",
    "id": "d87d4530-ce4e-4d84-b997-7a78d01e2906",
    "isDefault": true,
    "managedByTenants": [
      {
        "tenantId": "2f4a9838-26b7-47ee-be60-ccc1fdec5953"
      }
    ],
    "name": "Visual Studio Enterprise",
    "state": "Enabled",
    "tenantId": "67269ab6-9b12-4bb4-9781-cdd9ce5a3102",
    "user": {
      "name": "kyle.m.hale@avanade.com",
      "type": "user"
    }
  }
]
You have logged in. Now let us find all the subscriptions to which you have access...


In [4]:
from azureml.core.authentication import AzureCliAuthentication

try:
    ws = Workspace.from_config()
except:
    cli_auth = AzureCliAuthentication()

    ws = Workspace(subscription_id="d87d4530-ce4e-4d84-b997-7a78d01e2906",
               resource_group="mlops-RG",
               workspace_name="mlops-AML-WS",
               auth=cli_auth)

Performing interactive authentication. Please follow the instructions on the terminal.
Note, we have launched a browser for you to login. For old experience with device code, use "az login --use-device-code"
You have logged in. Now let us find all the subscriptions to which you have access...
Interactive authentication successfully completed.


In [4]:
experiment = Experiment(workspace = ws, name = experiment_name)

In [7]:
datastore = Datastore.get_default(workspace=ws)
datastore

{
  "name": "workspaceblobstore",
  "container_name": "azureml-blobstore-6f7dfc08-44b5-438d-a6bc-9c804e0bdd76",
  "account_name": "kylemhaleamlsa",
  "protocol": "https",
  "endpoint": "core.windows.net"
}

In [8]:
titanic_ds = pd.read_csv("./data/titanic3.csv")
#drop unnecessary columns for classifier
titanic_ds.drop(['name','ticket','fare','cabin','embarked','boat','body','home.dest'], axis=1, inplace=True)

#condense Families to unaccompanied boolean
titanic_ds['unaccompanied'] = np.where(titanic_ds.sibsp > 0, 1, np.where(titanic_ds['parch'] > 0, 1, 0))
titanic_ds.drop(['sibsp', 'parch'], axis=1, inplace=True)

#map gender to numeric values
genders = {"male": 0, "female": 1}
titanic_ds['sex'] = titanic_ds['sex'].map(genders)

#round age to nearest year, and fill in missing values with average age
titanic_ds["age"].fillna(titanic_ds["age"].mean(), inplace=True)
titanic_ds['age'] = titanic_ds['age'].astype(int)

#re-bin ages into groups
titanic_ds['age'] = pd.cut(titanic_ds['age'], [-10, 18, 40, 100], labels=[1,2,3]).astype(int)

titanic_ds = titanic_ds.reindex(columns=['pclass','age','sex','unaccompanied','survived'])


In [12]:
engineered_data_path = './data/uploads/titanic-engineered.csv'
titanic_ds.to_csv(engineered_data_path)

datastore.upload(src_dir='./data/uploads', target_path='data')

dataset = Dataset.Tabular.from_delimited_files(datastore.path('data/titanic-engineered.csv'))

Uploading an estimated of 1 files
Target already exists. Skipping upload for data\titanic-engineered.csv
Uploaded 0 files
2020-05-13 18:16:08.899961 | ActivityCompleted: Activity=from_delimited_files, HowEnded=Failure, Duration=0.0 [ms], Info = {'activity_id': '525a8679-faf9-452b-bc7b-953e8e07f6b7', 'activity_name': 'from_delimited_files', 'activity_type': 'PublicApi', 'app_name': 'TabularDataset', 'source': 'azureml.dataset', 'version': '1.1.5.1', 'dataprepVersion': ''}, Exception=ModuleNotFoundError; No module named 'azureml.dataprep'


ModuleNotFoundError: No module named 'azureml.dataprep'

In [53]:
from sklearn.model_selection import train_test_split

X = titanic_ds.iloc[ : , :-1].values
y = titanic_ds.iloc[ : , 4].values

X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size=0.33, random_state=39)



In [54]:

from sklearn.tree import DecisionTreeClassifier

run = experiment.start_logging(snapshot_directory=None)

# Log total number of iterations
decision_tree = DecisionTreeClassifier() 
decision_tree.fit(X_train, Y_train)  
Y_pred = decision_tree.predict(X_test) 
acc_decision_tree = round(decision_tree.score(X_train, Y_train) * 100, 2)
# Log final results
run.log("Final estimate", acc_decision_tree)
#run.log("Final error",math.pi-pi_estimate)

# Write file containing pi value into run history
#with open("pi_estimate.txt","wb") as f:
    #pickle.dump(str(pi_estimate),f)
filename = 'finalized_model.sav'
pickle.dump(decision_tree , open(filename, 'wb'))
run.upload_file(name = 'outputs/finalized_model.sav', path_or_stream = './finalized_model.sav')

# Complete tracking and get link to details
run.complete()
print("Run completed")

Run completed


In [55]:
from sklearn.metrics import classification_report

print(classification_report(Y_test,Y_pred, target_names=['class 0', 'class 1']))

precision    recall  f1-score   support

     class 0       0.78      0.91      0.84       274
     class 1       0.77      0.56      0.65       158

    accuracy                           0.78       432
   macro avg       0.78      0.73      0.75       432
weighted avg       0.78      0.78      0.77       432



In [11]:
run

Experiment,Id,Type,Status,Details Page,Docs Page
titanic-classifier,aaa80263-cb85-4853-8169-db3e3420b846,,Completed,Link to Azure Machine Learning studio,Link to Documentation


In [9]:
experiment

Name,Workspace,Report Page,Docs Page
titanic-classifier,mlops-AML-WS,Link to Azure Machine Learning studio,Link to Documentation


In [56]:
run = list(Experiment(workspace = ws, name = experiment.name).get_runs())[0]
run

Experiment,Id,Type,Status,Details Page,Docs Page
titanic-classifier,c0de61c7-44f4-4900-be99-9d488156473f,,Completed,Link to Azure Machine Learning studio,Link to Documentation


In [57]:
model = run.register_model(model_name = "titanic_classifier", model_path = "outputs/finalized_model.sav")

In [14]:
%%writefile score.py
import pickle, json
from azureml.core.model import Model
import pandas as pd
import numpy as np

from inference_schema.schema_decorators import input_schema, output_schema
from inference_schema.parameter_types.numpy_parameter_type import NumpyParameterType
from inference_schema.parameter_types.pandas_parameter_type import PandasParameterType
from sklearn.externals import joblib

def init():
    global titanic_classifier
    model_path = Model.get_model_path(model_name = "titanic_classifier")
    #model_path = "finalized_model.sav"
    #with open(model_path, "rb") as f:
    titanic_classifier = joblib.load(model_path)

input_sample = pd.DataFrame(data=[{
    "age": 20,
    "pclass": 1,
    "unaccompanied" : 0,
    "sex" : 0
}])

output_sample = np.array([0])

@input_schema('data', PandasParameterType(input_sample))
@output_schema(NumpyParameterType(output_sample))
def run(data):
    try:
        result = titanic_classifier.predict(data)
        return result.tolist()
    except Exception as e:
        result = str(e)
        return error

Overwriting score.py


In [18]:
from azureml.core.conda_dependencies import CondaDependencies 

cd = CondaDependencies()
cd.add_pip_package("inference-schema[numpy-support]")
cd.add_conda_package("scikit-learn")
cd.add_conda_package("pandas")
cd.add_conda_package("numpy")

cd.save_to_file(".", "myenv.yml")

'myenv.yml'

In [26]:
from azureml.core.model import InferenceConfig, Model
from azureml.core.webservice import AciWebservice
from azureml.core import Environment

e = Environment.from_conda_specification("myenv", "./myenv.yml")

inference_config = InferenceConfig(entry_script='score.py', runtime='python', conda_file='myenv.yml')
aci_config = AciWebservice.deploy_configuration(cpu_cores=1, memory_gb=1)

service = Model.deploy(workspace=ws, name='titanic-classifier', models=[model], inference_config=inference_config, deployment_config=aci_config, deployment_target=None)

service.update(description='Binary classifier for Titanic')
service.wait_for_deployment(show_output = True)

AciWebservice(workspace=Workspace.create(name='mlops-AML-WS', subscription_id='d87d4530-ce4e-4d84-b997-7a78d01e2906', resource_group='mlops-RG'), name=titanic-classifier, image_id=None, compute_type=None, state=ACI, scoring_uri=Transitioning, tags=None, properties={}, created_by={'azureml.git.repository_uri': 'https://github.com/kthejoker/Presentations.git', 'mlflow.source.git.repoURL': 'https://github.com/kthejoker/Presentations.git', 'azureml.git.branch': 'master', 'mlflow.source.git.branch': 'master', 'azureml.git.commit': '0116f681f658ac8daa8bed51009c9489f4664760', 'mlflow.source.git.commit': '0116f681f658ac8daa8bed51009c9489f4664760', 'azureml.git.dirty': 'True'})

In [58]:
service.update(models=[model], inference_config = inference_config)

In [40]:
   import json
   from azureml.core import Webservice

   service = Webservice(workspace=ws, name="titanic-classifier")
    
   request = json.dumps({"data" : [{"sex": 0, "pclass" : 3, "age": 70, "unaccompanied": 0}, {"sex": 1, "pclass" : 1, "age": 34, "unaccompanied": 1}]})
   response = service.run(request)
   response

[0, 0]

In [None]:
service.state