In [27]:
from azureml.core import Workspace 

ws = Workspace(subscription_id="fc1c5e68-95c2-4bce-9ff9-5bd8442fb921", resource_group="titanic-api", workspace_name="titanic-ws")
default_ds = ws.get_default_datastore()

print(ws.name, "workspace loaded.")
print(default_ds.name, "datastore loaded.")

titanic-ws workspace loaded.
workspaceblobstore datastore loaded.


# Modelling

In [1]:
import pandas as pd
import titanic_functions as tfunc
from sklearn.preprocessing import StandardScaler

df2 = pd.read_csv("../dataset/titanic_data.csv")
df2 = tfunc.clean_df(df2)

X = df2.drop("survived", axis=1)
y = df2["survived"]

scaler = StandardScaler()

Xsc = scaler.fit_transform(X)

In [2]:
# Split the dataset in two equal parts
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(Xsc, y, test_size=0.2, random_state=0)

In [22]:
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression

gnb = GaussianNB()
rfc = RandomForestClassifier()
lr = LogisticRegression()

In [23]:
gnb.fit(X_train, y_train)
rfc.fit(X_train, y_train)
lr.fit(X_train, y_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [24]:
pred_gnb = gnb.predict(X_test)
pred_rfc = rfc.predict(X_test)
pred_lr = lr.predict(X_test)

In [28]:
from sklearn.metrics import confusion_matrix, plot_confusion_matrix, accuracy_score, precision_score, recall_score, roc_curve, roc_auc_score, classification_report, f1_score
import numpy as np

models = {"Gaussian NB": pred_gnb, "Random Forest Classifier": pred_rfc, "Logistic Regression": pred_lr}
metrics = {}

for name, pred in models.items():
    print(name+":")

    print("\tAccuracy:", np.round(accuracy_score(y_test, pred),4))
    print("\tPrecision:", np.round(precision_score(y_test, pred),4))
    print("\tRecall:", np.round(recall_score(y_test, pred),4))
    print("\tF1-Score:", np.round(f1_score(y_test, pred),4))
    print()

    m = {"Accuracy:": np.round(accuracy_score(y_test, pred),4),
        "Precision:": np.round(precision_score(y_test, pred),4),
        "Recall:": np.round(recall_score(y_test, pred),4),
        "F1-Score:": np.round(f1_score(y_test, pred),4)}
    metrics[name] = m 

Gaussian NB:
	Accuracy: 0.7023
	Precision: 0.5897
	Recall: 0.8679
	F1-Score: 0.7023

Random Forest Classifier:
	Accuracy: 0.7863
	Precision: 0.7404
	Recall: 0.7264
	F1-Score: 0.7333

Logistic Regression:
	Accuracy: 0.8282
	Precision: 0.7699
	Recall: 0.8208
	F1-Score: 0.7945



In [29]:
import joblib

best_model = lr
best_model_name = "Logistic Regression"
best_model_metrics = metrics[best_model_name]

os.makedirs("model_folder", exist_ok=True)
joblib.dump(best_model, "model_folder/titanic-api-model.pkl")
joblib.dump(scaler, "model_folder/scaler.pkl")

['model_folder/scaler.pkl']

In [30]:
from azureml.core import Model 
Model.register(ws, "model_folder/titanic-api-model.pkl", "titanic-api-model", tags=best_model_metrics)

Registering model titanic-api-model


Model(workspace=Workspace.create(name='titanic-ws', subscription_id='fc1c5e68-95c2-4bce-9ff9-5bd8442fb921', resource_group='titanic-api'), name=titanic-api-model, id=titanic-api-model:3, version=3, tags={'Accuracy:': '0.8282', 'Precision:': '0.7699', 'Recall:': '0.8208', 'F1-Score:': '0.7945'}, properties={})

In [33]:
Model.register(ws, "model_folder/scaler.pkl", "titanic-api-scaler", tags={"Scaler":"Titanic Input Scaler"})

Registering model titanic-api-scaler


Model(workspace=Workspace.create(name='titanic-ws', subscription_id='fc1c5e68-95c2-4bce-9ff9-5bd8442fb921', resource_group='titanic-api'), name=titanic-api-scaler, id=titanic-api-scaler:1, version=1, tags={'Scaler': 'Titanic Input Scaler'}, properties={})

In [34]:
model = Model.get_model_path("model_folder/titanic-api-model.pkl")
scaler = Model.get_model_path("model_folder/scaler.pkl")

In [37]:
%%writefile model_folder/score.py

import json
import numpy as np
import os
import joblib

def init():
    global model
    global scaler
    model_path = os.path.join(os.getenv('AZUREML_MODEL_DIR'), 'titanic-api-model.pkl')
    model = joblib.load(model_path)

    scaler_path = os.path.join(os.getenv('AZUREML_MODEL_DIR'), 'scaler.pkl')
    scaler = joblib.load(scaler_path)

def run(data):

########### SCALE INPUT

    try:
        data = np.array(json.loads(data))
        result = model.predict(data)
        # You can return any data type, as long as it is JSON serializable.
        return result.tolist()
    except Exception as e:
        error = str(e)
        return error


Writing model_folder/score.py


In [43]:
# Inference config

from azureml.core.environment import Environment
from azureml.core.conda_dependencies import CondaDependencies

# Create an environment and add conda dependencies to it
myenv = Environment(name="myenv")
# Enable Docker based environment
myenv.docker.enabled = True
# Build conda dependencies
myenv.python.conda_dependencies = CondaDependencies.create(conda_packages=['scikit-learn'],
                                                           pip_packages=['azureml-defaults'])
inference_config = InferenceConfig(entry_script="model_folder/score.py", environment=myenv)

In [44]:
# Deployment config
# NOT NEEDED FOR AZURE FUNCTIONS

In [45]:
!pip install azureml-contrib-functions

Collecting azureml-contrib-functions
  Downloading azureml_contrib_functions-1.15.0-py3-none-any.whl (6.7 kB)
Collecting azureml-core~=1.15.0
  Downloading azureml_core-1.15.0-py3-none-any.whl (2.0 MB)

We recommend you use --use-feature=2020-resolver to test your packages with the new resolver before it becomes the default.

azureml-train-core 1.11.0 requires azureml-core~=1.11.0, but you'll have azureml-core 1.15.0 which is incompatible.
azureml-train-automl-client 1.11.0 requires azureml-core~=1.11.0, but you'll have azureml-core 1.15.0 which is incompatible.
azureml-telemetry 1.11.0 requires azureml-core~=1.11.0, but you'll have azureml-core 1.15.0 which is incompatible.
azureml-sdk 1.11.0 requires azureml-core~=1.11.0, but you'll have azureml-core 1.15.0 which is incompatible.
azureml-pipeline-core 1.11.0 requires azureml-core~=1.11.0, but you'll have azureml-core 1.15.0 which is incompatible.

Installing collected packages: azureml-core, azureml-contrib-functions
  Attempting uni

In [47]:
from azureml.contrib.functions import package
from azureml.contrib.functions import BLOB_TRIGGER

model = joblib.load(model)

blob = package(ws, [model], inference_config, functions_enabled=True, trigger=BLOB_TRIGGER, input_path="input/{blobname}.json", output_path="output/{blobname}_out.json")
blob.wait_for_creation(show_output=True)
# Display the package location/ACR path
print(blob.location)

ERROR - Models must either be of type azureml.core.Model or a str path to a file or folder.



WebserviceException: WebserviceException:
	Message: Models must either be of type azureml.core.Model or a str path to a file or folder.
	InnerException None
	ErrorResponse 
{
    "error": {
        "message": "Models must either be of type azureml.core.Model or a str path to a file or folder."
    }
}