In [4]:
from azureml.core import Workspace

# Connect to the existing Azure ML workspace, then look up its default datastore.
ws = Workspace(
    subscription_id="fc1c5e68-95c2-4bce-9ff9-5bd8442fb921",
    resource_group="titanic-api",
    workspace_name="titanic-ws",
)
default_ds = ws.get_default_datastore()

# Confirm which workspace and datastore were resolved.
print(ws.name, "workspace loaded.")
print(default_ds.name, "datastore loaded.")

titanic-ws workspace loaded.
workspaceblobstore datastore loaded.


# Modelling

In [5]:
import pandas as pd
import titanic_functions as tfunc
from sklearn.preprocessing import StandardScaler

# Read the raw CSV and run it through the project's cleaning helper.
df2 = tfunc.clean_df(pd.read_csv("../dataset/titanic_data.csv"))

# Target is the "survived" column; every remaining column is a feature.
y = df2["survived"]
X = df2.drop(columns="survived")

# Standardise the features. The fitted scaler is kept because it is saved
# and used again at scoring time.
# NOTE(review): the scaler is fit on all rows, before the train/test split,
# so test-set statistics influence the scaling - confirm this is acceptable.
scaler = StandardScaler().fit(X)
Xsc = scaler.transform(X)

In [6]:
# Hold out 20% of the scaled rows as the test set (an 80/20 split, not two equal parts).
# random_state is fixed so the same rows land in each split on every run.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(Xsc, y, test_size=0.2, random_state=0)

In [7]:
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression

# One untrained estimator per candidate model.
gnb = GaussianNB()
# The forest is randomised; seed it (same seed as the train/test split) so that
# re-running the notebook reproduces the same model and the same metrics.
rfc = RandomForestClassifier(random_state=0)
lr = LogisticRegression()

In [8]:
# Train every candidate on the training split. fit() returns the estimator
# itself, so gnb / rfc / lr keep referring to the (now fitted) objects.
for estimator in (gnb, rfc, lr):
    estimator.fit(X_train, y_train)

In [9]:
# Predict the held-out test rows with each fitted model, in the same order
# as the models were trained.
pred_gnb, pred_rfc, pred_lr = (clf.predict(X_test) for clf in (gnb, rfc, lr))

In [10]:
# NOTE(review): plot_confusion_matrix was removed in scikit-learn 1.2; on newer
# versions this import line fails. It is not used in this cell.
from sklearn.metrics import confusion_matrix, plot_confusion_matrix, accuracy_score, precision_score, recall_score, roc_curve, roc_auc_score, classification_report, f1_score
import numpy as np

# Test-set predictions of every candidate, keyed by display name.
models = {"Gaussian NB": pred_gnb, "Random Forest Classifier": pred_rfc, "Logistic Regression": pred_lr}
metrics = {}

# Metric functions keyed by their label. The trailing colon is kept on purpose:
# these keys are the ones stored in `metrics` and later attached as model tags.
scorers = {"Accuracy:": accuracy_score,
           "Precision:": precision_score,
           "Recall:": recall_score,
           "F1-Score:": f1_score}

for name, pred in models.items():
    # Compute each metric exactly once, so the printed report and the stored
    # values cannot drift apart.
    m = {label: np.round(score(y_test, pred), 4) for label, score in scorers.items()}
    metrics[name] = m

    print(name+":")
    for label, value in m.items():
        print("\t" + label, value)
    print()

Gaussian NB:
	Accuracy: 0.7023
	Precision: 0.5897
	Recall: 0.8679
	F1-Score: 0.7023

Random Forest Classifier:
	Accuracy: 0.7863
	Precision: 0.7451
	Recall: 0.717
	F1-Score: 0.7308

Logistic Regression:
	Accuracy: 0.8282
	Precision: 0.7699
	Recall: 0.8208
	F1-Score: 0.7945



In [11]:
import os

import joblib

# The model to ship is chosen by hand from the metrics report above.
best_model = lr
best_model_name = "Logistic Regression"
best_model_metrics = metrics[best_model_name]

# Persist the classifier and the scaler it was trained with; the scoring
# script loads both files.
# `os` is imported in this cell because no visible cell imports it.
os.makedirs("model_folder", exist_ok=True)
joblib.dump(best_model, "model_folder/titanic-api-model.pkl")
joblib.dump(scaler, "model_folder/titanic-api-scaler.pkl")

# Copy both artefacts to the workspace's default datastore, replacing older copies.
default_ds.upload_files(["model_folder/titanic-api-model.pkl", "model_folder/titanic-api-scaler.pkl"], target_path="model_folder", overwrite=True)

Uploading an estimated of 2 files
Uploading model_folder/titanic-api-model.pkl
Uploaded model_folder/titanic-api-model.pkl, 1 files out of an estimated total of 2
Uploading model_folder/titanic-api-scaler.pkl
Uploaded model_folder/titanic-api-scaler.pkl, 2 files out of an estimated total of 2
Uploaded 2 files


$AZUREML_DATAREFERENCE_ea0fee9a0a034a629ca616c5852d0d86

In [12]:
from azureml.core import Model 

In [13]:
# Register the saved classifier in the workspace; its test metrics are attached as tags.
Model.register(
    workspace=ws,
    model_path="model_folder/titanic-api-model.pkl",
    model_name="titanic-api-model",
    tags=best_model_metrics,
)

Registering model titanic-api-model


Model(workspace=Workspace.create(name='titanic-ws', subscription_id='fc1c5e68-95c2-4bce-9ff9-5bd8442fb921', resource_group='titanic-api'), name=titanic-api-model, id=titanic-api-model:10, version=10, tags={'Accuracy:': '0.8282', 'Precision:': '0.7699', 'Recall:': '0.8208', 'F1-Score:': '0.7945'}, properties={})

In [14]:
# Register the fitted scaler next to the model. The path must be the file written
# by joblib.dump ("titanic-api-scaler.pkl"); "model_folder/scaler.pkl" is never
# created by this notebook, so registering it picks up a stale file or fails.
Model.register(ws, "model_folder/titanic-api-scaler.pkl", "titanic-api-scaler", tags={"Scaler":"Titanic Input Scaler"})

Registering model titanic-api-scaler


Model(workspace=Workspace.create(name='titanic-ws', subscription_id='fc1c5e68-95c2-4bce-9ff9-5bd8442fb921', resource_group='titanic-api'), name=titanic-api-scaler, id=titanic-api-scaler:7, version=7, tags={'Scaler': 'Titanic Input Scaler'}, properties={})

In [37]:
# model = Model.get_model_path("titanic-api-model")
# scaler = Model.get_model_path("titanic-api-scaler")

# DEPLOYMENT

In [65]:
%%writefile model_folder/score.py

import json
import numpy as np
import os
import joblib

def init():
    """Load the pickled classifier and scaler into module-level globals.

    Both files are read with joblib from paths relative to the current
    working directory; `run` then uses the resulting `model` and `scaler`.
    """
    global model, scaler

    model = joblib.load('./model_folder/titanic-api-model.pkl')
    scaler = joblib.load('./model_folder/titanic-api-scaler.pkl')

###### POWER BI ######

# The schema helpers were used without being imported, which makes the script
# fail with a NameError as soon as it is loaded.
# NOTE(review): the inference-schema package has to be installed in the
# deployment environment - confirm it is pulled in by the environment's packages.
from inference_schema.parameter_types.standard_py_parameter_type import StandardPythonParameterType
from inference_schema.schema_decorators import input_schema, output_schema

# Sample payload for schema generation. run() passes the payload straight to the
# scaler, so the sample is one already-cleaned, numeric feature row (the shape the
# notebook's test requests send), not a raw passenger record.
sample_input = StandardPythonParameterType([1.0, 25.0, 0.0, 0.0, 300.0, 1.0, 300.0, 1.0, 25.0,
                                            1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
                                            0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0])

#sample_global_parameters = StandardPythonParameterType(1.0) #this is optional
sample_output = StandardPythonParameterType([1.0])

######################


# The name passed to input_schema has to be a parameter of the decorated function,
# so it is 'data' (the key the clients already send), not 'inputs'.
@input_schema('data', sample_input)
#@input_schema('global_parameters', sample_global_parameters) #this is optional
@output_schema(sample_output)
def run(data):
    """Score one feature row and return the prediction as a JSON-serializable list.

    Args:
        data: either the raw JSON request body (a string such as
            '{"data": [...]}') or the already-parsed list of feature values.
            NOTE(review): with the schema decorators applied, the serving layer
            is expected to hand over the parsed value - confirm against the
            inference server version in use.

    Returns:
        list: the model's prediction for the row, or - when anything fails -
        the error message as a plain string (original behaviour, kept so
        existing callers see the same response shape).
    """
    try:
        # Raw request body: decode it and pull out the feature row.
        if isinstance(data, (str, bytes, bytearray)):
            data = json.loads(data)['data']
        # The scaler and the model both expect a 2-D input, hence the extra list.
        data = scaler.transform([data])
        result = model.predict(data)
        # You can return any data type, as long as it is JSON serializable.
        return result.tolist()
    except Exception as e:
        error = str(e)
        return error


Overwriting model_folder/score.py


In [16]:
# Deployment Env

import sklearn
from azureml.core.environment import Environment
from azureml.core.conda_dependencies import CondaDependencies

# Create an environment and add conda dependencies to it
myenv = Environment(name="myenv")
# Enable Docker based environment
myenv.docker.enabled = True
# Build conda dependencies.
# scikit-learn is pinned to the version used for training in this kernel: the
# pickled model and scaler are only guaranteed to load under the same version.
myenv.python.conda_dependencies = CondaDependencies.create(conda_packages=[f"scikit-learn=={sklearn.__version__}"],
                                                           pip_packages=['azureml-defaults'])

print("Deployment")

Deployment


In [66]:
# Deployment config
from azureml.core.webservice import AciWebservice
from azureml.core.model import InferenceConfig
# from azureml.core import Model 

# model = ws.models["titanic-api-model"]

# The scoring script and its environment; everything in model_folder is shipped.
inference_config = InferenceConfig(source_directory="model_folder",
                                   entry_script="score.py",
                                   environment=myenv)

# First-time deployment, kept for reference (the code below updates an existing service):
# deploy_config = AciWebservice.deploy_configuration(cpu_cores=0.1, memory_gb=0.5)

# service_name = "titanic-api"

# service = Model.deploy(ws, service_name, [model], inference_config, deploy_config)
# service.wait_for_deployment(True)
# print(service.state)

# Attach to the already-deployed service and roll out the new inference config.
service = AciWebservice(ws, "titanic-api")
service.update(inference_config=inference_config)
# update() only starts the rollout. Wait for it to finish so the state printed
# below describes the new deployment and not the one that was running before.
service.wait_for_deployment(show_output=True)
print(service.state)

Healthy


In [36]:
# TEST ENDPOINT
# Read the scoring URI from the service object; the request cells below POST to it.
endpoint = service.scoring_uri
print(endpoint)

http://a278d689-be85-4c99-a512-1b715ae287db.uksouth.azurecontainer.io/score


In [40]:
# Row of the cleaned dataset used as the test request.
i=1

# Feature values of that row, without the target column. tolist() converts NumPy
# scalars to plain Python numbers; list(... .values) keeps NumPy types, which
# json.dumps cannot serialise when the dtype is integer.
x_new = df2.drop("survived",axis=1).iloc[i].tolist()

In [41]:
import json
import requests

# The request is sent as JSON
headers = {"Content-Type": "application/json"}

# Request body: the feature row under the "data" key, which the scoring script reads
input_json = json.dumps({"data": x_new})


In [42]:
# Score the sample row on the live endpoint and compare with its true label.
response = requests.post(endpoint, input_json, headers=headers)
print("Predicted:", response.json()[0])
print("Actual:", df2["survived"].iloc[i])

Predicted: 1
Actual: 1


In [67]:
import titanic_functions as tfunc

# One hand-written raw record, passed through the same cleaning helper as the
# training data and flattened to a single feature row.
raw_passenger = [[1, "Raul", "male", 25, 0, 0, 300, "C32", "S"]]
data = list(tfunc.clean_df(raw_passenger).values[0])

headers = {"Content-Type": "application/json"}
input_json = json.dumps({"data": data})

# Post the row to the scoring endpoint; the first element of the JSON reply
# is shown as the cell output.
response = requests.post(endpoint, input_json, headers=headers)
response.json()[0]


1

In [64]:
# Inspect the cleaned feature row built from the raw record.
print(data)

[1.0, 25.0, 0.0, 0.0, 300.0, 1.0, 300.0, 1.0, 25.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
