In [23]:
import pandas as pd
from sklearn import model_selection
from sklearn import tree
from sklearn.metrics import accuracy_score
from sklearn.externals import joblib

In [24]:
# File locations used by the local part of this notebook.
MODEL_FILE = "model.pkl"           # where the fitted estimator is serialized
DATA_FILE = "data/iris.data.csv"   # CSV that is read into the training frame

## Load the Data

In [25]:
# Columns of the CSV that feed the model, and the column holding the label.
INPUT_FEATURES = [
    'sepal_length',
    'sepal_width',
    'petal_length',
    'petal_width',
]
TARGET_FEATURE = 'species'

# header=0: the first row of the file supplies the column names.
iris_data = pd.read_csv(DATA_FILE, header=0)

# Split the frame into the feature matrix and the label column.
X = iris_data.loc[:, INPUT_FEATURES]
y = iris_data.loc[:, TARGET_FEATURE]

## Fit and Evaluate SKLearn Estimator

In [27]:
# Small decision tree capped at three leaves. random_state is fixed so that
# ties between equally good splits are resolved the same way on every run,
# which keeps the fitted tree (and the saved model) reproducible.
estimator = tree.DecisionTreeClassifier(max_leaf_nodes=3, random_state=0)
estimator.fit(X=X, y=y)

# The score below is computed on the same rows the tree was fitted on, so it
# is a training accuracy, not an estimate of performance on unseen data.
y_predicted = estimator.predict(X=X)
accuracy = accuracy_score(y, y_predicted)
print("Training Performance: {}%".format(accuracy*100))

Training Performance: 96.0%


## Save & Load Trained Estimator

In [45]:
# Round-trip check: serialize the fitted estimator, read it back, and make
# sure the restored object can still predict.
joblib.dump(estimator, MODEL_FILE)
loaded_estimator = joblib.load(MODEL_FILE)

# Predict on the first five rows of the feature matrix only.
predictions = loaded_estimator.predict(
    X.iloc[:5]
)
print(predictions)

['setosa' 'setosa' 'setosa' 'setosa' 'setosa']


## Submit Cloud ML Job to Train the SKLearn Model
The package in **iris-sklearn-package/trainer** performs the following:
* Download the data file from GCS
* Load the data file as a pandas DataFrame
* Create and Fit a SKLearn Estimator using the data
* Save the fitted model locally
* Upload the saved model to GCS to be served

In [None]:
%%bash

echo "Submitting a Cloud ML Engine job..."

# Where the job runs and where its inputs/outputs live.
BUCKET="ksalama-gcs-cloudml"
REGION="europe-west1"
TIER="BASIC"

MODEL_NAME="iris_estimator"

# Local trainer package, training data in GCS, and the GCS output directory.
PACKAGE_PATH="trainer"
TRAIN_FILE="gs://${BUCKET}/data/iris/iris.data.csv"
MODEL_DIR="gs://${BUCKET}/ml-models/iris_estimators"

# A timestamp suffix gives every submission a distinct job name.
CURRENT_DATE="$(date +%Y%m%d_%H%M%S)"
JOB_NAME="train_${MODEL_NAME}_${CURRENT_DATE}"

# Flags after the bare "--" are forwarded to the trainer module itself.
gcloud ml-engine jobs submit training "${JOB_NAME}" \
    --job-dir="${MODEL_DIR}/job_dir" \
    --runtime-version=1.2 \
    --region="${REGION}" \
    --scale-tier="${TIER}" \
    --module-name=trainer.task \
    --package-path="${PACKAGE_PATH}" \
    -- \
    --train-file="${TRAIN_FILE}" \
    --model-dir="${MODEL_DIR}"

## Confirm the Fitted Model Was Uploaded to GCS

In [21]:
%%bash

# List the contents of the model directory in the bucket.
BUCKET="ksalama-gcs-cloudml"
MODEL_DIR="gs://${BUCKET}/ml-models/iris_estimators"

gsutil ls "${MODEL_DIR}"

gs://ksalama-gcs-cloudml/ml-models/iris_estimators/
gs://ksalama-gcs-cloudml/ml-models/iris_estimators/model.joblib
gs://ksalama-gcs-cloudml/ml-models/iris_estimators/job_dir/


## Create a Model and Deploy a Model Version on Cloud ML Engine

In [51]:
%%bash 

# Register the model resource; versions are attached to it in a later cell.
REGION="europe-west1"
MODEL_NAME="iris_estimator"

gcloud ml-engine models create "${MODEL_NAME}" \
    --regions="${REGION}"

Created ml engine model [projects/ksalama-gcp-playground/models/iris_estimator].


In [11]:
%%bash 

# Deploy the model files found under MODEL_ORIGIN as version v1 of the model.
MODEL_NAME="iris_estimator"
VERSION="v1"
MODEL_ORIGIN="gs://ksalama-gcs-cloudml/ml-models/iris_estimators"
RUNTIME_VERSION="1.2"
FRAMEWORK="SCIKIT_LEARN"

gcloud ml-engine versions create "${VERSION}" \
    --model="${MODEL_NAME}" \
    --origin="${MODEL_ORIGIN}" \
    --runtime-version="${RUNTIME_VERSION}" \
    --framework="${FRAMEWORK}"

Creating version (this might take a few minutes)......
...............................................................................................................................done.


## Perform Prediction Using the Deployed Model

In [18]:
from googleapiclient import discovery
from oauth2client.client import GoogleCredentials

def estimate(project, model_name, version, instances):
    """Request online predictions from a deployed model version.

    Args:
        project: Project ID used to build the model resource name.
        model_name: Name of the model to query.
        version: Name of the model version to query.
        instances: Input rows, sent as the ``instances`` field of the
            request body.

    Returns:
        The value of the ``predictions`` field of the service response.

    Raises:
        RuntimeError: If the response contains an ``error`` field; the
            exception carries the error payload returned by the service.
    """
    credentials = GoogleCredentials.get_application_default()
    api = discovery.build(
        'ml', 'v1', credentials=credentials,
        discoveryServiceUrl='https://storage.googleapis.com/cloud-ml/discovery/ml_v1_discovery.json')

    request_data = {'instances': instances}

    model_url = 'projects/{}/models/{}/versions/{}'.format(project, model_name, version)
    response = api.projects().predict(body=request_data, name=model_url).execute()

    # A failed prediction is reported inside the response body; surface the
    # service's message instead of failing later with a bare KeyError.
    if 'error' in response:
        raise RuntimeError(response['error'])

    return response['predictions']

In [19]:
# Identify the deployed model version to query.
PROJECT = 'ksalama-gcp-playground'
MODEL_NAME = 'iris_estimator'
VERSION = 'v1'

# Two rows of four numeric values each, sent as-is to the deployed model.
instances = [
    [6.8, 2.8, 4.8, 1.4],
    [6.0, 3.4, 4.5, 1.6],
]

predictions = estimate(
    project=PROJECT,
    model_name=MODEL_NAME,
    version=VERSION,
    instances=instances,
)

print(predictions)

['versicolor', 'versicolor']
