# AI Platform - Scikit-learn Example

This notebook demonstrates how to use AI Platform with scikit-learn.

In [1]:
# Set the environment variable used for authentication.
import os
import time

os.environ.update(GOOGLE_APPLICATION_CREDENTIALS="credentials.json")

In [2]:
import pandas as pd
import numpy as np
from google.cloud import bigquery
from google.oauth2 import service_account
from tensorflow import keras
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Sklearn training

- Initialise the directory for the training package.
- Write the training script.
- Submit the training job to AI Platform via `gcloud`.

In [3]:
!mkdir iris_training

mkdir: cannot create directory ‘iris_training’: File exists


In [4]:
!touch ./iris_training/__init__.py

In [5]:
!ls iris_training

__init__.py  train.py


# Write training script

In [6]:
%%writefile ./iris_training/train.py

# Import modules
import datetime
import os
import subprocess
import sys
import pickle
import pandas as pd
from sklearn.ensemble import RandomForestClassifier

# Download data
BUCKET_NAME = 'gcp_ai_demo'
iris_data_filename = 'iris_data.csv'
iris_target_filename = 'iris_target.csv'
data_dir = 'gs://cloud-samples-data/ml-engine/iris'
subprocess.check_call(['gsutil', 'cp', os.path.join(data_dir,
                                                    iris_data_filename),
                       iris_data_filename], stderr=sys.stdout)
subprocess.check_call(['gsutil', 'cp', os.path.join(data_dir,
                                                    iris_target_filename),
                       iris_target_filename], stderr=sys.stdout)

# Load data into Pandas
iris_data = pd.read_csv(iris_data_filename).values
iris_target = pd.read_csv(iris_target_filename).values
iris_target = iris_target.reshape((iris_target.size,))

# Train the model
classifier = RandomForestClassifier()
classifier.fit(iris_data, iris_target)


# Save model to disk
model_filename = 'model.pkl'
pickle.dump(classifier, open(model_filename, 'wb'))


# Upload model to Cloud Storage
gcs_model_path = os.path.join('gs://', BUCKET_NAME,
    datetime.datetime.now().strftime('iris_%Y%m%d_%H%M%S'), model_filename)
subprocess.check_call(['gsutil', 'cp', model_filename, gcs_model_path],
    stderr=sys.stdout)

Overwriting ./iris_training/train.py


# Submit job to AI Platform Training

In [7]:
BUCKET_NAME = "gcp_ai_demo"
JOB_NAME = "iris_job_rf_{}".format(int(time.time()))

! gcloud ai-platform jobs submit training $JOB_NAME \
    --region us-central1 \
    --runtime-version=1.15 \
    --scale-tier BASIC \
    --python-version=3.7 \
    --staging-bucket gs://$BUCKET_NAME \
    --package-path ./iris_training \
    --module-name iris_training.train

Job [iris_job_rf_1585658071] submitted successfully.
Your job is still active. You may view the status of your job with the command

  $ gcloud ai-platform jobs describe iris_job_rf_1585658071

or continue streaming the logs with the command

  $ gcloud ai-platform jobs stream-logs iris_job_rf_1585658071
jobId: iris_job_rf_1585658071
state: QUEUED


In [8]:
! gcloud ai-platform jobs describe $JOB_NAME

createTime: '2020-03-31T12:34:33Z'
etag: NvSDFdxmkrE=
jobId: iris_job_rf_1585658071
state: PREPARING
trainingInput:
  packageUris:
  - gs://gcp_ai_demo/iris_job_rf_1585658071/c9ef0513981a6c23e7c9bfc978958c27adbfcddbe928991e356475f7ee2fd065/iris_training-0.0.0.tar.gz
  pythonModule: iris_training.train
  pythonVersion: '3.7'
  region: us-central1
  runtimeVersion: '1.15'
trainingOutput: {}

View job in the Cloud Console at:
https://console.cloud.google.com/mlengine/jobs/iris_job_rf_1585658071?project=data-sc-activator

View logs at:
https://console.cloud.google.com/logs?resource=ml.googleapis.com%2Fjob_id%2Firis_job_rf_1585658071&project=data-sc-activator


# Deploy Model via AI Platform Predictions

In [9]:
# Model and version names share a prefix and get the current Unix time as a suffix.
MODEL_NAME = f"scikit_learn_demo_{int(time.time())}"
VERSION_NAME = f"scikit_learn_demo_{int(time.time())}"

In [10]:
!gcloud ai-platform models create $MODEL_NAME \
    --regions us-central1

Created ml engine model [projects/data-sc-activator/models/scikit_learn_demo_1585658099].


In [11]:
MODEL_DIR = BUCKET_NAME + "/iris_20200329_091925"

! gcloud ai-platform versions create $VERSION_NAME \
    --model=$MODEL_NAME \
    --framework=scikit-learn \
    --origin=gs://$MODEL_DIR \
    --runtime-version=1.15 \
    --python-version=3.7 \
    --staging-bucket=gs://$BUCKET_NAME

Creating version (this might take a few minutes)......⠹                        ^C
Creating version (this might take a few minutes)......aborted by ctrl-c.       
[1;31mERROR:[0m (gcloud.ai-platform.versions.create) Aborting wait for operation https://ml.googleapis.com/v1/projects/data-sc-activator/operations/create_scikit_learn_demo_1585658099_scikit_learn_demo_1585658099-1585658116419.



# Make Predictions

In [12]:
%%writefile test_data
[5.1, 3.5, 1.4, 0.2]
[4, 3, 1, 0.5]
[3, 4.5, 4, 3]

Overwriting test_data


In [13]:
# Model to query and the file holding one JSON instance per line.
# NOTE(review): this rebinds MODEL_NAME to "scikit_learn_rf", which is not the
# timestamped model created earlier in this notebook. Confirm that model exists
# in the project before running the prediction.
MODEL_NAME, INPUT_DATA_FILE = "scikit_learn_rf", "test_data"

In [14]:
! gcloud ai-platform predict \
    --model $MODEL_NAME  \
    --json-instances $INPUT_DATA_FILE

[0, 0, 2]
