In [1]:
import tensorflow as tf
import datetime
import os
# Show the installed TensorFlow version. The call form of print works on
# both Python 2 and Python 3 (the bare print statement is Python 2 only).
print(tf.__version__)

1.2.0


## Set gcloud commands' variables

In [2]:
# Settings shared by the gcloud / gsutil cells in this notebook.
PROJECT = 'ksalama-gcp-playground'
BUCKET = 'ksalama-gcs-cloudml'
REGION = 'europe-west1'

# Local wall-clock timestamp (yymmddHHMMSS) captured when this cell runs.
CURRENT_DATE = '{:%y%m%d%H%M%S}'.format(datetime.datetime.now())

# Export the settings so %%bash cells can read them as $PROJECT, $BUCKET and $REGION.
os.environ.update({
    'PROJECT': PROJECT,
    'BUCKET': BUCKET,
    'REGION': REGION,
})

In [3]:
%%bash
# Point the gcloud CLI at the project and region exported by the notebook.
gcloud config set project ${PROJECT}
gcloud config set compute/region ${REGION}

# Print the active configuration so the settings can be checked.
gcloud config list

[compute]
region = europe-west1
[core]
account = 538539525836-compute@developer.gserviceaccount.com
disable_usage_reporting = True
project = ksalama-gcp-playground


Updated property [core/project].
Updated property [compute/region].

Your active configuration is: [default]


## Download the ml-package from GCS

In [3]:
%%bash
# Local directory name of the trainer package (same name as in the bucket).
PKG=nyc-taxifare-trainer

# Drop any previous local copy, then fetch the package from the bucket.
rm -rf ${PKG}
gsutil -m cp -r gs://${BUCKET}/ml-packages/${PKG} .

# List the trainer modules that were downloaded.
ls ${PKG}/trainer

__init__.py
model.py
task.py


Copying gs://ksalama-gcs-cloudml/ml-packages/nyc-taxifare-trainer/hyperparams.yaml...
/ [0 files][    0.0 B/  515.0 B]                                                Copying gs://ksalama-gcs-cloudml/ml-packages/nyc-taxifare-trainer/trainer/__init__.py...
/ [0/5 files][    0.0 B/ 12.3 KiB]   0% Done                                    Copying gs://ksalama-gcs-cloudml/ml-packages/nyc-taxifare-trainer/trainer/model.py...
/ [0/5 files][    0.0 B/ 12.3 KiB]   0% Done                                    Copying gs://ksalama-gcs-cloudml/ml-packages/nyc-taxifare-trainer/trainer/task.py...
/ [0/5 files][    0.0 B/ 12.3 KiB]   0% Done                                    / [1/5 files][  677.0 B/ 12.3 KiB]   5% Done                                    / [2/5 files][  6.4 KiB/ 12.3 KiB]  52% Done                                    / [3/5 files][ 12.3 KiB/ 12.3 KiB]  99% Done                                    / [4/5 files][ 12.3 KiB/ 12.3 KiB]  99% Done                                    

## Run ml-package locally using gcloud ml-engine command

In [None]:
# %%bash
# # NOTE: this local-training cell is disabled; every line is commented out.
# # NOTE(review): the trainer flags below (--train-files, --eval-files, --num-epochs)
# # differ from the underscore-style flags used in the cloud-training cells
# # (--train_data_paths, --eval_data_paths, --num_epochs) - confirm which set
# # trainer.task accepts before re-enabling.
# # NOTE(review): the rm -rf path has no ../ prefix while --job-dir does - confirm
# # both are meant to point at the same directory.
# rm -rf trained_models/dnn_combined_regression_model_gcloud

# gcloud ml-engine local train \
#    --module-name=trainer.task \
#    --package-path=nyc-taxifare-trainer/trainer \
#    -- \
#    --train-files=../data/train-data.csv \
#    --eval-files=../data/test-data.csv  \
#    --num-epochs=2 \
#    --job-dir=../trained_models/dnn_combined_regression_model_gcloud

## Run ml-package on Google Cloud ML Engine (big data + GPUs)

In [4]:
%%bash

# Submit the taxi-fare trainer package to Cloud ML Engine on the BASIC_GPU tier.
OUTDIR=gs://${BUCKET}/ml-models/taxifare/dnn-combined-regression-big-gpu
JOBNAME=train_taxifare_model_$(date -u +%y%m%d%H%M%S)

echo "$OUTDIR" "$REGION" "$JOBNAME"

# Delete the output of any earlier run written to the same location.
gsutil -m rm -rf "$OUTDIR"

# IMPORTANT: never put a '#' line inside the backslash-continued command below.
# A comment swallows its own trailing backslash, so the command ends right there:
# the job is then submitted WITHOUT the trainer arguments, and the remaining
# lines are executed as separate (failing) commands.
#
# Optional settings, kept here instead of inline:
#   hyper-parameter tuning (place before the bare '--'):
#       --config=nyc-taxifare-trainer/hyperparams.yaml
#   big dataset (replace the two data-path arguments after '--'):
#       --train_data_paths="gs://${BUCKET}/data/nyc-taxifare/big/train*"
#       --eval_data_paths="gs://${BUCKET}/data/nyc-taxifare/big/valid*"
#
# Everything after the bare '--' is forwarded to trainer.task.
gcloud ml-engine jobs submit training "$JOBNAME" \
   --region="$REGION" \
   --module-name=trainer.task \
   --package-path=nyc-taxifare-trainer/trainer \
   --job-dir="$OUTDIR" \
   --staging-bucket=gs://stagging-ksalama-gcs-cloudml \
   --scale-tier=BASIC_GPU \
   --runtime-version=1.2 \
   -- \
   --train_data_paths="gs://${BUCKET}/data/nyc-taxifare/train-data.csv" \
   --eval_data_paths="gs://${BUCKET}/data/nyc-taxifare/valid-data.csv" \
   --output_dir="$OUTDIR" \
   --num_epochs=100 --train_batch_size=10000 --nbuckets=16 --hidden_units="64 64 64 8"
   

gs://ksalama-gcs-cloudml/ml-models/taxifare/dnn-combined-regression-big-gpu europe-west1 train_taxifare_model_170928144453
jobId: train_taxifare_model_170928144453
state: QUEUED


Removing gs://ksalama-gcs-cloudml/ml-models/taxifare/dnn-combined-regression-big-gpu/#1506294278976496...
Removing gs://ksalama-gcs-cloudml/ml-models/taxifare/dnn-combined-regression-big-gpu/checkpoint#1506294280214896...
Removing gs://ksalama-gcs-cloudml/ml-models/taxifare/dnn-combined-regression-big-gpu/eval/#1506294197590364...
Removing gs://ksalama-gcs-cloudml/ml-models/taxifare/dnn-combined-regression-big-gpu/eval/events.out.tfevents.1506294197.master-d774518f99-0-j6tqz#1506294198636598...
Removing gs://ksalama-gcs-cloudml/ml-models/taxifare/dnn-combined-regression-big-gpu/eval/events.out.tfevents.1506294284.master-d774518f99-0-j6tqz#1506294285310756...
Removing gs://ksalama-gcs-cloudml/ml-models/taxifare/dnn-combined-regression-big-gpu/events.out.tfevents.1506294183.master-d774518f99-0-j6tqz#1506294281255932...
Removing gs://ksalama-gcs-cloudml/ml-models/taxifare/dnn-combined-regression-big-gpu/export/#1506294287837338...
Removing gs://ksalama-gcs-cloudml/ml-models/taxifare/dnn-c

## Run ml-package on Google Cloud ML Engine (small data + standard cluster)

In [8]:
%%bash

# Submit the taxi-fare trainer package to Cloud ML Engine on the STANDARD_1 tier.
OUTDIR=gs://${BUCKET}/ml-models/taxifare/dnn-combined-regression-small
JOBNAME=train_taxifare_model_$(date -u +%y%m%d%H%M%S)

echo "$OUTDIR" "$REGION" "$JOBNAME"

# Delete the output of any earlier run written to the same location.
gsutil -m rm -rf "$OUTDIR"

# IMPORTANT: never put a '#' line inside the backslash-continued command below.
# A comment swallows its own trailing backslash, so the command ends right there:
# the job is then submitted WITHOUT the trainer arguments, and the remaining
# lines are executed as separate (failing) commands.
#
# Optional hyper-parameter tuning (place before the bare '--'):
#       --config=nyc-taxifare-trainer/hyperparams.yaml
#
# Everything after the bare '--' is forwarded to trainer.task.
gcloud ml-engine jobs submit training "$JOBNAME" \
   --region="$REGION" \
   --module-name=trainer.task \
   --package-path=nyc-taxifare-trainer/trainer \
   --job-dir="$OUTDIR" \
   --staging-bucket=gs://stagging-ksalama-gcs-cloudml \
   --scale-tier=STANDARD_1 \
   --runtime-version=1.2 \
   -- \
   --train_data_paths="gs://${BUCKET}/data/nyc-taxifare/train-data.csv" \
   --eval_data_paths="gs://${BUCKET}/data/nyc-taxifare/valid-data.csv" \
   --output_dir="$OUTDIR" \
   --num_epochs=10

gs://ksalama-gcs-cloudml/ml-models/taxifare/dnn-combined-regression-small europe-west1 train_taxifare_model_170924225826
jobId: train_taxifare_model_170924225826
state: QUEUED


Removing gs://ksalama-gcs-cloudml/ml-models/taxifare/dnn-combined-regression-small/#1506293906953727...
Removing gs://ksalama-gcs-cloudml/ml-models/taxifare/dnn-combined-regression-small/events.out.tfevents.1506293907.master-dc51f9d099-0-tcx2g#1506293907765985...
/ [1/2 objects]  50% Done                                                       / [2/2 objects] 100% Done                                                       
Operation completed over 2 objects.                                              
Job [train_taxifare_model_170924225826] submitted successfully.
Your job is still active. You may view the status of your job with the command

  $ gcloud ml-engine jobs describe train_taxifare_model_170924225826

or continue streaming the logs with the command

  $ gcloud ml-engine jobs stream-logs train_taxifare_model_170924225826


## Results comparison

In [None]:
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

# One row per method with its RMSE. The values are hard-coded here rather than
# read back from the training jobs.
results = [
    ('Baseline', 8.89),
    ('Linear Reg', 11.15),
    ('DNN', 14.94),
    ('Comb DNN + Feature Eng', 8.1),
    ('+ Hyperparam Tuning', 5.42),
    ('+ Big Data', 3.01),
]
df = pd.DataFrame(results, columns=['Method', 'RMSE'])

# Height of the horizontal reference line; equal to the RMSE listed for the
# fourth method (Comb DNN + Feature Eng) in df.
REFERENCE_RMSE = 8.1

fig, ax = plt.subplots(figsize=(15, 8))
sns.barplot(data=df, x='Method', y='RMSE', ax=ax)
ax.set_ylabel('RMSE (dollars)')
ax.set_xlabel('Method')

# axhline spans the full width of the axes without adding data points, so it
# does not rely on a hard-coded x-range and cannot affect the x-axis limits.
ax.axhline(REFERENCE_RMSE, color='b');