In [None]:
## Common Google Cloud Platform Commands

In [1]:
## Cloud Shell

# pull from public website (-L follows redirects, -O keeps the remote file name)
curl -LO https://www.ssa.gov/OACT/babynames/names.zip
# unzip
unzip names.zip

# pull from public cloud bucket
gsutil cp gs://spls/gsp072/baby-names.zip .

# copy from working directory to your bucket
# (set YOUR_BUCKET to the bucket name first; a literal <your_bucket> would be
# parsed by the shell as input/output redirection, not as a placeholder)
gsutil cp yob2014.txt "gs://${YOUR_BUCKET:?set YOUR_BUCKET to your bucket name}"

# download data from bucket to working directory
# (the destination directory must already exist when copying several objects)
mkdir -p data
gsutil -m cp gs://cloudml-public/census/data/* data/



SyntaxError: invalid syntax (<ipython-input-1-d3a693333e62>, line 4)

In [None]:
# Dataflow

# must use python version 2.7
python --version

# check the installed pip version
pip --version

# upgrade pip (version 7 or newer is needed)
pip install -U pip

# install virtualenv & create an environment that uses python 2.7
pip install --upgrade virtualenv
virtualenv -p python2.7 env

# activate
source env/bin/activate

# install dataflow
pip install google-cloud-dataflow

# run apache beam with wordcount.py locally
python -m apache_beam.examples.wordcount --output OUTPUT_FILE

# run pipeline remotely
# BUCKET is not assigned in this cell; it is used as a path prefix below, so it
# presumably holds a full gs://<bucket> URL (confirm). Fail fast when it is
# unset instead of silently passing "/staging", "/temp" and "/results/output".
: "${BUCKET:?set BUCKET first, e.g. BUCKET=gs://my-bucket}"
python -m apache_beam.examples.wordcount --project "$DEVSHELL_PROJECT_ID" \
  --runner DataflowRunner \
  --staging_location "$BUCKET/staging" \
  --temp_location "$BUCKET/temp" \
  --output "$BUCKET/results/output"

# check the dataflow job status (e.g. on the Dataflow page of the Cloud Console)

In [None]:
## BigQuery

# edit schema as text when creating table
# (the two schema lines below are text to type into the schema field, not
# commands, so they are kept as comments)
# of form:
#   field:type,field:type,field:type
# example:
#   name:string,gender:string,count:integer

In [None]:
# ML commands

# install dependencies
# TODO: no install command was recorded for this step (a duplicate of the
# EVAL_DATA assignment below sat here instead); add the actual command,
# e.g. a `pip install -r <requirements file>` for the trainer package.

# assign training data variable
TRAIN_DATA="$(pwd)/data/adult.data.csv"

# assign test data variable
EVAL_DATA="$(pwd)/data/adult.test.csv"

# specify output directory
MODEL_DIR=output

# delete contents from output before each training run
# (:? aborts when MODEL_DIR is empty or unset, so this can never expand to `rm -rf /*`)
rm -rf "${MODEL_DIR:?}"/*

# train locally
gcloud ml-engine local train \
    --module-name trainer.task \
    --package-path trainer/ \
    -- \
    --train-files "$TRAIN_DATA" \
    --eval-files "$EVAL_DATA" \
    --train-steps 1000 \
    --job-dir "$MODEL_DIR" \
    --eval-steps 100

# launch tensorboard server on the same directory the training run wrote to
tensorboard --logdir="$MODEL_DIR" --port=8080

## running training on the cloud

# environment: current project id, a bucket named after the project, and the region
PROJECT_ID="$(gcloud config list project --format "value(core.project)")"
BUCKET_NAME="${PROJECT_ID}-mlengine"
echo "${BUCKET_NAME}"
REGION="us-central1"

# create the bucket in that region
gsutil mb -l "${REGION}" "gs://${BUCKET_NAME}"

# upload the local data directory, then point the data variables at the uploaded copies
gsutil cp -r data "gs://${BUCKET_NAME}/data"
TRAIN_DATA="gs://${BUCKET_NAME}/data/adult.data.csv"
EVAL_DATA="gs://${BUCKET_NAME}/data/adult.test.csv"

# job name and the bucket path used as the job directory
JOB_NAME="census1"
OUTPUT_PATH="gs://${BUCKET_NAME}/${JOB_NAME}"
echo "${OUTPUT_PATH}"

# submit the training job; everything after the bare `--` goes to trainer.task
gcloud ml-engine jobs submit training "${JOB_NAME}" \
    --job-dir "${OUTPUT_PATH}" \
    --runtime-version 1.4 \
    --module-name trainer.task \
    --package-path trainer/ \
    --region "${REGION}" \
    -- \
    --train-files "${TRAIN_DATA}" \
    --eval-files "${EVAL_DATA}" \
    --train-steps 5000 \
    --verbosity DEBUG

# follow the job's logs to monitor progress
gcloud ml-engine jobs stream-logs "${JOB_NAME}"

# inspect the run in tensorboard
tensorboard --logdir="${OUTPUT_PATH}" --port=8080

## deploy model to Cloud ML Engine

# create model
MODEL_NAME=census
gcloud ml-engine models create "$MODEL_NAME" --regions="$REGION"

# select model version to use: list the exports and pick one timestamp directory
gsutil ls -r "$OUTPUT_PATH/export"

# set binary
# (set TIMESTAMP to the chosen directory name first; a literal <timestamp>
# would be parsed by the shell as redirections, not as a placeholder)
MODEL_BINARIES="$OUTPUT_PATH/export/census/${TIMESTAMP:?set TIMESTAMP to an export directory listed above}/"

# create v1 from the selected export
gcloud ml-engine versions create v1 \
    --model "$MODEL_NAME" \
    --origin "$MODEL_BINARIES" \
    --runtime-version 1.4

# confirm it is listed
gcloud ml-engine models list

# send a prediction request
gcloud ml-engine predict \
    --model "$MODEL_NAME" \
    --version v1 \
    --json-instances ../test.json