-
Notifications
You must be signed in to change notification settings - Fork 0
/
predict_baby_weights_tf.py
220 lines (197 loc) · 5.87 KB
/
predict_baby_weights_tf.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
!sudo chown -R jupyter:jupyter /home/jupyter/training-data-analyst
!pip install --user google-cloud-bigquery==1.25.0
# change these to try this notebook out
BUCKET = 'cloud-training-demos-ml' # Replace with the your bucket name
PROJECT = 'cloud-training-demos' # Replace with your project-id
REGION = 'us-central1'
import os
from google.cloud import bigquery
os.environ["PROJECT"] = PROJECT
os.environ["BUCKET"] = BUCKET
os.environ["REGION"] = REGION
os.environ["TFVERSION"] = "2.1"
os.environ["PYTHONVERSION"] = "3.7"
%%bash
export PROJECT=$(gcloud config list project --format "value(core.project)")
echo "Your current GCP Project Name is: "$PROJECT
%%bash
# Create a BigQuery dataset for babyweight if it doesn't exist
datasetexists=$(bq ls -d | grep -w babyweight)
if [ -n "$datasetexists" ]; then
echo -e "BigQuery dataset already exists, let's not recreate it."
else
echo "Creating BigQuery dataset titled: babyweight"
bq --location=US mk --dataset \
--description "Babyweight" \
$PROJECT:babyweight
echo "Here are your current datasets:"
bq ls
fi
## Create GCS bucket if it doesn't exist already...
exists=$(gsutil ls -d | grep -w gs://${BUCKET}/)
if [ -n "$exists" ]; then
echo -e "Bucket exists, let's not recreate it."
else
echo "Creating a new GCS bucket."
gsutil mb -l ${REGION} gs://${BUCKET}
echo "Here are your current buckets:"
gsutil ls
fi
%%bigquery
CREATE OR REPLACE TABLE
babyweight.babyweight_data AS
SELECT
weight_pounds,
CAST(is_male AS STRING) AS is_male,
mother_age,
CASE
WHEN plurality = 1 THEN "Single(1)"
WHEN plurality = 2 THEN "Twins(2)"
WHEN plurality = 3 THEN "Triplets(3)"
WHEN plurality = 4 THEN "Quadruplets(4)"
WHEN plurality = 5 THEN "Quintuplets(5)"
END AS plurality,
gestation_weeks,
FARM_FINGERPRINT(
CONCAT(
CAST(year AS STRING),
CAST(month AS STRING)
)
) AS hashmonth
FROM
publicdata.samples.natality
WHERE
year > 2000
AND weight_pounds > 0
AND mother_age > 0
AND plurality > 0
AND gestation_weeks > 0
%%bigquery
CREATE OR REPLACE TABLE
babyweight.babyweight_augmented_data AS
SELECT
weight_pounds,
is_male,
mother_age,
plurality,
gestation_weeks,
hashmonth
FROM
babyweight.babyweight_data
UNION ALL
SELECT
weight_pounds,
"Unknown" AS is_male,
mother_age,
CASE
WHEN plurality = "Single(1)" THEN plurality
ELSE "Multiple(2+)"
END AS plurality,
gestation_weeks,
hashmonth
FROM
babyweight.babyweight_data
%%bigquery
CREATE OR REPLACE TABLE
babyweight.babyweight_data_train AS
SELECT
weight_pounds,
is_male,
mother_age,
plurality,
gestation_weeks
FROM
babyweight.babyweight_augmented_data
WHERE
ABS(MOD(hashmonth, 4)) < 3
%%bigquery
CREATE OR REPLACE TABLE
babyweight.babyweight_data_eval AS
SELECT
weight_pounds,
is_male,
mother_age,
plurality,
gestation_weeks
FROM
babyweight.babyweight_augmented_data
WHERE
ABS(MOD(hashmonth, 4)) = 3
%%bigquery
-- LIMIT 0 is a free query; this allows us to check that the table exists.
SELECT * FROM babyweight.babyweight_data_train
LIMIT 0
%%bigquery
-- LIMIT 0 is a free query; this allows us to check that the table exists.
SELECT * FROM babyweight.babyweight_data_eval
LIMIT 0
# Construct a BigQuery client object.
client = bigquery.Client()
dataset_name = "babyweight"
# Create dataset reference object
dataset_ref = client.dataset(
dataset_id=dataset_name, project=client.project)
# Export both train and eval tables
for step in ["train", "eval"]:
destination_uri = os.path.join(
"gs://", BUCKET, dataset_name, "data", "{}*.csv".format(step))
table_name = "babyweight_data_{}".format(step)
table_ref = dataset_ref.table(table_name)
extract_job = client.extract_table(
table_ref,
destination_uri,
# Location must match that of the source table.
location="US",
) # API request
extract_job.result() # Waits for job to complete.
print("Exported {}:{}.{} to {}".format(
client.project, dataset_name, table_name, destination_uri))
%%bash
gsutil ls gs://${BUCKET}/babyweight/data/*.csv
%%bash
gsutil ls gs://${BUCKET}/babyweight/data/*000000000000.csv
%%bash
OUTDIR=gs://${BUCKET}/babyweight/trained_model
JOBID=babyweight_$(date -u +%y%m%d_%H%M%S)
gcloud ai-platform jobs submit training ${JOBID} \
--region=${REGION} \
--module-name=trainer.task \
--package-path=$(pwd)/babyweight/trainer \
--job-dir=${OUTDIR} \
--staging-bucket=gs://${BUCKET} \
--master-machine-type=n1-standard-8 \
--scale-tier=CUSTOM \
--runtime-version=${TFVERSION} \
--python-version=${PYTHONVERSION} \
-- \
--train_data_path=gs://${BUCKET}/babyweight/data/train*.csv \
--eval_data_path=gs://${BUCKET}/babyweight/data/eval*.csv \
--output_dir=${OUTDIR} \
--num_epochs=10 \
--train_examples=10000 \
--eval_steps=100 \
--batch_size=32 \
--nembeds=8
%%bash
gsutil ls gs://${BUCKET}/babyweight/trained_model
%%bash
MODEL_LOCATION=$(gsutil ls -ld -- gs://${BUCKET}/babyweight/trained_model/2* \
| tail -1)
gsutil ls ${MODEL_LOCATION}
%%bash
%%bash
gcloud config set ai_platform/region global
%%bash
MODEL_NAME="babyweight"
MODEL_VERSION="ml_on_gcp"
MODEL_LOCATION=$(gsutil ls -ld -- gs://${BUCKET}/babyweight/trained_model/2* \
| tail -1 | tr -d '[:space:]')
echo "Deleting and deploying $MODEL_NAME $MODEL_VERSION from $MODEL_LOCATION"
# gcloud ai-platform versions delete ${MODEL_VERSION} --model ${MODEL_NAME}
# gcloud ai-platform models delete ${MODEL_NAME}
gcloud ai-platform models create ${MODEL_NAME} --regions ${REGION}
gcloud ai-platform versions create ${MODEL_VERSION} \
--model=${MODEL_NAME} \
--origin=${MODEL_LOCATION} \
--runtime-version=2.1 \
--python-version=3.7