<a href="https://colab.research.google.com/github/martindevoto/machine-learning-notebooks-personal/blob/main/MLOps_Coursera_GCP_Intro.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import xgboost as xgb
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from google.cloud import bigquery

In [None]:
# Pull a 10,000-row sample of births after the year 2000 from the public
# natality table: the target (weight_pounds) plus the four feature columns.
query = """
SELECT
    weight_pounds,
    is_male,
    mother_age,
    plurality,
    gestation_weeks
FROM
    publicdata.samples.natality
WHERE year > 2000
LIMIT 10000
"""

# Submit the query through a client built with no explicit arguments and
# materialise the result as a pandas DataFrame.
bq_client = bigquery.Client()
query_job = bq_client.query(query)
df = query_job.to_dataframe()

# Show the first rows as the cell output.
df.head()

Unnamed: 0,weight_pounds,is_male,mother_age,plurality,gestation_weeks
0,9.312326,False,28,1,40.0
1,7.749249,True,30,1,40.0
2,7.394304,True,27,1,39.0
3,6.750554,False,40,1,41.0
4,8.377566,True,24,1,38.0


In [None]:
# Summary statistics for the numeric columns; differing `count` values point
# at columns that contain missing entries.
df.describe()

Unnamed: 0,weight_pounds,mother_age,plurality,gestation_weeks
count,9984.0,10000.0,10000.0,9935.0
mean,7.207753,27.3949,1.0329,38.545848
std,1.325683,6.142551,0.187673,2.561521
min,0.639341,13.0,1.0,17.0
25%,6.563162,23.0,1.0,38.0
50%,7.312733,27.0,1.0,39.0
75%,8.035849,32.0,1.0,40.0
max,12.50021,47.0,4.0,47.0


In [None]:
# Count how many rows fall under each value of the is_male flag.
df['is_male'].value_counts()

True     5129
False    4871
Name: is_male, dtype: int64

In [None]:
# Discard every row that has a missing value, then reorder the remaining rows
# using a fixed seed so the resulting order is repeatable.
df = shuffle(df.dropna(), random_state=2)

In [None]:
# Features are every column except the target; the target is weight_pounds.
data = df.drop('weight_pounds', axis=1)
labels = df['weight_pounds']

In [None]:
# Encode the boolean is_male flag as 0/1.
# `data` (the feature frame used for training) was already derived from `df`
# in the previous cell, so casting `df` alone never reached the model inputs.
# Cast both frames; `assign` returns new frames instead of writing into a
# possibly-shared column, which keeps this cell safe to re-run.
df = df.assign(is_male=df['is_male'].astype(int))
data = data.assign(is_male=data['is_male'].astype(int))

In [None]:
# Split features and target into training and held-out evaluation sets
# (train_test_split's default split proportions).
# random_state pins the partition so a fresh kernel run reproduces the same
# train/test rows; 2 matches the seed already used when shuffling `df`.
x, y = data, labels
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=2)

In [None]:
# Regressor configured with the squared-error objective; no other
# hyperparameters are passed explicitly.
xgb_params = {'objective': 'reg:squarederror'}
model = xgb.XGBRegressor(**xgb_params)

In [None]:
# Train the regressor on the training split; the value returned by fit() is
# displayed as the cell output.
model.fit(x_train, y_train)

XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
             colsample_bynode=1, colsample_bytree=1, gamma=0, gpu_id=-1,
             importance_type='gain', interaction_constraints='',
             learning_rate=0.300000012, max_delta_step=0, max_depth=6,
             min_child_weight=1, missing=nan, monotone_constraints='()',
             n_estimators=100, n_jobs=4, num_parallel_tree=1, random_state=0,
             reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
             tree_method='exact', validate_parameters=1, verbosity=None)

In [None]:
# Predict the target for every row of the held-out split.
y_pred = model.predict(x_test)

In [None]:
# Print the first 20 predictions next to the corresponding true values,
# one blank line between pairs, for a quick visual sanity check.
for row_idx in range(20):
    predicted = y_pred[row_idx]
    actual = y_test.iloc[row_idx]
    print(
        f"Predicted weight: {predicted}",
        f"Actual weight: {actual}",
        "",
        sep="\n",
    )

Predicted weight: 7.7191009521484375
Actual weight: 6.1068046574

Predicted weight: 1.934630274772644
Actual weight: 8.40181680482

Predicted weight: 7.021804332733154
Actual weight: 6.56316153974

Predicted weight: 8.182778358459473
Actual weight: 7.12534030784

Predicted weight: 7.1645941734313965
Actual weight: 8.9728140634

Predicted weight: 6.906790256500244
Actual weight: 6.8122838958

Predicted weight: 7.341507434844971
Actual weight: 8.4326815215

Predicted weight: 8.147192001342773
Actual weight: 7.5618555866

Predicted weight: 7.095052719116211
Actual weight: 6.8784225744

Predicted weight: 7.8326263427734375
Actual weight: 7.68751907594

Predicted weight: 5.706501483917236
Actual weight: 7.43839671988

Predicted weight: 1.3365039825439453
Actual weight: 3.0622208191799998

Predicted weight: 7.176836967468262
Actual weight: 8.21442388212

Predicted weight: 7.5810627937316895
Actual weight: 8.9948602896

Predicted weight: 7.490711688995361
Actual weight: 7.5618555866

Predicte

In [None]:
# Write the trained model to model.bst in the current working directory;
# this is the file copied to the bucket later in the notebook.
model.save_model('model.bst')

In [None]:
# Print only the project ID (core.project) from the active gcloud configuration.
!gcloud config list project --format "value(core.project)"

vocal-nova-367515


In [None]:
# Deployment settings -- replace with your own GCP project, model name and
# version name before running the cells below.
GCP_PROJECT = 'vocal-nova-367515'
MODEL_NAME = 'baby_weight'
VERSION_NAME = 'v1'
# The bucket URI is built from the project ID.
MODEL_BUCKET = f"gs://{GCP_PROJECT}"

In [None]:
!gsutil mb $MODEL_BUCKET

# gsutil = Google Cloud Storage utility; `mb` = "make bucket" (creates the bucket named by MODEL_BUCKET).

Creating gs://vocal-nova-367515/...


In [None]:
!gsutil cp ./model.bst $MODEL_BUCKET
# gsutil cp <source file> <destination>: copies the local model.bst into the bucket.

Copying file://./model.bst [Content-Type=application/octet-stream]...
/ [1 files][300.6 KiB/300.6 KiB]                                                
Operation completed over 1 objects/300.6 KiB.                                    


In [None]:
# Create an AI Platform model resource named MODEL_NAME, passing us-central1 as the region.
!gcloud ai-platform models create $MODEL_NAME --region=us-central1

Using endpoint [https://us-central1-ml.googleapis.com/]
Created ai platform model [projects/vocal-nova-367515/models/baby_weight].


In [None]:
!gcloud ai-platform versions create $VERSION_NAME \
--model=$MODEL_NAME \
--framework='XGBOOST' \
--runtime-version=2.5 \
--origin=$MODEL_BUCKET \
--python-version=3.7 \
--project=$GCP_PROJECT \
--region=us-central1

# Flags, in order: model name, framework, runtime version, origin location
# (the bucket that holds model.bst), Python version, project, region.

Using endpoint [https://us-central1-ml.googleapis.com/]
Creating version (this might take a few minutes)......done.                    


In [None]:
%%writefile predictions.json
[0.0, 33.0, 1.0, 27.0]
[1.0, 26.0, 1.0, 40.0]

Writing predictions.json


In [None]:
prediction = !gcloud ai-platform predict \
--model=$MODEL_NAME \
--json-instances=predictions.json \
--version=$VERSION_NAME

In [None]:
# Show the captured command output as one line: the output lines joined by
# single spaces (the same string the `.s` attribute of the captured list gives).
print(' '.join(prediction))

Using endpoint [https://us-central1-ml.googleapis.com/] [6.50042724609375, 7.785537242889404]
