In [1]:
import pandas as pd
import xgboost as xgb
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from google.cloud import bigquery

In [2]:
query="""
SELECT
  weight_pounds,
  is_male,
  mother_age,
  plurality,
  gestation_weeks
FROM
  publicdata.samples.natality
WHERE year > 2000
LIMIT 10000
"""
df = bigquery.Client().query(query).to_dataframe()
df.head()

Unnamed: 0,weight_pounds,is_male,mother_age,plurality,gestation_weeks
0,7.561856,False,18,1,39
1,4.750962,True,22,1,40
2,7.374463,False,28,1,37
3,7.813183,False,31,1,41
4,3.688334,False,29,2,35


In [3]:
df.describe()

Unnamed: 0,weight_pounds,is_male,mother_age,plurality,gestation_weeks
count,9993.0,10000,10000.0,10000.0,9928.0
unique,,2,,,
top,,True,,,
freq,,5152,,,
mean,7.218369,,27.4571,1.0365,38.536664
std,1.309668,,6.122182,0.199429,2.575165
min,0.500449,,12.0,1.0,17.0
25%,6.563162,,23.0,1.0,38.0
50%,7.312733,,27.0,1.0,39.0
75%,8.009394,,32.0,1.0,40.0


In [4]:
df['is_male'].value_counts()

True     5152
False    4848
Name: is_male, dtype: Int64

In [5]:
df = df.dropna()
df = shuffle(df, random_state=2)
df.shape

(9922, 5)

In [6]:
labels = df['weight_pounds']
data = df.drop(columns=['weight_pounds'])

In [17]:
data = data.astype(int)
data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 9922 entries, 6632 to 7395
Data columns (total 4 columns):
 #   Column           Non-Null Count  Dtype
---  ------           --------------  -----
 0   is_male          9922 non-null   int64
 1   mother_age       9922 non-null   int64
 2   plurality        9922 non-null   int64
 3   gestation_weeks  9922 non-null   int64
dtypes: int64(4)
memory usage: 387.6 KB


In [18]:
x,y = data,labels
x_train,x_test,y_train,y_test = train_test_split(x,y)

In [19]:
model = xgb.XGBRegressor(
    objective='reg:squarederror'
)

In [20]:
model.fit(x_train, y_train)

XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
             colsample_bynode=1, colsample_bytree=1, gamma=0, gpu_id=-1,
             importance_type='gain', interaction_constraints='',
             learning_rate=0.300000012, max_delta_step=0, max_depth=6,
             min_child_weight=1, missing=nan, monotone_constraints='()',
             n_estimators=100, n_jobs=4, num_parallel_tree=1, random_state=0,
             reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
             tree_method='exact', validate_parameters=1, verbosity=None)

In [21]:
y_pred = model.predict(x_test)

In [22]:
for i in range(20):
    print('Predicted weight: ', y_pred[i])
    print('Actual weight: ', y_test.iloc[i])
    print()

Predicted weight:  7.2721667
Actual weight:  6.75055446244

Predicted weight:  7.913362
Actual weight:  6.1949895622

Predicted weight:  5.9186172
Actual weight:  6.3118345610599995

Predicted weight:  6.877714
Actual weight:  4.87442061282

Predicted weight:  7.575252
Actual weight:  5.6879263596

Predicted weight:  7.3492627
Actual weight:  6.72630361362

Predicted weight:  6.4987435
Actual weight:  7.5618555866

Predicted weight:  7.2721667
Actual weight:  7.25100379718

Predicted weight:  6.608408
Actual weight:  5.18747702486

Predicted weight:  8.311152
Actual weight:  7.12534030784

Predicted weight:  6.46221
Actual weight:  6.1883756943399995

Predicted weight:  7.3580246
Actual weight:  7.38989502224

Predicted weight:  7.541404
Actual weight:  5.6879263596

Predicted weight:  6.8899765
Actual weight:  6.0627122049999995

Predicted weight:  7.5040865
Actual weight:  7.31273323054

Predicted weight:  8.347335
Actual weight:  9.43798943622

Predicted weight:  3.7367203
Actual we

In [23]:
model.save_model('model.bst')

In [30]:
project_id = !gcloud config list project --format "value(core.project)"
GCP_PROJECT = project_id[0]
print(GCP_PROJECT)

mlops-deeplearning


In [31]:
MODEL_BUCKET = 'gs://' + GCP_PROJECT
VERSION_NAME = 'v1'
MODEL_NAME = 'baby_weight'

In [32]:
#Creating a bucket
!gsutil mb $MODEL_BUCKET

Creating gs://mlops-deeplearning/...


In [33]:
#Saving our XGBoost model
!gsutil cp ./model.bst $MODEL_BUCKET

Copying file://./model.bst [Content-Type=application/octet-stream]...
/ [1 files][319.2 KiB/319.2 KiB]                                                
Operation completed over 1 objects/319.2 KiB.                                    


In [34]:
#Creating ai platform model
!gcloud ai-platform models create $MODEL_NAME --region=us-central1

Using endpoint [https://us-central1-ml.googleapis.com/]
Created ai platform model [projects/mlops-deeplearning/models/baby_weight].


In [35]:
#Deploying model
!gcloud ai-platform versions create $VERSION_NAME \
--model=$MODEL_NAME \
--framework='XGBOOST' \
--runtime-version=2.5 \
--origin=$MODEL_BUCKET \
--python-version=3.7 \
--project=$GCP_PROJECT \
--region=us-central1

Using endpoint [https://us-central1-ml.googleapis.com/]
Creating version (this might take a few minutes)......done.                    


In [37]:
%%writefile predictions.json
[0.0, 31.0, 1.0, 35.0]
[0.0, 31.0, 1.0, 36.0]
[0.0, 31.0, 1.0, 37.0]
[0.0, 31.0, 1.0, 38.0]
[0.0, 31.0, 1.0, 39.0]
[0.0, 31.0, 1.0, 40.0]
[0.0, 31.0, 1.0, 41.0]
[0.0, 31.0, 1.0, 42.0]

Overwriting predictions.json


In [69]:
prediction = !gcloud ai-platform predict --model=$MODEL_NAME --json-instances=predictions.json --version=$VERSION_NAME --format=table
print(prediction)

['Using endpoint [https://us-central1-ml.googleapis.com/]', 'ERROR: (gcloud.ai-platform.predict) Format [value] requires a non-empty projection. Use key parameters to specify a projection like so `value(foo, bar.baz)`']


In [67]:
import json
preds = []
for i in range(len(prediction)):
    if i > 2 and i < len(prediction)-1:
        preds.append(float(prediction[i]))
        
preds

ValueError: could not convert string to float: '    6.205345153808594,'