In [3]:
from google.cloud import storage
from google.cloud import bigquery
from google.cloud import aiplatform

import datetime
import pandas as pd
import joblib

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score

In [4]:
bqclient = bigquery.Client()

In [5]:
query_string = "SELECT *  FROM `gcp-onsite-training.diabetic_retinapathy.retinapathy-diagnosis` "

dataframe = (
    bqclient.query(query_string)
    .result()
    .to_dataframe(
        create_bqstorage_client=True,
    )
)

dataframe.head()

Unnamed: 0,Age,Systolic_BP,Diastolic_BP,Cholesterol,label
0,66.890987,93.205124,85.27031,106.465128,0.0
1,62.929328,101.524942,95.684936,102.916473,0.0
2,74.275743,91.245927,78.204968,99.417104,0.0
3,58.26702,101.259314,86.790774,102.242965,0.0
4,62.744349,89.883434,80.923457,102.083878,0.0


In [6]:
dataframe.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6000 entries, 0 to 5999
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Age           6000 non-null   float64
 1   Systolic_BP   6000 non-null   float64
 2   Diastolic_BP  6000 non-null   float64
 3   Cholesterol   6000 non-null   float64
 4   label         6000 non-null   float64
dtypes: float64(5)
memory usage: 234.5 KB


In [46]:
X_train, X_test, y_train, y_test = train_test_split(dataframe.drop(columns="label"), dataframe.label, train_size=0.7, random_state=100)
print("X_train shape:", X_train.shape)
print("X_test shape:", X_test.shape)

X_train shape: (4200, 4)
X_test shape: (1800, 4)


In [57]:
# Create the classifier
classifier = RandomForestClassifier(n_estimators=100)

classifier.fit(X_train, y_train)

RandomForestClassifier()

In [58]:
y_train_pred = classifier.predict(X_train)

In [59]:
f1_score(y_train_pred, y_train)

1.0

In [60]:
y_test_pred = classifier.predict(X_test)

In [61]:
f1_score(y_test_pred, y_test)

0.7261904761904763

In [None]:
# !gsutil mb -b on -l us-east1 gs://retinapaty-artifacts/

In [73]:
# Export the model to a file
model = 'model.joblib'
joblib.dump(classifier, model)

# Upload the model to GCS
bucket = storage.Client().bucket("retinapaty-artifacts")
blob = bucket.blob('{}/{}'.format(
    datetime.datetime.now().strftime('retinapathy_%Y%m%d_%H%M%S'),
    model))
blob.upload_from_filename(model)

In [78]:
diabetic_model = aiplatform.Model.upload(display_name="diabetic-retinapathy-model-v5", 
                                           artifact_uri="gs://retinapaty-artifacts/retinapathy_20220518_182529", 
                                           serving_container_image_uri="us-docker.pkg.dev/vertex-ai/prediction/sklearn-cpu.1-0:latest")

Creating Model
Create Model backing LRO: projects/909656045816/locations/us-central1/models/1391387984485416960/operations/7658812688981557248
Model created. Resource name: projects/909656045816/locations/us-central1/models/1391387984485416960
To use this Model in another session:
model = aiplatform.Model('projects/909656045816/locations/us-central1/models/1391387984485416960')


In [79]:
endpoint = diabetic_model.deploy(deployed_model_display_name="retinapathy-predicton", 
                                 traffic_split={"0": 100}, 
                                 machine_type="n1-standard-4", 
                                 accelerator_count=0, 
                                 min_replica_count=1, 
                                 max_replica_count=1)

Creating Endpoint
Create Endpoint backing LRO: projects/909656045816/locations/us-central1/endpoints/8413031967183863808/operations/5599541759366397952
Endpoint created. Resource name: projects/909656045816/locations/us-central1/endpoints/8413031967183863808
To use this Endpoint in another session:
endpoint = aiplatform.Endpoint('projects/909656045816/locations/us-central1/endpoints/8413031967183863808')
Deploying model to Endpoint : projects/909656045816/locations/us-central1/endpoints/8413031967183863808
Deploy Endpoint model backing LRO: projects/909656045816/locations/us-central1/endpoints/8413031967183863808/operations/1894205165947322368
Endpoint model deployed. Resource name: projects/909656045816/locations/us-central1/endpoints/8413031967183863808


In [100]:
instances = [{
    "Age": 63.5,
    "Cholesterol": 110.38241054933151,
    "Diastolic_BP": 84.852360692033216,
    "Systolic_BP": 99.379735822842008
}]

retinapathy_endpoint = aiplatform.Endpoint(endpoint_name="8413031967183863808")
retinapathy_endpoint.predict(instances=[list(instances[0].values())])

Prediction(predictions=[1.0], deployed_model_id='3236812698591166464', explanations=None)

In [90]:
list(instances[0].values())

[63.5, 110.38241054933151, 84.85236069203322, 99.37973582284201]