In [None]:
# !pip install google-cloud-aiplatform --user --upgrade

# Sklearn with Pandas


Create a custom model for evaluation and batch prediction

In [None]:
# !gsutil mb -l us-central1 gs://jsw-model-artifacts

Creating gs://jsw-model-artifacts/...


In [None]:
# Notebook-wide settings: edit these before running the remaining cells.
REGION = "us-central1"  # location passed to the Vertex AI SDK
PROJECT_ID = "wortz-project-352116"  # SET THIS TO YOUR PROJECT ID
# Cloud Storage bucket that holds the model artifacts. It must already exist;
# create it with: gsutil mb -l <REGION> <BUCKET>
BUCKET = "gs://jsw-model-artifacts"

# Generate synthetic data


In [None]:
import pandas as pd
import numpy as np  # for the random integer example

# Seed NumPy's legacy global RNG so the synthetic draws below are repeatable.
np.random.seed(1234)

# Three standard-normal feature columns followed by one standard-normal label
# column. The draw order (x before y) matters: both consume the same seeded stream.
x = np.random.normal(loc=0.0, scale=1.0, size=(10, 3))
y = np.random.normal(loc=0.0, scale=1.0, size=(10, 1))

# Place the label to the right of the features and number the rows 10..19.
df = pd.DataFrame(
    data=np.concatenate((x, y), axis=1),
    index=range(10, 20),
    columns=["col1", "col2", "col3", "label"],
    dtype="float64",
)

In [4]:
# Display the synthetic frame (10 rows: three feature columns plus the label).
df

Unnamed: 0,col1,col2,col3,label
10,0.471435,-1.190976,1.432707,-0.39784
11,-0.312652,-0.720589,0.887163,0.337438
12,0.859588,-0.636524,0.015696,1.047579
13,-2.242685,1.150036,0.991946,1.045938
14,0.953324,-2.021255,-0.334077,0.863717
15,0.002118,0.405453,0.289092,-0.122092
16,1.321158,-1.546906,-0.202646,0.124713
17,-0.655969,0.193421,0.553439,-0.322795
18,1.318152,-0.469305,0.675554,0.841675
19,-1.817027,-0.183109,1.058969,2.390961


# Create model

In [None]:
from sklearn.linear_model import LinearRegression


# Fit a linear regression of `label` on the three feature columns.
# The model is fitted exactly once; fit() returns the estimator itself, so
# leaving the call as the cell's last expression also displays the fitted model.
reg = LinearRegression()
reg.fit(df[["col1", "col2", "col3"]], df["label"])

# Upload model with latest prebuilt container

In [None]:
import os
import pickle

# import joblib
# from sklearn.externals import joblib


# The artifact is written with pickle, hence the .pkl file name.
# NOTE(review): the original note here said a Custom Prediction Routine (CPR)
# needs a joblib artifact instead; confirm before switching serving approaches.
artifact_filename = "model.pkl"

# Serialize the fitted regressor into the notebook's working directory. The
# original author notes that this local copy does not persist.
with open(artifact_filename, "wb") as artifact_handle:
    pickle.dump(reg, artifact_handle)

#### Upload the model artifact to GCS

In [18]:
! gsutil cp $artifact_filename $BUCKET/regression_pkl/model/

Copying file://model.pkl [Content-Type=application/octet-stream]...
/ [0 files][    0.0 B/  559.0 B]                                                / [1 files][  559.0 B/  559.0 B]                                                
Operation completed over 1 objects/559.0 B.                                      


In [20]:
! gsutil ls $BUCKET/regression_pkl/model

gs://jsw-model-artifacts/regression_pkl/model


# Use a standard Sklearn Image and register the model

In [None]:
# Prebuilt scikit-learn container, picked from the list at
# https://cloud.google.com/vertex-ai/docs/training/pre-built-containers#scikit-learn
# NOTE(review): the "1-5" tag presumably has to match the scikit-learn version
# used to train the model; confirm against the training environment.
serving_container_image_uri = "us-docker.pkg.dev/vertex-ai/prediction/sklearn-cpu.1-5:latest"

In [None]:
from google.cloud import aiplatform

# Point the Vertex AI SDK at the project and region set in the configuration cell.
aiplatform.init(
    project=PROJECT_ID,
    location=REGION,
)

In [None]:
MODEL_DISPLAY_NAME = "pandas test REGRESSION"

# Resource name of an existing Model Registry entry; the upload is registered as
# a new version of that model. This value belongs to the original author's
# project: replace it with a model from your own project, or set it to None to
# register a brand-new model instead of a new version.
PARENT_MODEL = "projects/679926387543/locations/us-central1/models/659756454688849920"

# Register the artifact folder in Cloud Storage with the prebuilt serving image.
model = aiplatform.Model.upload(
    display_name=MODEL_DISPLAY_NAME,
    artifact_uri=f"{BUCKET}/regression_pkl/model",
    serving_container_image_uri=serving_container_image_uri,
    parent_model=PARENT_MODEL,
)

INFO:google.cloud.aiplatform.models:Creating Model
INFO:google.cloud.aiplatform.models:Create Model backing LRO: projects/679926387543/locations/us-central1/models/659756454688849920/operations/8957991814430720000
INFO:google.cloud.aiplatform.models:Model created. Resource name: projects/679926387543/locations/us-central1/models/659756454688849920@4
INFO:google.cloud.aiplatform.models:To use this Model in another session:
INFO:google.cloud.aiplatform.models:model = aiplatform.Model('projects/679926387543/locations/us-central1/models/659756454688849920@4')
