# Deploying Iris-detection model using Vertex AI


## Problem Statement

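Set up an ML pipeline for an Iris classifier on Vertex AI: stage the data in a Cloud Storage bucket, train a decision tree, upload the model artifact, and run inference on the v1 and v2 datasets.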


## Setting up the configuration



In [1]:

# Vertex SDK for Python
! pip3 install --upgrade --quiet  google-cloud-aiplatform

In [2]:
# Google Cloud project and region, used by the bucket-creation command and by
# aiplatform.init() further down.
PROJECT_ID = "gentle-presence-472611-u8"  # @param {type:"string"}
LOCATION = "us-central1"  # @param {type:"string"}

### Create a Cloud Storage bucket


In [3]:
# Bucket for the data and model artifacts. The name is derived from PROJECT_ID
# (bucket names must be globally unique) instead of repeating the id by hand.
BUCKET_URI = f"gs://mlops-course-{PROJECT_ID}-v4-unique-week1"  # @param {type:"string"}

In [4]:
! gsutil mb -l {LOCATION} -p {PROJECT_ID} {BUCKET_URI}

Creating gs://mlops-course-gentle-presence-472611-u8-v4-unique-week1/...
ServiceException: 409 A Cloud Storage bucket named 'mlops-course-gentle-presence-472611-u8-v4-unique-week1' already exists. Try another name. Bucket names must be globally unique across all Google Cloud projects, including those outside of your organization.


### Initialize Vertex AI SDK for Python


In [5]:
from google.cloud import aiplatform

# Point the SDK at the project, region and staging bucket configured above.
aiplatform.init(
    project=PROJECT_ID,
    location=LOCATION,
    staging_bucket=BUCKET_URI,
)

### Import the required libraries

In [6]:
import os
import sys

### Configure resource names

Set a name for the following parameters:

`MODEL_ARTIFACT_DIR` - Directory path to your model artifacts within the Cloud Storage bucket, for example: "my-models/fraud-detection/trial-4"

`REPOSITORY` - Name of the Artifact Repository to create or use.

`IMAGE` - Name of the container image that is pushed to the repository.

`MODEL_DISPLAY_NAME` - Display name of Vertex AI model resource.

In [7]:
MODEL_ARTIFACT_DIR = "my-models/iris-classifier-week-1"  # @param {type:"string"}
REPOSITORY = "iris-classifier-repo"  # @param {type:"string"}
IMAGE = "iris-classifier-img"  # @param {type:"string"}
MODEL_DISPLAY_NAME = "iris-classifier"  # @param {type:"string"}

# A value still equal to its template placeholder counts as "not specified":
# swap in the matching default and leave every other value untouched.
MODEL_ARTIFACT_DIR = (
    "custom-container-prediction-model"
    if MODEL_ARTIFACT_DIR == "[your-artifact-directory]"
    else MODEL_ARTIFACT_DIR
)
REPOSITORY = (
    "custom-container-prediction"
    if REPOSITORY == "[your-repository-name]"
    else REPOSITORY
)
IMAGE = (
    "sklearn-fastapi-server"
    if IMAGE == "[your-image-name]"
    else IMAGE
)
MODEL_DISPLAY_NAME = (
    "sklearn-custom-container"
    if MODEL_DISPLAY_NAME == "[your-model-display-name]"
    else MODEL_DISPLAY_NAME
)

## Storing and fetching the data from the bucket


In [8]:
! gsutil cp data/iris.csv {BUCKET_URI}/data/iris.csv

Copying file://data/iris.csv [Content-Type=text/csv]...
/ [1 files][  3.8 KiB/  3.8 KiB]                                                
Operation completed over 1 objects/3.8 KiB.                                      


In [9]:
! gsutil cp data/v1/data.csv {BUCKET_URI}/data/v1.csv

Copying file://data/v1/data.csv [Content-Type=text/csv]...
/ [1 files][  2.5 KiB/  2.5 KiB]                                                
Operation completed over 1 objects/2.5 KiB.                                      


In [10]:
! gsutil cp data/v2/data.csv {BUCKET_URI}/data/v2.csv

Copying file://data/v2/data.csv [Content-Type=text/csv]...
/ [1 files][  1.3 KiB/  1.3 KiB]                                                
Operation completed over 1 objects/1.3 KiB.                                      


In [11]:
# Fetch the uploaded dataset from Cloud Storage.
import pandas as pd

# BUCKET_URI already starts with "gs://", so it must not be prefixed again.
data = pd.read_csv(f"{BUCKET_URI}/data/iris.csv")

In [12]:
import numpy as np
from sklearn.model_selection import train_test_split
from pandas.plotting import parallel_coordinates
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn import metrics

# Preview the first five rows (head() defaults to n=5).
data.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [13]:
# Stratified 60/40 split on the species label, with a fixed seed.
feature_columns = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']
train, test = train_test_split(
    data,
    test_size=0.4,
    stratify=data['species'],
    random_state=42,
)

# Separate the four measurement columns from the label for each split.
X_train, y_train = train[feature_columns], train['species']
X_test, y_test = test[feature_columns], test['species']

In [14]:
# Fit a depth-3 decision tree and report its accuracy on the held-out split.
mod_dt = DecisionTreeClassifier(max_depth=3, random_state=1)
mod_dt.fit(X_train, y_train)
prediction = mod_dt.predict(X_test)

# accuracy_score takes (y_true, y_pred), so the true labels go first.
accuracy = metrics.accuracy_score(y_test, prediction)
print(f"The accuracy of the Decision Tree is {accuracy:.3f}")

The accuracy of the Decision Tree is 0.983


In [15]:
import pickle
import joblib
import time

# Each run writes its model into its own folder named after the current time.
timestamp = time.strftime("%Y%m%d_%H%M%S")
OUTPUT_FOLDER = "artifacts/" + timestamp
os.makedirs(OUTPUT_FOLDER, exist_ok=True)

# joblib.dump returns the list of written file names, shown as the cell output.
model_path = OUTPUT_FOLDER + "/model.joblib"
joblib.dump(mod_dt, model_path)

['artifacts/20251004_064926/model.joblib']

## Upload model artifacts to bucket


In [16]:
!gsutil cp "{OUTPUT_FOLDER}/model.joblib" {BUCKET_URI}/{MODEL_ARTIFACT_DIR}/{timestamp}/

Copying file://artifacts/20251004_064926/model.joblib [Content-Type=application/octet-stream]...
/ [1 files][  2.5 KiB/  2.5 KiB]                                                
Operation completed over 1 objects/2.5 KiB.                                      


## Inference 

In [17]:
# Reload the model from the local artifact folder written earlier in this run.
model_file = f"{OUTPUT_FOLDER}/model.joblib"
inf_model = joblib.load(model_file)

In [18]:
# Read the v1 and v2 datasets. BUCKET_URI already carries the "gs://" scheme,
# so it is used as-is instead of being prefixed a second time.
data_v1 = pd.read_csv(f"{BUCKET_URI}/data/v1.csv")
data_v2 = pd.read_csv(f"{BUCKET_URI}/data/v2.csv")

# Drop the label column before prediction.
data_v1 = data_v1.drop(columns="species")
data_v2 = data_v2.drop(columns="species")

In [19]:
# Score both datasets with the model reloaded from the artifact folder.
y_pred_v1, y_pred_v2 = [inf_model.predict(frame) for frame in (data_v1, data_v2)]

In [20]:
# DataFrame.to_csv returns None when given a path, so keep the frame in out_v1
# and write it to the bucket as a separate step.
# NOTE(review): the object name has no ".csv" extension — confirm this is intended.
out_v1 = pd.DataFrame({"predicted_v1": y_pred_v1})
out_v1.to_csv(f"{BUCKET_URI}/{MODEL_ARTIFACT_DIR}/v1/{timestamp}", index=False)

In [21]:
# DataFrame.to_csv returns None when given a path, so keep the frame in out_v2
# and write it to the bucket as a separate step.
# NOTE(review): the object name has no ".csv" extension — confirm this is intended.
out_v2 = pd.DataFrame({"predicted_v2": y_pred_v2})
out_v2.to_csv(f"{BUCKET_URI}/{MODEL_ARTIFACT_DIR}/v2/{timestamp}", index=False)