# Vertex AI
Use the service "Experiments" directly in vertex to tracking the results of the models trained

Examples codes to understand the tool

### 0. Packages

In [1]:
# ml packages
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn import datasets
import seaborn as sns
import pickle
import os

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score

# vertex gcp
import google.cloud.aiplatform as aiplatform

### 1. Generate parameters, variables and artifacts to save (dummy examples)

Create very simple example to focus in show how to use the tool

#### 1.1 Generate Data

In [2]:
#### generate data ###

# parameters data
len_data = 1000
number_columns = 6
data = []
list_variables = ["240FY050.RO02" , "SGM-PI9514", "SSTRIPPING015", "SGM-PI9516" , "SGM-PI9512", "target"]


# seed
np.random.seed(42)

# generate random data
for column in range(number_columns):
    random_choise = np.random.choice(10) + 1 # amplitud
    data_column = np.random.rand(len_data)
    data_column = random_choise * data_column
    data.append(data_column)
    
# to dataframe
data = pd.DataFrame(data).T
data.columns = list_variables

# split train y test
features = list(set(list_variables) - set(['target']))
X_train, X_test, y_train, y_test = train_test_split(data[features], data['target'], test_size = 0.2, random_state=42)

print('TRAIN')
print('X_train', X_train.shape)
print('y_train', y_train.shape)

print('\nTEST')
print('X_test', X_test.shape)
print('y_test', y_test.shape)

TRAIN
X_train (800, 5)
y_train (800,)

TEST
X_test (200, 5)
y_test (200,)


#### 1.2 Train model

In [3]:
#### train model ####
model = LinearRegression()
#model = RandomForestRegressor(random_state = 42)
model.fit(X_train, y_train)



#### predicción y evaluación ####

### RMSE
rmse_train = mean_squared_error(y_train, 
                                model.predict(X_train),
                                squared = False)

rmse_test = mean_squared_error(y_test, 
                               model.predict(X_test),
                               squared = False)


### R2
r2_score


r2_train = r2_score(y_train,
                   model.predict(X_train))

r2_test = r2_score(y_test,
                   model.predict(X_test))

#### 1.3 Generate outputs to save
In this example, the outputs to save are genated in the final step, but it can be genrated while the code is running and use its python variables generated

#### Parameters

In [4]:
# print previous parameter defined - list tags
print('list tags:\n', list_variables)

list tags:
 ['240FY050.RO02', 'SGM-PI9514', 'SSTRIPPING015', 'SGM-PI9516', 'SGM-PI9512', 'target']


In [5]:
# type of model trained - define a string with the name
model_type = "RF"
model_type

'RF'

In [6]:
# dates of training
start_train = "2020-01-01"
end_train = "2022-12-01"

#### Metrics

In [7]:
# print metrics
print('rmse_train: ', rmse_train)
print('rmse_test: ', rmse_test)
print('r2_train: ', r2_train)
print('r2_test: ', r2_test)

rmse_train:  1.4387312960740368
rmse_test:  1.4632467048976407
r2_train:  0.007202043897398536
r2_test:  -0.011984948488191094


#### Artifacts

In [8]:
# generate a pickle with the model
model_name = 'model.pkl'
with open(model_name, 'wb') as file:
    pickle.dump(model, file)

In [9]:
# generar artefacto data csv y borrar del local
data_name = 'data.csv'
data.to_csv(data_name)

### 2. Registry Experiment Vertex AI
- Documentation codes Vertex: https://cloud.google.com/python/docs/reference/aiplatform/latest/google.cloud.aiplatform.Experiment

Notebooks with examples:
- Oficial repo gcp: https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/community/experiments/vertex_ai_model_experimentation.ipynb
- medium post: https://medium.com/google-cloud/machine-learning-experiment-tracking-with-vertex-ai-8406f8d44376

The objetive is use only the experiment resources and NOT create a tensorboard instance for the high costs that has

In [10]:
# show version of vertex used. The version of vertex is constantly updated
!pip show google-cloud-aiplatform

Name: google-cloud-aiplatform
Version: 1.35.0
Summary: Vertex AI API client library
Home-page: https://github.com/googleapis/python-aiplatform
Author: Google LLC
Author-email: googleapis-packages@google.com
License: Apache 2.0
Location: d:\anaconda\envs\data-science-python-3-10\lib\site-packages
Requires: google-api-core, google-cloud-bigquery, google-cloud-resource-manager, google-cloud-storage, packaging, proto-plus, protobuf, shapely
Required-by: 


In [11]:
# PARAMETERS GCP
project_gcp = 'cmpc-innovation-cd4ml-test'
location_gcp = 'us-east1'

In [12]:
# PARAMETERS CREATE AN EXPERIMENT IN VERTEX AI. # obs: In names only are accepted '[a-z0-9][a-z0-9-]{0,127}'
EXPERIMENT_NAME = 'test-experiment-vertex'
EXPERIMENT_DESCRIPTION = 'Introduction to save experiment in vertex'

#### 2.1 Create experiment
Create an experiment and inside of this, different runs are created to save the results of diferents models trained

In [13]:
# initialize vertex
aiplatform.init(
       project = project_gcp,
       location = location_gcp
    )

# you can create inmediatly and experiment and a tensorboard instance.
# aiplatform.init(
#        project = project_gcp,
#        location = location_gcp,
#        experiment = experiment_name,
#     )

In [14]:
# Create a new experiment in Vertex AI Experiments. The experiment can be created only once
my_experiment = aiplatform.Experiment.create(EXPERIMENT_NAME, 
                                             description = EXPERIMENT_DESCRIPTION)
my_experiment

<google.cloud.aiplatform.metadata.experiment_resources.Experiment at 0x11ae6cccc10>

#### 2.2 Create Run
create a run were save the results of the model trained

In [15]:
# PARAMETERS CREATE A RUN IN A VERTEX AI "EXPERIMENT"
RUN_NAME = "run-10"

In [16]:
# create a run
my_run = aiplatform.ExperimentRun.create(RUN_NAME, 
                                         experiment = EXPERIMENT_NAME)

Associating projects/724348686027/locations/us-east1/metadataStores/default/contexts/test-experiment-vertex-run-10 to Experiment: test-experiment-vertex


#### 2.3 Save parameters

In [17]:
# define params to save. In a dicctionary
params_to_save = {
    'learning_rate': 0.1,
    'dropout_rate': 0.2,
    'list_features': str(list_variables), # only support strings
    'model_type': model_type,
    'start_train': start_train,
    'end_train': end_train
}

# save parameters
my_run.log_params(params_to_save)

#### 2.4 Save metrics

In [18]:
# define metrics to save. In a dicctionary
metrics_to_save = {
    'rmse_train': rmse_train,
    'rmse_test': rmse_test,
    'r2_train': r2_train,
    'r2_test': r2_test
}

# save metrics
my_run.log_metrics(metrics_to_save)

In [19]:
#### obs: the metrics can be replaced any times that you want/need
metrics_to_save = {
    'rmse_train': rmse_train,
    'rmse_test': rmse_test,
    'r2_train': r2_train + 300,
    'r2_test': r2_test + 300
}

# save metrics
my_run.log_metrics(metrics_to_save)

In [20]:
### obs: in adition you can add new metrics without loss the previous values
metrics_to_save = {
    'loss_train': 0.0001,
    'loss_test': 100
}

# save metrics
my_run.log_metrics(metrics_to_save)

OBS: with the parameters is the same history

#### 2.5 Save time series
Save metrics during the training phase. For example, acc, loss, etc. in each step of training the NN.

These metrics must be stored in a vertex tensorboard, which is too expensive ($300 per person. See reference links).

In [21]:
#my_run.log_time_series_metrics({'accuracy': 0.9}, step=10)

#### 2.6 Save artifacts
This is not posible to save artifacts without creating a tensorboard instance. Before the tensorboard instance was very expensive but since August 2023 tensorboard was cheaper

#### 2.7 End Run

In [28]:
# terminar
my_run.end_run()

In [29]:
# remove artifacts saved locally
os.remove(model_name)
os.remove(data_name)