# Etapa de teste

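Métricas:
- MSE (erro quadrático médio)
- RMSE (raiz do erro quadrático médio)
- MAE (erro absoluto médio)
- R2 (coeficiente de determinação)
- AR2 (R2 ajustado)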

In [9]:
# Imports
%pip install -r requirements.txt -q

import os
import json
from datetime import datetime
import pandas as pd
from google.cloud import bigquery
from google.cloud import storage
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import joblib

Note: you may need to restart the kernel to use updated packages.


In [None]:
# Load Data
# Read the whole preprocessed table from BigQuery into a pandas DataFrame.
PROJECT_ID = "ai-platform-mockup"
INPUT_DATASET_ID = "preprocessed"
TABLE_ID = "p1_county_natality"

client = bigquery.Client(project=PROJECT_ID)

# The fully-qualified table name is assembled from the constants above.
query = f"SELECT * FROM  `{PROJECT_ID}.{INPUT_DATASET_ID}.{TABLE_ID}`"
query_job = client.query(query)
df = query_job.to_dataframe()

In [6]:
# Load Model
# Download the serialized regressor from the project's model bucket and
# deserialize it. The blob name in the bucket is also used as the local filename.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code; only load artifacts from a bucket you trust.
model_path = "linear_regressor.pkl"
bucket_name = f"vertex-models-{PROJECT_ID}"

storage_client = storage.Client(project=PROJECT_ID)
bucket = storage_client.bucket(bucket_name)
blob = bucket.blob(model_path)
blob.download_to_filename(model_path)

model = joblib.load(model_path)

In [5]:
# Split
# Partition the rows by their "split" label; the label column itself is
# dropped from each partition.
df_train = df[df["split"] == "train"].drop(columns=["split"])
df_test = df[df["split"] == "test"].drop(columns=["split"])
df_eval = df[df["split"] == "eval"].drop(columns=["split"])

In [10]:
# Calculate Metrics
# Score the loaded model on the "eval" partition. Every column except the
# target ("Births") is passed to the model as a feature.
X = df_eval.drop(columns=["Births"])
y_true = df_eval["Births"]
y_pred = model.predict(X)

mse = mean_squared_error(y_true, y_pred)
rmse = mse ** 0.5
mae = mean_absolute_error(y_true, y_pred)
r2 = r2_score(y_true, y_pred)

# Adjusted R2 penalises R2 for the number of predictors:
#   AR2 = 1 - (1 - R2) * (n - 1) / (n - p - 1)
# where n is the number of evaluated rows and p the number of feature columns
# handed to the model (assumes the model uses every column of X as a predictor
# -- TODO confirm). It is undefined when n <= p + 1, so None (JSON null) is
# stored in that case instead of dividing by zero.
n_samples, n_features = X.shape
dof = n_samples - n_features - 1
ar2 = float(1 - (1 - r2) * (n_samples - 1) / dof) if dof > 0 else None

results = {
    "mse": mse,
    "rmse": rmse,
    "mae": mae,
    "r2": r2,
    "ar2": ar2,
}

In [14]:
# Publish Results
# Append one row (timestamp, pipeline name, JSON-encoded metrics) to the
# table in the evaluation dataset that shares the input table's name.
EVALUATION_DATASET_ID = "evaluation"
# Client.dataset() is deprecated; build the DatasetReference directly. The
# client's own project is passed explicitly, which is what Client.dataset()
# used by default.
evaluation_table_ref = bigquery.DatasetReference(
    client.project, EVALUATION_DATASET_ID
).table(TABLE_ID)
job_config = bigquery.LoadJobConfig(
    schema=[
        bigquery.SchemaField("timestamp", "TIMESTAMP"),
        bigquery.SchemaField("pipeline_name", "STRING"),
        bigquery.SchemaField("results", "STRING"),
    ],
    write_disposition="WRITE_APPEND",
)

row = {
    # Explicit timezone-aware UTC timestamp, so the instant stored in the
    # TIMESTAMP column does not depend on the kernel's local timezone.
    "timestamp": pd.Timestamp.now(tz="UTC"),
    "pipeline_name": "pipeline_1",
    # Metrics are stored as a single JSON string rather than separate columns.
    "results": json.dumps(results),
}
df_results = pd.DataFrame([row])

job = client.load_table_from_dataframe(df_results, evaluation_table_ref, job_config=job_config)
# Block until the load job finishes; raises if the load failed.
job.result()

LoadJob<project=ai-platform-mockup, location=US, id=ca1bc643-4f55-41ac-9353-05451908b2dd>

In [15]:
# Clean up
# Delete the locally downloaded model file. The existence check lets this cell
# be re-run (or run when the download never happened) without raising
# FileNotFoundError.
if os.path.exists(model_path):
    os.remove(model_path)