## Set-up

In [2]:
from google.cloud import bigquery
from google.cloud import storage
from tqdm import tqdm
import matplotlib.pyplot as plt
import ipywidgets
import numpy as np
from google.cloud import aiplatform
import pandas as pd


In [3]:
# Notebook-wide configuration.
# The project id is set explicitly here. The previous
# `!PROJECT_ID=$(gcloud config get-value project)` line was dropped because a
# `!` command runs in its own subshell, so the assignment never reached Python.
PROJECT_ID = "bqml-sandbox-396011"
# Region passed to aiplatform.init() and to the Vertex AI endpoint.
VERTEX_AI_LOCATION = 'europe-west4'

In [4]:
# Load the BigQuery IPython extension; it provides the %%bigquery cell magic used in later cells.
%load_ext google.cloud.bigquery

In [5]:
# Initialise the Vertex AI SDK with the project and region from the config cell.
aiplatform.init(
    location=VERTEX_AI_LOCATION,
    project=PROJECT_ID,
)

## Overview

There are four ways to export BigQuery ML models:
1. by using the Google Cloud Console,
2. by using the `EXPORT MODEL` statement,
3. by using the `bq extract` command,
4. by using the API or a client library.

Most of the time the model is saved by default as a `TensorFlow SavedModel`.

In [6]:
# list all models
!bq ls -m --format=pretty $PROJECT_ID:BQ_ML_ID

+-------------------------------------+--------------------------------+--------+-----------------+
|                 Id                  |           Model Type           | Labels |  Creation Time  |
+-------------------------------------+--------------------------------+--------+-----------------+
| AUTO_KERAS_MODEL                    | TENSORFLOW                     |        | 16 Sep 17:29:09 |
| AUTO_ML                             | AUTOML_CLASSIFIER              |        | 10 Sep 13:27:47 |
| BASE_LOGISTIC_REGRESSION            | LOGISTIC_REGRESSION            |        | 10 Sep 08:38:52 |
| DIMMENSIONALITY_REDUCTION_PCA       | PCA                            |        | 18 Sep 13:22:09 |
| DNN                                 | DNN_LINEAR_COMBINED_CLASSIFIER |        | 10 Sep 11:43:48 |
| LOGISTIC_REGRESSION_WITH_HP_TUNNING | LOGISTIC_REGRESSION            |        | 10 Sep 11:58:26 |
| LOGISTIC_REGRESSION_WITH_PCA        | LOGISTIC_REGRESSION            |        | 18 Sep 13:33:22 |


In [None]:
# Create the Cloud Storage bucket that stores the exported models.
# The cell is safe to re-run: an existing bucket is reused instead of
# triggering (and silently swallowing) a creation error.

project_id = PROJECT_ID  # reuse the project from the config cell instead of repeating the literal
bucket_name = "bq-ml-store"
default_storage_class = "STANDARD"
bucket_location = "EU"

# Initialize the client
client = storage.Client(project=project_id)

# lookup_bucket() returns None when the bucket does not exist, so the
# "already exists" case is handled explicitly. Any other failure
# (authentication, permissions, invalid name) is left to raise, so the
# export cells do not go on to write to a bucket that is not there.
bucket = client.lookup_bucket(bucket_name)
if bucket is not None:
    print(f"Bucket '{bucket_name}' already exists; reusing it.")
else:
    bucket = client.bucket(bucket_name)
    bucket.storage_class = default_storage_class
    # The location is passed to create() rather than assigned to bucket.location.
    bucket.create(location=bucket_location)
    print(f"Bucket '{bucket_name}' created with default storage class '{default_storage_class}'.")


In [8]:
# Export the BASE_LOGISTIC_REGRESSION model to the bq-ml-store bucket with the bq command-line tool.
!bq extract --model 'BQ_ML_ID.BASE_LOGISTIC_REGRESSION' gs://bq-ml-store/base-logistic-regression

Waiting on bqjob_rafb471f5315eeba_0000018ac65d5eab_1 ... (23s) Current status: DONE   


In [9]:
%%bigquery
-- Export the DNN model to the bq-ml-store bucket with the EXPORT MODEL statement.

 EXPORT MODEL `BQ_ML_ID.DNN`
 OPTIONS(URI = 'gs://bq-ml-store/dnn')
 

Query is running:   0%|          |

## Register the model in Vertex AI

In [None]:
%%bigquery
-- Set vertex_ai_model_id on the model; the deployment cell later loads the model under this same id.
ALTER MODEL BQ_ML_ID.BASE_LOGISTIC_REGRESSION SET OPTIONS (vertex_ai_model_id="base_logistic_regression");

## Deploying Model in Vertex AI

In [None]:
# Create the endpoint that will serve the model, or reuse it if it already exists.
# Endpoint.create() always makes a brand-new endpoint, so re-running this cell
# used to leave several endpoints with the same display name behind.
ENDPOINT_DISPLAY_NAME = "base_logistic_regression"

existing_endpoints = aiplatform.Endpoint.list(
    filter=f'display_name="{ENDPOINT_DISPLAY_NAME}"',
    order_by="create_time desc",  # newest first, so index 0 is the most recent match
    project=PROJECT_ID,
    location=VERTEX_AI_LOCATION,
)

if existing_endpoints:
    endpoint = existing_endpoints[0]
else:
    endpoint = aiplatform.Endpoint.create(
        display_name=ENDPOINT_DISPLAY_NAME,
        project=PROJECT_ID,
        location=VERTEX_AI_LOCATION,
    )


In [None]:
# Load the model by the id "base_logistic_regression" and deploy it to `endpoint`.
model = aiplatform.Model(model_name="base_logistic_regression")

deploy_settings = {
    "endpoint": endpoint,
    "deployed_model_display_name": "base_logistic_regression",
    # The endpoint hosts a single model, so it must receive all of the traffic.
    "traffic_percentage": 100,
    "machine_type": "n1-standard-2",
    "min_replica_count": 1,
    "max_replica_count": 4,
    # No accelerators are requested.
    "accelerator_type": None,
    "accelerator_count": None,
    "sync": True,
}
model.deploy(**deploy_settings)

model.wait()

In [None]:
## Sample prediction
# TODO(review): the original cell was marked "To TEST" - confirm this request works against the endpoint.
example = dict(
    island="Dream",
    culmen_length_mm=36.6,
    culmen_depth_mm=18.4,
    flipper_length_mm=184.0,
    body_mass_g=3475.0,
    sex="FEMALE",
)

# Send the single example as a one-element batch of instances.
prediction = endpoint.predict(instances=[example])
print(prediction)


## Importing a Model to BigQuery ML

Models defined and trained outside of BigQuery ML can also be imported into the service.
The supported formats are:
1. XGBoost,
2. TensorFlow,
3. TensorFlow Lite,
4. Open Neural Network Exchange (ONNX).


### Importing an XGBoost model

I tried importing XGBoost models, but there were many errors, mainly because BigQuery ML does not currently support the latest version of XGBoost, only versions below 1.5.1.

### Importing the Tensorflow model 

BigQuery ML should work well with TensorFlow because they are both created by Google. <br>
Keras is a high-level interface for TensorFlow which simplifies building deep-learning models. <br>
AutoKeras is a library that performs automated machine learning using Keras. <br>
It was developed at Texas A&M University. <br>
Check out the website https://autokeras.com and the paper https://jmlr.org/papers/v24/20-1355.html.

The training of the model is performed in `keras_model.ipynb`.

In [18]:
# Copy the saved model files to Cloud Storage.
# It turns out that you can copy to a folder that does not exist yet; it is created automatically.
!gsutil cp -r auto_keras_classifier/* gs://default-credit-clients-2023/auto_keras_classifier

Copying file://auto_keras_classifier/fingerprint.pb [Content-Type=application/octet-stream]...
Copying file://auto_keras_classifier/keras_metadata.pb [Content-Type=application/octet-stream]...
Copying file://auto_keras_classifier/saved_model.pb [Content-Type=application/octet-stream]...
Copying file://auto_keras_classifier/variables/variables.index [Content-Type=application/octet-stream]...
- [4 files][454.7 KiB/454.7 KiB]                                                
==> NOTE: You are performing a sequence of gsutil operations that may
run significantly faster if you instead use gsutil -m cp ... Please
see the -m section under "gsutil help options" for further information
about when gsutil -m can be advantageous.

Copying file://auto_keras_classifier/variables/variables.data-00000-of-00001 [Content-Type=application/octet-stream]...
- [5 files][607.4 KiB/607.4 KiB]                                                
Operation completed over 5 objects/607.4 KiB.                           

In [22]:
%%bigquery
-- Import the model files copied to Cloud Storage as a BigQuery ML model of type TENSORFLOW.

CREATE OR REPLACE MODEL `BQ_ML_ID.AUTO_KERAS_MODEL`
 OPTIONS(MODEL_TYPE='TENSORFLOW',
         MODEL_PATH="gs://default-credit-clients-2023/auto_keras_classifier/*")

Query is running:   0%|          |

In [23]:
# checking if model was properly imported 

!PROJECT_ID=$(gcloud config get-value project)
!bq ls -m --format=pretty $PROJECT_ID:BQ_ML_ID

+-------------------------------------+--------------------------------+--------+-----------------+
|                 Id                  |           Model Type           | Labels |  Creation Time  |
+-------------------------------------+--------------------------------+--------+-----------------+
| AUTO_KERAS_MODEL                    | TENSORFLOW                     |        | 16 Sep 17:29:09 |
| AUTO_ML                             | AUTOML_CLASSIFIER              |        | 10 Sep 13:27:47 |
| BASE_LOGISTIC_REGRESSION            | LOGISTIC_REGRESSION            |        | 10 Sep 08:38:52 |
| DNN                                 | DNN_LINEAR_COMBINED_CLASSIFIER |        | 10 Sep 11:43:48 |
| LOGISTIC_REGRESSION_WITH_HP_TUNNING | LOGISTIC_REGRESSION            |        | 10 Sep 11:58:26 |
+-------------------------------------+--------------------------------+--------+-----------------+
