In [None]:
# Copyright 2024 Forusone
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

## Feature store

* https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/official/feature_store/online_feature_serving_and_fetching_bigquery_data_with_feature_store_bigtable.ipynb

### Install packages

In [1]:
# Install the packages
%pip install --user --quiet  google-cloud-aiplatform \
                             google-cloud-storage \
                             google-cloud-pipeline-components \
                             google-cloud-bigquery \
                             kfp


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0[0m[39;49m -> [0m[32;49m25.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [2]:
PROJECT_ID = "ai-hangsik"  # @param {type:"string"}
LOCATION = "us-central1"  # @param {type:"string"}


from google.cloud import aiplatform

aiplatform.init(project=PROJECT_ID, location=LOCATION)
API_ENDPOINT = f"{LOCATION}-aiplatform.googleapis.com"

In [3]:

from google.cloud import bigquery
from google.cloud.aiplatform_v1 import (FeatureOnlineStoreAdminServiceClient,
                                        FeatureOnlineStoreServiceClient,
                                        FeatureRegistryServiceClient)
from google.cloud.aiplatform_v1.types import feature as feature_pb2
from google.cloud.aiplatform_v1.types import feature_group as feature_group_pb2
from google.cloud.aiplatform_v1.types import feature_online_store as feature_online_store_pb2
from google.cloud.aiplatform_v1.types import feature_online_store_admin_service as feature_online_store_admin_service_pb2
from google.cloud.aiplatform_v1.types import feature_online_store_service as feature_online_store_service_pb2
from google.cloud.aiplatform_v1.types import feature_registry_service as feature_registry_service_pb2
from google.cloud.aiplatform_v1.types import feature_view as feature_view_pb2
from google.cloud.aiplatform_v1.types import featurestore_service as featurestore_service_pb2
from google.cloud.aiplatform_v1.types import io as io_pb2

### Create a feature table in BigQuery

In [4]:
from google.cloud import bigquery

# Construct a BigQuery client object.
client = bigquery.Client()

# TODO(developer): Set table_id to the ID of the table to create.
table_id = "ai-hangsik.feature_stores.iris_data"

job_config = bigquery.LoadJobConfig(
    schema=[
        bigquery.SchemaField("entity_id", "STRING"),
        bigquery.SchemaField("sepal_length", "FLOAT"),
        bigquery.SchemaField("sepal_width", "FLOAT"),
        bigquery.SchemaField("petal_length", "FLOAT"),
        bigquery.SchemaField("petal_width", "FLOAT"),
        bigquery.SchemaField("species", "STRING"),        
        bigquery.SchemaField("feature_timestamp", "TIMESTAMP"),        
    ],
    
    skip_leading_rows=1,
    # The source format defaults to CSV, so the line below is optional.
    source_format=bigquery.SourceFormat.CSV,
)
uri = "gs://mlops-poc-0303/test_data/iris - iris.csv"

load_job = client.load_table_from_uri(
    uri, table_id, job_config=job_config
)  # Make an API request.

load_job.result()  # Waits for the job to complete.

destination_table = client.get_table(table_id)  # Make an API request.
print("Loaded {} rows.".format(destination_table.num_rows))

Loaded 300 rows.


### Create feature group

In [8]:
FEATURE_GROUP_ID = "iris_feature_group2"  # @param {type: "string"}
FEATURE_IDS = [
    "sepal_length",
    "sepal_width",
    "petal_length",
    "petal_width",
    "species",
]  # @param

In [9]:
# Now, create the featureGroup
BQ_VIEW_ID_FQN = table_id

registry_client = FeatureRegistryServiceClient(
    client_options={"api_endpoint": API_ENDPOINT}
)

feature_group_config = feature_group_pb2.FeatureGroup(
    big_query=feature_group_pb2.FeatureGroup.BigQuery(
        big_query_source=io_pb2.BigQuerySource(input_uri=f"bq://{BQ_VIEW_ID_FQN}")
    )
)

create_group = registry_client.create_feature_group(
    feature_registry_service_pb2.CreateFeatureGroupRequest(
        parent=f"projects/{PROJECT_ID}/locations/{LOCATION}",
        feature_group_id=FEATURE_GROUP_ID,
        feature_group=feature_group_config,
    )
)

In [10]:
print(create_group.result())

name: "projects/721521243942/locations/us-central1/featureGroups/iris_feature_group2"



In [11]:
create_feature = []
for id in FEATURE_IDS:
    create_feature.append(
        registry_client.create_feature(
            featurestore_service_pb2.CreateFeatureRequest(
                parent=f"projects/{PROJECT_ID}/locations/{LOCATION}/featureGroups/{FEATURE_GROUP_ID}",
                feature_id=id,
                feature=feature_pb2.Feature(),
            )
        )
    )

In [12]:

for feature in create_feature:
    print(feature.result())

name: "projects/721521243942/locations/us-central1/featureGroups/iris_feature_group2/features/sepal_length"

name: "projects/721521243942/locations/us-central1/featureGroups/iris_feature_group2/features/sepal_width"

name: "projects/721521243942/locations/us-central1/featureGroups/iris_feature_group2/features/petal_length"

name: "projects/721521243942/locations/us-central1/featureGroups/iris_feature_group2/features/petal_width"

name: "projects/721521243942/locations/us-central1/featureGroups/iris_feature_group2/features/species"



### Online Online store

In [13]:

admin_client = FeatureOnlineStoreAdminServiceClient(
    client_options={"api_endpoint": API_ENDPOINT}
)
registry_client = FeatureRegistryServiceClient(
    client_options={"api_endpoint": API_ENDPOINT}
)
   

In [15]:
FEATURE_ONLINE_STORE_ID = "iris_online_data2"  # @param {type:"string"}

online_store_config = feature_online_store_pb2.FeatureOnlineStore(
    bigtable=feature_online_store_pb2.FeatureOnlineStore.Bigtable(
        auto_scaling=feature_online_store_pb2.FeatureOnlineStore.Bigtable.AutoScaling(
            min_node_count=1, max_node_count=3, cpu_utilization_target=50
        )
    )
)

create_store = admin_client.create_feature_online_store(
    feature_online_store_admin_service_pb2.CreateFeatureOnlineStoreRequest(
        parent=f"projects/{PROJECT_ID}/locations/{LOCATION}",
        feature_online_store_id=FEATURE_ONLINE_STORE_ID,
        feature_online_store=online_store_config,
    )
)

In [16]:
print(create_store.result())


name: "projects/721521243942/locations/us-central1/featureOnlineStores/iris_online_data2"



In [17]:
# Use list to verify the store is created.
admin_client.list_feature_online_stores(
    parent=f"projects/{PROJECT_ID}/locations/{LOCATION}"
)

ListFeatureOnlineStoresPager<feature_online_stores {
  bigtable {
    auto_scaling {
      min_node_count: 1
      max_node_count: 3
      cpu_utilization_target: 50
    }
  }
  name: "projects/721521243942/locations/us-central1/featureOnlineStores/iris_online_data"
  create_time {
    seconds: 1741663870
    nanos: 51140000
  }
  update_time {
    seconds: 1741663870
    nanos: 748471000
  }
  etag: "AMEw9yO9yjy30X45Xkpapntnj6X3A6Lgi7Jxu7HqPLgqciV11GArYNbieylQSj-cqNx7"
}
feature_online_stores {
  bigtable {
    auto_scaling {
      min_node_count: 1
      max_node_count: 3
      cpu_utilization_target: 50
    }
  }
  name: "projects/721521243942/locations/us-central1/featureOnlineStores/iris_online_data2"
  create_time {
    seconds: 1742183618
    nanos: 407517000
  }
  update_time {
    seconds: 1742183618
    nanos: 996144000
  }
  etag: "AMEw9yMLsVgF0GbfWlEpDD2TijTbxjPNlR6LCPqeHUD8LBwUWXCa_R5nno4DZDq7V30E"
}
feature_online_stores {
  bigtable {
    auto_scaling {
      min_node_co

### Create featureView directly with BQ source

In [19]:
FEATURE_VIEW_ID = "iris_view2"  # @param {type:"string"}
CRON_SCHEDULE = "TZ=America/Los_Angeles 56 * * * *"  # @param {type:"string"}
BQ_VIEW_ID_FQN = table_id

big_query_source = feature_view_pb2.FeatureView.BigQuerySource(
    uri=f"bq://{BQ_VIEW_ID_FQN}", entity_id_columns=["entity_id"]
)

sync_config = feature_view_pb2.FeatureView.SyncConfig(cron=CRON_SCHEDULE)

create_view = admin_client.create_feature_view(
    feature_online_store_admin_service_pb2.CreateFeatureViewRequest(
        parent=f"projects/{PROJECT_ID}/locations/{LOCATION}/featureOnlineStores/{FEATURE_ONLINE_STORE_ID}",
        feature_view_id=FEATURE_VIEW_ID,
        feature_view=feature_view_pb2.FeatureView(
            big_query_source=big_query_source,
            sync_config=sync_config,
        ),
    )
)

In [20]:
# Wait for LRO to complete and show result
print(create_view.result())

name: "projects/721521243942/locations/us-central1/featureOnlineStores/iris_online_data2/featureViews/iris_view2"



In [31]:
import subprocess
command = 'gcloud projects describe $(gcloud config get-value project) --format="value(projectNumber)"'
PROJECT_NUMBER = subprocess.check_output(command, shell=True, text=True).strip()
print(project_num)

721521243942


In [32]:
data_client = FeatureOnlineStoreServiceClient(
    client_options={"api_endpoint": API_ENDPOINT}
)

In [34]:
feature_view=f"projects/{PROJECT_NUMBER}/locations/{LOCATION}/featureOnlineStores/{FEATURE_ONLINE_STORE_ID}/featureViews/{FEATURE_VIEW_ID}",
print(feature_view)

('projects/721521243942/locations/us-central1/featureOnlineStores/iris_online_data2/featureViews/iris_view2',)


In [35]:
# PROJECT_ID = 721521243942

data_client.fetch_feature_values(
    request=feature_online_store_service_pb2.FetchFeatureValuesRequest(
        feature_view=f"projects/{PROJECT_NUMBER}/locations/{LOCATION}/featureOnlineStores/{FEATURE_ONLINE_STORE_ID}/featureViews/{FEATURE_VIEW_ID}",
        data_key=feature_online_store_service_pb2.FeatureViewDataKey(key="id_39"),
    )
)

key_values {
  features {
    value {
      double_value: 4.4
    }
    name: "sepal_length"
  }
  features {
    value {
      double_value: 3
    }
    name: "sepal_width"
  }
  features {
    value {
      double_value: 1.3
    }
    name: "petal_length"
  }
  features {
    value {
      double_value: 0.2
    }
    name: "petal_width"
  }
  features {
    value {
      string_value: "setosa"
    }
    name: "species"
  }
  features {
    value {
      int64_value: 1741694400000000
    }
    name: "feature_timestamp"
  }
}

In [36]:
data_client.fetch_feature_values(
    request=feature_online_store_service_pb2.FetchFeatureValuesRequest(
        feature_view=f"projects/{PROJECT_ID}/locations/{LOCATION}/featureOnlineStores/{FEATURE_ONLINE_STORE_ID}/featureViews/{FEATURE_VIEW_ID}",
        data_key=feature_online_store_service_pb2.FeatureViewDataKey(key="id_14"),
        data_format=feature_online_store_service_pb2.FeatureViewDataFormat.PROTO_STRUCT,
    )
)

proto_struct {
  fields {
    key: "species"
    value {
      string_value: "setosa"
    }
  }
  fields {
    key: "sepal_width"
    value {
      number_value: 3
    }
  }
  fields {
    key: "sepal_length"
    value {
      number_value: 4.3
    }
  }
  fields {
    key: "petal_width"
    value {
      number_value: 0.1
    }
  }
  fields {
    key: "petal_length"
    value {
      number_value: 1.1
    }
  }
  fields {
    key: "feature_timestamp"
    value {
      number_value: 1.7416944e+15
    }
  }
}