# Feature Store Example

In [1]:
!pip install -q --user --no-warn-conflicts google-cloud-aiplatform

In [2]:
from google.cloud.aiplatform_v1beta1 import FeaturestoreServiceClient
from google.cloud.aiplatform_v1beta1 import FeaturestoreOnlineServingServiceClient

from google.protobuf.duration_pb2 import Duration
from google.cloud.aiplatform_v1beta1.types import feature as feature_pb2
from google.cloud.aiplatform_v1beta1.types import entity_type as entity_type_pb2
from google.cloud.aiplatform_v1beta1.types import featurestore as featurestore_pb2
from google.cloud.aiplatform_v1beta1.types import feature_selector as feature_selector_pb2
from google.cloud.aiplatform_v1beta1.types import featurestore_online_service as featurestore_online_service_pb2
from google.cloud.aiplatform_v1beta1.types import featurestore_service as featurestore_service_pb2
from google.cloud.aiplatform_v1beta1.types import featurestore_monitoring as featurestore_monitoring_pb2
from google.cloud.aiplatform_v1beta1.types import featurestore_service as featurestore_service_pb2
from google.cloud.aiplatform_v1beta1.types import io as io_pb2

from google.cloud import aiplatform

# Project and region used by every resource path built in this notebook.
PROJECT_ID = 'sandbox-michael-menzel'
REGION = 'europe-west4'

# Hand the project to the SDK explicitly so it agrees with the resource paths
# built from PROJECT_ID instead of relying on an ambient default project.
aiplatform.init(project=PROJECT_ID, location=REGION)

In [3]:
# Client for the featurestore service: used in this notebook to list, create and
# delete featurestores, entity types and features, and to import feature values.
admin_client = FeaturestoreServiceClient(
    client_options=dict(api_endpoint=f'{REGION}-aiplatform.googleapis.com'),
)

# Client for the online serving service: used in this notebook for the
# single-entity and streaming feature value reads.
data_client = FeaturestoreOnlineServingServiceClient(
    client_options=dict(api_endpoint=f'{REGION}-featurestore-aiplatform.googleapis.com'),
)

In [4]:
# Location path that serves as the parent of the featurestores handled below.
BASE_RESOURCE_PATH = admin_client.common_location_path(
    project=PROJECT_ID,
    location=REGION,
)

# List the featurestores under that location; the returned pager is the
# cell's displayed value.
admin_client.list_featurestores(
    request=featurestore_service_pb2.ListFeaturestoresRequest(
        parent=BASE_RESOURCE_PATH,
    )
)

ListFeaturestoresPager<featurestores {
  name: "projects/928871478446/locations/europe-west4/featurestores/example_store"
  create_time {
    seconds: 1622538985
    nanos: 220623000
  }
  update_time {
    seconds: 1629416900
    nanos: 55809000
  }
  etag: "AMEw9yMFlj_adB18r8c56Hw5LPurVKAnHnBPMV474nVTeqH9Qc3gm3Xe9FS00v0rbIY="
  labels {
    key: "environment"
    value: "testing"
  }
  online_serving_config {
    fixed_node_count: 2
  }
  state: STABLE
}
>

In [5]:
# Short ID of the featurestore; it becomes the last segment of the resource name.
FEATURESTORE_ID = "movie_analysis"

# Build the resource name up front so later cells never hit a NameError, even
# when creation below reports an error. It is replaced by the server-reported
# name (which uses the project number) once the store is found or created.
FEATURESTORE_RESOURCE_NAME = admin_client.featurestore_path(
    PROJECT_ID, REGION, FEATURESTORE_ID
)

# Look for an existing store with this ID so that re-running the cell reuses it
# instead of attempting to create a duplicate.
existing_featurestore = next(
    (
        featurestore
        for featurestore in admin_client.list_featurestores(parent=BASE_RESOURCE_PATH)
        if featurestore.name.split("/")[-1] == FEATURESTORE_ID
    ),
    None,
)

featurestore_ready = existing_featurestore is not None

if featurestore_ready:
    FEATURESTORE_RESOURCE_NAME = existing_featurestore.name
else:
    # Define feature store config.
    # `name` is intentionally not set: the resource name is derived from
    # `featurestore_id` in the request (the created store is reported as
    # ".../featurestores/movie_analysis"), so a free-text name had no effect.
    feature_store_config = featurestore_pb2.Featurestore(
        # Fixed number of nodes used for online serving
        online_serving_config=featurestore_pb2.Featurestore.OnlineServingConfig(fixed_node_count=6),
        labels={"team": "ce_ml",
                "loc" : "de-mhg"},
    )

    # Create feature store request
    create_feature_store_request = featurestore_service_pb2.CreateFeaturestoreRequest(
        parent=BASE_RESOURCE_PATH,
        featurestore_id=FEATURESTORE_ID,
        featurestore=feature_store_config
    )

    create_feature_store_job = admin_client.create_featurestore(
        request=create_feature_store_request
    )

    # Run the job and wait for it to finish
    try:
        result = create_feature_store_job.result()
    except RuntimeError as error:
        print(error)
    else:
        FEATURESTORE_RESOURCE_NAME = result.name
        featurestore_ready = True

# Report the store only when it was actually found or created.
if featurestore_ready:
    FEATURESTORE_ID = FEATURESTORE_RESOURCE_NAME.split("/")[-1]
    print(f"Found featurestore {FEATURESTORE_RESOURCE_NAME}: {FEATURESTORE_ID}")

Found featurestore projects/928871478446/locations/europe-west4/featurestores/movie_analysis: movie_analysis


In [6]:
# List the entity types registered in the featurestore; the returned pager is
# the cell's displayed value.
admin_client.list_entity_types(
    request=featurestore_service_pb2.ListEntityTypesRequest(
        parent=FEATURESTORE_RESOURCE_NAME,
    )
)

ListEntityTypesPager<>

In [7]:
# Monitoring policy for the entity type: snapshot analysis on a fixed interval.
# The 300-second interval is for demo purposes only; per the original author's
# note the correct value should be 86400.
users_snapshot_analysis = featurestore_monitoring_pb2.FeaturestoreMonitoringConfig.SnapshotAnalysis(
    monitoring_interval=Duration(seconds=300),
)
monitoring_spec = featurestore_monitoring_pb2.FeaturestoreMonitoringConfig(
    snapshot_analysis=users_snapshot_analysis,
)

# The entity type carries the monitoring policy defined above.
entity_type = entity_type_pb2.EntityType(
    monitoring_config=monitoring_spec,
    description="Users Entity",
)

# Request that registers the entity type under the ID "users" in the featurestore.
entity_request = featurestore_service_pb2.CreateEntityTypeRequest(
    entity_type=entity_type,
    entity_type_id="users",
    parent=FEATURESTORE_RESOURCE_NAME,
)

# Submit the request, then wait on the returned operation's result.
create_entity_type_operation = admin_client.create_entity_type(request=entity_request)
customer_entity_type_job = create_entity_type_operation.result()

In [8]:
# Entity type that will own the three features created in this cell.
users_entity_type_path = admin_client.entity_type_path(
    PROJECT_ID, REGION, FEATURESTORE_ID, "users"
)

# Feature "age": 64-bit integer.
age_feature = feature_pb2.Feature(
    description="User age",
    value_type=feature_pb2.Feature.ValueType.INT64,
)
age_request = featurestore_service_pb2.CreateFeatureRequest(
    feature_id="age",
    feature=age_feature,
)

# Feature "gender": string, with its own monitoring config in which snapshot
# analysis is switched off.
gender_monitoring_config = featurestore_monitoring_pb2.FeaturestoreMonitoringConfig(
    snapshot_analysis=featurestore_monitoring_pb2.FeaturestoreMonitoringConfig.SnapshotAnalysis(
        disabled=True,
    ),
)
gender_feature = feature_pb2.Feature(
    description="User gender",
    value_type=feature_pb2.Feature.ValueType.STRING,
    monitoring_config=gender_monitoring_config,
)
gender_request = featurestore_service_pb2.CreateFeatureRequest(
    feature_id="gender",
    feature=gender_feature,
)

# Feature "liked_genres": array of strings.
liked_feature = feature_pb2.Feature(
    description="An array of genres that this user liked",
    value_type=feature_pb2.Feature.ValueType.STRING_ARRAY,
)
liked_request = featurestore_service_pb2.CreateFeatureRequest(
    feature_id="liked_genres",
    feature=liked_feature,
)

# Create all three features in one batch call and wait for the result, which
# is the cell's displayed value.
admin_client.batch_create_features(
    requests=[age_request, gender_request, liked_request],
    parent=users_entity_type_path,
).result()

features {
  name: "projects/928871478446/locations/europe-west4/featurestores/movie_analysis/entityTypes/users/features/age"
}
features {
  name: "projects/928871478446/locations/europe-west4/featurestores/movie_analysis/entityTypes/users/features/gender"
}
features {
  name: "projects/928871478446/locations/europe-west4/featurestores/movie_analysis/entityTypes/users/features/liked_genres"
}

In [9]:
# Avro file in Cloud Storage that holds the user feature values to import.
users_avro_source = io_pb2.AvroSource(
    gcs_source=io_pb2.GcsSource(
        uris=[
            "gs://sandbox-michael-menzel-data-europe-west4/vertex-ai/feature-store/datasets/users.avro"
        ]
    )
)

# One FeatureSpec per feature to import, identified by feature ID.
users_feature_specs = [
    featurestore_service_pb2.ImportFeatureValuesRequest.FeatureSpec(id=feature_id)
    for feature_id in ("age", "gender", "liked_genres")
]

# Import request for the "users" entity type: "user_id" is passed as the entity
# ID field and "update_time" as the feature time field of the source.
import_users_request = featurestore_service_pb2.ImportFeatureValuesRequest(
    entity_type=admin_client.entity_type_path(
        PROJECT_ID, REGION, FEATURESTORE_ID, "users"
    ),
    avro_source=users_avro_source,
    entity_id_field="user_id",
    feature_specs=users_feature_specs,
    feature_time_field="update_time",
    worker_count=10,
)

# Start the import and wait for it to finish; the result is the cell's
# displayed value.
import_users_operation = admin_client.import_feature_values(request=import_users_request)
import_users_operation.result()

imported_entity_count: 7
imported_feature_value_count: 12

In [10]:
# Selector naming the three features to read; the streaming read cell further
# down reuses it.
selected_feature_ids = feature_selector_pb2.IdMatcher(
    ids=["age", "gender", "liked_genres"]
)
feature_selector = feature_selector_pb2.FeatureSelector(id_matcher=selected_feature_ids)

# Online read of the selected features for the single entity "alice".
alice_read_request = featurestore_online_service_pb2.ReadFeatureValuesRequest(
    feature_selector=feature_selector,
    entity_id="alice",
    entity_type=admin_client.entity_type_path(
        PROJECT_ID, REGION, FEATURESTORE_ID, "users"
    ),
)

# The response is the cell's displayed value.
data_client.read_feature_values(request=alice_read_request)

header {
  entity_type: "projects/928871478446/locations/europe-west4/featurestores/movie_analysis/entityTypes/users"
  feature_descriptors {
    id: "age"
  }
  feature_descriptors {
    id: "gender"
  }
  feature_descriptors {
    id: "liked_genres"
  }
}
entity_view {
  entity_id: "alice"
  data {
    value {
      int64_value: 55
      metadata {
        generate_time {
          seconds: 1629493102
          nanos: 261000000
        }
      }
    }
  }
  data {
    value {
      string_value: "Female"
      metadata {
        generate_time {
          seconds: 1629493102
          nanos: 261000000
        }
      }
    }
  }
  data {
    value {
      string_array_value {
        values: "Drama"
      }
      metadata {
        generate_time {
          seconds: 1629493102
          nanos: 261000000
        }
      }
    }
  }
}

In [11]:
# Streaming read of the selected features for several entities at once.
# `feature_selector` comes from the single-entity read cell above.
streaming_read_request = featurestore_online_service_pb2.StreamingReadFeatureValuesRequest(
    feature_selector=feature_selector,
    entity_ids=["alice", "bob"],
    entity_type=admin_client.entity_type_path(
        PROJECT_ID, REGION, FEATURESTORE_ID, "users"
    ),
)
response_stream = data_client.streaming_read_feature_values(request=streaming_read_request)

# Print each message of the stream as it is received.
for response in response_stream:
    print(response)

header {
  entity_type: "projects/928871478446/locations/europe-west4/featurestores/movie_analysis/entityTypes/users"
  feature_descriptors {
    id: "age"
  }
  feature_descriptors {
    id: "gender"
  }
  feature_descriptors {
    id: "liked_genres"
  }
}

entity_view {
  entity_id: "alice"
  data {
    value {
      int64_value: 55
      metadata {
        generate_time {
          seconds: 1629493102
          nanos: 261000000
        }
      }
    }
  }
  data {
    value {
      string_value: "Female"
      metadata {
        generate_time {
          seconds: 1629493102
          nanos: 261000000
        }
      }
    }
  }
  data {
    value {
      string_array_value {
        values: "Drama"
      }
      metadata {
        generate_time {
          seconds: 1629493102
          nanos: 261000000
        }
      }
    }
  }
}

entity_view {
  entity_id: "bob"
  data {
    value {
      int64_value: 35
      metadata {
        generate_time {
          seconds: 1629493102
     

## Clean Up

In [12]:
# Request deletion of the featurestore created in this notebook.
# NOTE(review): force=True presumably also removes the entity types and
# features still contained in the store - confirm against the API reference.
feature_store_delete_request = featurestore_service_pb2.DeleteFeaturestoreRequest(
    force=True,
    name=FEATURESTORE_RESOURCE_NAME,
)

# Submit the deletion and wait for it to finish.
delete_featurestore_operation = admin_client.delete_featurestore(
    request=feature_store_delete_request
)
delete_featurestore_operation.result()

