# Feature Store

The purpose of this notebook is to:

* Create a Feature Store
* Create an entity with some features
* Batch ingest some feature data
* Deploy a Cloud Function that can read this data and feed it to the model

## Imports

In [66]:
from google.api_core import operations_v1
from google.cloud.aiplatform_v1beta1 import FeaturestoreOnlineServingServiceClient, FeaturestoreServiceClient, FeatureSelector
from google.cloud.aiplatform_v1beta1.types import featurestore_online_service as featurestore_online_service_pb2
from google.cloud.aiplatform_v1beta1.types import entity_type as entity_type_pb2
from google.cloud.aiplatform_v1beta1.types import feature as feature_pb2
from google.cloud.aiplatform_v1beta1.types import featurestore as featurestore_pb2
from google.cloud.aiplatform_v1beta1.types import featurestore_service as featurestore_service_pb2
from google.cloud.aiplatform_v1beta1.types import io as io_pb2
from google.cloud.aiplatform_v1beta1.types import ListFeaturestoresRequest, CreateFeaturestoreRequest, Featurestore

from google.protobuf.timestamp_pb2 import Timestamp
from google.cloud.aiplatform_v1beta1.types import featurestore_monitoring as featurestore_monitoring_pb2
from google.protobuf.duration_pb2 import Duration

import yaml

## Configuration

In [67]:
# Load the notebook settings and keep only the 'personal' profile.
with open('mainconfig.yaml') as f:
    main_config = yaml.safe_load(f)['personal']

In [68]:
# Project-level settings, all read from the selected config profile.
PROJECT, REGION = main_config['project'], main_config['region']
SERVICE_ACCOUNT = main_config['service_account']

print("Project ID:", PROJECT)
print("Region:", REGION)

# Feature Store to work on, and the regional API endpoint the clients connect to.
FEATURESTORE_ID = main_config['featurestore_id']
API_ENDPOINT = f"{REGION}-aiplatform.googleapis.com"

Project ID: pbalm-cxb-aa
Region: europe-west4


In [69]:
# admin_client is used below to list/create stores, entity types and features
# and to import values; data_client is used for online reads.
admin_client = FeaturestoreServiceClient(client_options=dict(api_endpoint=API_ENDPOINT))
data_client = FeaturestoreOnlineServingServiceClient(client_options=dict(api_endpoint=API_ENDPOINT))

In [70]:
# Show every Feature Store that already exists in the configured location.
print(f'Existing feature stores in project {PROJECT} and region {REGION}:')
location_path = admin_client.common_location_path(PROJECT, REGION)
for featurestore in admin_client.list_featurestores(ListFeaturestoresRequest(parent=location_path)):
    print(featurestore)

Existing feature stores in project pbalm-cxb-aa and region europe-west4:
name: "projects/188940921537/locations/europe-west4/featurestores/creditcards"
create_time {
  seconds: 1657035029
  nanos: 610160000
}
update_time {
  seconds: 1657035029
  nanos: 696023000
}
etag: "AMEw9yO-PbGrqGZL4xp1CperXKp3TvgOoS2NRM7ju3Gu41lzSjm2II2URSwBy9SEvEC9"
online_serving_config {
}
state: STABLE

name: "projects/188940921537/locations/europe-west4/featurestores/creditcards2"
create_time {
  seconds: 1657035022
  nanos: 511480000
}
update_time {
  seconds: 1657035022
  nanos: 553038000
}
etag: "AMEw9yNAqFGzMo13ZTgAItY9e921ZaS9shSxi8RuE_WrPcoW1EI7kgDaOBFC_bx3qhau"
online_serving_config {
}
state: STABLE

name: "projects/188940921537/locations/europe-west4/featurestores/test"
create_time {
  seconds: 1657035465
  nanos: 984207000
}
update_time {
  seconds: 1657035466
  nanos: 28958000
}
etag: "AMEw9yP3D37TbhESt51aznRiVNtEO42jXpeDB_7lx6eD4x2fUDyLbMw_eskCSsgUpcQ="
online_serving_config {
  fixed_node_count

## Helper functions (TODO: move these to source code files)

In [104]:
def create_fs(project, region, store_id, store_name=None, fixed_node_count=3):
    """Create a Feature Store unless one with the same ID already exists.

    Args:
        project: Project used to build the location path.
        region: Region used to build the location path.
        store_id: ID of the Feature Store (last segment of its resource name).
        store_name: Value placed in the ``name`` field of the Featurestore
            message. Defaults to ``<location path>/<store_id>``.
        fixed_node_count: Number of online serving nodes to request.
            Defaults to 3, the value that used to be hard-coded.

    Returns:
        The result of the create operation, or None when a Feature Store
        with ID ``store_id`` already exists in ``region``.
    """
    base_path = admin_client.common_location_path(project, region)

    # A store "exists" when the last path segment of its resource name
    # equals the requested ID.
    existing = admin_client.list_featurestores(ListFeaturestoresRequest(parent=base_path))
    if any(fs.name.split('/')[-1] == store_id for fs in existing):
        print(f'Feature Store "{store_id}" already exists in {region}')
        return None

    if store_name is None:
        store_name = f'{base_path}/{store_id}'

    req = CreateFeaturestoreRequest(
        parent=base_path,
        featurestore=Featurestore(
            name=store_name,
            online_serving_config=Featurestore.OnlineServingConfig(
                fixed_node_count=fixed_node_count)),
        featurestore_id=store_id)

    # create_featurestore returns a long-running operation; result() waits
    # for it and yields the created Featurestore.
    created = admin_client.create_featurestore(req).result()
    # Print only the resource name; the full message is returned to the caller.
    print(f'Created Feature Store {created.name} in {region}')
    return created


def create_entity(project, region, store_id, entity, entity_descr, features, features_descr=None):
    """Create an entity type and a batch of DOUBLE-valued features on it.

    Args:
        project: Project that owns the Feature Store.
        region: Region of the Feature Store.
        store_id: ID of the Feature Store to create the entity type in.
        entity: ID of the entity type to create.
        entity_descr: Description stored on the entity type.
        features: Feature IDs to create on the entity type.
        features_descr: One description per feature, in the same order as
            ``features``. Defaults to the feature IDs themselves.

    Returns:
        The result of the batch feature creation, or None (after printing an
        error) when ``features`` and ``features_descr`` differ in length.
    """
    if features_descr is None:
        features_descr = features

    if len(features) != len(features_descr):
        print(f'ERROR: Got {len(features)} features and {len(features_descr)} descriptions')
        return None

    print(f'Creating entity {entity} in Feature Store {store_id} ({region})')

    # The same monitoring settings are applied to the entity type and to
    # every feature created on it.
    monitoring_config = featurestore_monitoring_pb2.FeaturestoreMonitoringConfig(
        snapshot_analysis=featurestore_monitoring_pb2.FeaturestoreMonitoringConfig.SnapshotAnalysis(
            monitoring_interval=Duration(seconds=3600)))  # 1 hour

    # result() waits for the long-running operation to finish.
    entity_type = admin_client.create_entity_type(
        featurestore_service_pb2.CreateEntityTypeRequest(
            parent=admin_client.featurestore_path(project, region, store_id),
            entity_type_id=entity,
            entity_type=entity_type_pb2.EntityType(
                description=entity_descr,
                monitoring_config=monitoring_config))
    ).result()

    print(entity_type)

    requests = [
        featurestore_service_pb2.CreateFeatureRequest(
            feature=feature_pb2.Feature(
                value_type=feature_pb2.Feature.ValueType.DOUBLE,
                description=descr,
                monitoring_config=monitoring_config),
            feature_id=name)
        for name, descr in zip(features, features_descr)
    ]

    print(f'\nCreating features: {",".join(features)}')

    # Build the parent path from the function arguments (not the notebook
    # globals) so the features are created on the entity type made above.
    return admin_client.batch_create_features(
        parent=admin_client.entity_type_path(project, region, store_id, entity),
        requests=requests).result()


def ingest_entities_csv(project, region, store_id, entity, features, gcs_uris):
    """Batch-import feature values for ``entity`` from CSV files on GCS.

    All imported values share a single feature time: the current time with
    its nanos field zeroed. The CSV column named after ``entity`` is used as
    the entity ID field. Waits for the import operation and returns its result.
    """
    # One feature time for the whole import.
    now = Timestamp()
    now.GetCurrentTime()
    now.nanos = 0

    request_cls = featurestore_service_pb2.ImportFeatureValuesRequest

    feature_specs = []
    for feature_id in features:
        feature_specs.append(request_cls.FeatureSpec(id=feature_id))

    entity_type_path = admin_client.entity_type_path(project, region, store_id, entity)
    source = io_pb2.CsvSource(gcs_source=io_pb2.GcsSource(uris=gcs_uris))

    request = request_cls(
        entity_type=entity_type_path,
        csv_source=source,
        feature_specs=feature_specs,
        entity_id_field=entity,
        feature_time=now,
        worker_count=5)

    print(f'Ingesting features for "{entity}" entity...')
    outcome = admin_client.import_feature_values(request).result()
    print('done')

    return outcome


def read_features(project, region, store_id, entity, features, entity_value):
    """Read the online values of ``features`` for one entity instance.

    Args:
        project: Project that owns the Feature Store.
        region: Region of the Feature Store.
        store_id: ID of the Feature Store.
        entity: Name of the entity type to read, for example ``user``.
        features: IDs of the features to read.
        entity_value: The specific instance of the entity whose features are
            wanted, for example a user ID.

    Returns:
        A dict ``{'feature1': val1, 'feature2': val2, ...}`` keyed by the
        feature IDs reported in the response header, holding each feature's
        ``double_value``.
    """
    feature_selector = FeatureSelector()
    feature_selector.id_matcher.ids = features

    read_request = featurestore_online_service_pb2.ReadFeatureValuesRequest(
        entity_type=admin_client.entity_type_path(project, region, store_id, entity),
        entity_id=entity_value,
        feature_selector=feature_selector)

    res = data_client.read_feature_values(read_request)

    # Pair each value with the feature ID from the response header rather
    # than with the request list, so the mapping does not depend on the
    # response preserving the order in which features were requested.
    return {
        descriptor.id: data.value.double_value
        for descriptor, data in zip(res.header.feature_descriptors, res.entity_view.data)
    }


## Create Feature Store and entity with features

In [28]:
# NOTE(review): the last argument is bound to `store_name`, which create_fs
# places in Featurestore.name — confirm a free-text description is intended there.
create_fs(PROJECT, REGION, FEATURESTORE_ID,
          store_name="Feature Store for credit card use case")

Feature Store "creditcards" already exists in europe-west4


In [72]:
# Fetch the configured Feature Store to confirm it exists and inspect its state.
featurestore_name = admin_client.featurestore_path(PROJECT, REGION, FEATURESTORE_ID)
admin_client.get_featurestore(name=featurestore_name)

name: "projects/188940921537/locations/europe-west4/featurestores/test"
create_time {
  seconds: 1657035465
  nanos: 984207000
}
update_time {
  seconds: 1657035466
  nanos: 28958000
}
etag: "AMEw9yOyDHnppzyRW6XhXJDDjcScpZ7nmneJXMZ14nry8PnwdCT5lxnmsqSBN-hMNXQ="
online_serving_config {
  fixed_node_count: 3
}
state: STABLE

In [108]:
# Entity type ID; also used as the entity-ID column name in the generated CSV.
entity = 'user'
entity_descr = 'User ID'
# Feature IDs to create on the entity type and to ingest from the CSV.
features = ['v27', 'v28']

In [74]:
# Create the entity type and its features; descriptions default to the feature IDs.
create_entity(
    project=PROJECT,
    region=REGION,
    store_id=FEATURESTORE_ID,
    entity=entity,
    entity_descr=entity_descr,
    features=features,
)

Creating entity user in Feature Store test (europe-west4)
name: "projects/188940921537/locations/europe-west4/featurestores/test/entityTypes/user"


Creating features: v27,v28


features {
  name: "projects/188940921537/locations/europe-west4/featurestores/test/entityTypes/user/features/v27"
}
features {
  name: "projects/188940921537/locations/europe-west4/featurestores/test/entityTypes/user/features/v28"
}

### Create the feature data

In [75]:
import random

filename = f'features_{entity}.csv'

with open(filename, 'w') as f:
    # Header row: the entity column followed by one column per feature.
    f.write(f'{entity},{",".join(features)}\n')
    for i in range(100):
        # One random value per feature, so every row matches the header
        # width even when the feature list changes.
        row_values = ",".join(str(random.random()) for _ in features)
        f.write(f'user{i},{row_values}\n')

In [76]:
# Print the generated CSV to check the header and rows.
!cat {filename}

user,v27,v28
user0,0.05278513767900106,0.2733072156716301
user1,0.8876808002457176,0.37118132639832957
user2,0.6484678858556028,0.884864632636418
user3,0.6538130756571675,0.016524449856715306
user4,0.03522969451299729,0.6717350357901456
user5,0.6659553008066724,0.3192681293317895
user6,0.4971542465546237,0.13951232914755796
user7,0.5524238508287338,0.28355391635374705
user8,0.6484548871018954,0.8760327594578065
user9,0.4915989114590842,0.23367455263702874
user10,0.49673724207577596,0.7815007872377936
user11,0.3871640207275481,0.7227514344241577
user12,0.04762816971189521,0.3967234982055644
user13,0.628006736594944,0.5886541010985723
user14,0.816720419139756,0.8804032399086309
user15,0.7546682954880937,0.9836800765345006
user16,0.9976384063954794,0.9084957628088213
user17,0.2175272730081298,0.8620769974508634
user18,0.9504168066705266,0.024318760115989546
user19,0.4979103297647449,0.8880661928433115
user20,0.7542035168199034,0.8375506772064244
user21,0.5703134504084629,0.177767119349459

In [77]:
# Bucket name from the config (used below in gs:// URIs); the bare expression displays it.
BUCKET = main_config['bucket']
BUCKET

'pbalm-cxb-aa-eu'

In [78]:
# Upload the CSV to the bucket so the batch import below can read it from GCS.
!gsutil cp {filename} gs://{BUCKET}/{filename} 

Copying file://features_user.csv [Content-Type=text/csv]...
/ [1 files][  4.4 KiB/  4.4 KiB]                                                
Operation completed over 1 objects/4.4 KiB.                                      


## Ingest feature data

In [79]:
# GCS location of the uploaded CSV, passed as the import source.
gcs_uris = [f'gs://{BUCKET}/{filename}']

ingest_entities_csv(
    project=PROJECT,
    region=REGION,
    store_id=FEATURESTORE_ID,
    entity=entity,
    features=features,
    gcs_uris=gcs_uris,
)

Ingesting features for "user" entity...
done


imported_entity_count: 100
imported_feature_value_count: 200

## Test Feature Store: Read values

In [109]:
# Read back the features of the last ten generated users as a smoke test.
features_data = {
    f'user{i}': read_features(PROJECT, REGION, FEATURESTORE_ID, entity, features, f'user{i}')
    for i in range(90, 100)
}

features_data

{'user90': {'v27': 0.1799766660815415, 'v28': 0.5034024037219565},
 'user91': {'v27': 0.9104607795780705, 'v28': 0.7522889373632377},
 'user92': {'v27': 0.47501841324168304, 'v28': 0.7189647069679109},
 'user93': {'v27': 0.047793919967654586, 'v28': 0.07699106480235007},
 'user94': {'v27': 0.5024908053355615, 'v28': 0.5322711139626671},
 'user95': {'v27': 0.4984407680204491, 'v28': 0.23219261975122873},
 'user96': {'v27': 0.07877193063213983, 'v28': 0.696556036005365},
 'user97': {'v27': 0.94991781533993, 'v28': 0.5263697093647477},
 'user98': {'v27': 0.4975465905451488, 'v28': 0.30645728591572285},
 'user99': {'v27': 0.5172086169388093, 'v28': 0.8556641244117467}}

In [110]:
# Display which Feature Store ID the reads above were issued against.
FEATURESTORE_ID

'test'

In [39]:
# List the members of Feature.ValueType (create_entity always uses DOUBLE).
list(feature_pb2.Feature.ValueType)

[<ValueType.VALUE_TYPE_UNSPECIFIED: 0>,
 <ValueType.BOOL: 1>,
 <ValueType.BOOL_ARRAY: 2>,
 <ValueType.DOUBLE: 3>,
 <ValueType.DOUBLE_ARRAY: 4>,
 <ValueType.INT64: 9>,
 <ValueType.INT64_ARRAY: 10>,
 <ValueType.STRING: 11>,
 <ValueType.STRING_ARRAY: 12>,
 <ValueType.BYTES: 13>]

In [83]:
# NOTE(review): inspection cell; the same value is already displayed in an earlier cell — candidate for removal.
FEATURESTORE_ID

'test'

In [84]:
# NOTE(review): inspection cell; REGION is already printed by the configuration cell — candidate for removal.
REGION

'europe-west4'