# Feature Store

The purpose of this notebook is to:

* Create a Feature Store
* Create an entity with some features
* Batch ingest some feature data
* Deploy a Cloud Function that can read this data and feed it to the model

## Imports

In [20]:
from google.api_core import operations_v1
from google.cloud.aiplatform_v1beta1 import FeaturestoreOnlineServingServiceClient, FeaturestoreServiceClient, FeatureSelector
from google.cloud.aiplatform_v1beta1.types import featurestore_online_service as featurestore_online_service_pb2
from google.cloud.aiplatform_v1beta1.types import entity_type as entity_type_pb2
from google.cloud.aiplatform_v1beta1.types import feature as feature_pb2
from google.cloud.aiplatform_v1beta1.types import featurestore as featurestore_pb2
from google.cloud.aiplatform_v1beta1.types import featurestore_service as featurestore_service_pb2
from google.cloud.aiplatform_v1beta1.types import io as io_pb2
from google.cloud.aiplatform_v1beta1.types import ListFeaturestoresRequest, CreateFeaturestoreRequest, Featurestore

from google.protobuf.timestamp_pb2 import Timestamp
from google.cloud.aiplatform_v1beta1.types import featurestore_monitoring as featurestore_monitoring_pb2
from google.protobuf.duration_pb2 import Duration

import yaml

## Configuration

In [21]:
# Read the notebook settings from the YAML file and keep only the 'personal' profile.
with open('mainconfig.yaml') as f:
    all_profiles = yaml.safe_load(f)
main_config = all_profiles['personal']

In [22]:
# Values taken from the selected configuration profile.
PROJECT = main_config['project']
REGION = main_config['region']
SERVICE_ACCOUNT = main_config['service_account']

print("Project ID:", PROJECT)
print("Region:", REGION)

# The API endpoint is derived from the region.
API_ENDPOINT = f"{REGION}-aiplatform.googleapis.com"
FEATURESTORE_ID = main_config['featurestore_id']

Project ID: pbalm-cxb-aa
Region: europe-west1


In [23]:
# admin_client manages Feature Store resources; data_client reads feature values.
# Both are pointed at the region-specific API endpoint.
admin_client = FeaturestoreServiceClient(client_options={"api_endpoint": API_ENDPOINT})
data_client = FeaturestoreOnlineServingServiceClient(client_options={"api_endpoint": API_ENDPOINT})

In [24]:
print(f'Existing feature stores in project {PROJECT} and region {REGION}:')

# List every Feature Store under the project/region location and print each one.
list_request = ListFeaturestoresRequest(parent=admin_client.common_location_path(PROJECT, REGION))
for featurestore in admin_client.list_featurestores(list_request):
    print(featurestore)

Existing feature stores in project pbalm-cxb-aa and region europe-west1:
name: "projects/188940921537/locations/europe-west1/featurestores/creditcards"
create_time {
  seconds: 1657136254
  nanos: 29531000
}
update_time {
  seconds: 1657136254
  nanos: 118341000
}
etag: "AMEw9yN8kR8_IkgNwOP3UJEt3lhowvZv_EuG0MS0pCYZolTvWM4iwEF9MPof3Gpy30A="
online_serving_config {
  fixed_node_count: 3
}
state: STABLE



## Helper functions (TODO: move these to source code files)

In [66]:
def create_fs(project, region, store_id, store_name=None):
    """Create a Feature Store unless one with the same ID is already listed.

    Args:
        project: Project used to build the parent location path.
        region: Region used to build the parent location path.
        store_id: ID of the Feature Store to create.
        store_name: Value placed in the ``name`` field of the ``Featurestore``
            message. Defaults to ``<location path>/<store_id>``.

    Returns:
        The result of the create operation, or ``None`` when a Feature Store
        whose name ends in ``store_id`` already exists in the location.
    """
    location = admin_client.common_location_path(project, region)

    # Compare against the last path segment of each existing store's name.
    listing = admin_client.list_featurestores(ListFeaturestoresRequest(parent=location))
    if any(existing.name.split('/')[-1] == store_id for existing in listing):
        print(f'Feature Store "{store_id}" already exists in {region}')
        return

    resource_name = f'{location}/{store_id}' if store_name is None else store_name

    serving_config = Featurestore.OnlineServingConfig(fixed_node_count=3)
    create_request = CreateFeaturestoreRequest(
        parent=location,
        featurestore=Featurestore(name=resource_name, online_serving_config=serving_config),
        featurestore_id=store_id,
    )

    # result() waits for the long-running create operation.
    created = admin_client.create_featurestore(create_request).result()
    print(f'Created Feature Store {created} in {region}')
    return created


def create_entity(project, region, store_id, entity, entity_descr, features, features_descr=None):
    """Create an entity type and a batch of DOUBLE features in a Feature Store.

    Args:
        project: Project that owns the Feature Store.
        region: Region of the Feature Store.
        store_id: ID of the Feature Store that will hold the entity type.
        entity: ID of the entity type to create.
        entity_descr: Description stored on the entity type.
        features: Feature IDs to create under the entity type.
        features_descr: One description per feature, in the same order as
            ``features``. Defaults to the feature IDs themselves.

    Returns:
        The result of the batch-create-features operation, or ``None`` (after
        printing an error) when ``features`` and ``features_descr`` differ in
        length.
    """
    if features_descr is None:
        features_descr = features

    if len(features) != len(features_descr):
        print(f'ERROR: Got {len(features)} features and {len(features_descr)} descriptions')
        return

    print(f'Creating entity {entity} in Feature Store {store_id} ({region})')

    # The same snapshot analysis settings are applied to the entity type and to every feature.
    snapshot_analysis = featurestore_monitoring_pb2.FeaturestoreMonitoringConfig.SnapshotAnalysis(
        monitoring_interval=Duration(seconds=3600))  # 1 hour

    def _monitoring_config():
        # Build a fresh monitoring config message for each resource.
        return featurestore_monitoring_pb2.FeaturestoreMonitoringConfig(
            snapshot_analysis=snapshot_analysis)

    entity_type = admin_client.create_entity_type(
        featurestore_service_pb2.CreateEntityTypeRequest(
            parent=admin_client.featurestore_path(project, region, store_id),
            entity_type_id=entity,
            entity_type=entity_type_pb2.EntityType(
                description=entity_descr,
                monitoring_config=_monitoring_config()),
        )
    ).result()

    print(entity_type)

    def _create_f_request(name, descr):
        # Every feature is created with the DOUBLE value type.
        return featurestore_service_pb2.CreateFeatureRequest(
            feature=feature_pb2.Feature(
                value_type=feature_pb2.Feature.ValueType.DOUBLE,
                description=descr,
                monitoring_config=_monitoring_config()),
            feature_id=name)

    requests = [_create_f_request(name, descr) for name, descr in zip(features, features_descr)]

    print(f'\nCreating features: {",".join(features)}')

    # Use the function arguments (not the notebook-level PROJECT / REGION /
    # FEATURESTORE_ID globals) so the features land under the entity type that
    # was just created, whatever store the caller targeted.
    return admin_client.batch_create_features(
        parent=admin_client.entity_type_path(project, region, store_id, entity),
        requests=requests).result()


def ingest_entities_csv(project, region, store_id, entity, features, gcs_uris,
                        worker_count=5, entity_id_field=None):
    """Import feature values for one entity type from CSV files in Cloud Storage.

    Args:
        project: Project that owns the Feature Store.
        region: Region of the Feature Store.
        store_id: ID of the Feature Store.
        entity: ID of the entity type to import into.
        features: Feature IDs to import; one feature spec is built per ID.
        gcs_uris: List of ``gs://`` URIs of the CSV files to import.
        worker_count: Value for the request's ``worker_count`` field
            (default 5, the previously hard-coded value).
        entity_id_field: Name of the CSV column holding the entity IDs.
            Defaults to ``entity``, i.e. the column is named after the
            entity type.

    Returns:
        The result of the import operation.
    """
    if entity_id_field is None:
        entity_id_field = entity

    # One timestamp, truncated to whole seconds, is applied to every imported value.
    timestamp = Timestamp()
    timestamp.GetCurrentTime()
    timestamp.nanos = 0

    specs = [featurestore_service_pb2.ImportFeatureValuesRequest.FeatureSpec(id=f) for f in features]

    import_request_transaction = featurestore_service_pb2.ImportFeatureValuesRequest(
        entity_type=admin_client.entity_type_path(project, region, store_id, entity),
        csv_source=io_pb2.CsvSource(gcs_source=io_pb2.GcsSource(uris=gcs_uris)),
        feature_specs=specs,
        entity_id_field=entity_id_field,
        feature_time=timestamp,  # unique timestamp for all
        worker_count=worker_count)

    print(f'Ingesting features for "{entity}" entity...')
    # result() waits for the long-running import operation.
    ingestion_lro = admin_client.import_feature_values(import_request_transaction).result()
    print('done')

    return ingestion_lro


def read_features(project, region, store_id, entity, features, entity_value):
    """Read the current values of selected features for one entity instance.

    Args:
        project: Project that owns the Feature Store.
        region: Region of the Feature Store.
        store_id: ID of the Feature Store.
        entity: Name of the entity type to read, for example ``user``.
        features: IDs of the features to read.
        entity_value: The specific instance of the entity whose features are
            wanted, for example a user ID.

    Returns:
        A dict ``{'feature1': val1, 'feature2': val2, ...}`` of double values,
        in the order of ``features``. Features without a ``generate_time`` are
        left out: these do not exist in the store for this entity instance.
    """
    feature_selector = FeatureSelector()
    feature_selector.id_matcher.ids = features

    read_request = featurestore_online_service_pb2.ReadFeatureValuesRequest(
        entity_type=admin_client.entity_type_path(project, region, store_id, entity),
        entity_id=entity_value,
        feature_selector=feature_selector)

    res = data_client.read_feature_values(read_request)

    # The data cells are ordered like the feature descriptors in the response
    # header, so take the feature IDs from the header instead of assuming the
    # response preserves the order of the request.
    returned_ids = [descriptor.id for descriptor in res.header.feature_descriptors]
    found = {
        feature_id: cell.value.double_value
        for feature_id, cell in zip(returned_ids, res.entity_view.data)
        if cell.value.metadata.generate_time
    }

    # Emit keys in the order the caller asked for them.
    return {f: found[f] for f in features if f in found}

## Create Feature Store and entity with features

In [7]:
# NOTE(review): the text below is passed as `store_name`, which create_fs puts in
# the Featurestore `name` field; it reads like a description, so confirm the intent.
create_fs(
    project=PROJECT,
    region=REGION,
    store_id=FEATURESTORE_ID,
    store_name="Feature Store for credit card use case",
)

Created Feature Store name: "projects/188940921537/locations/europe-west1/featurestores/creditcards"
 in europe-west1


name: "projects/188940921537/locations/europe-west1/featurestores/creditcards"

In [8]:
# Fetch the Feature Store by its full resource path to inspect it.
featurestore_path = admin_client.featurestore_path(PROJECT, REGION, FEATURESTORE_ID)
admin_client.get_featurestore(name=featurestore_path)

name: "projects/188940921537/locations/europe-west1/featurestores/creditcards"
create_time {
  seconds: 1657136254
  nanos: 29531000
}
update_time {
  seconds: 1657136254
  nanos: 118341000
}
etag: "AMEw9yMQYcxSMo07Pxe1SoR-xw4qcMk92ByK6VI9LsploP5c_3tPvNcw0CO9rwiH9v0="
online_serving_config {
  fixed_node_count: 3
}
state: STABLE

In [9]:
# Entity type to create, its description, and the IDs of the features it will hold.
entity = 'user'
entity_descr = 'User ID'
features = ['v27', 'v28']

In [10]:
# No feature descriptions are passed, so create_entity falls back to the feature names.
create_entity(
    project=PROJECT,
    region=REGION,
    store_id=FEATURESTORE_ID,
    entity=entity,
    entity_descr=entity_descr,
    features=features,
)

Creating entity user in Feature Store creditcards (europe-west1)
name: "projects/188940921537/locations/europe-west1/featurestores/creditcards/entityTypes/user"


Creating features: v27,v28


features {
  name: "projects/188940921537/locations/europe-west1/featurestores/creditcards/entityTypes/user/features/v27"
}
features {
  name: "projects/188940921537/locations/europe-west1/featurestores/creditcards/entityTypes/user/features/v28"
}

### Create the feature data

In [11]:
import random

filename = f'features_{entity}.csv'

# Write a header row (ID column named after the entity, then one column per
# feature) followed by 100 synthetic rows with IDs user0..user99.
with open(filename, 'w') as f:
    line = f'{entity},{",".join(features)}\n'
    f.write(line)
    for i in range(100):
        # One random value per feature, so each row always matches the header
        # regardless of how many features are configured.
        row_values = ",".join(str(random.random()) for _ in features)
        f.write(f'user{i},{row_values}\n')

In [12]:
# Print the generated CSV to check the header and rows.
!cat {filename}

user,v27,v28
user0,0.8752007622310332,0.5085713748296266
user1,0.18172587234862592,0.331560453610137
user2,0.7678604213202734,0.4796658653927143
user3,0.03530153195587893,0.3925520421488291
user4,0.714354519152235,0.7416135959249333
user5,0.310460362693614,0.446304360638923
user6,0.6262247551571973,0.6205434867159426
user7,0.934891273361272,0.3159109422133256
user8,0.005784604731528753,0.6083360616030765
user9,0.4650906821533981,0.23484670927139173
user10,0.38533364987649643,0.055908172244078735
user11,0.19284495284841252,0.9662444569349677
user12,0.9296274362017263,0.4560936299443399
user13,0.1237296954914393,0.31201567278130604
user14,0.4903181549866118,0.168048375398973
user15,0.04618572281597866,0.7157594756749772
user16,0.6975090670221781,0.9732154609744624
user17,0.7749512534629103,0.11609543617175999
user18,0.8966848026372027,0.4194811229151356
user19,0.4974831622648439,0.8144141834082029
user20,0.685089344258394,0.7309976629059198
user21,0.2208765865449639,0.10142488435923813
u

In [13]:
# Cloud Storage bucket name taken from the configuration profile.
BUCKET = main_config['bucket']
BUCKET

'pbalm-cxb-aa-eu'

In [14]:
# Upload the CSV to the bucket under the same file name.
!gsutil cp {filename} gs://{BUCKET}/{filename} 

Copying file://features_user.csv [Content-Type=text/csv]...
/ [1 files][  4.4 KiB/  4.4 KiB]                                                
Operation completed over 1 objects/4.4 KiB.                                      


## Ingest feature data

In [16]:
# Import the uploaded CSV into the Feature Store for the entity type created above.
gcs_uris = [f'gs://{BUCKET}/{filename}']

ingest_entities_csv(
    project=PROJECT,
    region=REGION,
    store_id=FEATURESTORE_ID,
    entity=entity,
    features=features,
    gcs_uris=gcs_uris,
)

Ingesting features for "user" entity...
done


imported_entity_count: 100
imported_feature_value_count: 200

## Test Feature Store: Read values

In [69]:
# Read user90..user101. The generated CSV only contains user0..user99,
# so the last two IDs are expected to come back empty.
features_data = {
    entity_id: read_features(PROJECT, REGION, FEATURESTORE_ID, entity, features, entity_id)
    for entity_id in (f'user{i}' for i in range(90, 102))
}

features_data

{'user90': {'v27': 0.43233606501215793, 'v28': 0.4782478397702635},
 'user91': {'v27': 0.9416494951196843, 'v28': 0.806256891634685},
 'user92': {'v27': 0.7779062300322946, 'v28': 0.7522797939488289},
 'user93': {'v27': 0.08492768270605855, 'v28': 5.3993977775501634e-05},
 'user94': {'v27': 0.5994417869948815, 'v28': 0.5387954257682637},
 'user95': {'v27': 0.8818240021759036, 'v28': 0.3420486529317205},
 'user96': {'v27': 0.33044465161460523, 'v28': 0.9422315030335132},
 'user97': {'v27': 0.9175349839104221, 'v28': 0.013577671867111896},
 'user98': {'v27': 0.1186484791889193, 'v28': 0.3108415357458678},
 'user99': {'v27': 0.8770988976812567, 'v28': 0.6458292822903422},
 'user100': {},
 'user101': {}}

In [None]:
# Scratch cell: list the members of the Feature.ValueType enum.
list(feature_pb2.Feature.ValueType)

In [None]:
# Scratch cell: show the configured Feature Store ID.
FEATURESTORE_ID

In [None]:
# Scratch cell: show the configured region.
REGION

In [62]:
# Debug cell: read an entity ID that is not in the generated CSV (which holds
# user0..user99) and inspect the raw response.
entity_value = 'user100'

feature_selector = FeatureSelector()
feature_selector.id_matcher.ids = features

read_request = featurestore_online_service_pb2.ReadFeatureValuesRequest(
    entity_type=admin_client.entity_type_path(PROJECT, REGION, FEATURESTORE_ID, entity),
    entity_id=entity_value,
    feature_selector=feature_selector)

res = data_client.read_feature_values(read_request)
print(res)

# Pair each data cell with the feature ID from the response header, since the
# data cells follow the header's order rather than the order of the request.
returned_ids = [descriptor.id for descriptor in res.header.feature_descriptors]
values = [d.value for d in res.entity_view.data]

# Display a dict { 'feature1': val1, 'feature2': val2, ... }; cells without a
# generate_time are skipped because they hold no value.
{f: v.double_value for (f, v) in zip(returned_ids, values) if v.metadata.generate_time}

header {
  entity_type: "projects/188940921537/locations/europe-west1/featurestores/creditcards/entityTypes/user"
  feature_descriptors {
    id: "v27"
  }
  feature_descriptors {
    id: "v28"
  }
}
entity_view {
  entity_id: "user100"
  data {
  }
  data {
  }
}



{}

In [56]:
# Number of data cells in the response from the debug read above.
len(res.entity_view.data)

2

In [58]:
# Check whether the first data cell lacks a generate_time.
res.entity_view.data[0].value.metadata.generate_time is None

True

In [70]:
# Display the raw response from the debug read above.
res

header {
  entity_type: "projects/188940921537/locations/europe-west1/featurestores/creditcards/entityTypes/user"
  feature_descriptors {
    id: "v27"
  }
  feature_descriptors {
    id: "v28"
  }
}
entity_view {
  entity_id: "user100"
  data {
  }
  data {
  }
}