# This notebook builds an experimental design for benchmarking the Featurestore and then runs the experiment

In [2]:
# Install doepy (imported further down as `build` to generate the design) and the
# google-cloud-aiplatform client library. If the packages were newly installed,
# restart the kernel before continuing.
# NOTE(review): versions are not pinned — consider pinning for reproducible runs.
%pip install -q doepy google-cloud-aiplatform

Note: you may need to restart the kernel to use updated packages.


In [3]:
# set up the featurestore client
import pandas as pd
from datetime import datetime
from dateutil.parser import parse
import datetime as datetime_class
import json
from google.cloud import bigquery
import time
from helper_fns.helpers import *


# --- Configuration: adjust these values for your own project ---
PROJECT_ID = "matching-engine-blog"
REGION = "us-central1"
BUCKET = "matching-engine-demo-blog"
BQ_DATASET = "movielens"
FEATURESTORE_ID = "performance_testing"
API_ENDPOINT = "us-central1-aiplatform.googleapis.com"  # @param {type:"string"}

# Default experiment sizes.
# NOTE(review): these module-level values are not referenced by the cells
# visible in this notebook — confirm whether helper code relies on them.
n_iterations, n_predictions, n_workers = 2, 100, 1

# --- Featurestore clients, both pointed at the endpoint configured above ---
# Admin client: also used right below to build the location resource path.
admin_client = FeaturestoreServiceClient(
    client_options=dict(api_endpoint=API_ENDPOINT)
)
# Online-serving client.
data_client = FeaturestoreOnlineServingServiceClient(
    client_options=dict(api_endpoint=API_ENDPOINT)
)

# Resource path for PROJECT_ID in REGION, as produced by the admin client.
BASE_RESOURCE_PATH = admin_client.common_location_path(PROJECT_ID, REGION)

# BigQuery client used for building the benchmark datasets for the featurestore.
client = bigquery.Client()


## Define the repeated-measures run function

In [4]:
def repeat_measure(n_iterations, n_predictions, n_workers, n_repeats=30):
    """Set up one featurestore configuration and time it repeatedly.

    `create_a_fs_run` is called once with the configuration, then `measure_fs`
    is called `n_repeats` times; every call contributes one row to the result.
    Both helpers come from `helper_fns.helpers` and are not visible here —
    presumably the first provisions/loads the featurestore and the second times
    online reads; confirm against the helper module.

    Args:
        n_iterations: Forwarded to `create_a_fs_run` and `measure_fs`.
        n_predictions: Forwarded to `create_a_fs_run` and `measure_fs`; also
            recorded in every result row.
        n_workers: Forwarded to `create_a_fs_run`; also recorded in every
            result row.
        n_repeats: Number of times `measure_fs` is called. Defaults to 30.

    Returns:
        dict mapping column name to a list with one entry per repeat, with keys
        'create_stats', 'n_features', 'n_predictions', 'n_workers' and
        'total_seconds'. All lists are empty when `n_repeats` is 0.
    """
    # Each key appears exactly once (the original literal listed 'n_features' twice).
    data = {'create_stats': [],
            'n_features': [],
            'n_predictions': [],
            'n_workers': [],
            'total_seconds': []
           }
    # The setup result is the same for every repeat, so compute it once.
    stats = create_a_fs_run(n_iterations, n_predictions, n_workers)

    total_time = None
    for _ in range(n_repeats):
        total_time, n_features = measure_fs(n_iterations, n_predictions)
        data['create_stats'].append(stats)
        data['n_features'].append(n_features)
        data['n_predictions'].append(n_predictions)
        data['n_workers'].append(n_workers)
        data['total_seconds'].append(total_time)

    # Report the last measured time; skipped when no repeat ran, which would
    # otherwise raise NameError on an unbound `total_time`.
    if total_time is not None:
        print(total_time)
    return data

In [9]:
from doepy import build
import numpy as np
import pandas as pd

# Latin hypercube sampling is random; seed NumPy's global RNG so that a
# Restart & Run All reproduces the same design.
# NOTE(review): this assumes doepy draws its samples through numpy's global
# random state — confirm against the installed doepy version.
DESIGN_SEED = 42
np.random.seed(DESIGN_SEED)

# Each factor is given as a [low, high] range; 30 design points are sampled.
design_data = build.lhs(
{'Nodes':[2,8],
'N_Rows':[1, 10],
'N_Iterations':[6,10],
}, num_samples=30)

# Fix the column order and cast the sampled values to integers, since the
# factors are passed on as counts. Note that astype(int) truncates toward zero.
design_data = design_data[['Nodes', 'N_Rows', 'N_Iterations']].astype(int)
design_data

Unnamed: 0,Nodes,N_Rows,N_Iterations
0,5,1,7
1,5,3,6
2,7,9,9
3,6,4,9
4,2,7,8
5,7,6,7
6,5,9,8
7,3,6,6
8,3,5,9
9,4,7,6


In [6]:
# Run the experiment: one repeated-measures run per design row, with the
# accumulated results checkpointed to CSV after every row.

cols = ['create_stats',
        'n_features',
        'n_predictions',
        'n_workers',
        'total_seconds']

# One timestamp for the whole run, so every checkpoint overwrites the same file
# instead of leaving a separate CSV per design row. The strftime format also
# keeps spaces and colons (present in str(datetime)) out of the file name.
ts = datetime.now().strftime('%Y%m%d-%H%M%S')
results_path = f'data/experiment-{ts}.csv'

# Empty frame with the expected columns; this is what gets saved and displayed
# if the design has no rows.
data = pd.DataFrame([], columns=cols)

# Collect one frame per design row and concatenate, rather than growing a
# DataFrame with the removed `DataFrame.append`.
run_frames = []

for index, row in design_data.iterrows():
    print(f"Testing for following row: \n{row}")
    repeat_run_data = repeat_measure(row['N_Iterations']
                                     , row['N_Rows']
                                     , row['Nodes'], n_repeats=30)
    run_frames.append(pd.DataFrame.from_dict(repeat_run_data))
    data = pd.concat(run_frames, ignore_index=True)
    # Checkpoint so that a failure in a later row does not lose finished rows.
    data.to_csv(results_path)

# Final save (also writes the header-only file when the design was empty).
data.to_csv(results_path)

data

Testing for following row: 
Nodes           5
N_Rows          9
N_Iterations    7
Name: 0, dtype: int64
Deleted featurestore 'performance_testing'.
name: "projects/180938242395/locations/us-central1/featurestores/performance_testing"

name: "projects/180938242395/locations/us-central1/featurestores/performance_testing/entityTypes/movies"

Ran for a total of 0:05:34.026333
imported_entity_count: 9
imported_feature_value_count: 441

Ran for a total of: 0.2307415008544922 seconds for 9 streaming predictions 
 Per prediction seconds: 0.02563794453938802
Ran for a total of: 0.1574552059173584 seconds for 9 streaming predictions 
 Per prediction seconds: 0.017495022879706487
Ran for a total of: 0.1835622787475586 seconds for 9 streaming predictions 
 Per prediction seconds: 0.020395808749728732
Ran for a total of: 0.16243386268615723 seconds for 9 streaming predictions 
 Per prediction seconds: 0.018048206965128582
Ran for a total of: 0.023209571838378906 seconds for 9 streaming predictions 

RetryError: Deadline of 120.0s exceeded while calling functools.partial(<function _wrap_unary_errors.<locals>.error_remapped_callable at 0x7fc824ed37a0>, name: "projects/180938242395/locations/us-central1/featurestores/performance_testing/entityTypes/movies/operations/4973789419753439232"
, metadata=[('x-goog-request-params', 'name=projects/180938242395/locations/us-central1/featurestores/performance_testing/entityTypes/movies/operations/4973789419753439232'), ('x-goog-api-client', 'gl-python/3.7.10 grpc/1.38.1 gax/1.31.2')]), last exception: 429 Quota exceeded for quota metric 'Resource management (CRUD) requests' and limit 'Resource management (CRUD) requests per minute per region' of service 'aiplatform.googleapis.com' for consumer 'project_number:180938242395'.

In [None]:
!gsutil cp f'experiment-$ts.csv' gs://$BUCKET/benchmarking_data