# This notebook builds an experimental design for benchmarking the featurestore and then runs the experiment

In [1]:
# Install doepy (used below to build the experimental design) and the
# google-cloud-aiplatform package into the kernel's environment.
# NOTE(review): versions are not pinned, so a fresh install may pull newer releases
# than the ones this notebook was last run with.
%pip install -q doepy google-cloud-aiplatform

Note: you may need to restart the kernel to use updated packages.


In [1]:
# set up the featurestore client
import pandas as pd
from datetime import datetime
from dateutil.parser import parse
import datetime as datetime_class
import json
from google.cloud import bigquery
import time
from helper_fns.helpers import *


# Configuration -- change these values to match your own environment.
BUCKET = "matching-engine-demo-blog"
BQ_DATASET = "movielens"
PROJECT_ID = "matching-engine-blog"
REGION = "us-central1"
# Derived from REGION so the API endpoint and the resource location cannot drift apart
# when the region is changed.
API_ENDPOINT = f"{REGION}-aiplatform.googleapis.com"
FEATURESTORE_ID = "performance_testing"

# Default experiment sizes.
# NOTE(review): the later cells pass explicit values from the design table to
# repeat_measure, so these module-level defaults appear unused in this notebook.
n_iterations = 2
n_predictions = 100
n_workers = 1

# Both client classes are presumably brought into scope by the wildcard import from
# helper_fns.helpers -- confirm against that module.
admin_client = FeaturestoreServiceClient(client_options={"api_endpoint": API_ENDPOINT})

data_client = FeaturestoreOnlineServingServiceClient(
    client_options={"api_endpoint": API_ENDPOINT}
)

BASE_RESOURCE_PATH = admin_client.common_location_path(PROJECT_ID, REGION)

# BigQuery client for building benchmark datasets for the featurestore. The project is
# passed explicitly so queries run against PROJECT_ID rather than whatever default
# project the ambient credentials happen to carry.
client = bigquery.Client(project=PROJECT_ID)


## Define the repeated-measures run function

In [2]:
def repeat_measure(n_iterations, n_predictions, n_workers, n_repeats=30):
    """Set up one featurestore configuration and time it repeatedly.

    ``create_a_fs_run`` is called once up front; ``measure_fs`` is then called
    ``n_repeats`` times, and each call contributes one entry to every list in
    the returned dict.

    Args:
        n_iterations: Forwarded to ``create_a_fs_run`` and ``measure_fs``.
        n_predictions: Forwarded to both helpers and recorded for every repeat.
        n_workers: Forwarded to ``create_a_fs_run`` and recorded for every repeat.
        n_repeats: Number of ``measure_fs`` calls to make. Zero or a negative
            value yields empty result lists (setup still runs).

    Returns:
        dict with keys ``create_stats``, ``n_features``, ``n_predictions``,
        ``n_workers`` and ``total_seconds``; each value is a list holding one
        entry per repeat, suitable for ``pd.DataFrame.from_dict``.
    """
    data = {
        'create_stats': [],
        'n_features': [],
        'n_predictions': [],
        'n_workers': [],
        'total_seconds': [],
    }

    # Setup runs once; whatever it returns is copied onto every repeat's row.
    stats = create_a_fs_run(n_iterations, n_predictions, n_workers)

    total_time = None
    for _ in range(n_repeats):
        total_time, n_features = measure_fs(n_iterations, n_predictions)
        data['create_stats'].append(stats)
        data['n_features'].append(n_features)
        data['n_predictions'].append(n_predictions)
        data['n_workers'].append(n_workers)
        data['total_seconds'].append(total_time)

    # Report the last measurement, but only if at least one repeat actually ran;
    # otherwise there is nothing to print.
    if data['total_seconds']:
        print(total_time)
    return data

In [3]:
from doepy import build
import numpy as np
import pandas as pd

# Seed NumPy's global RNG so the sampled design is the same on every
# Restart & Run All. Presumably doepy's Latin-hypercube sampler draws from that
# global RNG -- confirm against the installed doepy version.
DESIGN_SEED = 42
np.random.seed(DESIGN_SEED)

# Latin-hypercube sample over [low, high] ranges for each factor.
design_data = build.lhs(
    {
        'Nodes': [2, 10],
        'N_Rows': [1, 10],
        'N_Iterations': [1, 10],
    },
    num_samples=3,
)

# NOTE: astype(int) truncates the sampled floats toward zero, so the upper bound of
# each range is effectively never selected.
design_data = design_data[['Nodes', 'N_Rows', 'N_Iterations']].astype(int)
design_data

Unnamed: 0,Nodes,N_Rows,N_Iterations
0,8,7,7
1,3,4,1
2,6,1,5


In [4]:
# Run the experiment: one repeat_measure call per design row, collecting every
# repeat into a single results table that is checkpointed to CSV as it grows.
import os

cols = ['create_stats',
        'n_features',
        'n_predictions',
        'n_workers',
        'total_seconds']

# One timestamp for the whole experiment, formatted without spaces or colons so the
# file name is safe on every filesystem and on the shell. Checkpoints overwrite the
# same file instead of leaving one CSV per design row.
os.makedirs('data', exist_ok=True)
ts = datetime.now().strftime('%Y%m%d-%H%M%S')
results_path = f'data/experiment-{ts}.csv'

run_frames = []
# Start from an empty frame so `data` is defined even if the design has no rows.
data = pd.DataFrame([], columns=cols)

for index, row in design_data.iterrows():
    print(f"Testing for following row: \n{row}")
    repeat_run_data = repeat_measure(row['N_Iterations'],
                                     row['N_Rows'],
                                     row['Nodes'],
                                     n_repeats=30)
    run_frames.append(pd.DataFrame.from_dict(repeat_run_data))
    # DataFrame.append was removed in pandas 2.0; rebuild the table with pd.concat.
    data = pd.concat(run_frames, ignore_index=True)
    # Checkpoint after every design row so a failure mid-experiment keeps earlier rows.
    data.to_csv(results_path)

# Final save (also covers the case where the design was empty).
data.to_csv(results_path)

data

Testing for following row: 
Nodes           8
N_Rows          7
N_Iterations    7
Name: 0, dtype: int64
Deleted featurestore 'performance_testing'.
name: "projects/180938242395/locations/us-central1/featurestores/performance_testing"

name: "projects/180938242395/locations/us-central1/featurestores/performance_testing/entityTypes/movies"
etag: "AMEw9yPSoA9glDVghUR6rgsnNRPV8tajijNB5ffjZpjBmUjjYk7Z"

Ran for a total of 0:06:20.466271
imported_entity_count: 7
imported_feature_value_count: 336

Ran for a total of: 0.24520158767700195 seconds for 7 streaming predictions 
 Per prediction seconds: 0.03502879823957171
Ran for a total of: 0.17627263069152832 seconds for 7 streaming predictions 
 Per prediction seconds: 0.025181804384504045
Ran for a total of: 0.15088510513305664 seconds for 7 streaming predictions 
 Per prediction seconds: 0.02155501501900809
Ran for a total of: 0.19172024726867676 seconds for 7 streaming predictions 
 Per prediction seconds: 0.027388606752668108
Ran for a tota

Unnamed: 0,create_stats,n_features,n_predictions,n_workers,total_seconds
0,415.242187,56,7,8,0.245202
1,415.242187,56,7,8,0.176273
2,415.242187,56,7,8,0.150885
3,415.242187,56,7,8,0.191720
4,415.242187,56,7,8,0.199270
...,...,...,...,...,...
85,367.409406,40,1,6,0.015240
86,367.409406,40,1,6,0.184554
87,367.409406,40,1,6,0.017043
88,367.409406,40,1,6,0.016943


In [None]:
!gsutil cp f'experiment-$ts.csv' gs://$BUCKET/benchmarking_data