In [None]:
# Define env variables
import os

# API is consumed by Client(API, ...) in the client-initialization cell.
# NOTE(review): the environment variable name "API" is an assumption — confirm
# against the deployment's conventions.
API = os.getenv("API")
# Name of the project this notebook creates and later deletes (falls back to a demo name)
PROJECT_NAME = os.getenv("PROJECT_NAME", "demo_onlineapi")
# Token handed to client.auth.set_auth_token
REFRESH_TOKEN = os.getenv("REFRESH_TOKEN")
# Path of an extra jar added to the Spark session configuration
SPARK_DEPS_JAR = os.getenv("SPARK_DEPS_JAR")

In [None]:
# Install feature store and other dependencies
! pip install pyspark==3.4.1 h2o-featurestore

In [None]:
# Configure a local Spark session with the Delta Lake extension and the
# hadoop-aws / hadoop-azure connector packages
from pyspark.sql import SparkSession

builder = (
    SparkSession.builder
    .master("local")
    .config("spark.jars.packages", "org.apache.hadoop:hadoop-aws:3.3.1,io.delta:delta-core_2.12:2.4.0,org.apache.hadoop:hadoop-azure:3.3.1")
    .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
    .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog")
)

# SPARK_DEPS_JAR comes from os.getenv and is None when the variable is unset;
# only register the extra jar when a path was actually provided.
if SPARK_DEPS_JAR:
    builder = builder.config("spark.jars", SPARK_DEPS_JAR)

spark = builder.getOrCreate()

# Keep Spark's console logging down to errors
spark.sparkContext.setLogLevel("ERROR")

In [None]:
# Show the SparkSession object as the cell output
spark

In [None]:
# Login and client initialization
# The star import presumably provides Client here, plus S3Credentials and CSVFile
# used by later cells.
from featurestore import *
# NOTE(review): API is not assigned in this cell; it must already be defined
# (presumably the Feature Store endpoint) before this cell runs — confirm.
client = Client(API, secure=True)
# Authenticate with the token read from the REFRESH_TOKEN environment variable
client.auth.set_auth_token(REFRESH_TOKEN)

In [None]:
# Read the S3 settings from the environment and wrap them in a credentials object
S3_ACCESS_KEY, S3_SECRET_KEY, S3_REGION = (
    os.getenv(var_name)
    for var_name in ("S3_ACCESS_KEY", "S3_SECRET_KEY", "S3_REGION")
)
credentials = S3Credentials(S3_ACCESS_KEY, S3_SECRET_KEY, S3_REGION)

### Create a Project

In [None]:
# Create the project named by PROJECT_NAME; the returned handle is used below
# to register and look up feature sets
project = client.projects.create(PROJECT_NAME)

In [None]:
# Specify a data source
# CSV file at an s3a:// location; it is used below for both schema extraction
# and ingestion, together with the S3 credentials
source = CSVFile("s3a://h2o-public-test-data/end-to-end-notebook-demo/bank-marketing.csv")

### Extract Schema from the source

In [None]:
# Extract the schema from the CSV source, passing the S3 credentials
schema = client.extract_schema_from_source(source, credentials)

### Create a Feature set

In [None]:
# Register a feature set named "fs_bank_marketing_online" in the project from the extracted schema
fs = project.feature_sets.register(schema, "fs_bank_marketing_online")

### Ingest data into the feature set

In [None]:
# Ingest the CSV source into the feature set, using the S3 credentials
fs.ingest(source, credentials)

### Retrieve features as a Spark DataFrame

In [None]:
# Obtain a retrieval reference for the feature set
ref = fs.retrieve()
# Convert it using the local Spark session; presumably this returns a Spark
# DataFrame, which is shown as the cell output
ref.as_spark_frame(spark)

## Online API

In [None]:
# Create a second feature set for the online API
# It is registered from the same schema with "duration" as the primary key; the
# result is only displayed here, not assigned to a variable
project.feature_sets.register(schema, "derived_fs_bank_marketing_online", primary_key="duration")

### Ingest a row into the online Feature Store

In [None]:
# Build a single JSON row for the online store
import json

# Named online_row rather than input so the built-in input() is not shadowed
# for the rest of the notebook session.
online_row = {
    "age": 30,
    "job": "admin",
    "marital": "married",
    "education": "university.degree",
    "default": "no",
    "housing": "yes",
    "loan": "no",
    "contact": "email",
    "month": "november",
    "day_of_week": "wed",
    "duration": 200,  # primary-key column of the derived feature set
    "campaign": 1,
    "pdays": 1300,
    "previous": 0,
    "poutcome": "nonexistent",
    "emp.var.rate": 1.1,
    "cons.price.idx": 93.994,
    "cons.conf.idx": -35.0,
    "euribor3m": 2.323,
    "nr.employed": 5000,
    "y": "no",
}
# JSON string form of the row; this is what gets passed to fs.ingest_online
online_data = json.dumps(online_row)

In [None]:
# Ingest a single JSON row into the online Feature Store
# fs is rebound here: from this cell on it refers to the derived feature set,
# not to "fs_bank_marketing_online"
fs = project.feature_sets.get("derived_fs_bank_marketing_online")
fs.ingest_online(online_data)
# Trailing expression shows the feature set as the cell output
fs

### Retrieve data from the online Feature Store

In [None]:
# Read the row back from the online store, e.g. to use as a model payload
import json
import time

# Online ingestion is asynchronous, so the row may not be available right away;
# pause briefly before reading it back.
time.sleep(2)

# Recover the primary-key value from the row that was ingested
marketing_row = json.loads(online_data)
lookup_key = marketing_row['duration']

# Time only the online lookup itself
start = time.perf_counter()
payload = fs.retrieve_online(lookup_key)
end = time.perf_counter()

print(f"Retrieved in {end - start:0.4f} seconds")
payload

## Cleanup

In [None]:
# Look up the project created by this notebook by name and delete it
# NOTE(review): presumably this also removes the feature sets registered under
# the project — confirm
client.projects.get(PROJECT_NAME).delete()