In [None]:
# PREP work - Set these parameters for automation.
# Every value is read from the environment so the notebook can run unattended.
import os

# Address handed to Client(...) in the login cell. It has to be assigned here,
# otherwise a fresh kernel stops with NameError at login.
# NOTE(review): the expected format is presumably "host:port" - confirm against
# the deployment; the value is None when the variable is unset.
API = os.getenv("API")
# Name of the demo project that this notebook creates and deletes.
PROJECT_NAME = os.getenv("PROJECT_NAME", "demo_featuresetapi")
# Token passed to client.auth.set_auth_token(); None when the variable is unset.
REFRESH_TOKEN = os.getenv("REFRESH_TOKEN")

In [None]:
# Login and authenticate
# The star import presumably supplies Client, CSVFile and S3Credentials used in
# this notebook (no other import of them is visible).
# NOTE(review): explicit imports would be clearer - confirm the exported names first.
from featurestore import *
# API must already be defined when this cell runs; secure=True is passed to the
# client constructor.
client = Client(API, secure=True)
# REFRESH_TOKEN comes from os.getenv and is None when the variable is unset.
client.auth.set_auth_token(REFRESH_TOKEN)

In [None]:
# Define Credentials
# The S3 settings are read from the environment. os.getenv returns None for any
# variable that is not set, and that None is passed straight to S3Credentials.
S3_ACCESS_KEY = os.getenv("S3_ACCESS_KEY")
S3_SECRET_KEY = os.getenv("S3_SECRET_KEY")
S3_REGION = os.getenv("S3_REGION")
credentials = S3Credentials(S3_ACCESS_KEY, S3_SECRET_KEY, S3_REGION)

In [None]:
# Start from a clean slate: delete the project if it already exists.
# This is best-effort - a failed lookup or delete must not stop the notebook -
# but the reason is printed instead of being silently discarded.
try:
    client.projects.get(PROJECT_NAME).delete()
except Exception as exc:  # not a bare except, so a kernel interrupt still propagates
    print("Pre-run project cleanup skipped:", repr(exc))

## Feature set API

In [None]:
# Create a project named PROJECT_NAME; the returned handle is kept in `project`
# and used by the feature set cells that follow.
project = client.projects.create(PROJECT_NAME)

In [None]:
# Specify input data source: a CSV file addressed by an s3a:// URI
source = CSVFile("s3a://feature-store-test-data/duplicate_match/combined_match.csv")

In [None]:
# Extract the schema from the data source; the S3 credentials object is passed
# along with the source.
schema = client.extract_schema_from_source(source, credentials)

In [None]:
# Display schema as a string (the bare last expression is the cell output)
str_schema = schema.to_string()
str_schema

In [None]:
# Register a new feature set named "fs_featureset_api" from the extracted
# schema, with "rec_id_orig" as the primary key.
# NOTE(review): the earlier wording of this comment also mentioned ingesting
# data, but only register() is called here - confirm whether a separate ingest
# step is intended.
project.feature_sets.register(schema, "fs_featureset_api", "description", primary_key="rec_id_orig")

In [None]:
# Get the feature set registered above by name and display it
fs = project.feature_sets.get("fs_featureset_api")
fs

In [None]:
# List the feature set versions (output shown as the cell result)
fs.list_versions()

In [None]:
# Add a tag to the feature set, then display the feature set.
# NOTE(review): the tag is appended to the local object's `tags`; confirm
# whether the change is persisted automatically or needs an explicit save.
fs.tags.append("people_match1.0")
fs

In [None]:
# Set feature set permissions
# Carla has view-only access to the feature set; "social security and date of birth" are masked for Carla (Consumer privileges)
# Mike, a sensitive consumer, can see the raw data of the "social security id and date of birth" features (Sensitive Consumer privileges)
# The feature set is fetched again by name before the permissions are added.
fs = project.feature_sets.get("fs_featureset_api")
fs.add_consumers(["carla@h2o.ai"])
fs.add_sensitive_consumers(["mike@h2o.ai"])

In [None]:
# Mask the special data. Only sensitive consumers can view the data.
# The social security id and date of birth columns (original and duplicate
# record) are flagged as sensitive on the in-memory schema, in this order.
for column_name in (
    "soc_sec_id_orig",
    "soc_sec_id_dup",
    "date_of_birth_orig",
    "date_of_birth_dup",
):
    schema[column_name].special_data.sensitive = True

In [None]:
# Create a new feature set version from the modified schema; the reason string
# is passed along with the call. The returned object is displayed.
new_fs = fs.create_new_version(
    schema=schema,
    reason="hide the sensitive data: social security and date of birth",
)
new_fs

In [None]:
# List all versions of the feature set, queried through the newly created version object
new_fs.list_versions()

## Cleanups

In [None]:
# Delete the demo project (looked up by PROJECT_NAME) so that nothing is left behind
client.projects.get(PROJECT_NAME).delete()
