In [None]:
# For Databricks
# dbutils.widgets.text('Tecton API Key', defaultValue='')
# dbutils.widgets.text('API endpoint', defaultValue='')

# TECTON_API_KEY = dbutils.widgets.get('Tecton API Key')
# TECTON_CLUSTER = dbutils.widgets.get('API endpoint')

# For EMR
# Read the credentials from the TECTON_API_KEY / TECTON_CLUSTER environment
# variables when they are set, so a real key never has to be typed into (and
# saved with) the notebook. The placeholder strings remain the fallback, so
# editing them in place still works.
import os

TECTON_API_KEY = os.environ.get('TECTON_API_KEY', '<your api key here>')
TECTON_CLUSTER = os.environ.get('TECTON_CLUSTER', '<your cluster.tecton.ai here>')

# Welcome to Tecton!

We've designed this notebook to introduce you to the basic workflow of creating a feature view in Tecton, testing it, and pushing it to your Tecton instance when you're done.

# Initializing your session

## Logging into Tecton

In [None]:
import tecton
from datetime import timedelta, datetime
import pandas as pd

# Call the SDK's version summary right after importing it.
# NOTE(review): presumably reports the installed Tecton SDK version/build — confirm against the SDK docs.
tecton.version.summary()

# Create feature view from scratch

In [None]:
from tecton import (
  BatchSource,
  FileConfig
)

### Create data source

In [None]:
# Batch data source that reads the transactions Parquet file from S3,
# using the `timestamp` column as the record timestamp.
transactions_batch = BatchSource(
    name="transactions_batch",
    owner="jack@tecton.ai",
    tags={"release": "production"},
    batch_config=FileConfig(
        uri="s3://tecton.ai.public/data/fraud_mini/transactions/transactions.parquet",
        file_format="parquet",
        timestamp_field="timestamp",
    ),
)

### Inspect data source

### Create the entity and feature view logic

Now that we have our data source created, we can create our feature view, which contains both the transformation logic we want to run on our data source and the orchestration parameters.

An **entity** just tells Tecton what the join key is, in this case the `user_id` column. Tecton will check to make sure the join key column(s) exist after running our transformation logic, so we need to make sure we return a `user_id` column from our feature view.

A **batch_feature_view** ties it all together: we specify the data source and entity, and define the transformation logic in the decorated function, plus we add in what storage layer Tecton should write this data to and how frequently Tecton should run this logic against new data.

In [None]:
from tecton import (
  batch_feature_view,
  Aggregation
)

In [None]:
# Add feature view code from workshop here
from tecton import Entity

# The feature view below lists `user` in `entities`, so the entity has to exist
# before the decorator runs. Its join key is the `user_id` column produced by
# the SQL transformation.
user = Entity(name='fraud_user', join_keys=['user_id'])


# Counts transactions per user over trailing 24-hour and 72-hour windows.
# Each source row contributes a constant `transaction` value of 1, which the
# `count` aggregations roll up on a 24-hour aggregation interval.
@batch_feature_view(
    sources=[transactions_batch],
    entities=[user],
    mode='spark_sql',
    aggregations=[
        Aggregation(column='transaction', function='count', time_window=timedelta(hours=24)),
        Aggregation(column='transaction', function='count', time_window=timedelta(hours=72))
    ],
    aggregation_interval=timedelta(hours=24),
    online=True,
    offline=True,
    feature_start_time=datetime(2020, 10, 10)
)
def user_transaction_counts(transactions_batch):
    # Returns the Spark SQL text; `nameOrig` is renamed to the `user_id` join key.
    return f'''
        SELECT
        nameOrig AS user_id,
        1 AS transaction,
        timestamp
        FROM {transactions_batch}
    '''

## Test our feature view

Now let's see what kind of data we get back from our feature view

Our output is a Tecton dataframe and we're going to save this as a Spark dataframe, so the Spark dataframe methods you might be familiar with can be used here. Working in Spark is a good idea if you're going to be examining a Tecton query that could be bringing back a large set of results.

# Append our feature view to a remote feature service

## Log in to a remote cluster

We're going to log into our cluster using a Tecton API key linked to a service account. This service account needs to be created and granted at least Consumer access to the workspace we're trying to read

In [None]:
# Use the TECTON_API_KEY / TECTON_CLUSTER values set in the first cell so this
# cell runs on both Databricks and EMR. The Databricks widgets only exist if
# the commented-out lines in that cell are enabled, and `dbutils` is not
# available outside Databricks.
tecton_api_key = TECTON_API_KEY
tecton_api_endpoint = TECTON_CLUSTER

# Point the SDK at the cluster's API endpoint and authenticate with the key.
tecton.set_credentials(tecton_url=f'https://{tecton_api_endpoint}/api', tecton_api_key=tecton_api_key)
tecton.test_credentials()
print(tecton.who_am_i())

## Get our feature service remotely from Tecton

We only created the feature view above from scratch because it's worth seeing what the whole flow looks like. In many cases, however, you want to extend or modify an existing feature service.

In [None]:
# Spine of (user, event time, label) rows to request feature values for.
# Timestamps are timezone-aware UTC.
spine = pd.DataFrame(
    data=[
        ("C151068873", pd.Timestamp('2023-03-29 00:00Z'), True),
        ("C658286540", pd.Timestamp('2023-03-28 00:00Z'), False),
    ],
    columns=['user_id', 'timestamp', 'is_fraud'],
)

spine

## Now let's extend that remote feature service with a locally-developed feature view

In [None]:
# Handle to the remote workspace whose data source and entity are reused below.
# NOTE(review): 'prod' matches the `workspace_name` used in the HTTP request
# later in this notebook — change it if your objects live in another workspace.
ws = tecton.get_workspace('prod')


# Locally developed feature view built on the remote `transactions_batch`
# source and `fraud_user` entity. Materialization is switched off
# (online=False, offline=False).
@batch_feature_view(
    sources=[tecton.FilteredSource(ws.get_data_source('transactions_batch'))],
    entities=[ws.get_entity('fraud_user')],
    mode='spark_sql',
    online=False,
    offline=False,
    feature_start_time=datetime(2022, 4, 1),
    batch_schedule=timedelta(days=1),
    ttl=timedelta(days=30),
    description='Last user transaction amount (batch calculated)'
)
def last_transaction_amount_batch(transactions_batch):
    # Returns the Spark SQL text; `nameOrig` is renamed to the `user_id` join key.
    return f'''
        SELECT
            timestamp,
            nameOrig AS user_id,
            amount
        FROM
            {transactions_batch}
        '''

# Apply our new feature view + feature service into our repository!

So far, all of our development has been done locally in our Jupyter instance. We’re happy with our work so far and we want to push it to a workspace in Tecton so we can do additional integration testing and/or push to production:

The steps are:

* Copy and paste the feature view and feature service definitions into the repository’s `.py` files
* Log into your cluster (`tecton login https://...`) and select or create a workspace (`tecton workspace create ...` or `tecton workspace select ...`)
* Run a `tecton plan` to inspect your features
* Once satisfied, run `tecton apply` to push your features to the workspace

Now let's test our feature service!

So far, all of our development has been done locally in our Jupyter instance. We're happy with our work so far and we want to push it to a workspace in Tecton so we can do additional integration testing and/or push to production:

The steps are:
* Copy and paste the feature view and feature service definitions into the repository's `.py` files
* Log into your cluster (`tecton login https://...`) and select or create a workspace (`tecton workspace create ...` or `tecton workspace select ...`)
* Run a `tecton plan` to inspect your features
* Once satisfied, run `tecton apply` to push your features to the workspace

Now let's test our feature service!

# Get real-time features from your REST API

Once you've tested your features to make sure they work (and check out our Unit Testing capabilities!), if you need real-time serving, there are just a couple more steps:

* Create or select a **live** workspace. This is a workspace with materialization switched on.
* Wait for your features to materialize. You can check the Web UI for more details here.
* Create or use a service account and grant it at least `Consumer` level privileges
* Use the API key associated with the service account to retrieve features.

You can use any language to retrieve features, since it's just a REST API call. We suggest using your language's native HTTP request libraries (in Python, `requests`, perhaps together with `asyncio`), or if you're on Java, check out our high-performance REST API client [here](https://github.com/tecton-ai/tecton-http-client-java) (also available on Maven).

In [None]:
import requests
import json

Let's formulate the request data packet. You can use our [API reference documentation](https://docs.tecton.ai/http-api) and [this docs page](https://docs.tecton.ai/docs/reading-feature-data/reading-feature-data-for-inference/reading-online-features-for-inference-using-the-http-api/#metadata-options-for-the-http-api) for information on the metadata you can retrieve on your features.

In [None]:
# JSON body for the get-features call: which feature service and workspace to
# query, the join keys and request-context values to look up, and which
# metadata to include in the response (every option switched on).
request_data = json.dumps({
    'params': {
        'feature_service_name': 'fraud_detection_feature_service',
        'join_key_map': {'user_id': 'user_724235628997', 'merchant': 'fraud_Crona and Sons'},
        'request_context_map': {'amt': 167.11},
        'workspace_name': 'prod',
        'metadata_options': dict.fromkeys(
            (
                'include_names',
                'include_effective_times',
                'include_data_types',
                'include_slo_info',
                'include_serving_status',
            ),
            True,
        ),
    },
})

Let's make the POST request (we're using the `TECTON_CLUSTER` and `TECTON_API_KEY` variables defined in the first cell of this notebook)

In [None]:
# POST the JSON payload to the cluster's get-features endpoint, authenticating
# with the API key in the `Authorization` header.
r = requests.post(
    url=f"https://{TECTON_CLUSTER}/api/v1/feature-service/get-features",
    headers={
        'Authorization': f"Tecton-key {TECTON_API_KEY}"
    },
    data=request_data,
    # `requests` waits forever by default; bound the wait so an unreachable
    # cluster fails the cell instead of hanging the kernel.
    timeout=30,
)

Now we examine the output