In [0]:
import tecton 
import pandas

from datetime import timedelta, datetime
from tecton import Entity, BatchSource, batch_feature_view, spark_batch_config, Aggregation, RequestSource, on_demand_feature_view
from tecton.types import Float64, Field, String

# Authenticate against the Tecton lab cluster. The API key is read from the
# Databricks secret scope at call time so it never appears in the notebook.
tecton.set_credentials(
    tecton_api_key=dbutils.secrets.get(scope='tecton-lab-2', key='TECTON_API_KEY'),
    tecton_url="https://lab.tecton.ai/api",
)

# Handle to the 'prod' workspace.
ws = tecton.get_workspace('prod')

In [0]:
# Sample payment records. Every payment was made and last updated on
# 2023-07-25; only the user, payment id, due day (in July 2023) and amount vary,
# so those four fields are listed once per record below.
rows = [
  {
    "user_id": user_id,
    "payment_id": payment_id,
    "payment_date": datetime(2023, 7, 25),
    "payment_due_date": datetime(2023, 7, due_day),
    "update_timestamp": datetime(2023, 7, 25),
    "amt": amt,
  }
  for user_id, payment_id, due_day, amt in [
    ("12345", "54321", 20, 10),
    ("12345", "54320", 25, 9),
    ("12345", "54310", 25, 8),
    ("10000", "10101", 15, 7),
    ("10000", "10102", 10, 6),
    ("10000", "10103", 5, 5),
  ]
]

# Go through pandas to turn the list of record dicts into a Spark DataFrame,
# then render it in the notebook.
df_spark = spark.createDataFrame(
    pandas.DataFrame.from_records(rows)
)
display(df_spark)

In [0]:
# Expose the sample payments as the temp view "data" so the %sql cells can query it.
df_spark.createOrReplaceTempView("data")

In [0]:
@spark_batch_config()
def batch_config(spark):
  """Return the notebook-level `df_spark` DataFrame as the batch source data.

  The `spark` argument is accepted but not used; the function simply hands
  back the DataFrame built earlier in the notebook.
  """
  return df_spark

# Batch source backed by the function above.
late_payments_source = BatchSource(
    batch_config=batch_config,
    name='late_payments_source',
)

# Validate the source, then preview the data it serves.
late_payments_source.validate()
display(late_payments_source.get_dataframe().to_spark())

In [0]:
# Entity that the feature views below are keyed on, joined via `user_id`.
user = Entity(name="user", join_keys=["user_id"])
user.validate()

@batch_feature_view(
    mode='spark_sql',
    sources=[late_payments_source],
    entities=[user],
    timestamp_field="update_timestamp",
    feature_start_time=datetime(2023,7,1),
    batch_schedule=timedelta(days=1),
)
def features(late_payments_source):
  """Build the SQL that projects per-payment columns from the payments source.

  Selects `user_id`, `payment_id`, `amt` and `update_timestamp` with no
  filtering or aggregation.
  """
  query = f'''
    select user_id, 
    payment_id,
    amt, 
    update_timestamp
    from {late_payments_source}
  '''
  return query
features.validate()

In [0]:
@batch_feature_view(
    mode='spark_sql',
    sources=[late_payments_source],
    entities=[user],
    timestamp_field="update_timestamp",
    feature_start_time=datetime(2023,7,1),
    batch_schedule=timedelta(days=1),
    aggregation_interval=timedelta(days=1),
    # Both aggregations use the same 365*10-day window: a sum of the 0/1
    # late flag and a count of payment ids.
    aggregations=[
        Aggregation(column='late_payment', function='sum', time_window=timedelta(days=365*10)),
        Aggregation(column='payment_id', function='count', time_window=timedelta(days=365*10)),
    ],
)
def payment_aggregates(late_payments_source):
  """Build the SQL that flags each payment as late (1) or not late (0).

  A payment is flagged late when `datediff(payment_date, payment_due_date)`
  is greater than 0. The `late_payment` flag and `payment_id` columns feed
  the aggregations declared on the decorator.
  """
  query = f'''
    select user_id, 
        payment_id, 
        CASE when datediff(payment_date, payment_due_date) > 0 THEN 1 ELSE 0 END as late_payment,
        update_timestamp
    from {late_payments_source}
  '''
  return query
payment_aggregates.validate()

In [0]:
%sql
-- Inspect every column of the sample payments temp view.
SELECT *
FROM data

In [0]:
%sql
-- Preview the per-payment late flag: 1 when payment_date falls after
-- payment_due_date, otherwise 0.
SELECT
  user_id,
  payment_id,
  CASE WHEN datediff(payment_date, payment_due_date) > 0 THEN 1 ELSE 0 END AS late_payment
FROM data

In [0]:
# Build the historical feature frame once and reuse it for the preview. `z` is
# also consumed by the later `late_ratio.run(...)` cells, so displaying the same
# object avoids issuing an identical get_historical_features request twice and
# guarantees the preview matches what those cells receive.
z = payment_aggregates.get_historical_features(
    start_time=datetime(2023, 1, 1),
    end_time=datetime(2023, 8, 1),
).to_spark()
display(z)

In [0]:
# Request-time inputs: both timestamps arrive as strings.
request_schema = [Field("payment_timestamp", String), Field("payment_due_timestamp", String)]
transaction_request = RequestSource(schema=request_schema)

# Single output feature; the key returned by `late_ratio` must match this name.
output_schema = [Field("late_payment_ratio", Float64)]

@on_demand_feature_view(
   sources=[transaction_request, payment_aggregates],
   mode='python',
   schema=output_schema
)
def late_ratio(transaction_request, payment_aggregates):
    """Compute the share of late payments, counting the current request.

    Args:
        transaction_request: mapping with string fields `payment_timestamp`
            and `payment_due_timestamp`, both formatted as
            '%Y-%m-%dT%H:%M:%S.000+0000'.
        payment_aggregates: mapping with `payment_id_count_3650d_1d` and
            `late_payment_sum_3650d_1d`; a value of None is treated as 0.

    Returns:
        A dict with the single key `late_payment_ratio` (matching
        `output_schema`): (historical late payments + 1 if the current payment
        is after its due timestamp) / (historical payments + 1).

    Raises:
        ValueError: from `datetime.strptime` if either timestamp does not
            match the expected format.
    """
    # Imported inside the body so the function does not depend on notebook globals.
    from datetime import datetime

    timestamp_format = '%Y-%m-%dT%H:%M:%S.000+0000'

    payments = payment_aggregates['payment_id_count_3650d_1d']
    if payments is None:
        payments = 0
    late_payments = payment_aggregates['late_payment_sum_3650d_1d']
    if late_payments is None:
        late_payments = 0

    paid_at = datetime.strptime(transaction_request['payment_timestamp'], timestamp_format)
    due_at = datetime.strptime(transaction_request['payment_due_timestamp'], timestamp_format)
    # The current request adds one payment to the denominator, and one late
    # payment to the numerator when it was paid after its due timestamp.
    current_is_late = 1 if paid_at > due_at else 0

    return {'late_payment_ratio': (late_payments + current_is_late) / (1 + payments)}

late_ratio.validate()

In [0]:
# Sample request timestamps in the '%Y-%m-%dT%H:%M:%S.000+0000' layout that
# late_ratio parses.
x = "2023-07-25T00:00:00.000+0000"  # the later of the two timestamps
y = "2023-07-20T00:00:00.000+0000"  # the earlier of the two timestamps

In [0]:
# Late case: paid on x (2023-07-25), after the due timestamp y (2023-07-20).
# NOTE(review): `z[...]` selects two columns of the Spark DataFrame rather than
# a single row's values; late_ratio indexes its input like a mapping and does
# arithmetic on the values — confirm `run` accepts this for a python-mode view.
late_ratio.run(
    transaction_request={"payment_timestamp": x, "payment_due_timestamp": y},
    payment_aggregates=z['late_payment_sum_3650d_1d', 'payment_id_count_3650d_1d'],
)

In [0]:
# On-time case: paid on y (2023-07-20), before the due timestamp x (2023-07-25).
# NOTE(review): `z[...]` selects two columns of the Spark DataFrame rather than
# a single row's values; late_ratio indexes its input like a mapping and does
# arithmetic on the values — confirm `run` accepts this for a python-mode view.
late_ratio.run(
    transaction_request={"payment_timestamp": y, "payment_due_timestamp": x},
    payment_aggregates=z['late_payment_sum_3650d_1d', 'payment_id_count_3650d_1d'],
)