In [2]:
import pandas as pd
from datetime import datetime
from feast import FeatureStore

In [3]:
# Initialize the feature store with your local configuration
# store = FeatureStore(
#     repo_path="../feature_repo_local",
# )

In [4]:
# Initialize the feature store with your remote configuration
store = FeatureStore(
    repo_path="../feature_repo_local",
)

In [4]:
entity_df = pd.read_csv("../feature_repo/data/bfi_static_loan_data.csv")
entity_df['event_timestamp'] = pd.to_datetime(entity_df['event_timestamp'])

start_date = '2023-01-01'
end_date = '2023-10-01'
entity_df = entity_df.query('event_timestamp >= @start_date and event_timestamp < @end_date')[['loan_id', 'customer_id', 'event_timestamp']]

entity_df.head()

Unnamed: 0,loan_id,customer_id,event_timestamp
0,L10001,C2824,2023-01-18
1,L10002,C1409,2023-05-26
3,L10004,C5012,2023-01-09
4,L10005,C4657,2023-09-23
5,L10006,C3286,2023-03-11


In [5]:
# SQL query that directly selects loan_id and timestamps within your desired date range
# This approach lets you select exactly the entities and timeframe you want
# sql_query = """
#     SELECT
#         loan_id, -- Define all enity
#         customer_id, -- Define all entity
#         event_timestamp 
#     FROM 
#         bfi_static_loan_data 
#     WHERE 
#         event_timestamp BETWEEN '2023-01-01' AND '2023-10-01'
# """



# Retrieve all features from the updated static_loan_features view
# Note that we're now including the new fields we added
training_df = store.get_historical_features(
    entity_df=entity_df,
    features=[
        # Original features
        "static_loan_features:loan_amount",
        "static_loan_features:interest_rate",
        "static_loan_features:loan_term",
        "static_loan_features:product_type",
        "static_loan_features:collateral_value",
        "static_loan_features:ltv_ratio",
        "static_loan_features:credit_score",
        "static_loan_features:monthly_income",
        "static_loan_features:dti_ratio",
        "static_loan_features:age",
        "static_loan_features:employment_years",
        "static_loan_features:province",
        "static_loan_features:urban_rural",
        
        # Newly added features
        "static_loan_features:origination_date",
        "static_loan_features:time_to_event",
        "static_loan_features:default_flag",
        "static_loan_features:observed"
    ],
).to_df()

# Display information about the retrieved data
print(f"Retrieved {len(training_df)} rows of historical feature data")
print(f"Features included: {training_df.columns.tolist()}")
print(f"Date range: {training_df['event_timestamp'].min()} to {training_df['event_timestamp'].max()}")

# Basic data exploration with the new fields
print("\nSummary statistics for key features:")
summary_stats = training_df[['loan_amount', 'interest_rate', 'ltv_ratio', 'time_to_event']].describe()
print(summary_stats)

# Check default distribution
default_count = training_df['default_flag'].sum()
print(f"\nDefault rate: {default_count}/{len(training_df)} ({default_count/len(training_df)*100:.2f}%)")

# Observed vs unobserved events
observed_count = training_df['observed'].sum()
print(f"Observed events: {observed_count}/{len(training_df)} ({observed_count/len(training_df)*100:.2f}%)")

# Analyzing time to event distribution by default status
print("\nTime to event statistics by default status:")
print(training_df.groupby('default_flag')['time_to_event'].describe())

# Save the complete dataset for model training
training_df.to_csv("bfi_loan_complete_training_data.csv", index=False)
print("\nComplete training data saved to bfi_loan_complete_training_data.csv")

Retrieved 3702 rows of historical feature data
Features included: ['loan_id', 'customer_id', 'event_timestamp', 'loan_amount', 'interest_rate', 'loan_term', 'product_type', 'collateral_value', 'ltv_ratio', 'credit_score', 'monthly_income', 'dti_ratio', 'age', 'employment_years', 'province', 'urban_rural', 'origination_date', 'time_to_event', 'default_flag', 'observed']
Date range: 2023-01-01 00:00:00 to 2023-09-30 00:00:00

Summary statistics for key features:
        loan_amount  interest_rate    ltv_ratio  time_to_event
count  3.702000e+03    3702.000000  3702.000000    3702.000000
mean   5.540440e+07      17.071556     0.988971      15.048082
std    5.774547e+07       2.588608     0.127315       8.653020
min    4.973699e+06      12.000000     0.770000       1.000000
25%    1.387325e+07      15.300000     0.900000       8.000000
50%    1.963320e+07      16.900000     0.970000      12.000000
75%    9.360733e+07      19.000000     1.090000      24.000000
max    4.180482e+08      22.000

In [6]:
training_df

Unnamed: 0,loan_id,customer_id,event_timestamp,loan_amount,interest_rate,loan_term,product_type,collateral_value,ltv_ratio,credit_score,monthly_income,dti_ratio,age,employment_years,province,urban_rural,origination_date,time_to_event,default_flag,observed
0,L10001,C2824,2023-01-18,132049269,16.4,24,Car,155764993,0.85,628,18047162,0.52,36,3.4,Bali,Urban,2023-01-18,12,1,1
1,L10002,C1409,2023-05-26,102430418,12.9,60,Car,107223993,0.96,711,17370151,0.32,37,3.3,Jakarta,Urban,2023-05-26,24,0,0
2,L10004,C5012,2023-01-09,147151597,17.0,24,Car,156525409,0.94,684,16062879,0.46,41,6.5,West Java,Urban,2023-01-09,24,0,0
3,L10005,C4657,2023-09-23,89938163,13.8,60,Car,104096852,0.86,725,16763699,0.19,38,1.8,West Java,Urban,2023-09-23,24,0,0
4,L10006,C3286,2023-03-11,12740304,18.0,12,Motorcycle,11305347,1.13,700,7914012,0.15,42,2.0,Banten,Urban,2023-03-11,12,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3697,L14995,C5162,2023-01-02,131933556,16.5,36,Car,168648590,0.78,678,8888394,0.57,47,3.3,Jakarta,Urban,2023-01-02,1,1,1
3698,L14996,C6898,2023-08-22,68874172,15.9,36,Car,88922626,0.77,732,17800133,0.32,43,4.7,Banten,Urban,2023-08-22,24,0,0
3699,L14998,C8331,2023-07-25,63903206,12.0,12,Car,77931980,0.82,631,13863629,0.54,35,3.2,East Java,Urban,2023-07-25,7,1,1
3700,L14999,C7505,2023-03-13,18779891,17.4,36,Motorcycle,17966475,1.05,709,3520890,0.26,21,10.5,East Java,Urban,2023-03-13,24,0,0


### Get historical data using feature service

In [7]:
feature_refs = store.get_feature_service("combined_prediction_service")


combined_df = store.get_historical_features(
    entity_df=entity_df,
    features=feature_refs
).to_df()

In [8]:
combined_df.head()

Unnamed: 0,loan_id,customer_id,event_timestamp,payment_status,days_past_due,remaining_balance,payment_amount,inflation_rate,unemployment_rate,exchange_rate,loan_amount,interest_rate,loan_term,product_type,ltv_ratio,credit_score,dti_ratio,monthly_income
0,L10001,C2824,2023-01-18,,,,,,,,132049269,16.4,24,Car,0.85,628,0.52,18047162
1,L10002,C1409,2023-05-26,,,,,,,,102430418,12.9,60,Car,0.96,711,0.32,17370151
2,L10004,C5012,2023-01-09,,,,,,,,147151597,17.0,24,Car,0.94,684,0.46,16062879
3,L10005,C4657,2023-09-23,,,,,,,,89938163,13.8,60,Car,0.86,725,0.19,16763699
4,L10006,C3286,2023-03-11,,,,,,,,12740304,18.0,12,Motorcycle,1.13,700,0.15,7914012


- [x] Failed
1. Disamakan ttl (asumsi data yang melewati ttl tidak akan dikembalikan/dimunculkan) [x] Failed
2. Sama sama menggunakan 1 entity [x] Failed

### Push

In [9]:
training_df.columns

Index(['loan_id', 'customer_id', 'event_timestamp', 'loan_amount',
       'interest_rate', 'loan_term', 'product_type', 'collateral_value',
       'ltv_ratio', 'credit_score', 'monthly_income', 'dti_ratio', 'age',
       'employment_years', 'province', 'urban_rural', 'origination_date',
       'time_to_event', 'default_flag', 'observed'],
      dtype='object')

In [10]:
dropped_columns = ['monthly_income', 'age', 'employment_years', 'province', 'urban_rural', 'observed']
pushed_training_df = training_df.drop(columns=dropped_columns)

In [11]:
pushed_training_df

Unnamed: 0,loan_id,customer_id,event_timestamp,loan_amount,interest_rate,loan_term,product_type,collateral_value,ltv_ratio,credit_score,dti_ratio,origination_date,time_to_event,default_flag
0,L10001,C2824,2023-01-18,132049269,16.4,24,Car,155764993,0.85,628,0.52,2023-01-18,12,1
1,L10002,C1409,2023-05-26,102430418,12.9,60,Car,107223993,0.96,711,0.32,2023-05-26,24,0
2,L10004,C5012,2023-01-09,147151597,17.0,24,Car,156525409,0.94,684,0.46,2023-01-09,24,0
3,L10005,C4657,2023-09-23,89938163,13.8,60,Car,104096852,0.86,725,0.19,2023-09-23,24,0
4,L10006,C3286,2023-03-11,12740304,18.0,12,Motorcycle,11305347,1.13,700,0.15,2023-03-11,12,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3697,L14995,C5162,2023-01-02,131933556,16.5,36,Car,168648590,0.78,678,0.57,2023-01-02,1,1
3698,L14996,C6898,2023-08-22,68874172,15.9,36,Car,88922626,0.77,732,0.32,2023-08-22,24,0
3699,L14998,C8331,2023-07-25,63903206,12.0,12,Car,77931980,0.82,631,0.54,2023-07-25,7,1
3700,L14999,C7505,2023-03-13,18779891,17.4,36,Motorcycle,17966475,1.05,709,0.26,2023-03-13,24,0


In [12]:
store.push("statical_loan_push_source", pushed_training_df)

PushSourceNotFoundException: Unable to find push source 'statical_loan_push_source'.

In [15]:
print(store.list_data_sources())      # should include statical_loan_push_source
# print(store.list_push_sources())      # should include statical_loan_push_source
print("---")
print(store.list_feature_views())


[<feast.infra.offline_stores.contrib.postgres_offline_store.postgres_source.PostgreSQLSource object at 0x70457bd01910>, <feast.infra.offline_stores.contrib.postgres_offline_store.postgres_source.PostgreSQLSource object at 0x70457bd10890>, <feast.infra.offline_stores.contrib.postgres_offline_store.postgres_source.PostgreSQLSource object at 0x70457bd13c50>, <feast.data_source.PushSource object at 0x70457be37f90>, <feast.infra.offline_stores.contrib.postgres_offline_store.postgres_source.PostgreSQLSource object at 0x70457be377d0>]
---
[<FeatureView(name = static_loan_features, entities = ['loan'], ttl = 3650 days, 0:00:00, stream_source = None, batch_source = {
  "type": "CUSTOM_SOURCE",
  "timestampField": "event_timestamp",
  "customOptions": {
    "configuration": "eyJuYW1lIjogInN0YXRpY19sb2FuX3NvdXJjZSIsICJxdWVyeSI6ICJcbiAgICBTRUxFQ1QgXG4gICAgICAgIGxvYW5faWQsXG4gICAgICAgIGN1c3RvbWVyX2lkLFxuICAgICAgICBwcm9kdWN0X3R5cGUsXG4gICAgICAgIGxvYW5fYW1vdW50LFxuICAgICAgICBpbnRlcmVzdF9yYXRlLFxuICAg

In [16]:
fv = store.list_feature_views()[0]

In [17]:
fv

<FeatureView(name = static_loan_features, entities = ['loan'], ttl = 3650 days, 0:00:00, stream_source = None, batch_source = {
  "type": "CUSTOM_SOURCE",
  "timestampField": "event_timestamp",
  "customOptions": {
    "configuration": "eyJuYW1lIjogInN0YXRpY19sb2FuX3NvdXJjZSIsICJxdWVyeSI6ICJcbiAgICBTRUxFQ1QgXG4gICAgICAgIGxvYW5faWQsXG4gICAgICAgIGN1c3RvbWVyX2lkLFxuICAgICAgICBwcm9kdWN0X3R5cGUsXG4gICAgICAgIGxvYW5fYW1vdW50LFxuICAgICAgICBpbnRlcmVzdF9yYXRlLFxuICAgICAgICBsb2FuX3Rlcm0sXG4gICAgICAgIGNvbGxhdGVyYWxfdmFsdWUsXG4gICAgICAgIGx0dl9yYXRpbyxcbiAgICAgICAgb3JpZ2luYXRpb25fZGF0ZSxcbiAgICAgICAgY3JlZGl0X3Njb3JlLFxuICAgICAgICBtb250aGx5X2luY29tZSxcbiAgICAgICAgZHRpX3JhdGlvLFxuICAgICAgICBhZ2UsXG4gICAgICAgIGVtcGxveW1lbnRfeWVhcnMsXG4gICAgICAgIHByb3ZpbmNlLFxuICAgICAgICB1cmJhbl9ydXJhbCxcbiAgICAgICAgdGltZV90b19ldmVudCxcbiAgICAgICAgZGVmYXVsdF9mbGFnLFxuICAgICAgICBvYnNlcnZlZCxcbiAgICAgICAgZXZlbnRfdGltZXN0YW1wXG4gICAgRlJPTSBcbiAgICAgICAgYmZpX3N0YXRpY19sb2FuX2RhdGFcbiAgICAiLCAidGFibGUiOiAiIn0="
  },
  "dataS

In [18]:
print(store.list_feature_services())


[<FeatureService(name = loan_prediction_service, _features = [], feature_view_projections = [FeatureViewProjection(name='static_loan_features', name_alias=None, desired_features=[], features=[Field(
    name='loan_amount',
    dtype=<PrimitiveFeastType.FLOAT32: 6>,
    description='',
    tags={}
    vector_index=False
    vector_length=0
    vector_search_metric=''
), Field(
    name='interest_rate',
    dtype=<PrimitiveFeastType.FLOAT32: 6>,
    description='',
    tags={}
    vector_index=False
    vector_length=0
    vector_search_metric=''
), Field(
    name='loan_term',
    dtype=<PrimitiveFeastType.INT64: 4>,
    description='',
    tags={}
    vector_index=False
    vector_length=0
    vector_search_metric=''
), Field(
    name='product_type',
    dtype=<PrimitiveFeastType.STRING: 2>,
    description='',
    tags={}
    vector_index=False
    vector_length=0
    vector_search_metric=''
), Field(
    name='collateral_value',
    dtype=<PrimitiveFeastType.FLOAT32: 6>,
    descrip

In [5]:
feature_refs = store.get_feature_service("loan_prediction_service")

In [6]:
feature_refs

<FeatureService(name = loan_prediction_service, _features = [], feature_view_projections = [FeatureViewProjection(name='static_loan_features', name_alias=None, desired_features=[], features=[Field(
    name='loan_amount',
    dtype=<PrimitiveFeastType.FLOAT32: 6>,
    description='',
    tags={}
    vector_index=False
    vector_length=0
    vector_search_metric=''
), Field(
    name='interest_rate',
    dtype=<PrimitiveFeastType.FLOAT32: 6>,
    description='',
    tags={}
    vector_index=False
    vector_length=0
    vector_search_metric=''
), Field(
    name='loan_term',
    dtype=<PrimitiveFeastType.INT64: 4>,
    description='',
    tags={}
    vector_index=False
    vector_length=0
    vector_search_metric=''
), Field(
    name='product_type',
    dtype=<PrimitiveFeastType.STRING: 2>,
    description='',
    tags={}
    vector_index=False
    vector_length=0
    vector_search_metric=''
), Field(
    name='collateral_value',
    dtype=<PrimitiveFeastType.FLOAT32: 6>,
    descript

In [None]:
loans = [
        {"loan_id": "L13669", "customer_id": "C9761"},
        {"loan_id": "L14012", "customer_id": "C4624"},
        {"loan_id": "L13463", "customer_id": "C8478"}
    ]

In [7]:
entity_row = [{"loan_id": "L13669", "customer_id": "C9761"}]

In [8]:
online_features = store.get_online_features(
    entity_rows=entity_row,
    features=feature_refs
).to_dict()

In [9]:
online_features

{'loan_id': ['L13669'],
 'customer_id': ['C9761'],
 'credit_score': [789],
 'employment_years': [4.300000190734863],
 'interest_rate': [17.600000381469727],
 'urban_rural': ['Rural'],
 'loan_term': [60],
 'dti_ratio': [0.23000000417232513],
 'province': ['South Sulawesi'],
 'collateral_value': [101765152.0],
 'product_type': ['Car'],
 'loan_amount': [92929112.0],
 'age': [41],
 'monthly_income': [10546224.0],
 'ltv_ratio': [0.9100000262260437]}

materialize failed with remote

get online features failed with remote

Push failed