# Feature Store Demo using FEAST

https://docs.feast.dev/getting-started/quickstart

!pip install fastparquet
!pip install pyarrow

In [2]:
import pandas as pd
import numpy as np
#!pip install numpy==1.24.4

In [3]:
#import data
df = pd.read_csv('credit.csv')

In [4]:
df.sample()

Unnamed: 0,Loan_ID,Gender_Male,Married,Graduate,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Eligible,Rural_Property,Semiurban_Property,Urban_Property,Dependents_0,Dependents_1,Dependents_2,Dependents_3,timestamp
434,LP002390,1,0,1,0,3750,0.0,100.0,360.0,1,1,0,0,1,1,0,0,0,2023-03-09 12:07:08.476791


In [5]:
#Seperate input features vs target variable
predictors = df.drop('Eligible',1)

  predictors = df.drop('Eligible',1)


### We will keep Loan_ID so that we can use Loan_ID and timestamp to uniquely identify a record

In [6]:
target = df[['Loan_ID','Eligible','timestamp']]

In [7]:
predictors.head()

Unnamed: 0,Loan_ID,Gender_Male,Married,Graduate,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Rural_Property,Semiurban_Property,Urban_Property,Dependents_0,Dependents_1,Dependents_2,Dependents_3,timestamp
0,LP001002,1,0,1,0,5849,0.0,128.0,360.0,1,0,0,1,1,0,0,0,2021-12-30 12:07:08.476791
1,LP001003,1,1,1,0,4583,1508.0,128.0,360.0,1,1,0,0,0,1,0,0,2021-12-31 12:07:08.476791
2,LP001005,1,1,1,1,3000,0.0,66.0,360.0,1,0,0,1,1,0,0,0,2022-01-01 12:07:08.476791
3,LP001006,1,1,0,0,2583,2358.0,120.0,360.0,1,0,0,1,1,0,0,0,2022-01-02 12:07:08.476791
4,LP001008,1,0,1,0,6000,0.0,141.0,360.0,1,0,0,1,1,0,0,0,2022-01-03 12:07:08.476791


In [8]:
predictors.to_parquet('predictors.parquet')
target.to_parquet('target.parquet')

### All the above steps are done to satisfy Feast library requirements. All the data cleaning, data preparation should be done before this. Feast requires a timestamp and id column which we have in our data.

!pip install feast

In [50]:
#to check version
pip show feast

Name: feast
Version: 0.33.1
Summary: Python SDK for Feast
Home-page: https://github.com/feast-dev/feast
Author: Feast
Author-email: 
License: Apache
Location: /Users/swapnil/opt/anaconda3/lib/python3.9/site-packages
Requires: bowler, click, colorama, dask, dill, fastapi, fastavro, grpcio, grpcio-reflection, gunicorn, httpx, Jinja2, jsonschema, mmh3, numpy, pandas, pandavro, proto-plus, protobuf, pyarrow, pydantic, pygments, PyYAML, requests, SQLAlchemy, tabulate, tenacity, toml, tqdm, typeguard, uvicorn
Required-by: 
Note: you may need to restart the kernel to use updated packages.


In [9]:
pwd

'/Users/swapnil/Downloads/Feature_store'

In [10]:
#let us create a feast repository
!feast init feature_repo


Creating a new Feast repository in [1m[32m/Users/swapnil/Downloads/Feature_store/feature_repo[0m.



The above code creates a folder structure in your path file.

<img src="image1.png" width="240" height="240" align="left"/>



Next > 

step 1 - Move your parcquet files in the data folder

step 2 - Rename example_repo.py file to feature_metadata.py

step 3 - open the feature_metadata.py file through jupyter notebook and edit it.

replace all with contents from below cell.

In [None]:
# This is an example feature definition file

from datetime import timedelta

import pandas as pd

from feast import (
    Entity,
    FeatureService,
    FeatureView,
    Field,
    FileSource,
    PushSource,
    RequestSource,
)
from feast.on_demand_feature_view import on_demand_feature_view
from feast.types import Float32, Float64, Int64

# Define an entity for the driver. You can think of an entity as a primary key used to
# fetch features.
ID = Entity(name="Loan_ID",description = 'Unique ID of the applicant') 

# PREDICTOR FEATURE VIEW


# Read data from parquet files. Parquet is convenient for local development mode. For
# production, you can use your favorite DWH, such as BigQuery. See Feast documentation
# for more info.
predictor_source = FileSource(
    path="/Users/swapnil/Downloads/Feature_store/feature_repo/feature_repo/data/predictors.parquet",
    timestamp_field="timestamp",
)


# Our parquet files contain sample data that includes a loan_id column, timestamps and
# 16 feature columns. Here we define a Feature View that will allow us to serve this
# data to our model online.
predictor_data_fv = FeatureView(
    # The unique name of this feature view. Two feature views in a single 
    # project cannot have the same name
    name="predictors_fv",
    entities=[ID],
    ttl=timedelta(days=1),
    # The list of features defined below act as a schema to both define features
    # for both materialization of features into a store, and are used as references
    # during retrieval for building a training dataset or serving features
    schema=[
        Field(name="Gender_Male", dtype=Int64, description="If male then 1"),
        Field(name="Married", dtype=Int64, description="If married then 1"),
        Field(name="Graduate", dtype=Int64, description="If graduate then 1"),
        Field(name="Self_Employed", dtype=Int64, description="If Self_Employed then 1"),
        Field(name="ApplicantIncome", dtype=Float64, description="Monthly income of the applicant"),
        Field(name="CoapplicantIncome", dtype=Float64, description="Monthly income of the co-applicant"),
        Field(name="LoanAmount", dtype=Float64, description="loan amount applied"),
        Field(name="Loan_Amount_Term", dtype=Float64, description="Ammortization period"),
        Field(name="Credit_History", dtype=Int64, description="1 if the applicant has a credit history"),
        Field(name="Rural_Property", dtype=Int64, description="1 if current propert is rural"),
        Field(name="Urban_Property", dtype=Int64, description="1 if current propert is urban"),
        Field(name="Dependents_0", dtype=Int64, description="1 if no dependants"),
        Field(name="Dependents_1", dtype=Int64, description="1 if 1 dependant"),
        Field(name="Dependents_2", dtype=Int64, description="1 if 2 dependants"),
        Field(name="Dependents_3", dtype=Int64, description="1 if 3 or more dependants"),
    ],
    online=True,
    source=predictor_source,
    # Tags are user defined key/value pairs that are attached to each
    # feature view for filtering purposes. I have not used any tags here. refer doc.
    tags={},
)



# TARGET FEATURE VIEW


target_source = FileSource(
    path="/Users/swapnil/Downloads/Feature_store/feature_repo/feature_repo/data/target.parquet",
    timestamp_field="timestamp",
)



target_data_fv = FeatureView(
    name="target_fv",
    entities=[ID],
    ttl=timedelta(days=1),
    schema=[
        Field(name="Eligible", dtype=Int64, description="1 if applicant was eligible for loan"),
    ],
    online=True,
    source=target_source,
    tags={},
)

### Navigate inside feature_repo folder

In [11]:
pwd

'/Users/swapnil/Downloads/Feature_store'

In [12]:
cd /Users/swapnil/Downloads/Feature_store/feature_repo/feature_repo

/Users/swapnil/Downloads/Feature_store/feature_repo/feature_repo


In [13]:
pwd

'/Users/swapnil/Downloads/Feature_store/feature_repo/feature_repo'

In [15]:
# Run feast apply. This will register features in feature store.
!feast apply 

09/04/2023 07:13:40 PM feast.infra.registry.registry INFO: Registry cache expired, so refreshing
09/04/2023 07:13:40 PM feast.infra.registry.registry INFO: Registry cache expired, so refreshing
09/04/2023 07:13:40 PM feast.infra.registry.registry INFO: Registry cache expired, so refreshing
09/04/2023 07:13:40 PM feast.infra.registry.registry INFO: Registry cache expired, so refreshing
09/04/2023 07:13:40 PM feast.infra.registry.registry INFO: Registry cache expired, so refreshing
09/04/2023 07:13:40 PM feast.infra.registry.registry INFO: Registry cache expired, so refreshing
09/04/2023 07:13:40 PM feast.infra.registry.registry INFO: Registry cache expired, so refreshing
09/04/2023 07:13:40 PM feast.infra.registry.registry INFO: Registry cache expired, so refreshing
09/04/2023 07:13:40 PM feast.infra.registry.registry INFO: Registry cache expired, so refreshing
09/04/2023 07:13:40 PM feast.infra.registry.registry INFO: Registry cache expired, so refreshing
09/04/2023 07:13:40 