## Connect with your Feature Store

In [1]:
from feast import FeatureStore
from feast.infra.offline_stores.file_source import SavedDatasetFileStorage
import pandas as pd
from sklearn.model_selection import train_test_split
from datetime import datetime

In [2]:
store= FeatureStore(repo_path=r"../feature_store")

## Check Feature Views

### [ Same time in features and target ]

In [145]:
store.get_feature_view("features").features

[continent_Africa-Int64,
 continent_Asia-Int64,
 continent_Europe-Int64,
 continent_North America-Int64,
 continent_Oceania-Int64,
 continent_South America-Int64,
 unit_of_wage_Hour-Int64,
 unit_of_wage_Month-Int64,
 unit_of_wage_Week-Int64,
 unit_of_wage_Year-Int64,
 region_of_employment_Island-Int64,
 region_of_employment_Midwest-Int64,
 region_of_employment_Northeast-Int64,
 region_of_employment_South-Int64,
 region_of_employment_West-Int64,
 has_job_experience-Int64,
 requires_job_training-Int64,
 full_time_position-Int64,
 education_of_employee-Int64,
 no_of_employees-Float32,
 company_age-Float32,
 prevailing_wage-Float32]

In [146]:
store.get_feature_view("target").features

[case_status-String]

### [ Difference in time between features and target  ]

In [147]:
store.get_feature_view("exp_features").features

[continent_Africa-Int64,
 continent_Asia-Int64,
 continent_Europe-Int64,
 continent_North America-Int64,
 continent_Oceania-Int64,
 continent_South America-Int64,
 unit_of_wage_Hour-Int64,
 unit_of_wage_Month-Int64,
 unit_of_wage_Week-Int64,
 unit_of_wage_Year-Int64,
 region_of_employment_Island-Int64,
 region_of_employment_Midwest-Int64,
 region_of_employment_Northeast-Int64,
 region_of_employment_South-Int64,
 region_of_employment_West-Int64,
 has_job_experience-Int64,
 requires_job_training-Int64,
 full_time_position-Int64,
 education_of_employee-Int64,
 no_of_employees-Float32,
 company_age-Float32,
 prevailing_wage-Float32]

In [148]:
store.get_feature_view("exp_target").features

[case_status-String]

## Access Data  [ same event_timestamp in features and target ]

In [149]:
feature_df = pd.read_parquet(path=r"../feature_store/data/features.parquet")  
feature_df

Unnamed: 0,case_id,event_timestamp,continent_Africa,continent_Asia,continent_Europe,continent_North America,continent_Oceania,continent_South America,unit_of_wage_Hour,unit_of_wage_Month,...,region_of_employment_Northeast,region_of_employment_South,region_of_employment_West,has_job_experience,requires_job_training,full_time_position,education_of_employee,no_of_employees,company_age,prevailing_wage
0,EZYV01,1953-02-19 15:20:53.084144,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,1.0,0.0,0.0,1.0,2.0,1.517300,-0.943712,-1.398537
1,EZYV02,1953-02-20 15:20:53.084144,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,1.0,0.0,1.0,3.0,0.030912,-0.454005,0.169835
2,EZYV03,1953-02-21 15:20:53.084144,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,1.0,1.0,0.0,2.885076,-1.069026,0.919079
3,EZYV04,1953-02-22 15:20:53.084144,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,1.0,0.0,-1.343550,1.625983,0.169994
4,EZYV05,1953-02-23 15:20:53.084144,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,1.0,0.0,1.0,3.0,-0.432287,-0.724210,1.428604
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25475,EZYV25476,2022-11-19 15:20:53.084144,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,1.0,1.0,1.0,0.0,0.079917,-1.069026,0.049924
25476,EZYV25477,2022-11-20 15:20:53.084144,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,1.0,0.0,1.0,2.0,0.235747,-0.829275,3.876159
25477,EZYV25478,2022-11-21 15:20:53.084144,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,1.0,0.0,0.0,3.0,-0.413885,1.541805,1.360280
25478,EZYV25479,2022-11-22 15:20:53.084144,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,1.0,1.0,1.0,3.0,-0.111949,1.682601,0.221509


In [150]:
# Entity DataFrame contains target and all the entities through which scanning will occur in feature views.
# https://github.com/feathr-ai/feathr/blob/main/docs/concepts/point-in-time-join.md
entity_df = pd.read_parquet(path=r"../feature_store/data/target.parquet")  
entity_df

Unnamed: 0,case_id,event_timestamp,case_status
0,EZYV01,1953-02-19 15:20:53.084144,Denied
1,EZYV02,1953-02-20 15:20:53.084144,Certified
2,EZYV03,1953-02-21 15:20:53.084144,Denied
3,EZYV04,1953-02-22 15:20:53.084144,Denied
4,EZYV05,1953-02-23 15:20:53.084144,Certified
...,...,...,...
25475,EZYV25476,2022-11-19 15:20:53.084144,Certified
25476,EZYV25477,2022-11-20 15:20:53.084144,Certified
25477,EZYV25478,2022-11-21 15:20:53.084144,Certified
25478,EZYV25479,2022-11-22 15:20:53.084144,Certified


In [151]:
## Required Features from feature view
features_names = []
for i in store.get_feature_view(name="features").features:
    features_names.append(f"features:{i.name}")

features_names    

['features:continent_Africa',
 'features:continent_Asia',
 'features:continent_Europe',
 'features:continent_North America',
 'features:continent_Oceania',
 'features:continent_South America',
 'features:unit_of_wage_Hour',
 'features:unit_of_wage_Month',
 'features:unit_of_wage_Week',
 'features:unit_of_wage_Year',
 'features:region_of_employment_Island',
 'features:region_of_employment_Midwest',
 'features:region_of_employment_Northeast',
 'features:region_of_employment_South',
 'features:region_of_employment_West',
 'features:has_job_experience',
 'features:requires_job_training',
 'features:full_time_position',
 'features:education_of_employee',
 'features:no_of_employees',
 'features:company_age',
 'features:prevailing_wage']

In [152]:
retrieval_job = store.get_historical_features(
    entity_df=entity_df,
    features=features_names
)

feature_data = retrieval_job.to_df()

In [157]:
feature_data.head()

Unnamed: 0,case_id,event_timestamp,case_status,continent_Africa,continent_Asia,continent_Europe,continent_North America,continent_Oceania,continent_South America,unit_of_wage_Hour,...,region_of_employment_Northeast,region_of_employment_South,region_of_employment_West,has_job_experience,requires_job_training,full_time_position,education_of_employee,no_of_employees,company_age,prevailing_wage
0,EZYV01,1953-02-19 15:20:53.084144+00:00,Denied,0.0,1.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,1.0,0.0,0.0,1.0,2.0,1.5173,-0.943712,-1.398537
1,EZYV02,1953-02-20 15:20:53.084144+00:00,Certified,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,1.0,0.0,1.0,3.0,0.030912,-0.454005,0.169835
2,EZYV03,1953-02-21 15:20:53.084144+00:00,Denied,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,1.0,1.0,0.0,2.885076,-1.069026,0.919079
3,EZYV04,1953-02-22 15:20:53.084144+00:00,Denied,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,1.0,0.0,-1.34355,1.625983,0.169994
4,EZYV05,1953-02-23 15:20:53.084144+00:00,Certified,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,1.0,0.0,1.0,3.0,-0.432287,-0.72421,1.428604


## Access Data [ Different event_timestamp in features and target ]

In [3]:
entity_df = pd.read_parquet(path=r"../feature_store/data/target_exp.parquet")  
feature_df = pd.read_parquet(path=r"../feature_store/data/features_exp.parquet")  
entity_df

Unnamed: 0,case_id,event_timestamp,case_status
0,EZYV01,1953-02-19 16:02:49.893268,Denied
1,EZYV02,1953-02-20 16:02:49.893268,Certified
2,EZYV03,1953-02-21 16:02:49.893268,Denied
3,EZYV04,1953-02-22 16:02:49.893268,Denied
4,EZYV05,1953-02-23 16:02:49.893268,Certified
...,...,...,...
25476,EZYV25477,2022-11-20 16:02:49.893268,Certified
25477,EZYV25478,2022-11-21 16:02:49.893268,Certified
25478,EZYV25479,2022-11-22 16:02:49.893268,Certified
25479,EZYV25480,2022-11-23 16:02:49.893268,Certified


In [4]:
features_names = []
for i in store.get_feature_view(name="exp_features").features:
    features_names.append(f"exp_features:{i.name}")

features_names    

['exp_features:continent_Africa',
 'exp_features:continent_Asia',
 'exp_features:continent_Europe',
 'exp_features:continent_North America',
 'exp_features:continent_Oceania',
 'exp_features:continent_South America',
 'exp_features:unit_of_wage_Hour',
 'exp_features:unit_of_wage_Month',
 'exp_features:unit_of_wage_Week',
 'exp_features:unit_of_wage_Year',
 'exp_features:region_of_employment_Island',
 'exp_features:region_of_employment_Midwest',
 'exp_features:region_of_employment_Northeast',
 'exp_features:region_of_employment_South',
 'exp_features:region_of_employment_West',
 'exp_features:has_job_experience',
 'exp_features:requires_job_training',
 'exp_features:full_time_position',
 'exp_features:education_of_employee',
 'exp_features:no_of_employees',
 'exp_features:company_age',
 'exp_features:prevailing_wage']

In [245]:
retrieval_job = store.get_historical_features(
    entity_df=entity_df,
    features=features_names
)

feature_data = retrieval_job.to_df()

### Time Difference Between analysis

In [246]:
# Target time stamp
entity_df["event_timestamp"][25480].time()

datetime.time(15, 57, 49, 893268)

In [247]:
# feature time stamp
feature_df["event_timestamp"][25480].time()

datetime.time(15, 26, 5, 308067)

In [248]:
# data generated by feature store
feature_data.iloc[25480]["event_timestamp"].time()

datetime.time(15, 57, 49, 893268)

In [249]:
# difference between target and features 
entity_df["event_timestamp"][25480] - feature_df["event_timestamp"][25480]

Timedelta('0 days 00:31:44.585201')

In [252]:
entity_df

Unnamed: 0,case_id,event_timestamp,case_status
0,EZYV01,1953-02-19 16:02:49.893268,Denied
1,EZYV02,1953-02-20 16:02:49.893268,Certified
2,EZYV03,1953-02-21 16:02:49.893268,Denied
3,EZYV04,1953-02-22 16:02:49.893268,Denied
4,EZYV05,1953-02-23 16:02:49.893268,Certified
...,...,...,...
25476,EZYV25477,2022-11-20 16:02:49.893268,Certified
25477,EZYV25478,2022-11-21 16:02:49.893268,Certified
25478,EZYV25479,2022-11-22 16:02:49.893268,Certified
25479,EZYV25480,2022-11-23 16:02:49.893268,Certified


In [250]:
feature_data

Unnamed: 0,case_id,event_timestamp,case_status,continent_Africa,continent_Asia,continent_Europe,continent_North America,continent_Oceania,continent_South America,unit_of_wage_Hour,...,region_of_employment_Northeast,region_of_employment_South,region_of_employment_West,has_job_experience,requires_job_training,full_time_position,education_of_employee,no_of_employees,company_age,prevailing_wage
0,EZYV01,1953-02-19 16:02:49.893268+00:00,Denied,0.0,1.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,1.0,0.0,0.0,1.0,2.0,1.517300,-0.943712,-1.398537
1,EZYV02,1953-02-20 16:02:49.893268+00:00,Certified,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,1.0,0.0,1.0,3.0,0.030912,-0.454005,0.169835
2,EZYV03,1953-02-21 16:02:49.893268+00:00,Denied,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,1.0,1.0,0.0,2.885076,-1.069026,0.919079
3,EZYV04,1953-02-22 16:02:49.893268+00:00,Denied,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,1.0,0.0,-1.343550,1.625983,0.169994
4,EZYV05,1953-02-23 16:02:49.893268+00:00,Certified,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,1.0,0.0,1.0,3.0,-0.432287,-0.724210,1.428604
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25476,EZYV25477,2022-11-20 16:02:49.893268+00:00,Certified,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,1.0,0.0,1.0,2.0,0.235747,-0.829275,3.876159
25477,EZYV25478,2022-11-21 16:02:49.893268+00:00,Certified,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,1.0,0.0,0.0,3.0,-0.413885,1.541805,1.360280
25478,EZYV25479,2022-11-22 16:02:49.893268+00:00,Certified,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,1.0,1.0,1.0,3.0,-0.111949,1.682601,0.221509
25479,EZYV25480,2022-11-23 16:02:49.893268+00:00,Certified,100.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.218742,1.012247,-0.067763


In [251]:
feature_df

Unnamed: 0,case_id,event_timestamp,continent_Africa,continent_Asia,continent_Europe,continent_North America,continent_Oceania,continent_South America,unit_of_wage_Hour,unit_of_wage_Month,...,region_of_employment_Northeast,region_of_employment_South,region_of_employment_West,has_job_experience,requires_job_training,full_time_position,education_of_employee,no_of_employees,company_age,prevailing_wage
0,EZYV01,1953-02-19 15:46:05.308067,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,1.0,0.0,0.0,1.0,2.0,1.517300,-0.943712,-1.398537
1,EZYV02,1953-02-20 15:46:05.308067,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,1.0,0.0,1.0,3.0,0.030912,-0.454005,0.169835
2,EZYV03,1953-02-21 15:46:05.308067,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,1.0,1.0,0.0,2.885076,-1.069026,0.919079
3,EZYV04,1953-02-22 15:46:05.308067,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,1.0,0.0,-1.343550,1.625983,0.169994
4,EZYV05,1953-02-23 15:46:05.308067,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,1.0,0.0,1.0,3.0,-0.432287,-0.724210,1.428604
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25479,EZYV25480,2022-11-23 15:46:05.308067,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.218742,1.012247,-0.067763
25480,EZYV25480,2022-11-23 15:26:05.308067,51.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.218742,1.012247,-0.067763
25481,EZYV25480,2022-11-23 16:06:05.308067,51.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.218742,1.012247,-0.067763
25482,EZYV25480,2022-11-23 16:06:05.308067,100.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.218742,1.012247,-0.067763


## Feature retrieval [ Create Feature Service ]

- Generally, Feast supports several patterns of feature retrieval:
 1. feature_store.get_historical_features(...)
 2. feature_store.get_online_features(...)
 3. via deployed feature server endpoints: requests.post('http://localhost:6566/get-online-features', data=json.dumps(online_request)
    
### Dataset vs Feature View

1. Feature views contain the schema of data and a reference to where data can be found (through its data source).
2. Datasets are the actual data manifestation of querying those data sources.

### Dataset vs Data Source: 
1. Datasets are the output of historical retrieval, whereas data sources are the inputs. One or more data sources can be used in the creation of a dataset.

### Feature Service
1. way to specify the features to fetch: A feature service is an object that represents a logical group of features from one or more feature views. Feature Services allows features from within a feature view to be used as needed by an ML model. Users can expect to create one feature service per model version, allowing for tracking of the features used by models.

In [5]:
data = store.get_feature_service("features_service_1")

In [6]:
data.feature_view_projections[0].features

[continent_Africa-Int64,
 continent_Asia-Int64,
 continent_Europe-Int64,
 continent_North America-Int64,
 continent_Oceania-Int64,
 continent_South America-Int64,
 unit_of_wage_Hour-Int64,
 unit_of_wage_Month-Int64,
 unit_of_wage_Week-Int64,
 unit_of_wage_Year-Int64,
 region_of_employment_Island-Int64,
 region_of_employment_Midwest-Int64,
 region_of_employment_Northeast-Int64,
 region_of_employment_South-Int64,
 region_of_employment_West-Int64,
 has_job_experience-Int64,
 requires_job_training-Int64,
 full_time_position-Int64,
 education_of_employee-Int64,
 no_of_employees-Float32,
 company_age-Float32,
 prevailing_wage-Float32]

In [7]:
data.feature_view_projections[0].features

[continent_Africa-Int64,
 continent_Asia-Int64,
 continent_Europe-Int64,
 continent_North America-Int64,
 continent_Oceania-Int64,
 continent_South America-Int64,
 unit_of_wage_Hour-Int64,
 unit_of_wage_Month-Int64,
 unit_of_wage_Week-Int64,
 unit_of_wage_Year-Int64,
 region_of_employment_Island-Int64,
 region_of_employment_Midwest-Int64,
 region_of_employment_Northeast-Int64,
 region_of_employment_South-Int64,
 region_of_employment_West-Int64,
 has_job_experience-Int64,
 requires_job_training-Int64,
 full_time_position-Int64,
 education_of_employee-Int64,
 no_of_employees-Float32,
 company_age-Float32,
 prevailing_wage-Float32]

## Saved Datasets [ Alpha ]

- Feast datasets allow for conveniently saving dataframes that include both features 
  and entities to be subsequently used for data analysis and model training. Data Quality Monitoring was the primary motivation for creating dataset concept.
  
- Dataset's metadata is stored in the Feast registry and raw data (features, entities, additional input keys and timestamp) is stored in the offline store.

In [8]:
from feast.infra.offline_stores.file_source import SavedDatasetFileStorage

In [9]:
entity_df = pd.read_parquet(path=r"../feature_store/data/target.parquet")  
entity_df.head()

Unnamed: 0,case_id,event_timestamp,case_status
0,EZYV01,1953-02-19 15:20:53.084144,Denied
1,EZYV02,1953-02-20 15:20:53.084144,Certified
2,EZYV03,1953-02-21 15:20:53.084144,Denied
3,EZYV04,1953-02-22 15:20:53.084144,Denied
4,EZYV05,1953-02-23 15:20:53.084144,Certified


In [10]:
features_names = []
for i in store.get_feature_view(name="features").features:
    features_names.append(f"features:{i.name}")

features_names  

['features:continent_Africa',
 'features:continent_Asia',
 'features:continent_Europe',
 'features:continent_North America',
 'features:continent_Oceania',
 'features:continent_South America',
 'features:unit_of_wage_Hour',
 'features:unit_of_wage_Month',
 'features:unit_of_wage_Week',
 'features:unit_of_wage_Year',
 'features:region_of_employment_Island',
 'features:region_of_employment_Midwest',
 'features:region_of_employment_Northeast',
 'features:region_of_employment_South',
 'features:region_of_employment_West',
 'features:has_job_experience',
 'features:requires_job_training',
 'features:full_time_position',
 'features:education_of_employee',
 'features:no_of_employees',
 'features:company_age',
 'features:prevailing_wage']

In [11]:
retrieval_job = store.get_historical_features(
    entity_df=entity_df,
    features=features_names
)

feature_data = retrieval_job.to_df()

In [13]:
feature_data

Unnamed: 0,case_id,event_timestamp,case_status,continent_Africa,continent_Asia,continent_Europe,continent_North America,continent_Oceania,continent_South America,unit_of_wage_Hour,...,region_of_employment_Northeast,region_of_employment_South,region_of_employment_West,has_job_experience,requires_job_training,full_time_position,education_of_employee,no_of_employees,company_age,prevailing_wage
0,EZYV01,1953-02-19 15:20:53.084144+00:00,Denied,0.0,1.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,1.0,0.0,0.0,1.0,2.0,1.517300,-0.943712,-1.398537
1,EZYV02,1953-02-20 15:20:53.084144+00:00,Certified,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,1.0,0.0,1.0,3.0,0.030912,-0.454005,0.169835
2,EZYV03,1953-02-21 15:20:53.084144+00:00,Denied,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,1.0,1.0,0.0,2.885076,-1.069026,0.919079
3,EZYV04,1953-02-22 15:20:53.084144+00:00,Denied,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,1.0,0.0,-1.343550,1.625983,0.169994
4,EZYV05,1953-02-23 15:20:53.084144+00:00,Certified,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,1.0,0.0,1.0,3.0,-0.432287,-0.724210,1.428604
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25475,EZYV25476,2022-11-19 15:20:53.084144+00:00,Certified,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,1.0,1.0,1.0,0.0,0.079917,-1.069026,0.049924
25476,EZYV25477,2022-11-20 15:20:53.084144+00:00,Certified,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,1.0,0.0,1.0,2.0,0.235747,-0.829275,3.876159
25477,EZYV25478,2022-11-21 15:20:53.084144+00:00,Certified,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,1.0,0.0,0.0,3.0,-0.413885,1.541805,1.360280
25478,EZYV25479,2022-11-22 15:20:53.084144+00:00,Certified,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,1.0,1.0,1.0,3.0,-0.111949,1.682601,0.221509


In [15]:
# Can be created for only one time else you will get error
dataset = store.create_saved_dataset(
    from_=retrieval_job,
    name='training_dataset',
    storage=SavedDatasetFileStorage(r"../feature_store/data/training_dataset.parquet"),
    tags={'author': 'ketanGangal'}
)

dataset.to_df().head()

Unnamed: 0,no_of_employees,has_job_experience,education_of_employee,continent_Europe,prevailing_wage,requires_job_training,full_time_position,company_age,continent_North America,case_status,...,case_id,region_of_employment_Midwest,unit_of_wage_Week,continent_Africa,region_of_employment_Island,continent_South America,continent_Asia,unit_of_wage_Year,region_of_employment_South,unit_of_wage_Month
0,1.5173,0.0,2.0,0.0,-1.398537,0.0,1.0,-0.943712,0.0,Denied,...,EZYV01,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
1,0.030912,1.0,3.0,0.0,0.169835,0.0,1.0,-0.454005,0.0,Certified,...,EZYV02,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0
2,2.885076,0.0,0.0,0.0,0.919079,1.0,1.0,-1.069026,0.0,Denied,...,EZYV03,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0
3,-1.34355,0.0,0.0,0.0,0.169994,0.0,1.0,1.625983,0.0,Denied,...,EZYV04,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0
4,-0.432287,1.0,3.0,0.0,1.428604,0.0,1.0,-0.72421,0.0,Certified,...,EZYV05,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0


## Create Data for Model Training

In [27]:
retrieval_job = store.get_historical_features(
    entity_df=entity_df,
    features=features_names
)

feature_data = retrieval_job.to_df()

In [28]:
feature_data.head()

Unnamed: 0,case_id,event_timestamp,case_status,continent_Africa,continent_Asia,continent_Europe,continent_North America,continent_Oceania,continent_South America,unit_of_wage_Hour,...,region_of_employment_Northeast,region_of_employment_South,region_of_employment_West,has_job_experience,requires_job_training,full_time_position,education_of_employee,no_of_employees,company_age,prevailing_wage
0,EZYV01,1953-02-19 15:20:53.084144+00:00,Denied,0.0,1.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,1.0,0.0,0.0,1.0,2.0,1.5173,-0.943712,-1.398537
1,EZYV02,1953-02-20 15:20:53.084144+00:00,Certified,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,1.0,0.0,1.0,3.0,0.030912,-0.454005,0.169835
2,EZYV03,1953-02-21 15:20:53.084144+00:00,Denied,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,1.0,1.0,0.0,2.885076,-1.069026,0.919079
3,EZYV04,1953-02-22 15:20:53.084144+00:00,Denied,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,1.0,0.0,-1.34355,1.625983,0.169994
4,EZYV05,1953-02-23 15:20:53.084144+00:00,Certified,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,1.0,0.0,1.0,3.0,-0.432287,-0.72421,1.428604


In [26]:
# 2nd way to get the data 
dataset = store.get_saved_dataset('training_dataset')
dataset.to_df().head()



Unnamed: 0,no_of_employees,has_job_experience,education_of_employee,continent_Europe,prevailing_wage,requires_job_training,full_time_position,company_age,continent_North America,case_status,...,case_id,region_of_employment_Midwest,unit_of_wage_Week,continent_Africa,region_of_employment_Island,continent_South America,continent_Asia,unit_of_wage_Year,region_of_employment_South,unit_of_wage_Month
0,1.5173,0.0,2.0,0.0,-1.398537,0.0,1.0,-0.943712,0.0,Denied,...,EZYV01,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
1,0.030912,1.0,3.0,0.0,0.169835,0.0,1.0,-0.454005,0.0,Certified,...,EZYV02,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0
2,2.885076,0.0,0.0,0.0,0.919079,1.0,1.0,-1.069026,0.0,Denied,...,EZYV03,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0
3,-1.34355,0.0,0.0,0.0,0.169994,0.0,1.0,1.625983,0.0,Denied,...,EZYV04,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0
4,-0.432287,1.0,3.0,0.0,1.428604,0.0,1.0,-0.72421,0.0,Certified,...,EZYV05,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0


## Materialization

Feast uses online stores to serve features at low latency. Feature values are loaded from data sources into the online store through materialization, which can be triggered through the materialize command.

The storage schema of features within the online store mirrors that of the original data source. One key difference is that for each entity key, only the latest feature values are stored. No historical values are stored.

Features can also be written directly to the online store via push sources .

**Materialize**: materialize loads the latest features between two dates. A terminal call would look like this:              
                 **`feast materialize start_date end_date`**


**Materialize-Incremental**: In contrast, materialize-incremental loads features up to the provided end date:

**```feast materialize-incremental end_date/ttl```**


With feast materialize-incremental, the start time is either now — ttl (the ttl that we defined in our feature views) or the time of the most recent materialization

- If you’ve materialized features at least once, then subsequent materializations will only fetch features that weren’t present in the store at the time of the previous materializations.

- One thing to note here — if you have several feature rows per entity, Feast will only load the latest values per entity key. 

### 1. [ Ingest batch features into an online store Mtd: materialize and materialize_incremental  ]

In [29]:
from datetime import datetime, timedelta

In [46]:
# Start days 2 years back
store.materialize(start_date=datetime.now() - timedelta(days=700), end_date=datetime.now())

Materializing [1m[32m4[0m feature views from [1m[32m2020-12-24 16:02:26+05:30[0m to [1m[32m2022-11-24 16:02:26+05:30[0m into the [1m[32msqlite[0m online store.

[1m[32mexp_features[0m:


100%|███████████████████████████████████████████████████████████| 699/699 [00:00<00:00, 2247.59it/s]


[1m[32mfeatures[0m:


100%|███████████████████████████████████████████████████████████| 699/699 [00:00<00:00, 2322.01it/s]


[1m[32mexp_target[0m:


100%|██████████████████████████████████████████████████████████| 700/700 [00:00<00:00, 26913.43it/s]


[1m[32mtarget[0m:


100%|██████████████████████████████████████████████████████████| 699/699 [00:00<00:00, 25889.17it/s]


In [31]:
store.materialize_incremental(end_date=datetime.now())

Materializing [1m[32m4[0m feature views to [1m[32m2022-11-24 15:58:11+05:30[0m into the [1m[32msqlite[0m online store.

[1m[32mexp_features[0m from [1m[32m2022-11-24 21:27:55+05:30[0m to [1m[32m2022-11-24 15:58:11+05:30[0m:


0it [00:00, ?it/s]


[1m[32mfeatures[0m from [1m[32m2022-11-24 21:27:55+05:30[0m to [1m[32m2022-11-24 21:28:11+05:30[0m:


0it [00:00, ?it/s]


[1m[32mexp_target[0m from [1m[32m2022-11-24 21:27:55+05:30[0m to [1m[32m2022-11-24 21:28:11+05:30[0m:


0it [00:00, ?it/s]


[1m[32mtarget[0m from [1m[32m2022-11-24 21:27:55+05:30[0m to [1m[32m2022-11-24 21:28:11+05:30[0m:


0it [00:00, ?it/s]


### 2. [ Fetch online features to power real time inference Mtd: get_online_features ]

In [41]:
entity_df.tail()

Unnamed: 0,case_id,event_timestamp,case_status
25475,EZYV25476,2022-11-19 15:20:53.084144,Certified
25476,EZYV25477,2022-11-20 15:20:53.084144,Certified
25477,EZYV25478,2022-11-21 15:20:53.084144,Certified
25478,EZYV25479,2022-11-22 15:20:53.084144,Certified
25479,EZYV25480,2022-11-23 15:20:53.084144,Certified


In [32]:
features_names

['features:continent_Africa',
 'features:continent_Asia',
 'features:continent_Europe',
 'features:continent_North America',
 'features:continent_Oceania',
 'features:continent_South America',
 'features:unit_of_wage_Hour',
 'features:unit_of_wage_Month',
 'features:unit_of_wage_Week',
 'features:unit_of_wage_Year',
 'features:region_of_employment_Island',
 'features:region_of_employment_Midwest',
 'features:region_of_employment_Northeast',
 'features:region_of_employment_South',
 'features:region_of_employment_West',
 'features:has_job_experience',
 'features:requires_job_training',
 'features:full_time_position',
 'features:education_of_employee',
 'features:no_of_employees',
 'features:company_age',
 'features:prevailing_wage']

In [47]:
features_1 = store.get_online_features(features=features_names,    
                                     entity_rows=[{"case_id": "EZYV25480"}, {"case_id": "EZYV25476"}]
                                    ).to_dict()

In [48]:
features_2 = store.get_online_features(features=features_names,    
                                     entity_rows=[{"case_id": "EZYV01"}, {"case_id": "EZYV02"}]
                                    ).to_dict()

In [49]:
features_1

{'case_id': ['EZYV25480', 'EZYV25476'],
 'no_of_employees': [0.21874167025089264, 0.07991687953472137],
 'has_job_experience': [1, 1],
 'education_of_employee': [0, 0],
 'continent_Europe': [0, 0],
 'prevailing_wage': [-0.06776315718889236, 0.04992445558309555],
 'requires_job_training': [0, 1],
 'full_time_position': [1, 1],
 'company_age': [1.0122473239898682, -1.0690256357192993],
 'continent_North America': [0, 0],
 'continent_Oceania': [0, 0],
 'region_of_employment_West': [0, 0],
 'unit_of_wage_Hour': [0, 0],
 'region_of_employment_Northeast': [0, 0],
 'region_of_employment_Midwest': [1, 0],
 'unit_of_wage_Week': [0, 0],
 'continent_Africa': [0, 0],
 'region_of_employment_Island': [0, 0],
 'continent_South America': [0, 0],
 'continent_Asia': [1, 1],
 'unit_of_wage_Year': [1, 1],
 'region_of_employment_South': [0, 1],
 'unit_of_wage_Month': [0, 0]}

In [50]:
features_2

{'case_id': ['EZYV01', 'EZYV02'],
 'no_of_employees': [None, None],
 'has_job_experience': [None, None],
 'education_of_employee': [None, None],
 'continent_Europe': [None, None],
 'prevailing_wage': [None, None],
 'requires_job_training': [None, None],
 'full_time_position': [None, None],
 'company_age': [None, None],
 'continent_North America': [None, None],
 'continent_Oceania': [None, None],
 'region_of_employment_West': [None, None],
 'unit_of_wage_Hour': [None, None],
 'region_of_employment_Northeast': [None, None],
 'region_of_employment_Midwest': [None, None],
 'unit_of_wage_Week': [None, None],
 'continent_Africa': [None, None],
 'region_of_employment_Island': [None, None],
 'continent_South America': [None, None],
 'continent_Asia': [None, None],
 'unit_of_wage_Year': [None, None],
 'region_of_employment_South': [None, None],
 'unit_of_wage_Month': [None, None]}

### Api Based Feature Serving

In [292]:
# https://docs.feast.dev/reference/feature-servers/python-feature-server

### Conclusion 

In [56]:
# Point in time correctness -> 
    # In features till what time we need to look up has to be defined in TTL if defined time is less than actual
        # target and feature time the 0 rows will be there. 
        
    # If target contains duplicate data then final df will also contain duplicate but according to time. 
    # If in features number of samples of perticular user are more than only nearst in time will be selected