In [1]:
import feast

In [2]:
from datetime import datetime, timedelta
import pandas as pd

from feast import FeatureStore

In [3]:
!ls

1-Install-init.ipynb                 5-Prediction-stored-features.ipynb
2-Using Existing Feature Store.ipynb Access-sqlite-db.ipynb
3-Understand Data.ipynb              driver_orders.csv
4-Working-with-features.ipynb        [1m[36mfeature_repo[m[m


In [4]:
%cd feature_repo

/Users/avkash/work/prodramp/publiccode/feature-store/feast-starter/feature_repo


In [5]:
# The entity dataframe is the dataframe we want to enrich with feature values
entity_df = pd.DataFrame.from_dict(
    {
        "driver_id": [1001, 1002, 1003],
        "label_driver_reported_satisfaction": [1, 5, 3], 
        "event_timestamp": [
            datetime.now() - timedelta(minutes=11),
            datetime.now() - timedelta(minutes=36),
            datetime.now() - timedelta(minutes=73),
        ],
    }
)

In [6]:
store = FeatureStore(repo_path=".")

  from collections import MutableMapping
  from collections import Mapping


In [7]:
store

<feast.feature_store.FeatureStore at 0x7fc9091a7c10>

In [9]:
store.list_entities()

[<feast.entity.Entity at 0x7fc8d8cd1fd0>]

In [None]:
store.

In [10]:
training_df = store.get_historical_features(
    entity_df=entity_df,
    features=[
        "driver_hourly_stats:conv_rate",
        "driver_hourly_stats:acc_rate",
        "driver_hourly_stats:avg_daily_trips",
    ],
).to_df()


In [11]:
print("----- Feature schema -----\n")
print(training_df.info())

----- Feature schema -----

<class 'pandas.core.frame.DataFrame'>
Int64Index: 3 entries, 0 to 2
Data columns (total 6 columns):
 #   Column                              Non-Null Count  Dtype              
---  ------                              --------------  -----              
 0   event_timestamp                     3 non-null      datetime64[ns, UTC]
 1   driver_id                           3 non-null      int64              
 2   label_driver_reported_satisfaction  3 non-null      int64              
 3   conv_rate                           3 non-null      float32            
 4   acc_rate                            3 non-null      float32            
 5   avg_daily_trips                     3 non-null      int32              
dtypes: datetime64[ns, UTC](1), float32(2), int32(1), int64(2)
memory usage: 132.0 bytes
None


In [12]:
print("----- Example features -----\n")
print(training_df.head())

----- Example features -----

                   event_timestamp  driver_id  \
0 2022-02-23 13:30:37.848789+00:00       1003   
1 2022-02-23 14:07:37.848787+00:00       1002   
2 2022-02-23 14:32:37.848776+00:00       1001   

   label_driver_reported_satisfaction  conv_rate  acc_rate  avg_daily_trips  
0                                   3   0.757673  0.869066              289  
1                                   5   0.793374  0.953679              756  
2                                   1   0.539819  0.483499              281  


In [13]:
training_df

Unnamed: 0,event_timestamp,driver_id,label_driver_reported_satisfaction,conv_rate,acc_rate,avg_daily_trips
0,2022-02-23 13:30:37.848789+00:00,1003,3,0.757673,0.869066,289
1,2022-02-23 14:07:37.848787+00:00,1002,5,0.793374,0.953679,756
2,2022-02-23 14:32:37.848776+00:00,1001,1,0.539819,0.483499,281


In [None]:
%env CURRENT_TIME=$(date -u +"%Y-%m-%dT%H:%M:%S")

In [None]:
!env | grep CURRENT_TIME

In [None]:
!feast materialize-incremental $CURRENT_TIME

In [17]:
from pprint import pprint
from feast import FeatureStore

store = FeatureStore(repo_path=".")

feature_vector = store.get_online_features(
    features=[
        "driver_hourly_stats:conv_rate",
        "driver_hourly_stats:acc_rate",
        "driver_hourly_stats:avg_daily_trips",
    ],
    entity_rows=[
        {"driver_id": 1004},
        {"driver_id": 1005},
    ],
).to_dict()

pprint(feature_vector)

{'acc_rate': [0.9962660074234009, 0.8402746915817261],
 'avg_daily_trips': [973, 111],
 'conv_rate': [0.7610865831375122, 0.25467395782470703],
 'driver_id': [1004, 1005]}


In [18]:
from sklearn.linear_model import LinearRegression

In [19]:
training_df

Unnamed: 0,event_timestamp,driver_id,label_driver_reported_satisfaction,conv_rate,acc_rate,avg_daily_trips
0,2022-02-23 13:30:37.848789+00:00,1003,3,0.757673,0.869066,289
1,2022-02-23 14:07:37.848787+00:00,1002,5,0.793374,0.953679,756
2,2022-02-23 14:32:37.848776+00:00,1001,1,0.539819,0.483499,281


In [20]:
# Train model
target = "label_driver_reported_satisfaction"

reg = LinearRegression()
train_X = training_df[training_df.columns.drop(target).drop("event_timestamp")]
train_Y = training_df.loc[:, target]
reg.fit(train_X[sorted(train_X)], train_Y)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [25]:
reg.score(train_X, train_Y)

-60727.1193188328

In [26]:
print('intercept:', reg.intercept_)

intercept: -931.831040031148


In [27]:
print('slope:', reg.coef_)

slope: [0.17710339 0.00623436 0.10016274 0.93000948]


In [28]:
y_pred = reg.predict(train_X)
print('predicted response:', y_pred, sep='\n')

predicted response:
[-485.33183398  -51.18581489 -493.16609422]


In [29]:
# Save model
from joblib import dump

dump(reg, "driver_model.bin")

['driver_model.bin']

In [30]:
!ls -l

total 24
-rw-r--r--  1 avkash  staff     0 Feb 20 09:04 __init__.py
drwxr-xr-x  5 avkash  staff   160 Feb 23 14:54 [1m[36mdata[m[m
-rw-r--r--  1 avkash  staff   708 Feb 23 14:59 driver_model.bin
-rw-r--r--  1 avkash  staff  1402 Feb 23 14:19 example.py
-rw-r--r--  1 avkash  staff   109 Feb 23 14:19 feature_store.yaml


In [31]:
!pwd

/Users/avkash/work/prodramp/publiccode/feature-store/feast-starter/feature_repo
