In [1]:
import feast
from joblib import dump
import pandas as pd
from sklearn.linear_model import LinearRegression


In [4]:
!ls

1-Install-init.ipynb                 5-Prediction-stored-features.ipynb
2-Using Existing Feature Store.ipynb Access-sqlite-db.ipynb
3-Understand Data.ipynb              driver_orders.csv
4-Working-with-features.ipynb        feature_repo


In [2]:
# Read the tab-separated driver order log and convert its timestamp strings
# to pandas datetimes in a single chained expression.
# The file's leading index column is kept and shows up as "Unnamed: 0".
orders = pd.read_csv("driver_orders.csv", sep="\t").assign(
    event_timestamp=lambda frame: pd.to_datetime(frame["event_timestamp"])
)

In [3]:
orders

Unnamed: 0,event_timestamp,driver_id,trip_completed
0,2021-04-16 20:29:28+00:00,1001,1
1,2021-04-17 04:29:28+00:00,1002,0
2,2021-04-17 12:29:28+00:00,1003,0
3,2021-04-17 20:29:28+00:00,1001,1
4,2021-04-18 04:29:28+00:00,1002,0
5,2021-04-18 12:29:28+00:00,1003,0
6,2021-04-18 20:29:28+00:00,1001,1
7,2021-04-19 04:29:28+00:00,1002,0
8,2021-04-19 12:29:28+00:00,1003,0
9,2021-04-19 20:29:28+00:00,1004,1


In [5]:
%cd feature_repo

/Users/avkash/work/prodramp/publiccode/feature-store/feast-starter/feature_repo


In [6]:
# Connect to the Feast feature store whose repository is the current working
# directory; this relies on the preceding `%cd feature_repo` cell having run.
fs = feast.FeatureStore(repo_path=".")

  from collections import MutableMapping
  from collections import Mapping


In [7]:
fs

<feast.feature_store.FeatureStore at 0x7f8a312b2dd0>

In [8]:
fs.list_feature_views()

[<FeatureView(_name = driver_hourly_stats, _features = [conv_rate-ValueType.FLOAT, acc_rate-ValueType.FLOAT, avg_daily_trips-ValueType.INT64], _projection = FeatureViewProjection(name='driver_hourly_stats', name_alias=None, features=[conv_rate-ValueType.FLOAT, acc_rate-ValueType.FLOAT, avg_daily_trips-ValueType.INT64], join_key_map={}), created_timestamp = 2022-02-23 22:37:42.665307, last_updated_timestamp = 2022-02-23 22:56:47.966606, entities = ['driver_id'], tags = {}, ttl = 1 day, 0:00:00, online = True, input = <feast.infra.offline_stores.file_source.FileSource object at 0x7f8a108fdd50>, batch_source = <feast.infra.offline_stores.file_source.FileSource object at 0x7f8a108fdd50>, stream_source = None, materialization_intervals = [(datetime.datetime(2022, 2, 22, 22, 54, 47, 558169, tzinfo=<UTC>), datetime.datetime(2022, 2, 23, 22, 54, 27, tzinfo=<UTC>)), (datetime.datetime(2022, 2, 23, 22, 54, 27, tzinfo=<UTC>), datetime.datetime(2022, 2, 23, 22, 56, 33, tzinfo=<UTC>))])>]

In [9]:
fs.list_feature_services()

[]

In [10]:
fs.list_entities()

[<feast.entity.Entity at 0x7f89e0115d10>]

In [11]:
fs.get_online_features

<bound method FeatureStore.get_online_features of <feast.feature_store.FeatureStore object at 0x7f8a312b2dd0>>

In [12]:
fs.get_historical_features

<bound method FeatureStore.get_historical_features of <feast.feature_store.FeatureStore object at 0x7f8a312b2dd0>>

In [13]:
!pwd

/Users/avkash/work/prodramp/publiccode/feature-store/feast-starter/feature_repo


In [14]:
import pandas as pd
import feast
from joblib import load


class DriverRankingModel:
    """Pick the best driver using a saved model and Feast online features.

    The model is read with joblib's ``load`` and the features come from the
    ``driver_hourly_stats`` feature view of a Feast feature store.
    """

    def __init__(self, model_path="driver_model.bin", repo_path="."):
        """Load the serialized model and connect to the feature store.

        Args:
            model_path: Path of the model file passed to joblib's ``load``.
                Defaults to ``"driver_model.bin"`` in the working directory.
            repo_path: Path of the Feast feature repository. Defaults to the
                current working directory.
        """
        # Load model
        # NOTE(review): joblib's load unpickles the file, so only point this
        # at model files from a trusted source.
        self.model = load(model_path)

        # Set up feature store
        self.fs = feast.FeatureStore(repo_path=repo_path)

    def predict(self, driver_ids):
        """Return the id of the driver with the highest model prediction.

        Args:
            driver_ids: Iterable of driver ids to rank; must not be empty.

        Returns:
            The ``driver_id`` value of the row with the largest prediction.

        Raises:
            ValueError: If ``driver_ids`` contains no ids.
        """
        # Materialize once so generators work and emptiness can be checked
        # before any request is sent to the feature store.
        driver_ids = list(driver_ids)
        if not driver_ids:
            raise ValueError("driver_ids must contain at least one driver id")

        # Read features from Feast
        driver_features = self.fs.get_online_features(
            entity_rows=[{"driver_id": driver_id} for driver_id in driver_ids],
            features=[
                "driver_hourly_stats:conv_rate",
                "driver_hourly_stats:acc_rate",
                "driver_hourly_stats:avg_daily_trips",
            ],
        )
        df = pd.DataFrame.from_dict(driver_features.to_dict())
        print("------------------ Driver Features ------------ ")
        print(df)

        # Make prediction; columns are passed in alphabetical order.
        # NOTE(review): sorted(df) selects every column of the frame, which
        # includes driver_id alongside the three features -- confirm the model
        # was trained on the same columns in the same order.
        df["prediction"] = self.model.predict(df[sorted(df)])

        # Choose best driver (argmax gives the position of the top prediction)
        best_driver_id = df["driver_id"].iloc[df["prediction"].argmax()]

        # return best driver
        return best_driver_id

In [15]:
def make_drivers_prediction(drivers=None):
    """Rank a set of drivers and print the id of the best one.

    Builds a fresh ``DriverRankingModel``, asks it to pick the best driver and
    prints the result; nothing is returned.

    Args:
        drivers: Driver ids to rank. When omitted, the four ids
            1001, 1002, 1003 and 1004 are used.
    """
    if drivers is None:
        drivers = [1001, 1002, 1003, 1004]
    model = DriverRankingModel()
    best_driver = model.predict(drivers)
    print()
    print("------------------ Best Driver Prediction ------------ ")
    print(f"Prediction for best driver id: {best_driver}")

In [16]:
make_drivers_prediction()

------------------ Driver Features ------------ 
   driver_id  avg_daily_trips  conv_rate  acc_rate
0       1001              281   0.539819  0.483499
1       1002              756   0.793374  0.953679
2       1003              289   0.757673  0.869066
3       1004              973   0.761087  0.996266

------------------ Best Driver Prediction ------------ 
Prediction for best driver id: 1004
