In [1]:
import numpy as np
import pandas as pd

In [3]:
minio_uri = "http://172.17.0.3:9000"
bucket_name = "feast-example"
fname = "driver_stats.parquet"

In [5]:
store_data = pd.read_parquet(f"{minio_uri}/{bucket_name}/{fname}")

In [6]:
query_data = store_data.sample(n=10)[["event_timestamp","driver_id"]]

In [8]:
query_data['trip_completed'] = np.random.randint(0, 2, query_data.shape[0])

In [9]:
query_data

Unnamed: 0,event_timestamp,driver_id,trip_completed
1022,2022-09-03 04:00:00+00:00,1003,0
426,2022-08-24 09:00:00+00:00,1004,0
644,2022-09-02 11:00:00+00:00,1004,1
19,2022-08-22 11:00:00+00:00,1005,0
453,2022-08-25 12:00:00+00:00,1004,1
408,2022-08-23 15:00:00+00:00,1004,0
844,2022-08-26 18:00:00+00:00,1003,1
96,2022-08-25 16:00:00+00:00,1005,0
102,2022-08-25 22:00:00+00:00,1005,1
590,2022-08-31 05:00:00+00:00,1004,1


In [10]:
query_data.to_csv('/home/jovyan/jupyter/ml_project_1/data/driver_orders.csv', sep="\t", index=False)

In [11]:
import pandas as pd
from pprint import pprint
from sklearn.linear_model import ElasticNet
import feast

In [42]:
class DriverRankingTrainModel:
    def __init__(self, repo_path: str, f_service_name: str, tuning_params={}) -> None:
        self._repo_path = repo_path
        self._params = tuning_params
        self._feature_service_name = f_service_name
        
    def get_training_data(self) -> pd.DataFrame:
        orders = pd.read_csv("/home/jovyan/jupyter/ml_project_1/data/driver_orders.csv", sep="\t")
        orders["event_timestamp"] = pd.to_datetime(orders["event_timestamp"])
        
        store = feast.FeatureStore(repo_path=self._repo_path)
        feature_service = store.get_feature_service(self._feature_service_name)
        
        training_df = store.get_historical_features(
            entity_df = orders,
            features = feature_service
        ).to_df()
        
        return training_df
    
    def train_model(self) -> str:
        mlflow.set_tracking_uri("sqlite:///mlruns.db")
        mlflow.sklearn.autolog()

        model = ElasticNet(**self._params)
        target = "trip_completed"
        training_df = self.get_training_data()
        train_X = training_df[training_df.columns.drop(target).drop("event_timestamp")]
        train_y = training_df.loc[:, target]
        
        with mlflow.start_run() as run:
            model.fit(train_X[sorted(train_X)], train_y)
            mlflow.log_dict({"features": ["driver_hourly_stats:conv_rate",
                                  "driver_hourly_stats:acc_rate",
                                  "driver_hourly_stats:avg_daily_trips"],
                     "feast_feature_service": self._feature_service_name,
                     "feast_feature_data": "driver_hourly_stats"}, "feast_data.json")
            mlflow.sklearn.log_model(
                    sk_model=model,
                    artifact_path="sklearn-model",
                    registered_model_name="sk-learn-elasticnet-model"
            )
        return {run.info.run_id}     
    
if __name__ == '__main__':
    REPO_PATH = "/home/jovyan/feature_repo/feature_repo/"
    FEATURE_SERVICE_NAME = "driver_ranking_fv_svc"
    params_list = [{"alpha": 0.5, "l1_ratio": 0.15},
                   {"alpha": 0.75, "l1_ratio": 0.25},
                   {"alpha": 1.0, "l1_ratio": 0.5}]
    
    for params in params_list:
        mlflow_run = DriverRankingTrainModel(REPO_PATH, FEATURE_SERVICE_NAME, params)
        # model_coef_ = model_cls.train_model()
        mlflow_run_id = mlflow_run.train_model
        pprint(f"ElasticNet params: {params}")
        print(f"Model run id: {mlflow_run_id}")
        # print(f"Model coefficients: {model_coef_}")
        

"ElasticNet params: {'alpha': 0.5, 'l1_ratio': 0.15}"
Model run id: <bound method DriverRankingTrainModel.train_model of <__main__.DriverRankingTrainModel object at 0x7fbc087d05b0>>
"ElasticNet params: {'alpha': 0.75, 'l1_ratio': 0.25}"
Model run id: <bound method DriverRankingTrainModel.train_model of <__main__.DriverRankingTrainModel object at 0x7fbc087d0550>>
"ElasticNet params: {'alpha': 1.0, 'l1_ratio': 0.5}"
Model run id: <bound method DriverRankingTrainModel.train_model of <__main__.DriverRankingTrainModel object at 0x7fbc087d05b0>>
