# Feature Store Example

In [38]:
# Imports

import pandas as pd
import requests

In [39]:
# Base URL of the feature store demo API; the endpoint URLs in later cells are built from it.
url = 'https://feature-store-demo.herokuapp.com/v1.0'

### Publishing quotes to db

This step launches `n_flows` flows; each flow acts as follows:

1. create a quote
2. 8 out of 10 quotes will be bound to a user
3. 8 out of 10 bound quotes will be paid and converted to policies
4. each policy will have multiple payments

In [40]:
# Endpoint that generates the simulated quote flows.
publish_url = "{}/publish".format(url)

# Number of flows to launch, sent as a query-string parameter.
params = {"n_flows":"100"}
payload = ""
headers = {'Content-Type': "application/json"}

# requests waits indefinitely by default; an explicit timeout keeps this cell
# from hanging forever if the demo server never answers.
response = requests.put(publish_url, data=payload, headers=headers, params=params, timeout=60)

In [41]:
# Show the HTTP status of the publish call, then display the decoded JSON body
# as the cell's output.
print(response.status_code)
response.json()

200


{'msg': 'created 100 flows'}

### Data Science side (Training)

1. loading the data via api
2. converting to data frame
3. writing etl.py file

In [42]:
# Training query used throughout this notebook. It is written as adjacent string
# literals (one SQL clause per line) that Python concatenates into a single
# one-line statement.
q = (
    "SELECT Q.user_id, Q.is_binded, Q.creation_date, Q.binding_date, "
    "T.card_type, F.failed_count, P.purchase_time "
    "FROM feature_store.quotes Q "
    "LEFT JOIN feature_store.policies P ON Q.user_id = P.user_id "
    "LEFT JOIN feature_store.transactions T ON P.user_id = T.user_id "
    "LEFT JOIN ("
    "select T2.user_id, COUNT(T2.user_id) AS failed_count "
    "from feature_store.transactions T2 "
    "WHERE T2.successful=false "
    "GROUP BY T2.user_id"
    ") F ON Q.user_id = F.user_id"
)

In [43]:
# Endpoint that runs a training query.
query_url = "{}/query/train".format(url)
params = {"save":"true"} # SHOULD QUERY BE SAVED

# The SQL text plus the name sent with it as "query_name".
payload = {"query": q, "query_name": "demo_query"}
headers = {'Content-Type': "application/json"}

# requests waits indefinitely by default; an explicit timeout keeps this cell
# from hanging forever if the demo server never answers.
response = requests.post(query_url, json=payload, headers=headers, params=params, timeout=60)

In [44]:
print(response.status_code)

# Stop here with a clear HTTP error if the request failed, instead of hitting an
# opaque KeyError / JSON decode error on the line below.
response.raise_for_status()

query_id = response.json()["query_id"] # <<<< SAVE THIS FOR LATER!

# format() also works if the id is not a str, unlike "+" concatenation.
print("query saved to db under query_id = {}".format(query_id))

200
query saved to db under query_id = 5d7d60df76afa700044b4946


In [45]:
# Load the "data" field of the training-query response into a DataFrame.
training_data = response.json()["data"]
df = pd.DataFrame(training_data)

In [46]:
# Preview the first rows of the training data returned by the query.
df.head()

Unnamed: 0,user_id,is_binded,creation_date,binding_date,card_type,failed_count,purchase_time
0,8450805b-a3fc-4cb5-8b00-0b39fffe3ea9,True,1568495000.0,1568495000.0,,,
1,1a46bbdd-0208-4097-90c4-df5caa65fe0e,True,1568495000.0,1568495000.0,credit,,1568495000.0
2,617d89f5-f7cd-4d5b-8efb-f1cc15e8855a,True,1568495000.0,1568495000.0,credit,1.0,1568495000.0
3,cb179cdb-3585-495a-a6c0-ae5a85e1c995,True,1568495000.0,1568495000.0,debit,,1568495000.0
4,81c4ce2a-a4a8-467f-873b-afa5a9b571f8,True,1568495000.0,1568495000.0,credit,1.0,1568495000.0


#### I will build a simple transformation and implement it using the Etl interface

In [47]:
###

from feature_lib.etl_abc import AbstractEtl


class Etl(AbstractEtl):
    """Demo ETL: drops rows without a user_id and adds `creation_to_binding`."""

    def extract(self):
        """Transform `self.df` in place and return it.

        Rows whose `user_id` is missing are removed, then a
        `creation_to_binding` column is added holding
        `binding_date - creation_date`.
        """
        frame = self.df
        # Mutates the frame held by the instance rather than making a copy.
        frame.dropna(subset=["user_id"], inplace=True)
        frame["creation_to_binding"] = frame["binding_date"] - frame["creation_date"]
        return frame
    
###

#### Then upload it to git at this address: https://raw.githubusercontent.com/miararoy/feature_lib/master/etl_demo.py


### using the extract API

1. Load the ETL file into the feature store
2. Run the query against the data warehouse
3. Save the ETL in the feature store

In [52]:
# Endpoint that runs a saved query and applies an ETL file to the result.
extract_url = "{}/extract/train".format(url)
params = {"save":"true"} # SHOULD THE ETL BE SAVED

payload = {
    "query_id": query_id, # <<< USING THE QUERY ID FROM BEFORE
    "etl_path": "https://raw.githubusercontent.com/miararoy/feature_lib/master/etl_demo.py" # <<< USING THE ETL WE UPLOADED TO GIT
} 
headers = {'Content-Type': "application/json"}

# requests waits indefinitely by default; an explicit timeout keeps this cell
# from hanging forever if the demo server never answers.
response = requests.post(extract_url, json=payload, headers=headers, params=params, timeout=60)

In [53]:
print(response.status_code)

# Stop here with a clear HTTP error if the request failed, instead of hitting an
# opaque KeyError / JSON decode error on the line below.
response.raise_for_status()

etl_id = response.json()["etl_id"] # <<<< SAVE THIS FOR LATER!

# format() also works if the id is not a str, unlike "+" concatenation.
print("etl saved to db under etl_id = {}".format(etl_id))

200
etl saved to db under etl_id = 5d7d616176afa700044b4948


In [54]:
# Load the "data" field of the extract response into a DataFrame.
extracted_data = response.json()["data"]
df_after_feature_extraction = pd.DataFrame(extracted_data)

In [56]:
# Preview the last rows; the output includes the creation_to_binding column.
df_after_feature_extraction.tail()

Unnamed: 0,user_id,is_binded,creation_date,binding_date,card_type,failed_count,purchase_time,creation_to_binding
227,4ad61934-f526-45a2-ba84-dabebffbdd41,True,1568498000.0,1568498000.0,credit,1.0,1568498000.0,0.000782
228,88eb2d8d-6868-43b5-bf70-703bb8d3e28d,True,1568498000.0,1568498000.0,debit,1.0,1568498000.0,0.00043
229,253b509e-1045-4c8e-aa8e-a8e506bea043,True,1568498000.0,1568498000.0,debit,,1568498000.0,0.000507
230,455ce49f-1c4e-46b3-8a6f-1884a5e29177,True,1568498000.0,1568498000.0,credit,,1568498000.0,0.000668
231,316181e4-997e-4327-ab3a-8a4a5ad4e4f7,True,1568498000.0,1568498000.0,debit,,1568498000.0,0.000748


### Query on backend side (serving)

After the data scientists have built the query and the feature extraction, we can use them on the backend side.

steps:

1. loading the data via the API using the query id and a key/value search
2. converting to data frame

In [62]:
# Endpoint for querying the realtime server.
query_rt_url = "{}/query/realtime".format(url)
print(query_rt_url)
# The index key/value pair is how the backend asks the realtime server for a
# single user's data at serving time.
payload = {"query_id": query_id, "index_key": "user_id", "index_value": "316181e4-997e-4327-ab3a-8a4a5ad4e4f7"}
headers = {'Content-Type': "application/json"}

# requests waits indefinitely by default; an explicit timeout keeps this cell
# from hanging forever if the demo server never answers.
response = requests.post(query_rt_url, json=payload, headers=headers, timeout=60)

https://feature-store-demo.herokuapp.com/v1.0/query/realtime


In [61]:
# Show the HTTP status of the realtime query.
# NOTE(review): the recorded output of this cell is 500, and its execution count (61)
# precedes that of the request cell (62) -- re-run the cells in order and confirm the
# realtime endpoint succeeds before relying on this result.
print(response.status_code)

500
