# Feature store Example

In [2]:
# Imports

import pandas as pd
import requests

In [10]:
url = 'https://feature-store-demo.herokuapp.com/v1.0'

### Publishing quotes to db

this step will launch 'n_flows' flows each flow will act as follows

1. create a quote
2. 8/10 quotes will bind to user
3. 8/10 binded quotes will be paid and convert to policies
4. each policy will have multiple payments

In [11]:
publish_url = "{}/publish".format(url)

params = {"n_flows":"100"}
payload = ""
headers = {'Content-Type': "application/json"}

response = requests.request("PUT", publish_url, data=payload, headers=headers, params=params)

In [12]:
print(response.status_code)
response.json()

200


{'msg': 'created 100 flows'}

### Data Science side (Training)

1. loading the data via api
2. converting to data frame
3. writing etl.py file

In [23]:
# I WILL BE USING THE FOLLOWING QUERY
q = "SELECT Q.user_id, Q.is_binded, Q.creation_date, Q.binding_date, T.card_type, F.failed_count, P.purchase_time FROM feature_store.quotes Q LEFT JOIN feature_store.policies P ON Q.user_id = P.user_id LEFT JOIN feature_store.transactions T ON P.user_id = T.user_id LEFT JOIN (select T2.user_id, COUNT(T2.user_id) AS failed_count from feature_store.transactions T2 WHERE T2.successful=false GROUP BY T2.user_id) F ON Q.user_id = F.user_id"

In [29]:
query_url = "{}/query/train".format(url)
params = {"save":"true"} # SHOULD QUERY BE SAVED

payload = {"query": q, "query_name": "demo_query"}
headers = {'Content-Type': "application/json"}

response = requests.request("POST", query_url, json=payload, headers=headers, params=params)

In [35]:
print(response.status_code)

query_id = response.json()["query_id"] # <<<< SAVE THIS FOR LATER!

print("query saved to db under query_id = " + query_id)

200
query saved to db under query_id = 5d7d5c588a96060004e08da4


In [32]:
df = pd.DataFrame(response.json()["data"])

In [33]:
df.head()

Unnamed: 0,user_id,is_binded,creation_date,binding_date,card_type,failed_count,purchase_time
0,8450805b-a3fc-4cb5-8b00-0b39fffe3ea9,True,1568495000.0,1568495000.0,,,
1,1a46bbdd-0208-4097-90c4-df5caa65fe0e,True,1568495000.0,1568495000.0,credit,,1568495000.0
2,617d89f5-f7cd-4d5b-8efb-f1cc15e8855a,True,1568495000.0,1568495000.0,credit,1.0,1568495000.0
3,cb179cdb-3585-495a-a6c0-ae5a85e1c995,True,1568495000.0,1568495000.0,debit,,1568495000.0
4,81c4ce2a-a4a8-467f-873b-afa5a9b571f8,True,1568495000.0,1568495000.0,credit,1.0,1568495000.0


#### I will build a simple transformation and implement it using the Etl interface

In [34]:
###

from feature_lib.etl_abc import AbstractEtl


class Etl(AbstractEtl):
    def extract(self):
        self.df.dropna(subset=["user_id"], inplace=True)
        self.df["creation_to_binding"] = self.df.binding_date - self.df.creation_date
        self.df.set_index("user_id", inplace=True)
        return self.df
    
###

#### then upload to git on this address --> https://raw.githubusercontent.com/miararoy/feature_lib/master/etl_demo.py


### using the extract API

1. load etl file to feature store
2. running the query against data warehouse
3. saving etl on feature store

In [36]:
extract_url = "{}/extract/train".format(url)
params = {"save":"true"} # SHOULD QUERY BE SAVED

payload = {
    "query_id": query_id, # <<< USING THE QUERY ID FROM BEFORE
    "etl_path": "https://raw.githubusercontent.com/miararoy/feature_lib/master/etl_demo.py" # <<< USING THE ETL WE UPLOADED TO GIT
} 
headers = {'Content-Type': "application/json"}

response = requests.request("POST", extract_url, json=payload, headers=headers, params=params)

In [37]:
print(response.status_code)

etl_id = response.json()["etl_id"] # <<<< SAVE THIS FOR LATER!

print("etl saved to db under etl_id = " + etl_id)

500


KeyError: 'etl_id'

### Query on backend side (serving)

1. loading the data via api using query id and key, value search
2. converting to data frame