<img src="Images/Splice_logo.jpeg" width="250" height="200" align="left" >

# Using the Feature Store and Database Deployment for Model Deployment

In [None]:
# Imports for this notebook, gathered in one place
from pyspark.sql import SparkSession
from splicemachine.spark import PySpliceContext
from splicemachine.features import FeatureStore
from splicemachine.features.constants import FeatureType
# Star import; presumably this is what brings the `mlflow` name used below into scope
from splicemachine.mlflow_support import *

# Start (or reuse) the Spark session
spark = SparkSession.builder.getOrCreate()

# pysplice context: allows you to create a Spark dataframe using our Native Spark DataSource
splice = PySpliceContext(spark)

# Feature Store API handle, built on top of the splice context
fs = FeatureStore(splice)

# Register the feature store and the splice context with MLFlow
mlflow.register_feature_store(fs)
mlflow.register_splice_context(splice)

# Deploy a Machine Learning model as a table in the database

<img src="Images/database_deployment.png" width="1000" align="left" >

## Benefits of Database Model Deployment
- ### Fast
- ### Easy to deploy and govern
- ### Integrates with our Feature Store

In [None]:
# Call get_mlflow_ui() as the last expression so its return value is rendered as the cell output
# (presumably an embedded view of the MLflow UI, where the run id needed below can be looked up — TODO confirm)
from splicemachine.notebook import get_mlflow_ui
get_mlflow_ui()

## Create the deployment table

In [None]:
# Load in most relevant features generated in the previous notebook.
# `%store -r` restores variables that were persisted with `%store`;
# features_list and features_str are used by the deployment and INSERT cells below.
%store -r features_list
%store -r features_str

In [None]:
%%sql
-- Create schema and drop table, if necessary
-- (dropping first lets this notebook be re-run from the top without the deployment table already existing)
CREATE SCHEMA IF NOT EXISTS deployed_models;
DROP TABLE IF EXISTS deployed_models.twimlcon_regression;


In [None]:

# Id of the MLflow run whose model should be deployed.
# Replace the placeholder with a real run id (e.g. copied from the MLflow UI) before running this cell.
RUN_ID = '<replace with your run id>'

# Fail fast with a clear message instead of starting the deployment job with the placeholder value
if not RUN_ID or RUN_ID.startswith('<'):
    raise ValueError('Set RUN_ID to the id of the MLflow run you want to deploy before running this cell.')

# Define the training data frame. Necessary so the model table knows what columns to make
training_df = fs.get_training_set_from_view('twimlcon_customer_lifetime_value').dropna()

# Create the table itself; only the selected feature columns are passed as the reference dataframe
jobid = mlflow.deploy_db(
    db_schema_name='deployed_models',
    db_table_name='twimlcon_regression',
    run_id=RUN_ID,
    primary_key={'CUSTOMERID': 'INTEGER', 'EVAL_TIME': 'TIMESTAMP'},
    df=training_df.select(features_list),
)

# Watch the table creation logs
mlflow.watch_job(jobid)

## Insert data into this empty table using the Feature Store
<img src="Images/FS_tables.png" width="800" height="400" align="left" >

### Get the most up-to-date Feature Values in milliseconds

#### Return features as a Spark dataframe

In [None]:
# Join-key value identifying the customer whose feature values are requested
customer_lookup = {'customerid': '14235'}

feature_vector = fs.get_feature_vector(
    features=features_list,
    join_key_values=customer_lookup,
)
# Bare last expression so the result is rendered as the cell output
feature_vector

#### Return features using SQL

In [None]:
# Same lookup as the dataframe variant, but with return_sql=True the call hands back SQL text
customer_lookup = {'customerid': '14235'}

feature_vector_sql = fs.get_feature_vector(
    features=features_list,
    join_key_values=customer_lookup,
    return_sql=True,
)
# print (rather than a bare expression) so the statement can be read and copied as plain text
print(feature_vector_sql)

In [None]:
%%time
%%sql
-- Placeholder cell: replace the line below with the SQL printed by the previous cell, then run to time it
{Insert SQL from previous cell here}

### Generate and retrieve predictions using an INSERT/SELECT sequence on a single row

In [None]:
%%sql
-- Empty the deployment table so the following INSERT starts from a clean slate
truncate table deployed_models.twimlcon_regression;

In [None]:
%%time
splice.execute(f"""
    INSERT INTO deployed_models.twimlcon_regression ( CUSTOMERID, {features_str} ) 

    SELECT fset2.CUSTOMERID, {features_str}
    FROM twimlcon_fs.customer_lifetime fset2,
         twimlcon_fs.customer_rfm_by_category fset1 
    WHERE fset2.CUSTOMERID = 15838 AND fset1.CUSTOMERID = 15838

    union all

    SELECT fset2.CUSTOMERID, {features_str}
    FROM twimlcon_fs.customer_lifetime fset2,
         twimlcon_fs.customer_rfm_by_category fset1 
    WHERE fset2.CUSTOMERID = 15839 AND fset1.CUSTOMERID = 15839""")

In [None]:
%%sql
-- Read back everything currently in the deployment table (the rows inserted by the previous cell)
SELECT * FROM deployed_models.twimlcon_regression;

### Generate and retrieve predictions using an INSERT/SELECT sequence on multiple rows
#### This process takes about a minute on this small cluster; performance scales with the size of the cluster.

In [None]:
%%sql
-- Empty the deployment table again before the multi-row INSERT
truncate table deployed_models.twimlcon_regression;

In [None]:
%%time
splice.execute(f"""
    INSERT INTO deployed_models.twimlcon_regression ( EVAL_TIME, CUSTOMERID, {features_str} )  --splice-properties useSpark=False

    SELECT fset2.ASOF_TS, fset2.CUSTOMERID, {features_str}
    FROM twimlcon_fs.customer_lifetime_history fset2,
         twimlcon_fs.customer_rfm_by_category_history fset1 
    WHERE fset2.CUSTOMERID = fset1.CUSTOMERID 
          AND fset2.ASOF_TS >=fset1.ASOF_TS AND fset2.ASOF_TS<fset1.UNTIL_TS
          AND fset2.ASOF_TS BETWEEN '2020-10-01' and '2020-12-31'
""")


In [None]:
%%sql
-- Show a sample of the deployment table ordered by EVAL_TIME
-- ({limit 10} presumably caps the result at 10 rows — TODO confirm)
SELECT * FROM deployed_models.twimlcon_regression ORDER BY EVAL_TIME {limit 10};

In [None]:
# Stop the Spark session created in the first cell now that the notebook is finished
spark.stop()