# Code example: consuming the feature store

## Setting up the Spark session

In [2]:
from pyspark import SparkContext, SparkConf
from pyspark.sql import session, SparkSession
from pyspark.sql import HiveContext
from butterfree.clients import SparkClient

def spark_session(hive_metastore="thrift://hive-metastore:9083"):
    """Build (or reuse) the Hive-enabled Spark session used by this notebook.

    Args:
        hive_metastore: Thrift URI of the Hive metastore. The default is the
            address this notebook has always used, so calling the function
            with no arguments behaves as before.

    Returns:
        A ``(spark_client, spark)`` tuple: a newly constructed butterfree
        ``SparkClient`` and the ``SparkSession`` returned by
        ``SparkSession.builder...getOrCreate()``.
    """
    spark = (
        SparkSession
        .builder
        .appName("Feature Store")
        # NOTE(review): `spark.sql.warehouse.dir` is documented as the default
        # location for managed databases/tables (a filesystem path), yet it is
        # given the metastore thrift URI here. Kept as-is to preserve
        # behaviour -- confirm whether a real warehouse path was intended.
        .config("spark.sql.warehouse.dir", hive_metastore)
        # NOTE(review): Hive's own key is `hive.metastore.uris`; verify that
        # the `spark.hive.` prefix is honoured by the Spark version in use.
        .config("spark.hive.metastore.uris", hive_metastore)
        .config("spark.executor.memory", "8g")
        .config("spark.executor.cores", "2")
        .config("spark.sql.shuffle.partitions", 10)
        .enableHiveSupport()
        .getOrCreate()
    )

    # enableHiveSupport() above already gives the session Hive access, so no
    # separate (deprecated) HiveContext needs to be constructed.
    spark_client = SparkClient()

    return spark_client, spark

# Create the notebook-wide handles: the butterfree client and the Spark session
# that the cells below use.
spark_client, spark = spark_session()

## Realtime data (online), from Cassandra

In [3]:
from cassandra.cluster import Cluster

# Read the online feature table from Cassandra and expose it as a Spark DataFrame.
cluster = Cluster(['feature_store_cassandra'])
try:
    # Named `cassandra_session` so it does not shadow the `session` module
    # imported from pyspark.sql at the top of the notebook.
    cassandra_session = cluster.connect()
    # Drain the result into a list while the connection is still open: the
    # driver's ResultSet fetches further pages lazily during iteration, so
    # iterating it after shutdown would break for multi-page results.
    rows = list(
        cassandra_session.execute(
            "SELECT * FROM feature_store.orders_feature_master_table"
        )
    )
finally:
    # Always release the cluster connections, even if connect/execute raised.
    cluster.shutdown()
# Create data frame
df = spark.createDataFrame(rows)
df.toPandas()

Unnamed: 0,customer_id,avg_order_amount_1_month_val,items_count_val,ratio_order_amount_by_average_ticket_val,ratio_order_amount_by_items_val,timestamp
0,c0844066-28e1-4658-b375-91fa8173c7e2,21.0,5,0.7,4.2,2019-01-20 00:19:09
1,b38b218e-3e22-44f2-adea-fd2c9dcfb431,46.0,6,1.15,7.666667,2019-01-08 16:30:26
2,6065622a-98db-4a65-93a1-ba89e9f7ab7d,45.8,2,0.763333,22.9,2019-01-31 00:01:05
3,af30d521-5a68-4ca7-8d66-7bd8e03d7bda,54.4,6,0.906667,9.066667,2019-01-30 17:20:51
4,79677fb6-31c7-4ddc-b35c-2afe15d1f96b,65.3,3,0.81625,21.766667,2018-12-29 16:16:57
5,3590abad-efb5-4622-a98c-ed70856006a7,14.9,1,0.3725,14.9,2019-01-08 22:08:14
6,373e5d95-a3bd-484e-927e-ac4c3bdbe1c6,44.0,6,0.733333,7.333333,2019-01-18 00:17:13
7,1da1119e-cf94-47bf-ae73-3b4f9d7b7196,168.3,2,2.805,84.15,2019-01-08 21:04:42
8,fa5789d2-2ff9-4b62-8bd4-41daac2bec63,73.5,4,1.225,18.375,2019-01-29 20:55:22
9,0ddebc9b-39c1-4ea0-ad4f-6fe68e7b26ec,51.8,2,1.726667,25.9,2019-01-12 23:31:47


## Historical data (offline), from Hive Metastore

In [5]:
# List the tables the Hive-enabled session can see in its current database,
# printing full values instead of truncating long ones.
spark.sql("show tables").show(truncate=False)

+--------+---------+-----------+
|database|tableName|isTemporary|
+--------+---------+-----------+
+--------+---------+-----------+



In [6]:
# Load the historical (offline) feature table and convert it to pandas for display.
# NOTE(review): the recorded output of this cell is an AnalysisException
# ("Table or view not found") and the `show tables` cell lists no tables --
# confirm the table/database name and that the metastore is reachable.
spark.table("historical_feature_store__orders_feature_master_table").toPandas()

AnalysisException: Table or view not found: historical_feature_store__orders_feature_master_table;;
'UnresolvedRelation [historical_feature_store__orders_feature_master_table]
