# Scenario 1. Creating Feature view with Snowpark Pandas API 

### Observations:
    - Simple processing
          - Read the table as a Snowpark pandas DataFrame, i.e. pd.read_snowflake(table_name)
          - Select a few columns and rename a column header
          - The resulting DataFrame is still a Snowpark pandas DataFrame
    - Create feature view
          - feature_df = Snowpark pandas DataFrame
          - With a refresh frequency
 ### AttributeError: 'DataFrame' object has no attribute 'queries'

In [None]:
# Import python packages
from typing import List, Dict,Any

import modin.pandas as pd
import snowflake.snowpark.modin.plugin

from sklearn.preprocessing import OneHotEncoder
import snowflake.snowpark as snowpark
from snowflake.snowpark.functions import col
from snowflake.ml.feature_store import FeatureStore, FeatureView, Entity, CreationMode

from snowflake.snowpark.context import get_active_session
# --- Scenario 1 setup: session, source table, Snowpark pandas frame, feature store ---

# Names that do not depend on the session.
source_table = "NYC_YELLOW_TRIPS"
fea_store_name = "NYC_store"

# Everything else is read from the notebook's active Snowflake session.
session = get_active_session()
database = session.get_current_database()
schema = session.get_current_schema()
warehouse = session.get_current_warehouse()
print(f"active session: {session}")

# Fully qualified name built from the session's current database and schema.
table_name = f"{database}.{schema}.{source_table}"
print(f"souce table: {table_name}")

# Alternative kept for reference: read the table as a plain Snowpark DataFrame.
# snowpark_df = session.table(table_name)
# print(snowpark_df.show())

# Read the table as a Snowpark pandas DataFrame.
snowpark_pandas_df = pd.read_snowflake(table_name)

# Feature transformation: keep six columns, then rename the drop-off
# timestamp to TIME_STAMP (the timestamp column named in the feature view spec).
trip_df = snowpark_pandas_df[
    [
        "TPEP_DROPOFF_DATETIME",
        "TRIP_ID",
        "PASSENGER_COUNT",
        "TRIP_DISTANCE",
        "FARE_AMOUNT",
        "STORE_AND_FWD_FLAG",
    ]
].rename(columns={"TPEP_DROPOFF_DATETIME": "TIME_STAMP"})

# scikit-learn one-hot encoding was also tried; left disabled in this scenario.
# oh = OneHotEncoder(sparse_output=False).set_output(transform="pandas")
# one_hot_encoded = oh.fit_transform(trip_df[["STORE_AND_FWD_FLAG"]])
# trans_df = pd.concat([trip_df, one_hot_encoded], axis=1).drop(columns=["STORE_AND_FWD_FLAG"])

# Since a pandas DataFrame is not accepted for feature view creation,
# converting back to a Snowpark DataFrame was also tried (left disabled):
# snowpark_df = session.create_dataframe(data=trans_df)
# trip_df.head()

# Open the feature store, creating it if it does not exist yet.
fs = FeatureStore(
    session=session,
    database=database,
    name=fea_store_name,
    default_warehouse=warehouse,
    creation_mode=CreationMode.CREATE_IF_NOT_EXIST,
)
print(f"feature store: {fs} created")


def create_entity(fs: FeatureStore, name: str, join_keys: List[str], desc: str) -> Entity:
    """Build a single Entity and register it with the feature store.

    Args:
        fs: Feature store the entity is registered in.
        name: Entity name.
        join_keys: Column names handed to ``Entity`` as its join keys.
        desc: Free-text description handed to ``Entity``.

    Returns:
        The value returned by ``fs.register_entity`` for the new entity.

    Note:
        As observed by the original author, registering an entity that
        already exists only emits a UserWarning
        ("Entity ... already exists. Skip registration.").
    """
    return fs.register_entity(Entity(name=name, join_keys=join_keys, desc=desc))

# One dict per entity to register; the keys match create_entity's keyword parameters.
entity_parameter_list = [
    {"name": "TRIP_NUM", "join_keys": ["TRIP_ID"], "desc": "Trip Unique Number"},
    {"name": "DROP_OFF_LOC", "join_keys": ["DOLOCATIONID"], "desc": "Drop off loc id."},
]

# Entity name -> registered Entity, used later to resolve a feature view's entities.
entities_mapping = {}
for entity_parameters in entity_parameter_list:
    entities_mapping[entity_parameters["name"]] = create_entity(fs=fs, **entity_parameters)

print(f"entities_mapping: {entities_mapping} created")

# Declarative description of the feature views to create; one dict per view.
feature_view_parameters = [
    dict(
        name="Trp_pandas_df",
        entities=["TRIP_NUM"],  # entity names, resolved through the entity mapping
        feature_df="trip_df",  # label only: a string, not the DataFrame itself
        desc="Feature view made with Pandas_df",
        timestamp_col="TIME_STAMP",
        refresh_freq="5 minutes",
        # Column name -> human-readable description of the feature.
        feature_desc={
            "PASSENGER_COUNT": "The count of passenger of a trip.",
            "TRIP_DISTANCE": "The distance of a trip.",
            "FARE_AMOUNT": "The fare of a trip.",
            "STORE_AND_FWD_FLAG": "Flad id",
        },
        version="1",
    ),
]

def create_feature_views(feature_store: FeatureStore, feature_view_parameters: List[Dict[str, Any]],
                         entity_mapping: Dict[str, Entity], feature_df: pd.DataFrame) -> Dict[str, FeatureView]:
    """Create and register one FeatureView per parameter dict, reusing existing views.

    Args:
        feature_store: Feature store used to list and register feature views.
        feature_view_parameters: One dict per view. Required keys: "name",
            "version" and "entities" (a list of entity names). Optional keys:
            "timestamp_col", "refresh_freq", "desc" and "feature_desc"
            (column name -> description).
        entity_mapping: Entity name -> registered Entity; resolves "entities".
        feature_df: DataFrame handed to every FeatureView as its ``feature_df``.

    Returns:
        Feature view name -> the newly registered view, or the already
        registered view when one with the same name and version exists.

    Raises:
        KeyError: If a required key is missing from a parameter dict or an
            entity name is not present in ``entity_mapping``.
    """
    feature_view_mapping = {}
    # NOTE(review): assumes list_feature_views() yields objects exposing
    # .name and .version -- confirm for the installed snowflake-ml version.
    registered_views = feature_store.list_feature_views()

    for feature_view_param in feature_view_parameters:
        feature_view_name = feature_view_param["name"]
        feature_view_version = feature_view_param["version"]
        entities = [entity_mapping[name] for name in feature_view_param["entities"]]

        # Reuse a view that is already registered under the same name/version.
        # NOTE(review): this is an exact equality test; confirm it still matches
        # if Snowflake normalizes the identifier's case.
        existing_view = next(
            (view for view in registered_views
             if view.name == feature_view_name and view.version == feature_view_version),
            None,
        )
        if existing_view is not None:
            print(f"Feature View : {feature_view_name}_{feature_view_version} already exists")
            # Previously nothing was bound on this path, so the mapping
            # assignment failed (or silently reused the previous iteration's view).
            feature_view_mapping[feature_view_name] = existing_view
            continue

        # Create the FeatureView instance.
        fv_instance = FeatureView(
            name=feature_view_name,
            entities=entities,
            feature_df=feature_df,
            timestamp_col=feature_view_param.get("timestamp_col"),
            refresh_freq=feature_view_param.get("refresh_freq"),
            desc=feature_view_param.get("desc"),
        )
        # Descriptions are optional; skip the call when none were supplied.
        feature_desc = feature_view_param.get("feature_desc")
        if feature_desc:
            fv_instance = fv_instance.attach_feature_desc(feature_desc)

        # Register the FeatureView instance; this creates the object in Snowflake.
        feature_view_mapping[feature_view_name] = feature_store.register_feature_view(
            feature_view=fv_instance,
            version=feature_view_version,
            block=True,  # whether the call blocks until initial data is available
            overwrite=False,  # whether to replace an existing view with same name/version
        )
        print(f"Feature View : {feature_view_name}_{feature_view_version} created")

    return feature_view_mapping
    

# Scenario under test: the Snowpark pandas DataFrame is passed as feature_df as-is.
feature_view_dict = create_feature_views(
    feature_store=fs,
    feature_view_parameters=feature_view_parameters,
    entity_mapping=entities_mapping,
    feature_df=trip_df,
)
print(feature_view_dict)

# Scenario 2. Creating Feature view by converting Snowpark Pandas_df to snowpark_df with pd.to_snowpark()

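### Observations:

    - Create feature view
          - feature_df = Snowpark DataFrame (the Snowpark pandas DataFrame converted with pd.to_snowpark())
          - With and without a refresh frequency

 ### AttributeError: 'DataFrame' object has no attribute 'queries'
    - This is the error raised when the Snowpark pandas DataFrame (`trip_df`) is passed as feature_df instead of the converted Snowpark DataFrame (`pandas_snowpark_df`).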

In [None]:
# Import python packages
from typing import List, Dict,Any

import modin.pandas as pd
import snowflake.snowpark.modin.plugin

from sklearn.preprocessing import OneHotEncoder
import snowflake.snowpark as snowpark
from snowflake.snowpark.functions import col
from snowflake.ml.feature_store import FeatureStore, FeatureView, Entity, CreationMode

from snowflake.snowpark.context import get_active_session
# --- Scenario 2 setup: read with Snowpark pandas, transform, convert to Snowpark ---

# Session context comes from the notebook's active Snowflake session.
session = get_active_session()
database = session.get_current_database()
schema = session.get_current_schema()
warehouse = session.get_current_warehouse()
source_table = "NYC_YELLOW_TRIPS"
fea_store_name = "NYC_store"
print(f"active session: {session}")

# Fully qualified name built from the session's current database and schema.
table_name = f"{database}.{schema}.{source_table}"
print(f"souce table: {table_name}")

# Create a Snowpark pandas DataFrame.
snowpark_pandas_df = pd.read_snowflake(table_name)

# Feature transformation: keep six columns and rename the drop-off
# timestamp to TIME_STAMP (the timestamp column named in the feature view spec).
trip_df = snowpark_pandas_df[["TPEP_DROPOFF_DATETIME", "TRIP_ID", "PASSENGER_COUNT",
                              "TRIP_DISTANCE", "FARE_AMOUNT", "STORE_AND_FWD_FLAG"]]
trip_df = trip_df.rename(columns={"TPEP_DROPOFF_DATETIME": "TIME_STAMP"})
print("pandas dataframe:")
print(trip_df.head())

# scikit-learn one-hot encoding was also tried; left disabled in this scenario.
# oh = OneHotEncoder(sparse_output=False).set_output(transform="pandas")
# one_hot_encoded = oh.fit_transform(trip_df[["STORE_AND_FWD_FLAG"]])
# trans_df = pd.concat([trip_df, one_hot_encoded], axis=1).drop(columns=["STORE_AND_FWD_FLAG"])

# A Snowpark pandas DataFrame is not accepted for feature view creation,
# so convert it to a Snowpark DataFrame.
pandas_snowpark_df = pd.to_snowpark(trip_df, index=False)
print("converting snowpark pandas_df into snowpark DataFrame")
print(f"data type: {type(pandas_snowpark_df)}")
# show() prints the rows itself and returns None; wrapping it in print()
# only appended a stray "None" line to the output.
pandas_snowpark_df.show()

# Open the feature store, creating it if it does not exist yet.
fs = FeatureStore(
    session=session,
    database=database,
    name=fea_store_name,
    default_warehouse=warehouse,
    creation_mode=CreationMode.CREATE_IF_NOT_EXIST,
)
print(f"feature store: {fs} created")


def create_entity(fs: FeatureStore, name: str, join_keys: List[str], desc: str) -> Entity:
    """Create one Entity and register it in the given feature store.

    Args:
        fs: Feature store that receives the entity.
        name: Entity name.
        join_keys: Column names passed to ``Entity`` as join keys.
        desc: Description text passed to ``Entity``.

    Returns:
        The result of ``fs.register_entity`` for the entity built here.

    Note:
        The original author observed that an already existing entity only
        triggers a UserWarning ("Entity ... already exists. Skip registration.").
    """
    new_entity = Entity(name=name, join_keys=join_keys, desc=desc)
    return fs.register_entity(new_entity)

# Entities to register; each dict carries create_entity's keyword arguments.
entity_parameter_list = [
    dict(name="TRIP_NUM", join_keys=["TRIP_ID"], desc="Trip Unique Number"),
    dict(name="DROP_OFF_LOC", join_keys=["DOLOCATIONID"], desc="Drop off loc id."),
]

# Register every entity and index the results by entity name.
entities_mapping = {}
for entity_parameters in entity_parameter_list:
    entity_name = entity_parameters["name"]
    entities_mapping[entity_name] = create_entity(
        fs=fs,
        name=entity_name,
        join_keys=entity_parameters["join_keys"],
        desc=entity_parameters["desc"],
    )

print(f"entities_mapping: {entities_mapping} created")

# Declarative description of the feature views to create; one dict per view.
feature_view_parameters = [
    dict(
        name="Trp_pandas_to_snowpak_df",
        entities=["TRIP_NUM"],  # entity names, resolved through the entity mapping
        feature_df="pandas_snowpark_df",  # label only: a string, not the DataFrame itself
        desc="Feature view made by converting Pandas_df to snowpark_df",
        timestamp_col="TIME_STAMP",
        refresh_freq="5 minutes",
        # Column name -> human-readable description of the feature.
        feature_desc={
            "PASSENGER_COUNT": "The count of passenger of a trip.",
            "TRIP_DISTANCE": "The distance of a trip.",
            "FARE_AMOUNT": "The fare of a trip.",
            "STORE_AND_FWD_FLAG": "Flad id",
        },
        version="1",
    ),
]

def create_feature_views(feature_store: FeatureStore, feature_view_parameters: List[Dict[str, Any]],
                         entity_mapping: Dict[str, Entity], feature_df: snowpark.DataFrame) -> Dict[str, FeatureView]:
    """Create and register one FeatureView per parameter dict, reusing existing views.

    Args:
        feature_store: Feature store used to list and register feature views.
        feature_view_parameters: One dict per view. Required keys: "name",
            "version" and "entities" (a list of entity names). Optional keys:
            "timestamp_col", "refresh_freq", "desc" and "feature_desc"
            (column name -> description).
        entity_mapping: Entity name -> registered Entity; resolves "entities".
        feature_df: DataFrame handed to every FeatureView as its ``feature_df``.
            This scenario is meant to supply a Snowpark DataFrame.

    Returns:
        Feature view name -> the newly registered view, or the already
        registered view when one with the same name and version exists.

    Raises:
        KeyError: If a required key is missing from a parameter dict or an
            entity name is not present in ``entity_mapping``.
    """
    feature_view_mapping = {}
    # NOTE(review): assumes list_feature_views() yields objects exposing
    # .name and .version -- confirm for the installed snowflake-ml version.
    registered_views = feature_store.list_feature_views()

    for feature_view_param in feature_view_parameters:
        feature_view_name = feature_view_param["name"]
        feature_view_version = feature_view_param["version"]
        entities = [entity_mapping[name] for name in feature_view_param["entities"]]

        # Reuse a view that is already registered under the same name/version.
        # NOTE(review): this is an exact equality test; confirm it still matches
        # if Snowflake normalizes the identifier's case.
        existing_view = next(
            (view for view in registered_views
             if view.name == feature_view_name and view.version == feature_view_version),
            None,
        )
        if existing_view is not None:
            print(f"Feature View : {feature_view_name}_{feature_view_version} already exists")
            # Previously nothing was bound on this path, so the mapping
            # assignment failed (or silently reused the previous iteration's view).
            feature_view_mapping[feature_view_name] = existing_view
            continue

        # Create the FeatureView instance.
        fv_instance = FeatureView(
            name=feature_view_name,
            entities=entities,
            feature_df=feature_df,
            timestamp_col=feature_view_param.get("timestamp_col"),
            refresh_freq=feature_view_param.get("refresh_freq"),
            desc=feature_view_param.get("desc"),
        )
        # Descriptions are optional; skip the call when none were supplied.
        feature_desc = feature_view_param.get("feature_desc")
        if feature_desc:
            fv_instance = fv_instance.attach_feature_desc(feature_desc)

        # Register the FeatureView instance; this creates the object in Snowflake.
        feature_view_mapping[feature_view_name] = feature_store.register_feature_view(
            feature_view=fv_instance,
            version=feature_view_version,
            block=True,  # whether the call blocks until initial data is available
            overwrite=False,  # whether to replace an existing view with same name/version
        )
        print(f"Feature View : {feature_view_name}_{feature_view_version} created")

    return feature_view_mapping
    

# Build the feature view from the converted Snowpark DataFrame. Passing the
# Snowpark pandas frame `trip_df` here is what raises
# AttributeError: 'DataFrame' object has no attribute 'queries'.
feature_view_dict = create_feature_views(
    feature_store=fs,
    feature_view_parameters=feature_view_parameters,
    entity_mapping=entities_mapping,
    feature_df=pandas_snowpark_df,
)
print(feature_view_dict)

# Scenario 3: Creating Feature view by converting feature engineered Snowpark Pandas API into Snowpark_df 
Observations:
- Feature engineering with
      - Read the table as a Snowpark pandas DataFrame: snowpark_pandas_df = pd.read_snowflake(table_name)
      - Select a few columns, rename a column header, apply sklearn's OneHotEncoder
      - The resulting Snowpark pandas DataFrame is converted to a Snowpark DataFrame using pd.to_snowpark()
- Create feature view
      - feature_df = Snowpark DataFrame
      - With a refresh frequency

In [None]:
# Import python packages
from typing import List, Dict,Any

import modin.pandas as pd
import snowflake.snowpark.modin.plugin

from sklearn.preprocessing import OneHotEncoder
import snowflake.snowpark as snowpark
from snowflake.snowpark.functions import col
from snowflake.ml.feature_store import FeatureStore, FeatureView, Entity, CreationMode

from snowflake.snowpark.context import get_active_session
# --- Scenario 3 setup: read, one-hot encode with scikit-learn, convert to Snowpark ---

# Session context comes from the notebook's active Snowflake session.
session = get_active_session()
database = session.get_current_database()
schema = session.get_current_schema()
warehouse = session.get_current_warehouse()
source_table = "NYC_YELLOW_TRIPS"
fea_store_name = "NYC_store"
print(f"active session: {session}")

# Fully qualified name built from the session's current database and schema.
table_name = f"{database}.{schema}.{source_table}"
print(f"souce table: {table_name}")

# Create a Snowpark pandas DataFrame.
snowpark_pandas_df = pd.read_snowflake(table_name)

# Feature transformation: keep six columns and rename the drop-off
# timestamp to TIME_STAMP (the timestamp column named in the feature view spec).
trip_df = snowpark_pandas_df[["TPEP_DROPOFF_DATETIME", "TRIP_ID", "PASSENGER_COUNT",
                              "TRIP_DISTANCE", "FARE_AMOUNT", "STORE_AND_FWD_FLAG"]]
trip_df = trip_df.rename(columns={"TPEP_DROPOFF_DATETIME": "TIME_STAMP"})
print("pandas dataframe:")
print(trip_df.head())

# One-hot encode STORE_AND_FWD_FLAG with scikit-learn and swap the original
# column for the encoded ones.
# NOTE(review): OneHotEncoder is a client-side scikit-learn transformer --
# confirm how it and the following concat behave on a Snowpark pandas frame.
oh = OneHotEncoder(sparse_output=False).set_output(transform="pandas")
one_hot_encoded = oh.fit_transform(trip_df[["STORE_AND_FWD_FLAG"]])
print("one hot encoded dataframe....")
print(one_hot_encoded.head())
trans_df = pd.concat([trip_df, one_hot_encoded], axis=1).drop(columns=["STORE_AND_FWD_FLAG"])
print("pandas dataframe:")
print(trans_df.head())

# A Snowpark pandas DataFrame is not accepted for feature view creation,
# so convert it to a Snowpark DataFrame.
pandas_snowpark_df = pd.to_snowpark(trans_df, index=False)
print("converting snowpark pandas_df into snowpark DataFrame")
print(f"data type: {type(pandas_snowpark_df)}")
# show() prints the rows itself and returns None; wrapping it in print()
# only appended a stray "None" line to the output.
pandas_snowpark_df.show()

# Open the feature store, creating it if it does not exist yet.
fs = FeatureStore(
    session=session,
    database=database,
    name=fea_store_name,
    default_warehouse=warehouse,
    creation_mode=CreationMode.CREATE_IF_NOT_EXIST,
)
print(f"feature store: {fs} created")


def create_entity(fs: FeatureStore, name: str, join_keys: List[str], desc: str) -> Entity:
    """Register a freshly built Entity in the feature store and return the result.

    Args:
        fs: Feature store to register the entity in.
        name: Entity name.
        join_keys: Column names given to ``Entity`` as its join keys.
        desc: Description given to ``Entity``.

    Returns:
        Whatever ``fs.register_entity`` returns for the entity.

    Note:
        According to the original author's observation, an entity that
        already exists only produces a UserWarning
        ("Entity ... already exists. Skip registration.").
    """
    entity_kwargs = {"name": name, "join_keys": join_keys, "desc": desc}
    return fs.register_entity(Entity(**entity_kwargs))

# One dict per entity to register; the keys match create_entity's keyword parameters.
entity_parameter_list = [
    {"name": "TRIP_NUM", "join_keys": ["TRIP_ID"], "desc": "Trip Unique Number"},
    {"name": "DROP_OFF_LOC", "join_keys": ["DOLOCATIONID"], "desc": "Drop off loc id."},
]

# Entity name -> registered Entity, used later to resolve a feature view's entities.
entities_mapping = {}
for entity_parameters in entity_parameter_list:
    entities_mapping[entity_parameters["name"]] = create_entity(fs=fs, **entity_parameters)

print(f"entities_mapping: {entities_mapping} created")

# Declarative description of the feature views to create; one dict per view.
feature_view_parameters = [
    {
        "name": "Trp_pandas_snowpark_df",
        "entities": ["TRIP_NUM"],  # entity names, resolved through the entity mapping
        "feature_df": "pandas_snowpark_df",  # label only: a string, not the DataFrame itself
        "desc": "Feature view made with fea engg'ed Pandas_df converted to snowpark_df with pd.to_snowpark()",
        "timestamp_col": "TIME_STAMP",
        "refresh_freq": "5 minutes",
        # Column name -> description. STORE_AND_FWD_FLAG is intentionally not
        # described: the one-hot step drops that column from the converted
        # DataFrame, so the description would name a column that is not there.
        "feature_desc": {
            "PASSENGER_COUNT": "The count of passenger of a trip.",
            "TRIP_DISTANCE": "The distance of a trip.",
            "FARE_AMOUNT": "The fare of a trip.",
        },
        "version": "1",
    },
]

def create_feature_views(feature_store: FeatureStore, feature_view_parameters: List[Dict[str, Any]],
                         entity_mapping: Dict[str, Entity], feature_df: snowpark.DataFrame) -> Dict[str, FeatureView]:
    """Create and register one FeatureView per parameter dict, reusing existing views.

    Args:
        feature_store: Feature store used to list and register feature views.
        feature_view_parameters: One dict per view. Required keys: "name",
            "version" and "entities" (a list of entity names). Optional keys:
            "timestamp_col", "refresh_freq", "desc" and "feature_desc"
            (column name -> description).
        entity_mapping: Entity name -> registered Entity; resolves "entities".
        feature_df: DataFrame handed to every FeatureView as its ``feature_df``.
            This scenario is meant to supply a Snowpark DataFrame.

    Returns:
        Feature view name -> the newly registered view, or the already
        registered view when one with the same name and version exists.

    Raises:
        KeyError: If a required key is missing from a parameter dict or an
            entity name is not present in ``entity_mapping``.
    """
    feature_view_mapping = {}
    # NOTE(review): assumes list_feature_views() yields objects exposing
    # .name and .version -- confirm for the installed snowflake-ml version.
    registered_views = feature_store.list_feature_views()

    for feature_view_param in feature_view_parameters:
        feature_view_name = feature_view_param["name"]
        feature_view_version = feature_view_param["version"]
        entities = [entity_mapping[name] for name in feature_view_param["entities"]]

        # Reuse a view that is already registered under the same name/version.
        # NOTE(review): this is an exact equality test; confirm it still matches
        # if Snowflake normalizes the identifier's case.
        existing_view = next(
            (view for view in registered_views
             if view.name == feature_view_name and view.version == feature_view_version),
            None,
        )
        if existing_view is not None:
            print(f"Feature View : {feature_view_name}_{feature_view_version} already exists")
            # Previously nothing was bound on this path, so the mapping
            # assignment failed (or silently reused the previous iteration's view).
            feature_view_mapping[feature_view_name] = existing_view
            continue

        # Create the FeatureView instance.
        fv_instance = FeatureView(
            name=feature_view_name,
            entities=entities,
            feature_df=feature_df,
            timestamp_col=feature_view_param.get("timestamp_col"),
            refresh_freq=feature_view_param.get("refresh_freq"),
            desc=feature_view_param.get("desc"),
        )
        # Descriptions are optional; skip the call when none were supplied.
        feature_desc = feature_view_param.get("feature_desc")
        if feature_desc:
            fv_instance = fv_instance.attach_feature_desc(feature_desc)

        # Register the FeatureView instance; this creates the object in Snowflake.
        feature_view_mapping[feature_view_name] = feature_store.register_feature_view(
            feature_view=fv_instance,
            version=feature_view_version,
            block=True,  # whether the call blocks until initial data is available
            overwrite=False,  # whether to replace an existing view with same name/version
        )
        print(f"Feature View : {feature_view_name}_{feature_view_version} created")

    return feature_view_mapping
    

# Build the feature view from the converted, feature-engineered Snowpark
# DataFrame rather than the intermediate Snowpark pandas frame `trip_df`,
# which is neither one-hot encoded nor a Snowpark DataFrame.
# NOTE(review): feature descriptions may only name columns present in this
# DataFrame; STORE_AND_FWD_FLAG is dropped by the one-hot step -- confirm the
# "feature_desc" entries match the converted frame's columns.
feature_view_dict = create_feature_views(
    feature_store=fs,
    feature_view_parameters=feature_view_parameters,
    entity_mapping=entities_mapping,
    feature_df=pandas_snowpark_df,
)
print(feature_view_dict)