In [None]:
!pip install -q snowflake-ml-python==1.5.0

In [26]:
from snowflake.ml.feature_store import FeatureStore, CreationMode
from snowflake.snowpark.context import get_active_session
from snowflake.ml.registry import Registry
from snowflake.snowpark.functions import *

session = get_active_session()

fs = FeatureStore(
    session=session, 
    database="AIRLINE_FEATURE_STORE",
    name="FEATURE_STORE",
    default_warehouse=session.get_current_warehouse(),
    creation_mode=CreationMode.CREATE_IF_NOT_EXIST,
)


mr = Registry(
    session=session,
    database_name="AIRLINE_FEATURE_STORE",
    schema_name="FEATURE_STORE",
)

[Row(status='Statement executed successfully.')]

## Determine Use Case / Label

In [3]:
spine_df = session.table('AIRLINE_FEATURE_STORE.FEATURE_STORE.US_FLIGHT_SCHEDULES')
spine_df.to_pandas().head()

Unnamed: 0,SCHEDULED_DEPARTURE_UTC,AIRPORT_NAME,AIRPORT_ZIP_CODE,DEPARTURE_CODE,DOMESTIC_CODE,TICKETS_SOLD,TAIL_NUMBER,PLANE_MODEL,DESTINATION_CODE,DEPARTING_DELAY
0,2024-03-05 05:51:00,Logan International Airport,2128,BOS,DLTA,295,#D836HHA,Boeing 787 Dreamliner,ATL,1
1,2024-03-05 05:53:00,Orlando International Airport,32827,MCO,DLTA,317,#S079NEE,Boeing 777,SEA,0
2,2024-03-05 05:55:00,Miami International Airport,33126,MIA,AA,492,#H580ILA,Airbus A380,MCO,0
3,2024-03-05 05:57:00,John F. Kennedy International Airport,11430,JFK,UA,222,#R737ISD,Airbus A330,DFW,0
4,2024-03-05 05:58:00,Fort Lauderdale–Hollywood International Airport,33315,FLL,UA,349,#X193UNL,Boeing 747,EWR,0


In [None]:
spine_df = session.sql('''
SELECT
    SCHEDULED_DEPARTURE_UTC,
    AIRPORT_ZIP_CODE,
    PLANE_MODEL,
    TICKETS_SOLD,
    DEPARTING_DELAY
FROM
    AIRLINE_FEATURE_STORE.FEATURE_STORE.US_FLIGHT_SCHEDULES
''')
spine_df.to_pandas().head()

## Explore Feature Views

In [4]:
fs.list_entities().to_pandas()



Unnamed: 0,NAME,JOIN_KEYS,DESC,OWNER
0,AIRPORT_ZIP_CODE,"[""AIRPORT_ZIP_CODE""]",,MILES
1,PLANE_MODEL,"[""PLANE_MODEL""]",,MILES


In [5]:
fs.list_feature_views(entity_name="AIRPORT_ZIP_CODE").to_pandas()



Unnamed: 0,NAME,ENTITIES,TIMESTAMP_COL,DESC,QUERY,VERSION,STATUS,FEATURE_DESC,REFRESH_FREQ,DATABASE,SCHEMA,WAREHOUSE,REFRESH_MODE,REFRESH_MODE_REASON,OWNER,PHYSICAL_NAME
0,WEATHER_FEATURES,"[\n {\n ""desc"": """",\n ""join_keys"": [\n ...",DATETIME_UTC,,"SELECT ""DATETIME_UTC"", ""AIRPORT_ZIP_CODE"", avg...",V1,FeatureViewStatus.ACTIVE,"{\n ""AVG30MIN_RAIN_MM_H"": """",\n ""AVG60MIN_RA...",1 minute,AIRLINE_FEATURE_STORE,FEATURE_STORE,MILES_WH,INCREMENTAL,,MILES,WEATHER_FEATURES$V1


In [6]:
fs.list_feature_views(entity_name="PLANE_MODEL").to_pandas()

Unnamed: 0,NAME,ENTITIES,TIMESTAMP_COL,DESC,QUERY,VERSION,STATUS,FEATURE_DESC,REFRESH_FREQ,DATABASE,SCHEMA,WAREHOUSE,REFRESH_MODE,REFRESH_MODE_REASON,OWNER,PHYSICAL_NAME
0,PLANE_FEATURES,"[\n {\n ""desc"": """",\n ""join_keys"": [\n ...",,,"SELECT ""PLANE_MODEL"", ""SEATING_CAPACITY"" FROM ...",V1,FeatureViewStatus.STATIC,"{\n ""SEATING_CAPACITY"": """"\n}",,AIRLINE_FEATURE_STORE,FEATURE_STORE,,,,MILES,PLANE_FEATURES$V1


In [7]:
weather_fv = FeatureView = fs.get_feature_view(
    name='WEATHER_FEATURES',
    version='V1'
)

plane_fv = FeatureView = fs.get_feature_view(
    name='PLANE_FEATURES',
    version='V1'
)



## Generate Training Dataset

In [28]:
training_data = fs.generate_dataset(
    name='US_FLIGHT_DELAYS_CLASSIFICATION',
    version='V8',
    spine_df=spine_df,
    features=[weather_fv, plane_fv],
    spine_timestamp_col="SCHEDULED_DEPARTURE_UTC",
    spine_label_cols = ["DEPARTING_DELAY"],
)

<snowflake.ml.dataset.dataset.Dataset at 0x297187410>

In [None]:
SHOW DATASETS IN SCHEMA AIRLINE_FEATURE_STORE.FEATURE_STORE

In [None]:
training_data_pd = training_data.read.to_pandas()
training_data_pd.head(5)

## Train & Register Model

In [30]:
X_train = training_data_pd[['TICKETS_SOLD', 'AVG30MIN_RAIN_MM_H', 'AVG60MIN_RAIN_MM_H', 'SEATING_CAPACITY']]
X_train.head()

Unnamed: 0,TICKETS_SOLD,AVG30MIN_RAIN_MM_H,AVG60MIN_RAIN_MM_H,SEATING_CAPACITY
0,317,0.009734949,0.13994,368
1,222,2.786821,1.944952,368
2,346,0.4356897,2.158125,368
3,278,-1.154632e-15,0.155997,335
4,302,0.2246927,0.112346,375


In [31]:
y_train = training_data_pd['DEPARTING_DELAY']
y_train.head()

Unnamed: 0,DEPARTING_DELAY
0,0
1,0
2,1
3,0
4,0


In [14]:
import pandas as pd

#from sklearn.linear_model import LogisticRegression
from snowflake.ml.modeling.linear_model import LogisticRegression

model = LogisticRegression(
    input_cols = X_train.columns.to_list(),
    label_cols = 'DEPARTING_DELAY'
)

model.fit(pd.concat([X_train, y_train], axis=1))

model.predict_proba(X_train)

Unnamed: 0,TICKETS_SOLD,AVG30MIN_RAIN_MM_H,AVG60MIN_RAIN_MM_H,SEATING_CAPACITY,OUTPUT_DEPARTING_DELAY
0,317,9.734949e-03,0.139940,368,0
1,222,2.786821e+00,1.944952,368,0
2,346,4.356897e-01,2.158125,368,1
3,278,-1.154632e-15,0.155997,335,0
4,302,2.246927e-01,0.112346,375,0
...,...,...,...,...,...
8635,180,1.676300e-01,0.083815,277,0
8636,123,4.681440e-15,0.060709,180,0
8637,196,3.844048e-01,0.192202,277,0
8638,241,1.955747e+00,2.044662,400,0


In [24]:
mv = mr.log_model(model,
    model_name="WEATHER_DELAY_MODEL",
    version_name="V1",
    conda_dependencies=["snowflake-ml-python"],
    comment="My awesome ML model",
    metrics={"score": 96},
    sample_input_data=X_train
)



{'name': 'AIRLINE_FEATURE_STORE.FEATURE_STORE.test',
 'platform': <TargetPlatform.SNOWPARK_CONTAINER_SERVICES: 'SNOWPARK_CONTAINER_SERVICES'>,
 'target_method': 'predict',
 'signature': ModelSignature(
                     inputs=[
                         FeatureSpec(dtype=DataType.INT16, name='TICKETS_SOLD'),
 		FeatureSpec(dtype=DataType.DOUBLE, name='AVG30MIN_RAIN_MM_H'),
 		FeatureSpec(dtype=DataType.DOUBLE, name='AVG60MIN_RAIN_MM_H'),
 		FeatureSpec(dtype=DataType.INT16, name='SEATING_CAPACITY')
                     ],
                     outputs=[
                         FeatureSpec(dtype=DataType.INT16, name='TICKETS_SOLD'),
 		FeatureSpec(dtype=DataType.DOUBLE, name='AVG30MIN_RAIN_MM_H'),
 		FeatureSpec(dtype=DataType.DOUBLE, name='AVG60MIN_RAIN_MM_H'),
 		FeatureSpec(dtype=DataType.INT16, name='SEATING_CAPACITY'),
 		FeatureSpec(dtype=DataType.INT8, name='OUTPUT_DEPARTING_DELAY')
                     ]
                 ),
 'options': {'compute_pool': 'SNOWFLAKE_MODEL_REGIST

## Serve Features in Production

In [None]:
entity_df = session.table('AIRLINE_FEATURE_STORE.FEATURE_STORE.US_FLIGHT_SCHEDULES') \
    .sort(col('SCHEDULED_DEPARTURE_UTC'), ascending=False) \
    .select('SCHEDULED_DEPARTURE_UTC', 'AIRPORT_ZIP_CODE', 'PLANE_MODEL', 'TICKETS_SOLD') \
    .limit(1)
entity_df.to_pandas()

In [None]:
pred_df = fs.retrieve_feature_values(
    spine_df=entity_df, 
    features=fs.load_feature_views_from_dataset(training_data), 
    spine_timestamp_col='SCHEDULED_DEPARTURE_UTC',
    exclude_columns=['SCHEDULED_DEPARTURE_UTC', 'AIRPORT_ZIP_CODE', 'PLANE_MODEL']
)
pred_df.to_pandas()

In [None]:
result_df = mr.get_model("WEATHER_DELAY_MODEL").version("V1").run(pred_df, function_name='predict_proba')
result_df