# Snowflake to Model Deployment Demo

In this demo, you'll walk through a complete machine learning pipeline—from data ingestion to deployment and inference—using containerized infrastructure.

## Demo Overview

This demo includes the following key steps:

1. **Data Ingestion from Snowflake**  
   Pull the structured Titanic dataset from Snowflake.

2. **Feature Engineering**  
   Transform raw data into meaningful features for model training.

3. **Model Training with XGBoost**  
   Use XGBoost to train a classification model on the engineered dataset.

4. **Model Deployment**  
   Register and deploy the trained model.

5. **Batch Inference**  
   Call the deployed model to make predictions on new batches of data.


In [None]:
# Not necessary, since these packages come with the runtime (just an example)
#!pip install xgboost snowflake-ml-python 

In [None]:
# Import python packages
import streamlit as st
import pandas as pd
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
from sklearn.model_selection import GridSearchCV
from snowflake.ml.registry import Registry
import ast
#add another package
# We can also use Snowpark for our analyses!
# get_active_session() supplies the Snowpark session that the cells below use
# for table reads/writes, the model registry and the warehouse lookup.
from snowflake.snowpark.context import get_active_session
session = get_active_session()


In [None]:
# Load the Titanic extract from the local CSV, then discard the columns that
# are not carried forward into feature engineering in this demo.
unused_columns = [
    "AGE",
    "DECK",
    "ALIVE",
    "ADULT_MALE",
    "EMBARKED",
    "PCLASS",
    "ALONE",
    "SEX",
]
titanic = pd.read_csv('data/titanic_snowflake.csv')
titanic = titanic.drop(columns=unused_columns)

# Preview the first rows of the trimmed frame.
titanic.head()

Usually your data will already be in Snowflake. This next step shows how to write the pandas DataFrame to a Snowflake table, and then how to read a Snowflake table back into a pandas DataFrame.

In [None]:
# This step turns pandas -> snowpark and writes to snowflake.
# The table is saved as "titanic_raw" using write mode "overwrite".
titanic_sf = session.create_dataframe(titanic)
titanic_sf.write.mode("overwrite").save_as_table("titanic_raw")

In [None]:
# Here we read the titanic_raw table from Snowflake through the Snowpark
# session and convert the result to a pandas DataFrame with to_pandas().
# Note: the cells below keep working with `titanic`, not `titanic_raw`.

titanic_raw = session.table('titanic_raw').to_pandas()
titanic_raw.head()

In [None]:
-- Query the titanic_raw table directly from a SQL cell.
select * from titanic_raw

In [None]:
# df_sql is not assigned in any visible Python cell; presumably it is the name
# of the SQL cell above, whose result is exposed to Python - TODO confirm.
# The result is converted to pandas and its first rows are previewed.
df_sql.to_pandas().head()

In [None]:
# Remove every row that contains a missing value, modifying `titanic` in place.
titanic.dropna(inplace=True)

In [None]:
# One-hot encode the frame with get_dummies (drop_first=True), then cast every
# boolean column to int while leaving all other columns as they are.
titanic = (
    pd.get_dummies(titanic, drop_first=True)
    .apply(lambda column: column.astype(int) if column.dtype == 'bool' else column)
)

# Show the resulting column dtypes.
titanic.dtypes

In [None]:
# Separate the SURVIVED target column from the remaining feature columns.
y = titanic['SURVIVED']
x = titanic.drop(columns='SURVIVED')

In [None]:
# 70% of the rows go to training and the rest are held out for testing;
# random_state is fixed so the split is repeatable.
xtrain, xtest, ytrain, ytest = train_test_split(
    x,
    y,
    train_size=.70,
    random_state=1234,
)

In [None]:
# Hyperparameter search space handed to GridSearchCV: every combination of
# these values is evaluated.
param_grid = dict(
    n_estimators=[100, 200],
    learning_rate=[0.1, 0.5],
    max_depth=list(range(1, 7)),
    min_child_weight=[1, 6],
)

In [None]:
# Base estimator: binary logistic XGBoost classifier evaluated with log loss.
model = XGBClassifier(eval_metric='logloss', objective='binary:logistic')

# Search param_grid around the base estimator. Neither cv nor scoring is
# passed, so GridSearchCV falls back to its defaults for both.
grid_search = GridSearchCV(model, param_grid)

# Fit on the training split only.
grid_search.fit(xtrain, ytrain)

In [None]:
# Pull the winning estimator, its parameters and its search score out of the
# fitted grid search, and report the latter two.
best_model = grid_search.best_estimator_
best_params, best_score = grid_search.best_params_, grid_search.best_score_
print("Best Parameters:", best_params)
print("Best Score:", best_score)

# Score the winning estimator on the held-out split.
test_score = best_model.score(xtest, ytest)
print("Test Score:", test_score)

In [None]:
# Metadata passed to log_model below. "Accuracy" holds the grid search's
# best_score_ (not the held-out test_score); "Params" holds the winning
# hyperparameters.
metrics = dict(Accuracy=best_score, Params=best_params)

metrics

In [None]:
from snowflake.ml.registry import Registry

# A single training row serves as the sample input handed to log_model.
X = xtrain.sample(n=1)

# The registry is created from the session context here; a different DB and
# schema can be specified if you'd like. If a registry does not exist it will
# create one.
reg = Registry(session=session)

# Name under which the model is registered (use uppercase for the name).
model_name = "TITANIC"

# Log the best estimator together with its metrics.
titanic_model = reg.log_model(
    model=best_model,
    model_name=model_name,
    sample_input_data=X,
    metrics=metrics,
    target_platforms=["WAREHOUSE"],
    options={"relax_version": True},
    # version_name="V_1",  # If you leave version_name off SF creates one
)

In [None]:
# List the models known to the registry and keep only the rows whose name
# matches the model registered above.
models_df = reg.show_models()
models_df[models_df['name'] == model_name]

In [None]:
# List every version of the model, newest first (created_on descending).
models = reg.get_model(model_name).show_versions()
models.sort_values(by='created_on', ascending=False)

In [None]:
# Fetch the version returned by last() for inspection (presumably the most
# recently created version - confirm against the registry API).
recent_model = reg.get_model(model_name).last()
recent_model

In [None]:
# Make the version returned by last() the model's default version.
# The default is a property of the Model object returned by reg.get_model(),
# not of a ModelVersion. Assigning `.default` on the ModelVersion (as this cell
# did before) only sets a local Python attribute and leaves the registry
# unchanged, so SQL calls that do not name a version (TITANIC!predict_proba)
# would keep using the previous default.
registered_model = reg.get_model(model_name)
m = registered_model.last()
registered_model.default = m

# Read the default back from the registry and show which version it is.
mv = registered_model.default
mv.version_name

In [None]:
# Run the model version's PREDICT_PROBA function on the held-out features and
# preview the first rows of the result.
remote_prediction = mv.run(xtest, function_name="PREDICT_PROBA")
remote_prediction.head()

In [None]:
# Write the held-out features to the titanic_predict table (write mode
# "overwrite"); reset_index(drop=True) discards the original row index first.
test_sf = session.create_dataframe(xtest.reset_index(drop=True))
test_sf.write.mode("overwrite").save_as_table("titanic_predict")
# Display rows of the table that was just written.
session.table('titanic_predict').show()

In [None]:
-- Score every row of titanic_predict in SQL: all columns (*) are passed to the
-- TITANIC model's predict_proba method, and the output_feature_0 field of the
-- result is rounded to 2 decimals and returned as surv_pred.
-- NOTE(review): output_feature_0 is presumably the probability of the first
-- class (SURVIVED = 0); if surv_pred is meant to be the survival probability,
-- output_feature_1 may be the intended field - confirm against the model's
-- predict_proba signature.
select *, round(TITANIC!predict_proba(*):output_feature_0,2)
as surv_pred
from titanic_predict

### Deployment Example: Dynamic Table with Model Predictions

This example demonstrates how to use a dynamic table to apply your model to newly inserted data.

- **Step 1:** Create a dynamic table that uses your model to predict current data.
- **Step 2:** Insert new data into the source table.
- **Step 3:** Navigate to the dynamic table UI and observe the model running on the new data (typically within 1 minute).
- **Step 4:** Once the demo is complete, drop the dynamic table to clean up.

In [None]:
# Look up the session's current warehouse; the dynamic-table SQL cell below
# references this value as {{current_wh}}.
current_wh = session.get_current_warehouse()
current_wh

In [None]:
-- Dynamic table that scores the rows of titanic_predict with the TITANIC
-- model's predict_proba method, with a target lag of 1 minute.
-- {{current_wh}} refers to the Python variable set in the previous cell.
-- NOTE(review): output_feature_0 is presumably the probability of the first
-- class (SURVIVED = 0); confirm that this is what surv_pred should hold.
create or replace dynamic table titanic_batch_inference
target_lag = '1 minute' 
warehouse = {{current_wh}} as
select *, round(TITANIC!predict_proba(*):output_feature_0,2)
as surv_pred
from titanic_predict;

-- Preview the scored rows.
select * from titanic_batch_inference;

In [None]:
-- Step 2 of the dynamic-table demo: add new rows to the dynamic table's source
-- table. titanic_batch_inference selects from titanic_predict, so the rows
-- must be inserted there for the dynamic table to pick them up (the previous
-- target, test_pd, is not created anywhere in this notebook).
INSERT INTO titanic_predict (
    SIBSP, PARCH, FARE, CLASS_SECOND, CLASS_THIRD,
    WHO_MAN, WHO_WOMAN,
    EMBARK_TOWN_QUEENSTOWN, EMBARK_TOWN_SOUTHAMPTON
) VALUES
(0, 0, 10.5, 0, 1, 1, 0, 1, 0),
(2, 1, 23.0, 1, 0, 0, 1, 0, 1),
(0, 2, 15.75, 1, 0, 0, 1, 1, 0),
(1, 1, 7.925, 0, 1, 1, 0, 0, 1),
(0, 0, 7.75, 0, 1, 1, 0, 0, 1),
(3, 2, 21.6792, 1, 0, 1, 0, 0, 1),
(0, 0, 8.05, 0, 1, 1, 0, 1, 0),
(0, 0, 8.6625, 0, 1, 0, 1, 1, 0),
(1, 0, 26.0, 1, 0, 1, 0, 0, 1),
(0, 1, 19.2583, 1, 0, 0, 1, 0, 1),
(1, 1, 69.3, 0, 0, 1, 0, 0, 1),
(0, 0, 7.2292, 0, 1, 1, 0, 1, 0),
(0, 0, 13.0, 0, 1, 1, 0, 0, 1),
(0, 0, 9.8375, 0, 1, 1, 0, 1, 0),
(1, 0, 55.0, 0, 0, 1, 0, 0, 0),
(2, 0, 27.7208, 1, 0, 0, 1, 0, 1),
(0, 0, 6.975, 0, 1, 1, 0, 1, 0),
(0, 0, 8.3, 0, 1, 1, 0, 0, 1),
(1, 1, 46.9, 1, 0, 1, 0, 0, 0),
(0, 2, 16.7, 0, 1, 0, 1, 0, 1);

In [None]:
-- Step 4 clean-up: remove the dynamic table created for the demo.
drop dynamic table titanic_batch_inference;