# Imports

In [25]:
# Suppress all Python warnings for the rest of the kernel session. This is done
# before the third-party imports below so warnings emitted while importing them
# are hidden as well.
import warnings
warnings.filterwarnings("ignore")

# Load variables
# load_dotenv() is called with no arguments (presumably picking up a local .env
# file -- confirm); the connection cell reads the SF_* settings through os.getenv.
import os
from dotenv import load_dotenv
load_dotenv()

# Snowpark Imports
# F and T are the short aliases used throughout the notebook for Snowpark
# column functions and data types.
from snowflake.snowpark.session import Session
import snowflake.snowpark.functions as F
import snowflake.snowpark.types as T
from snowflake.ml.model.model_signature import FeatureSpec, DataType, ModelSignature

# Connect to Snowflake

In [2]:
# Connection settings are taken from environment variables (SF_* names below),
# so no credentials are hard-coded in the notebook.
snowflake_connection_cfg = {
    "ACCOUNT": os.getenv('SF_ACCOUNT'),
    "USER": os.getenv('SF_USER'),
    "ROLE": os.getenv('SF_ROLE'),
    "PASSWORD": os.getenv('SF_PASSWORD'),
    "DATABASE": os.getenv('SF_DATABASE'),
    "SCHEMA": os.getenv('SF_SCHEMA'),
    "WAREHOUSE": os.getenv('SF_WAREHOUSE')
}

# os.getenv returns None for unset variables. Fail fast with a message that
# names the missing variable instead of letting the connector raise a less
# specific error. Only the credentials needed for password authentication are
# enforced; ROLE/DATABASE/SCHEMA/WAREHOUSE are still passed through as-is.
required_settings = ("ACCOUNT", "USER", "PASSWORD")
missing_settings = [
    f"SF_{setting}"
    for setting in required_settings
    if not snowflake_connection_cfg[setting]
]
if missing_settings:
    raise ValueError(
        "Missing required environment variable(s): " + ", ".join(missing_settings)
    )

# Creating Snowpark Session
session = Session.builder.configs(snowflake_connection_cfg).create()

# Print the active context so the reader can see where the notebook is running.
print('Role:     ', session.get_current_role())
print('Warehouse:', session.get_current_warehouse())
print('Database: ', session.get_current_database())
print('Schema:   ', session.get_current_schema())

Role:      "ACCOUNTADMIN"
Warehouse: "COMPUTE_WH"
Database:  "MACHINE_LEARNING"
Schema:    "PUBLIC"


In [19]:
# Demo data: ten example sentences, grouped by the topic they are about.
sentences_by_topic = {
    'politics': [
        "The government announced a new policy aimed at improving infrastructure and public services.",
        "Voters are showing increased interest in climate change issues in the upcoming elections.",
        "Recent debates highlight the need for reform in healthcare and education sectors.",
    ],
    'sports': [
        "The local team secured a victory in the championship game after a dramatic final match.",
        "Athlete X broke the world record in the 100-meter sprint at the international track event.",
        "The sports community is embracing technology to enhance fair play and performance metrics.",
    ],
    'technology': [
        "Advancements in AI are transforming industries, from healthcare to automotive manufacturing.",
        "Cybersecurity has become a top priority for businesses as digital threats continue to evolve.",
        "The latest smartphone model features an innovative solar charging system.",
        "Researchers have developed a new algorithm that significantly speeds up data processing.",
    ],
}

# Flatten to one list of sentences; dict insertion order keeps the
# politics -> sports -> technology ordering.
mixed_topics_sentences = [
    sentence
    for topic_sentences in sentences_by_topic.values()
    for sentence in topic_sentences
]

# The labels the classifier chooses between are the topic names.
candidate_labels = list(sentences_by_topic)

# One row per sentence, with the same label list attached to every row.
df = (
    session
    .create_dataframe(mixed_topics_sentences, schema=['"sequences"'])
    .with_column('"candidate_labels"', F.lit(candidate_labels))
)
df.show(n=15, max_width=1000)

----------------------------------------------------------------------------------------------------------------------
|"sequences"                                                                                    |"candidate_labels"  |
----------------------------------------------------------------------------------------------------------------------
|The government announced a new policy aimed at improving infrastructure and public services.   |[                   |
|                                                                                               |  "politics",       |
|                                                                                               |  "sports",         |
|                                                                                               |  "technology"      |
|                                                                                               |]                   |
|Voters are showing increased interest in climat

# Register & Run Zero-Shot Model

In [5]:
# Open the model registry in the session's current database and schema
from snowflake.ml.registry import Registry

registry_location = {
    "database_name": session.get_current_database(),
    "schema_name": session.get_current_schema(),
}
reg = Registry(session=session, **registry_location)

In [38]:
# Load the zero-shot classification pipeline from Hugging Face.
# Make sure the model fits into a Snowflake warehouse and does not require GPUs;
# otherwise it must be deployed in Snowpark Container Services.
from transformers import pipeline
pipe = pipeline(
    task="zero-shot-classification",
    model="MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli",
)

# Switch to the SOWH warehouse (presumably Snowpark-optimized -- confirm)
# because the model is big. Later cells keep using this warehouse.
session.use_warehouse('snowpark_opt_wh')

# Register the pipeline in the Snowflake model registry. A sample of the test
# data is passed as sample_input_data, and the listed conda packages are
# declared as the model's dependencies.
registration_options = dict(
    model_name='deberta_v3_base_mnli_fever_anli',
    sample_input_data=df.limit(10),
    conda_dependencies=['tokenizers', 'transformers'],
)
snow_model_custom = reg.log_model(pipe, **registration_options)

# Run the registered model on the test data and cache the result so the
# following cells can reuse it.
scored_df = snow_model_custom.run(df)
classifications = scored_df.cache_result()
classifications.show()

-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|"sequences"                                         |"candidate_labels"  |"sequence"                                          |"labels"         |"scores"                  |
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|The latest smartphone model features an innovat...  |[                   |The latest smartphone model features an innovat...  |[                |[                         |
|                                                    |  "politics",       |                                                    |  "technology",  |  0.9903589487075806,     |
|                                                    |  "sports",         |                                                    |  

In [36]:
# Keep only the first entry of the "labels" and "scores" arrays for each
# sentence (index 0 is used as the top prediction) and cast them to plain
# string / float columns.
top_label = F.col('"labels"')[0].cast(T.StringType()).alias('LABEL')
top_score = F.col('"scores"')[0].cast(T.FloatType()).alias('SCORE')

top_predictions = classifications.select(F.col('"sequences"'), top_label, top_score)
top_predictions.show(n=10, max_width=1000)

-----------------------------------------------------------------------------------------------------------------------------------
|"sequences"                                                                                    |"LABEL"     |"SCORE"             |
-----------------------------------------------------------------------------------------------------------------------------------
|The latest smartphone model features an innovative solar charging system.                      |technology  |0.9903589487075806  |
|Researchers have developed a new algorithm that significantly speeds up data processing.       |technology  |0.9947484135627747  |
|Recent debates highlight the need for reform in healthcare and education sectors.              |politics    |0.8969783782958984  |
|Advancements in AI are transforming industries, from healthcare to automotive manufacturing.   |technology  |0.986897885799408   |
|The sports community is embracing technology to enhance fair play and perfo